summaryrefslogtreecommitdiffstats
path: root/src/compiler
diff options
context:
space:
mode:
authorEmil Velikov <[email protected]>2016-01-18 12:54:03 +0200
committerEmil Velikov <[email protected]>2016-01-26 16:08:30 +0000
commita39a8fbbaa129f4e52f2a3ad2747182e9a74d910 (patch)
tree823e881d54c149cde315bb140e46a8b781cdccb7 /src/compiler
parentf694da80c75cb2a51d0af3b24d68aae9c53d61aa (diff)
nir: move to compiler/
Signed-off-by: Emil Velikov <[email protected]> Acked-by: Matt Turner <[email protected]> Acked-by: Jose Fonseca <[email protected]>
Diffstat (limited to 'src/compiler')
-rw-r--r--src/compiler/Android.gen.mk91
-rw-r--r--src/compiler/Android.mk23
-rw-r--r--src/compiler/Makefile.am78
-rw-r--r--src/compiler/Makefile.sources71
-rw-r--r--src/compiler/nir/.gitignore5
-rw-r--r--src/compiler/nir/README118
-rw-r--r--src/compiler/nir/glsl_to_nir.cpp2031
-rw-r--r--src/compiler/nir/glsl_to_nir.h41
-rw-r--r--src/compiler/nir/nir.c1665
-rw-r--r--src/compiler/nir/nir.h2111
-rw-r--r--src/compiler/nir/nir_algebraic.py305
-rw-r--r--src/compiler/nir/nir_array.h96
-rw-r--r--src/compiler/nir/nir_builder.h364
-rw-r--r--src/compiler/nir/nir_builder_opcodes_h.py38
-rw-r--r--src/compiler/nir/nir_clone.c659
-rw-r--r--src/compiler/nir/nir_constant_expressions.h31
-rw-r--r--src/compiler/nir/nir_constant_expressions.py336
-rw-r--r--src/compiler/nir/nir_control_flow.c808
-rw-r--r--src/compiler/nir/nir_control_flow.h162
-rw-r--r--src/compiler/nir/nir_control_flow_private.h37
-rw-r--r--src/compiler/nir/nir_dominance.c350
-rw-r--r--src/compiler/nir/nir_from_ssa.c805
-rw-r--r--src/compiler/nir/nir_gs_count_vertices.c93
-rw-r--r--src/compiler/nir/nir_instr_set.c519
-rw-r--r--src/compiler/nir/nir_instr_set.h62
-rw-r--r--src/compiler/nir/nir_intrinsics.c49
-rw-r--r--src/compiler/nir/nir_intrinsics.h316
-rw-r--r--src/compiler/nir/nir_liveness.c297
-rw-r--r--src/compiler/nir/nir_lower_alu_to_scalar.c210
-rw-r--r--src/compiler/nir/nir_lower_atomics.c166
-rw-r--r--src/compiler/nir/nir_lower_clip.c339
-rw-r--r--src/compiler/nir/nir_lower_global_vars_to_local.c113
-rw-r--r--src/compiler/nir/nir_lower_gs_intrinsics.c219
-rw-r--r--src/compiler/nir/nir_lower_idiv.c151
-rw-r--r--src/compiler/nir/nir_lower_io.c350
-rw-r--r--src/compiler/nir/nir_lower_load_const_to_scalar.c89
-rw-r--r--src/compiler/nir/nir_lower_locals_to_regs.c396
-rw-r--r--src/compiler/nir/nir_lower_outputs_to_temporaries.c133
-rw-r--r--src/compiler/nir/nir_lower_phis_to_scalar.c293
-rw-r--r--src/compiler/nir/nir_lower_samplers.c187
-rw-r--r--src/compiler/nir/nir_lower_system_values.c98
-rw-r--r--src/compiler/nir/nir_lower_tex.c355
-rw-r--r--src/compiler/nir/nir_lower_to_source_mods.c196
-rw-r--r--src/compiler/nir/nir_lower_two_sided_color.c212
-rw-r--r--src/compiler/nir/nir_lower_var_copies.c190
-rw-r--r--src/compiler/nir/nir_lower_vars_to_ssa.c973
-rw-r--r--src/compiler/nir/nir_lower_vec_to_movs.c310
-rw-r--r--src/compiler/nir/nir_metadata.c90
-rw-r--r--src/compiler/nir/nir_move_vec_src_uses_to_dest.c197
-rw-r--r--src/compiler/nir/nir_normalize_cubemap_coords.c120
-rw-r--r--src/compiler/nir/nir_opcodes.py668
-rw-r--r--src/compiler/nir/nir_opcodes_c.py55
-rw-r--r--src/compiler/nir/nir_opcodes_h.py47
-rw-r--r--src/compiler/nir/nir_opt_algebraic.py285
-rw-r--r--src/compiler/nir/nir_opt_constant_folding.c201
-rw-r--r--src/compiler/nir/nir_opt_copy_propagate.c290
-rw-r--r--src/compiler/nir/nir_opt_cse.c93
-rw-r--r--src/compiler/nir/nir_opt_dce.c183
-rw-r--r--src/compiler/nir/nir_opt_dead_cf.c358
-rw-r--r--src/compiler/nir/nir_opt_gcm.c494
-rw-r--r--src/compiler/nir/nir_opt_global_to_local.c102
-rw-r--r--src/compiler/nir/nir_opt_peephole_select.c256
-rw-r--r--src/compiler/nir/nir_opt_remove_phis.c130
-rw-r--r--src/compiler/nir/nir_opt_undef.c104
-rw-r--r--src/compiler/nir/nir_print.c1069
-rw-r--r--src/compiler/nir/nir_remove_dead_variables.c141
-rw-r--r--src/compiler/nir/nir_search.c379
-rw-r--r--src/compiler/nir/nir_search.h99
-rw-r--r--src/compiler/nir/nir_split_var_copies.c285
-rw-r--r--src/compiler/nir/nir_sweep.c173
-rw-r--r--src/compiler/nir/nir_to_ssa.c536
-rw-r--r--src/compiler/nir/nir_validate.c1071
-rw-r--r--src/compiler/nir/nir_vla.h54
-rw-r--r--src/compiler/nir/nir_worklist.c144
-rw-r--r--src/compiler/nir/nir_worklist.h91
-rw-r--r--src/compiler/nir/tests/control_flow_tests.cpp148
76 files changed, 24404 insertions, 0 deletions
diff --git a/src/compiler/Android.gen.mk b/src/compiler/Android.gen.mk
new file mode 100644
index 00000000000..fcd5f94d459
--- /dev/null
+++ b/src/compiler/Android.gen.mk
@@ -0,0 +1,91 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <[email protected]>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# included by compiler Android.mk for source generation
+
+ifeq ($(LOCAL_MODULE_CLASS),)
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+endif
+
+intermediates := $(call local-generated-sources-dir)
+
+LOCAL_SRC_FILES := $(LOCAL_SRC_FILES)
+
+LOCAL_C_INCLUDES += \
+ $(intermediates)/nir \
+ $(MESA_TOP)/src/compiler/nir
+
+LOCAL_EXPORT_C_INCLUDE_DIRS += \
+ $(intermediates)/nir \
+ $(MESA_TOP)/src/compiler/nir
+
+LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \
+ $(NIR_GENERATED_FILES))
+
+
+nir_builder_opcodes_gen := $(LOCAL_PATH)/nir/nir_builder_opcodes_h.py
+nir_builder_opcodes_deps := \
+ $(LOCAL_PATH)/nir/nir_opcodes.py \
+ $(LOCAL_PATH)/nir/nir_builder_opcodes_h.py
+
+$(intermediates)/nir/nir_builder_opcodes.h: $(nir_builder_opcodes_deps)
+ @mkdir -p $(dir $@)
+ $(hide) $(MESA_PYTHON2) $(nir_builder_opcodes_gen) $< > $@
+
+nir_constant_expressions_gen := $(LOCAL_PATH)/nir/nir_constant_expressions.py
+nir_constant_expressions_deps := \
+ $(LOCAL_PATH)/nir/nir_opcodes.py \
+ $(LOCAL_PATH)/nir/nir_constant_expressions.py
+
+$(intermediates)/nir/nir_constant_expressions.c: $(nir_constant_expressions_deps)
+ @mkdir -p $(dir $@)
+ $(hide) $(MESA_PYTHON2) $(nir_constant_expressions_gen) $< > $@
+
+nir_opcodes_h_gen := $(LOCAL_PATH)/nir/nir_opcodes_h.py
+nir_opcodes_h_deps := \
+ $(LOCAL_PATH)/nir/nir_opcodes.py \
+ $(LOCAL_PATH)/nir/nir_opcodes_h.py
+
+$(intermediates)/nir/nir_opcodes.h: $(nir_opcodes_h_deps)
+ @mkdir -p $(dir $@)
+ $(hide) $(MESA_PYTHON2) $(nir_opcodes_h_gen) $< > $@
+
+$(LOCAL_PATH)/nir/nir.h: $(intermediates)/nir/nir_opcodes.h
+
+nir_opcodes_c_gen := $(LOCAL_PATH)/nir/nir_opcodes_c.py
+nir_opcodes_c_deps := \
+ $(LOCAL_PATH)/nir/nir_opcodes.py \
+ $(LOCAL_PATH)/nir/nir_opcodes_c.py
+
+$(intermediates)/nir/nir_opcodes.c: $(nir_opcodes_c_deps)
+ @mkdir -p $(dir $@)
+ $(hide) $(MESA_PYTHON2) $(nir_opcodes_c_gen) $< > $@
+
+nir_opt_algebraic_gen := $(LOCAL_PATH)/nir/nir_opt_algebraic.py
+nir_opt_algebraic_deps := \
+ $(LOCAL_PATH)/nir/nir_opt_algebraic.py \
+ $(LOCAL_PATH)/nir/nir_algebraic.py
+
+$(intermediates)/nir/nir_opt_algebraic.c: $(nir_opt_algebraic_deps)
+ @mkdir -p $(dir $@)
+ $(hide) $(MESA_PYTHON2) $(nir_opt_algebraic_gen) $< > $@
diff --git a/src/compiler/Android.mk b/src/compiler/Android.mk
index 8ebd49778ef..888780ba3fb 100644
--- a/src/compiler/Android.mk
+++ b/src/compiler/Android.mk
@@ -42,3 +42,26 @@ LOCAL_MODULE := libmesa_compiler
include $(MESA_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
+
+# ---------------------------------------
+# Build libmesa_nir
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+ $(NIR_FILES)
+
+LOCAL_C_INCLUDES := \
+ $(MESA_TOP)/src/mapi \
+ $(MESA_TOP)/src/mesa \
+ $(MESA_TOP)/src/gallium/include \
+ $(MESA_TOP)/src/gallium/auxiliary
+
+LOCAL_STATIC_LIBRARIES := libmesa_compiler
+
+LOCAL_MODULE := libmesa_nir
+
+include $(LOCAL_PATH)/Android.gen.mk
+include $(MESA_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/compiler/Makefile.am b/src/compiler/Makefile.am
index fd1dd4b6d30..1e3778df8d5 100644
--- a/src/compiler/Makefile.am
+++ b/src/compiler/Makefile.am
@@ -29,6 +29,7 @@ AM_CPPFLAGS = \
-I$(top_srcdir)/src/mesa/ \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gtest/include \
$(DEFINES)
AM_CFLAGS = \
@@ -43,4 +44,81 @@ noinst_LTLIBRARIES = libcompiler.la
libcompiler_la_SOURCES = $(LIBCOMPILER_FILES)
+check_PROGRAMS =
+TESTS =
+BUILT_SOURCES =
+CLEANFILES =
EXTRA_DIST = SConscript
+
+
+noinst_LTLIBRARIES += nir/libnir.la
+
+nir_libnir_la_CPPFLAGS = \
+ $(AM_CPPFLAGS) \
+ -I$(top_builddir)/src/compiler/nir \
+ -I$(top_srcdir)/src/compiler/nir
+
+nir_libnir_la_LIBADD = \
+ libcompiler.la
+
+nir_libnir_la_SOURCES = \
+ $(NIR_FILES) \
+ $(NIR_GENERATED_FILES)
+
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
+nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_builder_opcodes_h.py > $@ || ($(RM) $@; false)
+
+nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_constant_expressions.py > $@ || ($(RM) $@; false)
+
+nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_h.py > $@ || ($(RM) $@; false)
+
+nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_c.py > $@ || ($(RM) $@; false)
+
+nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py
+ $(MKDIR_GEN)
+ $(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@ || ($(RM) $@; false)
+
+
+check_PROGRAMS += nir/tests/control_flow_tests
+
+nir_tests_control_flow_tests_CPPFLAGS = \
+ $(AM_CPPFLAGS) \
+ -I$(top_builddir)/src/compiler/nir \
+ -I$(top_srcdir)/src/compiler/nir
+
+nir_tests_control_flow_tests_SOURCES = \
+ nir/tests/control_flow_tests.cpp
+nir_tests_control_flow_tests_CFLAGS = \
+ $(PTHREAD_CFLAGS)
+nir_tests_control_flow_tests_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ $(top_builddir)/src/compiler/nir/libnir.la \
+ $(top_builddir)/src/util/libmesautil.la \
+ $(PTHREAD_LIBS)
+
+
+TESTS += nir/tests/control_flow_tests
+
+
+BUILT_SOURCES += $(NIR_GENERATED_FILES)
+CLEANFILES += $(NIR_GENERATED_FILES)
+
+# Generator scripts and test sources that must ship in the tarball.  The NIR
+# file lists live in src/compiler/Makefile.sources, so there is no separate
+# nir/Makefile.sources to distribute.
+EXTRA_DIST += \
+ nir/nir_algebraic.py \
+ nir/nir_builder_opcodes_h.py \
+ nir/nir_constant_expressions.py \
+ nir/nir_opcodes.py \
+ nir/nir_opcodes_c.py \
+ nir/nir_opcodes_h.py \
+ nir/nir_opt_algebraic.py \
+ nir/tests
diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index e1228cafa21..fe12e419afb 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -6,3 +6,74 @@ LIBCOMPILER_FILES = \
nir_types.h \
shader_enums.c \
shader_enums.h
+
+NIR_GENERATED_FILES = \
+ nir/nir_builder_opcodes.h \
+ nir/nir_constant_expressions.c \
+ nir/nir_opcodes.c \
+ nir/nir_opcodes.h \
+ nir/nir_opt_algebraic.c
+
+NIR_FILES = \
+ nir/glsl_to_nir.cpp \
+ nir/glsl_to_nir.h \
+ nir/nir.c \
+ nir/nir.h \
+ nir/nir_array.h \
+ nir/nir_builder.h \
+ nir/nir_clone.c \
+ nir/nir_constant_expressions.h \
+ nir/nir_control_flow.c \
+ nir/nir_control_flow.h \
+ nir/nir_control_flow_private.h \
+ nir/nir_dominance.c \
+ nir/nir_from_ssa.c \
+ nir/nir_gs_count_vertices.c \
+ nir/nir_intrinsics.c \
+ nir/nir_intrinsics.h \
+ nir/nir_instr_set.c \
+ nir/nir_instr_set.h \
+ nir/nir_liveness.c \
+ nir/nir_lower_alu_to_scalar.c \
+ nir/nir_lower_atomics.c \
+ nir/nir_lower_clip.c \
+ nir/nir_lower_global_vars_to_local.c \
+ nir/nir_lower_gs_intrinsics.c \
+ nir/nir_lower_load_const_to_scalar.c \
+ nir/nir_lower_locals_to_regs.c \
+ nir/nir_lower_idiv.c \
+ nir/nir_lower_io.c \
+ nir/nir_lower_outputs_to_temporaries.c \
+ nir/nir_lower_phis_to_scalar.c \
+ nir/nir_lower_samplers.c \
+ nir/nir_lower_system_values.c \
+ nir/nir_lower_tex.c \
+ nir/nir_lower_to_source_mods.c \
+ nir/nir_lower_two_sided_color.c \
+ nir/nir_lower_vars_to_ssa.c \
+ nir/nir_lower_var_copies.c \
+ nir/nir_lower_vec_to_movs.c \
+ nir/nir_metadata.c \
+ nir/nir_move_vec_src_uses_to_dest.c \
+ nir/nir_normalize_cubemap_coords.c \
+ nir/nir_opt_constant_folding.c \
+ nir/nir_opt_copy_propagate.c \
+ nir/nir_opt_cse.c \
+ nir/nir_opt_dce.c \
+ nir/nir_opt_dead_cf.c \
+ nir/nir_opt_gcm.c \
+ nir/nir_opt_global_to_local.c \
+ nir/nir_opt_peephole_select.c \
+ nir/nir_opt_remove_phis.c \
+ nir/nir_opt_undef.c \
+ nir/nir_print.c \
+ nir/nir_remove_dead_variables.c \
+ nir/nir_search.c \
+ nir/nir_search.h \
+ nir/nir_split_var_copies.c \
+ nir/nir_sweep.c \
+ nir/nir_to_ssa.c \
+ nir/nir_validate.c \
+ nir/nir_vla.h \
+ nir/nir_worklist.c \
+ nir/nir_worklist.h
diff --git a/src/compiler/nir/.gitignore b/src/compiler/nir/.gitignore
new file mode 100644
index 00000000000..64828eba6d3
--- /dev/null
+++ b/src/compiler/nir/.gitignore
@@ -0,0 +1,5 @@
+nir_builder_opcodes.h
+nir_opt_algebraic.c
+nir_opcodes.c
+nir_opcodes.h
+nir_constant_expressions.c
diff --git a/src/compiler/nir/README b/src/compiler/nir/README
new file mode 100644
index 00000000000..2c81db9db61
--- /dev/null
+++ b/src/compiler/nir/README
@@ -0,0 +1,118 @@
+New IR, or NIR, is an IR for Mesa intended to sit below GLSL IR and Mesa IR.
+Its design inherits from the various IRs that Mesa has used in the past, as
+well as Direct3D assembly, and it includes a few new ideas as well. It is a
+flat (in terms of using instructions instead of expressions), typeless IR,
+similar to TGSI and Mesa IR. It also supports SSA (although it doesn't require
+it).
+
+Variables
+=========
+
+NIR includes support for source-level GLSL variables through a structure mostly
+copied from GLSL IR. These will be used for linking and conversion from GLSL IR
+(and later, from an AST), but for the most part, they will be lowered to
+registers (see below) and loads/stores.
+
+Registers
+=========
+
+Registers are light-weight; they consist of a structure that only contains its
+size, its index for liveness analysis, and an optional name for debugging. In
+addition, registers can be local to a function or global to the entire shader;
+the latter will be used in ARB_shader_subroutine for passing parameters and
+getting return values from subroutines. Registers can also be an array, in which
+case they can be accessed indirectly. Each ALU instruction (add, subtract, etc.)
+works directly with registers or SSA values (see below).
+
+SSA
+========
+
+Everywhere a register can be loaded/stored, an SSA value can be used instead.
+The only exception is that arrays/indirect addressing are not supported with
+SSA; although research has been done on extensions of SSA to arrays before, it's
+usually for the purpose of parallelization (which we're not interested in), and
+adds some overhead in the form of adding copies or extra arrays (which is much
+more expensive than introducing copies between non-array registers). SSA uses
+point directly to their corresponding definition, which in turn points to the
+instruction it is part of. This creates an implicit use-def chain and avoids the
+need for an external structure for each SSA register.
+
+Functions
+=========
+
+Support for function calls is mostly similar to GLSL IR. Each shader contains a
+list of functions, and each function has a list of overloads. Each overload
+contains a list of parameters, and may contain an implementation which specifies
+the variables that correspond to the parameters and return value. Inlining a
+function, assuming it has a single return point, is as simple as copying its
+instructions, registers, and local variables into the target function and then
+inserting copies to and from the new parameters as appropriate. After functions
+are inlined and any non-subroutine functions are deleted, parameters and return
+variables will be converted to global variables and then global registers. We
+don't do this lowering earlier (i.e. the fortranizer idea) for a few reasons:
+
+- If we want to do optimizations before link time, we need to have the function
+signature available during link-time.
+
+- If we do any inlining before link time, then we might wind up with the
+inlined function and the non-inlined function using the same global
+variables/registers which would preclude optimization.
+
+Intrinsics
+=========
+
+Any operation (other than function calls and textures) which touches a variable
+or is not referentially transparent is represented by an intrinsic. Intrinsics
+are similar to the idea of a "builtin function," i.e. a function declaration
+whose implementation is provided by the backend, except they are more powerful
+in the following ways:
+
+- They can also load and store registers when appropriate, which limits the
+number of variables needed in later stages of the IR while obviating the need
+for a separate load/store variable instruction.
+
+- Intrinsics can be marked as side-effect free, which permits them to be
+treated like any other instruction when it comes to optimizations. This allows
+load intrinsics to be represented as intrinsics while still being optimized
+away by dead code elimination, common subexpression elimination, etc.
+
+Intrinsics are used for:
+
+- Atomic operations
+- Memory barriers
+- Subroutine calls
+- Geometry shader emitVertex and endPrimitive
+- Loading and storing variables (before lowering)
+- Loading and storing uniforms, shader inputs and outputs, etc. (after lowering)
+- Copying variables (cases where in GLSL the destination is a structure or
+array)
+- The kitchen sink
+- ...
+
+Textures
+=========
+
+Unfortunately, there are far too many texture operations to represent each one
+of them with an intrinsic, so there's a special texture instruction similar to
+the GLSL IR one. The biggest difference is that, while the texture instruction
+has a sampler dereference field used just like in GLSL IR, this gets lowered to
+a texture unit index (with a possible indirect offset) while the type
+information of the original sampler is kept around for backends. Also, all the
+non-constant sources are stored in a single array to make it easier for
+optimization passes to iterate over all the sources.
+
+Control Flow
+=========
+
+Like in GLSL IR, control flow consists of a tree of "control flow nodes", which
+include if statements and loops, and jump instructions (break, continue, and
+return). Unlike GLSL IR, though, the leaves of the tree aren't statements but
+basic blocks. Each basic block also keeps track of its successors and
+predecessors, and function implementations keep track of the beginning basic
+block (the first basic block of the function) and the ending basic block (a fake
+basic block that every return statement points to). Together, these elements
+make up the control flow graph, in this case a redundant piece of information on
+top of the control flow tree that will be used by almost all the optimizations.
+There are helper functions to add and remove control flow nodes that also update
+the control flow graph, and so usually it doesn't need to be touched by passes
+that modify control flow nodes.
diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp
new file mode 100644
index 00000000000..33b1f5c7b9e
--- /dev/null
+++ b/src/compiler/nir/glsl_to_nir.cpp
@@ -0,0 +1,2031 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#include "glsl_to_nir.h"
+#include "nir_control_flow.h"
+#include "nir_builder.h"
+#include "glsl/ir_visitor.h"
+#include "glsl/ir_hierarchical_visitor.h"
+#include "glsl/ir.h"
+#include "main/imports.h"
+
+/*
+ * pass to lower GLSL IR to NIR
+ *
+ * This will lower variable dereferences to loads/stores of corresponding
+ * variables in NIR - the variables will be converted to registers in a later
+ * pass.
+ */
+
+namespace {
+
+/*
+ * GLSL IR visitor that builds NIR into the nir_shader it is constructed
+ * with.  It declares one visit() overload per GLSL IR node kind below.
+ */
+class nir_visitor : public ir_visitor
+{
+public:
+   nir_visitor(nir_shader *shader);
+   ~nir_visitor();
+
+   virtual void visit(ir_variable *);
+   virtual void visit(ir_function *);
+   virtual void visit(ir_function_signature *);
+   virtual void visit(ir_loop *);
+   virtual void visit(ir_if *);
+   virtual void visit(ir_discard *);
+   virtual void visit(ir_loop_jump *);
+   virtual void visit(ir_return *);
+   virtual void visit(ir_call *);
+   virtual void visit(ir_assignment *);
+   virtual void visit(ir_emit_vertex *);
+   virtual void visit(ir_end_primitive *);
+   virtual void visit(ir_expression *);
+   virtual void visit(ir_swizzle *);
+   virtual void visit(ir_texture *);
+   virtual void visit(ir_constant *);
+   virtual void visit(ir_dereference_variable *);
+   virtual void visit(ir_dereference_record *);
+   virtual void visit(ir_dereference_array *);
+   virtual void visit(ir_barrier *);
+
+   /* Invoked by nir_function_visitor for every signature it encounters. */
+   void create_function(ir_function_signature *ir);
+
+private:
+   void add_instr(nir_instr *instr, unsigned num_components);
+   nir_ssa_def *evaluate_rvalue(ir_rvalue *ir);
+   nir_deref_var *evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir);
+
+   /* emit() overloads: sources given as an array, or as one to three defs */
+   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def **srcs);
+   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1);
+   nir_alu_instr *emit(nir_op op, unsigned dest_size,
+                       nir_ssa_def *src1, nir_ssa_def *src2);
+   nir_alu_instr *emit(nir_op op, unsigned dest_size,
+                       nir_ssa_def *src1, nir_ssa_def *src2,
+                       nir_ssa_def *src3);
+
+   /* copied from shader->options->native_integers by the constructor */
+   bool supports_ints;
+
+   nir_shader *shader;
+   nir_function_impl *impl;
+   nir_builder b;
+
+   /* result of the expression tree last visited */
+   nir_ssa_def *result;
+
+   /* head and tail of the dereference chain currently being created */
+   nir_deref_var *deref_head;
+   nir_deref *deref_tail;
+
+   /* variable created by the ir_variable visitor */
+   nir_variable *var;
+
+   /* whether the IR we're operating on is per-function or global */
+   bool is_global;
+
+   /* ir_variable -> nir_variable */
+   struct hash_table *var_table;
+
+   /* ir_function_signature -> nir_function_overload */
+   struct hash_table *overload_table;
+};
+
+/*
+ * Pre-pass over the IR: calls nir_visitor::create_function() for each
+ * function before the main visitor runs, so that the main visitor can
+ * resolve forward references in calls.
+ */
+class nir_function_visitor : public ir_hierarchical_visitor
+{
+public:
+   nir_function_visitor(nir_visitor *v) : visitor(v) {}
+
+   virtual ir_visitor_status visit_enter(ir_function *);
+
+private:
+   /* the main visitor whose create_function() is called */
+   nir_visitor *visitor;
+};
+
+} /* end of anonymous namespace */
+
+/*
+ * Build a new nir_shader from the linked GLSL IR of one stage of
+ * shader_prog.
+ *
+ * The IR list of shader_prog->_LinkedShaders[stage] is walked twice: once by
+ * nir_function_visitor and once by nir_visitor.  Afterwards
+ * nir_lower_outputs_to_temporaries() is run and the shader's info fields are
+ * copied from the linked shader / program.
+ *
+ * Returns the shader created by nir_shader_create(NULL, stage, options).
+ */
+nir_shader *
+glsl_to_nir(const struct gl_shader_program *shader_prog,
+            gl_shader_stage stage,
+            const nir_shader_compiler_options *options)
+{
+   struct gl_shader *linked = shader_prog->_LinkedShaders[stage];
+   nir_shader *nir = nir_shader_create(NULL, stage, options);
+
+   /* Function pre-pass first, then the main translation pass. */
+   nir_visitor translator(nir);
+   nir_function_visitor func_pass(&translator);
+   func_pass.run(linked->ir);
+   visit_exec_list(linked->ir, &translator);
+
+   nir_lower_outputs_to_temporaries(nir);
+
+   /* Identification strings are allocated with the shader as ralloc parent. */
+   nir->info.name = ralloc_asprintf(nir, "GLSL%d", shader_prog->Name);
+   if (shader_prog->Label)
+      nir->info.label = ralloc_strdup(nir, shader_prog->Label);
+
+   /* Resource counts. */
+   nir->info.num_textures = _mesa_fls(linked->Program->SamplersUsed);
+   nir->info.num_ubos = linked->NumUniformBlocks;
+   nir->info.num_abos = shader_prog->NumAtomicBuffers;
+   nir->info.num_ssbos = linked->NumShaderStorageBlocks;
+   nir->info.num_images = linked->NumImages;
+
+   /* Input / output / system-value usage. */
+   nir->info.inputs_read = linked->Program->InputsRead;
+   nir->info.outputs_written = linked->Program->OutputsWritten;
+   nir->info.patch_inputs_read = linked->Program->PatchInputsRead;
+   nir->info.patch_outputs_written = linked->Program->PatchOutputsWritten;
+   nir->info.system_values_read = linked->Program->SystemValuesRead;
+
+   /* Miscellaneous flags. */
+   nir->info.uses_texture_gather = linked->Program->UsesGather;
+   nir->info.uses_clip_distance_out =
+      linked->Program->ClipDistanceArraySize != 0;
+   nir->info.separate_shader = shader_prog->SeparateShader;
+   nir->info.has_transform_feedback_varyings =
+      shader_prog->TransformFeedback.NumVarying > 0;
+
+   switch (stage) {
+   case MESA_SHADER_TESS_CTRL:
+      nir->info.tcs.vertices_out = shader_prog->TessCtrl.VerticesOut;
+      break;
+
+   case MESA_SHADER_GEOMETRY:
+      nir->info.gs.vertices_in = shader_prog->Geom.VerticesIn;
+      nir->info.gs.output_primitive = linked->Geom.OutputType;
+      nir->info.gs.vertices_out = linked->Geom.VerticesOut;
+      nir->info.gs.invocations = linked->Geom.Invocations;
+      nir->info.gs.uses_end_primitive = shader_prog->Geom.UsesEndPrimitive;
+      nir->info.gs.uses_streams = shader_prog->Geom.UsesStreams;
+      break;
+
+   case MESA_SHADER_FRAGMENT: {
+      struct gl_fragment_program *frag =
+         (struct gl_fragment_program *)linked->Program;
+
+      nir->info.fs.uses_discard = frag->UsesKill;
+      nir->info.fs.early_fragment_tests = linked->EarlyFragmentTests;
+      nir->info.fs.depth_layout = frag->FragDepthLayout;
+      break;
+   }
+
+   case MESA_SHADER_COMPUTE: {
+      struct gl_compute_program *comp =
+         (struct gl_compute_program *)linked->Program;
+
+      for (unsigned axis = 0; axis < 3; axis++)
+         nir->info.cs.local_size[axis] = comp->LocalSize[axis];
+      break;
+   }
+
+   default:
+      /* No stage-specific info */
+      break;
+   }
+
+   return nir;
+}
+
+/*
+ * Create a visitor that emits into `shader`.
+ *
+ * The visitor starts out in global scope (is_global == true) and owns two
+ * pointer-keyed hash tables, which the destructor destroys.
+ */
+nir_visitor::nir_visitor(nir_shader *shader)
+{
+   this->supports_ints = shader->options->native_integers;
+   this->shader = shader;
+   this->is_global = true;
+
+   /* Nothing has been visited yet: give the per-visit state a defined value
+    * instead of leaving these pointers indeterminate.
+    */
+   this->impl = NULL;
+   this->result = NULL;
+   this->deref_head = NULL;
+   this->deref_tail = NULL;
+   this->var = NULL;
+
+   this->var_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+   this->overload_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                                  _mesa_key_pointer_equal);
+}
+
+/*
+ * Destroy the two lookup tables created by the constructor.  No per-entry
+ * delete callback is passed (NULL).
+ */
+nir_visitor::~nir_visitor()
+{
+   _mesa_hash_table_destroy(var_table, NULL);
+   _mesa_hash_table_destroy(overload_table, NULL);
+}
+
+/*
+ * Visit `ir` with this visitor, then reparent the resulting deref_head to
+ * `mem_ctx` with ralloc_steal() and return it.
+ */
+nir_deref_var *
+nir_visitor::evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir)
+{
+   /* accept() is expected to leave the new chain's head in deref_head. */
+   ir->accept(this);
+
+   nir_deref_var *head = deref_head;
+   ralloc_steal(mem_ctx, head);
+   return head;
+}
+
+/*
+ * Recursively copy a GLSL IR constant into a nir_constant allocated out of
+ * mem_ctx.
+ *
+ * Returns NULL when ir is NULL.  For uint/int/float/bool types the
+ * components are copied into the matching value array; for structs and
+ * arrays each child constant is copied recursively into `elements`.  Any
+ * other base type is treated as unreachable.
+ */
+static nir_constant *
+constant_copy(ir_constant *ir, void *mem_ctx)
+{
+   if (!ir)
+      return NULL;
+
+   nir_constant *copy = ralloc(mem_ctx, nir_constant);
+   const unsigned count = ir->type->components();
+
+   /* Only the struct and array cases below have child elements. */
+   copy->num_elements = 0;
+
+   switch (ir->type->base_type) {
+   case GLSL_TYPE_UINT:
+      for (unsigned c = 0; c < count; c++)
+         copy->value.u[c] = ir->value.u[c];
+      break;
+
+   case GLSL_TYPE_INT:
+      for (unsigned c = 0; c < count; c++)
+         copy->value.i[c] = ir->value.i[c];
+      break;
+
+   case GLSL_TYPE_FLOAT:
+      for (unsigned c = 0; c < count; c++)
+         copy->value.f[c] = ir->value.f[c];
+      break;
+
+   case GLSL_TYPE_BOOL:
+      for (unsigned c = 0; c < count; c++)
+         copy->value.b[c] = ir->value.b[c];
+      break;
+
+   case GLSL_TYPE_STRUCT: {
+      copy->num_elements = ir->type->length;
+      copy->elements = ralloc_array(mem_ctx, nir_constant *,
+                                    ir->type->length);
+
+      /* Fields are stored as a list; copy them in list order. */
+      unsigned slot = 0;
+      foreach_in_list(ir_constant, field, &ir->components) {
+         copy->elements[slot] = constant_copy(field, mem_ctx);
+         slot++;
+      }
+      break;
+   }
+
+   case GLSL_TYPE_ARRAY:
+      copy->num_elements = ir->type->length;
+      copy->elements = ralloc_array(mem_ctx, nir_constant *,
+                                    ir->type->length);
+
+      for (unsigned e = 0; e < ir->type->length; e++)
+         copy->elements[e] = constant_copy(ir->array_elements[e], mem_ctx);
+      break;
+
+   default:
+      unreachable("not reached");
+   }
+
+   return copy;
+}
+
+/*
+ * Create the nir_variable corresponding to a GLSL IR variable.
+ *
+ * The new variable is allocated with the shader as ralloc parent, its fields
+ * are copied or translated from `ir`, and it is added either to the current
+ * function implementation (local mode) or to the shader (every other mode).
+ * The ir -> nir mapping is recorded in var_table and the result is left in
+ * this->var.
+ */
+void
+nir_visitor::visit(ir_variable *ir)
+{
+   /* Zero-initialize so that any field not explicitly set below has a
+    * defined value rather than whatever the allocator handed back.
+    */
+   nir_variable *var = rzalloc(shader, nir_variable);
+   var->type = ir->type;
+   var->name = ralloc_strdup(var, ir->name);
+
+   var->data.read_only = ir->data.read_only;
+   var->data.centroid = ir->data.centroid;
+   var->data.sample = ir->data.sample;
+   var->data.patch = ir->data.patch;
+   var->data.invariant = ir->data.invariant;
+   var->data.location = ir->data.location;
+
+   /* Translate the GLSL IR variable mode into a NIR variable mode. */
+   switch(ir->data.mode) {
+   case ir_var_auto:
+   case ir_var_temporary:
+      if (is_global)
+         var->data.mode = nir_var_global;
+      else
+         var->data.mode = nir_var_local;
+      break;
+
+   case ir_var_function_in:
+   case ir_var_function_out:
+   case ir_var_function_inout:
+   case ir_var_const_in:
+      var->data.mode = nir_var_local;
+      break;
+
+   case ir_var_shader_in:
+      if (shader->stage == MESA_SHADER_FRAGMENT &&
+          ir->data.location == VARYING_SLOT_FACE) {
+         /* For whatever reason, GLSL IR makes gl_FrontFacing an input */
+         var->data.location = SYSTEM_VALUE_FRONT_FACE;
+         var->data.mode = nir_var_system_value;
+      } else if (shader->stage == MESA_SHADER_GEOMETRY &&
+                 ir->data.location == VARYING_SLOT_PRIMITIVE_ID) {
+         /* For whatever reason, GLSL IR makes gl_PrimitiveIDIn an input */
+         var->data.location = SYSTEM_VALUE_PRIMITIVE_ID;
+         var->data.mode = nir_var_system_value;
+      } else {
+         var->data.mode = nir_var_shader_in;
+      }
+      break;
+
+   case ir_var_shader_out:
+      var->data.mode = nir_var_shader_out;
+      break;
+
+   case ir_var_uniform:
+      var->data.mode = nir_var_uniform;
+      break;
+
+   case ir_var_shader_storage:
+      var->data.mode = nir_var_shader_storage;
+      break;
+
+   case ir_var_system_value:
+      var->data.mode = nir_var_system_value;
+      break;
+
+   default:
+      unreachable("not reached");
+   }
+
+   var->data.interpolation = ir->data.interpolation;
+   var->data.origin_upper_left = ir->data.origin_upper_left;
+   var->data.pixel_center_integer = ir->data.pixel_center_integer;
+   var->data.explicit_location = ir->data.explicit_location;
+   var->data.explicit_index = ir->data.explicit_index;
+   var->data.explicit_binding = ir->data.explicit_binding;
+   var->data.has_initializer = ir->data.has_initializer;
+   var->data.location_frac = ir->data.location_frac;
+   var->data.from_named_ifc_block_array = ir->data.from_named_ifc_block_array;
+   var->data.from_named_ifc_block_nonarray = ir->data.from_named_ifc_block_nonarray;
+
+   /* Map the GLSL IR depth layout enum onto the NIR one. */
+   switch (ir->data.depth_layout) {
+   case ir_depth_layout_none:
+      var->data.depth_layout = nir_depth_layout_none;
+      break;
+   case ir_depth_layout_any:
+      var->data.depth_layout = nir_depth_layout_any;
+      break;
+   case ir_depth_layout_greater:
+      var->data.depth_layout = nir_depth_layout_greater;
+      break;
+   case ir_depth_layout_less:
+      var->data.depth_layout = nir_depth_layout_less;
+      break;
+   case ir_depth_layout_unchanged:
+      var->data.depth_layout = nir_depth_layout_unchanged;
+      break;
+   default:
+      unreachable("not reached");
+   }
+
+   var->data.index = ir->data.index;
+   var->data.binding = ir->data.binding;
+   var->data.offset = ir->data.offset;
+   var->data.image.read_only = ir->data.image_read_only;
+   var->data.image.write_only = ir->data.image_write_only;
+   var->data.image.coherent = ir->data.image_coherent;
+   var->data.image._volatile = ir->data.image_volatile;
+   var->data.image.restrict_flag = ir->data.image_restrict;
+   var->data.image.format = ir->data.image_format;
+   var->data.max_array_access = ir->data.max_array_access;
+
+   /* Copy the state slots, if any, into an array owned by the variable. */
+   var->num_state_slots = ir->get_num_state_slots();
+   if (var->num_state_slots > 0) {
+      var->state_slots = ralloc_array(var, nir_state_slot,
+                                      var->num_state_slots);
+
+      ir_state_slot *state_slots = ir->get_state_slots();
+      for (unsigned i = 0; i < var->num_state_slots; i++) {
+         for (unsigned j = 0; j < 5; j++)
+            var->state_slots[i].tokens[j] = state_slots[i].tokens[j];
+         var->state_slots[i].swizzle = state_slots[i].swizzle;
+      }
+   } else {
+      var->state_slots = NULL;
+   }
+
+   var->constant_initializer = constant_copy(ir->constant_initializer, var);
+
+   var->interface_type = ir->get_interface_type();
+
+   /* Locals belong to the current function; everything else to the shader. */
+   if (var->data.mode == nir_var_local)
+      nir_function_impl_add_variable(impl, var);
+   else
+      nir_shader_add_variable(shader, var);
+
+   _mesa_hash_table_insert(var_table, ir, var);
+   this->var = var;
+}
+
+/*
+ * Hands every signature of the visited function to the main visitor's
+ * create_function(), then tells the hierarchical visitor to carry on with the
+ * parent.
+ */
+ir_visitor_status
+nir_function_visitor::visit_enter(ir_function *ir)
+{
+   foreach_in_list(ir_function_signature, signature, &ir->signatures)
+      visitor->create_function(signature);
+
+   return visit_continue_with_parent;
+}
+
+/*
+ * Creates the nir_function that corresponds to a GLSL IR function signature,
+ * fills in its parameter list and return type, and records the
+ * signature -> nir_function mapping in overload_table.
+ *
+ * Intrinsic signatures are ignored.
+ */
+void
+nir_visitor::create_function(ir_function_signature *ir)
+{
+   if (ir->is_intrinsic)
+      return;
+
+   nir_function *func = nir_function_create(shader, ir->function_name());
+
+   const unsigned param_count = ir->parameters.length();
+   func->num_params = param_count;
+   func->params = ralloc_array(shader, nir_parameter, param_count);
+
+   unsigned index = 0;
+   foreach_in_list(ir_variable, param, &ir->parameters) {
+      nir_parameter *slot = &func->params[index];
+
+      /* Translate the GLSL IR parameter direction into the NIR one. */
+      switch (param->data.mode) {
+      case ir_var_function_in:
+         slot->param_type = nir_parameter_in;
+         break;
+
+      case ir_var_function_out:
+         slot->param_type = nir_parameter_out;
+         break;
+
+      case ir_var_function_inout:
+         slot->param_type = nir_parameter_inout;
+         break;
+
+      default:
+         unreachable("not reached");
+      }
+
+      slot->type = param->type;
+      index++;
+   }
+
+   func->return_type = ir->return_type;
+
+   /* visit(ir_function_signature) and visit(ir_call) look the function up
+    * through this table.
+    */
+   _mesa_hash_table_insert(this->overload_table, ir, func);
+}
+
+/* Visits each signature of the function in turn. */
+void
+nir_visitor::visit(ir_function *ir)
+{
+   foreach_in_list(ir_function_signature, signature, &ir->signatures) {
+      signature->accept(this);
+   }
+}
+
+/*
+ * Builds the NIR implementation for a GLSL IR function signature.
+ *
+ * The nir_function must already have been registered in overload_table by
+ * create_function().  Intrinsic signatures are skipped.  A signature without
+ * a definition gets a NULL impl; otherwise a nir_function_impl is created,
+ * its parameters and return variable are set up, and the body is visited
+ * with the builder cursor at the end of the impl's body.
+ */
+void
+nir_visitor::visit(ir_function_signature *ir)
+{
+   if (ir->is_intrinsic)
+      return;
+
+   struct hash_entry *entry =
+      _mesa_hash_table_search(this->overload_table, ir);
+
+   assert(entry);
+   nir_function *func = (nir_function *) entry->data;
+
+   if (!ir->is_defined) {
+      func->impl = NULL;
+      return;
+   }
+
+   nir_function_impl *impl = nir_function_impl_create(func);
+   this->impl = impl;
+
+   unsigned num_params = func->num_params;
+   impl->num_params = num_params;
+   impl->params = ralloc_array(this->shader, nir_variable *, num_params);
+
+   /* Visiting an ir_variable leaves the resulting nir_variable in this->var. */
+   unsigned i = 0;
+   foreach_in_list(ir_variable, param, &ir->parameters) {
+      param->accept(this);
+      impl->params[i] = this->var;
+      i++;
+   }
+
+   if (func->return_type == glsl_type::void_type) {
+      impl->return_var = NULL;
+   } else {
+      /* rzalloc rather than ralloc: ralloc hands back uninitialized memory
+       * and only the name and type are assigned here, so every other field
+       * of the variable would otherwise hold indeterminate values.
+       */
+      impl->return_var = rzalloc(this->shader, nir_variable);
+      impl->return_var->name = ralloc_strdup(impl->return_var,
+                                             "return_var");
+      impl->return_var->type = func->return_type;
+   }
+
+   /* is_global is cleared only while the function body is being visited. */
+   this->is_global = false;
+
+   nir_builder_init(&b, impl);
+   b.cursor = nir_after_cf_list(&impl->body);
+   visit_exec_list(&ir->body, this);
+
+   this->is_global = true;
+}
+
+/*
+ * Emits a NIR loop at the builder cursor, translates the GLSL IR loop body
+ * into it, and leaves the cursor just after the loop.
+ */
+void
+nir_visitor::visit(ir_loop *ir)
+{
+   nir_loop *new_loop = nir_loop_create(this->shader);
+   nir_builder_cf_insert(&b, &new_loop->cf_node);
+
+   /* Everything visited from here on lands inside the loop body... */
+   b.cursor = nir_after_cf_list(&new_loop->body);
+   visit_exec_list(&ir->body_instructions, this);
+
+   /* ...and afterwards emission resumes behind the loop. */
+   b.cursor = nir_after_cf_node(&new_loop->cf_node);
+}
+
+/*
+ * Emits a NIR if at the builder cursor.  The condition is evaluated first (so
+ * its instructions precede the if), then the then- and else-lists are filled
+ * from the corresponding GLSL IR instruction lists, and the cursor is left
+ * just after the if.
+ */
+void
+nir_visitor::visit(ir_if *ir)
+{
+   nir_ssa_def *cond = evaluate_rvalue(ir->condition);
+
+   nir_if *branch = nir_if_create(this->shader);
+   branch->condition = nir_src_for_ssa(cond);
+   nir_builder_cf_insert(&b, &branch->cf_node);
+
+   /* "then" side */
+   b.cursor = nir_after_cf_list(&branch->then_list);
+   visit_exec_list(&ir->then_instructions, this);
+
+   /* "else" side */
+   b.cursor = nir_after_cf_list(&branch->else_list);
+   visit_exec_list(&ir->else_instructions, this);
+
+   b.cursor = nir_after_cf_node(&branch->cf_node);
+}
+
+/*
+ * Emits a discard (or discard_if when the GLSL IR discard carries a
+ * condition) intrinsic at the builder cursor.
+ */
+void
+nir_visitor::visit(ir_discard *ir)
+{
+   /*
+    * Discards are deliberately not modelled as control flow: until they are
+    * lowered they may show up anywhere in the shader, and code following
+    * them may still run (yay, crazy GLSL rules!).  Once lowered, every
+    * discard is immediately followed by a return.
+    */
+   const bool conditional = ir->condition != NULL;
+
+   nir_intrinsic_instr *discard =
+      nir_intrinsic_instr_create(this->shader,
+                                 conditional ? nir_intrinsic_discard_if
+                                             : nir_intrinsic_discard);
+
+   if (conditional)
+      discard->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->condition));
+
+   nir_builder_instr_insert(&b, &discard->instr);
+}
+
+/* Emits an emit_vertex intrinsic carrying the stream id in const_index[0]. */
+void
+nir_visitor::visit(ir_emit_vertex *ir)
+{
+   nir_intrinsic_instr *emit =
+      nir_intrinsic_instr_create(this->shader, nir_intrinsic_emit_vertex);
+
+   emit->const_index[0] = ir->stream_id();
+
+   nir_builder_instr_insert(&b, &emit->instr);
+}
+
+/* Emits an end_primitive intrinsic carrying the stream id in const_index[0]. */
+void
+nir_visitor::visit(ir_end_primitive *ir)
+{
+   nir_intrinsic_instr *end_prim =
+      nir_intrinsic_instr_create(this->shader, nir_intrinsic_end_primitive);
+
+   end_prim->const_index[0] = ir->stream_id();
+
+   nir_builder_instr_insert(&b, &end_prim->instr);
+}
+
+/* Translates a GLSL IR break/continue into the matching NIR jump. */
+void
+nir_visitor::visit(ir_loop_jump *ir)
+{
+   nir_jump_type type;
+
+   if (ir->mode == ir_loop_jump::jump_break) {
+      type = nir_jump_break;
+   } else if (ir->mode == ir_loop_jump::jump_continue) {
+      type = nir_jump_continue;
+   } else {
+      unreachable("not reached");
+   }
+
+   nir_jump_instr *jump = nir_jump_instr_create(this->shader, type);
+   nir_builder_instr_insert(&b, &jump->instr);
+}
+
+/*
+ * Translates a GLSL IR return.  When a value is returned it is copied into
+ * the current impl's return variable with a copy_var intrinsic; a return
+ * jump is emitted in either case.
+ */
+void
+nir_visitor::visit(ir_return *ir)
+{
+   if (ir->value != NULL) {
+      nir_intrinsic_instr *copy =
+         nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);
+
+      copy->variables[0] = nir_deref_var_create(copy, this->impl->return_var);
+      copy->variables[1] = evaluate_deref(&copy->instr, ir->value);
+
+      /* The copy must actually be placed in the instruction stream, ahead of
+       * the jump; otherwise the returned value never reaches return_var.
+       */
+      nir_builder_instr_insert(&b, &copy->instr);
+   }
+
+   nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return);
+   nir_builder_instr_insert(&b, &instr->instr);
+}
+
+/*
+ * Translates a GLSL IR call.
+ *
+ * Calls whose callee is an intrinsic are mapped, by callee name, to a NIR
+ * intrinsic op and emitted directly; if the call has a return deref, the
+ * intrinsic's result is then stored to it with a store_var.  Every other call
+ * becomes a nir_call_instr targeting the nir_function that create_function()
+ * registered in overload_table for the callee.
+ */
+void
+nir_visitor::visit(ir_call *ir)
+{
+ if (ir->callee->is_intrinsic) {
+ /* Pick the NIR intrinsic op from the callee's name. */
+ nir_intrinsic_op op;
+ if (strcmp(ir->callee_name(), "__intrinsic_atomic_read") == 0) {
+ op = nir_intrinsic_atomic_counter_read_var;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_increment") == 0) {
+ op = nir_intrinsic_atomic_counter_inc_var;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_predecrement") == 0) {
+ op = nir_intrinsic_atomic_counter_dec_var;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_load") == 0) {
+ op = nir_intrinsic_image_load;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_store") == 0) {
+ op = nir_intrinsic_image_store;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_add") == 0) {
+ op = nir_intrinsic_image_atomic_add;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_min") == 0) {
+ op = nir_intrinsic_image_atomic_min;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_max") == 0) {
+ op = nir_intrinsic_image_atomic_max;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_and") == 0) {
+ op = nir_intrinsic_image_atomic_and;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_or") == 0) {
+ op = nir_intrinsic_image_atomic_or;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_xor") == 0) {
+ op = nir_intrinsic_image_atomic_xor;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_exchange") == 0) {
+ op = nir_intrinsic_image_atomic_exchange;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_comp_swap") == 0) {
+ op = nir_intrinsic_image_atomic_comp_swap;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier") == 0) {
+ op = nir_intrinsic_memory_barrier;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_size") == 0) {
+ op = nir_intrinsic_image_size;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_image_samples") == 0) {
+ op = nir_intrinsic_image_samples;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_store_ssbo") == 0) {
+ op = nir_intrinsic_store_ssbo;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_load_ssbo") == 0) {
+ op = nir_intrinsic_load_ssbo;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_add_ssbo") == 0) {
+ op = nir_intrinsic_ssbo_atomic_add;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_and_ssbo") == 0) {
+ op = nir_intrinsic_ssbo_atomic_and;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_or_ssbo") == 0) {
+ op = nir_intrinsic_ssbo_atomic_or;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_xor_ssbo") == 0) {
+ op = nir_intrinsic_ssbo_atomic_xor;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_min_ssbo") == 0) {
+ /* min/max: the signed or unsigned op is chosen from the return type. */
+ assert(ir->return_deref);
+ if (ir->return_deref->type == glsl_type::int_type)
+ op = nir_intrinsic_ssbo_atomic_imin;
+ else if (ir->return_deref->type == glsl_type::uint_type)
+ op = nir_intrinsic_ssbo_atomic_umin;
+ else
+ unreachable("Invalid type");
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_max_ssbo") == 0) {
+ assert(ir->return_deref);
+ if (ir->return_deref->type == glsl_type::int_type)
+ op = nir_intrinsic_ssbo_atomic_imax;
+ else if (ir->return_deref->type == glsl_type::uint_type)
+ op = nir_intrinsic_ssbo_atomic_umax;
+ else
+ unreachable("Invalid type");
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_exchange_ssbo") == 0) {
+ op = nir_intrinsic_ssbo_atomic_exchange;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_comp_swap_ssbo") == 0) {
+ op = nir_intrinsic_ssbo_atomic_comp_swap;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_shader_clock") == 0) {
+ op = nir_intrinsic_shader_clock;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_group_memory_barrier") == 0) {
+ op = nir_intrinsic_group_memory_barrier;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_atomic_counter") == 0) {
+ op = nir_intrinsic_memory_barrier_atomic_counter;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_buffer") == 0) {
+ op = nir_intrinsic_memory_barrier_buffer;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_image") == 0) {
+ op = nir_intrinsic_memory_barrier_image;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_shared") == 0) {
+ op = nir_intrinsic_memory_barrier_shared;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_load_shared") == 0) {
+ op = nir_intrinsic_load_shared;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_store_shared") == 0) {
+ op = nir_intrinsic_store_shared;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_add_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_add;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_and_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_and;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_or_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_or;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_xor_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_xor;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_min_shared") == 0) {
+ assert(ir->return_deref);
+ if (ir->return_deref->type == glsl_type::int_type)
+ op = nir_intrinsic_shared_atomic_imin;
+ else if (ir->return_deref->type == glsl_type::uint_type)
+ op = nir_intrinsic_shared_atomic_umin;
+ else
+ unreachable("Invalid type");
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_max_shared") == 0) {
+ assert(ir->return_deref);
+ if (ir->return_deref->type == glsl_type::int_type)
+ op = nir_intrinsic_shared_atomic_imax;
+ else if (ir->return_deref->type == glsl_type::uint_type)
+ op = nir_intrinsic_shared_atomic_umax;
+ else
+ unreachable("Invalid type");
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_exchange_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_exchange;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_comp_swap_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_comp_swap;
+ } else {
+ unreachable("not reached");
+ }
+
+ nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
+ /* dest is the value stored to return_deref at the end; the load_ssbo
+ * case redirects it to the result of its boolean fix-up compare.
+ */
+ nir_dest *dest = &instr->dest;
+
+ /* Fill in sources/destination per intrinsic family and insert it. */
+ switch (op) {
+ case nir_intrinsic_atomic_counter_read_var:
+ case nir_intrinsic_atomic_counter_inc_var:
+ case nir_intrinsic_atomic_counter_dec_var: {
+ /* The only parameter is the counter variable; the result is one component. */
+ ir_dereference *param =
+ (ir_dereference *) ir->actual_parameters.get_head();
+ instr->variables[0] = evaluate_deref(&instr->instr, param);
+ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_image_load:
+ case nir_intrinsic_image_store:
+ case nir_intrinsic_image_atomic_add:
+ case nir_intrinsic_image_atomic_min:
+ case nir_intrinsic_image_atomic_max:
+ case nir_intrinsic_image_atomic_and:
+ case nir_intrinsic_image_atomic_or:
+ case nir_intrinsic_image_atomic_xor:
+ case nir_intrinsic_image_atomic_exchange:
+ case nir_intrinsic_image_atomic_comp_swap:
+ case nir_intrinsic_image_samples:
+ case nir_intrinsic_image_size: {
+ /* A one-component undef used below to pad unused coordinate components
+ * and as the sample index of non-multisample images.
+ */
+ nir_ssa_undef_instr *instr_undef =
+ nir_ssa_undef_instr_create(shader, 1);
+ nir_builder_instr_insert(&b, &instr_undef->instr);
+
+ /* Set the image variable dereference. */
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_dereference *image = (ir_dereference *)param;
+ const glsl_type *type =
+ image->variable_referenced()->type->without_array();
+
+ instr->variables[0] = evaluate_deref(&instr->instr, image);
+ param = param->get_next();
+
+ /* Set the intrinsic destination. */
+ if (ir->return_deref) {
+ const nir_intrinsic_info *info =
+ &nir_intrinsic_infos[instr->intrinsic];
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ info->dest_components, NULL);
+ }
+
+ /* image_size and image_samples take no further sources. */
+ if (op == nir_intrinsic_image_size ||
+ op == nir_intrinsic_image_samples) {
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+
+ /* Set the address argument, extending the coordinate vector to four
+ * components.
+ */
+ nir_ssa_def *src_addr =
+ evaluate_rvalue((ir_dereference *)param);
+ nir_ssa_def *srcs[4];
+
+ for (int i = 0; i < 4; i++) {
+ if (i < type->coordinate_components())
+ srcs[i] = nir_channel(&b, src_addr, i);
+ else
+ srcs[i] = &instr_undef->def;
+ }
+
+ instr->src[0] = nir_src_for_ssa(nir_vec(&b, srcs, 4));
+ param = param->get_next();
+
+ /* Set the sample argument, which is undefined for single-sample
+ * images.
+ */
+ if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) {
+ instr->src[1] =
+ nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
+ param = param->get_next();
+ } else {
+ instr->src[1] = nir_src_for_ssa(&instr_undef->def);
+ }
+
+ /* Set the intrinsic parameters. */
+ if (!param->is_tail_sentinel()) {
+ instr->src[2] =
+ nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
+ param = param->get_next();
+ }
+
+ if (!param->is_tail_sentinel()) {
+ instr->src[3] =
+ nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
+ param = param->get_next();
+ }
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_memory_barrier:
+ case nir_intrinsic_group_memory_barrier:
+ case nir_intrinsic_memory_barrier_atomic_counter:
+ case nir_intrinsic_memory_barrier_buffer:
+ case nir_intrinsic_memory_barrier_image:
+ case nir_intrinsic_memory_barrier_shared:
+ /* Barriers have neither sources nor a destination. */
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ case nir_intrinsic_shader_clock:
+ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ case nir_intrinsic_store_ssbo: {
+ /* Parameters arrive as (block, offset, value, write mask); the
+ * intrinsic's sources are ordered (value, block, offset) and the write
+ * mask goes into const_index[0].
+ */
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
+ assert(write_mask);
+
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val));
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block));
+ instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset));
+ instr->const_index[0] = write_mask->value.u[0];
+ instr->num_components = val->type->vector_elements;
+
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_load_ssbo: {
+ /* Parameters are (block, offset); the component count comes from the
+ * type of the variable receiving the result.
+ */
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(block));
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset));
+
+ const glsl_type *type = ir->return_deref->var->type;
+ instr->num_components = type->vector_elements;
+
+ /* Setup destination register */
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ type->vector_elements, NULL);
+
+ /* Insert the created nir instruction now since in the case of boolean
+ * result we will need to emit another instruction after it
+ */
+ nir_builder_instr_insert(&b, &instr->instr);
+
+ /*
+ * In SSBO/UBO's, a true boolean value is any non-zero value, but we
+ * consider a true boolean to be ~0. Fix this up with a != 0
+ * comparison.
+ */
+ if (type->base_type == GLSL_TYPE_BOOL) {
+ nir_alu_instr *load_ssbo_compare =
+ nir_alu_instr_create(shader, nir_op_ine);
+ load_ssbo_compare->src[0].src.is_ssa = true;
+ load_ssbo_compare->src[0].src.ssa = &instr->dest.ssa;
+ load_ssbo_compare->src[1].src =
+ nir_src_for_ssa(nir_imm_int(&b, 0));
+ /* Every component compares against component 0 of the immediate. */
+ for (unsigned i = 0; i < type->vector_elements; i++)
+ load_ssbo_compare->src[1].swizzle[i] = 0;
+ nir_ssa_dest_init(&load_ssbo_compare->instr,
+ &load_ssbo_compare->dest.dest,
+ type->vector_elements, NULL);
+ load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) - 1;
+ nir_builder_instr_insert(&b, &load_ssbo_compare->instr);
+ /* Store the compare result, not the raw load, to return_deref. */
+ dest = &load_ssbo_compare->dest.dest;
+ }
+ break;
+ }
+ case nir_intrinsic_ssbo_atomic_add:
+ case nir_intrinsic_ssbo_atomic_imin:
+ case nir_intrinsic_ssbo_atomic_umin:
+ case nir_intrinsic_ssbo_atomic_imax:
+ case nir_intrinsic_ssbo_atomic_umax:
+ case nir_intrinsic_ssbo_atomic_and:
+ case nir_intrinsic_ssbo_atomic_or:
+ case nir_intrinsic_ssbo_atomic_xor:
+ case nir_intrinsic_ssbo_atomic_exchange:
+ case nir_intrinsic_ssbo_atomic_comp_swap: {
+ int param_count = ir->actual_parameters.length();
+ assert(param_count == 3 || param_count == 4);
+
+ /* Block index */
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_instruction *inst = (ir_instruction *) param;
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+
+ /* Offset */
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+
+ /* data1 parameter (this is always present) */
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[2] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+
+ /* data2 parameter (only with atomic_comp_swap) */
+ if (param_count == 4) {
+ assert(op == nir_intrinsic_ssbo_atomic_comp_swap);
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[3] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+ }
+
+ /* Atomic result */
+ assert(ir->return_deref);
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ ir->return_deref->type->vector_elements, NULL);
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_load_shared: {
+ /* The only parameter is the offset; const_index[0] is set to 0. */
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ instr->const_index[0] = 0;
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset));
+
+ const glsl_type *type = ir->return_deref->var->type;
+ instr->num_components = type->vector_elements;
+
+ /* Setup destination register */
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ type->vector_elements, NULL);
+
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_store_shared: {
+ /* Parameters arrive as (offset, value, write mask); the value becomes
+ * src[0], the offset src[1], and the write mask const_index[1].
+ */
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
+ assert(write_mask);
+
+ instr->const_index[0] = 0;
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset));
+
+ instr->const_index[1] = write_mask->value.u[0];
+
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val));
+ instr->num_components = val->type->vector_elements;
+
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_shared_atomic_add:
+ case nir_intrinsic_shared_atomic_imin:
+ case nir_intrinsic_shared_atomic_umin:
+ case nir_intrinsic_shared_atomic_imax:
+ case nir_intrinsic_shared_atomic_umax:
+ case nir_intrinsic_shared_atomic_and:
+ case nir_intrinsic_shared_atomic_or:
+ case nir_intrinsic_shared_atomic_xor:
+ case nir_intrinsic_shared_atomic_exchange:
+ case nir_intrinsic_shared_atomic_comp_swap: {
+ int param_count = ir->actual_parameters.length();
+ assert(param_count == 2 || param_count == 3);
+
+ /* Offset */
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_instruction *inst = (ir_instruction *) param;
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+
+ /* data1 parameter (this is always present) */
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+
+ /* data2 parameter (only with atomic_comp_swap) */
+ if (param_count == 3) {
+ assert(op == nir_intrinsic_shared_atomic_comp_swap);
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[2] =
+ nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+ }
+
+ /* Atomic result */
+ assert(ir->return_deref);
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ ir->return_deref->type->vector_elements, NULL);
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ default:
+ unreachable("not reached");
+ }
+
+ /* Write the intrinsic's result to the call's return deref, enabling all
+ * of its components in the store's write mask.
+ */
+ if (ir->return_deref) {
+ nir_intrinsic_instr *store_instr =
+ nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
+ store_instr->num_components = ir->return_deref->type->vector_elements;
+ store_instr->const_index[0] = (1 << store_instr->num_components) - 1;
+
+ store_instr->variables[0] =
+ evaluate_deref(&store_instr->instr, ir->return_deref);
+ store_instr->src[0] = nir_src_for_ssa(&dest->ssa);
+
+ nir_builder_instr_insert(&b, &store_instr->instr);
+ }
+
+ return;
+ }
+
+ /* Ordinary call: look up the nir_function registered for the callee. */
+ struct hash_entry *entry =
+ _mesa_hash_table_search(this->overload_table, ir->callee);
+ assert(entry);
+ nir_function *callee = (nir_function *) entry->data;
+
+ nir_call_instr *instr = nir_call_instr_create(this->shader, callee);
+
+ /* Each actual parameter is passed as a variable dereference. */
+ unsigned i = 0;
+ foreach_in_list(ir_dereference, param, &ir->actual_parameters) {
+ instr->params[i] = evaluate_deref(&instr->instr, param);
+ i++;
+ }
+
+ /* NOTE(review): ir->return_deref is passed on unconditionally here, unlike
+ * the intrinsic path which checks it first - confirm evaluate_deref copes
+ * with a NULL return_deref.
+ */
+ instr->return_deref = evaluate_deref(&instr->instr, ir->return_deref);
+ nir_builder_instr_insert(&b, &instr->instr);
+}
+
+/*
+ * Translates a GLSL IR assignment.
+ *
+ * When the right-hand side is a dereference or a constant and the write mask
+ * is either full or zero, the assignment becomes a copy_var.  Otherwise the
+ * right-hand side is evaluated to an SSA value (re-swizzled for partial write
+ * masks) and written with a store_var.  A conditional assignment wraps the
+ * emitted instruction in an if.
+ */
+void
+nir_visitor::visit(ir_assignment *ir)
+{
+   unsigned num_components = ir->lhs->type->vector_elements;
+
+   const bool rhs_is_var_or_const =
+      ir->rhs->as_dereference() || ir->rhs->as_constant();
+   const bool whole_write =
+      ir->write_mask == (1 << num_components) - 1 || ir->write_mask == 0;
+
+   nir_instr *emitted;
+
+   if (rhs_is_var_or_const && whole_write) {
+      /* We're doing a plain-as-can-be copy, so emit a copy_var */
+      nir_intrinsic_instr *copy =
+         nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);
+
+      copy->variables[0] = evaluate_deref(&copy->instr, ir->lhs);
+      copy->variables[1] = evaluate_deref(&copy->instr, ir->rhs);
+
+      emitted = &copy->instr;
+   } else {
+      assert(ir->rhs->type->is_scalar() || ir->rhs->type->is_vector());
+
+      /* Visiting the left-hand side leaves its deref chain in deref_head. */
+      ir->lhs->accept(this);
+      nir_deref_var *lhs_deref = this->deref_head;
+      nir_ssa_def *value = evaluate_rvalue(ir->rhs);
+
+      if (!whole_write) {
+         /* GLSL IR hands us the source of a write-masked assignment as one
+          * packed vector.  With a writemask of xzw, for instance, x -> x,
+          * y -> z and z -> w must be swizzled, and the y component comes
+          * from the load.
+          */
+         unsigned swiz[4];
+         unsigned next_packed = 0;
+         for (unsigned chan = 0; chan < 4; chan++) {
+            if (ir->write_mask & (1 << chan))
+               swiz[chan] = next_packed++;
+            else
+               swiz[chan] = 0;
+         }
+         value = nir_swizzle(&b, value, swiz, num_components, !supports_ints);
+      }
+
+      nir_intrinsic_instr *store =
+         nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
+      store->num_components = ir->lhs->type->vector_elements;
+      store->const_index[0] = ir->write_mask;
+      nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref);
+      store->variables[0] = nir_deref_as_var(store_deref);
+      store->src[0] = nir_src_for_ssa(value);
+
+      emitted = &store->instr;
+   }
+
+   if (!ir->condition) {
+      nir_builder_instr_insert(&b, emitted);
+      return;
+   }
+
+   /* Conditional assignment: place the instruction in the then-list of a
+    * fresh if and continue emitting after that if.
+    */
+   nir_if *if_stmt = nir_if_create(this->shader);
+   if_stmt->condition = nir_src_for_ssa(evaluate_rvalue(ir->condition));
+   nir_builder_cf_insert(&b, &if_stmt->cf_node);
+   nir_instr_insert_after_cf_list(&if_stmt->then_list, emitted);
+   b.cursor = nir_after_cf_node(&if_stmt->cf_node);
+}
+
+/*
+ * Returns a pointer to the destination of the given instruction, or NULL when
+ * the instruction has none (an intrinsic whose info says has_dest is false).
+ *
+ * Only the instruction types generated at this level are handled: ALU,
+ * intrinsic and texture instructions.
+ */
+static nir_dest *
+get_instr_dest(nir_instr *instr)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu: {
+      nir_alu_instr *alu = nir_instr_as_alu(instr);
+      return &alu->dest.dest;
+   }
+
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      if (!nir_intrinsic_infos[intrin->intrinsic].has_dest)
+         return NULL;
+      return &intrin->dest;
+   }
+
+   case nir_instr_type_tex: {
+      nir_tex_instr *tex = nir_instr_as_tex(instr);
+      return &tex->dest;
+   }
+
+   default:
+      unreachable("not reached");
+   }
+
+   return NULL;
+}
+
+/*
+ * Inserts an instruction at the builder cursor.  If the instruction has a
+ * destination, it is first initialized as an SSA destination with
+ * num_components components, and afterwards this->result is pointed at the
+ * resulting SSA definition.
+ */
+void
+nir_visitor::add_instr(nir_instr *instr, unsigned num_components)
+{
+   nir_dest *dest = get_instr_dest(instr);
+
+   if (dest == NULL) {
+      /* No destination: nothing to initialize, and result stays untouched. */
+      nir_builder_instr_insert(&b, instr);
+      return;
+   }
+
+   nir_ssa_dest_init(instr, dest, num_components, NULL);
+   nir_builder_instr_insert(&b, instr);
+
+   assert(dest->is_ssa);
+   this->result = &dest->ssa;
+}
+
+/*
+ * Visits an rvalue and returns the SSA definition holding its value.
+ *
+ * For dereferences and constants the visit only produces a deref chain in
+ * deref_head, so a load_var of that chain is emitted to obtain the value.
+ */
+nir_ssa_def *
+nir_visitor::evaluate_rvalue(ir_rvalue* ir)
+{
+   ir->accept(this);
+
+   const bool needs_load = ir->as_dereference() || ir->as_constant();
+   if (!needs_load)
+      return this->result;
+
+   /*
+    * A variable (or constant) is being read on the right hand side, so a
+    * variable load has to be emitted.  The load takes ownership of the deref
+    * chain left behind by the visit.
+    */
+   nir_intrinsic_instr *load =
+      nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var);
+   load->num_components = ir->type->vector_elements;
+   load->variables[0] = this->deref_head;
+   ralloc_steal(load, load->variables[0]);
+   add_instr(&load->instr, ir->type->vector_elements);
+
+   return this->result;
+}
+
+void
+nir_visitor::visit(ir_expression *ir)
+{
+ /* Some special cases */
+ switch (ir->operation) {
+ case ir_binop_ubo_load: {
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo);
+ load->num_components = ir->type->vector_elements;
+ load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
+ load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
+ add_instr(&load->instr, ir->type->vector_elements);
+
+ /*
+ * In UBO's, a true boolean value is any non-zero value, but we consider
+ * a true boolean to be ~0. Fix this up with a != 0 comparison.
+ */
+
+ if (ir->type->base_type == GLSL_TYPE_BOOL)
+ this->result = nir_ine(&b, &load->dest.ssa, nir_imm_int(&b, 0));
+
+ return;
+ }
+
+ case ir_unop_interpolate_at_centroid:
+ case ir_binop_interpolate_at_offset:
+ case ir_binop_interpolate_at_sample: {
+ ir_dereference *deref = ir->operands[0]->as_dereference();
+ ir_swizzle *swizzle = NULL;
+ if (!deref) {
+ /* the api does not allow a swizzle here, but the varying packing code
+ * may have pushed one into here.
+ */
+ swizzle = ir->operands[0]->as_swizzle();
+ assert(swizzle);
+ deref = swizzle->val->as_dereference();
+ assert(deref);
+ }
+
+ deref->accept(this);
+
+ nir_intrinsic_op op;
+ if (this->deref_head->var->data.mode == nir_var_shader_in) {
+ switch (ir->operation) {
+ case ir_unop_interpolate_at_centroid:
+ op = nir_intrinsic_interp_var_at_centroid;
+ break;
+ case ir_binop_interpolate_at_offset:
+ op = nir_intrinsic_interp_var_at_offset;
+ break;
+ case ir_binop_interpolate_at_sample:
+ op = nir_intrinsic_interp_var_at_sample;
+ break;
+ default:
+ unreachable("Invalid interpolation intrinsic");
+ }
+ } else {
+ /* This case can happen if the vertex shader does not write the
+ * given varying. In this case, the linker will lower it to a
+ * global variable. Since interpolating a variable makes no
+ * sense, we'll just turn it into a load which will probably
+ * eventually end up as an SSA definition.
+ */
+ assert(this->deref_head->var->data.mode == nir_var_global);
+ op = nir_intrinsic_load_var;
+ }
+
+ nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op);
+ intrin->num_components = deref->type->vector_elements;
+ intrin->variables[0] = this->deref_head;
+ ralloc_steal(intrin, intrin->variables[0]);
+
+ if (intrin->intrinsic == nir_intrinsic_interp_var_at_offset ||
+ intrin->intrinsic == nir_intrinsic_interp_var_at_sample)
+ intrin->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
+
+ add_instr(&intrin->instr, deref->type->vector_elements);
+
+ if (swizzle) {
+ unsigned swiz[4] = {
+ swizzle->mask.x, swizzle->mask.y, swizzle->mask.z, swizzle->mask.w
+ };
+
+ result = nir_swizzle(&b, result, swiz,
+ swizzle->type->vector_elements, false);
+ }
+
+ return;
+ }
+
+ default:
+ break;
+ }
+
+ nir_ssa_def *srcs[4];
+ for (unsigned i = 0; i < ir->get_num_operands(); i++)
+ srcs[i] = evaluate_rvalue(ir->operands[i]);
+
+ glsl_base_type types[4];
+ for (unsigned i = 0; i < ir->get_num_operands(); i++)
+ if (supports_ints)
+ types[i] = ir->operands[i]->type->base_type;
+ else
+ types[i] = GLSL_TYPE_FLOAT;
+
+ glsl_base_type out_type;
+ if (supports_ints)
+ out_type = ir->type->base_type;
+ else
+ out_type = GLSL_TYPE_FLOAT;
+
+ switch (ir->operation) {
+ case ir_unop_bit_not: result = nir_inot(&b, srcs[0]); break;
+ case ir_unop_logic_not:
+ result = supports_ints ? nir_inot(&b, srcs[0]) : nir_fnot(&b, srcs[0]);
+ break;
+ case ir_unop_neg:
+ result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fneg(&b, srcs[0])
+ : nir_ineg(&b, srcs[0]);
+ break;
+ case ir_unop_abs:
+ result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fabs(&b, srcs[0])
+ : nir_iabs(&b, srcs[0]);
+ break;
+ case ir_unop_saturate:
+ assert(types[0] == GLSL_TYPE_FLOAT);
+ result = nir_fsat(&b, srcs[0]);
+ break;
+ case ir_unop_sign:
+ result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fsign(&b, srcs[0])
+ : nir_isign(&b, srcs[0]);
+ break;
+ case ir_unop_rcp: result = nir_frcp(&b, srcs[0]); break;
+ case ir_unop_rsq: result = nir_frsq(&b, srcs[0]); break;
+ case ir_unop_sqrt: result = nir_fsqrt(&b, srcs[0]); break;
+ case ir_unop_exp: unreachable("ir_unop_exp should have been lowered");
+ case ir_unop_log: unreachable("ir_unop_log should have been lowered");
+ case ir_unop_exp2: result = nir_fexp2(&b, srcs[0]); break;
+ case ir_unop_log2: result = nir_flog2(&b, srcs[0]); break;
+ case ir_unop_i2f:
+ result = supports_ints ? nir_i2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
+ break;
+ case ir_unop_u2f:
+ result = supports_ints ? nir_u2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
+ break;
+ case ir_unop_b2f:
+ result = supports_ints ? nir_b2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
+ break;
+ case ir_unop_f2i: result = nir_f2i(&b, srcs[0]); break;
+ case ir_unop_f2u: result = nir_f2u(&b, srcs[0]); break;
+ case ir_unop_f2b: result = nir_f2b(&b, srcs[0]); break;
+ case ir_unop_i2b: result = nir_i2b(&b, srcs[0]); break;
+ case ir_unop_b2i: result = nir_b2i(&b, srcs[0]); break;
+ case ir_unop_i2u:
+ case ir_unop_u2i:
+ case ir_unop_bitcast_i2f:
+ case ir_unop_bitcast_f2i:
+ case ir_unop_bitcast_u2f:
+ case ir_unop_bitcast_f2u:
+ case ir_unop_subroutine_to_int:
+ /* no-op */
+ result = nir_imov(&b, srcs[0]);
+ break;
+ case ir_unop_trunc: result = nir_ftrunc(&b, srcs[0]); break;
+ case ir_unop_ceil: result = nir_fceil(&b, srcs[0]); break;
+ case ir_unop_floor: result = nir_ffloor(&b, srcs[0]); break;
+ case ir_unop_fract: result = nir_ffract(&b, srcs[0]); break;
+ case ir_unop_round_even: result = nir_fround_even(&b, srcs[0]); break;
+ case ir_unop_sin: result = nir_fsin(&b, srcs[0]); break;
+ case ir_unop_cos: result = nir_fcos(&b, srcs[0]); break;
+ case ir_unop_dFdx: result = nir_fddx(&b, srcs[0]); break;
+ case ir_unop_dFdy: result = nir_fddy(&b, srcs[0]); break;
+ case ir_unop_dFdx_fine: result = nir_fddx_fine(&b, srcs[0]); break;
+ case ir_unop_dFdy_fine: result = nir_fddy_fine(&b, srcs[0]); break;
+ case ir_unop_dFdx_coarse: result = nir_fddx_coarse(&b, srcs[0]); break;
+ case ir_unop_dFdy_coarse: result = nir_fddy_coarse(&b, srcs[0]); break;
+ case ir_unop_pack_snorm_2x16:
+ result = nir_pack_snorm_2x16(&b, srcs[0]);
+ break;
+ case ir_unop_pack_snorm_4x8:
+ result = nir_pack_snorm_4x8(&b, srcs[0]);
+ break;
+ case ir_unop_pack_unorm_2x16:
+ result = nir_pack_unorm_2x16(&b, srcs[0]);
+ break;
+ case ir_unop_pack_unorm_4x8:
+ result = nir_pack_unorm_4x8(&b, srcs[0]);
+ break;
+ case ir_unop_pack_half_2x16:
+ result = nir_pack_half_2x16(&b, srcs[0]);
+ break;
+ case ir_unop_unpack_snorm_2x16:
+ result = nir_unpack_snorm_2x16(&b, srcs[0]);
+ break;
+ case ir_unop_unpack_snorm_4x8:
+ result = nir_unpack_snorm_4x8(&b, srcs[0]);
+ break;
+ case ir_unop_unpack_unorm_2x16:
+ result = nir_unpack_unorm_2x16(&b, srcs[0]);
+ break;
+ case ir_unop_unpack_unorm_4x8:
+ result = nir_unpack_unorm_4x8(&b, srcs[0]);
+ break;
+ case ir_unop_unpack_half_2x16:
+ result = nir_unpack_half_2x16(&b, srcs[0]);
+ break;
+ case ir_unop_unpack_half_2x16_split_x:
+ result = nir_unpack_half_2x16_split_x(&b, srcs[0]);
+ break;
+ case ir_unop_unpack_half_2x16_split_y:
+ result = nir_unpack_half_2x16_split_y(&b, srcs[0]);
+ break;
+ case ir_unop_bitfield_reverse:
+ result = nir_bitfield_reverse(&b, srcs[0]);
+ break;
+ case ir_unop_bit_count:
+ result = nir_bit_count(&b, srcs[0]);
+ break;
+ case ir_unop_find_msb:
+ switch (types[0]) {
+ case GLSL_TYPE_UINT:
+ result = nir_ufind_msb(&b, srcs[0]);
+ break;
+ case GLSL_TYPE_INT:
+ result = nir_ifind_msb(&b, srcs[0]);
+ break;
+ default:
+ unreachable("Invalid type for findMSB()");
+ }
+ break;
+ case ir_unop_find_lsb:
+ result = nir_find_lsb(&b, srcs[0]);
+ break;
+
+ case ir_unop_noise:
+ switch (ir->type->vector_elements) {
+ case 1:
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_fnoise1_1(&b, srcs[0]); break;
+ case 2: result = nir_fnoise1_2(&b, srcs[0]); break;
+ case 3: result = nir_fnoise1_3(&b, srcs[0]); break;
+ case 4: result = nir_fnoise1_4(&b, srcs[0]); break;
+ default: unreachable("not reached");
+ }
+ break;
+ case 2:
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_fnoise2_1(&b, srcs[0]); break;
+ case 2: result = nir_fnoise2_2(&b, srcs[0]); break;
+ case 3: result = nir_fnoise2_3(&b, srcs[0]); break;
+ case 4: result = nir_fnoise2_4(&b, srcs[0]); break;
+ default: unreachable("not reached");
+ }
+ break;
+ case 3:
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_fnoise3_1(&b, srcs[0]); break;
+ case 2: result = nir_fnoise3_2(&b, srcs[0]); break;
+ case 3: result = nir_fnoise3_3(&b, srcs[0]); break;
+ case 4: result = nir_fnoise3_4(&b, srcs[0]); break;
+ default: unreachable("not reached");
+ }
+ break;
+ case 4:
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_fnoise4_1(&b, srcs[0]); break;
+ case 2: result = nir_fnoise4_2(&b, srcs[0]); break;
+ case 3: result = nir_fnoise4_3(&b, srcs[0]); break;
+ case 4: result = nir_fnoise4_4(&b, srcs[0]); break;
+ default: unreachable("not reached");
+ }
+ break;
+ default:
+ unreachable("not reached");
+ }
+ break;
+ case ir_unop_get_buffer_size: {
+ nir_intrinsic_instr *load = nir_intrinsic_instr_create(
+ this->shader,
+ nir_intrinsic_get_buffer_size);
+ load->num_components = ir->type->vector_elements;
+ load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
+ add_instr(&load->instr, ir->type->vector_elements);
+ return;
+ }
+
+ case ir_binop_add:
+ result = (out_type == GLSL_TYPE_FLOAT) ? nir_fadd(&b, srcs[0], srcs[1])
+ : nir_iadd(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_sub:
+ result = (out_type == GLSL_TYPE_FLOAT) ? nir_fsub(&b, srcs[0], srcs[1])
+ : nir_isub(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_mul:
+ result = (out_type == GLSL_TYPE_FLOAT) ? nir_fmul(&b, srcs[0], srcs[1])
+ : nir_imul(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_div:
+ if (out_type == GLSL_TYPE_FLOAT)
+ result = nir_fdiv(&b, srcs[0], srcs[1]);
+ else if (out_type == GLSL_TYPE_INT)
+ result = nir_idiv(&b, srcs[0], srcs[1]);
+ else
+ result = nir_udiv(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_mod:
+ result = (out_type == GLSL_TYPE_FLOAT) ? nir_fmod(&b, srcs[0], srcs[1])
+ : nir_umod(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_min:
+ if (out_type == GLSL_TYPE_FLOAT)
+ result = nir_fmin(&b, srcs[0], srcs[1]);
+ else if (out_type == GLSL_TYPE_INT)
+ result = nir_imin(&b, srcs[0], srcs[1]);
+ else
+ result = nir_umin(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_max:
+ if (out_type == GLSL_TYPE_FLOAT)
+ result = nir_fmax(&b, srcs[0], srcs[1]);
+ else if (out_type == GLSL_TYPE_INT)
+ result = nir_imax(&b, srcs[0], srcs[1]);
+ else
+ result = nir_umax(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_pow: result = nir_fpow(&b, srcs[0], srcs[1]); break;
+ case ir_binop_bit_and: result = nir_iand(&b, srcs[0], srcs[1]); break;
+ case ir_binop_bit_or: result = nir_ior(&b, srcs[0], srcs[1]); break;
+ case ir_binop_bit_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break;
+ case ir_binop_logic_and:
+ result = supports_ints ? nir_iand(&b, srcs[0], srcs[1])
+ : nir_fand(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_logic_or:
+ result = supports_ints ? nir_ior(&b, srcs[0], srcs[1])
+ : nir_for(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_logic_xor:
+ result = supports_ints ? nir_ixor(&b, srcs[0], srcs[1])
+ : nir_fxor(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_lshift: result = nir_ishl(&b, srcs[0], srcs[1]); break;
+ case ir_binop_rshift:
+ result = (out_type == GLSL_TYPE_INT) ? nir_ishr(&b, srcs[0], srcs[1])
+ : nir_ushr(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_imul_high:
+ result = (out_type == GLSL_TYPE_INT) ? nir_imul_high(&b, srcs[0], srcs[1])
+ : nir_umul_high(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_carry: result = nir_uadd_carry(&b, srcs[0], srcs[1]); break;
+ case ir_binop_borrow: result = nir_usub_borrow(&b, srcs[0], srcs[1]); break;
+ case ir_binop_less:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT)
+ result = nir_flt(&b, srcs[0], srcs[1]);
+ else if (types[0] == GLSL_TYPE_INT)
+ result = nir_ilt(&b, srcs[0], srcs[1]);
+ else
+ result = nir_ult(&b, srcs[0], srcs[1]);
+ } else {
+ result = nir_slt(&b, srcs[0], srcs[1]);
+ }
+ break;
+ case ir_binop_greater:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT)
+ result = nir_flt(&b, srcs[1], srcs[0]);
+ else if (types[0] == GLSL_TYPE_INT)
+ result = nir_ilt(&b, srcs[1], srcs[0]);
+ else
+ result = nir_ult(&b, srcs[1], srcs[0]);
+ } else {
+ result = nir_slt(&b, srcs[1], srcs[0]);
+ }
+ break;
+ case ir_binop_lequal:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT)
+ result = nir_fge(&b, srcs[1], srcs[0]);
+ else if (types[0] == GLSL_TYPE_INT)
+ result = nir_ige(&b, srcs[1], srcs[0]);
+ else
+ result = nir_uge(&b, srcs[1], srcs[0]);
+ } else {
+ result = nir_slt(&b, srcs[1], srcs[0]);
+ }
+ break;
+ case ir_binop_gequal:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT)
+ result = nir_fge(&b, srcs[0], srcs[1]);
+ else if (types[0] == GLSL_TYPE_INT)
+ result = nir_ige(&b, srcs[0], srcs[1]);
+ else
+ result = nir_uge(&b, srcs[0], srcs[1]);
+ } else {
+ result = nir_slt(&b, srcs[0], srcs[1]);
+ }
+ break;
+ case ir_binop_equal:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT)
+ result = nir_feq(&b, srcs[0], srcs[1]);
+ else
+ result = nir_ieq(&b, srcs[0], srcs[1]);
+ } else {
+ result = nir_seq(&b, srcs[0], srcs[1]);
+ }
+ break;
+ case ir_binop_nequal:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT)
+ result = nir_fne(&b, srcs[0], srcs[1]);
+ else
+ result = nir_ine(&b, srcs[0], srcs[1]);
+ } else {
+ result = nir_sne(&b, srcs[0], srcs[1]);
+ }
+ break;
+ case ir_binop_all_equal:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT) {
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_feq(&b, srcs[0], srcs[1]); break;
+ case 2: result = nir_ball_fequal2(&b, srcs[0], srcs[1]); break;
+ case 3: result = nir_ball_fequal3(&b, srcs[0], srcs[1]); break;
+ case 4: result = nir_ball_fequal4(&b, srcs[0], srcs[1]); break;
+ default:
+ unreachable("not reached");
+ }
+ } else {
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_ieq(&b, srcs[0], srcs[1]); break;
+ case 2: result = nir_ball_iequal2(&b, srcs[0], srcs[1]); break;
+ case 3: result = nir_ball_iequal3(&b, srcs[0], srcs[1]); break;
+ case 4: result = nir_ball_iequal4(&b, srcs[0], srcs[1]); break;
+ default:
+ unreachable("not reached");
+ }
+ }
+ } else {
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_seq(&b, srcs[0], srcs[1]); break;
+ case 2: result = nir_fall_equal2(&b, srcs[0], srcs[1]); break;
+ case 3: result = nir_fall_equal3(&b, srcs[0], srcs[1]); break;
+ case 4: result = nir_fall_equal4(&b, srcs[0], srcs[1]); break;
+ default:
+ unreachable("not reached");
+ }
+ }
+ break;
+ case ir_binop_any_nequal:
+ if (supports_ints) {
+ if (types[0] == GLSL_TYPE_FLOAT) {
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_fne(&b, srcs[0], srcs[1]); break;
+ case 2: result = nir_bany_fnequal2(&b, srcs[0], srcs[1]); break;
+ case 3: result = nir_bany_fnequal3(&b, srcs[0], srcs[1]); break;
+ case 4: result = nir_bany_fnequal4(&b, srcs[0], srcs[1]); break;
+ default:
+ unreachable("not reached");
+ }
+ } else {
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_ine(&b, srcs[0], srcs[1]); break;
+ case 2: result = nir_bany_inequal2(&b, srcs[0], srcs[1]); break;
+ case 3: result = nir_bany_inequal3(&b, srcs[0], srcs[1]); break;
+ case 4: result = nir_bany_inequal4(&b, srcs[0], srcs[1]); break;
+ default:
+ unreachable("not reached");
+ }
+ }
+ } else {
+ switch (ir->operands[0]->type->vector_elements) {
+ case 1: result = nir_sne(&b, srcs[0], srcs[1]); break;
+ case 2: result = nir_fany_nequal2(&b, srcs[0], srcs[1]); break;
+ case 3: result = nir_fany_nequal3(&b, srcs[0], srcs[1]); break;
+ case 4: result = nir_fany_nequal4(&b, srcs[0], srcs[1]); break;
+ default:
+ unreachable("not reached");
+ }
+ }
+ break;
+ case ir_binop_dot:
+ switch (ir->operands[0]->type->vector_elements) {
+ case 2: result = nir_fdot2(&b, srcs[0], srcs[1]); break;
+ case 3: result = nir_fdot3(&b, srcs[0], srcs[1]); break;
+ case 4: result = nir_fdot4(&b, srcs[0], srcs[1]); break;
+ default:
+ unreachable("not reached");
+ }
+ break;
+
+ case ir_binop_pack_half_2x16_split:
+ result = nir_pack_half_2x16_split(&b, srcs[0], srcs[1]);
+ break;
+ case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break;
+ case ir_triop_fma:
+ result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]);
+ break;
+ case ir_triop_lrp:
+ result = nir_flrp(&b, srcs[0], srcs[1], srcs[2]);
+ break;
+ case ir_triop_csel:
+ if (supports_ints)
+ result = nir_bcsel(&b, srcs[0], srcs[1], srcs[2]);
+ else
+ result = nir_fcsel(&b, srcs[0], srcs[1], srcs[2]);
+ break;
+ case ir_triop_bitfield_extract:
+ result = (out_type == GLSL_TYPE_INT) ?
+ nir_ibitfield_extract(&b, srcs[0], srcs[1], srcs[2]) :
+ nir_ubitfield_extract(&b, srcs[0], srcs[1], srcs[2]);
+ break;
+ case ir_quadop_bitfield_insert:
+ result = nir_bitfield_insert(&b, srcs[0], srcs[1], srcs[2], srcs[3]);
+ break;
+ case ir_quadop_vector:
+ result = nir_vec(&b, srcs, ir->type->vector_elements);
+ break;
+
+ default:
+ unreachable("not reached");
+ }
+}
+
+/*
+ * Lower an ir_swizzle: evaluate the swizzled operand and emit a NIR swizzle
+ * that picks components mask.x..mask.w.  The value returned by nir_swizzle()
+ * is stored in `result`.
+ */
+void
+nir_visitor::visit(ir_swizzle *ir)
+{
+   unsigned components[4] = {
+      ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w
+   };
+   nir_ssa_def *value = evaluate_rvalue(ir->val);
+   const unsigned width = ir->type->vector_elements;
+
+   result = nir_swizzle(&b, value, components, width, !supports_ints);
+}
+/*
+ * Lower an ir_texture into a nir_tex_instr.
+ *
+ * Steps, in order:
+ *  1. map ir->op to a nir_texop and count the sources that op needs;
+ *  2. add one source each for a projector, a shadow comparitor and a
+ *     non-constant offset;
+ *  3. create the instruction and copy the sampler type properties
+ *     (dimensionality, array-ness, shadow-ness) and the destination type;
+ *  4. fill the source slots in a fixed order and check that exactly
+ *     num_srcs slots were written;
+ *  5. hand the instruction to add_instr() with the destination size
+ *     reported by nir_tex_instr_dest_size().
+ */
+void
+nir_visitor::visit(ir_texture *ir)
+{
+ unsigned num_srcs;
+ nir_texop op;
+ switch (ir->op) {
+ case ir_tex:
+ op = nir_texop_tex;
+ num_srcs = 1; /* coordinate */
+ break;
+
+ case ir_txb:
+ case ir_txl:
+ op = (ir->op == ir_txb) ? nir_texop_txb : nir_texop_txl;
+ num_srcs = 2; /* coordinate, bias/lod */
+ break;
+
+ case ir_txd:
+ op = nir_texop_txd; /* coordinate, dPdx, dPdy */
+ num_srcs = 3;
+ break;
+
+ case ir_txf:
+ op = nir_texop_txf;
+ if (ir->lod_info.lod != NULL)
+ num_srcs = 2; /* coordinate, lod */
+ else
+ num_srcs = 1; /* coordinate */
+ break;
+
+ case ir_txf_ms:
+ op = nir_texop_txf_ms;
+ num_srcs = 2; /* coordinate, sample_index */
+ break;
+
+ case ir_txs:
+ op = nir_texop_txs;
+ if (ir->lod_info.lod != NULL)
+ num_srcs = 1; /* lod */
+ else
+ num_srcs = 0;
+ break;
+
+ case ir_lod:
+ op = nir_texop_lod;
+ num_srcs = 1; /* coordinate */
+ break;
+
+ case ir_tg4:
+ op = nir_texop_tg4;
+ num_srcs = 1; /* coordinate */
+ break;
+
+ case ir_query_levels:
+ op = nir_texop_query_levels;
+ num_srcs = 0;
+ break;
+
+ case ir_texture_samples:
+ op = nir_texop_texture_samples;
+ num_srcs = 0;
+ break;
+
+ case ir_samples_identical:
+ op = nir_texop_samples_identical;
+ num_srcs = 1; /* coordinate */
+ break;
+
+ default:
+ unreachable("not reached");
+ }
+ /* Each optional operand takes one more source slot.  A constant offset is
+  * the exception: it is stored in instr->const_offset further down and does
+  * not occupy a source.
+  */
+ if (ir->projector != NULL)
+ num_srcs++;
+ if (ir->shadow_comparitor != NULL)
+ num_srcs++;
+ if (ir->offset != NULL && ir->offset->as_constant() == NULL)
+ num_srcs++;
+
+ nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
+
+ instr->op = op;
+ instr->sampler_dim =
+ (glsl_sampler_dim) ir->sampler->type->sampler_dimensionality;
+ instr->is_array = ir->sampler->type->sampler_array;
+ instr->is_shadow = ir->sampler->type->sampler_shadow;
+ if (instr->is_shadow) /* is_new_style_shadow is only written for shadow samplers */
+ instr->is_new_style_shadow = (ir->type->vector_elements == 1);
+ switch (ir->type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ instr->dest_type = nir_type_float;
+ break;
+ case GLSL_TYPE_INT:
+ instr->dest_type = nir_type_int;
+ break;
+ case GLSL_TYPE_BOOL: /* bool results share the uint destination type */
+ case GLSL_TYPE_UINT:
+ instr->dest_type = nir_type_uint;
+ break;
+ default:
+ unreachable("not reached");
+ }
+
+ instr->sampler = evaluate_deref(&instr->instr, ir->sampler);
+ /* Fill the source slots in order: coordinate, projector, comparitor,
+  * non-constant offset, then the op-specific sources.  src_number must end
+  * up equal to num_srcs (asserted at the end).
+  */
+ unsigned src_number = 0;
+
+ if (ir->coordinate != NULL) {
+ instr->coord_components = ir->coordinate->type->vector_elements;
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->coordinate));
+ instr->src[src_number].src_type = nir_tex_src_coord;
+ src_number++;
+ }
+
+ if (ir->projector != NULL) {
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->projector));
+ instr->src[src_number].src_type = nir_tex_src_projector;
+ src_number++;
+ }
+
+ if (ir->shadow_comparitor != NULL) {
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->shadow_comparitor));
+ instr->src[src_number].src_type = nir_tex_src_comparitor;
+ src_number++;
+ }
+
+ if (ir->offset != NULL) {
+ /* we don't support multiple offsets yet */
+ assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar());
+
+ ir_constant *const_offset = ir->offset->as_constant();
+ if (const_offset != NULL) {
+ for (unsigned i = 0; i < const_offset->type->vector_elements; i++)
+ instr->const_offset[i] = const_offset->value.i[i];
+ } else {
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->offset));
+ instr->src[src_number].src_type = nir_tex_src_offset;
+ src_number++;
+ }
+ }
+ /* Op-specific sources (bias, lod, gradients, sample index) come last. */
+ switch (ir->op) {
+ case ir_txb:
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->lod_info.bias));
+ instr->src[src_number].src_type = nir_tex_src_bias;
+ src_number++;
+ break;
+
+ case ir_txl:
+ case ir_txf:
+ case ir_txs:
+ if (ir->lod_info.lod != NULL) {
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->lod_info.lod));
+ instr->src[src_number].src_type = nir_tex_src_lod;
+ src_number++;
+ }
+ break;
+
+ case ir_txd:
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdx));
+ instr->src[src_number].src_type = nir_tex_src_ddx;
+ src_number++;
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdy));
+ instr->src[src_number].src_type = nir_tex_src_ddy;
+ src_number++;
+ break;
+
+ case ir_txf_ms:
+ instr->src[src_number].src =
+ nir_src_for_ssa(evaluate_rvalue(ir->lod_info.sample_index));
+ instr->src[src_number].src_type = nir_tex_src_ms_index;
+ src_number++;
+ break;
+
+ case ir_tg4:
+ instr->component = ir->lod_info.component->as_constant()->value.u[0]; /* NOTE(review): as_constant() result is used unchecked - presumably always constant here; confirm */
+ break;
+
+ default:
+ break;
+ }
+
+ assert(src_number == num_srcs);
+
+ add_instr(&instr->instr, nir_tex_instr_dest_size(instr));
+}
+
+/*
+ * Lower an ir_constant.
+ *
+ * The constant may be an array or a struct that an enclosing dereference
+ * indexes into, and we cannot tell from here.  To be safe, create a
+ * read-only local variable carrying the value as its constant initializer
+ * and leave a dereference of that variable in deref_head/deref_tail.
+ */
+void
+nir_visitor::visit(ir_constant *ir)
+{
+   nir_variable *temp =
+      nir_local_variable_create(this->impl, ir->type, "const_temp");
+
+   temp->data.read_only = true;
+   temp->constant_initializer = constant_copy(ir, temp);
+
+   nir_deref_var *head = nir_deref_var_create(this->shader, temp);
+   this->deref_head = head;
+   this->deref_tail = &head->deref;
+}
+
+/*
+ * Lower an ir_dereference_variable: look up the nir_variable recorded for
+ * ir->var in var_table (it must be present) and start a new dereference
+ * chain at it, with head and tail both pointing at the new variable deref.
+ */
+void
+nir_visitor::visit(ir_dereference_variable *ir)
+{
+   struct hash_entry *found =
+      _mesa_hash_table_search(this->var_table, ir->var);
+   assert(found);
+
+   nir_variable *nir_var = (nir_variable *) found->data;
+   nir_deref_var *head = nir_deref_var_create(this->shader, nir_var);
+
+   this->deref_head = head;
+   this->deref_tail = &head->deref;
+}
+
+/*
+ * Lower an ir_dereference_record: visit the record expression first so that
+ * deref_tail points at the struct being accessed, then append a struct
+ * dereference for the named field and make it the new tail.
+ */
+void
+nir_visitor::visit(ir_dereference_record *ir)
+{
+   ir->record->accept(this);
+
+   nir_deref *parent = this->deref_tail;
+
+   /* The field name is resolved against the type of the current tail. */
+   int idx = parent->type->field_index(ir->field);
+   assert(idx >= 0);
+
+   nir_deref_struct *member = nir_deref_struct_create(parent, idx);
+   member->deref.type = ir->type;
+
+   nir_deref *link = &member->deref;
+   parent->child = link;
+   this->deref_tail = link;
+}
+
+/*
+ * Lower an ir_dereference_array.
+ *
+ * A constant index becomes a direct array dereference with that base
+ * offset; any other index is evaluated and becomes an indirect one.  The
+ * array expression is visited after the index has been handled, and the new
+ * dereference is then linked under (and reparented to) the resulting tail.
+ */
+void
+nir_visitor::visit(ir_dereference_array *ir)
+{
+   nir_deref_array *elem = nir_deref_array_create(this->shader);
+   elem->deref.type = ir->type;
+
+   ir_constant *literal_index = ir->array_index->as_constant();
+   if (literal_index == NULL) {
+      elem->deref_array_type = nir_deref_array_type_indirect;
+      elem->indirect = nir_src_for_ssa(evaluate_rvalue(ir->array_index));
+   } else {
+      elem->deref_array_type = nir_deref_array_type_direct;
+      elem->base_offset = literal_index->value.u[0];
+   }
+
+   /* Sets deref_tail to the end of the chain for the array itself. */
+   ir->array->accept(this);
+
+   nir_deref *parent = this->deref_tail;
+   parent->child = &elem->deref;
+   ralloc_steal(parent, elem);
+   this->deref_tail = &elem->deref;
+}
+
+/*
+ * Lower an ir_barrier: create a nir_intrinsic_barrier instruction and insert
+ * it through the builder.  The ir node itself is not inspected.
+ */
+void
+nir_visitor::visit(ir_barrier *ir)
+{
+   nir_intrinsic_instr *barrier =
+      nir_intrinsic_instr_create(this->shader, nir_intrinsic_barrier);
+
+   nir_builder_instr_insert(&b, &barrier->instr);
+}
diff --git a/src/compiler/nir/glsl_to_nir.h b/src/compiler/nir/glsl_to_nir.h
new file mode 100644
index 00000000000..20d2a380a26
--- /dev/null
+++ b/src/compiler/nir/glsl_to_nir.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#ifndef GLSL_TO_NIR_H
+#define GLSL_TO_NIR_H
+
+#include "nir.h"
+#include "glsl/glsl_parser_extras.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Entry point of the GLSL IR to NIR translator.
+ *
+ * Declared with C linkage so that it can be called from both C and C++
+ * translation units.  Takes a shader program, the stage to translate and
+ * the NIR compiler options to use, and returns a nir_shader.
+ */
+nir_shader *glsl_to_nir(const struct gl_shader_program *shader_prog,
+                        gl_shader_stage stage,
+                        const nir_shader_compiler_options *options);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* GLSL_TO_NIR_H */
diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
new file mode 100644
index 00000000000..21bf678c04e
--- /dev/null
+++ b/src/compiler/nir/nir.c
@@ -0,0 +1,1665 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#include "nir.h"
+#include "nir_control_flow_private.h"
+#include <assert.h>
+
+/*
+ * Allocate a nir_shader out of mem_ctx and put it into an empty state: all
+ * variable, function and register lists empty, the info block zeroed and
+ * every counter reset.  The stage and options are recorded as given.
+ */
+nir_shader *
+nir_shader_create(void *mem_ctx,
+                  gl_shader_stage stage,
+                  const nir_shader_compiler_options *options)
+{
+   nir_shader *sh = ralloc(mem_ctx, nir_shader);
+
+   sh->stage = stage;
+   sh->options = options;
+   memset(&sh->info, 0, sizeof(sh->info));
+
+   /* Variable lists. */
+   exec_list_make_empty(&sh->uniforms);
+   exec_list_make_empty(&sh->inputs);
+   exec_list_make_empty(&sh->outputs);
+   exec_list_make_empty(&sh->globals);
+   exec_list_make_empty(&sh->system_values);
+
+   /* Functions and global registers. */
+   exec_list_make_empty(&sh->functions);
+   exec_list_make_empty(&sh->registers);
+   sh->reg_alloc = 0;
+
+   sh->num_uniforms = 0;
+   sh->num_outputs = 0;
+   sh->num_inputs = 0;
+
+   return sh;
+}
+
+/*
+ * Allocate a nir_register out of mem_ctx, reset its fields and use/def
+ * lists, and append it to the given register list.  The caller is left to
+ * fill in index and is_global.
+ */
+static nir_register *
+reg_create(void *mem_ctx, struct exec_list *list)
+{
+   nir_register *new_reg = ralloc(mem_ctx, nir_register);
+
+   new_reg->name = NULL;
+   new_reg->is_packed = false;
+   new_reg->num_array_elems = 0;
+   new_reg->num_components = 0;
+
+   list_inithead(&new_reg->uses);
+   list_inithead(&new_reg->defs);
+   list_inithead(&new_reg->if_uses);
+
+   exec_list_push_tail(list, &new_reg->node);
+
+   return new_reg;
+}
+
+/*
+ * Create a global register: allocated from the shader, appended to the
+ * shader's register list and given the next index from shader->reg_alloc.
+ */
+nir_register *
+nir_global_reg_create(nir_shader *shader)
+{
+   nir_register *global = reg_create(shader, &shader->registers);
+
+   global->is_global = true;
+   global->index = shader->reg_alloc;
+   shader->reg_alloc++;
+
+   return global;
+}
+
+/*
+ * Create a register local to a function implementation.  It is allocated
+ * from the ralloc parent of impl, appended to impl's register list and given
+ * the next index from impl->reg_alloc.
+ */
+nir_register *
+nir_local_reg_create(nir_function_impl *impl)
+{
+   void *ctx = ralloc_parent(impl);
+   nir_register *local = reg_create(ctx, &impl->registers);
+
+   local->is_global = false;
+   local->index = impl->reg_alloc;
+   impl->reg_alloc++;
+
+   return local;
+}
+
+/* Unlink a register from the list it is currently on; nothing is freed here. */
+void
+nir_reg_remove(nir_register *reg)
+{
+   struct exec_node *link = &reg->node;
+
+   exec_node_remove(link);
+}
+
+/*
+ * Append a variable to the shader list that matches its mode.  Uniforms and
+ * shader-storage variables share the uniforms list.  nir_var_all and
+ * nir_var_local are rejected with an assertion and, when assertions are
+ * compiled out, leave every list untouched.
+ */
+void
+nir_shader_add_variable(nir_shader *shader, nir_variable *var)
+{
+   struct exec_list *target = NULL;
+
+   switch (var->data.mode) {
+   case nir_var_all:
+      assert(!"invalid mode");
+      break;
+
+   case nir_var_local:
+      assert(!"nir_shader_add_variable cannot be used for local variables");
+      break;
+
+   case nir_var_global:
+      target = &shader->globals;
+      break;
+
+   case nir_var_shader_in:
+      target = &shader->inputs;
+      break;
+
+   case nir_var_shader_out:
+      target = &shader->outputs;
+      break;
+
+   case nir_var_uniform:
+   case nir_var_shader_storage:
+      target = &shader->uniforms;
+      break;
+
+   case nir_var_system_value:
+      target = &shader->system_values;
+      break;
+   }
+
+   if (target != NULL)
+      exec_list_push_tail(target, &var->node);
+}
+
+/*
+ * Create a zero-initialized shader-level variable with the given mode, type
+ * and a copy of name, then add it to the matching list of the shader.
+ *
+ * Inputs of any stage but vertex and outputs of any stage but fragment get
+ * smooth interpolation.  Inputs and uniforms are marked read-only.
+ */
+nir_variable *
+nir_variable_create(nir_shader *shader, nir_variable_mode mode,
+                    const struct glsl_type *type, const char *name)
+{
+   nir_variable *var = rzalloc(shader, nir_variable);
+   const bool is_input = (mode == nir_var_shader_in);
+   const bool is_output = (mode == nir_var_shader_out);
+
+   var->name = ralloc_strdup(var, name);
+   var->type = type;
+   var->data.mode = mode;
+
+   if (is_input) {
+      if (shader->stage != MESA_SHADER_VERTEX)
+         var->data.interpolation = INTERP_QUALIFIER_SMOOTH;
+   } else if (is_output) {
+      if (shader->stage != MESA_SHADER_FRAGMENT)
+         var->data.interpolation = INTERP_QUALIFIER_SMOOTH;
+   }
+
+   if (is_input || mode == nir_var_uniform)
+      var->data.read_only = true;
+
+   nir_shader_add_variable(shader, var);
+
+   return var;
+}
+
+/*
+ * Create a zero-initialized local variable for a function implementation.
+ * The variable is allocated from the shader that owns impl's function, gets
+ * a copy of name and is registered with impl.
+ */
+nir_variable *
+nir_local_variable_create(nir_function_impl *impl,
+                          const struct glsl_type *type, const char *name)
+{
+   nir_shader *owner = impl->function->shader;
+   nir_variable *local = rzalloc(owner, nir_variable);
+
+   local->data.mode = nir_var_local;
+   local->type = type;
+   local->name = ralloc_strdup(local, name);
+
+   nir_function_impl_add_variable(impl, local);
+
+   return local;
+}
+
+/*
+ * Allocate a function from the shader and append it to the shader's function
+ * list.  It starts with a copy of name, no parameters, a void return type
+ * and no implementation.
+ */
+nir_function *
+nir_function_create(nir_shader *shader, const char *name)
+{
+   nir_function *fn = ralloc(shader, nir_function);
+
+   exec_list_push_tail(&shader->functions, &fn->node);
+
+   fn->shader = shader;
+   fn->name = ralloc_strdup(fn, name);
+   fn->impl = NULL;
+   fn->return_type = glsl_void_type();
+   fn->params = NULL;
+   fn->num_params = 0;
+
+   return fn;
+}
+
+/*
+ * Copy src into dest.  An SSA source just copies the definition pointer.
+ * A register source copies the register and base offset; an indirect source,
+ * if present, is duplicated recursively into a nir_src allocated from
+ * mem_ctx.
+ */
+void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
+{
+   dest->is_ssa = src->is_ssa;
+
+   if (src->is_ssa) {
+      dest->ssa = src->ssa;
+      return;
+   }
+
+   dest->reg.base_offset = src->reg.base_offset;
+   dest->reg.reg = src->reg.reg;
+
+   if (!src->reg.indirect) {
+      dest->reg.indirect = NULL;
+      return;
+   }
+
+   dest->reg.indirect = ralloc(mem_ctx, nir_src);
+   nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
+}
+
+/*
+ * Copy a register destination.  src must not be an SSA destination (copying
+ * an SSA definition makes no sense), which is asserted.  An indirect source,
+ * if present, is duplicated into a nir_src allocated from instr.
+ */
+void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
+{
+   assert(!src->is_ssa);
+
+   dest->is_ssa = false;
+   dest->reg.base_offset = src->reg.base_offset;
+   dest->reg.reg = src->reg.reg;
+
+   if (!src->reg.indirect) {
+      dest->reg.indirect = NULL;
+      return;
+   }
+
+   dest->reg.indirect = ralloc(instr, nir_src);
+   nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
+}
+
+/*
+ * Copy an ALU source: the underlying nir_src (using the instruction as the
+ * allocation context), the abs/negate modifiers and all four swizzle
+ * entries.
+ */
+void
+nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
+                 nir_alu_instr *instr)
+{
+   nir_src_copy(&dest->src, &src->src, &instr->instr);
+
+   dest->negate = src->negate;
+   dest->abs = src->abs;
+
+   unsigned chan = 0;
+   while (chan < 4) {
+      dest->swizzle[chan] = src->swizzle[chan];
+      chan++;
+   }
+}
+
+/*
+ * Copy an ALU destination: the underlying nir_dest (which must be a register
+ * destination, see nir_dest_copy) plus the saturate flag and write mask.
+ */
+void
+nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
+                  nir_alu_instr *instr)
+{
+   nir_instr *owner = &instr->instr;
+
+   nir_dest_copy(&dest->dest, &src->dest, owner);
+   dest->saturate = src->saturate;
+   dest->write_mask = src->write_mask;
+}
+
+
+/* Reset a control-flow node: unlinked list node, no parent, the given type. */
+static void
+cf_init(nir_cf_node *node, nir_cf_node_type type)
+{
+   node->type = type;
+   node->parent = NULL;
+   exec_node_init(&node->node);
+}
+
+/*
+ * Create the implementation for a function that does not have one yet
+ * (asserted).  Allocations come from the ralloc parent of the function.
+ *
+ * The new implementation has empty lists, no parameters or return variable,
+ * zeroed allocation counters and no valid metadata.  It also gets a start
+ * block, which is pushed onto the body, and an end block, which is stored in
+ * impl->end_block only; the start block's first successor is the end block.
+ */
+nir_function_impl *
+nir_function_impl_create(nir_function *function)
+{
+   assert(function->impl == NULL);
+
+   void *ctx = ralloc_parent(function);
+   nir_function_impl *impl = ralloc(ctx, nir_function_impl);
+
+   /* Link function and implementation both ways. */
+   impl->function = function;
+   function->impl = impl;
+
+   cf_init(&impl->cf_node, nir_cf_node_function);
+
+   exec_list_make_empty(&impl->body);
+   exec_list_make_empty(&impl->registers);
+   exec_list_make_empty(&impl->locals);
+
+   impl->params = NULL;
+   impl->num_params = 0;
+   impl->return_var = NULL;
+   impl->reg_alloc = 0;
+   impl->ssa_alloc = 0;
+   impl->valid_metadata = nir_metadata_none;
+
+   /* Start and end blocks. */
+   nir_block *first = nir_block_create(ctx);
+   nir_block *last = nir_block_create(ctx);
+
+   first->cf_node.parent = &impl->cf_node;
+   exec_list_push_tail(&impl->body, &first->cf_node.node);
+
+   last->cf_node.parent = &impl->cf_node;
+   impl->end_block = last;
+
+   first->successors[0] = last;
+   _mesa_set_add(last->predecessors, first);
+
+   return impl;
+}
+
+/*
+ * Allocate a block from the shader with no successors, no immediate
+ * dominator, an empty instruction list, and freshly created predecessor and
+ * dominance-frontier sets owned by the block.
+ */
+nir_block *
+nir_block_create(nir_shader *shader)
+{
+   nir_block *blk = ralloc(shader, nir_block);
+
+   cf_init(&blk->cf_node, nir_cf_node_block);
+
+   blk->successors[0] = NULL;
+   blk->successors[1] = NULL;
+   blk->imm_dom = NULL;
+
+   blk->predecessors = _mesa_set_create(blk, _mesa_hash_pointer,
+                                        _mesa_key_pointer_equal);
+
+   /* XXX maybe it would be worth it to defer this allocation?  That way it
+    * would not happen for shader refs that never run nir_calc_dominance.
+    * For example, the state tracker creates an initial IR, clones it, runs
+    * the appropriate lowering pass and passes it to the driver, which does
+    * common lowering/opt and then stores a ref that is later used for state
+    * specific lowering and further opt.  Do any of those references not
+    * need dominance metadata?
+    */
+   blk->dom_frontier = _mesa_set_create(blk, _mesa_hash_pointer,
+                                        _mesa_key_pointer_equal);
+
+   exec_list_make_empty(&blk->instr_list);
+
+   return blk;
+}
+
+/* Reset a source to a non-SSA register source with no register attached. */
+static inline void
+src_init(nir_src *src)
+{
+   src->is_ssa = false;
+   src->reg.base_offset = 0;
+   src->reg.indirect = NULL;
+   src->reg.reg = NULL;
+}
+
+/*
+ * Allocate an if statement from the shader.  Its condition is reset, and the
+ * then and else lists each contain one freshly created block whose parent is
+ * the if.
+ */
+nir_if *
+nir_if_create(nir_shader *shader)
+{
+   nir_if *nif = ralloc(shader, nir_if);
+
+   cf_init(&nif->cf_node, nir_cf_node_if);
+   src_init(&nif->condition);
+
+   /* then branch */
+   nir_block *then_blk = nir_block_create(shader);
+   then_blk->cf_node.parent = &nif->cf_node;
+   exec_list_make_empty(&nif->then_list);
+   exec_list_push_tail(&nif->then_list, &then_blk->cf_node.node);
+
+   /* else branch */
+   nir_block *else_blk = nir_block_create(shader);
+   else_blk->cf_node.parent = &nif->cf_node;
+   exec_list_make_empty(&nif->else_list);
+   exec_list_push_tail(&nif->else_list, &else_blk->cf_node.node);
+
+   return nif;
+}
+
+/*
+ * Allocate a loop from the shader.  Its body holds one freshly created
+ * block, parented to the loop, that is its own first successor and its own
+ * predecessor.
+ */
+nir_loop *
+nir_loop_create(nir_shader *shader)
+{
+   nir_loop *lp = ralloc(shader, nir_loop);
+
+   cf_init(&lp->cf_node, nir_cf_node_loop);
+
+   nir_block *blk = nir_block_create(shader);
+   blk->cf_node.parent = &lp->cf_node;
+
+   exec_list_make_empty(&lp->body);
+   exec_list_push_tail(&lp->body, &blk->cf_node.node);
+
+   /* The single block loops straight back to itself. */
+   blk->successors[0] = blk;
+   _mesa_set_add(blk->predecessors, blk);
+
+   return lp;
+}
+
+/* Reset the common instruction header: given type, no block, unlinked node. */
+static void
+instr_init(nir_instr *instr, nir_instr_type type)
+{
+   exec_node_init(&instr->node);
+   instr->block = NULL;
+   instr->type = type;
+}
+
+/* Reset a destination to a non-SSA register one with no register attached. */
+static void
+dest_init(nir_dest *dest)
+{
+   dest->is_ssa = false;
+   dest->reg.base_offset = 0;
+   dest->reg.indirect = NULL;
+   dest->reg.reg = NULL;
+}
+
+/*
+ * Reset an ALU destination: default register destination, saturate off and
+ * a write mask of 0xf.
+ */
+static void
+alu_dest_init(nir_alu_dest *dest)
+{
+   dest_init(&dest->dest);
+   dest->write_mask = 0xf;
+   dest->saturate = false;
+}
+
+/* Default ALU source: empty source, no abs/negate modifiers, and an identity
+ * swizzle (component i reads component i).
+ */
+static void
+alu_src_init(nir_alu_src *src)
+{
+   src_init(&src->src);
+
+   src->negate = false;
+   src->abs = false;
+
+   for (unsigned comp = 0; comp < 4; comp++)
+      src->swizzle[comp] = comp;
+}
+
+/* Allocates an ALU instruction for `op` under `shader`.  The allocation is
+ * sized to hold one nir_alu_src per input of the opcode; the destination and
+ * every source are set to their defaults.
+ */
+nir_alu_instr *
+nir_alu_instr_create(nir_shader *shader, nir_op op)
+{
+   const unsigned input_count = nir_op_infos[op].num_inputs;
+   nir_alu_instr *alu =
+      ralloc_size(shader,
+                  sizeof(nir_alu_instr) + input_count * sizeof(nir_alu_src));
+
+   instr_init(&alu->instr, nir_instr_type_alu);
+   alu->op = op;
+   alu_dest_init(&alu->dest);
+
+   unsigned s = 0;
+   while (s < input_count) {
+      alu_src_init(&alu->src[s]);
+      s++;
+   }
+
+   return alu;
+}
+
+/* Allocates a jump instruction of the given jump type under `shader`. */
+nir_jump_instr *
+nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
+{
+   nir_jump_instr *jump = ralloc(shader, nir_jump_instr);
+
+   instr_init(&jump->instr, nir_instr_type_jump);
+   jump->type = type;
+
+   return jump;
+}
+
+/* Allocates a load_const instruction under `shader` and initializes its SSA
+ * def with `num_components` components and no name.
+ */
+nir_load_const_instr *
+nir_load_const_instr_create(nir_shader *shader, unsigned num_components)
+{
+   nir_load_const_instr *load = ralloc(shader, nir_load_const_instr);
+
+   instr_init(&load->instr, nir_instr_type_load_const);
+   nir_ssa_def_init(&load->instr, &load->def, num_components, NULL);
+
+   return load;
+}
+
+/* Allocates an intrinsic instruction for `op` under `shader`.
+ *
+ * The allocation holds one nir_src per source of the intrinsic.  The memory
+ * is zero-filled: this function only explicitly initializes the sources and
+ * (for intrinsics that have one) the destination, so without zeroing the
+ * remaining fields -- e.g. the variables[] deref pointers that
+ * visit_intrinsic_src() walks -- would hold indeterminate heap contents
+ * until the caller fills them in.
+ */
+nir_intrinsic_instr *
+nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
+{
+   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
+   nir_intrinsic_instr *instr =
+      rzalloc_size(shader,
+                   sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
+
+   instr_init(&instr->instr, nir_instr_type_intrinsic);
+   instr->intrinsic = op;
+
+   /* Intrinsics without a destination keep the zero-filled dest. */
+   if (nir_intrinsic_infos[op].has_dest)
+      dest_init(&instr->dest);
+
+   for (unsigned i = 0; i < num_srcs; i++)
+      src_init(&instr->src[i]);
+
+   return instr;
+}
+
+/* Allocates a call instruction targeting `callee`.  The params array is
+ * allocated as a child of the instruction with one (unset) slot per callee
+ * parameter; the return deref starts out NULL.
+ */
+nir_call_instr *
+nir_call_instr_create(nir_shader *shader, nir_function *callee)
+{
+   nir_call_instr *call = ralloc(shader, nir_call_instr);
+
+   instr_init(&call->instr, nir_instr_type_call);
+
+   call->return_deref = NULL;
+   call->callee = callee;
+   call->num_params = callee->num_params;
+   call->params = ralloc_array(call, nir_deref_var *, call->num_params);
+
+   return call;
+}
+
+/* Allocates a zero-filled texture instruction with room for `num_srcs`
+ * sources.  The source array is a child allocation of the instruction and
+ * every source starts out empty; no sampler deref is attached.
+ */
+nir_tex_instr *
+nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
+{
+   nir_tex_instr *tex = rzalloc(shader, nir_tex_instr);
+
+   instr_init(&tex->instr, nir_instr_type_tex);
+   dest_init(&tex->dest);
+
+   tex->sampler = NULL;
+   tex->sampler_array_size = 0;
+   tex->sampler_index = 0;
+
+   tex->num_srcs = num_srcs;
+   tex->src = ralloc_array(tex, nir_tex_src, num_srcs);
+
+   unsigned s = 0;
+   while (s < num_srcs) {
+      src_init(&tex->src[s].src);
+      s++;
+   }
+
+   return tex;
+}
+
+/* Allocates a phi instruction with an empty destination and an empty source
+ * list.
+ */
+nir_phi_instr *
+nir_phi_instr_create(nir_shader *shader)
+{
+   nir_phi_instr *phi = ralloc(shader, nir_phi_instr);
+
+   instr_init(&phi->instr, nir_instr_type_phi);
+   exec_list_make_empty(&phi->srcs);
+   dest_init(&phi->dest);
+
+   return phi;
+}
+
+/* Allocates a parallel-copy instruction with an empty entry list. */
+nir_parallel_copy_instr *
+nir_parallel_copy_instr_create(nir_shader *shader)
+{
+   nir_parallel_copy_instr *pcopy = ralloc(shader, nir_parallel_copy_instr);
+
+   instr_init(&pcopy->instr, nir_instr_type_parallel_copy);
+   exec_list_make_empty(&pcopy->entries);
+
+   return pcopy;
+}
+
+/* Allocates an ssa_undef instruction under `shader` and initializes its SSA
+ * def with `num_components` components and no name.
+ */
+nir_ssa_undef_instr *
+nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components)
+{
+   nir_ssa_undef_instr *undef = ralloc(shader, nir_ssa_undef_instr);
+
+   instr_init(&undef->instr, nir_instr_type_ssa_undef);
+   nir_ssa_def_init(&undef->instr, &undef->def, num_components, NULL);
+
+   return undef;
+}
+
+/* Allocates the head of a deref chain for `var` under `mem_ctx`.  The deref
+ * takes the variable's type and has no child yet.
+ */
+nir_deref_var *
+nir_deref_var_create(void *mem_ctx, nir_variable *var)
+{
+   nir_deref_var *head = ralloc(mem_ctx, nir_deref_var);
+
+   head->var = var;
+   head->deref.type = var->type;
+   head->deref.child = NULL;
+   head->deref.deref_type = nir_deref_type_var;
+
+   return head;
+}
+
+/* Allocates an array deref under `mem_ctx`: direct addressing, base offset 0,
+ * empty indirect source and no child.
+ *
+ * The GLSL type of the element is not known here, so deref.type is set to
+ * NULL instead of being left uninitialized (the allocation is not
+ * zero-filled); callers are expected to assign the real type, as
+ * copy_deref_array() does.
+ */
+nir_deref_array *
+nir_deref_array_create(void *mem_ctx)
+{
+   nir_deref_array *deref = ralloc(mem_ctx, nir_deref_array);
+   deref->deref.deref_type = nir_deref_type_array;
+   deref->deref.child = NULL;
+   deref->deref.type = NULL;
+   deref->deref_array_type = nir_deref_array_type_direct;
+   src_init(&deref->indirect);
+   deref->base_offset = 0;
+   return deref;
+}
+
+/* Allocates a struct-member deref under `mem_ctx` selecting field
+ * `field_index`, with no child.
+ *
+ * The GLSL type of the field is not known here, so deref.type is set to NULL
+ * instead of being left uninitialized (the allocation is not zero-filled);
+ * callers are expected to assign the real type, as copy_deref_struct() does.
+ */
+nir_deref_struct *
+nir_deref_struct_create(void *mem_ctx, unsigned field_index)
+{
+   nir_deref_struct *deref = ralloc(mem_ctx, nir_deref_struct);
+   deref->deref.deref_type = nir_deref_type_struct;
+   deref->deref.child = NULL;
+   deref->deref.type = NULL;
+   deref->index = field_index;
+   return deref;
+}
+
+/* Deep-copies a variable deref and everything chained below it.  The copy is
+ * allocated under `mem_ctx`; copied children are allocated under the copy.
+ */
+static nir_deref_var *
+copy_deref_var(void *mem_ctx, nir_deref_var *deref)
+{
+   nir_deref_var *copy = nir_deref_var_create(mem_ctx, deref->var);
+
+   copy->deref.type = deref->deref.type;
+
+   if (deref->deref.child == NULL)
+      return copy;
+
+   copy->deref.child = nir_copy_deref(copy, deref->deref.child);
+   return copy;
+}
+
+/* Deep-copies an array deref and everything chained below it.  The indirect
+ * source is only copied for indirect array derefs.  The copy is allocated
+ * under `mem_ctx`; copied children are allocated under the copy.
+ */
+static nir_deref_array *
+copy_deref_array(void *mem_ctx, nir_deref_array *deref)
+{
+   nir_deref_array *copy = nir_deref_array_create(mem_ctx);
+
+   copy->base_offset = deref->base_offset;
+   copy->deref_array_type = deref->deref_array_type;
+
+   if (deref->deref_array_type == nir_deref_array_type_indirect)
+      nir_src_copy(&copy->indirect, &deref->indirect, mem_ctx);
+
+   copy->deref.type = deref->deref.type;
+
+   if (deref->deref.child == NULL)
+      return copy;
+
+   copy->deref.child = nir_copy_deref(copy, deref->deref.child);
+   return copy;
+}
+
+/* Deep-copies a struct deref and everything chained below it.  The copy is
+ * allocated under `mem_ctx`; copied children are allocated under the copy.
+ */
+static nir_deref_struct *
+copy_deref_struct(void *mem_ctx, nir_deref_struct *deref)
+{
+   nir_deref_struct *copy = nir_deref_struct_create(mem_ctx, deref->index);
+
+   copy->deref.type = deref->deref.type;
+
+   if (deref->deref.child == NULL)
+      return copy;
+
+   copy->deref.child = nir_copy_deref(copy, deref->deref.child);
+   return copy;
+}
+
+/* Deep-copies a deref of any kind by dispatching on its deref_type to the
+ * matching copy helper.  Returns the generic nir_deref embedded in the copy.
+ */
+nir_deref *
+nir_copy_deref(void *mem_ctx, nir_deref *deref)
+{
+   if (deref->deref_type == nir_deref_type_var)
+      return &copy_deref_var(mem_ctx, nir_deref_as_var(deref))->deref;
+
+   if (deref->deref_type == nir_deref_type_array)
+      return &copy_deref_array(mem_ctx, nir_deref_as_array(deref))->deref;
+
+   if (deref->deref_type == nir_deref_type_struct)
+      return &copy_deref_struct(mem_ctx, nir_deref_as_struct(deref))->deref;
+
+   unreachable("Invalid dereference type");
+
+   return NULL;
+}
+
+/* Builds a load_const instruction holding the part of a variable's constant
+ * initializer that the given deref chain selects.  It is the caller's job to
+ * make sure the variable really has a constant initializer.
+ */
+nir_load_const_instr *
+nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref)
+{
+   nir_constant *cur_const = deref->var->constant_initializer;
+   assert(cur_const);
+
+   unsigned column = 0;
+   const nir_deref *link = &deref->deref;
+
+   /* Walk down the chain, narrowing cur_const at every step. */
+   for (; link->child; link = link->child) {
+      switch (link->child->deref_type) {
+      case nir_deref_type_struct:
+         cur_const =
+            cur_const->elements[nir_deref_as_struct(link->child)->index];
+         break;
+
+      case nir_deref_type_array: {
+         nir_deref_array *arr = nir_deref_as_array(link->child);
+         assert(arr->deref_array_type == nir_deref_array_type_direct);
+         if (!glsl_type_is_matrix(link->type)) {
+            cur_const = cur_const->elements[arr->base_offset];
+         } else {
+            /* Indexing into a matrix must be the last link of the chain;
+             * remember which column was picked.
+             */
+            assert(arr->deref.child == NULL);
+            column = arr->base_offset;
+         }
+         break;
+      }
+
+      default:
+         unreachable("Invalid deref child type");
+      }
+   }
+
+   nir_load_const_instr *load =
+      nir_load_const_instr_create(shader, glsl_get_vector_elements(link->type));
+
+   /* Offset of the selected column inside the flat value array. */
+   const unsigned first = column * load->def.num_components;
+
+   for (unsigned comp = 0; comp < load->def.num_components; comp++) {
+      switch (glsl_get_base_type(link->type)) {
+      case GLSL_TYPE_BOOL:
+         if (cur_const->value.b[first + comp])
+            load->value.u[comp] = NIR_TRUE;
+         else
+            load->value.u[comp] = NIR_FALSE;
+         break;
+      case GLSL_TYPE_UINT:
+      case GLSL_TYPE_INT:
+      case GLSL_TYPE_FLOAT:
+         load->value.u[comp] = cur_const->value.u[first + comp];
+         break;
+      default:
+         unreachable("Invalid immediate type");
+      }
+   }
+
+   return load;
+}
+
+/* Follows parent links upward from `node` until a function node is reached
+ * and returns it as a nir_function_impl.
+ */
+nir_function_impl *
+nir_cf_node_get_function(nir_cf_node *node)
+{
+   nir_cf_node *cur;
+
+   for (cur = node; cur->type != nir_cf_node_function; cur = cur->parent)
+      ;
+
+   return nir_cf_node_as_function(cur);
+}
+
+/* nir_foreach_src callback: marks `state` (an instruction) as the parent of
+ * the source and appends the source to the use list of the SSA def or
+ * register it reads.  Always continues the iteration.
+ */
+static bool
+add_use_cb(nir_src *src, void *state)
+{
+   src->parent_instr = (nir_instr *) state;
+
+   if (src->is_ssa)
+      list_addtail(&src->use_link, &src->ssa->uses);
+   else
+      list_addtail(&src->use_link, &src->reg.reg->uses);
+
+   return true;
+}
+
+/* nir_foreach_ssa_def callback: if the instruction (`state`) is in a block
+ * and the def still carries the UINT_MAX placeholder index, assigns the next
+ * free SSA index of the enclosing function.  Always continues the iteration.
+ */
+static bool
+add_ssa_def_cb(nir_ssa_def *def, void *state)
+{
+   nir_instr *parent = state;
+
+   if (!parent->block || def->index != UINT_MAX)
+      return true;
+
+   nir_function_impl *func = nir_cf_node_get_function(&parent->block->cf_node);
+   def->index = func->ssa_alloc++;
+
+   return true;
+}
+
+/* nir_foreach_dest callback: for register destinations, records `state` (an
+ * instruction) as the parent and appends the dest to the register's def
+ * list.  SSA destinations are left untouched.  Always continues.
+ */
+static bool
+add_reg_def_cb(nir_dest *dest, void *state)
+{
+   if (dest->is_ssa)
+      return true;
+
+   dest->reg.parent_instr = (nir_instr *) state;
+   list_addtail(&dest->reg.def_link, &dest->reg.reg->defs);
+
+   return true;
+}
+
+/* Registers an instruction's sources, register destinations and SSA defs (in
+ * that order) via the add_*_cb callbacks.
+ */
+static void
+add_defs_uses(nir_instr *instr)
+{
+   void *cb_state = instr;
+
+   nir_foreach_src(instr, add_use_cb, cb_state);
+   nir_foreach_dest(instr, add_reg_def_cb, cb_state);
+   nir_foreach_ssa_def(instr, add_ssa_def_cb, cb_state);
+}
+
+/* Inserts `instr` at the position described by `cursor`.
+ *
+ * The instruction's block is set, its defs and uses are registered and it is
+ * linked into the block's instruction list.  Debug builds assert that nothing
+ * is placed after a jump and that a jump only ever ends up last in its
+ * block.  After inserting a jump, nir_handle_add_jump() is called on the
+ * block.
+ */
+void
+nir_instr_insert(nir_cursor cursor, nir_instr *instr)
+{
+   const bool is_jump = instr->type == nir_instr_type_jump;
+
+   switch (cursor.option) {
+   case nir_cursor_before_block:
+      /* A jump may only go into a block that is still empty. */
+      assert(!is_jump || exec_list_is_empty(&cursor.block->instr_list));
+
+      instr->block = cursor.block;
+      add_defs_uses(instr);
+      exec_list_push_head(&cursor.block->instr_list, &instr->node);
+      break;
+
+   case nir_cursor_after_block: {
+      /* Nothing may follow a jump. */
+      nir_instr *tail = nir_block_last_instr(cursor.block);
+      assert(tail == NULL || tail->type != nir_instr_type_jump);
+      (void) tail;
+
+      instr->block = cursor.block;
+      add_defs_uses(instr);
+      exec_list_push_tail(&cursor.block->instr_list, &instr->node);
+      break;
+   }
+
+   case nir_cursor_before_instr:
+      assert(!is_jump);
+
+      instr->block = cursor.instr->block;
+      add_defs_uses(instr);
+      exec_node_insert_node_before(&cursor.instr->node, &instr->node);
+      break;
+
+   case nir_cursor_after_instr:
+      /* Nothing may follow a jump. */
+      assert(cursor.instr->type != nir_instr_type_jump);
+
+      /* A jump has to become the last instruction of the block. */
+      assert(!is_jump ||
+             cursor.instr == nir_block_last_instr(cursor.instr->block));
+
+      instr->block = cursor.instr->block;
+      add_defs_uses(instr);
+      exec_node_insert_after(&cursor.instr->node, &instr->node);
+      break;
+   }
+
+   if (is_jump)
+      nir_handle_add_jump(instr->block);
+}
+
+/* A source is valid once it points at something: an SSA def for SSA sources,
+ * a register otherwise.
+ */
+static bool
+src_is_valid(const nir_src *src)
+{
+   if (src->is_ssa)
+      return src->ssa != NULL;
+
+   return src->reg.reg != NULL;
+}
+
+/* nir_foreach_src callback: unlinks a valid source from the use list it is
+ * on.  Invalid (empty) sources are skipped.  Always continues.
+ */
+static bool
+remove_use_cb(nir_src *src, void *state)
+{
+   if (!src_is_valid(src))
+      return true;
+
+   list_del(&src->use_link);
+   return true;
+}
+
+/* nir_foreach_dest callback: unlinks a register destination from its def
+ * list.  SSA destinations are skipped.  Always continues.
+ */
+static bool
+remove_def_cb(nir_dest *dest, void *state)
+{
+   if (dest->is_ssa)
+      return true;
+
+   list_del(&dest->reg.def_link);
+   return true;
+}
+
+/* Unlinks an instruction's register destinations and then its sources from
+ * the def/use lists they are on.
+ */
+static void
+remove_defs_uses(nir_instr *instr)
+{
+   void *cb_state = instr;
+
+   nir_foreach_dest(instr, remove_def_cb, cb_state);
+   nir_foreach_src(instr, remove_use_cb, cb_state);
+}
+
+/* Removes an instruction from its block: drops its defs/uses, unlinks it
+ * from the instruction list and, for jumps, calls nir_handle_remove_jump()
+ * on the block with the jump's type.
+ */
+void
+nir_instr_remove(nir_instr *instr)
+{
+   remove_defs_uses(instr);
+   exec_node_remove(&instr->node);
+
+   if (instr->type != nir_instr_type_jump)
+      return;
+
+   nir_handle_remove_jump(instr->block, nir_instr_as_jump(instr)->type);
+}
+
+/*@}*/
+
+/* Numbers the function's registers 0, 1, 2, ... in list order and stores the
+ * total in impl->reg_alloc.
+ */
+void
+nir_index_local_regs(nir_function_impl *impl)
+{
+   unsigned count = 0;
+
+   foreach_list_typed(nir_register, reg, node, &impl->registers) {
+      reg->index = count;
+      count++;
+   }
+
+   impl->reg_alloc = count;
+}
+
+/* Numbers the shader's registers 0, 1, 2, ... in list order and stores the
+ * total in shader->reg_alloc.
+ */
+void
+nir_index_global_regs(nir_shader *shader)
+{
+   unsigned count = 0;
+
+   foreach_list_typed(nir_register, reg, node, &shader->registers) {
+      reg->index = count;
+      count++;
+   }
+
+   shader->reg_alloc = count;
+}
+
+/* Calls cb on the single destination of an ALU instruction. */
+static bool
+visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state)
+{
+   nir_dest *alu_dest = &instr->dest.dest;
+
+   return cb(alu_dest, state);
+}
+
+/* Calls cb on the destination of an intrinsic, if that intrinsic has one;
+ * otherwise reports "continue".
+ */
+static bool
+visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb,
+                     void *state)
+{
+   if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
+      return true;
+
+   return cb(&instr->dest, state);
+}
+
+/* Calls cb on the single destination of a texture instruction. */
+static bool
+visit_texture_dest(nir_tex_instr *instr, nir_foreach_dest_cb cb,
+                   void *state)
+{
+   nir_dest *tex_dest = &instr->dest;
+
+   return cb(tex_dest, state);
+}
+
+/* Calls cb on the single destination of a phi instruction. */
+static bool
+visit_phi_dest(nir_phi_instr *instr, nir_foreach_dest_cb cb, void *state)
+{
+   nir_dest *phi_dest = &instr->dest;
+
+   return cb(phi_dest, state);
+}
+
+/* Calls cb on the destination of every entry of a parallel copy, stopping
+ * early (and returning false) as soon as cb returns false.
+ */
+static bool
+visit_parallel_copy_dest(nir_parallel_copy_instr *instr,
+                         nir_foreach_dest_cb cb, void *state)
+{
+   nir_foreach_parallel_copy_entry(instr, copy) {
+      const bool keep_going = cb(&copy->dest, state);
+      if (!keep_going)
+         return false;
+   }
+
+   return true;
+}
+
+/* Calls cb on every destination of `instr`.
+ *
+ * Returns false as soon as cb returns false, true otherwise.  load_const,
+ * ssa_undef, call and jump instructions are treated as having no
+ * destinations here.
+ */
+bool
+nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state)
+{
+   switch (instr->type) {
+   case nir_instr_type_jump:
+   case nir_instr_type_call:
+   case nir_instr_type_ssa_undef:
+   case nir_instr_type_load_const:
+      return true;
+
+   case nir_instr_type_alu:
+      return visit_alu_dest(nir_instr_as_alu(instr), cb, state);
+
+   case nir_instr_type_intrinsic:
+      return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state);
+
+   case nir_instr_type_tex:
+      return visit_texture_dest(nir_instr_as_tex(instr), cb, state);
+
+   case nir_instr_type_phi:
+      return visit_phi_dest(nir_instr_as_phi(instr), cb, state);
+
+   case nir_instr_type_parallel_copy:
+      return visit_parallel_copy_dest(nir_instr_as_parallel_copy(instr),
+                                      cb, state);
+
+   default:
+      unreachable("Invalid instruction type");
+   }
+
+   return true;
+}
+
+struct foreach_ssa_def_state { /* lets nir_foreach_dest drive an SSA-def callback; see nir_ssa_def_visitor */
+ nir_foreach_ssa_def_cb cb; /* client callback invoked for each SSA destination */
+ void *client_state; /* opaque pointer handed through to cb */
+};
+
+/* nir_foreach_dest callback used by nir_foreach_ssa_def: forwards SSA
+ * destinations to the wrapped SSA-def callback and skips register
+ * destinations.
+ */
+static inline bool
+nir_ssa_def_visitor(nir_dest *dest, void *void_state)
+{
+   struct foreach_ssa_def_state *wrapped = void_state;
+
+   if (!dest->is_ssa)
+      return true;
+
+   return wrapped->cb(&dest->ssa, wrapped->client_state);
+}
+
+/* Calls cb on every SSA def produced by `instr`.
+ *
+ * load_const and ssa_undef carry their def directly; for the instruction
+ * types with nir_dest destinations the SSA ones are found through
+ * nir_foreach_dest.  Calls and jumps produce none.  Returns false as soon as
+ * cb returns false.
+ */
+bool
+nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
+{
+   switch (instr->type) {
+   case nir_instr_type_jump:
+   case nir_instr_type_call:
+      return true;
+
+   case nir_instr_type_ssa_undef:
+      return cb(&nir_instr_as_ssa_undef(instr)->def, state);
+
+   case nir_instr_type_load_const:
+      return cb(&nir_instr_as_load_const(instr)->def, state);
+
+   case nir_instr_type_alu:
+   case nir_instr_type_tex:
+   case nir_instr_type_intrinsic:
+   case nir_instr_type_phi:
+   case nir_instr_type_parallel_copy: {
+      struct foreach_ssa_def_state wrapped = {
+         .cb = cb,
+         .client_state = state,
+      };
+      return nir_foreach_dest(instr, nir_ssa_def_visitor, &wrapped);
+   }
+
+   default:
+      unreachable("Invalid instruction type");
+   }
+}
+
+/* Calls cb on a source and, for register sources that have one, on its
+ * indirect source as well.  Stops after the first cb that returns false.
+ */
+static bool
+visit_src(nir_src *src, nir_foreach_src_cb cb, void *state)
+{
+   bool keep_going = cb(src, state);
+
+   if (keep_going && !src->is_ssa && src->reg.indirect != NULL)
+      keep_going = cb(src->reg.indirect, state);
+
+   return keep_going;
+}
+
+/* Visits the indirect source of an array deref; derefs that are not
+ * indirect have nothing to visit.
+ */
+static bool
+visit_deref_array_src(nir_deref_array *deref, nir_foreach_src_cb cb,
+                      void *state)
+{
+   if (deref->deref_array_type != nir_deref_array_type_indirect)
+      return true;
+
+   return visit_src(&deref->indirect, cb, state);
+}
+
+/* Walks a deref chain from its head and visits the sources of every array
+ * deref in it.  Returns false as soon as a visit does.
+ */
+static bool
+visit_deref_src(nir_deref_var *deref, nir_foreach_src_cb cb, void *state)
+{
+   for (nir_deref *link = &deref->deref; link != NULL; link = link->child) {
+      if (link->deref_type != nir_deref_type_array)
+         continue;
+
+      if (!visit_deref_array_src(nir_deref_as_array(link), cb, state))
+         return false;
+   }
+
+   return true;
+}
+
+/* Visits every source of an ALU instruction (one per opcode input),
+ * returning false as soon as a visit does.
+ */
+static bool
+visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state)
+{
+   unsigned s = 0;
+
+   while (s < nir_op_infos[instr->op].num_inputs) {
+      if (!visit_src(&instr->src[s].src, cb, state))
+         return false;
+      s++;
+   }
+
+   return true;
+}
+
+/* Visits every source of a texture instruction and then, if a sampler deref
+ * is attached, the sources inside that deref chain.
+ */
+static bool
+visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state)
+{
+   unsigned s = 0;
+
+   while (s < instr->num_srcs) {
+      if (!visit_src(&instr->src[s].src, cb, state))
+         return false;
+      s++;
+   }
+
+   if (instr->sampler == NULL)
+      return true;
+
+   return visit_deref_src(instr->sampler, cb, state);
+}
+
+/* Visits the direct sources of an intrinsic and then the sources inside each
+ * of its variable deref chains.  Returns false as soon as a visit does.
+ */
+static bool
+visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb,
+                    void *state)
+{
+   const unsigned src_count = nir_intrinsic_infos[instr->intrinsic].num_srcs;
+
+   for (unsigned s = 0; s < src_count; s++) {
+      if (!visit_src(&instr->src[s], cb, state))
+         return false;
+   }
+
+   const unsigned var_count =
+      nir_intrinsic_infos[instr->intrinsic].num_variables;
+
+   for (unsigned v = 0; v < var_count; v++) {
+      if (!visit_deref_src(instr->variables[v], cb, state))
+         return false;
+   }
+
+   return true;
+}
+
+/* No sources are visited for call instructions. */
+static bool
+visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state)
+{
+   (void) instr;
+   (void) cb;
+   (void) state;
+
+   return true;
+}
+
+/* No sources are visited for load_const instructions. */
+static bool
+visit_load_const_src(nir_load_const_instr *instr, nir_foreach_src_cb cb,
+                     void *state)
+{
+   (void) instr;
+   (void) cb;
+   (void) state;
+
+   return true;
+}
+
+/* Visits the source of every phi entry, returning false as soon as a visit
+ * does.
+ */
+static bool
+visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state)
+{
+   nir_foreach_phi_src(instr, phi_src) {
+      const bool keep_going = visit_src(&phi_src->src, cb, state);
+      if (!keep_going)
+         return false;
+   }
+
+   return true;
+}
+
+/* Visits the source of every parallel-copy entry, returning false as soon as
+ * a visit does.
+ */
+static bool
+visit_parallel_copy_src(nir_parallel_copy_instr *instr,
+                        nir_foreach_src_cb cb, void *state)
+{
+   nir_foreach_parallel_copy_entry(instr, copy) {
+      const bool keep_going = visit_src(&copy->src, cb, state);
+      if (!keep_going)
+         return false;
+   }
+
+   return true;
+}
+
+typedef struct { /* carries a source callback through nir_foreach_dest; see visit_dest_indirect */
+ void *state; /* opaque pointer handed through to cb */
+ nir_foreach_src_cb cb; /* source callback applied to indirect sources of register dests */
+} visit_dest_indirect_state;
+
+/* nir_foreach_dest callback: applies the wrapped source callback to the
+ * indirect source of a register destination, if it has one.
+ */
+static bool
+visit_dest_indirect(nir_dest *dest, void *_state)
+{
+   visit_dest_indirect_state *wrapped = _state;
+
+   if (dest->is_ssa || dest->reg.indirect == NULL)
+      return true;
+
+   return wrapped->cb(dest->reg.indirect, wrapped->state);
+}
+
+/* Calls cb on every source of `instr`.
+ *
+ * First the sources proper are visited according to the instruction type;
+ * afterwards the indirect sources of register destinations are visited too.
+ * Jump and ssa_undef instructions return true immediately.  Returns false as
+ * soon as cb returns false.
+ */
+bool
+nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
+{
+   bool keep_going = true;
+
+   switch (instr->type) {
+   case nir_instr_type_ssa_undef:
+   case nir_instr_type_jump:
+      return true;
+
+   case nir_instr_type_alu:
+      keep_going = visit_alu_src(nir_instr_as_alu(instr), cb, state);
+      break;
+   case nir_instr_type_intrinsic:
+      keep_going =
+         visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state);
+      break;
+   case nir_instr_type_tex:
+      keep_going = visit_tex_src(nir_instr_as_tex(instr), cb, state);
+      break;
+   case nir_instr_type_call:
+      keep_going = visit_call_src(nir_instr_as_call(instr), cb, state);
+      break;
+   case nir_instr_type_load_const:
+      keep_going =
+         visit_load_const_src(nir_instr_as_load_const(instr), cb, state);
+      break;
+   case nir_instr_type_phi:
+      keep_going = visit_phi_src(nir_instr_as_phi(instr), cb, state);
+      break;
+   case nir_instr_type_parallel_copy:
+      keep_going =
+         visit_parallel_copy_src(nir_instr_as_parallel_copy(instr),
+                                 cb, state);
+      break;
+
+   default:
+      unreachable("Invalid instruction type");
+      break;
+   }
+
+   if (!keep_going)
+      return false;
+
+   /* Register destinations may themselves read an indirect source. */
+   visit_dest_indirect_state wrapped = {
+      .state = state,
+      .cb = cb,
+   };
+   return nir_foreach_dest(instr, visit_dest_indirect, &wrapped);
+}
+
+/* Returns the constant value behind a source, or NULL when the source is not
+ * an SSA value produced by a load_const instruction.
+ */
+nir_const_value *
+nir_src_as_const_value(nir_src src)
+{
+   if (!src.is_ssa)
+      return NULL;
+
+   nir_instr *producer = src.ssa->parent_instr;
+   if (producer->type != nir_instr_type_load_const)
+      return NULL;
+
+   return &nir_instr_as_load_const(producer)->value;
+}
+
+/**
+ * Conservative test for dynamic uniformity.  A result of true means the
+ * source is known to be dynamically uniform; false only means that this
+ * could not be established.
+ */
+bool
+nir_src_is_dynamically_uniform(nir_src src)
+{
+   if (!src.is_ssa)
+      return false;
+
+   nir_instr *producer = src.ssa->parent_instr;
+
+   switch (producer->type) {
+   case nir_instr_type_load_const:
+      /* Constants are trivially dynamically uniform */
+      return true;
+
+   case nir_instr_type_intrinsic:
+      /* As are uniform variables */
+      if (nir_instr_as_intrinsic(producer)->intrinsic ==
+          nir_intrinsic_load_uniform)
+         return true;
+      break;
+
+   default:
+      break;
+   }
+
+   /* XXX: this could have many more tests, such as when a sampler function is
+    * called with dynamically uniform arguments.
+    */
+   return false;
+}
+
+/* Unlinks a source, and the chain of indirect sources hanging off register
+ * sources, from the use lists they are on.  Invalid sources are skipped.
+ */
+static void
+src_remove_all_uses(nir_src *src)
+{
+   while (src) {
+      if (src_is_valid(src))
+         list_del(&src->use_link);
+
+      /* Only register sources can carry a further indirect source. */
+      src = src->is_ssa ? NULL : src->reg.indirect;
+   }
+}
+
+/* Links a source, and the chain of indirect sources hanging off register
+ * sources, into the matching use lists.
+ *
+ * When parent_instr is given, the sources go on the `uses` list of their SSA
+ * def or register; otherwise parent_if must be given and they go on the
+ * `if_uses` list.  Invalid sources are skipped.
+ */
+static void
+src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
+{
+   while (src) {
+      if (src_is_valid(src)) {
+         struct list_head *users;
+
+         if (parent_instr) {
+            src->parent_instr = parent_instr;
+            users = src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses;
+         } else {
+            assert(parent_if);
+            src->parent_if = parent_if;
+            users = src->is_ssa ? &src->ssa->if_uses : &src->reg.reg->if_uses;
+         }
+
+         list_addtail(&src->use_link, users);
+      }
+
+      /* Only register sources can carry a further indirect source. */
+      src = src->is_ssa ? NULL : src->reg.indirect;
+   }
+}
+
+/* Replaces *src, a source belonging to `instr`, with new_src while keeping
+ * the use lists consistent: the old source is unlinked before the overwrite
+ * and the new one is linked afterwards.
+ */
+void
+nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src)
+{
+   /* An already populated source has to belong to this instruction. */
+   assert(!src_is_valid(src) || src->parent_instr == instr);
+
+   src_remove_all_uses(src);
+
+   *src = new_src;
+
+   src_add_all_uses(src, instr, NULL);
+}
+
+/* Moves *src into *dest, a source belonging to dest_instr.  Both sources are
+ * unlinked from their use lists first; afterwards *src is reset to
+ * NIR_SRC_INIT and *dest is linked as a use by dest_instr.
+ */
+void
+nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
+{
+   /* An already populated dest has to belong to dest_instr. */
+   assert(!src_is_valid(dest) || dest->parent_instr == dest_instr);
+
+   src_remove_all_uses(dest);
+   src_remove_all_uses(src);
+
+   *dest = *src;
+   *src = NIR_SRC_INIT;
+
+   src_add_all_uses(dest, dest_instr, NULL);
+}
+
+/* Replaces the condition of if_stmt with new_src while keeping the if-use
+ * lists consistent.
+ */
+void
+nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
+{
+   nir_src *cond = &if_stmt->condition;
+
+   /* An already populated condition has to belong to this if. */
+   assert(!src_is_valid(cond) || cond->parent_if == if_stmt);
+
+   src_remove_all_uses(cond);
+
+   *cond = new_src;
+
+   src_add_all_uses(cond, NULL, if_stmt);
+}
+
+/* Replaces *dest, a destination of `instr`, with the register destination
+ * new_dest.
+ *
+ * An old register dest is unlinked from its def list (together with the uses
+ * of its indirect source); an old SSA dest may only be overwritten when it
+ * has no uses left.  new_dest must not be SSA.
+ */
+void
+nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
+{
+   if (!dest->is_ssa) {
+      list_del(&dest->reg.def_link);
+      if (dest->reg.indirect)
+         src_remove_all_uses(dest->reg.indirect);
+   } else {
+      /* We can only overwrite an SSA destination if it has no uses. */
+      assert(list_empty(&dest->ssa.uses) && list_empty(&dest->ssa.if_uses));
+   }
+
+   /* We can't re-write with an SSA def */
+   assert(!new_dest.is_ssa);
+
+   nir_dest_copy(dest, &new_dest, instr);
+
+   /* Hook the rewritten dest up to the new register's def list. */
+   dest->reg.parent_instr = instr;
+   list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs);
+
+   if (dest->reg.indirect)
+      src_add_all_uses(dest->reg.indirect, instr, NULL);
+}
+
+/* Initializes an SSA def owned by `instr`: name, component count and empty
+ * use lists.
+ *
+ * If the instruction is already in a block, the def receives the next free
+ * SSA index of the enclosing function; otherwise the index is set to the
+ * placeholder UINT_MAX.
+ */
+void
+nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
+                 unsigned num_components, const char *name)
+{
+   def->parent_instr = instr;
+   def->name = name;
+   def->num_components = num_components;
+
+   list_inithead(&def->uses);
+   list_inithead(&def->if_uses);
+
+   if (!instr->block) {
+      def->index = UINT_MAX;
+      return;
+   }
+
+   nir_function_impl *func = nir_cf_node_get_function(&instr->block->cf_node);
+   def->index = func->ssa_alloc++;
+}
+
+/* Turns a destination into an SSA destination and initializes its def via
+ * nir_ssa_def_init().
+ */
+void
+nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
+                  unsigned num_components, const char *name)
+{
+   nir_ssa_def *def = &dest->ssa;
+
+   dest->is_ssa = true;
+   nir_ssa_def_init(instr, def, num_components, name);
+}
+
+/* Points every use of `def`, in instructions and in if conditions, at
+ * new_src instead.  new_src must not refer to `def` itself.
+ */
+void
+nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src)
+{
+   assert(!new_src.is_ssa || def != new_src.ssa);
+
+   nir_foreach_use_safe(def, instr_use) {
+      nir_instr_rewrite_src(instr_use->parent_instr, instr_use, new_src);
+   }
+
+   nir_foreach_if_use_safe(def, if_use) {
+      nir_if_rewrite_condition(if_use->parent_if, new_src);
+   }
+}
+
+/* Returns true if `between` lies in the same block as `start` and is found
+ * when walking backwards from `end` (inclusive) up to, but not including,
+ * `start`.  `start` and `end` must be in the same block.
+ */
+static bool
+is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
+{
+   assert(start->block == end->block);
+
+   if (between->block != start->block)
+      return false;
+
+   /* Step backwards from end until start is reached. */
+   nir_instr *cur = end;
+   while (cur != start) {
+      if (cur == between)
+         return true;
+
+      cur = nir_instr_prev(cur);
+      assert(cur);
+   }
+
+   return false;
+}
+
+/* Like nir_ssa_def_rewrite_uses(), but instruction uses located between the
+ * def and after_me (after_me included) are left alone.  This is handy when
+ * emitting fix-up code for an instruction's result: the fix-up code may use
+ * the original def freely, and passing the last fix-up instruction as
+ * after_me then redirects every other use without touching the fix-up code.
+ *
+ * after_me is assumed to be in the same block as def->parent_instr and to
+ * come after it.  If-condition uses are always rewritten.
+ */
+void
+nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
+                               nir_instr *after_me)
+{
+   assert(!new_src.is_ssa || def != new_src.ssa);
+
+   nir_foreach_use_safe(def, instr_use) {
+      assert(instr_use->parent_instr != def->parent_instr);
+
+      /* Since def already dominates all of its uses, the only way a use can
+       * not be dominated by after_me is if it is between def and after_me in
+       * the instruction list.  Such uses are skipped.
+       */
+      if (is_instr_between(def->parent_instr, after_me,
+                           instr_use->parent_instr))
+         continue;
+
+      nir_instr_rewrite_src(instr_use->parent_instr, instr_use, new_src);
+   }
+
+   nir_foreach_if_use_safe(def, if_use) {
+      nir_if_rewrite_condition(if_use->parent_if, new_src);
+   }
+}
+
+static bool foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+ bool reverse, void *state);
+
+/* Visits the blocks inside an if statement.  Forward order walks the then
+ * list followed by the else list; reverse order walks the else list
+ * backwards followed by the then list backwards.  Returns false as soon as a
+ * nested visit does.
+ */
+static inline bool
+foreach_if(nir_if *if_stmt, nir_foreach_block_cb cb, bool reverse, void *state)
+{
+   if (!reverse) {
+      foreach_list_typed_safe(nir_cf_node, child, node, &if_stmt->then_list) {
+         if (!foreach_cf_node(child, cb, reverse, state))
+            return false;
+      }
+
+      foreach_list_typed_safe(nir_cf_node, child, node, &if_stmt->else_list) {
+         if (!foreach_cf_node(child, cb, reverse, state))
+            return false;
+      }
+
+      return true;
+   }
+
+   foreach_list_typed_reverse_safe(nir_cf_node, child, node,
+                                   &if_stmt->else_list) {
+      if (!foreach_cf_node(child, cb, reverse, state))
+         return false;
+   }
+
+   foreach_list_typed_reverse_safe(nir_cf_node, child, node,
+                                   &if_stmt->then_list) {
+      if (!foreach_cf_node(child, cb, reverse, state))
+         return false;
+   }
+
+   return true;
+}
+
+/* Visits the blocks inside a loop body, front to back or, when `reverse` is
+ * set, back to front.  Returns false as soon as a nested visit does.
+ */
+static inline bool
+foreach_loop(nir_loop *loop, nir_foreach_block_cb cb, bool reverse, void *state)
+{
+   if (!reverse) {
+      foreach_list_typed_safe(nir_cf_node, child, node, &loop->body) {
+         if (!foreach_cf_node(child, cb, reverse, state))
+            return false;
+      }
+
+      return true;
+   }
+
+   foreach_list_typed_reverse_safe(nir_cf_node, child, node, &loop->body) {
+      if (!foreach_cf_node(child, cb, reverse, state))
+         return false;
+   }
+
+   return true;
+}
+
+/* Visits every block reachable from a control-flow node: the block itself
+ * for block nodes, the nested blocks for ifs and loops.
+ */
+static bool
+foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+                bool reverse, void *state)
+{
+   switch (node->type) {
+   case nir_cf_node_loop:
+      return foreach_loop(nir_cf_node_as_loop(node), cb, reverse, state);
+
+   case nir_cf_node_if:
+      return foreach_if(nir_cf_node_as_if(node), cb, reverse, state);
+
+   case nir_cf_node_block:
+      return cb(nir_cf_node_as_block(node), state);
+
+   default:
+      unreachable("Invalid CFG node type");
+   }
+
+   return false;
+}
+
+/* Calls cb on every block inside `node`, in forward order.  Returns false as
+ * soon as cb does.
+ */
+bool
+nir_foreach_block_in_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+                             void *state)
+{
+   const bool reverse = false;
+
+   return foreach_cf_node(node, cb, reverse, state);
+}
+
+/* Calls cb on every block of the function body in forward order and finally
+ * on the end block.  Returns false as soon as cb does.
+ */
+bool
+nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb, void *state)
+{
+   foreach_list_typed_safe(nir_cf_node, child, node, &impl->body) {
+      const bool keep_going = foreach_cf_node(child, cb, false, state);
+      if (!keep_going)
+         return false;
+   }
+
+   return cb(impl->end_block, state);
+}
+
+/* Calls cb on the end block first and then on every block of the function
+ * body in reverse order.  Returns false as soon as cb does.
+ */
+bool
+nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb,
+                          void *state)
+{
+   const bool end_ok = cb(impl->end_block, state);
+   if (!end_ok)
+      return false;
+
+   foreach_list_typed_reverse_safe(nir_cf_node, child, node, &impl->body) {
+      const bool keep_going = foreach_cf_node(child, cb, true, state);
+      if (!keep_going)
+         return false;
+   }
+
+   return true;
+}
+
+/* Returns the if statement that directly follows `block` in its control-flow
+ * list, or NULL when the block is the tail sentinel, is last in its list, or
+ * is followed by something other than an if.
+ */
+nir_if *
+nir_block_get_following_if(nir_block *block)
+{
+   nir_cf_node *self = &block->cf_node;
+
+   if (exec_node_is_tail_sentinel(&self->node) || nir_cf_node_is_last(self))
+      return NULL;
+
+   nir_cf_node *follower = nir_cf_node_next(self);
+
+   return follower->type == nir_cf_node_if ? nir_cf_node_as_if(follower)
+                                           : NULL;
+}
+
+/* Returns the loop that directly follows `block` in its control-flow list,
+ * or NULL when the block is the tail sentinel, is last in its list, or is
+ * followed by something other than a loop.
+ */
+nir_loop *
+nir_block_get_following_loop(nir_block *block)
+{
+   nir_cf_node *self = &block->cf_node;
+
+   if (exec_node_is_tail_sentinel(&self->node) || nir_cf_node_is_last(self))
+      return NULL;
+
+   nir_cf_node *follower = nir_cf_node_next(self);
+
+   return follower->type == nir_cf_node_loop ? nir_cf_node_as_loop(follower)
+                                             : NULL;
+}
+/* nir_foreach_block callback: stores the running counter (`state`, an
+ * unsigned *) as the block's index and advances it.
+ */
+static bool
+index_block(nir_block *block, void *state)
+{
+   unsigned *counter = state;
+
+   block->index = *counter;
+   *counter += 1;
+
+   return true;
+}
+
+/* Numbers the blocks of a function in nir_foreach_block order and records
+ * how many there are in impl->num_blocks.  Does nothing when the block-index
+ * metadata is already flagged valid.
+ */
+void
+nir_index_blocks(nir_function_impl *impl)
+{
+   if (impl->valid_metadata & nir_metadata_block_index)
+      return;
+
+   unsigned block_count = 0;
+   nir_foreach_block(impl, index_block, &block_count);
+
+   impl->num_blocks = block_count;
+}
+
+/* nir_foreach_ssa_def callback: stores the running counter (`state`, an
+ * unsigned *) as the def's index and advances it.
+ */
+static bool
+index_ssa_def_cb(nir_ssa_def *def, void *state)
+{
+   unsigned *counter = state;
+
+   def->index = *counter;
+   *counter += 1;
+
+   return true;
+}
+
+/* nir_foreach_block callback: indexes the SSA defs of every instruction in
+ * the block, passing `state` through to index_ssa_def_cb.
+ */
+static bool
+index_ssa_block(nir_block *block, void *state)
+{
+   nir_foreach_instr(block, cur) {
+      nir_foreach_ssa_def(cur, index_ssa_def_cb, state);
+   }
+
+   return true;
+}
+
+/**
+ * Renumbers all SSA defs of the function and updates impl->ssa_alloc.
+ *
+ * Indices are handed out top-to-bottom, which gives the very nice property
+ * that, if A dominates B, then A->index <= B->index.
+ */
+void
+nir_index_ssa_defs(nir_function_impl *impl)
+{
+   unsigned def_count = 0;
+
+   nir_foreach_block(impl, index_ssa_block, &def_count);
+
+   impl->ssa_alloc = def_count;
+}
+
+/* nir_foreach_block callback: gives every instruction in the block the next
+ * value of the running counter (`state`, an unsigned *).
+ */
+static bool
+index_instrs_block(nir_block *block, void *state)
+{
+   unsigned *counter = state;
+
+   nir_foreach_instr(block, cur) {
+      cur->index = *counter;
+      *counter += 1;
+   }
+
+   return true;
+}
+
+/**
+ * Renumbers all instructions of the function and returns how many there are.
+ *
+ * Indices are handed out top-to-bottom, which gives the very nice property
+ * that, if A dominates B, then A->index <= B->index.
+ */
+unsigned
+nir_index_instrs(nir_function_impl *impl)
+{
+   unsigned instr_count = 0;
+
+   nir_foreach_block(impl, index_instrs_block, &instr_count);
+
+   return instr_count;
+}
+
+/* Maps a system value onto the load intrinsic that reads it.  System values
+ * without a direct intrinsic counterpart hit unreachable().
+ */
+nir_intrinsic_op
+nir_intrinsic_from_system_value(gl_system_value val)
+{
+   switch (val) {
+   case SYSTEM_VALUE_VERTEX_ID:            return nir_intrinsic_load_vertex_id;
+   case SYSTEM_VALUE_INSTANCE_ID:          return nir_intrinsic_load_instance_id;
+   case SYSTEM_VALUE_DRAW_ID:              return nir_intrinsic_load_draw_id;
+   case SYSTEM_VALUE_BASE_INSTANCE:        return nir_intrinsic_load_base_instance;
+   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:  return nir_intrinsic_load_vertex_id_zero_base;
+   case SYSTEM_VALUE_BASE_VERTEX:          return nir_intrinsic_load_base_vertex;
+   case SYSTEM_VALUE_INVOCATION_ID:        return nir_intrinsic_load_invocation_id;
+   case SYSTEM_VALUE_FRONT_FACE:           return nir_intrinsic_load_front_face;
+   case SYSTEM_VALUE_SAMPLE_ID:            return nir_intrinsic_load_sample_id;
+   case SYSTEM_VALUE_SAMPLE_POS:           return nir_intrinsic_load_sample_pos;
+   case SYSTEM_VALUE_SAMPLE_MASK_IN:       return nir_intrinsic_load_sample_mask_in;
+   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:  return nir_intrinsic_load_local_invocation_id;
+   case SYSTEM_VALUE_WORK_GROUP_ID:        return nir_intrinsic_load_work_group_id;
+   case SYSTEM_VALUE_NUM_WORK_GROUPS:      return nir_intrinsic_load_num_work_groups;
+   case SYSTEM_VALUE_PRIMITIVE_ID:         return nir_intrinsic_load_primitive_id;
+   case SYSTEM_VALUE_TESS_COORD:           return nir_intrinsic_load_tess_coord;
+   case SYSTEM_VALUE_TESS_LEVEL_OUTER:     return nir_intrinsic_load_tess_level_outer;
+   case SYSTEM_VALUE_TESS_LEVEL_INNER:     return nir_intrinsic_load_tess_level_inner;
+   case SYSTEM_VALUE_VERTICES_IN:          return nir_intrinsic_load_patch_vertices_in;
+   case SYSTEM_VALUE_HELPER_INVOCATION:    return nir_intrinsic_load_helper_invocation;
+   default:
+      unreachable("system value does not directly correspond to intrinsic");
+   }
+}
+
+/* Maps a system-value load intrinsic back onto the system value it reads.
+ * Any other intrinsic hits unreachable().
+ */
+gl_system_value
+nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
+{
+   switch (intrin) {
+   case nir_intrinsic_load_vertex_id:            return SYSTEM_VALUE_VERTEX_ID;
+   case nir_intrinsic_load_instance_id:          return SYSTEM_VALUE_INSTANCE_ID;
+   case nir_intrinsic_load_draw_id:              return SYSTEM_VALUE_DRAW_ID;
+   case nir_intrinsic_load_base_instance:        return SYSTEM_VALUE_BASE_INSTANCE;
+   case nir_intrinsic_load_vertex_id_zero_base:  return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
+   case nir_intrinsic_load_base_vertex:          return SYSTEM_VALUE_BASE_VERTEX;
+   case nir_intrinsic_load_invocation_id:        return SYSTEM_VALUE_INVOCATION_ID;
+   case nir_intrinsic_load_front_face:           return SYSTEM_VALUE_FRONT_FACE;
+   case nir_intrinsic_load_sample_id:            return SYSTEM_VALUE_SAMPLE_ID;
+   case nir_intrinsic_load_sample_pos:           return SYSTEM_VALUE_SAMPLE_POS;
+   case nir_intrinsic_load_sample_mask_in:       return SYSTEM_VALUE_SAMPLE_MASK_IN;
+   case nir_intrinsic_load_local_invocation_id:  return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
+   case nir_intrinsic_load_num_work_groups:      return SYSTEM_VALUE_NUM_WORK_GROUPS;
+   case nir_intrinsic_load_work_group_id:        return SYSTEM_VALUE_WORK_GROUP_ID;
+   case nir_intrinsic_load_primitive_id:         return SYSTEM_VALUE_PRIMITIVE_ID;
+   case nir_intrinsic_load_tess_coord:           return SYSTEM_VALUE_TESS_COORD;
+   case nir_intrinsic_load_tess_level_outer:     return SYSTEM_VALUE_TESS_LEVEL_OUTER;
+   case nir_intrinsic_load_tess_level_inner:     return SYSTEM_VALUE_TESS_LEVEL_INNER;
+   case nir_intrinsic_load_patch_vertices_in:    return SYSTEM_VALUE_VERTICES_IN;
+   case nir_intrinsic_load_helper_invocation:    return SYSTEM_VALUE_HELPER_INVOCATION;
+   default:
+      unreachable("intrinsic doesn't produce a system value");
+   }
+}
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
new file mode 100644
index 00000000000..54e23eb4754
--- /dev/null
+++ b/src/compiler/nir/nir.h
@@ -0,0 +1,2111 @@
+/*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#pragma once
+
+#include "util/hash_table.h"
+#include "glsl/list.h"
+#include "GL/gl.h" /* GLenum */
+#include "util/list.h"
+#include "util/ralloc.h"
+#include "util/set.h"
+#include "util/bitset.h"
+#include "compiler/nir_types.h"
+#include "compiler/shader_enums.h"
+#include <stdio.h>
+
+#include "nir_opcodes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct gl_program;
+struct gl_shader_program;
+
+#define NIR_FALSE 0u
+#define NIR_TRUE (~0u)
+
+/** Defines a cast function
+ *
+ * This macro defines a cast function from in_type to out_type where
+ * out_type is some structure type that contains a field of type in_type.
+ * The generated function is implemented with
+ * exec_node_data(out_type, parent, field).
+ *
+ * \param name     name of the generated static inline function
+ * \param in_type  type of the pointer the generated function accepts
+ * \param out_type structure type the generated function returns a pointer to
+ * \param field    name of the in_type member inside out_type
+ *
+ * Note that you have to be a bit careful as the generated cast function
+ * destroys constness.
+ */
+#define NIR_DEFINE_CAST(name, in_type, out_type, field) \
+static inline out_type * \
+name(const in_type *parent) \
+{ \
+ return exec_node_data(out_type, parent, field); \
+}
+
+struct nir_function;
+struct nir_shader;
+struct nir_instr;
+
+
+/**
+ * Description of built-in state associated with a uniform
+ *
+ * \sa nir_variable::state_slots
+ */
+typedef struct {
+ int tokens[5];
+ int swizzle;
+} nir_state_slot;
+
+typedef enum {
+ nir_var_all = -1,
+ nir_var_shader_in,
+ nir_var_shader_out,
+ nir_var_global,
+ nir_var_local,
+ nir_var_uniform,
+ nir_var_shader_storage,
+ nir_var_system_value
+} nir_variable_mode;
+
+/**
+ * Data stored in an nir_constant
+ */
+union nir_constant_data {
+ unsigned u[16];
+ int i[16];
+ float f[16];
+ bool b[16];
+};
+
+typedef struct nir_constant {
+ /**
+ * Value of the constant.
+ *
+ * The field used to back the values supplied by the constant is determined
+ * by the type associated with the \c nir_variable. Constants may be
+ * scalars, vectors, or matrices.
+ */
+ union nir_constant_data value;
+
+ /* we could get this from the var->type but makes clone *much* easier to
+ * not have to care about the type.
+ */
+ unsigned num_elements;
+
+ /* Array elements / Structure Fields */
+ struct nir_constant **elements;
+} nir_constant;
+
+/**
+ * \brief Layout qualifiers for gl_FragDepth.
+ *
+ * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared
+ * with a layout qualifier.
+ */
+typedef enum {
+ nir_depth_layout_none, /**< No depth layout is specified. */
+ nir_depth_layout_any,
+ nir_depth_layout_greater,
+ nir_depth_layout_less,
+ nir_depth_layout_unchanged
+} nir_depth_layout;
+
+/**
+ * Either a uniform, global variable, shader input, or shader output. Based on
+ * ir_variable - it should be easy to translate between the two.
+ */
+
+typedef struct nir_variable {
+ struct exec_node node;
+
+ /**
+ * Declared type of the variable
+ */
+ const struct glsl_type *type;
+
+ /**
+ * Declared name of the variable
+ */
+ char *name;
+
+ struct nir_variable_data {
+
+ /**
+ * Is the variable read-only?
+ *
+ * This is set for variables declared as \c const, shader inputs,
+ * and uniforms.
+ */
+ unsigned read_only:1;
+ unsigned centroid:1;
+ unsigned sample:1;
+ unsigned patch:1;
+ unsigned invariant:1;
+
+ /**
+ * Storage class of the variable.
+ *
+ * \sa nir_variable_mode
+ */
+ nir_variable_mode mode:4;
+
+ /**
+ * Interpolation mode for shader inputs / outputs
+ *
+ * \sa glsl_interp_qualifier
+ */
+ unsigned interpolation:2;
+
+ /**
+ * \name ARB_fragment_coord_conventions
+ * @{
+ */
+ unsigned origin_upper_left:1;
+ unsigned pixel_center_integer:1;
+ /*@}*/
+
+ /**
+ * Was the location explicitly set in the shader?
+ *
+ * If the location is explicitly set in the shader, it \b cannot be changed
+ * by the linker or by the API (e.g., calls to \c glBindAttribLocation have
+ * no effect).
+ */
+ unsigned explicit_location:1;
+ unsigned explicit_index:1;
+
+ /**
+ * Was an initial binding explicitly set in the shader?
+ *
+ * If so, constant_initializer contains an integer nir_constant
+ * representing the initial binding point.
+ */
+ unsigned explicit_binding:1;
+
+ /**
+ * Does this variable have an initializer?
+ *
+ * This is used by the linker to cross-validate initializers of global
+ * variables.
+ */
+ unsigned has_initializer:1;
+
+ /**
+ * If non-zero, then this variable may be packed along with other variables
+ * into a single varying slot, so this offset should be applied when
+ * accessing components. For example, an offset of 1 means that the x
+ * component of this variable is actually stored in component y of the
+ * location specified by \c location.
+ */
+ unsigned location_frac:2;
+
+ /**
+ * Non-zero if this variable was created by lowering a named interface
+ * block which was not an array.
+ *
+ * Note that this variable and \c from_named_ifc_block_array will never
+ * both be non-zero.
+ */
+ unsigned from_named_ifc_block_nonarray:1;
+
+ /**
+ * Non-zero if this variable was created by lowering a named interface
+ * block which was an array.
+ *
+ * Note that this variable and \c from_named_ifc_block_nonarray will never
+ * both be non-zero.
+ */
+ unsigned from_named_ifc_block_array:1;
+
+ /**
+ * \brief Layout qualifier for gl_FragDepth.
+ *
+ * This is not equal to \c nir_depth_layout_none if and only if this
+ * variable is \c gl_FragDepth and a layout qualifier is specified.
+ */
+ nir_depth_layout depth_layout;
+
+ /**
+ * Storage location of the base of this variable
+ *
+ * The precise meaning of this field depends on the nature of the variable.
+ *
+ * - Vertex shader input: one of the values from \c gl_vert_attrib.
+ * - Vertex shader output: one of the values from \c gl_varying_slot.
+ * - Geometry shader input: one of the values from \c gl_varying_slot.
+ * - Geometry shader output: one of the values from \c gl_varying_slot.
+ * - Fragment shader input: one of the values from \c gl_varying_slot.
+ * - Fragment shader output: one of the values from \c gl_frag_result.
+ * - Uniforms: Per-stage uniform slot number for default uniform block.
+ * - Uniforms: Index within the uniform block definition for UBO members.
+ * - Non-UBO Uniforms: uniform slot number.
+ * - Other: This field is not currently used.
+ *
+ * If the variable is a uniform, shader input, or shader output, and the
+ * slot has not been assigned, the value will be -1.
+ */
+ int location;
+
+ /**
+ * The actual location of the variable in the IR. Only valid for inputs
+ * and outputs.
+ */
+ unsigned int driver_location;
+
+ /**
+ * output index for dual source blending.
+ */
+ int index;
+
+ /**
+ * Initial binding point for a sampler or UBO.
+ *
+ * For array types, this represents the binding point for the first element.
+ */
+ int binding;
+
+ /**
+ * Location an atomic counter is stored at.
+ */
+ unsigned offset;
+
+ /**
+ * ARB_shader_image_load_store qualifiers.
+ */
+ struct {
+ bool read_only; /**< "readonly" qualifier. */
+ bool write_only; /**< "writeonly" qualifier. */
+ bool coherent;
+ bool _volatile;
+ bool restrict_flag;
+
+ /** Image internal format if specified explicitly, otherwise GL_NONE. */
+ GLenum format;
+ } image;
+
+ /**
+ * Highest element accessed with a constant expression array index
+ *
+ * Not used for non-array variables.
+ */
+ unsigned max_array_access;
+
+ } data;
+
+ /**
+ * Built-in state that backs this uniform
+ *
+ * Once set at variable creation, \c state_slots must remain invariant.
+ * This is because, ideally, this array would be shared by all clones of
+ * this variable in the IR tree. In other words, we'd really like for it
+ * to be a fly-weight.
+ *
+ * If the variable is not a uniform, \c num_state_slots will be zero and
+ * \c state_slots will be \c NULL.
+ */
+ /*@{*/
+ unsigned num_state_slots; /**< Number of state slots used */
+ nir_state_slot *state_slots; /**< State descriptors. */
+ /*@}*/
+
+ /**
+ * Constant expression assigned in the initializer of the variable
+ */
+ nir_constant *constant_initializer;
+
+ /**
+ * For variables that are in an interface block or are an instance of an
+ * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block.
+ *
+ * \sa ir_variable::location
+ */
+ const struct glsl_type *interface_type;
+} nir_variable;
+
+#define nir_foreach_variable(var, var_list) \
+ foreach_list_typed(nir_variable, var, node, var_list)
+
+typedef struct nir_register {
+ struct exec_node node;
+
+ unsigned num_components; /** < number of vector components */
+ unsigned num_array_elems; /** < size of array (0 for no array) */
+
+ /** generic register index. */
+ unsigned index;
+
+ /** only for debug purposes, can be NULL */
+ const char *name;
+
+ /** whether this register is local (per-function) or global (per-shader) */
+ bool is_global;
+
+ /**
+ * If this flag is set to true, then accessing channels >= num_components
+ * is well-defined, and simply spills over to the next array element. This
+ * is useful for backends that can do per-component accessing, in
+ * particular scalar backends. By setting this flag and making
+ * num_components equal to 1, structures can be packed tightly into
+ * registers and then registers can be accessed per-component to get to
+ * each structure member, even if it crosses vec4 boundaries.
+ */
+ bool is_packed;
+
+ /** set of nir_src's where this register is used (read from) */
+ struct list_head uses;
+
+ /** set of nir_dest's where this register is defined (written to) */
+ struct list_head defs;
+
+ /** set of nir_if's where this register is used as a condition */
+ struct list_head if_uses;
+} nir_register;
+
+typedef enum {
+ nir_instr_type_alu,
+ nir_instr_type_call,
+ nir_instr_type_tex,
+ nir_instr_type_intrinsic,
+ nir_instr_type_load_const,
+ nir_instr_type_jump,
+ nir_instr_type_ssa_undef,
+ nir_instr_type_phi,
+ nir_instr_type_parallel_copy,
+} nir_instr_type;
+
+typedef struct nir_instr {
+ struct exec_node node;
+ nir_instr_type type;
+ struct nir_block *block;
+
+ /** generic instruction index. */
+ unsigned index;
+
+ /* A temporary for optimization and analysis passes to use for storing
+ * flags. For instance, DCE uses this to store the "dead/live" info.
+ */
+ uint8_t pass_flags;
+} nir_instr;
+
+/**
+ * Returns the instruction that follows \p instr in its list, or NULL when
+ * the next node is the list's tail sentinel.
+ */
+static inline nir_instr *
+nir_instr_next(nir_instr *instr)
+{
+   struct exec_node *succ = exec_node_get_next(&instr->node);
+
+   if (!exec_node_is_tail_sentinel(succ))
+      return exec_node_data(nir_instr, succ, node);
+
+   return NULL;
+}
+
+/**
+ * Returns the instruction that precedes \p instr in its list, or NULL when
+ * the previous node is the list's head sentinel.
+ */
+static inline nir_instr *
+nir_instr_prev(nir_instr *instr)
+{
+   struct exec_node *pred = exec_node_get_prev(&instr->node);
+
+   if (!exec_node_is_head_sentinel(pred))
+      return exec_node_data(nir_instr, pred, node);
+
+   return NULL;
+}
+
+/** True when the node before \p instr is the list's head sentinel. */
+static inline bool
+nir_instr_is_first(nir_instr *instr)
+{
+   struct exec_node *pred = exec_node_get_prev(&instr->node);
+
+   return exec_node_is_head_sentinel(pred);
+}
+
+/** True when the node after \p instr is the list's tail sentinel. */
+static inline bool
+nir_instr_is_last(nir_instr *instr)
+{
+   struct exec_node *succ = exec_node_get_next(&instr->node);
+
+   return exec_node_is_tail_sentinel(succ);
+}
+
+typedef struct nir_ssa_def {
+ /** for debugging only, can be NULL */
+ const char* name;
+
+ /** generic SSA definition index. */
+ unsigned index;
+
+ /** Index into the live_in and live_out bitfields */
+ unsigned live_index;
+
+ nir_instr *parent_instr;
+
+ /**
+ * list of nir_src's where this SSA value is used (read from); entries are
+ * linked through nir_src::use_link and walked with nir_foreach_use
+ */
+ struct list_head uses;
+
+ /**
+ * list of nir_src's where this SSA value is used as an if condition;
+ * entries are linked through nir_src::use_link and walked with
+ * nir_foreach_if_use
+ */
+ struct list_head if_uses;
+
+ uint8_t num_components;
+} nir_ssa_def;
+
+struct nir_src;
+
+typedef struct {
+ nir_register *reg;
+ struct nir_src *indirect; /** < NULL for no indirect offset */
+ unsigned base_offset;
+
+ /* TODO use-def chain goes here */
+} nir_reg_src;
+
+typedef struct {
+ nir_instr *parent_instr;
+ struct list_head def_link;
+
+ nir_register *reg;
+ struct nir_src *indirect; /** < NULL for no indirect offset */
+ unsigned base_offset;
+
+ /* TODO def-use chain goes here */
+} nir_reg_dest;
+
+struct nir_if;
+
+/**
+ * A source operand: a read of either a register or an SSA definition.
+ */
+typedef struct nir_src {
+ /* The user of this source.
+ * NOTE(review): presumably parent_if is the active member for sources
+ * linked on an if_uses list and parent_instr otherwise - confirm against
+ * nir.c.
+ */
+ union {
+ nir_instr *parent_instr;
+ struct nir_if *parent_if;
+ };
+
+ /* Link in the uses or if_uses list of the register or SSA def being read;
+ * this is the member that nir_foreach_use and nir_foreach_if_use iterate
+ * over.
+ */
+ struct list_head use_link;
+
+ /* The value being read: ssa when is_ssa is true, reg otherwise. */
+ union {
+ nir_reg_src reg;
+ nir_ssa_def *ssa;
+ };
+
+ bool is_ssa;
+} nir_src;
+
+#define NIR_SRC_INIT (nir_src) { { NULL } }
+
+#define nir_foreach_use(reg_or_ssa_def, src) \
+ list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
+
+#define nir_foreach_use_safe(reg_or_ssa_def, src) \
+ list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
+
+#define nir_foreach_if_use(reg_or_ssa_def, src) \
+ list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link)
+
+#define nir_foreach_if_use_safe(reg_or_ssa_def, src) \
+ list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link)
+
+/**
+ * A destination operand: either a register write or an SSA definition.
+ *
+ * The SSA definition is stored by value inside the destination, whereas a
+ * register destination refers to its nir_register through reg.reg.
+ */
+typedef struct {
+ /* The value being written: ssa when is_ssa is true, reg otherwise. */
+ union {
+ nir_reg_dest reg;
+ nir_ssa_def ssa;
+ };
+
+ bool is_ssa;
+} nir_dest;
+
+#define NIR_DEST_INIT (nir_dest) { { { NULL } } }
+
+#define nir_foreach_def(reg, dest) \
+ list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link)
+
+#define nir_foreach_def_safe(reg, dest) \
+ list_for_each_entry_safe(nir_dest, dest, &(reg)->defs, reg.def_link)
+
+/** Builds a nir_src that reads the SSA definition \p def. */
+static inline nir_src
+nir_src_for_ssa(nir_ssa_def *def)
+{
+   nir_src result = NIR_SRC_INIT;
+
+   result.ssa = def;
+   result.is_ssa = true;
+
+   return result;
+}
+
+/**
+ * Builds a nir_src that reads register \p reg directly: no indirect offset
+ * and a base offset of zero.
+ */
+static inline nir_src
+nir_src_for_reg(nir_register *reg)
+{
+   nir_src result = NIR_SRC_INIT;
+
+   result.reg.base_offset = 0;
+   result.reg.indirect = NULL;
+   result.reg.reg = reg;
+   result.is_ssa = false;
+
+   return result;
+}
+
+/**
+ * Builds a nir_dest that writes register \p reg.  Every other field keeps
+ * the value given by NIR_DEST_INIT.
+ */
+static inline nir_dest
+nir_dest_for_reg(nir_register *reg)
+{
+   nir_dest result = NIR_DEST_INIT;
+
+   result.reg.reg = reg;
+   return result;
+}
+
+void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
+void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);
+
+typedef struct {
+ nir_src src;
+
+ /**
+ * \name input modifiers
+ */
+ /*@{*/
+ /**
+ * For inputs interpreted as floating point, flips the sign bit. For
+ * inputs interpreted as integers, performs the two's complement negation.
+ */
+ bool negate;
+
+ /**
+ * Clears the sign bit for floating point values, and computes the integer
+ * absolute value for integers. Note that the negate modifier acts after
+ * the absolute value modifier, therefore if both are set then all inputs
+ * will become negative.
+ */
+ bool abs;
+ /*@}*/
+
+ /**
+ * For each input component, says which component of the register it is
+ * chosen from. Note that which elements of the swizzle are used and which
+ * are ignored are based on the write mask for most opcodes - for example,
+ * a statement like "foo.xzw = bar.zyx" would have a writemask of 1101b and
+ * a swizzle of {2, x, 1, 0} where x means "don't care."
+ */
+ uint8_t swizzle[4];
+} nir_alu_src;
+
+typedef struct {
+ nir_dest dest;
+
+ /**
+ * \name saturate output modifier
+ *
+ * Only valid for opcodes that output floating-point numbers. Clamps the
+ * output to between 0.0 and 1.0 inclusive.
+ */
+
+ bool saturate;
+
+ unsigned write_mask : 4; /* ignored if dest.is_ssa is true */
+} nir_alu_dest;
+
+typedef enum {
+ nir_type_invalid = 0, /* Not a valid type */
+ nir_type_float,
+ nir_type_int,
+ nir_type_uint,
+ nir_type_bool
+} nir_alu_type;
+
+typedef enum {
+ NIR_OP_IS_COMMUTATIVE = (1 << 0),
+ NIR_OP_IS_ASSOCIATIVE = (1 << 1),
+} nir_op_algebraic_property;
+
+typedef struct {
+ const char *name;
+
+ unsigned num_inputs;
+
+ /**
+ * The number of components in the output
+ *
+ * If non-zero, this is the size of the output and input sizes are
+ * explicitly given; swizzle and writemask are still in effect, but if
+ * the output component is masked out, then the input component may
+ * still be in use.
+ *
+ * If zero, the opcode acts in the standard, per-component manner; the
+ * operation is performed on each component (except the ones that are
+ * masked out) with the input being taken from the input swizzle for
+ * that component.
+ *
+ * The size of some of the inputs may be given (i.e. non-zero) even
+ * though output_size is zero; in that case, the inputs with a zero
+ * size act per-component, while the inputs with non-zero size don't.
+ */
+ unsigned output_size;
+
+ /**
+ * The type of vector that the instruction outputs. Note that the
+ * saturate modifier is only allowed on outputs with the float type.
+ */
+
+ nir_alu_type output_type;
+
+ /**
+ * The number of components in each input
+ */
+ unsigned input_sizes[4];
+
+ /**
+ * The type of vector that each input takes. Note that negate and
+ * absolute value are only allowed on inputs with int or float type and
+ * behave differently on the two.
+ */
+ nir_alu_type input_types[4];
+
+ nir_op_algebraic_property algebraic_properties;
+} nir_op_info;
+
+extern const nir_op_info nir_op_infos[nir_num_opcodes];
+
+typedef struct nir_alu_instr {
+ nir_instr instr;
+ nir_op op;
+ nir_alu_dest dest;
+ nir_alu_src src[];
+} nir_alu_instr;
+
+void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
+ nir_alu_instr *instr);
+void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
+ nir_alu_instr *instr);
+
+/*
+ * Is this source channel used?
+ *
+ * A source with a non-zero size in nir_op_infos uses exactly its first
+ * input_sizes[src] channels; any other source follows the destination
+ * write mask.
+ */
+static inline bool
+nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned channel)
+{
+   const unsigned fixed_size = nir_op_infos[instr->op].input_sizes[src];
+
+   if (fixed_size == 0)
+      return (instr->dest.write_mask >> channel) & 1;
+
+   return channel < fixed_size;
+}
+
+/*
+ * For instructions whose destinations are SSA, get the number of channels
+ * used for a source: the opcode's explicit input size when it has one,
+ * otherwise the number of components of the SSA destination.
+ */
+static inline unsigned
+nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
+{
+   const unsigned explicit_size = nir_op_infos[instr->op].input_sizes[src];
+
+   assert(instr->dest.dest.is_ssa);
+
+   return explicit_size > 0 ? explicit_size
+                            : instr->dest.dest.ssa.num_components;
+}
+
+typedef enum {
+ nir_deref_type_var,
+ nir_deref_type_array,
+ nir_deref_type_struct
+} nir_deref_type;
+
+typedef struct nir_deref {
+ nir_deref_type deref_type;
+ struct nir_deref *child;
+ const struct glsl_type *type;
+} nir_deref;
+
+typedef struct {
+ nir_deref deref;
+
+ nir_variable *var;
+} nir_deref_var;
+
+/* This enum describes how the array is referenced. If the deref is
+ * direct then the base_offset is used. If the deref is indirect then the
+ * offset is given by base_offset + indirect. If the deref is a wildcard
+ * then the deref refers to all of the elements of the array at the same
+ * time. Wildcard dereferences are only ever allowed in copy_var
+ * intrinsics and the source and destination derefs must have matching
+ * wildcards.
+ */
+typedef enum {
+ nir_deref_array_type_direct,
+ nir_deref_array_type_indirect,
+ nir_deref_array_type_wildcard,
+} nir_deref_array_type;
+
+typedef struct {
+ nir_deref deref;
+
+ nir_deref_array_type deref_array_type;
+ unsigned base_offset;
+ nir_src indirect;
+} nir_deref_array;
+
+typedef struct {
+ nir_deref deref;
+
+ unsigned index;
+} nir_deref_struct;
+
+NIR_DEFINE_CAST(nir_deref_as_var, nir_deref, nir_deref_var, deref)
+NIR_DEFINE_CAST(nir_deref_as_array, nir_deref, nir_deref_array, deref)
+NIR_DEFINE_CAST(nir_deref_as_struct, nir_deref, nir_deref_struct, deref)
+
+/* Walks the child links starting at deref and returns the last deref in the
+ * chain, i.e. the one whose child is NULL.
+ */
+static inline nir_deref *
+nir_deref_tail(nir_deref *deref)
+{
+   nir_deref *tail = deref;
+
+   for (nir_deref *next = tail->child; next != NULL; next = next->child)
+      tail = next;
+
+   return tail;
+}
+
+typedef struct {
+ nir_instr instr;
+
+ unsigned num_params;
+ nir_deref_var **params;
+ nir_deref_var *return_deref;
+
+ struct nir_function *callee;
+} nir_call_instr;
+
+#define INTRINSIC(name, num_srcs, src_components, has_dest, dest_components, \
+ num_variables, num_indices, flags) \
+ nir_intrinsic_##name,
+
+#define LAST_INTRINSIC(name) nir_last_intrinsic = nir_intrinsic_##name,
+
+typedef enum {
+#include "nir_intrinsics.h"
+ nir_num_intrinsics = nir_last_intrinsic + 1
+} nir_intrinsic_op;
+
+#undef INTRINSIC
+#undef LAST_INTRINSIC
+
+/** Represents an intrinsic
+ *
+ * An intrinsic is an instruction type for handling things that are
+ * more-or-less regular operations but don't just consume and produce SSA
+ * values like ALU operations do. Intrinsics are not for things that have
+ * special semantic meaning such as phi nodes and parallel copies.
+ * Examples of intrinsics include variable load/store operations, system
+ * value loads, and the like. Even though texturing more-or-less falls
+ * under this category, texturing is its own instruction type because
+ * trying to represent texturing with intrinsics would lead to a
+ * combinatorial explosion of intrinsic opcodes.
+ *
+ * By having a single instruction type for handling a lot of different
+ * cases, optimization passes can look for intrinsics and, for the most
+ * part, completely ignore them. Each intrinsic type also has a few
+ * possible flags that govern whether or not they can be reordered or
+ * eliminated. That way passes like dead code elimination can still work
+ * on intrinsics without understanding the meaning of each.
+ *
+ * Each intrinsic has some number of constant indices, some number of
+ * variables, and some number of sources. What these sources, variables,
+ * and indices mean depends on the intrinsic and is documented with the
+ * intrinsic declaration in nir_intrinsics.h. Intrinsics and texture
+ * instructions are the only types of instruction that can operate on
+ * variables.
+ */
+typedef struct {
+ nir_instr instr;
+
+ nir_intrinsic_op intrinsic;
+
+ nir_dest dest;
+
+ /** number of components if this is a vectorized intrinsic
+ *
+ * Similarly to ALU operations, some intrinsics are vectorized.
+ * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0.
+ * For vectorized intrinsics, the num_components field specifies the
+ * number of destination components and the number of source components
+ * for all sources with nir_intrinsic_infos.src_components[i] == 0.
+ */
+ uint8_t num_components;
+
+ int const_index[3];
+
+ nir_deref_var *variables[2];
+
+ nir_src src[];
+} nir_intrinsic_instr;
+
+/**
+ * \name NIR intrinsics semantic flags
+ *
+ * information about what the compiler can do with the intrinsics.
+ *
+ * \sa nir_intrinsic_info::flags
+ */
+typedef enum {
+ /**
+ * Whether the intrinsic can be safely eliminated if none of its output
+ * values are being used.
+ */
+ NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0),
+
+ /**
+ * Whether the intrinsic can be reordered with respect to any other
+ * intrinsic, i.e. whether the only reordering dependencies of the
+ * intrinsic are due to the register reads/writes.
+ */
+ NIR_INTRINSIC_CAN_REORDER = (1 << 1),
+} nir_intrinsic_semantic_flag;
+
+#define NIR_INTRINSIC_MAX_INPUTS 4
+
+typedef struct {
+ const char *name;
+
+ unsigned num_srcs; /** < number of register/SSA inputs */
+
+ /** number of components of each input register
+ *
+ * If this value is 0, the number of components is given by the
+ * num_components field of nir_intrinsic_instr.
+ */
+ unsigned src_components[NIR_INTRINSIC_MAX_INPUTS];
+
+ bool has_dest;
+
+ /** number of components of the output register
+ *
+ * If this value is 0, the number of components is given by the
+ * num_components field of nir_intrinsic_instr.
+ */
+ unsigned dest_components;
+
+ /** the number of inputs/outputs that are variables */
+ unsigned num_variables;
+
+ /** the number of constant indices used by the intrinsic */
+ unsigned num_indices;
+
+ /** semantic flags for calls to this intrinsic */
+ nir_intrinsic_semantic_flag flags;
+} nir_intrinsic_info;
+
+extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
+
+/**
+ * \group texture information
+ *
+ * This gives semantic information about textures which is useful to the
+ * frontend, the backend, and lowering passes, but not the optimizer.
+ */
+
+/** Identifies what a texture instruction source (nir_tex_src) provides. */
+typedef enum {
+ nir_tex_src_coord,
+ nir_tex_src_projector,
+ nir_tex_src_comparitor, /**< shadow comparator */
+ nir_tex_src_offset,
+ nir_tex_src_bias,
+ nir_tex_src_lod,
+ nir_tex_src_ms_index, /**< MSAA sample index */
+ nir_tex_src_ddx,
+ nir_tex_src_ddy,
+ nir_tex_src_sampler_offset, /**< dynamically uniform indirect offset */
+ nir_num_tex_src_types
+} nir_tex_src_type;
+
+typedef struct {
+ nir_src src;
+ nir_tex_src_type src_type;
+} nir_tex_src;
+
+/** Opcodes for texture instructions (nir_tex_instr::op). */
+typedef enum {
+ nir_texop_tex, /**< Regular texture look-up */
+ nir_texop_txb, /**< Texture look-up with LOD bias */
+ nir_texop_txl, /**< Texture look-up with explicit LOD */
+ nir_texop_txd, /**< Texture look-up with partial derivatives */
+ nir_texop_txf, /**< Texel fetch with explicit LOD */
+ nir_texop_txf_ms, /**< Multisample texture fetch */
+ nir_texop_txs, /**< Texture size */
+ nir_texop_lod, /**< Texture lod query */
+ nir_texop_tg4, /**< Texture gather */
+ nir_texop_query_levels, /**< Texture levels query */
+ nir_texop_texture_samples, /**< Texture samples query */
+ nir_texop_samples_identical, /**< Query whether all samples are definitely
+ * identical.
+ */
+} nir_texop;
+
+typedef struct {
+ nir_instr instr;
+
+ enum glsl_sampler_dim sampler_dim;
+ nir_alu_type dest_type;
+
+ nir_texop op;
+ nir_dest dest;
+ nir_tex_src *src;
+ unsigned num_srcs, coord_components;
+ bool is_array, is_shadow;
+
+ /**
+ * If is_shadow is true, whether this is the old-style shadow that outputs 4
+ * components or the new-style shadow that outputs 1 component.
+ */
+ bool is_new_style_shadow;
+
+ /* constant offset - must be 0 if the offset source is used */
+ int const_offset[4];
+
+ /* gather component selector */
+ unsigned component : 2;
+
+ /** The sampler index
+ *
+ * If this texture instruction has a nir_tex_src_sampler_offset source,
+ * then the sampler index is given by sampler_index + sampler_offset.
+ */
+ unsigned sampler_index;
+
+ /** The size of the sampler array or 0 if it's not an array */
+ unsigned sampler_array_size;
+
+ nir_deref_var *sampler; /* if this is NULL, use sampler_index instead */
+} nir_tex_instr;
+
+/**
+ * Returns the number of components in the destination of a texture
+ * instruction, derived from its opcode, sampler dimensionality and the
+ * is_array / is_shadow / is_new_style_shadow flags.
+ */
+static inline unsigned
+nir_tex_instr_dest_size(nir_tex_instr *instr)
+{
+   const nir_texop op = instr->op;
+
+   /* Opcodes with a fixed one-component result. */
+   if (op == nir_texop_texture_samples ||
+       op == nir_texop_query_levels ||
+       op == nir_texop_samples_identical)
+      return 1;
+
+   if (op == nir_texop_lod)
+      return 2;
+
+   if (op != nir_texop_txs) {
+      /* Everything else yields four components, except new-style shadow
+       * look-ups which yield one.
+       */
+      const bool scalar_shadow =
+         instr->is_shadow && instr->is_new_style_shadow;
+
+      return scalar_shadow ? 1 : 4;
+   }
+
+   /* Texture size: the count depends on the sampler dimensionality, and
+    * array textures add one more component.
+    */
+   unsigned dims;
+   switch (instr->sampler_dim) {
+   case GLSL_SAMPLER_DIM_1D:
+   case GLSL_SAMPLER_DIM_BUF:
+      dims = 1;
+      break;
+
+   case GLSL_SAMPLER_DIM_2D:
+   case GLSL_SAMPLER_DIM_CUBE:
+   case GLSL_SAMPLER_DIM_MS:
+   case GLSL_SAMPLER_DIM_RECT:
+   case GLSL_SAMPLER_DIM_EXTERNAL:
+      dims = 2;
+      break;
+
+   case GLSL_SAMPLER_DIM_3D:
+      dims = 3;
+      break;
+
+   default:
+      unreachable("not reached");
+   }
+
+   return instr->is_array ? dims + 1 : dims;
+}
+
+/* Returns true if this texture operation queries something about the texture
+ * rather than actually sampling it.
+ *
+ * Every opcode declared in nir_texop is classified here, so that no valid
+ * instruction reaches the unreachable() default.  nir_texop_samples_identical
+ * is grouped with the fetch-style opcodes and therefore reports false.
+ */
+static inline bool
+nir_tex_instr_is_query(nir_tex_instr *instr)
+{
+   switch (instr->op) {
+   case nir_texop_txs:
+   case nir_texop_lod:
+   case nir_texop_texture_samples:
+   case nir_texop_query_levels:
+      return true;
+   case nir_texop_tex:
+   case nir_texop_txb:
+   case nir_texop_txl:
+   case nir_texop_txd:
+   case nir_texop_txf:
+   case nir_texop_txf_ms:
+   case nir_texop_tg4:
+   case nir_texop_samples_identical:
+      return false;
+   default:
+      unreachable("Invalid texture opcode");
+   }
+}
+
+/**
+ * Returns the number of components of source \p src of a texture
+ * instruction, based on that source's src_type.
+ */
+static inline unsigned
+nir_tex_instr_src_size(nir_tex_instr *instr, unsigned src)
+{
+   switch (instr->src[src].src_type) {
+   case nir_tex_src_coord:
+      return instr->coord_components;
+
+   case nir_tex_src_offset:
+   case nir_tex_src_ddx:
+   case nir_tex_src_ddy:
+      /* For array textures these sources have one component fewer than the
+       * coordinate.
+       */
+      return instr->is_array ? instr->coord_components - 1
+                             : instr->coord_components;
+
+   default:
+      return 1;
+   }
+}
+
+/**
+ * Returns the index of the first source of \p instr whose src_type equals
+ * \p type, or -1 when the instruction has no such source.
+ */
+static inline int
+nir_tex_instr_src_index(nir_tex_instr *instr, nir_tex_src_type type)
+{
+   unsigned idx = 0;
+
+   while (idx < instr->num_srcs) {
+      if (instr->src[idx].src_type == type)
+         return (int) idx;
+      idx++;
+   }
+
+   return -1;
+}
+
+typedef struct {
+ union {
+ float f[4];
+ int32_t i[4];
+ uint32_t u[4];
+ };
+} nir_const_value;
+
+typedef struct {
+ nir_instr instr;
+
+ nir_const_value value;
+
+ nir_ssa_def def;
+} nir_load_const_instr;
+
+typedef enum {
+ nir_jump_return,
+ nir_jump_break,
+ nir_jump_continue,
+} nir_jump_type;
+
+typedef struct {
+ nir_instr instr;
+ nir_jump_type type;
+} nir_jump_instr;
+
+/* creates a new SSA variable in an undefined state */
+
+typedef struct {
+ nir_instr instr;
+ nir_ssa_def def;
+} nir_ssa_undef_instr;
+
+typedef struct {
+ struct exec_node node;
+
+ /* The predecessor block corresponding to this source */
+ struct nir_block *pred;
+
+ nir_src src;
+} nir_phi_src;
+
+#define nir_foreach_phi_src(phi, entry) \
+ foreach_list_typed(nir_phi_src, entry, node, &(phi)->srcs)
+#define nir_foreach_phi_src_safe(phi, entry) \
+ foreach_list_typed_safe(nir_phi_src, entry, node, &(phi)->srcs)
+
+typedef struct {
+ nir_instr instr;
+
+ struct exec_list srcs; /** < list of nir_phi_src */
+
+ nir_dest dest;
+} nir_phi_instr;
+
+typedef struct {
+ struct exec_node node;
+ nir_src src;
+ nir_dest dest;
+} nir_parallel_copy_entry;
+
+#define nir_foreach_parallel_copy_entry(pcopy, entry) \
+ foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries)
+
+typedef struct {
+ nir_instr instr;
+
+ /* A list of nir_parallel_copy_entry's. The sources of all of the
+ * entries are copied to the corresponding destinations "in parallel".
+ * In other words, if we have two entries: a -> b and b -> a, the values
+ * get swapped.
+ */
+ struct exec_list entries;
+} nir_parallel_copy_instr;
+
+NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr,
+ nir_parallel_copy_instr, instr)
+
+/*
+ * Control flow
+ *
+ * Control flow consists of a tree of control flow nodes, which include
+ * if-statements and loops. The leaves of the tree are basic blocks, lists of
+ * instructions that always run start-to-finish. Each basic block also keeps
+ * track of its successors (blocks which may run immediately after the current
+ * block) and predecessors (blocks which could have run immediately before the
+ * current block). Each function also has a start block and an end block which
+ * all return statements point to (which is always empty). Together, all the
+ * blocks with their predecessors and successors make up the control flow
+ * graph (CFG) of the function. There are helpers that modify the tree of
+ * control flow nodes while modifying the CFG appropriately; these should be
+ * used instead of modifying the tree directly.
+ */
+
+/** Tag stored in nir_cf_node::type naming the concrete kind of the node. */
+typedef enum {
+ nir_cf_node_block, /**< corresponds to nir_block */
+ nir_cf_node_if, /**< corresponds to nir_if */
+ nir_cf_node_loop, /**< corresponds to nir_loop */
+ nir_cf_node_function /**< corresponds to nir_function_impl */
+} nir_cf_node_type;
+
+/**
+ * Common header of every control flow node. It is embedded as the first
+ * member ("cf_node") of nir_block, nir_if, nir_loop and nir_function_impl.
+ */
+typedef struct nir_cf_node {
+ /* link used when this node sits on a list of nir_cf_node */
+ struct exec_node node;
+ /* which concrete node type embeds this header */
+ nir_cf_node_type type;
+ /* presumably the enclosing control flow node -- TODO confirm */
+ struct nir_cf_node *parent;
+} nir_cf_node;
+
+/**
+ * A basic block: a leaf of the control flow tree holding a list of
+ * instructions, plus the CFG edges and the dominance/liveness data that
+ * analysis passes attach to it.
+ */
+typedef struct nir_block {
+ nir_cf_node cf_node;
+
+ struct exec_list instr_list; /**< list of nir_instr */
+
+ /** generic block index; generated by nir_index_blocks */
+ unsigned index;
+
+ /*
+ * Each block can only have up to 2 successors, so we put them in a simple
+ * array - no need for anything more complicated.
+ */
+ struct nir_block *successors[2];
+
+ /* Set of nir_block predecessors in the CFG */
+ struct set *predecessors;
+
+ /*
+ * this node's immediate dominator in the dominance tree - set to NULL for
+ * the start block.
+ */
+ struct nir_block *imm_dom;
+
+ /* This node's children in the dominance tree */
+ unsigned num_dom_children;
+ struct nir_block **dom_children;
+
+ /* Set of nir_block's on the dominance frontier of this block */
+ struct set *dom_frontier;
+
+ /*
+ * These two indices have the property that dom_{pre,post}_index for each
+ * child of this block in the dominance tree will always be between
+ * dom_pre_index and dom_post_index for this block, which makes testing if
+ * a given block is dominated by another block an O(1) operation.
+ */
+ unsigned dom_pre_index, dom_post_index;
+
+ /* live in and out for this block; used for liveness analysis */
+ BITSET_WORD *live_in;
+ BITSET_WORD *live_out;
+} nir_block;
+
+/** Returns the instruction embedding the head link of the block's list. */
+static inline nir_instr *
+nir_block_first_instr(nir_block *block)
+{
+   /* Map the head link back to the nir_instr that contains it. */
+   return exec_node_data(nir_instr, exec_list_get_head(&block->instr_list),
+                         node);
+}
+
+/** Returns the instruction embedding the tail link of the block's list. */
+static inline nir_instr *
+nir_block_last_instr(nir_block *block)
+{
+   /* Map the tail link back to the nir_instr that contains it. */
+   return exec_node_data(nir_instr, exec_list_get_tail(&block->instr_list),
+                         node);
+}
+
+#define nir_foreach_instr(block, instr) \
+ foreach_list_typed(nir_instr, instr, node, &(block)->instr_list)
+#define nir_foreach_instr_reverse(block, instr) \
+ foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list)
+#define nir_foreach_instr_safe(block, instr) \
+ foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list)
+#define nir_foreach_instr_reverse_safe(block, instr) \
+ foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list)
+
+/** An if-statement control flow node with a condition and two branches. */
+typedef struct nir_if {
+ nir_cf_node cf_node;
+ /* source tested by the if; replaced via nir_if_rewrite_condition() */
+ nir_src condition;
+
+ struct exec_list then_list; /**< list of nir_cf_node */
+ struct exec_list else_list; /**< list of nir_cf_node */
+} nir_if;
+
+/** Returns the cf node at the head of the if's then_list. */
+static inline nir_cf_node *
+nir_if_first_then_node(nir_if *if_stmt)
+{
+   return exec_node_data(nir_cf_node,
+                         exec_list_get_head(&if_stmt->then_list), node);
+}
+
+/** Returns the cf node at the tail of the if's then_list. */
+static inline nir_cf_node *
+nir_if_last_then_node(nir_if *if_stmt)
+{
+   return exec_node_data(nir_cf_node,
+                         exec_list_get_tail(&if_stmt->then_list), node);
+}
+
+/** Returns the cf node at the head of the if's else_list. */
+static inline nir_cf_node *
+nir_if_first_else_node(nir_if *if_stmt)
+{
+   return exec_node_data(nir_cf_node,
+                         exec_list_get_head(&if_stmt->else_list), node);
+}
+
+/** Returns the cf node at the tail of the if's else_list. */
+static inline nir_cf_node *
+nir_if_last_else_node(nir_if *if_stmt)
+{
+   return exec_node_data(nir_cf_node,
+                         exec_list_get_tail(&if_stmt->else_list), node);
+}
+
+/** A loop control flow node; its contents are the cf nodes on "body". */
+typedef struct {
+ nir_cf_node cf_node;
+
+ struct exec_list body; /**< list of nir_cf_node */
+} nir_loop;
+
+/** Returns the cf node at the head of the loop body. */
+static inline nir_cf_node *
+nir_loop_first_cf_node(nir_loop *loop)
+{
+   struct exec_node *first_link = exec_list_get_head(&loop->body);
+
+   /* Convert the list link back into the cf node that contains it. */
+   return exec_node_data(nir_cf_node, first_link, node);
+}
+
+/** Returns the cf node at the tail of the loop body. */
+static inline nir_cf_node *
+nir_loop_last_cf_node(nir_loop *loop)
+{
+   struct exec_node *last_link = exec_list_get_tail(&loop->body);
+
+   /* Convert the list link back into the cf node that contains it. */
+   return exec_node_data(nir_cf_node, last_link, node);
+}
+
+/**
+ * Various bits of metadata that may be created or required by
+ * optimization and analysis passes.
+ *
+ * Each value is a distinct bit, so several can be OR-ed together when
+ * passed to nir_metadata_require() or nir_metadata_preserve().
+ */
+typedef enum {
+ nir_metadata_none = 0x0,
+ nir_metadata_block_index = 0x1,
+ nir_metadata_dominance = 0x2,
+ nir_metadata_live_ssa_defs = 0x4,
+ /* NOTE(review): presumably the flag used by the DEBUG-only
+ * nir_metadata_{set,check}_validation_flag() helpers -- confirm.
+ */
+ nir_metadata_not_properly_reset = 0x8,
+} nir_metadata;
+
+/**
+ * The implementation (body) of a nir_function: the root of that function's
+ * control flow tree together with its locals, registers and index counters.
+ */
+typedef struct {
+ nir_cf_node cf_node;
+
+ /** pointer to the function of which this is an implementation */
+ struct nir_function *function;
+
+ struct exec_list body; /**< list of nir_cf_node */
+
+ nir_block *end_block;
+
+ /** list for all local variables in the function */
+ struct exec_list locals;
+
+ /** array of variables used as parameters */
+ unsigned num_params;
+ nir_variable **params;
+
+ /** variable used to hold the result of the function */
+ nir_variable *return_var;
+
+ /** list of local registers in the function */
+ struct exec_list registers;
+
+ /** next available local register index */
+ unsigned reg_alloc;
+
+ /** next available SSA value index */
+ unsigned ssa_alloc;
+
+ /* total number of basic blocks.
+ * NOTE(review): originally documented as "only valid when
+ * block_index_dirty = false", but no such field is declared here;
+ * presumably it is valid while nir_metadata_block_index is set in
+ * valid_metadata -- confirm.
+ */
+ unsigned num_blocks;
+
+ /* bitmask of nir_metadata values */
+ nir_metadata valid_metadata;
+} nir_function_impl;
+
+/** Returns the head of the implementation's body list as a nir_block. */
+static inline nir_block *
+nir_start_block(nir_function_impl *impl)
+{
+   struct exec_node *first_link = exec_list_get_head(&impl->body);
+
+   /* The cast relies on the list link being the first member of
+    * nir_cf_node, which in turn is the first member of nir_block.
+    */
+   return (nir_block *) first_link;
+}
+
+/** Returns the cf node following "node" in its list, or NULL if it is last. */
+static inline nir_cf_node *
+nir_cf_node_next(nir_cf_node *node)
+{
+   struct exec_node *succ = exec_node_get_next(&node->node);
+
+   /* The tail sentinel is not a real node, so there is nothing after us. */
+   return exec_node_is_tail_sentinel(succ)
+      ? NULL
+      : exec_node_data(nir_cf_node, succ, node);
+}
+
+/** Returns the cf node preceding "node" in its list, or NULL if it is first. */
+static inline nir_cf_node *
+nir_cf_node_prev(nir_cf_node *node)
+{
+   struct exec_node *pred = exec_node_get_prev(&node->node);
+
+   /* The head sentinel is not a real node, so there is nothing before us. */
+   return exec_node_is_head_sentinel(pred)
+      ? NULL
+      : exec_node_data(nir_cf_node, pred, node);
+}
+
+/** True when the link before "node" is its list's head sentinel. */
+static inline bool
+nir_cf_node_is_first(const nir_cf_node *node)
+{
+   struct exec_node *before = node->node.prev;
+
+   return exec_node_is_head_sentinel(before);
+}
+
+/** True when the link after "node" is its list's tail sentinel. */
+static inline bool
+nir_cf_node_is_last(const nir_cf_node *node)
+{
+   struct exec_node *after = node->node.next;
+
+   return exec_node_is_tail_sentinel(after);
+}
+
+NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node)
+NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node)
+NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node)
+NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node, nir_function_impl, cf_node)
+
+/** Direction of a function parameter; stored in nir_parameter::param_type. */
+typedef enum {
+ nir_parameter_in,
+ nir_parameter_out,
+ nir_parameter_inout,
+} nir_parameter_type;
+
+/** Description of one function parameter: its direction and GLSL type. */
+typedef struct {
+ nir_parameter_type param_type;
+ const struct glsl_type *type;
+} nir_parameter;
+
+/**
+ * A function in a shader: its name, signature and, if one exists, its
+ * implementation. Functions are linked on nir_shader::functions.
+ */
+typedef struct nir_function {
+ struct exec_node node;
+
+ const char *name;
+ struct nir_shader *shader;
+
+ /* "params" presumably holds num_params entries -- TODO confirm */
+ unsigned num_params;
+ nir_parameter *params;
+ const struct glsl_type *return_type;
+
+ /** The implementation of this function.
+ *
+ * If the function is only declared and not implemented, this is NULL.
+ */
+ nir_function_impl *impl;
+} nir_function;
+
+/**
+ * Driver-supplied switches that steer compilation; a shader points at its
+ * options through nir_shader::options.
+ *
+ * NOTE(review): the uncommented lower_<op> flags presumably each request
+ * that <op> be lowered to other operations -- confirm against the passes
+ * that read them.
+ */
+typedef struct nir_shader_compiler_options {
+ bool lower_fdiv;
+ bool lower_ffma;
+ bool lower_flrp;
+ bool lower_fpow;
+ bool lower_fsat;
+ bool lower_fsqrt;
+ bool lower_fmod;
+ bool lower_bitfield_extract;
+ bool lower_bitfield_insert;
+ bool lower_uadd_carry;
+ bool lower_usub_borrow;
+ /** lowers fneg and ineg to fsub and isub. */
+ bool lower_negate;
+ /** lowers fsub and isub to fadd+fneg and iadd+ineg. */
+ bool lower_sub;
+
+ /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
+ bool lower_scmp;
+
+ /* Does the native fdot instruction replicate its result for four
+ * components? If so, then opt_algebraic_late will turn all fdotN
+ * instructions into fdot_replicatedN instructions.
+ */
+ bool fdot_replicates;
+
+ /** lowers ffract to fsub+ffloor: */
+ bool lower_ffract;
+
+ /**
+ * Does the driver support real 32-bit integers? (Otherwise, integers
+ * are simulated by floats.)
+ */
+ bool native_integers;
+} nir_shader_compiler_options;
+
+/**
+ * Compile-time information about a shader, stored in nir_shader::info.
+ * The trailing anonymous union holds fields specific to one kind of shader
+ * (gs, fs, cs or tcs).
+ */
+typedef struct nir_shader_info {
+ const char *name;
+
+ /* Descriptive name provided by the client; may be NULL */
+ const char *label;
+
+ /* Number of textures used by this shader */
+ unsigned num_textures;
+ /* Number of uniform buffers used by this shader */
+ unsigned num_ubos;
+ /* Number of atomic buffers used by this shader */
+ unsigned num_abos;
+ /* Number of shader storage buffers used by this shader */
+ unsigned num_ssbos;
+ /* Number of images used by this shader */
+ unsigned num_images;
+
+ /* Which inputs are actually read */
+ uint64_t inputs_read;
+ /* Which outputs are actually written */
+ uint64_t outputs_written;
+ /* Which system values are actually read */
+ uint64_t system_values_read;
+
+ /* Which patch inputs are actually read */
+ uint32_t patch_inputs_read;
+ /* Which patch outputs are actually written */
+ uint32_t patch_outputs_written;
+
+ /* Whether or not this shader ever uses textureGather() */
+ bool uses_texture_gather;
+
+ /* Whether or not this shader uses the gl_ClipDistance output */
+ bool uses_clip_distance_out;
+
+ /* Whether or not separate shader objects were used */
+ bool separate_shader;
+
+ /** Was this shader linked with any transform feedback varyings? */
+ bool has_transform_feedback_varyings;
+
+ union {
+ struct {
+ /** The number of vertices received per input primitive */
+ unsigned vertices_in;
+
+ /** The output primitive type (GL enum value) */
+ unsigned output_primitive;
+
+ /** The maximum number of vertices the geometry shader might write. */
+ unsigned vertices_out;
+
+ /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
+ unsigned invocations;
+
+ /** Whether or not this shader uses EndPrimitive */
+ bool uses_end_primitive;
+
+ /** Whether or not this shader uses non-zero streams */
+ bool uses_streams;
+ } gs;
+
+ struct {
+ bool uses_discard;
+
+ /**
+ * Whether early fragment tests are enabled as defined by
+ * ARB_shader_image_load_store.
+ */
+ bool early_fragment_tests;
+
+ /** gl_FragDepth layout for ARB_conservative_depth. */
+ enum gl_frag_depth_layout depth_layout;
+ } fs;
+
+ struct {
+ unsigned local_size[3];
+ } cs;
+
+ struct {
+ /** The number of vertices in the TCS output patch. */
+ unsigned vertices_out;
+ } tcs;
+ };
+} nir_shader_info;
+
+/**
+ * Top-level container for one shader: its variable lists, functions,
+ * global registers, driver options and compile-time info.
+ */
+typedef struct nir_shader {
+ /** list of uniforms (nir_variable) */
+ struct exec_list uniforms;
+
+ /** list of inputs (nir_variable) */
+ struct exec_list inputs;
+
+ /** list of outputs (nir_variable) */
+ struct exec_list outputs;
+
+ /** Set of driver-specific options for the shader.
+ *
+ * The memory for the options is expected to be kept in a single static
+ * copy by the driver.
+ */
+ const struct nir_shader_compiler_options *options;
+
+ /** Various bits of compile-time information about a given shader */
+ struct nir_shader_info info;
+
+ /** list of global variables in the shader (nir_variable) */
+ struct exec_list globals;
+
+ /** list of system value variables in the shader (nir_variable) */
+ struct exec_list system_values;
+
+ struct exec_list functions; /**< list of nir_function */
+
+ /** list of global registers in the shader */
+ struct exec_list registers;
+
+ /** next available global register index */
+ unsigned reg_alloc;
+
+ /**
+ * the highest index a load_input_*, load_uniform_*, etc. intrinsic can
+ * access plus one
+ */
+ unsigned num_inputs, num_uniforms, num_outputs;
+
+ /** The shader stage, such as MESA_SHADER_VERTEX. */
+ gl_shader_stage stage;
+} nir_shader;
+
+#define nir_foreach_function(shader, func) \
+ foreach_list_typed(nir_function, func, node, &(shader)->functions)
+
+nir_shader *nir_shader_create(void *mem_ctx,
+ gl_shader_stage stage,
+ const nir_shader_compiler_options *options);
+
+/** creates a register, including assigning it an index and adding it to the list */
+nir_register *nir_global_reg_create(nir_shader *shader);
+
+nir_register *nir_local_reg_create(nir_function_impl *impl);
+
+void nir_reg_remove(nir_register *reg);
+
+/** Adds a variable to the appropriate list in nir_shader */
+void nir_shader_add_variable(nir_shader *shader, nir_variable *var);
+
+/** Appends a local-mode variable to the implementation's "locals" list. */
+static inline void
+nir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var)
+{
+   struct exec_list *locals = &impl->locals;
+
+   /* Only function-local variables belong on an impl's list. */
+   assert(var->data.mode == nir_var_local);
+   exec_list_push_tail(locals, &var->node);
+}
+
+/** creates a variable, sets a few defaults, and adds it to the list */
+nir_variable *nir_variable_create(nir_shader *shader,
+ nir_variable_mode mode,
+ const struct glsl_type *type,
+ const char *name);
+/** creates a local variable and adds it to the list */
+nir_variable *nir_local_variable_create(nir_function_impl *impl,
+ const struct glsl_type *type,
+ const char *name);
+
+/** creates a function and adds it to the shader's list of functions */
+nir_function *nir_function_create(nir_shader *shader, const char *name);
+
+nir_function_impl *nir_function_impl_create(nir_function *func);
+
+nir_block *nir_block_create(nir_shader *shader);
+nir_if *nir_if_create(nir_shader *shader);
+nir_loop *nir_loop_create(nir_shader *shader);
+
+nir_function_impl *nir_cf_node_get_function(nir_cf_node *node);
+
+/** requests that the given pieces of metadata be generated */
+void nir_metadata_require(nir_function_impl *impl, nir_metadata required);
+/** dirties all but the preserved metadata */
+void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved);
+
+/** creates an instruction with default swizzle/writemask/etc. with NULL registers */
+nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op);
+
+nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type);
+
+nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader,
+ unsigned num_components);
+
+nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader,
+ nir_intrinsic_op op);
+
+nir_call_instr *nir_call_instr_create(nir_shader *shader,
+ nir_function *callee);
+
+nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs);
+
+nir_phi_instr *nir_phi_instr_create(nir_shader *shader);
+
+nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader);
+
+nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader,
+ unsigned num_components);
+
+nir_deref_var *nir_deref_var_create(void *mem_ctx, nir_variable *var);
+nir_deref_array *nir_deref_array_create(void *mem_ctx);
+nir_deref_struct *nir_deref_struct_create(void *mem_ctx, unsigned field_index);
+
+nir_deref *nir_copy_deref(void *mem_ctx, nir_deref *deref);
+
+nir_load_const_instr *
+nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref);
+
+/**
+ * NIR Cursors and Instruction Insertion API
+ * @{
+ *
+ * A tiny struct representing a point to insert/extract instructions or
+ * control flow nodes. Helps reduce the combinatorial explosion of possible
+ * points to insert/extract.
+ *
+ * \sa nir_control_flow.h
+ */
+/** Says which kind of position a nir_cursor denotes. */
+typedef enum {
+ nir_cursor_before_block, /* cursor.block is set */
+ nir_cursor_after_block, /* cursor.block is set */
+ nir_cursor_before_instr, /* cursor.instr is set */
+ nir_cursor_after_instr, /* cursor.instr is set */
+} nir_cursor_option;
+
+/**
+ * A position in the instruction stream: "option" selects before/after and
+ * whether the anchor is a block or an instruction, and the matching union
+ * member names that anchor.
+ */
+typedef struct {
+ nir_cursor_option option;
+ union {
+ nir_block *block;
+ nir_instr *instr;
+ };
+} nir_cursor;
+
+/** Builds a cursor with option nir_cursor_before_block anchored on "block". */
+static inline nir_cursor
+nir_before_block(nir_block *block)
+{
+   nir_cursor at_start;
+
+   at_start.block = block;
+   at_start.option = nir_cursor_before_block;
+   return at_start;
+}
+
+/** Builds a cursor with option nir_cursor_after_block anchored on "block". */
+static inline nir_cursor
+nir_after_block(nir_block *block)
+{
+   nir_cursor at_end;
+
+   at_end.block = block;
+   at_end.option = nir_cursor_after_block;
+   return at_end;
+}
+
+/** Builds a cursor with option nir_cursor_before_instr anchored on "instr". */
+static inline nir_cursor
+nir_before_instr(nir_instr *instr)
+{
+   nir_cursor ahead_of;
+
+   ahead_of.instr = instr;
+   ahead_of.option = nir_cursor_before_instr;
+   return ahead_of;
+}
+
+/** Builds a cursor with option nir_cursor_after_instr anchored on "instr". */
+static inline nir_cursor
+nir_after_instr(nir_instr *instr)
+{
+   nir_cursor behind;
+
+   behind.instr = instr;
+   behind.option = nir_cursor_after_instr;
+   return behind;
+}
+
+/**
+ * Returns a cursor at the end of "block", but placed before the block's
+ * last instruction when that instruction is a jump.
+ */
+static inline nir_cursor
+nir_after_block_before_jump(nir_block *block)
+{
+   nir_instr *tail_instr = nir_block_last_instr(block);
+
+   /* No last instruction, or one that is not a jump: plain end of block. */
+   if (tail_instr == NULL || tail_instr->type != nir_instr_type_jump)
+      return nir_after_block(block);
+
+   return nir_before_instr(tail_instr);
+}
+
+/**
+ * Returns a cursor just before "node": the start of the node itself when it
+ * is a block, otherwise the end of the node preceding it, treated as a block.
+ */
+static inline nir_cursor
+nir_before_cf_node(nir_cf_node *node)
+{
+   if (node->type != nir_cf_node_block) {
+      /* NOTE(review): assumes a non-block node always has a block right
+       * before it; nir_cf_node_prev() yields NULL otherwise -- confirm.
+       */
+      nir_cf_node *pred = nir_cf_node_prev(node);
+      return nir_after_block(nir_cf_node_as_block(pred));
+   }
+
+   return nir_before_block(nir_cf_node_as_block(node));
+}
+
+/**
+ * Returns a cursor just after "node": the end of the node itself when it is
+ * a block, otherwise the start of the node following it, treated as a block.
+ */
+static inline nir_cursor
+nir_after_cf_node(nir_cf_node *node)
+{
+   if (node->type != nir_cf_node_block) {
+      /* NOTE(review): assumes a non-block node always has a block right
+       * after it; nir_cf_node_next() yields NULL otherwise -- confirm.
+       */
+      nir_cf_node *succ = nir_cf_node_next(node);
+      return nir_before_block(nir_cf_node_as_block(succ));
+   }
+
+   return nir_after_block(nir_cf_node_as_block(node));
+}
+
+/** Returns a cursor before the cf node at the head of "cf_list". */
+static inline nir_cursor
+nir_before_cf_list(struct exec_list *cf_list)
+{
+   struct exec_node *head_link = exec_list_get_head(cf_list);
+
+   return nir_before_cf_node(exec_node_data(nir_cf_node, head_link, node));
+}
+
+/** Returns a cursor after the cf node at the tail of "cf_list". */
+static inline nir_cursor
+nir_after_cf_list(struct exec_list *cf_list)
+{
+   struct exec_node *tail_link = exec_list_get_tail(cf_list);
+
+   return nir_after_cf_node(exec_node_data(nir_cf_node, tail_link, node));
+}
+
+/**
+ * Insert a NIR instruction at the given cursor.
+ *
+ * Note: This does not update the cursor.
+ */
+void nir_instr_insert(nir_cursor cursor, nir_instr *instr);
+
+/** Inserts "before" at the cursor placed in front of "instr". */
+static inline void
+nir_instr_insert_before(nir_instr *instr, nir_instr *before)
+{
+   nir_cursor where = nir_before_instr(instr);
+
+   nir_instr_insert(where, before);
+}
+
+/** Inserts "after" at the cursor placed behind "instr". */
+static inline void
+nir_instr_insert_after(nir_instr *instr, nir_instr *after)
+{
+   nir_cursor where = nir_after_instr(instr);
+
+   nir_instr_insert(where, after);
+}
+
+/** Inserts "before" at the cursor placed at the start of "block". */
+static inline void
+nir_instr_insert_before_block(nir_block *block, nir_instr *before)
+{
+   nir_cursor where = nir_before_block(block);
+
+   nir_instr_insert(where, before);
+}
+
+/** Inserts "after" at the cursor placed at the end of "block". */
+static inline void
+nir_instr_insert_after_block(nir_block *block, nir_instr *after)
+{
+   nir_cursor where = nir_after_block(block);
+
+   nir_instr_insert(where, after);
+}
+
+/** Inserts "before" at the cursor returned by nir_before_cf_node(node). */
+static inline void
+nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before)
+{
+   nir_cursor where = nir_before_cf_node(node);
+
+   nir_instr_insert(where, before);
+}
+
+/** Inserts "after" at the cursor returned by nir_after_cf_node(node). */
+static inline void
+nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after)
+{
+   nir_cursor where = nir_after_cf_node(node);
+
+   nir_instr_insert(where, after);
+}
+
+/** Inserts "before" at the cursor returned by nir_before_cf_list(list). */
+static inline void
+nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before)
+{
+   nir_cursor where = nir_before_cf_list(list);
+
+   nir_instr_insert(where, before);
+}
+
+/** Inserts "after" at the cursor returned by nir_after_cf_list(list). */
+static inline void
+nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after)
+{
+   nir_cursor where = nir_after_cf_list(list);
+
+   nir_instr_insert(where, after);
+}
+
+void nir_instr_remove(nir_instr *instr);
+
+/** @} */
+
+typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state);
+typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state);
+typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state);
+bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb,
+ void *state);
+bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state);
+bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state);
+
+nir_const_value *nir_src_as_const_value(nir_src src);
+bool nir_src_is_dynamically_uniform(nir_src src);
+bool nir_srcs_equal(nir_src src1, nir_src src2);
+void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
+void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);
+void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);
+void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest,
+ nir_dest new_dest);
+
+void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
+ unsigned num_components, const char *name);
+void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
+ unsigned num_components, const char *name);
+void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src);
+void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
+ nir_instr *after_me);
+
+/* visits basic blocks in source-code order */
+typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state);
+bool nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb,
+ void *state);
+bool nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb,
+ void *state);
+bool nir_foreach_block_in_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+ void *state);
+
+/* If the following CF node is an if, this function returns that if.
+ * Otherwise, it returns NULL.
+ */
+nir_if *nir_block_get_following_if(nir_block *block);
+
+nir_loop *nir_block_get_following_loop(nir_block *block);
+
+void nir_index_local_regs(nir_function_impl *impl);
+void nir_index_global_regs(nir_shader *shader);
+void nir_index_ssa_defs(nir_function_impl *impl);
+unsigned nir_index_instrs(nir_function_impl *impl);
+
+void nir_index_blocks(nir_function_impl *impl);
+
+void nir_print_shader(nir_shader *shader, FILE *fp);
+void nir_print_instr(const nir_instr *instr, FILE *fp);
+
+nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s);
+
+#ifdef DEBUG
+void nir_validate_shader(nir_shader *shader);
+void nir_metadata_set_validation_flag(nir_shader *shader);
+void nir_metadata_check_validation_flag(nir_shader *shader);
+
+#include "util/debug.h"
+/**
+ * Reports whether the NIR_TEST_CLONE environment variable asks for shaders
+ * to be cloned; the environment is queried only on the first call.
+ */
+static inline bool
+should_clone_nir(void)
+{
+   /* A negative value means the environment has not been queried yet. */
+   static int cached_choice = -1;
+
+   if (cached_choice < 0)
+      cached_choice = env_var_as_boolean("NIR_TEST_CLONE", false);
+
+   return cached_choice;
+}
+#else
+static inline void nir_validate_shader(nir_shader *shader) { (void) shader; }
+static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; }
+static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; }
+static inline bool should_clone_nir(void) { return false; }
+#endif /* DEBUG */
+
+/*
+ * Common wrapper for running a pass: executes the do_pass statements and
+ * then validates the shader. When should_clone_nir() is true the shader is
+ * additionally replaced by a clone allocated under the same ralloc parent
+ * and the original is freed, so "nir" is assigned to and must be a
+ * modifiable lvalue.
+ */
+#define _PASS(nir, do_pass) do { \
+ do_pass \
+ nir_validate_shader(nir); \
+ if (should_clone_nir()) { \
+ nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \
+ ralloc_free(nir); \
+ nir = clone; \
+ } \
+} while (0)
+
+/*
+ * Runs pass(nir, ...) and sets "progress" to true when the pass returns
+ * true. The metadata validation flag is set before the pass and checked
+ * only when the pass reports progress.
+ */
+#define NIR_PASS(progress, nir, pass, ...) _PASS(nir, \
+ nir_metadata_set_validation_flag(nir); \
+ if (pass(nir, ##__VA_ARGS__)) { \
+ progress = true; \
+ nir_metadata_check_validation_flag(nir); \
+ } \
+)
+
+/* Runs pass(nir, ...) and ignores whatever it returns. */
+#define NIR_PASS_V(nir, pass, ...) _PASS(nir, \
+ pass(nir, ##__VA_ARGS__); \
+)
+
+void nir_calc_dominance_impl(nir_function_impl *impl);
+void nir_calc_dominance(nir_shader *shader);
+
+nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2);
+bool nir_block_dominates(nir_block *parent, nir_block *child);
+
+void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp);
+void nir_dump_dom_tree(nir_shader *shader, FILE *fp);
+
+void nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp);
+void nir_dump_dom_frontier(nir_shader *shader, FILE *fp);
+
+void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
+void nir_dump_cfg(nir_shader *shader, FILE *fp);
+
+int nir_gs_count_vertices(const nir_shader *shader);
+
+bool nir_split_var_copies(nir_shader *shader);
+
+void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx);
+void nir_lower_var_copies(nir_shader *shader);
+
+bool nir_lower_global_vars_to_local(nir_shader *shader);
+
+bool nir_lower_locals_to_regs(nir_shader *shader);
+
+void nir_lower_outputs_to_temporaries(nir_shader *shader);
+
+void nir_assign_var_locations(struct exec_list *var_list,
+ unsigned *size,
+ int (*type_size)(const struct glsl_type *));
+
+void nir_lower_io(nir_shader *shader,
+ nir_variable_mode mode,
+ int (*type_size)(const struct glsl_type *));
+nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr);
+nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr);
+
+void nir_lower_vars_to_ssa(nir_shader *shader);
+
+bool nir_remove_dead_variables(nir_shader *shader);
+
+void nir_move_vec_src_uses_to_dest(nir_shader *shader);
+bool nir_lower_vec_to_movs(nir_shader *shader);
+void nir_lower_alu_to_scalar(nir_shader *shader);
+void nir_lower_load_const_to_scalar(nir_shader *shader);
+
+void nir_lower_phis_to_scalar(nir_shader *shader);
+
+void nir_lower_samplers(nir_shader *shader,
+ const struct gl_shader_program *shader_program);
+
+bool nir_lower_system_values(nir_shader *shader);
+
+/** Options passed to nir_lower_tex() to select which lowerings it applies. */
+typedef struct nir_lower_tex_options {
+ /**
+ * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which
+ * sampler types a texture projector is lowered.
+ */
+ unsigned lower_txp;
+
+ /**
+ * If true, lower rect textures to 2D, using txs to fetch the
+ * texture dimensions and dividing the texture coords by the
+ * texture dims to normalize.
+ */
+ bool lower_rect;
+
+ /**
+ * To emulate certain texture wrap modes, this can be used
+ * to saturate the specified tex coord to [0.0, 1.0]. The
+ * bits are according to sampler #, ie. if, for example:
+ *
+ * (conf->saturate_s & (1 << n))
+ *
+ * is true, then the s coord for sampler n is saturated.
+ *
+ * Note that clamping must happen *after* projector lowering
+ * so any projected texture sample instruction with a clamped
+ * coordinate gets automatically lowered, regardless of the
+ * 'lower_txp' setting.
+ */
+ unsigned saturate_s;
+ unsigned saturate_t;
+ unsigned saturate_r;
+
+ /* Bitmask of samplers that need swizzling.
+ *
+ * If (swizzle_result & (1 << sampler_index)), then the swizzle in
+ * swizzles[sampler_index] is applied to the result of the texturing
+ * operation.
+ */
+ unsigned swizzle_result;
+
+ /* A swizzle for each sampler. Values 0-3 represent x, y, z, or w swizzles
+ * while 4 and 5 represent 0 and 1 respectively.
+ */
+ uint8_t swizzles[32][4];
+} nir_lower_tex_options;
+
+bool nir_lower_tex(nir_shader *shader,
+ const nir_lower_tex_options *options);
+
+void nir_lower_idiv(nir_shader *shader);
+
+void nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables);
+void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables);
+
+void nir_lower_two_sided_color(nir_shader *shader);
+
+void nir_lower_atomics(nir_shader *shader,
+ const struct gl_shader_program *shader_program);
+void nir_lower_to_source_mods(nir_shader *shader);
+
+bool nir_lower_gs_intrinsics(nir_shader *shader);
+
+bool nir_normalize_cubemap_coords(nir_shader *shader);
+
+void nir_live_ssa_defs_impl(nir_function_impl *impl);
+bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
+
+void nir_convert_to_ssa_impl(nir_function_impl *impl);
+void nir_convert_to_ssa(nir_shader *shader);
+
+/* If phi_webs_only is true, only convert SSA values involved in phi nodes to
+ * registers. If false, convert all values (even those not involved in a phi
+ * node) to registers.
+ */
+void nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only);
+
+bool nir_opt_algebraic(nir_shader *shader);
+bool nir_opt_algebraic_late(nir_shader *shader);
+bool nir_opt_constant_folding(nir_shader *shader);
+
+bool nir_opt_global_to_local(nir_shader *shader);
+
+bool nir_copy_prop(nir_shader *shader);
+
+bool nir_opt_cse(nir_shader *shader);
+
+bool nir_opt_dce(nir_shader *shader);
+
+bool nir_opt_dead_cf(nir_shader *shader);
+
+void nir_opt_gcm(nir_shader *shader);
+
+bool nir_opt_peephole_select(nir_shader *shader);
+
+bool nir_opt_remove_phis(nir_shader *shader);
+
+bool nir_opt_undef(nir_shader *shader);
+
+void nir_sweep(nir_shader *shader);
+
+nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
+gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
diff --git a/src/compiler/nir/nir_algebraic.py b/src/compiler/nir/nir_algebraic.py
new file mode 100644
index 00000000000..a30652f2afd
--- /dev/null
+++ b/src/compiler/nir/nir_algebraic.py
@@ -0,0 +1,305 @@
+#! /usr/bin/env python
+#
+# Copyright (C) 2014 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+# Authors:
+# Jason Ekstrand ([email protected])
+
+import itertools
+import struct
+import sys
+import mako.template
+import re
+
+# Represents a set of variables, each with a unique id
+class VarSet(object):
+ def __init__(self):
+ self.names = {}
+ self.ids = itertools.count()
+ self.immutable = False;
+
+ def __getitem__(self, name):
+ if name not in self.names:
+ assert not self.immutable, "Unknown replacement variable: " + name
+ self.names[name] = self.ids.next()
+
+ return self.names[name]
+
+ def lock(self):
+ self.immutable = True
+
+# Base class of the three search-value kinds (Constant, Variable and
+# Expression). It holds the C identifier ("name") and the kind string
+# ("type_str") from which the generated C type and enum names are built.
+class Value(object):
+ # Factory: wraps a Python value in the matching Value subclass.
+ # tuple -> Expression, Expression -> returned unchanged,
+ # str/unicode -> Variable, bool/int/long/float -> Constant.
+ # Any other type falls through every branch, so None is returned.
+ @staticmethod
+ def create(val, name_base, varset):
+ if isinstance(val, tuple):
+ return Expression(val, name_base, varset)
+ elif isinstance(val, Expression):
+ return val
+ elif isinstance(val, (str, unicode)):
+ return Variable(val, name_base, varset)
+ elif isinstance(val, (bool, int, long, float)):
+ return Constant(val, name_base)
+
+ # Mako template emitting the static C initializer for one value; the
+ # middle section differs for Constant, Variable and Expression.
+ __template = mako.template.Template("""
+static const ${val.c_type} ${val.name} = {
+ { ${val.type_enum} },
+% if isinstance(val, Constant):
+ { ${hex(val)} /* ${val.value} */ },
+% elif isinstance(val, Variable):
+ ${val.index}, /* ${val.var_name} */
+ ${'true' if val.is_constant else 'false'},
+ nir_type_${ val.required_type or 'invalid' },
+% elif isinstance(val, Expression):
+ nir_op_${val.opcode},
+ { ${', '.join(src.c_ptr for src in val.sources)} },
+% endif
+};""")
+
+ def __init__(self, name, type_str):
+ self.name = name
+ self.type_str = type_str
+
+ # Name of the C enum value: "nir_search_value_" + type_str.
+ @property
+ def type_enum(self):
+ return "nir_search_value_" + self.type_str
+
+ # Name of the C struct type: "nir_search_" + type_str.
+ @property
+ def c_type(self):
+ return "nir_search_" + self.type_str
+
+ # C expression for the address of this value's "value" member.
+ @property
+ def c_ptr(self):
+ return "&{0}.value".format(self.name)
+
+ # Renders the template for this value; the subclasses are passed in
+ # because the template's isinstance() checks refer to them by name.
+ def render(self):
+ return self.__template.render(val=self,
+ Constant=Constant,
+ Variable=Variable,
+ Expression=Expression)
+
+class Constant(Value):
+ def __init__(self, val, name):
+ Value.__init__(self, name, "constant")
+ self.value = val
+
+ def __hex__(self):
+ # Even if it's an integer, we still need to unpack as an unsigned
+ # int. This is because, without C99, we can only assign to the first
+ # element of a union in an initializer.
+ if isinstance(self.value, (bool)):
+ return 'NIR_TRUE' if self.value else 'NIR_FALSE'
+ if isinstance(self.value, (int, long)):
+ return hex(struct.unpack('I', struct.pack('i', self.value))[0])
+ elif isinstance(self.value, float):
+ return hex(struct.unpack('I', struct.pack('f', self.value))[0])
+ else:
+ assert False
+
+_var_name_re = re.compile(r"(?P<const>#)?(?P<name>\w+)(?:@(?P<type>\w+))?")
+
+class Variable(Value):
+ def __init__(self, val, name, varset):
+ Value.__init__(self, name, "variable")
+
+ m = _var_name_re.match(val)
+ assert m and m.group('name') is not None
+
+ self.var_name = m.group('name')
+ self.is_constant = m.group('const') is not None
+ self.required_type = m.group('type')
+
+ if self.required_type is not None:
+ assert self.required_type in ('float', 'bool', 'int', 'unsigned')
+
+ self.index = varset[self.var_name]
+
+class Expression(Value):
+ def __init__(self, expr, name_base, varset):
+ Value.__init__(self, name_base, "expression")
+ assert isinstance(expr, tuple)
+
+ self.opcode = expr[0]
+ self.sources = [ Value.create(src, "{0}_{1}".format(name_base, i), varset)
+ for (i, src) in enumerate(expr[1:]) ]
+
+ def render(self):
+ srcs = "\n".join(src.render() for src in self.sources)
+ return srcs + super(Expression, self).render()
+
+_optimization_ids = itertools.count()
+
+condition_list = ['true']
+
+class SearchAndReplace(object):
+ def __init__(self, transform):
+ self.id = _optimization_ids.next()
+
+ search = transform[0]
+ replace = transform[1]
+ if len(transform) > 2:
+ self.condition = transform[2]
+ else:
+ self.condition = 'true'
+
+ if self.condition not in condition_list:
+ condition_list.append(self.condition)
+ self.condition_index = condition_list.index(self.condition)
+
+ varset = VarSet()
+ if isinstance(search, Expression):
+ self.search = search
+ else:
+ self.search = Expression(search, "search{0}".format(self.id), varset)
+
+ varset.lock()
+
+ if isinstance(replace, Value):
+ self.replace = replace
+ else:
+ self.replace = Value.create(replace, "replace{0}".format(self.id), varset)
+
+_algebraic_pass_template = mako.template.Template("""
+#include "nir.h"
+#include "nir_search.h"
+
+#ifndef NIR_OPT_ALGEBRAIC_STRUCT_DEFS
+#define NIR_OPT_ALGEBRAIC_STRUCT_DEFS
+
+struct transform {
+ const nir_search_expression *search;
+ const nir_search_value *replace;
+ unsigned condition_offset;
+};
+
+struct opt_state {
+ void *mem_ctx;
+ bool progress;
+ const bool *condition_flags;
+};
+
+#endif
+
+% for (opcode, xform_list) in xform_dict.iteritems():
+% for xform in xform_list:
+ ${xform.search.render()}
+ ${xform.replace.render()}
+% endfor
+
+static const struct transform ${pass_name}_${opcode}_xforms[] = {
+% for xform in xform_list:
+ { &${xform.search.name}, ${xform.replace.c_ptr}, ${xform.condition_index} },
+% endfor
+};
+% endfor
+
+static bool
+${pass_name}_block(nir_block *block, void *void_state)
+{
+ struct opt_state *state = void_state;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_alu)
+ continue;
+
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+ if (!alu->dest.dest.is_ssa)
+ continue;
+
+ switch (alu->op) {
+ % for opcode in xform_dict.keys():
+ case nir_op_${opcode}:
+ for (unsigned i = 0; i < ARRAY_SIZE(${pass_name}_${opcode}_xforms); i++) {
+ const struct transform *xform = &${pass_name}_${opcode}_xforms[i];
+ if (state->condition_flags[xform->condition_offset] &&
+ nir_replace_instr(alu, xform->search, xform->replace,
+ state->mem_ctx)) {
+ state->progress = true;
+ break;
+ }
+ }
+ break;
+ % endfor
+ default:
+ break;
+ }
+ }
+
+ return true;
+}
+
+static bool
+${pass_name}_impl(nir_function_impl *impl, const bool *condition_flags)
+{
+ struct opt_state state;
+
+ state.mem_ctx = ralloc_parent(impl);
+ state.progress = false;
+ state.condition_flags = condition_flags;
+
+ nir_foreach_block(impl, ${pass_name}_block, &state);
+
+ if (state.progress)
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+
+ return state.progress;
+}
+
+
+bool
+${pass_name}(nir_shader *shader)
+{
+ bool progress = false;
+ bool condition_flags[${len(condition_list)}];
+ const nir_shader_compiler_options *options = shader->options;
+
+ % for index, condition in enumerate(condition_list):
+ condition_flags[${index}] = ${condition};
+ % endfor
+
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ progress |= ${pass_name}_impl(function->impl, condition_flags);
+ }
+
+ return progress;
+}
+""")
+
+class AlgebraicPass(object):
+ def __init__(self, pass_name, transforms):
+ self.xform_dict = {}
+ self.pass_name = pass_name
+
+ for xform in transforms:
+ if not isinstance(xform, SearchAndReplace):
+ xform = SearchAndReplace(xform)
+
+ if xform.search.opcode not in self.xform_dict:
+ self.xform_dict[xform.search.opcode] = []
+
+ self.xform_dict[xform.search.opcode].append(xform)
+
+ def render(self):
+ return _algebraic_pass_template.render(pass_name=self.pass_name,
+ xform_dict=self.xform_dict,
+ condition_list=condition_list)
diff --git a/src/compiler/nir/nir_array.h b/src/compiler/nir/nir_array.h
new file mode 100644
index 00000000000..1db4e8cea36
--- /dev/null
+++ b/src/compiler/nir/nir_array.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ *
+ */
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* A growable buffer.  Sizes are tracked in bytes; elements are appended by
+ * growing the buffer by sizeof(element).
+ */
+typedef struct {
+ void *mem_ctx; /* ralloc context for data, or NULL to use realloc()/free() */
+ size_t size; /* number of bytes currently in use */
+ size_t alloc; /* number of bytes allocated for data */
+ void *data; /* backing storage; NULL after nir_array_init() */
+} nir_array;
+
+/* Put arr into its empty state: no storage, zero size, zero capacity.
+ * mem_ctx is remembered and later selects between the ralloc (non-NULL) and
+ * malloc-family (NULL) code paths.
+ */
+static inline void
+nir_array_init(nir_array *arr, void *mem_ctx)
+{
+   arr->data = NULL;
+   arr->alloc = 0;
+   arr->size = 0;
+   arr->mem_ctx = mem_ctx;
+}
+
+/* Release the array's backing storage with the deallocator that matches how
+ * it was allocated: free() when there is no mem_ctx, ralloc_free() otherwise.
+ */
+static inline void
+nir_array_fini(nir_array *arr)
+{
+   if (!arr->mem_ctx) {
+      free(arr->data);
+      return;
+   }
+
+   ralloc_free(arr->data);
+}
+
+#define NIR_ARRAY_INITIAL_SIZE 64
+
+/* Increments the size of the array by the given amount (in bytes) and
+ * returns a pointer to the beginning of the newly added space.
+ *
+ * The capacity starts at NIR_ARRAY_INITIAL_SIZE and is doubled until the
+ * request fits.  If the reallocation fails, NULL is returned and the array
+ * (data, size and alloc) is left exactly as it was, so the existing contents
+ * are neither leaked nor lost.
+ */
+static inline void *
+nir_array_grow(nir_array *arr, size_t additional)
+{
+   size_t new_size = arr->size + additional;
+   if (new_size > arr->alloc) {
+      size_t new_alloc = arr->alloc ? arr->alloc : NIR_ARRAY_INITIAL_SIZE;
+
+      while (new_size > new_alloc)
+         new_alloc *= 2;
+
+      /* Keep the old pointer until the new one is known to be valid. */
+      void *new_data;
+      if (arr->mem_ctx)
+         new_data = reralloc_size(arr->mem_ctx, arr->data, new_alloc);
+      else
+         new_data = realloc(arr->data, new_alloc);
+
+      if (new_data == NULL)
+         return NULL;
+
+      arr->data = new_data;
+      arr->alloc = new_alloc;
+   }
+
+   void *ptr = (void *)((char *)arr->data + arr->size);
+   arr->size = new_size;
+
+   return ptr;
+}
+
+/* Appends elem to arr (a nir_array pointer) by growing it by sizeof(type)
+ * bytes and storing elem, as a type, in the new space.
+ */
+#define nir_array_add(arr, type, elem) \
+ *(type *)nir_array_grow(arr, sizeof(type)) = (elem)
+
+/* Loops over the used portion of arr, with elem declared as a "type *" that
+ * points at each element in turn.
+ */
+#define nir_array_foreach(arr, type, elem) \
+ for (type *elem = (type *)(arr)->data; \
+ elem < (type *)((char *)(arr)->data + (arr)->size); elem++)
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
new file mode 100644
index 00000000000..88ba3a1c269
--- /dev/null
+++ b/src/compiler/nir/nir_builder.h
@@ -0,0 +1,364 @@
+/*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef NIR_BUILDER_H
+#define NIR_BUILDER_H
+
+#include "nir_control_flow.h"
+
+struct exec_list;
+
+/* State shared by the nir_builder helpers below. */
+typedef struct nir_builder {
+ nir_cursor cursor; /* where the next instruction or CF node is inserted */
+
+ nir_shader *shader; /* passed to the *_instr_create() calls */
+ nir_function_impl *impl; /* function implementation being built */
+} nir_builder;
+
+/* Reset the builder and point it at impl and the shader that owns impl's
+ * function.  The cursor is left zeroed; the caller sets it before inserting.
+ */
+static inline void
+nir_builder_init(nir_builder *build, nir_function_impl *impl)
+{
+   memset(build, 0, sizeof(nir_builder));
+
+   build->shader = impl->function->shader;
+   build->impl = impl;
+}
+
+/* Create a new shader containing a single function named "main" and set the
+ * builder up to append to the end of that function's body.
+ */
+static inline void
+nir_builder_init_simple_shader(nir_builder *build, void *mem_ctx,
+                               gl_shader_stage stage,
+                               const nir_shader_compiler_options *options)
+{
+   nir_shader *shader = nir_shader_create(mem_ctx, stage, options);
+   nir_function *entry = nir_function_create(shader, "main");
+   nir_function_impl *impl = nir_function_impl_create(entry);
+
+   build->shader = shader;
+   build->impl = impl;
+   build->cursor = nir_after_cf_list(&impl->body);
+}
+
+/* Inserts instr at the builder's cursor and then moves the cursor to just
+ * after it, so the next insertion follows this instruction.
+ */
+static inline void
+nir_builder_instr_insert(nir_builder *build, nir_instr *instr)
+{
+ nir_instr_insert(build->cursor, instr);
+
+ /* Move the cursor forward. */
+ build->cursor = nir_after_instr(instr);
+}
+
+/* Inserts the control-flow node cf at the builder's cursor.  Unlike
+ * nir_builder_instr_insert(), the cursor is not updated here.
+ */
+static inline void
+nir_builder_cf_insert(nir_builder *build, nir_cf_node *cf)
+{
+ nir_cf_node_insert(build->cursor, cf);
+}
+
+/* Emit a load_const instruction holding value and return its SSA def, or
+ * NULL if the instruction could not be created.
+ */
+static inline nir_ssa_def *
+nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value value)
+{
+   nir_load_const_instr *imm =
+      nir_load_const_instr_create(build->shader, num_components);
+
+   if (imm != NULL) {
+      imm->value = value;
+      nir_builder_instr_insert(build, &imm->instr);
+      return &imm->def;
+   }
+
+   return NULL;
+}
+
+/* Build a one-component float immediate. */
+static inline nir_ssa_def *
+nir_imm_float(nir_builder *build, float x)
+{
+   nir_const_value imm;
+
+   /* Unused components are zero. */
+   memset(&imm, 0, sizeof(nir_const_value));
+   imm.f[0] = x;
+
+   return nir_build_imm(build, 1, imm);
+}
+
+/* Build a four-component float immediate (x, y, z, w). */
+static inline nir_ssa_def *
+nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
+{
+   const float comps[4] = { x, y, z, w };
+   nir_const_value imm;
+
+   memset(&imm, 0, sizeof(nir_const_value));
+   for (unsigned chan = 0; chan < 4; chan++)
+      imm.f[chan] = comps[chan];
+
+   return nir_build_imm(build, 4, imm);
+}
+
+/* Build a one-component integer immediate. */
+static inline nir_ssa_def *
+nir_imm_int(nir_builder *build, int x)
+{
+   nir_const_value imm;
+
+   /* Unused components are zero. */
+   memset(&imm, 0, sizeof(nir_const_value));
+   imm.i[0] = x;
+
+   return nir_build_imm(build, 1, imm);
+}
+
+/* Build a four-component integer immediate (x, y, z, w). */
+static inline nir_ssa_def *
+nir_imm_ivec4(nir_builder *build, int x, int y, int z, int w)
+{
+   const int comps[4] = { x, y, z, w };
+   nir_const_value imm;
+
+   memset(&imm, 0, sizeof(nir_const_value));
+   for (unsigned chan = 0; chan < 4; chan++)
+      imm.i[chan] = comps[chan];
+
+   return nir_build_imm(build, 4, imm);
+}
+
+/* Emit an ALU instruction for op with up to four SSA sources and return the
+ * SSA def of its destination, or NULL if the instruction could not be
+ * created.  src1..src3 may be NULL when the opcode takes fewer sources.
+ */
+static inline nir_ssa_def *
+nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
+              nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3)
+{
+   const nir_op_info *info = &nir_op_infos[op];
+   nir_alu_instr *alu = nir_alu_instr_create(build->shader, op);
+   if (alu == NULL)
+      return NULL;
+
+   nir_ssa_def *const optional_srcs[3] = { src1, src2, src3 };
+
+   alu->src[0].src = nir_src_for_ssa(src0);
+   for (unsigned i = 0; i < 3; i++) {
+      if (optional_srcs[i])
+         alu->src[i + 1].src = nir_src_for_ssa(optional_srcs[i]);
+   }
+
+   /* When the opcode does not fix the destination size, use the widest of
+    * the sources whose size is not fixed either.
+    */
+   unsigned dest_comps = info->output_size;
+   if (dest_comps == 0) {
+      for (unsigned i = 0; i < info->num_inputs; i++) {
+         if (info->input_sizes[i] != 0)
+            continue;
+
+         dest_comps = MAX2(dest_comps, alu->src[i].src.ssa->num_components);
+      }
+   }
+   assert(dest_comps != 0);
+
+   /* Channels beyond a source's width read its last component, so that no
+    * swizzle refers outside the source vector (e.g. a scalar multiplied by
+    * a vector).
+    */
+   for (unsigned i = 0; i < info->num_inputs; i++) {
+      const unsigned src_comps = alu->src[i].src.ssa->num_components;
+
+      for (unsigned chan = src_comps; chan < 4; chan++)
+         alu->src[i].swizzle[chan] = src_comps - 1;
+   }
+
+   nir_ssa_dest_init(&alu->instr, &alu->dest.dest, dest_comps, NULL);
+   alu->dest.write_mask = (1 << dest_comps) - 1;
+
+   nir_builder_instr_insert(build, &alu->instr);
+
+   return &alu->dest.dest.ssa;
+}
+
+/* ALUn(op) defines the inline builder function nir_<op>() taking n SSA
+ * sources.  Each one forwards to nir_build_alu() with nir_op_<op> and passes
+ * NULL for the unused source slots.  These macros are expanded by the
+ * nir_builder_opcodes.h include that follows.
+ */
+#define ALU1(op) \
+static inline nir_ssa_def * \
+nir_##op(nir_builder *build, nir_ssa_def *src0) \
+{ \
+ return nir_build_alu(build, nir_op_##op, src0, NULL, NULL, NULL); \
+}
+
+#define ALU2(op) \
+static inline nir_ssa_def * \
+nir_##op(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1) \
+{ \
+ return nir_build_alu(build, nir_op_##op, src0, src1, NULL, NULL); \
+}
+
+#define ALU3(op) \
+static inline nir_ssa_def * \
+nir_##op(nir_builder *build, nir_ssa_def *src0, \
+ nir_ssa_def *src1, nir_ssa_def *src2) \
+{ \
+ return nir_build_alu(build, nir_op_##op, src0, src1, src2, NULL); \
+}
+
+#define ALU4(op) \
+static inline nir_ssa_def * \
+nir_##op(nir_builder *build, nir_ssa_def *src0, \
+ nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3) \
+{ \
+ return nir_build_alu(build, nir_op_##op, src0, src1, src2, src3); \
+}
+
+#include "nir_builder_opcodes.h"
+
+/* Combine num_components (1 to 4) scalar defs from comp into one vector.
+ * A single component is returned as-is without emitting an instruction.
+ */
+static inline nir_ssa_def *
+nir_vec(nir_builder *build, nir_ssa_def **comp, unsigned num_components)
+{
+   if (num_components == 1)
+      return comp[0];
+
+   if (num_components == 2)
+      return nir_vec2(build, comp[0], comp[1]);
+
+   if (num_components == 3)
+      return nir_vec3(build, comp[0], comp[1], comp[2]);
+
+   if (num_components == 4)
+      return nir_vec4(build, comp[0], comp[1], comp[2], comp[3]);
+
+   unreachable("bad component count");
+   return NULL;
+}
+
+/**
+ * Like nir_fmov, except that the source is a complete nir_alu_src rather
+ * than a bare nir_ssa_def.  Returns the SSA def written by the new fmov.
+ */
+static inline nir_ssa_def *
+nir_fmov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
+{
+   nir_alu_instr *fmov = nir_alu_instr_create(build->shader, nir_op_fmov);
+   nir_alu_dest *dst = &fmov->dest;
+
+   nir_ssa_dest_init(&fmov->instr, &dst->dest, num_components, NULL);
+   dst->write_mask = (1 << num_components) - 1;
+   fmov->src[0] = src;
+
+   nir_builder_instr_insert(build, &fmov->instr);
+
+   return &dst->dest.ssa;
+}
+
+/**
+ * Integer counterpart of nir_fmov_alu(): emits an imov whose source is the
+ * given nir_alu_src and returns the SSA def it writes.
+ */
+static inline nir_ssa_def *
+nir_imov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
+{
+   nir_alu_instr *imov = nir_alu_instr_create(build->shader, nir_op_imov);
+   nir_alu_dest *dst = &imov->dest;
+
+   nir_ssa_dest_init(&imov->instr, &dst->dest, num_components, NULL);
+   dst->write_mask = (1 << num_components) - 1;
+   imov->src[0] = src;
+
+   nir_builder_instr_insert(build, &imov->instr);
+
+   return &dst->dest.ssa;
+}
+
+/**
+ * Emit a move that rearranges the components of src.  The first
+ * num_components entries of swiz select the source channels; use_fmov picks
+ * between an fmov and an imov.
+ */
+static inline nir_ssa_def *
+nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4],
+            unsigned num_components, bool use_fmov)
+{
+   nir_alu_src mov_src = { NIR_SRC_INIT };
+
+   mov_src.src = nir_src_for_ssa(src);
+   for (unsigned chan = 0; chan < num_components; chan++)
+      mov_src.swizzle[chan] = swiz[chan];
+
+   if (use_fmov)
+      return nir_fmov_alu(build, mov_src, num_components);
+
+   return nir_imov_alu(build, mov_src, num_components);
+}
+
+/* Extract component c of def as a one-component value, using an imov. */
+static inline nir_ssa_def *
+nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c)
+{
+   unsigned swizzle[4];
+
+   for (unsigned chan = 0; chan < 4; chan++)
+      swizzle[chan] = c;
+
+   return nir_swizzle(b, def, swizzle, 1, false);
+}
+
+/**
+ * Produce a nir_ssa_def * for a nir_src so that it can be handed to the
+ * nir_build_alu()-based builder calls.
+ *
+ * An SSA source that already has num_components components is returned
+ * directly; anything else goes through an imov with an identity swizzle.
+ *
+ * See nir_ssa_for_alu_src() for alu instructions.
+ */
+static inline nir_ssa_def *
+nir_ssa_for_src(nir_builder *build, nir_src src, int num_components)
+{
+   const bool already_fits =
+      src.is_ssa && src.ssa->num_components == num_components;
+
+   if (already_fits)
+      return src.ssa;
+
+   nir_alu_src mov_src = { NIR_SRC_INIT };
+   mov_src.src = src;
+   for (int chan = 0; chan < 4; chan++)
+      mov_src.swizzle[chan] = chan;
+
+   return nir_imov_alu(build, mov_src, num_components);
+}
+
+/**
+ * ALU-source variant of nir_ssa_for_src() that honours the swizzle and the
+ * abs/negate modifiers of the nir_alu_src.
+ *
+ * The underlying SSA def is returned directly only when it is an SSA source
+ * of the right width with no modifiers and an identity swizzle; otherwise
+ * an imov carrying the whole nir_alu_src is emitted.
+ */
+static inline nir_ssa_def *
+nir_ssa_for_alu_src(nir_builder *build, nir_alu_instr *instr, unsigned srcn)
+{
+   static uint8_t trivial_swizzle[4] = { 0, 1, 2, 3 };
+   nir_alu_src *src = &instr->src[srcn];
+   unsigned num_components = nir_ssa_alu_instr_src_components(instr, srcn);
+
+   const bool needs_mov =
+      !src->src.is_ssa ||
+      src->src.ssa->num_components != num_components ||
+      src->abs || src->negate ||
+      memcmp(src->swizzle, trivial_swizzle, num_components) != 0;
+
+   if (needs_mov)
+      return nir_imov_alu(build, *src, num_components);
+
+   return src->src.ssa;
+}
+
+/* Emit a load_var intrinsic that reads var and return the loaded SSA def.
+ * The component count is the vector width of the variable's type.
+ */
+static inline nir_ssa_def *
+nir_load_var(nir_builder *build, nir_variable *var)
+{
+   const unsigned width = glsl_get_vector_elements(var->type);
+   nir_intrinsic_instr *intrin =
+      nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_var);
+
+   intrin->num_components = width;
+   intrin->variables[0] = nir_deref_var_create(intrin, var);
+   nir_ssa_dest_init(&intrin->instr, &intrin->dest, width, NULL);
+
+   nir_builder_instr_insert(build, &intrin->instr);
+
+   return &intrin->dest.ssa;
+}
+
+/* Emit a store_var intrinsic that writes value to var.  writemask is stored
+ * in const_index[0]; the component count is the vector width of the
+ * variable's type.
+ */
+static inline void
+nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value,
+              unsigned writemask)
+{
+   const unsigned width = glsl_get_vector_elements(var->type);
+   nir_intrinsic_instr *intrin =
+      nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var);
+
+   intrin->num_components = width;
+   intrin->const_index[0] = writemask;
+   intrin->variables[0] = nir_deref_var_create(intrin, var);
+   intrin->src[0] = nir_src_for_ssa(value);
+
+   nir_builder_instr_insert(build, &intrin->instr);
+}
+
+/* Emit the intrinsic op with index in const_index[0] and return its
+ * destination.  The destination width comes from the intrinsic's info table
+ * entry.
+ */
+static inline nir_ssa_def *
+nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index)
+{
+   const unsigned width = nir_intrinsic_infos[op].dest_components;
+   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(build->shader, op);
+
+   intrin->num_components = width;
+   intrin->const_index[0] = index;
+   nir_ssa_dest_init(&intrin->instr, &intrin->dest, width, NULL);
+
+   nir_builder_instr_insert(build, &intrin->instr);
+
+   return &intrin->dest.ssa;
+}
+
+#endif /* NIR_BUILDER_H */
diff --git a/src/compiler/nir/nir_builder_opcodes_h.py b/src/compiler/nir/nir_builder_opcodes_h.py
new file mode 100644
index 00000000000..e27206ea8fc
--- /dev/null
+++ b/src/compiler/nir/nir_builder_opcodes_h.py
@@ -0,0 +1,38 @@
+#! /usr/bin/env python
+
+template = """\
+/* Copyright (C) 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _NIR_BUILDER_OPCODES_
+#define _NIR_BUILDER_OPCODES_
+
+% for name, opcode in sorted(opcodes.iteritems()):
+ALU${opcode.num_inputs}(${name});
+% endfor
+
+#endif /* _NIR_BUILDER_OPCODES_ */"""
+
+# The opcode table supplies the (name, opcode) pairs the template loops over.
+from nir_opcodes import opcodes
+from mako.template import Template
+
+# Write the generated header to stdout: one ALU<num_inputs>(<name>); line per
+# opcode, in sorted name order.
+print Template(template).render(opcodes=opcodes)
diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
new file mode 100644
index 00000000000..5eff743d835
--- /dev/null
+++ b/src/compiler/nir/nir_clone.c
@@ -0,0 +1,659 @@
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_control_flow_private.h"
+
+/* Secret Decoder Ring:
+ * clone_foo():
+ * Allocate and clone a foo.
+ * __clone_foo():
+ * Clone the body of a foo (i.e. parent class, embedded struct, etc.)
+ */
+
+/* Bookkeeping shared by all of the clone_*() helpers. */
+typedef struct {
+ /* maps orig ptr -> cloned ptr; filled by store_ptr(), read by lookup_ptr() */
+ struct hash_table *ptr_table;
+
+ /* List of phi sources that still refer to the original shader; they are
+ * fixed up at the end of clone_function_impl().
+ */
+ struct list_head phi_srcs;
+
+ /* new shader object, used as memctx for just about everything else: */
+ nir_shader *ns;
+} clone_state;
+
+/* Set up an empty pending-phi list and a pointer-keyed hash table.  The ns
+ * field is not touched here.
+ */
+static void
+init_clone_state(clone_state *state)
+{
+   list_inithead(&state->phi_srcs);
+
+   state->ptr_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                              _mesa_key_pointer_equal);
+}
+
+/* Destroys the pointer table created by init_clone_state().  The phi source
+ * list is not touched here.
+ */
+static void
+free_clone_state(clone_state *state)
+{
+ _mesa_hash_table_destroy(state->ptr_table, NULL);
+}
+
+/* Translate a pointer into the original shader to the matching pointer in
+ * the clone.  NULL maps to NULL.  A pointer that was never registered with
+ * store_ptr() trips an assert, and yields NULL when asserts are compiled
+ * out.
+ */
+static void *
+lookup_ptr(clone_state *state, const void *ptr)
+{
+   if (ptr == NULL)
+      return NULL;
+
+   struct hash_entry *found = _mesa_hash_table_search(state->ptr_table, ptr);
+   assert(found && "Failed to find pointer!");
+
+   return found ? found->data : NULL;
+}
+
+/* Records that the original pointer ptr corresponds to the cloned pointer
+ * nptr.  Note the argument order: the clone comes first, while the table is
+ * keyed by the original.
+ */
+static void
+store_ptr(clone_state *state, void *nptr, const void *ptr)
+{
+ _mesa_hash_table_insert(state->ptr_table, ptr, nptr);
+}
+
+/* Recursively copy a constant and all of its element constants.  Every
+ * allocation is parented to nvar, the cloned variable that owns it.
+ */
+static nir_constant *
+clone_constant(clone_state *state, const nir_constant *c, nir_variable *nvar)
+{
+   const unsigned count = c->num_elements;
+   nir_constant *copy = ralloc(nvar, nir_constant);
+
+   copy->value = c->value;
+   copy->num_elements = count;
+   copy->elements = ralloc_array(nvar, nir_constant *, count);
+
+   for (unsigned idx = 0; idx != count; ++idx)
+      copy->elements[idx] = clone_constant(state, c->elements[idx], nvar);
+
+   return copy;
+}
+
+/* Clone one nir_variable and register the old -> new mapping.
+ *
+ * NOTE: nir_variable_create is deliberately bypassed so that locals and
+ * globals do not need separate handling; the caller links the result into
+ * the right list.
+ */
+static nir_variable *
+clone_variable(clone_state *state, const nir_variable *var)
+{
+   nir_variable *clone = rzalloc(state->ns, nir_variable);
+   store_ptr(state, clone, var);
+
+   clone->type = var->type;
+   clone->interface_type = var->interface_type;
+   clone->data = var->data;
+   clone->name = ralloc_strdup(clone, var->name);
+
+   clone->num_state_slots = var->num_state_slots;
+   clone->state_slots =
+      ralloc_array(clone, nir_state_slot, var->num_state_slots);
+   memcpy(clone->state_slots, var->state_slots,
+          var->num_state_slots * sizeof(nir_state_slot));
+
+   if (var->constant_initializer != NULL) {
+      clone->constant_initializer =
+         clone_constant(state, var->constant_initializer, clone);
+   }
+
+   return clone;
+}
+
+/* Reset dst to an empty list, then append a clone of every nir_variable in
+ * list, preserving order.
+ */
+static void
+clone_var_list(clone_state *state, struct exec_list *dst,
+               const struct exec_list *list)
+{
+   exec_list_make_empty(dst);
+
+   foreach_list_typed(nir_variable, orig_var, node, list) {
+      nir_variable *copy = clone_variable(state, orig_var);
+
+      exec_list_push_tail(dst, &copy->node);
+   }
+}
+
+/* Clone one nir_register and register the old -> new mapping.
+ *
+ * NOTE: nir_global/local_reg_create() are deliberately bypassed so that
+ * locals and globals do not need separate handling.
+ */
+static nir_register *
+clone_register(clone_state *state, const nir_register *reg)
+{
+   nir_register *copy = rzalloc(state->ns, nir_register);
+   store_ptr(state, copy, reg);
+
+   /* The use/def lists start out empty; they are rebuilt by
+    * nir_instr_insert() as the cloned instructions are added.
+    */
+   list_inithead(&copy->uses);
+   list_inithead(&copy->defs);
+   list_inithead(&copy->if_uses);
+
+   copy->name = ralloc_strdup(copy, reg->name);
+   copy->index = reg->index;
+   copy->num_components = reg->num_components;
+   copy->num_array_elems = reg->num_array_elems;
+   copy->is_global = reg->is_global;
+   copy->is_packed = reg->is_packed;
+
+   return copy;
+}
+
+/* Reset dst to an empty list, then append a clone of every nir_register in
+ * list, preserving order.
+ */
+static void
+clone_reg_list(clone_state *state, struct exec_list *dst,
+               const struct exec_list *list)
+{
+   exec_list_make_empty(dst);
+
+   foreach_list_typed(nir_register, orig_reg, node, list) {
+      nir_register *copy = clone_register(state, orig_reg);
+
+      exec_list_push_tail(dst, &copy->node);
+   }
+}
+
+/* Fill in nsrc as a clone of src, translating the SSA def or register it
+ * refers to into its counterpart in the new shader via the pointer table.
+ *
+ * ninstr_or_if is the cloned instruction or if-statement that owns nsrc; it
+ * serves as the ralloc parent for an indirect source, which is cloned
+ * recursively.
+ */
+static void
+__clone_src(clone_state *state, void *ninstr_or_if,
+            nir_src *nsrc, const nir_src *src)
+{
+   nsrc->is_ssa = src->is_ssa;
+   if (src->is_ssa) {
+      nsrc->ssa = lookup_ptr(state, src->ssa);
+   } else {
+      nsrc->reg.reg = lookup_ptr(state, src->reg.reg);
+      if (src->reg.indirect) {
+         nsrc->reg.indirect = ralloc(ninstr_or_if, nir_src);
+         __clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect);
+      } else {
+         /* nsrc may itself be a freshly ralloc'ed (uninitialized) indirect
+          * source, so the absence of an indirect has to be written out
+          * explicitly rather than assumed.
+          */
+         nsrc->reg.indirect = NULL;
+      }
+      nsrc->reg.base_offset = src->reg.base_offset;
+   }
+}
+
+/* Fill in ndst, a destination of the cloned instruction ninstr, from dst.
+ *
+ * An SSA destination gets a new SSA def, which is entered in the pointer
+ * table so that later sources can find it.  A register destination is
+ * translated through the table, including any indirect source.
+ */
+static void
+__clone_dst(clone_state *state, nir_instr *ninstr,
+            nir_dest *ndst, const nir_dest *dst)
+{
+   ndst->is_ssa = dst->is_ssa;
+
+   if (!dst->is_ssa) {
+      ndst->reg.reg = lookup_ptr(state, dst->reg.reg);
+      ndst->reg.base_offset = dst->reg.base_offset;
+
+      if (dst->reg.indirect) {
+         ndst->reg.indirect = ralloc(ninstr, nir_src);
+         __clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect);
+      }
+      return;
+   }
+
+   nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components, dst->ssa.name);
+   store_ptr(state, &ndst->ssa, &dst->ssa);
+}
+
+static nir_deref *clone_deref(clone_state *state, const nir_deref *deref,
+ nir_instr *ninstr, nir_deref *parent);
+
+/* Clone a variable deref and the chain hanging off it.  The variable is
+ * translated through the pointer table and the new deref is allocated with
+ * ninstr as its memory context.
+ */
+static nir_deref_var *
+clone_deref_var(clone_state *state, const nir_deref_var *dvar,
+                nir_instr *ninstr)
+{
+   nir_variable *new_var = lookup_ptr(state, dvar->var);
+   nir_deref_var *head = nir_deref_var_create(ninstr, new_var);
+   const nir_deref *tail = dvar->deref.child;
+
+   if (tail != NULL)
+      head->deref.child = clone_deref(state, tail, ninstr, &head->deref);
+
+   return head;
+}
+
+/* Clone an array deref link, its children, and (for indirect derefs) its
+ * indirect source.  The new link is allocated with parent as its context.
+ */
+static nir_deref_array *
+clone_deref_array(clone_state *state, const nir_deref_array *darr,
+                  nir_instr *ninstr, nir_deref *parent)
+{
+   nir_deref_array *link = nir_deref_array_create(parent);
+
+   link->deref.type = darr->deref.type;
+   link->deref_array_type = darr->deref_array_type;
+   link->base_offset = darr->base_offset;
+
+   if (darr->deref.child != NULL) {
+      link->deref.child = clone_deref(state, darr->deref.child,
+                                      ninstr, &link->deref);
+   }
+
+   if (link->deref_array_type == nir_deref_array_type_indirect)
+      __clone_src(state, ninstr, &link->indirect, &darr->indirect);
+
+   return link;
+}
+
+/* Clone a struct-member deref link and its children.  The new link is
+ * allocated with parent as its context and keeps the same member index.
+ */
+static nir_deref_struct *
+clone_deref_struct(clone_state *state, const nir_deref_struct *dstr,
+                   nir_instr *ninstr, nir_deref *parent)
+{
+   nir_deref_struct *link = nir_deref_struct_create(parent, dstr->index);
+
+   link->deref.type = dstr->deref.type;
+
+   if (dstr->deref.child != NULL) {
+      link->deref.child = clone_deref(state, dstr->deref.child,
+                                      ninstr, &link->deref);
+   }
+
+   return link;
+}
+
+/* Clone a non-head deref link by dispatching on its type.  Only array and
+ * struct links are accepted here; anything else is a bug.
+ */
+static nir_deref *
+clone_deref(clone_state *state, const nir_deref *dref,
+            nir_instr *ninstr, nir_deref *parent)
+{
+   if (dref->deref_type == nir_deref_type_array) {
+      return &clone_deref_array(state, nir_deref_as_array(dref),
+                                ninstr, parent)->deref;
+   }
+
+   if (dref->deref_type == nir_deref_type_struct) {
+      return &clone_deref_struct(state, nir_deref_as_struct(dref),
+                                 ninstr, parent)->deref;
+   }
+
+   unreachable("bad deref type");
+   return NULL;
+}
+
+/* Clone an ALU instruction: destination (with saturate and write mask) and
+ * every source (with negate, abs and swizzle).
+ */
+static nir_alu_instr *
+clone_alu(clone_state *state, const nir_alu_instr *alu)
+{
+   nir_alu_instr *copy = nir_alu_instr_create(state->ns, alu->op);
+
+   __clone_dst(state, &copy->instr, &copy->dest.dest, &alu->dest.dest);
+   copy->dest.write_mask = alu->dest.write_mask;
+   copy->dest.saturate = alu->dest.saturate;
+
+   const unsigned num_inputs = nir_op_infos[alu->op].num_inputs;
+   for (unsigned s = 0; s < num_inputs; s++) {
+      const nir_alu_src *from = &alu->src[s];
+      nir_alu_src *to = &copy->src[s];
+
+      __clone_src(state, &copy->instr, &to->src, &from->src);
+      to->abs = from->abs;
+      to->negate = from->negate;
+      memcpy(to->swizzle, from->swizzle, sizeof(to->swizzle));
+   }
+
+   return copy;
+}
+
+/* Clone an intrinsic: destination (if the intrinsic has one), component
+ * count, constant indices, variable derefs and sources.
+ */
+static nir_intrinsic_instr *
+clone_intrinsic(clone_state *state, const nir_intrinsic_instr *itr)
+{
+   const nir_intrinsic_op op = itr->intrinsic;
+   nir_intrinsic_instr *copy = nir_intrinsic_instr_create(state->ns, op);
+
+   const unsigned var_count = nir_intrinsic_infos[op].num_variables;
+   const unsigned src_count = nir_intrinsic_infos[op].num_srcs;
+
+   if (nir_intrinsic_infos[op].has_dest)
+      __clone_dst(state, &copy->instr, &copy->dest, &itr->dest);
+
+   copy->num_components = itr->num_components;
+   memcpy(copy->const_index, itr->const_index, sizeof(copy->const_index));
+
+   for (unsigned v = 0; v < var_count; v++) {
+      copy->variables[v] = clone_deref_var(state, itr->variables[v],
+                                           &copy->instr);
+   }
+
+   for (unsigned s = 0; s < src_count; s++)
+      __clone_src(state, &copy->instr, &copy->src[s], &itr->src[s]);
+
+   return copy;
+}
+
+/* Clone a load_const instruction and enter its SSA def in the pointer
+ * table.
+ */
+static nir_load_const_instr *
+clone_load_const(clone_state *state, const nir_load_const_instr *lc)
+{
+   nir_load_const_instr *copy =
+      nir_load_const_instr_create(state->ns, lc->def.num_components);
+
+   copy->value = lc->value;
+   store_ptr(state, &copy->def, &lc->def);
+
+   return copy;
+}
+
+/* Clone an SSA undef instruction of the same width and enter its def in the
+ * pointer table.
+ */
+static nir_ssa_undef_instr *
+clone_ssa_undef(clone_state *state, const nir_ssa_undef_instr *sa)
+{
+   const unsigned width = sa->def.num_components;
+   nir_ssa_undef_instr *copy = nir_ssa_undef_instr_create(state->ns, width);
+
+   store_ptr(state, &copy->def, &sa->def);
+
+   return copy;
+}
+
+/* Clone a texture instruction: all scalar fields, the destination, every
+ * source, and the sampler deref when there is one.
+ */
+static nir_tex_instr *
+clone_tex(clone_state *state, const nir_tex_instr *tex)
+{
+   nir_tex_instr *copy = nir_tex_instr_create(state->ns, tex->num_srcs);
+
+   /* Plain value fields. */
+   copy->op = tex->op;
+   copy->sampler_dim = tex->sampler_dim;
+   copy->dest_type = tex->dest_type;
+   copy->coord_components = tex->coord_components;
+   copy->is_array = tex->is_array;
+   copy->is_shadow = tex->is_shadow;
+   copy->is_new_style_shadow = tex->is_new_style_shadow;
+   copy->component = tex->component;
+   copy->sampler_index = tex->sampler_index;
+   copy->sampler_array_size = tex->sampler_array_size;
+   memcpy(copy->const_offset, tex->const_offset, sizeof(copy->const_offset));
+
+   /* Destination first, then the sources. */
+   __clone_dst(state, &copy->instr, &copy->dest, &tex->dest);
+   for (unsigned s = 0; s < copy->num_srcs; s++) {
+      copy->src[s].src_type = tex->src[s].src_type;
+      __clone_src(state, &copy->instr, &copy->src[s].src, &tex->src[s].src);
+   }
+
+   if (tex->sampler != NULL)
+      copy->sampler = clone_deref_var(state, tex->sampler, &copy->instr);
+
+   return copy;
+}
+
+/* Clones a phi instruction and appends it to nblk.
+ *
+ * The sources of the returned phi still point into the original shader;
+ * each one is queued on state->phi_srcs to be fixed up later.
+ */
+static nir_phi_instr *
+clone_phi(clone_state *state, const nir_phi_instr *phi, nir_block *nblk)
+{
+ nir_phi_instr *nphi = nir_phi_instr_create(state->ns);
+
+ __clone_dst(state, &nphi->instr, &nphi->dest, &phi->dest);
+
+ /* Cloning a phi node is a bit different from other instructions. The
+ * sources of phi instructions are the only time where we can use an SSA
+ * def before it is defined. In order to handle this, we just copy over
+ * the sources from the old phi instruction directly and then fix them up
+ * in a second pass once all the instructions in the function have been
+ * properly cloned.
+ *
+ * In order to ensure that the copied sources (which are the same as the
+ * old phi instruction's sources for now) don't get inserted into the old
+ * shader's use-def lists, we have to add the phi instruction *before* we
+ * set up its sources.
+ */
+ nir_instr_insert_after_block(nblk, &nphi->instr);
+
+ foreach_list_typed(nir_phi_src, src, node, &phi->srcs) {
+ nir_phi_src *nsrc = ralloc(nphi, nir_phi_src);
+
+ /* Just copy the old source for now. */
+ memcpy(nsrc, src, sizeof(*src));
+
+ /* Since we're not letting nir_instr_insert handle use/def stuff for us,
+ * we have to set the parent_instr manually. It doesn't really matter
+ * when we do it, so we might as well do it here.
+ */
+ nsrc->src.parent_instr = &nphi->instr;
+
+ /* Stash it in the list of phi sources. We'll walk this list and fix up
+ * sources at the very end of clone_function_impl.
+ */
+ list_add(&nsrc->src.use_link, &state->phi_srcs);
+
+ exec_list_push_tail(&nphi->srcs, &nsrc->node);
+ }
+
+ return nphi;
+}
+
+/* Clone a jump instruction; the jump type is the only thing to carry over. */
+static nir_jump_instr *
+clone_jump(clone_state *state, const nir_jump_instr *jmp)
+{
+   return nir_jump_instr_create(state->ns, jmp->type);
+}
+
+/* Clone a call instruction.
+ *
+ * The callee is translated through the pointer table, so the cloned
+ * nir_function must already have been registered with store_ptr().  Each
+ * parameter deref is cloned with the new call as its owner.
+ *
+ * return_deref is optional: when the original call has none, the clone gets
+ * NULL as well instead of handing NULL to clone_deref_var(), which
+ * dereferences its argument unconditionally.
+ */
+static nir_call_instr *
+clone_call(clone_state *state, const nir_call_instr *call)
+{
+   nir_function *ncallee = lookup_ptr(state, call->callee);
+   nir_call_instr *ncall = nir_call_instr_create(state->ns, ncallee);
+
+   for (unsigned i = 0; i < ncall->num_params; i++)
+      ncall->params[i] = clone_deref_var(state, call->params[i], &ncall->instr);
+
+   ncall->return_deref = call->return_deref ?
+      clone_deref_var(state, call->return_deref, &ncall->instr) : NULL;
+
+   return ncall;
+}
+
+/* Clones a single instruction by dispatching on its type and returns the
+ * embedded nir_instr of the clone.  The clone is not inserted anywhere.
+ *
+ * Phis must go through clone_phi() instead, and parallel copies cannot be
+ * cloned at all; both are treated as unreachable here.
+ */
+static nir_instr *
+clone_instr(clone_state *state, const nir_instr *instr)
+{
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ return &clone_alu(state, nir_instr_as_alu(instr))->instr;
+ case nir_instr_type_intrinsic:
+ return &clone_intrinsic(state, nir_instr_as_intrinsic(instr))->instr;
+ case nir_instr_type_load_const:
+ return &clone_load_const(state, nir_instr_as_load_const(instr))->instr;
+ case nir_instr_type_ssa_undef:
+ return &clone_ssa_undef(state, nir_instr_as_ssa_undef(instr))->instr;
+ case nir_instr_type_tex:
+ return &clone_tex(state, nir_instr_as_tex(instr))->instr;
+ case nir_instr_type_phi:
+ unreachable("Cannot clone phis with clone_instr");
+ case nir_instr_type_jump:
+ return &clone_jump(state, nir_instr_as_jump(instr))->instr;
+ case nir_instr_type_call:
+ return &clone_call(state, nir_instr_as_call(instr))->instr;
+ case nir_instr_type_parallel_copy:
+ unreachable("Cannot clone parallel copies");
+ default:
+ unreachable("bad instr type");
+ return NULL;
+ }
+}
+
+/* Clones the instructions of blk into the block at the tail of cf_list and
+ * returns that block.  The old -> new block mapping is recorded before any
+ * instruction is cloned.
+ */
+static nir_block *
+clone_block(clone_state *state, struct exec_list *cf_list, const nir_block *blk)
+{
+ /* Don't actually create a new block. Just use the one from the tail of
+ * the list. NIR guarantees that the tail of the list is a block and that
+ * no two blocks are side-by-side in the IR; It should be empty.
+ */
+ nir_block *nblk =
+ exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
+ assert(nblk->cf_node.type == nir_cf_node_block);
+ assert(exec_list_is_empty(&nblk->instr_list));
+
+ /* We need this for phi sources */
+ store_ptr(state, nblk, blk);
+
+ nir_foreach_instr(blk, instr) {
+ if (instr->type == nir_instr_type_phi) {
+ /* Phi instructions are a bit of a special case when cloning because
+ * we don't want inserting the instruction to automatically handle
+ * use/defs for us. Instead, we need to wait until all the
+ * blocks/instructions are in so that we can set their sources up.
+ */
+ clone_phi(state, nir_instr_as_phi(instr), nblk);
+ } else {
+ nir_instr *ninstr = clone_instr(state, instr);
+ nir_instr_insert_after_block(nblk, ninstr);
+ }
+ }
+
+ return nblk;
+}
+
+static void
+clone_cf_list(clone_state *state, struct exec_list *dst,
+ const struct exec_list *list);
+
+/* Clones an if-statement and appends it to cf_list.  The condition is
+ * cloned first, then the new node is inserted, and only after that are the
+ * then/else lists cloned into it.
+ */
+static nir_if *
+clone_if(clone_state *state, struct exec_list *cf_list, const nir_if *i)
+{
+ nir_if *ni = nir_if_create(state->ns);
+
+ __clone_src(state, ni, &ni->condition, &i->condition);
+
+ nir_cf_node_insert_end(cf_list, &ni->cf_node);
+
+ clone_cf_list(state, &ni->then_list, &i->then_list);
+ clone_cf_list(state, &ni->else_list, &i->else_list);
+
+ return ni;
+}
+
+/* Clones a loop and appends it to cf_list.  The new loop node is inserted
+ * before its body is cloned into it.
+ */
+static nir_loop *
+clone_loop(clone_state *state, struct exec_list *cf_list, const nir_loop *loop)
+{
+ nir_loop *nloop = nir_loop_create(state->ns);
+
+ nir_cf_node_insert_end(cf_list, &nloop->cf_node);
+
+ clone_cf_list(state, &nloop->body, &loop->body);
+
+ return nloop;
+}
+
+/* clone list of nir_cf_node: walks list in order and clones each block, if
+ * or loop into dst using the matching clone_*() helper.
+ */
+static void
+clone_cf_list(clone_state *state, struct exec_list *dst,
+ const struct exec_list *list)
+{
+ foreach_list_typed(nir_cf_node, cf, node, list) {
+ switch (cf->type) {
+ case nir_cf_node_block:
+ clone_block(state, dst, nir_cf_node_as_block(cf));
+ break;
+ case nir_cf_node_if:
+ clone_if(state, dst, nir_cf_node_as_if(cf));
+ break;
+ case nir_cf_node_loop:
+ clone_loop(state, dst, nir_cf_node_as_loop(cf));
+ break;
+ default:
+ unreachable("bad cf type");
+ }
+ }
+}
+
+/* Clone the implementation fi into a new nir_function_impl created for nfxn.
+ *
+ * Locals and registers are cloned first, then the parameter and return
+ * variables are resolved through the clone state's pointer map, then the
+ * body is cloned.  Phi sources are left on state->phi_srcs while the body is
+ * cloned and are resolved in a final pass.  Returns the new impl.
+ */
+static nir_function_impl *
+clone_function_impl(clone_state *state, const nir_function_impl *fi,
+ nir_function *nfxn)
+{
+ nir_function_impl *nfi = nir_function_impl_create(nfxn);
+
+ clone_var_list(state, &nfi->locals, &fi->locals);
+ clone_reg_list(state, &nfi->registers, &fi->registers);
+ nfi->reg_alloc = fi->reg_alloc;
+
+ /* Parameters and the return variable are looked up, not cloned.
+ * NOTE(review): presumably they are among the locals cloned above -
+ * confirm against clone_var_list().
+ */
+ nfi->num_params = fi->num_params;
+ nfi->params = ralloc_array(state->ns, nir_variable *, fi->num_params);
+ for (unsigned i = 0; i < fi->num_params; i++) {
+ nfi->params[i] = lookup_ptr(state, fi->params[i]);
+ }
+ nfi->return_var = lookup_ptr(state, fi->return_var);
+
+ /* The pending phi-source list must start out empty for each impl. */
+ assert(list_empty(&state->phi_srcs));
+
+ clone_cf_list(state, &nfi->body, &fi->body);
+
+ /* After we've cloned almost everything, we have to walk the list of phi
+ * sources and fix them up. Thanks to loops, the block and SSA value for a
+ * phi source may not be defined when we first encounter it. Instead, we
+ * add it to the phi_srcs list and we fix it up here.
+ */
+ list_for_each_entry_safe(nir_phi_src, src, &state->phi_srcs, src.use_link) {
+ /* Until now pred and ssa still pointed at the original objects. */
+ src->pred = lookup_ptr(state, src->pred);
+ assert(src->src.is_ssa);
+ src->src.ssa = lookup_ptr(state, src->src.ssa);
+
+ /* Remove from this list and place in the uses of the SSA def */
+ list_del(&src->src.use_link);
+ list_addtail(&src->src.use_link, &src->src.ssa->uses);
+ }
+ assert(list_empty(&state->phi_srcs));
+
+ /* All metadata is invalidated in the cloning process */
+ nfi->valid_metadata = 0;
+
+ return nfi;
+}
+
+/* Clone the declaration of fxn (name, parameter descriptions, return type)
+ * into ns, which must be the shader recorded in the clone state.
+ *
+ * The implementation is deliberately not cloned here: call instructions need
+ * to be able to reference every function, and they are processed while the
+ * function_impls are cloned.  Implementations are therefore handled in a
+ * second pass by the caller.
+ */
+static nir_function *
+clone_function(clone_state *state, const nir_function *fxn, nir_shader *ns)
+{
+   assert(ns == state->ns);
+
+   nir_function *copy = nir_function_create(ns, fxn->name);
+
+   /* Record the mapping now; call instructions look it up later. */
+   store_ptr(state, copy, fxn);
+
+   copy->return_type = fxn->return_type;
+
+   const unsigned count = fxn->num_params;
+   copy->num_params = count;
+   copy->params = ralloc_array(state->ns, nir_parameter, count);
+   memcpy(copy->params, fxn->params, sizeof(nir_parameter) * count);
+
+   return copy;
+}
+
+/* Create a deep copy of shader s, allocated out of mem_ctx.
+ *
+ * Variables, function declarations, function implementations and global
+ * registers are cloned; the info block and the input/uniform/output counts
+ * are copied, with the name and label strings duplicated so that the copy
+ * owns them.  Returns the new shader.
+ */
+nir_shader *
+nir_shader_clone(void *mem_ctx, const nir_shader *s)
+{
+   clone_state state;
+   init_clone_state(&state);
+
+   nir_shader *ns = nir_shader_create(mem_ctx, s->stage, s->options);
+   state.ns = ns;
+
+   clone_var_list(&state, &ns->uniforms, &s->uniforms);
+   clone_var_list(&state, &ns->inputs, &s->inputs);
+   clone_var_list(&state, &ns->outputs, &s->outputs);
+   clone_var_list(&state, &ns->globals, &s->globals);
+   clone_var_list(&state, &ns->system_values, &s->system_values);
+
+   /* Go through and clone functions */
+   foreach_list_typed(nir_function, fxn, node, &s->functions)
+      clone_function(&state, fxn, ns);
+
+   /* Only after all functions are cloned can we clone the actual function
+    * implementations.  This is because nir_call_instr's need to reference the
+    * functions of other functions and we don't know what order the functions
+    * will have in the list.
+    */
+   nir_foreach_function(s, fxn) {
+      /* A function that is only declared has no implementation, and
+       * clone_function_impl() dereferences its argument unconditionally,
+       * so there is nothing to clone for it.
+       */
+      if (fxn->impl == NULL)
+         continue;
+
+      nir_function *nfxn = lookup_ptr(&state, fxn);
+      clone_function_impl(&state, fxn->impl, nfxn);
+   }
+
+   clone_reg_list(&state, &ns->registers, &s->registers);
+   ns->reg_alloc = s->reg_alloc;
+
+   /* Copy the info block wholesale, then give the copy its own strings. */
+   ns->info = s->info;
+   ns->info.name = ralloc_strdup(ns, ns->info.name);
+   if (ns->info.label)
+      ns->info.label = ralloc_strdup(ns, ns->info.label);
+
+   ns->num_inputs = s->num_inputs;
+   ns->num_uniforms = s->num_uniforms;
+   ns->num_outputs = s->num_outputs;
+
+   free_clone_state(&state);
+
+   return ns;
+}
diff --git a/src/compiler/nir/nir_constant_expressions.h b/src/compiler/nir/nir_constant_expressions.h
new file mode 100644
index 00000000000..97997f2e514
--- /dev/null
+++ b/src/compiler/nir/nir_constant_expressions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#include "nir.h"
+
+/**
+ * Evaluate the ALU opcode op on constant sources and return the result.
+ *
+ * src points to one nir_const_value per opcode input.  num_components is
+ * the number of components evaluated for opcodes that operate
+ * per-component.
+ */
+nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components,
+ nir_const_value *src);
diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py
new file mode 100644
index 00000000000..32784f6398d
--- /dev/null
+++ b/src/compiler/nir/nir_constant_expressions.py
@@ -0,0 +1,336 @@
+#! /usr/bin/python2
+template = """\
+/*
+ * Copyright (C) 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ */
+
+#include <math.h>
+#include "main/core.h"
+#include "util/rounding.h" /* for _mesa_roundeven */
+#include "util/half_float.h"
+#include "nir_constant_expressions.h"
+
+/**
+ * Evaluate one component of packSnorm4x8.
+ */
+static uint8_t
+pack_snorm_1x8(float x)
+{
+   /* Section 8.4 of the GLSL 4.30 spec defines the conversion of component
+    * c of v to fixed point as:
+    *
+    *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
+    *
+    * The rounded value goes through int before being narrowed, because
+    * casting a negative float to a uint is undefined.
+    */
+   const float clamped = CLAMP(x, -1.0f, +1.0f);
+   const int fixed = (int) _mesa_roundevenf(clamped * 127.0f);
+
+   return (uint8_t) fixed;
+}
+
+/**
+ * Evaluate one component of packSnorm2x16.
+ */
+static uint16_t
+pack_snorm_1x16(float x)
+{
+   /* Section 8.4 of the GLSL ES 3.00 spec defines the conversion of
+    * component c of v to fixed point as:
+    *
+    *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
+    *
+    * The rounded value goes through int before being narrowed, because
+    * casting a negative float to a uint is undefined.
+    */
+   const float clamped = CLAMP(x, -1.0f, +1.0f);
+   const int fixed = (int) _mesa_roundevenf(clamped * 32767.0f);
+
+   return (uint16_t) fixed;
+}
+
+/**
+ * Evaluate one component of unpackSnorm4x8.
+ */
+static float
+unpack_snorm_1x8(uint8_t u)
+{
+   /* Section 8.4 of the GLSL 4.30 spec defines the conversion of the
+    * unpacked fixed-point value f to floating point as:
+    *
+    *      unpackSnorm4x8: clamp(f / 127.0, -1, +1)
+    *
+    * The byte is reinterpreted as signed before it is scaled.
+    */
+   const float scaled = (int8_t) u / 127.0f;
+
+   return CLAMP(scaled, -1.0f, +1.0f);
+}
+
+/**
+ * Evaluate one component of unpackSnorm2x16.
+ */
+static float
+unpack_snorm_1x16(uint16_t u)
+{
+   /* Section 8.4 of the GLSL ES 3.00 spec defines the conversion of the
+    * unpacked fixed-point value f to floating point as:
+    *
+    *      unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
+    *
+    * The 16-bit value is reinterpreted as signed before it is scaled.
+    */
+   const float scaled = (int16_t) u / 32767.0f;
+
+   return CLAMP(scaled, -1.0f, +1.0f);
+}
+
+/**
+ * Evaluate one component of packUnorm4x8.
+ */
+static uint8_t
+pack_unorm_1x8(float x)
+{
+   /* Section 8.4 of the GLSL 4.30 spec defines the conversion of component
+    * c of v to fixed point as:
+    *
+    *      packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
+    */
+   const float clamped = CLAMP(x, 0.0f, 1.0f);
+   const int fixed = (int) _mesa_roundevenf(clamped * 255.0f);
+
+   return (uint8_t) fixed;
+}
+
+/**
+ * Evaluate one component of packUnorm2x16.
+ */
+static uint16_t
+pack_unorm_1x16(float x)
+{
+   /* Section 8.4 of the GLSL ES 3.00 spec defines the conversion of
+    * component c of v to fixed point as:
+    *
+    *      packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
+    */
+   const float clamped = CLAMP(x, 0.0f, 1.0f);
+   const int fixed = (int) _mesa_roundevenf(clamped * 65535.0f);
+
+   return (uint16_t) fixed;
+}
+
+/**
+ * Evaluate one component of unpackUnorm4x8.
+ */
+static float
+unpack_unorm_1x8(uint8_t u)
+{
+   /* Section 8.4 of the GLSL 4.30 spec defines the conversion of the
+    * unpacked fixed-point value f to floating point as:
+    *
+    *      unpackUnorm4x8: f / 255.0
+    */
+   const float value = (float) u;
+
+   return value / 255.0f;
+}
+
+/**
+ * Evaluate one component of unpackUnorm2x16.
+ */
+static float
+unpack_unorm_1x16(uint16_t u)
+{
+   /* Section 8.4 of the GLSL ES 3.00 spec defines the conversion of the
+    * unpacked fixed-point value f to floating point as:
+    *
+    *      unpackUnorm2x16: f / 65535.0
+    */
+   const float value = (float) u;
+
+   return value / 65535.0f;
+}
+
+/**
+ * Evaluate one component of packHalf2x16.
+ *
+ * Thin wrapper: the conversion itself is done by _mesa_float_to_half().
+ */
+static uint16_t
+pack_half_1x16(float x)
+{
+ return _mesa_float_to_half(x);
+}
+
+/**
+ * Evaluate one component of unpackHalf2x16.
+ *
+ * Thin wrapper: the conversion itself is done by _mesa_half_to_float().
+ */
+static float
+unpack_half_1x16(uint16_t u)
+{
+ return _mesa_half_to_float(u);
+}
+
+/* Some typed vector structures to make things like src0.y work */
+## Emits one struct per type name below, each with x, y, z and w members
+## of that type.
+% for type in ["float", "int", "uint", "bool"]:
+struct ${type}_vec {
+ ${type} x;
+ ${type} y;
+ ${type} z;
+ ${type} w;
+};
+% endfor
+
+## Emit one static evaluate_<name>() function per entry of opcodes, in
+## sorted name order.  Each function takes the component count and the
+## array of constant sources and returns the constant result.  Inputs with
+## a size of 0 are handled one component at a time inside the loop below;
+## inputs with a non-zero size are loaded up front into a typed vector.
+% for name, op in sorted(opcodes.iteritems()):
+static nir_const_value
+evaluate_${name}(unsigned num_components, nir_const_value *_src)
+{
+ nir_const_value _dst_val = { { {0, 0, 0, 0} } };
+
+ ## For each non-per-component input, create a variable srcN that
+ ## contains x, y, z, and w elements which are filled in with the
+ ## appropriately-typed values.
+ % for j in range(op.num_inputs):
+ % if op.input_sizes[j] == 0:
+ <% continue %>
+ % elif "src" + str(j) not in op.const_expr:
+ ## Avoid unused variable warnings
+ <% continue %>
+ %endif
+
+ struct ${op.input_types[j]}_vec src${j} = {
+ % for k in range(op.input_sizes[j]):
+ % if op.input_types[j] == "bool":
+ _src[${j}].u[${k}] != 0,
+ % else:
+ _src[${j}].${op.input_types[j][:1]}[${k}],
+ % endif
+ % endfor
+ };
+ % endfor
+
+ % if op.output_size == 0:
+ ## For per-component instructions, we need to iterate over the
+ ## components and apply the constant expression one component
+ ## at a time.
+ for (unsigned _i = 0; _i < num_components; _i++) {
+ ## For each per-component input, create a variable srcN that
+ ## contains the value of the current (_i'th) component.
+ % for j in range(op.num_inputs):
+ % if op.input_sizes[j] != 0:
+ <% continue %>
+ % elif "src" + str(j) not in op.const_expr:
+ ## Avoid unused variable warnings
+ <% continue %>
+ % elif op.input_types[j] == "bool":
+ bool src${j} = _src[${j}].u[_i] != 0;
+ % else:
+ ${op.input_types[j]} src${j} = _src[${j}].${op.input_types[j][:1]}[_i];
+ % endif
+ % endfor
+
+ ## Create an appropriately-typed variable dst and assign the
+ ## result of the const_expr to it. If const_expr already contains
+ ## writes to dst, just include const_expr directly.
+ % if "dst" in op.const_expr:
+ ${op.output_type} dst;
+ ${op.const_expr}
+ % else:
+ ${op.output_type} dst = ${op.const_expr};
+ % endif
+
+ ## Store the current component of the actual destination to the
+ ## value of dst.
+ % if op.output_type == "bool":
+ ## Sanitize the C value to a proper NIR bool
+ _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE;
+ % else:
+ _dst_val.${op.output_type[:1]}[_i] = dst;
+ % endif
+ }
+ % else:
+ ## In the non-per-component case, create a struct dst with
+ ## appropriately-typed elements x, y, z, and w and assign the result
+ ## of the const_expr to all components of dst, or include the
+ ## const_expr directly if it writes to dst already.
+ struct ${op.output_type}_vec dst;
+
+ % if "dst" in op.const_expr:
+ ${op.const_expr}
+ % else:
+ ## Splat the value to all components. This way expressions which
+ ## write the same value to all components don't need to explicitly
+ ## write to dest. One such example is fnoise which has a
+ ## const_expr of 0.0f.
+ dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
+ % endif
+
+ ## For each component in the destination, copy the value of dst to
+ ## the actual destination.
+ % for k in range(op.output_size):
+ % if op.output_type == "bool":
+ ## Sanitize the C value to a proper NIR bool
+ _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
+ % else:
+ _dst_val.${op.output_type[:1]}[${k}] = dst.${"xyzw"[k]};
+ % endif
+ % endfor
+ % endif
+
+ return _dst_val;
+}
+% endfor
+
+## Public entry point: one case per opcode, each forwarding to the matching
+## evaluate_<name>() function generated above.
+nir_const_value
+nir_eval_const_opcode(nir_op op, unsigned num_components,
+                      nir_const_value *src)
+{
+   switch (op) {
+% for name in sorted(opcodes.iterkeys()):
+   case nir_op_${name}:
+      return evaluate_${name}(num_components, src);
+% endfor
+   default:
+      unreachable("shouldn't get here");
+   }
+}"""
+
+# opcodes is the table the template iterates over to emit one evaluator per
+# opcode.
+from nir_opcodes import opcodes
+from mako.template import Template
+
+# Render the template and write the generated C source to stdout.
+print Template(template).render(opcodes=opcodes)
diff --git a/src/compiler/nir/nir_control_flow.c b/src/compiler/nir/nir_control_flow.c
new file mode 100644
index 00000000000..96395a41615
--- /dev/null
+++ b/src/compiler/nir/nir_control_flow.c
@@ -0,0 +1,808 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#include "nir_control_flow_private.h"
+
+/**
+ * \name Control flow modification
+ *
+ * These functions modify the control flow tree while keeping the control flow
+ * graph up-to-date. The invariants respected are:
+ * 1. Each then statement, else statement, or loop body must have at least one
+ * control flow node.
+ * 2. Each if-statement and loop must have one basic block before it and one
+ * after.
+ * 3. Two basic blocks cannot be directly next to each other.
+ * 4. If a basic block has a jump instruction, there must be only one and it
+ * must be at the end of the block.
+ * 5. The CFG must always be connected - this means that we must insert a fake
+ * CFG edge for loops with no break statement.
+ *
+ * The purpose of the second one is so that we have places to insert code during
+ * GCM, as well as eliminating the possibility of critical edges.
+ */
+/*@{*/
+
+/* Returns true if the block is non-empty and its last instruction is a
+ * jump.
+ */
+static bool
+block_ends_in_jump(nir_block *block)
+{
+   if (exec_list_is_empty(&block->instr_list))
+      return false;
+
+   nir_instr *last = nir_block_last_instr(block);
+   return last->type == nir_instr_type_jump;
+}
+
+/* Record pred in block's predecessor set.  Does not touch
+ * pred->successors; callers keep the two sides in sync.
+ */
+static inline void
+block_add_pred(nir_block *block, nir_block *pred)
+{
+ _mesa_set_add(block->predecessors, pred);
+}
+
+/* Remove pred from block's predecessor set.  pred must currently be in the
+ * set; this is asserted.
+ */
+static inline void
+block_remove_pred(nir_block *block, nir_block *pred)
+{
+ struct set_entry *entry = _mesa_set_search(block->predecessors, pred);
+
+ assert(entry);
+
+ _mesa_set_remove(block->predecessors, entry);
+}
+
+/* Set both successor slots of pred (either may be NULL) and add pred to the
+ * predecessor set of every non-NULL successor.  Whatever the slots held
+ * before is overwritten without being unlinked.
+ */
+static void
+link_blocks(nir_block *pred, nir_block *succ1, nir_block *succ2)
+{
+   nir_block *succs[2] = { succ1, succ2 };
+
+   for (unsigned i = 0; i < 2; i++) {
+      pred->successors[i] = succs[i];
+      if (succs[i] != NULL)
+         block_add_pred(succs[i], pred);
+   }
+}
+
+/* Remove the edge pred -> succ.  succ must be one of pred's successors.
+ * If it was the first one, the second successor moves into the first
+ * slot; in both cases the second slot ends up NULL.
+ */
+static void
+unlink_blocks(nir_block *pred, nir_block *succ)
+{
+   if (pred->successors[0] == succ) {
+      /* Shift the other successor down into slot 0. */
+      pred->successors[0] = pred->successors[1];
+   } else {
+      assert(pred->successors[1] == succ);
+   }
+   pred->successors[1] = NULL;
+
+   block_remove_pred(succ, pred);
+}
+
+/* Remove every outgoing edge of block, second successor first. */
+static void
+unlink_block_successors(nir_block *block)
+{
+   for (int slot = 1; slot >= 0; slot--) {
+      nir_block *succ = block->successors[slot];
+      if (succ != NULL)
+         unlink_blocks(block, succ);
+   }
+}
+
+/* Link the exits of an if or loop node to the basic block that follows it.
+ *
+ * For an if, the last block of each branch is re-pointed at block unless
+ * that branch ends in a jump.  For a loop, a fake edge from the loop's last
+ * block is added in successor slot 1.
+ */
+static void
+link_non_block_to_block(nir_cf_node *node, nir_block *block)
+{
+ if (node->type == nir_cf_node_if) {
+ /*
+ * We're trying to link an if to a block after it; this just means linking
+ * the last block of the then and else branches.
+ */
+
+ nir_if *if_stmt = nir_cf_node_as_if(node);
+
+ nir_cf_node *last_then = nir_if_last_then_node(if_stmt);
+ assert(last_then->type == nir_cf_node_block);
+ nir_block *last_then_block = nir_cf_node_as_block(last_then);
+
+ nir_cf_node *last_else = nir_if_last_else_node(if_stmt);
+ assert(last_else->type == nir_cf_node_block);
+ nir_block *last_else_block = nir_cf_node_as_block(last_else);
+
+ /* A branch that ends in a jump keeps the successors it already has. */
+ if (!block_ends_in_jump(last_then_block)) {
+ unlink_block_successors(last_then_block);
+ link_blocks(last_then_block, block, NULL);
+ }
+
+ if (!block_ends_in_jump(last_else_block)) {
+ unlink_block_successors(last_else_block);
+ link_blocks(last_else_block, block, NULL);
+ }
+ } else {
+ assert(node->type == nir_cf_node_loop);
+
+ /*
+ * We can only get to this codepath if we're inserting a new loop, or
+ * at least a loop with no break statements; we can't insert break
+ * statements into a loop when we haven't inserted it into the CFG
+ * because we wouldn't know which block comes after the loop
+ * and therefore, which block should be the successor of the block with
+ * the break). Therefore, we need to insert a fake edge (see invariant
+ * #5).
+ */
+
+ nir_loop *loop = nir_cf_node_as_loop(node);
+
+ nir_cf_node *last = nir_loop_last_cf_node(loop);
+ assert(last->type == nir_cf_node_block);
+ nir_block *last_block = nir_cf_node_as_block(last);
+
+ /* The fake edge lives in slot 1; slot 0 is left untouched. */
+ last_block->successors[1] = block;
+ block_add_pred(block, last_block);
+ }
+}
+
+/* Link a basic block to the if or loop node that follows it.
+ *
+ * For an if, the block gets the first blocks of the then and else branches
+ * as its two successors.  For a loop, it gets the loop header as its only
+ * successor.  Any successors the block had before are unlinked first.
+ */
+static void
+link_block_to_non_block(nir_block *block, nir_cf_node *node)
+{
+   nir_block *first_succ;
+   nir_block *second_succ = NULL;
+
+   if (node->type == nir_cf_node_if) {
+      nir_if *if_stmt = nir_cf_node_as_if(node);
+
+      nir_cf_node *then_head = nir_if_first_then_node(if_stmt);
+      assert(then_head->type == nir_cf_node_block);
+      first_succ = nir_cf_node_as_block(then_head);
+
+      nir_cf_node *else_head = nir_if_first_else_node(if_stmt);
+      assert(else_head->type == nir_cf_node_block);
+      second_succ = nir_cf_node_as_block(else_head);
+   } else {
+      /* As in link_non_block_to_block(), there is no need to worry about
+       * predecessors of the loop header that might need unlinking.
+       */
+      assert(node->type == nir_cf_node_loop);
+
+      nir_loop *loop = nir_cf_node_as_loop(node);
+
+      nir_cf_node *header = nir_loop_first_cf_node(loop);
+      assert(header->type == nir_cf_node_block);
+      first_succ = nir_cf_node_as_block(header);
+   }
+
+   unlink_block_successors(block);
+   link_blocks(block, first_succ, second_succ);
+}
+
+/**
+ * Replace old_succ, which must be one of block's successors, with new_succ,
+ * keeping the successor slot and updating both predecessor sets.
+ */
+static void
+replace_successor(nir_block *block, nir_block *old_succ, nir_block *new_succ)
+{
+   const unsigned slot = (block->successors[0] == old_succ) ? 0 : 1;
+
+   assert(block->successors[slot] == old_succ);
+   block->successors[slot] = new_succ;
+
+   block_remove_pred(old_succ, block);
+   block_add_pred(new_succ, block);
+}
+
+/**
+ * Takes a basic block and inserts a new empty basic block before it, making its
+ * predecessors point to the new block. This essentially splits the block into
+ * an empty header and a body so that another non-block CF node can be inserted
+ * between the two. Note that this does *not* link the two basic blocks, so
+ * some kind of cleanup *must* be performed after this call.
+ *
+ * Returns the newly created block.
+ */
+
+static nir_block *
+split_block_beginning(nir_block *block)
+{
+ nir_block *new_block = nir_block_create(ralloc_parent(block));
+ new_block->cf_node.parent = block->cf_node.parent;
+ exec_node_insert_node_before(&block->cf_node.node, &new_block->cf_node.node);
+
+ /* Redirect every incoming edge of block to new_block. */
+ struct set_entry *entry;
+ set_foreach(block->predecessors, entry) {
+ nir_block *pred = (nir_block *) entry->key;
+ replace_successor(pred, block, new_block);
+ }
+
+ /* Any phi nodes must stay part of the new block, or else their
+ * sources will be messed up. This will reverse the order of the phi's, but
+ * order shouldn't matter.
+ */
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_phi)
+ break;
+
+ exec_node_remove(&instr->node);
+ instr->block = new_block;
+ exec_list_push_head(&new_block->instr_list, &instr->node);
+ }
+
+ return new_block;
+}
+
+/* For each phi at the top of block, change the first source whose
+ * predecessor is old_pred so that it names new_pred instead.
+ */
+static void
+rewrite_phi_preds(nir_block *block, nir_block *old_pred, nir_block *new_pred)
+{
+   nir_foreach_instr_safe(block, instr) {
+      /* Stop at the first instruction that is not a phi. */
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      nir_foreach_phi_src(phi, phi_src) {
+         if (phi_src->pred != old_pred)
+            continue;
+
+         phi_src->pred = new_pred;
+         break;
+      }
+   }
+}
+
+/* For every phi at the top of block, append a source for predecessor pred
+ * whose value is a freshly created SSA undef.  Each undef instruction is
+ * inserted before the cf list of the enclosing function's body.
+ */
+static void
+insert_phi_undef(nir_block *block, nir_block *pred)
+{
+ nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node);
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_phi)
+ break;
+
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+ /* The undef has as many components as the phi's destination. */
+ nir_ssa_undef_instr *undef =
+ nir_ssa_undef_instr_create(ralloc_parent(phi),
+ phi->dest.ssa.num_components);
+ nir_instr_insert_before_cf_list(&impl->body, &undef->instr);
+ nir_phi_src *src = ralloc(phi, nir_phi_src);
+ src->pred = pred;
+ src->src.parent_instr = &phi->instr;
+ src->src.is_ssa = true;
+ src->src.ssa = &undef->def;
+
+ /* Register the new source as a use of the undef by hand. */
+ list_addtail(&src->src.use_link, &undef->def.uses);
+
+ exec_list_push_tail(&phi->srcs, &src->node);
+ }
+}
+
+/**
+ * Moves the successors of source to the successors of dest, leaving both
+ * successors of source NULL.
+ *
+ * Phi sources in the successors that named source as their predecessor are
+ * rewritten to name dest.  Any successors dest had before are unlinked.
+ */
+
+static void
+move_successors(nir_block *source, nir_block *dest)
+{
+ /* Read both successors up front: unlink_blocks() shifts the slots. */
+ nir_block *succ1 = source->successors[0];
+ nir_block *succ2 = source->successors[1];
+
+ if (succ1) {
+ unlink_blocks(source, succ1);
+ rewrite_phi_preds(succ1, source, dest);
+ }
+
+ if (succ2) {
+ unlink_blocks(source, succ2);
+ rewrite_phi_preds(succ2, source, dest);
+ }
+
+ unlink_block_successors(dest);
+ link_blocks(dest, succ1, succ2);
+}
+
+/* Given a basic block with no successors that has been inserted into the
+ * control flow tree, gives it the successors it would normally have assuming
+ * it doesn't end in a jump instruction. Also inserts phi sources with undefs
+ * if necessary.
+ */
+static void
+block_add_normal_succs(nir_block *block)
+{
+ if (exec_node_is_tail_sentinel(block->cf_node.node.next)) {
+ /* block is the last node of its list, so the successor depends on the
+ * parent: an if falls through to the block after it, a loop goes back
+ * to its first block.
+ */
+ nir_cf_node *parent = block->cf_node.parent;
+ if (parent->type == nir_cf_node_if) {
+ nir_cf_node *next = nir_cf_node_next(parent);
+ assert(next->type == nir_cf_node_block);
+ nir_block *next_block = nir_cf_node_as_block(next);
+
+ link_blocks(block, next_block, NULL);
+ } else {
+ assert(parent->type == nir_cf_node_loop);
+ nir_loop *loop = nir_cf_node_as_loop(parent);
+
+ nir_cf_node *head = nir_loop_first_cf_node(loop);
+ assert(head->type == nir_cf_node_block);
+ nir_block *head_block = nir_cf_node_as_block(head);
+
+ link_blocks(block, head_block, NULL);
+ insert_phi_undef(head_block, block);
+ }
+ } else {
+ /* block is followed by an if or a loop in the same list. */
+ nir_cf_node *next = nir_cf_node_next(&block->cf_node);
+ if (next->type == nir_cf_node_if) {
+ nir_if *next_if = nir_cf_node_as_if(next);
+
+ nir_cf_node *first_then = nir_if_first_then_node(next_if);
+ assert(first_then->type == nir_cf_node_block);
+ nir_block *first_then_block = nir_cf_node_as_block(first_then);
+
+ nir_cf_node *first_else = nir_if_first_else_node(next_if);
+ assert(first_else->type == nir_cf_node_block);
+ nir_block *first_else_block = nir_cf_node_as_block(first_else);
+
+ link_blocks(block, first_then_block, first_else_block);
+ } else {
+ assert(next->type == nir_cf_node_loop);
+ nir_loop *next_loop = nir_cf_node_as_loop(next);
+
+ nir_cf_node *first = nir_loop_first_cf_node(next_loop);
+ assert(first->type == nir_cf_node_block);
+ nir_block *first_block = nir_cf_node_as_block(first);
+
+ link_blocks(block, first_block, NULL);
+ insert_phi_undef(first_block, block);
+ }
+ }
+}
+
+/* Insert a new empty block right after block and return it.
+ *
+ * If block does not end in a jump, its successors are moved to the new
+ * block.  If it does, block keeps its successors and the new block gets
+ * the successors block would have had without the jump.
+ */
+static nir_block *
+split_block_end(nir_block *block)
+{
+   nir_block *tail = nir_block_create(ralloc_parent(block));
+
+   tail->cf_node.parent = block->cf_node.parent;
+   exec_node_insert_after(&block->cf_node.node, &tail->cf_node.node);
+
+   if (!block_ends_in_jump(block)) {
+      move_successors(block, tail);
+      return tail;
+   }
+
+   /* Figure out what successor block would've had if it didn't have a jump
+    * instruction, and make the new block have that successor.
+    */
+   block_add_normal_succs(tail);
+
+   return tail;
+}
+
+/* Split instr's block in front of instr, which must not be a phi.
+ *
+ * A new block is created before the original one via
+ * split_block_beginning(), and every instruction preceding instr is moved
+ * to the end of it.  Returns the new block.
+ */
+static nir_block *
+split_block_before_instr(nir_instr *instr)
+{
+   assert(instr->type != nir_instr_type_phi);
+
+   nir_block *orig = instr->block;
+   nir_block *head = split_block_beginning(orig);
+
+   nir_foreach_instr_safe(orig, moved) {
+      if (moved == instr)
+         break;
+
+      exec_node_remove(&moved->node);
+      moved->block = head;
+      exec_list_push_tail(&head->instr_list, &moved->node);
+   }
+
+   return head;
+}
+
+/* Splits a basic block at the point specified by the cursor. The "before" and
+ * "after" arguments are filled out with the blocks resulting from the split
+ * if non-NULL. Note that the "beginning" of the block is actually interpreted
+ * as before the first non-phi instruction, and it's illegal to split a block
+ * before a phi instruction.
+ */
+
+static void
+split_block_cursor(nir_cursor cursor,
+ nir_block **_before, nir_block **_after)
+{
+ nir_block *before, *after;
+ switch (cursor.option) {
+ case nir_cursor_before_block:
+ /* The existing block becomes "after"; the new block is "before". */
+ after = cursor.block;
+ before = split_block_beginning(cursor.block);
+ break;
+
+ case nir_cursor_after_block:
+ /* The existing block becomes "before"; the new block is "after". */
+ before = cursor.block;
+ after = split_block_end(cursor.block);
+ break;
+
+ case nir_cursor_before_instr:
+ after = cursor.instr->block;
+ before = split_block_before_instr(cursor.instr);
+ break;
+
+ case nir_cursor_after_instr:
+ /* We lower this to split_block_before_instr() so that we can keep the
+ * after-a-jump-instr case contained to split_block_end().
+ */
+ if (nir_instr_is_last(cursor.instr)) {
+ before = cursor.instr->block;
+ after = split_block_end(cursor.instr->block);
+ } else {
+ after = cursor.instr->block;
+ before = split_block_before_instr(nir_instr_next(cursor.instr));
+ }
+ break;
+
+ default:
+ unreachable("not reached");
+ }
+
+ /* Both output pointers are optional. */
+ if (_before)
+ *_before = before;
+ if (_after)
+ *_after = after;
+}
+
+/**
+ * Inserts a non-basic block between two basic blocks and links them together.
+ *
+ * node takes before's parent, is placed right after before in the list,
+ * and is then linked to before on entry and to after on exit.
+ */
+
+static void
+insert_non_block(nir_block *before, nir_cf_node *node, nir_block *after)
+{
+ node->parent = before->cf_node.parent;
+ exec_node_insert_after(&before->cf_node.node, &node->node);
+ link_block_to_non_block(before, node);
+ link_non_block_to_block(node, after);
+}
+
+/* Walk up the control flow tree from node and return the innermost loop
+ * enclosing it (node itself, if node is a loop).  The caller must ensure
+ * that such a loop exists.
+ */
+static nir_loop *
+nearest_loop(nir_cf_node *node)
+{
+   nir_cf_node *cur;
+
+   for (cur = node; cur->type != nir_cf_node_loop; cur = cur->parent)
+      ;
+
+   return nir_cf_node_as_loop(cur);
+}
+
+/*
+ * update the CFG after a jump instruction has been added to the end of a block
+ *
+ * The block's old successors are dropped and replaced by the jump target:
+ * the loop's first block for a continue, the block after the loop for a
+ * break, and the function's end block for a return.  All metadata of the
+ * enclosing function is invalidated.
+ */
+
+void
+nir_handle_add_jump(nir_block *block)
+{
+ nir_instr *instr = nir_block_last_instr(block);
+ nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
+
+ unlink_block_successors(block);
+
+ nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node);
+ nir_metadata_preserve(impl, nir_metadata_none);
+
+ if (jump_instr->type == nir_jump_break ||
+ jump_instr->type == nir_jump_continue) {
+ nir_loop *loop = nearest_loop(&block->cf_node);
+
+ if (jump_instr->type == nir_jump_continue) {
+ nir_cf_node *first_node = nir_loop_first_cf_node(loop);
+ assert(first_node->type == nir_cf_node_block);
+ nir_block *first_block = nir_cf_node_as_block(first_node);
+ link_blocks(block, first_block, NULL);
+ } else {
+ nir_cf_node *after = nir_cf_node_next(&loop->cf_node);
+ assert(after->type == nir_cf_node_block);
+ nir_block *after_block = nir_cf_node_as_block(after);
+ link_blocks(block, after_block, NULL);
+
+ /* If we inserted a fake link, remove it */
+ nir_cf_node *last = nir_loop_last_cf_node(loop);
+ assert(last->type == nir_cf_node_block);
+ nir_block *last_block = nir_cf_node_as_block(last);
+ /* The fake link, when present, is in successor slot 1. */
+ if (last_block->successors[1] != NULL)
+ unlink_blocks(last_block, after_block);
+ }
+ } else {
+ assert(jump_instr->type == nir_jump_return);
+ link_blocks(block, impl->end_block, NULL);
+ }
+}
+
+/* Remove from every phi at the top of block all sources whose predecessor
+ * is pred, unhooking each removed source from its use list as well.
+ */
+static void
+remove_phi_src(nir_block *block, nir_block *pred)
+{
+   nir_foreach_instr(block, instr) {
+      /* Stop at the first instruction that is not a phi. */
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      nir_foreach_phi_src_safe(phi, phi_src) {
+         if (phi_src->pred != pred)
+            continue;
+
+         list_del(&phi_src->src.use_link);
+         exec_node_remove(&phi_src->node);
+      }
+   }
+}
+
+/* Removes the successor of a block with a jump, and inserts a fake edge for
+ * infinite loops. Note that the jump to be eliminated may be free-floating.
+ *
+ * type is the kind of jump being removed.  When add_normal_successors is
+ * true the block is given the successors it would have without a jump.
+ */
+
+static void
+unlink_jump(nir_block *block, nir_jump_type type, bool add_normal_successors)
+{
+ /* Remember the jump target; it is needed after the edges are gone. */
+ nir_block *next = block->successors[0];
+
+ if (block->successors[0])
+ remove_phi_src(block->successors[0], block);
+ if (block->successors[1])
+ remove_phi_src(block->successors[1], block);
+
+ unlink_block_successors(block);
+ if (add_normal_successors)
+ block_add_normal_succs(block);
+
+ /* If we've just removed a break, and the block we were jumping to (after
+ * the loop) now has zero predecessors, we've created a new infinite loop.
+ *
+ * NIR doesn't allow blocks (other than the start block) to have zero
+ * predecessors. In particular, dominance assumes all blocks are reachable.
+ * So, we insert a "fake link" by making successors[1] point after the loop.
+ *
+ * Note that we have to do this after unlinking/recreating the block's
+ * successors. If we removed a "break" at the end of the loop, then
+ * block == last_block, so block->successors[0] would already be "next",
+ * and adding a fake link would create two identical successors. Doing
+ * this afterward works, as we'll have changed block->successors[0] to
+ * be the top of the loop.
+ */
+ if (type == nir_jump_break && next->predecessors->entries == 0) {
+ /* The node right before the break target is taken to be the loop. */
+ nir_loop *loop =
+ nir_cf_node_as_loop(nir_cf_node_prev(&next->cf_node));
+
+ /* insert fake link */
+ nir_cf_node *last = nir_loop_last_cf_node(loop);
+ assert(last->type == nir_cf_node_block);
+ nir_block *last_block = nir_cf_node_as_block(last);
+
+ last_block->successors[1] = next;
+ block_add_pred(next, last_block);
+ }
+}
+
+ /* Fixes up the CFG after a jump of the given type was removed from "block":
+  * the jump's edges are dropped, the normal successors are re-added, and all
+  * metadata of the containing function is invalidated.
+  */
+ void
+ nir_handle_remove_jump(nir_block *block, nir_jump_type type)
+ {
+    unlink_jump(block, type, true);
+
+    nir_metadata_preserve(nir_cf_node_get_function(&block->cf_node),
+                          nir_metadata_none);
+ }
+
+ /* If "node" is an if statement, registers its condition in the if_uses list
+  * of the SSA def or register it reads.  Any other node type is ignored.
+  */
+ static void
+ update_if_uses(nir_cf_node *node)
+ {
+    if (node->type != nir_cf_node_if)
+       return;
+
+    nir_if *if_stmt = nir_cf_node_as_if(node);
+    nir_src *cond = &if_stmt->condition;
+
+    cond->parent_if = if_stmt;
+
+    /* The use list lives on the SSA def or on the register, depending on
+     * what kind of source the condition is.
+     */
+    if (cond->is_ssa)
+       list_addtail(&cond->use_link, &cond->ssa->if_uses);
+    else
+       list_addtail(&cond->use_link, &cond->reg.reg->if_uses);
+ }
+
+/**
+ * Stitch two basic blocks together into one. The aggregate must have the same
+ * predecessors as the first and the same successors as the second.
+ */
+
+static void
+stitch_blocks(nir_block *before, nir_block *after)
+{
+ /*
+ * We move after into before, so we have to deal with up to 2 successors vs.
+ * possibly a large number of predecessors.
+ *
+ * TODO: special case when before is empty and after isn't?
+ */
+
+ if (block_ends_in_jump(before)) {
+ assert(exec_list_is_empty(&after->instr_list));
+ if (after->successors[0])
+ remove_phi_src(after->successors[0], after);
+ if (after->successors[1])
+ remove_phi_src(after->successors[1], after);
+ unlink_block_successors(after);
+ exec_node_remove(&after->cf_node.node);
+ } else {
+ move_successors(after, before);
+
+ foreach_list_typed(nir_instr, instr, node, &after->instr_list) {
+ instr->block = before;
+ }
+
+ exec_list_append(&before->instr_list, &after->instr_list);
+ exec_node_remove(&after->cf_node.node);
+ }
+}
+
+ /* Inserts the control flow node "node" at "cursor".  The block under the
+  * cursor is split; a block node is then stitched into both halves, while any
+  * other node is placed between them.
+  */
+ void
+ nir_cf_node_insert(nir_cursor cursor, nir_cf_node *node)
+ {
+    nir_block *before, *after;
+
+    split_block_cursor(cursor, &before, &after);
+
+    if (node->type != nir_cf_node_block) {
+       update_if_uses(node);
+       insert_non_block(before, node, after);
+       return;
+    }
+
+    nir_block *new_block = nir_cf_node_as_block(node);
+    exec_node_insert_after(&before->cf_node.node, &new_block->cf_node.node);
+    new_block->cf_node.parent = before->cf_node.parent;
+
+    /* stitch_blocks() assumes that any block that ends with a jump has
+     * already been set up with the correct successors, so the jump has to be
+     * handled here, while the block is being inserted.
+     */
+    if (block_ends_in_jump(new_block))
+       nir_handle_add_jump(new_block);
+
+    stitch_blocks(new_block, after);
+    stitch_blocks(before, new_block);
+ }
+
+ /* nir_foreach_ssa_def() callback: creates an SSA undef with the same number
+  * of components as "def" at the start of the function body and rewrites all
+  * uses of "def" to it.  "void_impl" is the nir_function_impl.  Always
+  * returns true.
+  */
+ static bool
+ replace_ssa_def_uses(nir_ssa_def *def, void *void_impl)
+ {
+    nir_function_impl *func = void_impl;
+
+    nir_ssa_undef_instr *replacement =
+       nir_ssa_undef_instr_create(ralloc_parent(func), def->num_components);
+
+    nir_instr_insert_before_cf_list(&func->body, &replacement->instr);
+    nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(&replacement->def));
+
+    return true;
+ }
+
+ /* Recursively tears down the contents of "node": jumps are unlinked, every
+  * other instruction has the uses of its SSA defs redirected to undefs in
+  * "impl" and is removed, and if conditions are taken off their use lists.
+  */
+ static void
+ cleanup_cf_node(nir_cf_node *node, nir_function_impl *impl)
+ {
+    switch (node->type) {
+    case nir_cf_node_block: {
+       nir_block *block = nir_cf_node_as_block(node);
+
+       /* Walk the instructions and clean up defs/uses. */
+       nir_foreach_instr_safe(block, cur) {
+          if (cur->type != nir_instr_type_jump) {
+             nir_foreach_ssa_def(cur, replace_ssa_def_uses, impl);
+             nir_instr_remove(cur);
+             continue;
+          }
+
+          /* Jumps stay in place; only their CFG edges are dropped, without
+           * re-adding normal successors.
+           */
+          unlink_jump(block, nir_instr_as_jump(cur)->type, false);
+       }
+       break;
+    }
+
+    case nir_cf_node_if: {
+       nir_if *if_stmt = nir_cf_node_as_if(node);
+
+       foreach_list_typed(nir_cf_node, then_child, node, &if_stmt->then_list)
+          cleanup_cf_node(then_child, impl);
+       foreach_list_typed(nir_cf_node, else_child, node, &if_stmt->else_list)
+          cleanup_cf_node(else_child, impl);
+
+       list_del(&if_stmt->condition.use_link);
+       break;
+    }
+
+    case nir_cf_node_loop: {
+       nir_loop *loop = nir_cf_node_as_loop(node);
+
+       foreach_list_typed(nir_cf_node, body_child, node, &loop->body)
+          cleanup_cf_node(body_child, impl);
+       break;
+    }
+
+    case nir_cf_node_function: {
+       /* The children are cleaned up against the function being walked,
+        * not against the "impl" argument.
+        */
+       nir_function_impl *func = nir_cf_node_as_function(node);
+
+       foreach_list_typed(nir_cf_node, func_child, node, &func->body)
+          cleanup_cf_node(func_child, func);
+       break;
+    }
+
+    default:
+       unreachable("Invalid CF node type");
+    }
+ }
+
+ /* Detaches the control flow between the cursors "begin" and "end" from the
+  * IR and hands it back in "extracted".  All metadata of the function is
+  * invalidated.
+  */
+ void
+ nir_cf_extract(nir_cf_list *extracted, nir_cursor begin, nir_cursor end)
+ {
+    nir_block *block_begin, *block_end, *block_before, *block_after;
+
+    /* When begin points at an instruction in some basic block and end points
+     * at the end of that same block, this relies on the fact that splitting
+     * on an instruction moves the earlier instructions into a new basic
+     * block.  Were the later instructions moved instead, the end cursor
+     * would end up pointing to where begin used to point, which is obviously
+     * not what we want.
+     */
+    split_block_cursor(begin, &block_before, &block_begin);
+    split_block_cursor(end, &block_end, &block_after);
+
+    extracted->impl = nir_cf_node_get_function(&block_begin->cf_node);
+    exec_list_make_empty(&extracted->list);
+
+    /* Dominance and other block-related information is toast. */
+    nir_metadata_preserve(extracted->impl, nir_metadata_none);
+
+    /* Move every node from block_begin through block_end, inclusive, onto
+     * the extracted list.  The successor is fetched before the node is
+     * unlinked.
+     */
+    nir_cf_node *const stop = &block_end->cf_node;
+    nir_cf_node *following;
+    for (nir_cf_node *cur = &block_begin->cf_node; ; cur = following) {
+       following = nir_cf_node_next(cur);
+
+       exec_node_remove(&cur->node);
+       cur->parent = NULL;
+       exec_list_push_tail(&extracted->list, &cur->node);
+
+       if (cur == stop)
+          break;
+    }
+
+    stitch_blocks(block_before, block_after);
+ }
+
+ /* Re-inserts a previously extracted control flow list at "cursor".
+  *
+  * The block under the cursor is split in two, every node of "cf_list" is
+  * moved in between the halves (emptying the list), and the first and last
+  * inserted blocks are stitched to the halves.
+  *
+  * An empty list is a no-op.  Without this check nothing would sit between
+  * the two halves, the first stitch_blocks() would merge and unlink "after",
+  * and the second would then walk from an already unlinked node.
+  */
+ void
+ nir_cf_reinsert(nir_cf_list *cf_list, nir_cursor cursor)
+ {
+    nir_block *before, *after;
+
+    if (exec_list_is_empty(&cf_list->list))
+       return;
+
+    split_block_cursor(cursor, &before, &after);
+
+    foreach_list_typed_safe(nir_cf_node, node, node, &cf_list->list) {
+       exec_node_remove(&node->node);
+       node->parent = before->cf_node.parent;
+       exec_node_insert_node_before(&after->cf_node.node, &node->node);
+    }
+
+    /* The node following "before" and the node preceding "after" are cast to
+     * blocks here, i.e. the inserted list is expected to begin and end with
+     * a block.
+     */
+    stitch_blocks(before,
+                  nir_cf_node_as_block(nir_cf_node_next(&before->cf_node)));
+    stitch_blocks(nir_cf_node_as_block(nir_cf_node_prev(&after->cf_node)),
+                  after);
+ }
+
+ /* Cleans up every node of an extracted control flow list against the
+  * function implementation recorded in the list.
+  */
+ void
+ nir_cf_delete(nir_cf_list *cf_list)
+ {
+    nir_function_impl *owner = cf_list->impl;
+
+    foreach_list_typed(nir_cf_node, doomed, node, &cf_list->list)
+       cleanup_cf_node(doomed, owner);
+ }
diff --git a/src/compiler/nir/nir_control_flow.h b/src/compiler/nir/nir_control_flow.h
new file mode 100644
index 00000000000..b71382fc597
--- /dev/null
+++ b/src/compiler/nir/nir_control_flow.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#include "nir.h"
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** NIR Control Flow Modification
+ *
+ * This file contains various API's that make modifying control flow in NIR,
+ * while maintaining the invariants checked by the validator, much easier.
+ * There are two parts to this:
+ *
+ * 1. Inserting control flow (if's and loops) in various places, for creating
+ * IR either from scratch or as part of some lowering pass.
+ * 2. Taking existing pieces of the IR and either moving them around or
+ * deleting them.
+ */
+
+/** Control flow insertion. */
+
+/** puts a control flow node where the cursor is */
+void nir_cf_node_insert(nir_cursor cursor, nir_cf_node *node);
+
+ /** inserts the control flow node "after" immediately following "node" */
+ static inline void
+ nir_cf_node_insert_after(nir_cf_node *node, nir_cf_node *after)
+ {
+    nir_cursor where = nir_after_cf_node(node);
+
+    nir_cf_node_insert(where, after);
+ }
+
+ /** inserts the control flow node "before" immediately preceding "node" */
+ static inline void
+ nir_cf_node_insert_before(nir_cf_node *node, nir_cf_node *before)
+ {
+    nir_cursor where = nir_before_cf_node(node);
+
+    nir_cf_node_insert(where, before);
+ }
+
+ /** inserts a control flow node at the start of the list of an if, loop, or function */
+ static inline void
+ nir_cf_node_insert_begin(struct exec_list *list, nir_cf_node *node)
+ {
+    nir_cursor where = nir_before_cf_list(list);
+
+    nir_cf_node_insert(where, node);
+ }
+
+ /** inserts a control flow node at the end of the list of an if, loop, or function */
+ static inline void
+ nir_cf_node_insert_end(struct exec_list *list, nir_cf_node *node)
+ {
+    nir_cursor where = nir_after_cf_list(list);
+
+    nir_cf_node_insert(where, node);
+ }
+
+
+/** Control flow motion.
+ *
+ * These functions let you take a part of a control flow list (basically
+ * equivalent to a series of statements in GLSL) and "extract" it from the IR,
+ * so that it's a free-floating piece of IR that can be either re-inserted
+ * somewhere else or deleted entirely. A few notes on using it:
+ *
+ * 1. Phi nodes are considered attached to the piece of control flow that
+ * their sources come from. There are three places where phi nodes can
+ * occur, which are the three places where a block can have multiple
+ * predecessors:
+ *
+ * 1) After an if statement, if neither branch ends in a jump.
+ * 2) After a loop, if there are multiple break's.
+ * 3) At the beginning of a loop.
+ *
+ * For #1, the phi node is considered to be part of the if, and for #2 and
+ * #3 the phi node is considered to be part of the loop. This allows us to
+ * keep phi's intact, but it means that phi nodes cannot be separated from
+ * the control flow they come from. For example, extracting an if without
+ * extracting all the phi nodes after it is not allowed, and neither is
+ * extracting only some of the phi nodes at the beginning of a block. It
+ * also means that extracting from the beginning of a basic block actually
+ * means extracting from the first non-phi instruction, since there's no
+ * situation where extracting phi nodes without extracting what comes
+ * before them makes any sense.
+ *
+ * 2. Phi node sources are guaranteed to remain valid, meaning that they still
+ * correspond one-to-one with the predecessors of the basic block they're
+ * part of. In addition, the original sources will be preserved unless they
+ * correspond to a break or continue that was deleted. However, no attempt
+ * is made to ensure that SSA form is maintained. In particular, it is
+ * *not* guaranteed that definitions of SSA values will dominate all their
+ * uses after all is said and done. Either the caller must ensure that this
+ * is the case, or it must insert extra phi nodes to restore SSA.
+ *
+ * 3. It is invalid to move a piece of IR with a break/continue outside of the
+ * loop it references. Doing this will result in invalid
+ * successors/predecessors and phi node sources.
+ *
+ * 4. It is invalid to move a piece of IR from one function implementation to
+ * another.
+ *
+ * 5. Extracting a control flow list will leave lots of dangling references to
+ * and from other pieces of the IR. It also leaves things in a not 100%
+ * consistent state. This means that some things (e.g. inserting
+ * instructions) might not work reliably on the extracted control flow. It
+ * also means that extracting control flow without re-inserting it or
+ * deleting it is a Bad Thing (tm).
+ */
+
+ typedef struct { /* a run of control flow nodes detached by nir_cf_extract() */
+ struct exec_list list; /* the extracted nir_cf_node's, in their original order */
+ nir_function_impl *impl; /* for cleaning up if the list is deleted */
+ } nir_cf_list;
+
+void nir_cf_extract(nir_cf_list *extracted, nir_cursor begin, nir_cursor end);
+
+void nir_cf_reinsert(nir_cf_list *cf_list, nir_cursor cursor);
+
+void nir_cf_delete(nir_cf_list *cf_list);
+
+ /** extracts the entire contents of a control flow list into "extracted" */
+ static inline void
+ nir_cf_list_extract(nir_cf_list *extracted, struct exec_list *cf_list)
+ {
+    nir_cursor first = nir_before_cf_list(cf_list);
+    nir_cursor last = nir_after_cf_list(cf_list);
+
+    nir_cf_extract(extracted, first, last);
+ }
+
+ /** removes a control flow node by extracting just that node and deleting it */
+ static inline void
+ nir_cf_node_remove(nir_cf_node *node)
+ {
+    nir_cf_list doomed;
+    nir_cursor first = nir_before_cf_node(node);
+    nir_cursor last = nir_after_cf_node(node);
+
+    nir_cf_extract(&doomed, first, last);
+    nir_cf_delete(&doomed);
+ }
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/compiler/nir/nir_control_flow_private.h b/src/compiler/nir/nir_control_flow_private.h
new file mode 100644
index 00000000000..f32b57a8cef
--- /dev/null
+++ b/src/compiler/nir/nir_control_flow_private.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#include "nir_control_flow.h"
+
+#pragma once
+
+/* Internal control-flow modification functions used when inserting/removing
+ * instructions.
+ */
+
+void nir_handle_add_jump(nir_block *block);
+void nir_handle_remove_jump(nir_block *block, nir_jump_type type);
diff --git a/src/compiler/nir/nir_dominance.c b/src/compiler/nir/nir_dominance.c
new file mode 100644
index 00000000000..b345b85e8a0
--- /dev/null
+++ b/src/compiler/nir/nir_dominance.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements the algorithms for computing the dominance tree and the
+ * dominance frontier from "A Simple, Fast Dominance Algorithm" by Cooper,
+ * Harvey, and Kennedy.
+ */
+
+ typedef struct { /* state handed to the dominance block callbacks through their void * argument */
+ nir_function_impl *impl; /* function being processed; used to recognise its start block */
+ bool progress; /* set by calc_dominance_cb whenever an immediate dominator changed */
+ } dom_state;
+
+ /* Block callback that resets the dominance data of "block": the start block
+  * becomes its own immediate dominator, every other block gets none, the
+  * child count is zeroed and the dominance frontier set is emptied.
+  * "_state" is a dom_state.  Always returns true.
+  */
+ static bool
+ init_block_cb(nir_block *block, void *_state)
+ {
+    dom_state *state = (dom_state *) _state;
+    const bool is_start = (block == nir_start_block(state->impl));
+
+    block->imm_dom = is_start ? block : NULL;
+    block->num_dom_children = 0;
+
+    struct set_entry *stale;
+    set_foreach(block->dom_frontier, stale) {
+       _mesa_set_remove(block->dom_frontier, stale);
+    }
+
+    return true;
+ }
+
+ /* Walks b1 and b2 up their immediate-dominator chains until they meet and
+  * returns the block they meet at.
+  */
+ static nir_block *
+ intersect(nir_block *b1, nir_block *b2)
+ {
+    while (b1 != b2) {
+       /*
+        * Note, the comparisons here are the opposite of what the paper says
+        * because we index blocks from beginning -> end (i.e. reverse
+        * post-order) instead of post-order like they assume.
+        *
+        * Each iteration advances whichever side has the larger index by one
+        * step; with equal indices neither side moves.
+        */
+       if (b1->index > b2->index)
+          b1 = b1->imm_dom;
+       else if (b2->index > b1->index)
+          b2 = b2->imm_dom;
+    }
+
+    return b1;
+ }
+
+ /* Block callback for one iteration step: recomputes the immediate dominator
+  * of "block" as the intersection of all predecessors that already have one,
+  * and flags progress in the dom_state if it changed.  The start block is
+  * skipped.  Always returns true.
+  */
+ static bool
+ calc_dominance_cb(nir_block *block, void *_state)
+ {
+    dom_state *state = (dom_state *) _state;
+
+    if (block == nir_start_block(state->impl))
+       return true;
+
+    nir_block *idom = NULL;
+    struct set_entry *entry;
+    set_foreach(block->predecessors, entry) {
+       nir_block *pred = (nir_block *) entry->key;
+
+       /* Predecessors without an immediate dominator do not contribute. */
+       if (!pred->imm_dom)
+          continue;
+
+       idom = idom ? intersect(pred, idom) : pred;
+    }
+
+    assert(idom);
+    if (block->imm_dom == idom)
+       return true;
+
+    block->imm_dom = idom;
+    state->progress = true;
+    return true;
+ }
+
+ /* Block callback: for a block with more than one predecessor, adds the block
+  * to the dominance frontier of every block on each predecessor's
+  * immediate-dominator chain, stopping at the block's own immediate
+  * dominator.  "state" is unused.  Always returns true.
+  */
+ static bool
+ calc_dom_frontier_cb(nir_block *block, void *state)
+ {
+    (void) state;
+
+    if (block->predecessors->entries <= 1)
+       return true;
+
+    struct set_entry *entry;
+    set_foreach(block->predecessors, entry) {
+       for (nir_block *runner = (nir_block *) entry->key;
+            runner != block->imm_dom;
+            runner = runner->imm_dom) {
+          _mesa_set_add(runner->dom_frontier, block);
+       }
+    }
+
+    return true;
+ }
+
+/*
+ * Compute each node's children in the dominance tree from the immediate
+ * dominator information. We do this in three stages:
+ *
+ * 1. Calculate the number of children each node has
+ * 2. Allocate arrays, setting the number of children to 0 again
+ * 3. For each node, add itself to its parent's list of children, using
+ * num_dom_children as an index - at the end of this step, num_dom_children
+ * for each node will be the same as it was at the end of step #1.
+ */
+
+ /* Stage 1: bumps the child count of the block's immediate dominator, if it
+  * has one.  "state" is unused.  Always returns true.
+  */
+ static bool
+ block_count_children(nir_block *block, void *state)
+ {
+    (void) state;
+
+    nir_block *parent = block->imm_dom;
+    if (parent != NULL)
+       parent->num_dom_children++;
+
+    return true;
+ }
+
+ /* Stage 2: allocates the block's dom_children array, sized by the child
+  * count from stage 1, out of the ralloc context passed as "state", then
+  * resets the count to 0 so stage 3 can use it as an insertion index.
+  * Always returns true.
+  */
+ static bool
+ block_alloc_children(nir_block *block, void *state)
+ {
+    const unsigned num_children = block->num_dom_children;
+
+    block->dom_children = ralloc_array(state, nir_block *, num_children);
+    block->num_dom_children = 0;
+
+    return true;
+ }
+
+ /* Stage 3: appends the block to the dom_children array of its immediate
+  * dominator, if it has one, using the dominator's num_dom_children as the
+  * next free slot.  "state" is unused.  Always returns true.
+  */
+ static bool
+ block_add_child(nir_block *block, void *state)
+ {
+    (void) state;
+
+    nir_block *parent = block->imm_dom;
+    if (parent == NULL)
+       return true;
+
+    parent->dom_children[parent->num_dom_children] = block;
+    parent->num_dom_children++;
+
+    return true;
+ }
+
+ /* Builds the dom_children arrays of every block of "impl" by running the
+  * count, allocate and fill stages in that order.  The arrays are allocated
+  * out of the ralloc parent of "impl".
+  */
+ static void
+ calc_dom_children(nir_function_impl* impl)
+ {
+    nir_foreach_block(impl, block_count_children, NULL);
+    nir_foreach_block(impl, block_alloc_children, ralloc_parent(impl));
+    nir_foreach_block(impl, block_add_child, NULL);
+ }
+
+ /* Depth-first walk over the dominance tree rooted at "block".  Each block
+  * gets dom_pre_index on entry and dom_post_index after all of its children
+  * were visited, both drawn from the running counter "*index".
+  */
+ static void
+ calc_dfs_indicies(nir_block *block, unsigned *index)
+ {
+    block->dom_pre_index = *index;
+    *index += 1;
+
+    for (unsigned child = 0; child < block->num_dom_children; child++)
+       calc_dfs_indicies(block->dom_children[child], index);
+
+    block->dom_post_index = *index;
+    *index += 1;
+ }
+
+ /* Computes immediate dominators, dominance frontiers, dominance-tree
+  * children and DFS pre/post indices for every block of "impl".  Does
+  * nothing if the dominance metadata is already flagged valid.
+  */
+ void
+ nir_calc_dominance_impl(nir_function_impl *impl)
+ {
+    if (impl->valid_metadata & nir_metadata_dominance)
+       return;
+
+    nir_metadata_require(impl, nir_metadata_block_index);
+
+    dom_state state = { .impl = impl, .progress = true };
+
+    nir_foreach_block(impl, init_block_cb, &state);
+
+    /* Iterate to a fixed point: stop after a pass that changed nothing. */
+    do {
+       state.progress = false;
+       nir_foreach_block(impl, calc_dominance_cb, &state);
+    } while (state.progress);
+
+    nir_foreach_block(impl, calc_dom_frontier_cb, &state);
+
+    /* From here on the start block has no immediate dominator, so it is not
+     * counted as anybody's child below.
+     */
+    nir_block *root = nir_start_block(impl);
+    root->imm_dom = NULL;
+
+    calc_dom_children(impl);
+
+    unsigned next_index = 0;
+    calc_dfs_indicies(root, &next_index);
+ }
+
+ /* Runs nir_calc_dominance_impl() on every function of "shader" that has an
+  * implementation.
+  */
+ void
+ nir_calc_dominance(nir_shader *shader)
+ {
+    nir_foreach_function(shader, func) {
+       if (!func->impl)
+          continue;
+
+       nir_calc_dominance_impl(func->impl);
+    }
+ }
+
+/**
+ * Computes the least common anscestor of two blocks. If one of the blocks
+ * is null, the other block is returned.
+ */
+nir_block *
+nir_dominance_lca(nir_block *b1, nir_block *b2)
+{
+ if (b1 == NULL)
+ return b2;
+
+ if (b2 == NULL)
+ return b1;
+
+ assert(nir_cf_node_get_function(&b1->cf_node) ==
+ nir_cf_node_get_function(&b2->cf_node));
+
+ assert(nir_cf_node_get_function(&b1->cf_node)->valid_metadata &
+ nir_metadata_dominance);
+
+ return intersect(b1, b2);
+}
+
+/**
+ * Returns true if parent dominates child
+ */
+bool
+nir_block_dominates(nir_block *parent, nir_block *child)
+{
+ assert(nir_cf_node_get_function(&parent->cf_node) ==
+ nir_cf_node_get_function(&child->cf_node));
+
+ assert(nir_cf_node_get_function(&parent->cf_node)->valid_metadata &
+ nir_metadata_dominance);
+
+ return child->dom_pre_index >= parent->dom_pre_index &&
+ child->dom_post_index <= parent->dom_post_index;
+}
+
+ /* Block callback: prints the dot edge "idom -> block" to the FILE passed as
+  * "state", for blocks that have an immediate dominator.  Always returns
+  * true.
+  */
+ static bool
+ dump_block_dom(nir_block *block, void *state)
+ {
+    FILE *out = state;
+    const nir_block *idom = block->imm_dom;
+
+    if (idom != NULL)
+       fprintf(out, "\t%u -> %u\n", idom->index, block->index);
+
+    return true;
+ }
+
+ /* Writes the dominance tree of "impl" to "fp" as a dot digraph named after
+  * the function.
+  */
+ void
+ nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp)
+ {
+    const char *func_name = impl->function->name;
+
+    fprintf(fp, "digraph doms_%s {\n", func_name);
+    nir_foreach_block(impl, dump_block_dom, fp);
+    fputs("}\n\n", fp);
+ }
+
+ /* Dumps the dominance tree of every function of "shader" that has an
+  * implementation.
+  */
+ void
+ nir_dump_dom_tree(nir_shader *shader, FILE *fp)
+ {
+    nir_foreach_function(shader, func) {
+       if (!func->impl)
+          continue;
+
+       nir_dump_dom_tree_impl(func->impl, fp);
+    }
+ }
+
+ /* Block callback: prints one "DF(block) = {...}" line listing the indices of
+  * the blocks in the block's dominance frontier to the FILE passed as
+  * "state".  Always returns true.
+  */
+ static bool
+ dump_block_dom_frontier(nir_block *block, void *state)
+ {
+    FILE *out = state;
+    struct set_entry *entry;
+
+    fprintf(out, "DF(%u) = {", block->index);
+    set_foreach(block->dom_frontier, entry) {
+       const nir_block *member = (nir_block *) entry->key;
+
+       fprintf(out, "%u, ", member->index);
+    }
+    fputs("}\n", out);
+
+    return true;
+ }
+
+ /* Writes the dominance frontier of every block of "impl" to "fp". */
+ void
+ nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp)
+ {
+    void *out = fp;
+
+    nir_foreach_block(impl, dump_block_dom_frontier, out);
+ }
+
+ /* Dumps the dominance frontiers of every function of "shader" that has an
+  * implementation.
+  */
+ void
+ nir_dump_dom_frontier(nir_shader *shader, FILE *fp)
+ {
+    nir_foreach_function(shader, func) {
+       if (!func->impl)
+          continue;
+
+       nir_dump_dom_frontier_impl(func->impl, fp);
+    }
+ }
+
+ /* Block callback: prints one dot edge per non-NULL successor of "block" to
+  * the FILE passed as "state".  Always returns true.
+  */
+ static bool
+ dump_block_succs(nir_block *block, void *state)
+ {
+    FILE *out = state;
+
+    for (unsigned i = 0; i < 2; i++) {
+       const nir_block *succ = block->successors[i];
+
+       if (succ != NULL)
+          fprintf(out, "\t%u -> %u\n", block->index, succ->index);
+    }
+
+    return true;
+ }
+
+ /* Writes the control flow graph of "impl" to "fp" as a dot digraph named
+  * after the function.
+  */
+ void
+ nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp)
+ {
+    const char *func_name = impl->function->name;
+
+    fprintf(fp, "digraph cfg_%s {\n", func_name);
+    nir_foreach_block(impl, dump_block_succs, fp);
+    fputs("}\n\n", fp);
+ }
+
+ /* Dumps the control flow graph of every function of "shader" that has an
+  * implementation.
+  */
+ void
+ nir_dump_cfg(nir_shader *shader, FILE *fp)
+ {
+    nir_foreach_function(shader, func) {
+       if (!func->impl)
+          continue;
+
+       nir_dump_cfg_impl(func->impl, fp);
+    }
+ }
diff --git a/src/compiler/nir/nir_from_ssa.c b/src/compiler/nir/nir_from_ssa.c
new file mode 100644
index 00000000000..8bc9f24e406
--- /dev/null
+++ b/src/compiler/nir/nir_from_ssa.c
@@ -0,0 +1,805 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ *
+ */
+
+#include "nir.h"
+#include "nir_vla.h"
+
+/*
+ * This file implements an out-of-SSA pass as described in "Revisiting
+ * Out-of-SSA Translation for Correctness, Code Quality, and Efficiency" by
+ * Boissinot et. al.
+ */
+
+ struct from_ssa_state { /* pass state; handed to block callbacks through their void * argument */
+ void *mem_ctx;
+ void *dead_ctx; /* ralloc context used here for merge sets/nodes and parallel copies */
+ bool phi_webs_only;
+ struct hash_table *merge_node_table; /* maps nir_ssa_def * to its merge_node (see get_merge_node) */
+ nir_instr *instr;
+ nir_function_impl *impl;
+ };
+
+ /* Returns true if the definition a dominates the definition b.  Within one
+  * block the live indices decide; across blocks the question is handed to
+  * nir_block_dominates().
+  */
+ static bool
+ ssa_def_dominates(nir_ssa_def *a, nir_ssa_def *b)
+ {
+    /* SSA undefs always dominate */
+    if (a->live_index == 0)
+       return true;
+
+    if (b->live_index < a->live_index)
+       return false;
+
+    nir_block *a_block = a->parent_instr->block;
+    nir_block *b_block = b->parent_instr->block;
+
+    if (a_block == b_block)
+       return a->live_index <= b->live_index;
+
+    return nir_block_dominates(a_block, b_block);
+ }
+
+
+ /* The following data structure, which I have named merge_set, is a way of
+ * representing a set of non-interfering registers. This is
+ * based on the concept of a "dominance forest" presented in "Fast Copy
+ * Coalescing and Live-Range Identification" by Budimlic et. al. but the
+ * implementation concept is taken from "Revisiting Out-of-SSA Translation
+ * for Correctness, Code Quality, and Efficiency" by Boissinot et. al..
+ *
+ * Each SSA definition is associated with a merge_node and the association
+ * is represented by a combination of a hash table and the "def" parameter
+ * in the merge_node structure. The merge_set stores a linked list of
+ * merge_node's in dominance order of the ssa definitions. (Since the
+ * liveness analysis pass indexes the SSA values in dominance order for us,
+ * this is an easy thing to keep up.) It is assumed that no pair of the
+ * nodes in a given set interfere. Merging two sets or checking for
+ * interference can be done in a single linear-time merge-sort walk of the
+ * two lists of nodes.
+ */
+struct merge_set;
+
+ typedef struct { /* one SSA definition's membership in a merge_set */
+ struct exec_node node; /* link in merge_set::nodes */
+ struct merge_set *set; /* the set this node currently belongs to */
+ nir_ssa_def *def; /* the SSA definition this node stands for */
+ } merge_node;
+
+ typedef struct merge_set { /* list of merge_nodes; merging keeps it ordered by def->live_index */
+ struct exec_list nodes; /* the member merge_nodes */
+ unsigned size; /* number of nodes; zeroed once the set has been merged into another */
+ nir_register *reg; /* starts out NULL; presumably the register later assigned to the set - TODO confirm against its users */
+ } merge_set;
+
+#if 0
+ /* Debugging aid: prints the members of "set" to "fp", one per line, indented
+  * by their depth in the dominance forest (one space per dominating member
+  * still on the stack).
+  *
+  * The scratch stack is allocated with NIR_VLA, like merge_sets_interfere()
+  * does, rather than as a raw VLA, and the unsigned SSA index is printed
+  * with %u.
+  */
+ static void
+ merge_set_dump(merge_set *set, FILE *fp)
+ {
+    NIR_VLA(nir_ssa_def *, dom, set->size);
+    int dom_idx = -1;
+
+    foreach_list_typed(merge_node, node, node, &set->nodes) {
+       /* Pop every definition that does not dominate the current one. */
+       while (dom_idx >= 0 && !ssa_def_dominates(dom[dom_idx], node->def))
+          dom_idx--;
+
+       for (int i = 0; i <= dom_idx; i++)
+          fprintf(fp, " ");
+
+       if (node->def->name)
+          fprintf(fp, "ssa_%u /* %s */\n", node->def->index, node->def->name);
+       else
+          fprintf(fp, "ssa_%u\n", node->def->index);
+
+       dom[++dom_idx] = node->def;
+    }
+ }
+#endif
+
+ /* Returns the merge_node registered for "def" in the state's hash table.
+  * If there is none yet, a fresh single-element merge_set and its node are
+  * allocated out of dead_ctx, recorded in the table and returned.
+  */
+ static merge_node *
+ get_merge_node(nir_ssa_def *def, struct from_ssa_state *state)
+ {
+    struct hash_entry *found =
+       _mesa_hash_table_search(state->merge_node_table, def);
+    if (found != NULL)
+       return found->data;
+
+    merge_set *fresh_set = ralloc(state->dead_ctx, merge_set);
+    exec_list_make_empty(&fresh_set->nodes);
+    fresh_set->size = 1;
+    fresh_set->reg = NULL;
+
+    merge_node *fresh_node = ralloc(state->dead_ctx, merge_node);
+    fresh_node->set = fresh_set;
+    fresh_node->def = def;
+    exec_list_push_head(&fresh_set->nodes, &fresh_node->node);
+
+    _mesa_hash_table_insert(state->merge_node_table, def, fresh_node);
+
+    return fresh_node;
+ }
+
+ /* Returns whether the SSA definitions of the two nodes interfere, as
+  * reported by nir_ssa_defs_interfere().
+  */
+ static bool
+ merge_nodes_interfere(merge_node *a, merge_node *b)
+ {
+    nir_ssa_def *a_def = a->def;
+    nir_ssa_def *b_def = b->def;
+
+    return nir_ssa_defs_interfere(a_def, b_def);
+ }
+
+ /* Merges b into a and returns a.  Both node lists are walked in parallel:
+  * each node of b is moved in front of the first node of a whose live_index
+  * is larger, or to the end of a when a is exhausted.  Afterwards b is empty
+  * and has size 0.
+  */
+ static merge_set *
+ merge_merge_sets(merge_set *a, merge_set *b)
+ {
+    struct exec_node *a_cur = exec_list_get_head(&a->nodes);
+    struct exec_node *b_cur = exec_list_get_head(&b->nodes);
+
+    while (!exec_node_is_tail_sentinel(b_cur)) {
+       /* Once a is exhausted every remaining node of b gets moved over. */
+       bool take_b = exec_node_is_tail_sentinel(a_cur);
+       if (!take_b) {
+          merge_node *a_node = exec_node_data(merge_node, a_cur, node);
+          merge_node *b_node = exec_node_data(merge_node, b_cur, node);
+
+          take_b = a_node->def->live_index > b_node->def->live_index;
+       }
+
+       if (!take_b) {
+          a_cur = a_cur->next;
+          continue;
+       }
+
+       /* Grab the successor before the node is unlinked from b. */
+       struct exec_node *b_next = b_cur->next;
+
+       exec_node_remove(b_cur);
+       exec_node_insert_node_before(a_cur, b_cur);
+       exec_node_data(merge_node, b_cur, node)->set = a;
+
+       b_cur = b_next;
+    }
+
+    a->size += b->size;
+    b->size = 0;
+
+    return a;
+ }
+
+ /* Checks for any interference between two merge sets
+  *
+  * This is an implementation of Algorithm 2 in "Revisiting Out-of-SSA
+  * Translation for Correctness, Code Quality, and Efficiency" by
+  * Boissinot et. al.
+  *
+  * Both node lists are consumed in one merged walk ordered by live_index
+  * (ties go to a), while a stack holds the chain of definitions that
+  * dominate the current one.
+  */
+ static bool
+ merge_sets_interfere(merge_set *a, merge_set *b)
+ {
+    NIR_VLA(merge_node *, dom, a->size + b->size);
+    int top = -1;
+
+    struct exec_node *a_cur = exec_list_get_head(&a->nodes);
+    struct exec_node *b_cur = exec_list_get_head(&b->nodes);
+
+    for (;;) {
+       const bool a_done = exec_node_is_tail_sentinel(a_cur);
+       const bool b_done = exec_node_is_tail_sentinel(b_cur);
+       if (a_done && b_done)
+          break;
+
+       /* Decide which list supplies the next node. */
+       bool take_a;
+       if (a_done) {
+          take_a = false;
+       } else if (b_done) {
+          take_a = true;
+       } else {
+          merge_node *a_node = exec_node_data(merge_node, a_cur, node);
+          merge_node *b_node = exec_node_data(merge_node, b_cur, node);
+
+          take_a = a_node->def->live_index <= b_node->def->live_index;
+       }
+
+       merge_node *current;
+       if (take_a) {
+          current = exec_node_data(merge_node, a_cur, node);
+          a_cur = a_cur->next;
+       } else {
+          current = exec_node_data(merge_node, b_cur, node);
+          b_cur = b_cur->next;
+       }
+
+       /* Pop everything that does not dominate the current definition. */
+       while (top >= 0 && !ssa_def_dominates(dom[top]->def, current->def))
+          top--;
+
+       /* Only the nearest dominating definition is tested. */
+       if (top >= 0 && merge_nodes_interfere(current, dom[top]))
+          return true;
+
+       dom[++top] = current;
+    }
+
+    return false;
+ }
+
+ /* Block callback: if the first instruction of any successor of "block" is a
+  * phi, inserts an empty parallel copy at the end of "block", before its
+  * jump if it has one.  "void_state" is the from_ssa_state.  Always returns
+  * true.
+  */
+ static bool
+ add_parallel_copy_to_end_of_block(nir_block *block, void *void_state)
+ {
+    struct from_ssa_state *state = void_state;
+
+    bool succ_has_phi = false;
+    for (unsigned i = 0; i < 2; i++) {
+       nir_block *succ = block->successors[i];
+       if (succ == NULL)
+          continue;
+
+       nir_instr *first = nir_block_first_instr(succ);
+       if (first && first->type == nir_instr_type_phi)
+          succ_has_phi = true;
+    }
+
+    if (!succ_has_phi)
+       return true;
+
+    /* At least one successor starts with a phi node, so a parallel copy is
+     * needed at the end of this block but before the jump (if there is
+     * one).
+     */
+    nir_parallel_copy_instr *pcopy =
+       nir_parallel_copy_instr_create(state->dead_ctx);
+
+    nir_instr_insert(nir_after_block_before_jump(block), &pcopy->instr);
+
+    return true;
+ }
+
+ /* Returns the parallel copy that ends "block" (or that sits right before
+  * the block's final jump), or NULL if there is none.
+  */
+ static nir_parallel_copy_instr *
+ get_parallel_copy_at_end_of_block(nir_block *block)
+ {
+    nir_instr *candidate = nir_block_last_instr(block);
+    if (candidate == NULL)
+       return NULL;
+
+    /* The last instruction may be a jump in which case the parallel copy is
+     * right before it.
+     */
+    if (candidate->type == nir_instr_type_jump)
+       candidate = nir_instr_prev(candidate);
+
+    if (candidate == NULL || candidate->type != nir_instr_type_parallel_copy)
+       return NULL;
+
+    return nir_instr_as_parallel_copy(candidate);
+ }
+
+/** Isolate phi nodes with parallel copies
+ *
+ * In order to solve the dependency problems with the sources and
+ * destinations of phi nodes, we first isolate them by adding parallel
+ * copies to the beginnings and ends of basic blocks.  For every block with
+ * phi nodes, we add a parallel copy immediately following the last phi
+ * node that copies the destinations of all of the phi nodes to new SSA
+ * values.  We also add a parallel copy to the end of every block that has
+ * a successor with phi nodes that, for each phi node in each successor,
+ * copies the corresponding source of the phi node and adjusts the phi to
+ * use the destination of the parallel copy.
+ *
+ * In SSA form, each value has exactly one definition.  What this does is
+ * ensure that each value used in a phi also has exactly one use.  The
+ * destinations of phis are only used by the parallel copy immediately
+ * following the phi nodes.  Thanks to the parallel copy at the end of the
+ * predecessor block, the sources of phi nodes are the only use of that
+ * value.  This allows us to immediately assign all the sources and
+ * destinations of any given phi node to the same register without worrying
+ * about interference at all.  We do coalescing to get rid of the parallel
+ * copies where possible.
+ *
+ * Before this pass can be run, we have to iterate over the blocks with
+ * add_parallel_copy_to_end_of_block to ensure that the parallel copies at
+ * the ends of blocks exist.  We can create the ones at the beginnings as
+ * we go, but the ones at the ends of blocks need to be created ahead of
+ * time because of potential back-edges in the CFG.
+ */
+static bool
+isolate_phi_nodes_block(nir_block *block, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+
+   /* Phis are grouped at the top of the block; remember the final one. */
+   nir_instr *final_phi = NULL;
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      final_phi = instr;
+   }
+
+   /* No phis in this block means no work. */
+   if (!final_phi)
+      return true;
+
+   /* The block-start parallel copy goes right behind the last phi. */
+   nir_parallel_copy_instr *start_copy =
+      nir_parallel_copy_instr_create(state->dead_ctx);
+   nir_instr_insert_after(final_phi, &start_copy->instr);
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      assert(phi->dest.is_ssa);
+
+      /* Route every phi source through the predecessor's end-of-block
+       * parallel copy.
+       */
+      nir_foreach_phi_src(phi, src) {
+         nir_parallel_copy_instr *pred_copy =
+            get_parallel_copy_at_end_of_block(src->pred);
+         assert(pred_copy);
+
+         nir_parallel_copy_entry *src_entry =
+            rzalloc(state->dead_ctx, nir_parallel_copy_entry);
+         nir_ssa_dest_init(&pred_copy->instr, &src_entry->dest,
+                           phi->dest.ssa.num_components,
+                           src->src.ssa->name);
+         exec_list_push_tail(&pred_copy->entries, &src_entry->node);
+
+         assert(src->src.is_ssa);
+         nir_instr_rewrite_src(&pred_copy->instr, &src_entry->src, src->src);
+
+         nir_instr_rewrite_src(&phi->instr, &src->src,
+                               nir_src_for_ssa(&src_entry->dest.ssa));
+      }
+
+      /* Route the phi destination through the block-start parallel copy. */
+      nir_parallel_copy_entry *dest_entry =
+         rzalloc(state->dead_ctx, nir_parallel_copy_entry);
+      nir_ssa_dest_init(&start_copy->instr, &dest_entry->dest,
+                        phi->dest.ssa.num_components, phi->dest.ssa.name);
+      exec_list_push_tail(&start_copy->entries, &dest_entry->node);
+
+      /* The existing uses of the phi are redirected first; only afterwards
+       * is the copy entry made to read the phi destination.
+       */
+      nir_ssa_def_rewrite_uses(&phi->dest.ssa,
+                               nir_src_for_ssa(&dest_entry->dest.ssa));
+
+      nir_instr_rewrite_src(&start_copy->instr, &dest_entry->src,
+                            nir_src_for_ssa(&phi->dest.ssa));
+   }
+
+   return true;
+}
+
+/* Per-block callback: for every phi at the top of `block`, merges the
+ * merge set of each phi source into the merge set of the phi destination.
+ * Always returns true.
+ */
+static bool
+coalesce_phi_nodes_block(nir_block *block, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+
+   nir_foreach_instr(block, instr) {
+      /* Stop at the first non-phi; phis only occur at the block start. */
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      assert(phi->dest.is_ssa);
+
+      merge_node *phi_node = get_merge_node(&phi->dest.ssa, state);
+
+      nir_foreach_phi_src(phi, src) {
+         assert(src->src.is_ssa);
+
+         merge_node *operand_node = get_merge_node(src->src.ssa, state);
+         if (operand_node->set == phi_node->set)
+            continue;
+
+         merge_merge_sets(phi_node->set, operand_node->set);
+      }
+   }
+
+   return true;
+}
+
+/* Tries to coalesce the source and destination of each entry of `pcopy`.
+ *
+ * An entry is skipped when its source is not SSA, when its source comes
+ * from a load_const, or when source and destination component counts
+ * differ.  Otherwise the two merge sets are merged unless they are already
+ * the same set or they interfere.
+ */
+static void
+aggressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy,
+                                  struct from_ssa_state *state)
+{
+   nir_foreach_parallel_copy_entry(pcopy, entry) {
+      /* Since load_const instructions are SSA only, their destinations
+       * can't be replaced with registers, so they can't be coalesced.
+       * Mismatched component counts are not coalesced either.
+       */
+      if (!entry->src.is_ssa ||
+          entry->src.ssa->parent_instr->type == nir_instr_type_load_const ||
+          entry->dest.ssa.num_components != entry->src.ssa->num_components)
+         continue;
+
+      merge_node *from = get_merge_node(entry->src.ssa, state);
+      merge_node *to = get_merge_node(&entry->dest.ssa, state);
+
+      if (from->set != to->set &&
+          !merge_sets_interfere(from->set, to->set))
+         merge_merge_sets(from->set, to->set);
+   }
+}
+
+/* Per-block callback: runs aggressive coalescing on the parallel copy that
+ * directly follows the phis (if present) and on the parallel copy at the
+ * end of the block (if present and not the same instruction).  Always
+ * returns true.
+ */
+static bool
+aggressive_coalesce_block(nir_block *block, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+   nir_parallel_copy_instr *start_pcopy = NULL;
+
+   nir_foreach_instr(block, instr) {
+      /* Skip over the phis at the start of the block. */
+      if (instr->type == nir_instr_type_phi)
+         continue;
+
+      /* The parallel copy must be right after the phis, so only the first
+       * non-phi instruction is ever inspected.
+       */
+      if (instr->type == nir_instr_type_parallel_copy) {
+         start_pcopy = nir_instr_as_parallel_copy(instr);
+         aggressive_coalesce_parallel_copy(start_pcopy, state);
+      }
+
+      break;
+   }
+
+   nir_parallel_copy_instr *end_pcopy =
+      get_parallel_copy_at_end_of_block(block);
+
+   if (end_pcopy != NULL && end_pcopy != start_pcopy)
+      aggressive_coalesce_parallel_copy(end_pcopy, state);
+
+   return true;
+}
+
+/* nir_foreach_ssa_def callback: replaces the SSA definition `def` with a
+ * register.
+ *
+ * Defs found in state->merge_node_table use the register of their merge
+ * set, which is created on first use.  Other defs get a fresh register,
+ * unless state->phi_webs_only is set or the def comes from a load_const,
+ * in which case they are left alone.  ssa_undef instructions are removed
+ * once their uses are rewritten.  Always returns true.
+ */
+static bool
+rewrite_ssa_def(nir_ssa_def *def, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+   nir_register *reg;
+
+   struct hash_entry *table_entry =
+      _mesa_hash_table_search(state->merge_node_table, def);
+
+   if (table_entry != NULL) {
+      /* The def belongs to a phi web, so it shares the web's register. */
+      merge_node *node = (merge_node *)table_entry->data;
+
+      /* Create the web's register lazily.  Everything in the merge set
+       * should be the same, so it doesn't matter which node's definition
+       * supplies the name and size.
+       */
+      if (!node->set->reg) {
+         nir_register *web_reg = nir_local_reg_create(state->impl);
+         web_reg->name = def->name;
+         web_reg->num_components = def->num_components;
+         web_reg->num_array_elems = 0;
+         node->set->reg = web_reg;
+      }
+
+      reg = node->set->reg;
+   } else {
+      if (state->phi_webs_only)
+         return true;
+
+      /* load_const SSA values are left alone; they act as immediates to
+       * the backend.  If one got coalesced into a phi, that's ok.
+       */
+      if (def->parent_instr->type == nir_instr_type_load_const)
+         return true;
+
+      reg = nir_local_reg_create(state->impl);
+      reg->name = def->name;
+      reg->num_components = def->num_components;
+      reg->num_array_elems = 0;
+   }
+
+   nir_ssa_def_rewrite_uses(def, nir_src_for_reg(reg));
+   assert(list_empty(&def->uses) && list_empty(&def->if_uses));
+
+   if (def->parent_instr->type == nir_instr_type_ssa_undef) {
+      /* All uses of the undef are gone, so the instruction itself can be
+       * dropped; it is handed to dead_ctx for later freeing.
+       */
+      nir_instr *undef_instr = def->parent_instr;
+      nir_instr_remove(undef_instr);
+      ralloc_steal(state->dead_ctx, undef_instr);
+      return true;
+   }
+
+   assert(def->parent_instr->type != nir_instr_type_load_const);
+
+   /* At this point the def is known to be embedded in a nir_dest, so
+    * exec_node_data can recover the enclosing dest pointer.
+    */
+   nir_dest *dest = exec_node_data(nir_dest, def, ssa);
+
+   nir_instr_rewrite_dest(state->instr, dest, nir_dest_for_reg(reg));
+
+   return true;
+}
+
+/* Per-block callback that turns SSA definitions into registers and, while
+ * at it, removes phi nodes.  state->instr holds the instruction being
+ * processed while rewrite_ssa_def runs and is reset to NULL afterwards.
+ * Always returns true.
+ */
+static bool
+resolve_registers_block(nir_block *block, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      state->instr = instr;
+      nir_foreach_ssa_def(instr, rewrite_ssa_def, state);
+
+      if (instr->type != nir_instr_type_phi)
+         continue;
+
+      /* Phis are unlinked and handed to dead_ctx. */
+      nir_instr_remove(instr);
+      ralloc_steal(state->dead_ctx, instr);
+   }
+   state->instr = NULL;
+
+   return true;
+}
+
+/* Emits an imov copying `src` into the register named by `dest_src` and
+ * inserts it immediately before `pcopy`.
+ *
+ * `dest_src` must be a direct register reference (non-SSA, no indirect,
+ * base offset 0), and `src` must have at least as many components as the
+ * destination register.  The mov is allocated from `mem_ctx` and writes
+ * every component of the destination register.
+ */
+static void
+emit_copy(nir_parallel_copy_instr *pcopy, nir_src src, nir_src dest_src,
+          void *mem_ctx)
+{
+   assert(!dest_src.is_ssa &&
+          dest_src.reg.indirect == NULL &&
+          dest_src.reg.base_offset == 0);
+
+   nir_register *dest_reg = dest_src.reg.reg;
+
+   assert((src.is_ssa ? src.ssa->num_components
+                      : src.reg.reg->num_components) >=
+          dest_reg->num_components);
+
+   nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
+   nir_src_copy(&mov->src[0].src, &src, mov);
+   mov->dest.dest = nir_dest_for_reg(dest_reg);
+   mov->dest.write_mask = (1 << dest_reg->num_components) - 1;
+
+   nir_instr_insert_before(&pcopy->instr, &mov->instr);
+}
+
+/* Resolves a single parallel copy operation into a sequence of mov's
+ *
+ * This is based on Algorithm 1 from "Revisiting Out-of-SSA Translation for
+ * Correctness, Code Quality, and Efficiency" by Boissinot et al.
+ * However, I never got the algorithm to work as written, so this version
+ * is slightly modified.
+ *
+ * The algorithm works by playing this little shell game with the values.
+ * We start by recording where every source value is and which source value
+ * each destination value should receive. We then grab any copy whose
+ * destination is "empty", i.e. not used as a source, and do the following:
+ * - Find where its source value currently lives
+ * - Emit the move instruction
+ * - Set the location of the source value to the destination
+ * - Mark the location containing the source value
+ * - Mark the destination as no longer needing to be copied
+ *
+ * When we run out of "empty" destinations, we have a cycle and so we
+ * create a temporary register, copy to that register, and mark the value
+ * we copied as living in that temporary. Now, the cycle is broken, so we
+ * can continue with the above steps.
+ *
+ * pcopy is the parallel copy to resolve; the generated movs are inserted
+ * in front of it by emit_copy() and pcopy itself is removed from the
+ * instruction list before returning. state supplies the function impl
+ * used to create temporary registers and the mem_ctx handed to
+ * emit_copy().
+ */
+static void
+resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
+ struct from_ssa_state *state)
+{
+ unsigned num_copies = 0;
+ nir_foreach_parallel_copy_entry(pcopy, entry) {
+ /* Sources may be SSA; only a register copied onto itself is skipped */
+ if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg)
+ continue;
+
+ num_copies++;
+ }
+
+ if (num_copies == 0) {
+ /* Hooray, we don't need any copies! Just drop the instruction. */
+ nir_instr_remove(&pcopy->instr);
+ return;
+ }
+
+ /* The register/source corresponding to the given index (zero-filled) */
+ NIR_VLA_ZERO(nir_src, values, num_copies * 2);
+
+ /* The current location of a given piece of data. We will use -1 for "null" */
+ NIR_VLA_FILL(int, loc, num_copies * 2, -1);
+
+ /* The piece of data that the given piece of data is to be copied from. We will use -1 for "null" */
+ NIR_VLA_FILL(int, pred, num_copies * 2, -1);
+
+ /* The destinations we have yet to properly fill; used as a stack with to_do_idx as its top */
+ NIR_VLA(int, to_do, num_copies * 2);
+ int to_do_idx = -1;
+
+ /* Now we set everything up:
+ * - All values get assigned a temporary index
+ * - Current locations are set from sources
+ * - Predecessors are recorded from sources and destinations
+ *
+ * Self-copies are skipped with the same test as in the counting loop
+ * above, so num_copies matches the number of entries handled here.
+ */
+ int num_vals = 0;
+ nir_foreach_parallel_copy_entry(pcopy, entry) {
+ /* Sources may be SSA; only a register copied onto itself is skipped */
+ if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg)
+ continue;
+
+ int src_idx = -1;
+ for (int i = 0; i < num_vals; ++i) {
+ if (nir_srcs_equal(values[i], entry->src))
+ src_idx = i;
+ }
+ if (src_idx < 0) {
+ src_idx = num_vals++;
+ values[src_idx] = entry->src;
+ }
+
+ nir_src dest_src = nir_src_for_reg(entry->dest.reg.reg);
+
+ int dest_idx = -1;
+ for (int i = 0; i < num_vals; ++i) {
+ if (nir_srcs_equal(values[i], dest_src)) {
+ /* Each destination of a parallel copy instruction should be
+ * unique. A destination may get used as a source, so we still
+ * have to walk the list. However, the predecessor should not,
+ * at this point, be set yet, so we should have -1 here.
+ */
+ assert(pred[i] == -1);
+ dest_idx = i;
+ }
+ }
+ if (dest_idx < 0) {
+ dest_idx = num_vals++;
+ values[dest_idx] = dest_src;
+ }
+
+ loc[src_idx] = src_idx;
+ pred[dest_idx] = src_idx;
+
+ to_do[++to_do_idx] = dest_idx;
+ }
+
+ /* Currently empty destinations we can go ahead and fill; used as a stack with ready_idx as its top */
+ NIR_VLA(int, ready, num_copies * 2);
+ int ready_idx = -1;
+
+ /* Mark the ones that are ready for copying. We know an index is a
+ * destination if it has a predecessor and it's ready for copying if
+ * it's not marked as containing data.
+ */
+ for (int i = 0; i < num_vals; i++) {
+ if (pred[i] != -1 && loc[i] == -1)
+ ready[++ready_idx] = i;
+ }
+
+ while (to_do_idx >= 0) {
+ while (ready_idx >= 0) {
+ int b = ready[ready_idx--];
+ int a = pred[b];
+ emit_copy(pcopy, values[loc[a]], values[b], state->mem_ctx);
+
+ /* If any other copies want a, they can find it at b */
+ loc[a] = b;
+
+ /* b has been filled, mark it as not needing to be copied */
+ pred[b] = -1;
+
+ /* If a needs to be filled, it's ready for copying now */
+ if (pred[a] != -1)
+ ready[++ready_idx] = a;
+ }
+ int b = to_do[to_do_idx--];
+ if (pred[b] == -1)
+ continue;
+
+ /* If we got here, then we don't have any more trivial copies that we
+ * can do. We have to break a cycle, so we create a new temporary
+ * register for that purpose. Normally, if going out of SSA after
+ * register allocation, you would want to avoid creating temporary
+ * registers. However, we are going out of SSA before register
+ * allocation, so we would rather not create extra register
+ * dependencies for the backend to deal with. If it wants, the
+ * backend can coalesce the (possibly multiple) temporaries.
+ *
+ * The temporary takes the next free slot in values[]; b is saved
+ * into it, recorded as living there, and pushed onto the ready
+ * stack so the inner loop can now overwrite it.
+ */
+ assert(num_vals < num_copies * 2);
+ nir_register *reg = nir_local_reg_create(state->impl);
+ reg->name = "copy_temp";
+ reg->num_array_elems = 0;
+ if (values[b].is_ssa)
+ reg->num_components = values[b].ssa->num_components;
+ else
+ reg->num_components = values[b].reg.reg->num_components;
+ values[num_vals].is_ssa = false;
+ values[num_vals].reg.reg = reg;
+
+ emit_copy(pcopy, values[b], values[num_vals], state->mem_ctx);
+ loc[b] = num_vals;
+ ready[++ready_idx] = b;
+ num_vals++;
+ }
+
+ nir_instr_remove(&pcopy->instr);
+}
+
+/* Resolves the parallel copies in a block.  Each block can have at most
+ * two: one at the beginning, right after all the phi nodes, and one at
+ * the end (or right before the final jump if it exists).  Always returns
+ * true.
+ */
+static bool
+resolve_parallel_copies_block(nir_block *block, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+
+   /* The phi nodes have already been removed, so a parallel copy that used
+    * to follow them is now the first instruction of the block.
+    */
+   nir_instr *head = nir_block_first_instr(block);
+   if (!head)
+      return true; /* Empty, nothing to do. */
+
+   if (head->type == nir_instr_type_parallel_copy)
+      resolve_parallel_copy(nir_instr_as_parallel_copy(head), state);
+
+   /* The code above may already have cleaned up the end parallel copy.
+    * Doing so removed it from the instruction list, so it won't be found
+    * here.  It is therefore safe to just look for one and clean it up if
+    * it exists.
+    */
+   nir_parallel_copy_instr *tail_copy =
+      get_parallel_copy_at_end_of_block(block);
+   if (tail_copy != NULL)
+      resolve_parallel_copy(tail_copy, state);
+
+   return true;
+}
+
+/* Takes a single function implementation out of SSA form.
+ *
+ * The passes run in this order: create end-of-block parallel copies,
+ * isolate the phis, request liveness and dominance metadata, coalesce phi
+ * webs, aggressively coalesce parallel copies, rewrite SSA defs to
+ * registers, and finally lower the parallel copies.  When `phi_webs_only`
+ * is set, rewrite_ssa_def leaves defs outside of phi webs in SSA form.
+ */
+static void
+nir_convert_from_ssa_impl(nir_function_impl *impl, bool phi_webs_only)
+{
+   struct from_ssa_state state = {
+      .mem_ctx = ralloc_parent(impl),
+      .dead_ctx = ralloc_context(NULL),
+      .impl = impl,
+      .phi_webs_only = phi_webs_only,
+      .merge_node_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                                  _mesa_key_pointer_equal),
+   };
+
+   nir_foreach_block(impl, add_parallel_copy_to_end_of_block, &state);
+   nir_foreach_block(impl, isolate_phi_nodes_block, &state);
+
+   /* Mark metadata as dirty before we ask for liveness analysis */
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+
+   nir_metadata_require(impl, nir_metadata_live_ssa_defs |
+                              nir_metadata_dominance);
+
+   nir_foreach_block(impl, coalesce_phi_nodes_block, &state);
+   nir_foreach_block(impl, aggressive_coalesce_block, &state);
+
+   nir_foreach_block(impl, resolve_registers_block, &state);
+
+   nir_foreach_block(impl, resolve_parallel_copies_block, &state);
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+
+   /* Release the merge-node table and everything parked on dead_ctx. */
+   _mesa_hash_table_destroy(state.merge_node_table, NULL);
+   ralloc_free(state.dead_ctx);
+}
+
+/* Runs the out-of-SSA conversion on every function of `shader` that has
+ * an implementation, forwarding `phi_webs_only` unchanged.
+ */
+void
+nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only)
+{
+   nir_foreach_function(shader, function) {
+      if (!function->impl)
+         continue;
+
+      nir_convert_from_ssa_impl(function->impl, phi_webs_only);
+   }
+}
diff --git a/src/compiler/nir/nir_gs_count_vertices.c b/src/compiler/nir/nir_gs_count_vertices.c
new file mode 100644
index 00000000000..db15d160ee7
--- /dev/null
+++ b/src/compiler/nir/nir_gs_count_vertices.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/* Returns `instr` as an intrinsic instruction if it is an intrinsic whose
+ * opcode is `op`; returns NULL otherwise.
+ */
+static nir_intrinsic_instr *
+as_intrinsic(nir_instr *instr, nir_intrinsic_op op)
+{
+   if (instr->type == nir_instr_type_intrinsic) {
+      nir_intrinsic_instr *candidate = nir_instr_as_intrinsic(instr);
+      if (candidate->intrinsic == op)
+         return candidate;
+   }
+
+   return NULL;
+}
+
+/* Returns `instr` as an intrinsic if it is a set_vertex_count intrinsic,
+ * NULL otherwise.
+ */
+static nir_intrinsic_instr *
+as_set_vertex_count(nir_instr *instr)
+{
+   const nir_intrinsic_op wanted = nir_intrinsic_set_vertex_count;
+
+   return as_intrinsic(instr, wanted);
+}
+
+/**
+ * If a geometry shader emits a constant number of vertices, return the
+ * number of vertices.  Otherwise, return -1 (unknown).
+ *
+ * -1 is returned when a set_vertex_count source is not a constant, when
+ * two set_vertex_count intrinsics disagree, or when none is found.
+ *
+ * This only works if you've used nir_lower_gs_intrinsics() to do vertex
+ * counting at the NIR level.
+ */
+int
+nir_gs_count_vertices(const nir_shader *shader)
+{
+   int count = -1;
+
+   nir_foreach_function(shader, function) {
+      if (!function->impl)
+         continue;
+
+      /* set_vertex_count intrinsics only appear in predecessors of the
+       * end block, so only those blocks are walked.
+       */
+      struct set_entry *entry;
+      set_foreach(function->impl->end_block->predecessors, entry) {
+         nir_block *block = (nir_block *) entry->key;
+
+         nir_foreach_instr_reverse(block, instr) {
+            nir_intrinsic_instr *intrin = as_set_vertex_count(instr);
+            if (intrin == NULL)
+               continue;
+
+            /* A non-constant vertex count means the answer is unknown. */
+            nir_const_value *val = nir_src_as_const_value(intrin->src[0]);
+            if (val == NULL)
+               return -1;
+
+            if (count == -1) {
+               /* First count seen so far. */
+               count = val->i[0];
+            } else if (count != val->i[0]) {
+               /* Contradictory set_vertex_count intrinsics.  This can
+                * happen if there are early-returns in main() and
+                * different paths emit different numbers of vertices.
+                */
+               return -1;
+            }
+         }
+      }
+   }
+
+   return count;
+}
diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c
new file mode 100644
index 00000000000..d3f939fe805
--- /dev/null
+++ b/src/compiler/nir/nir_instr_set.c
@@ -0,0 +1,519 @@
+/*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir_instr_set.h"
+#include "nir_vla.h"
+
+/* Folds `value` into the running hash `h` via _mesa_fnv32_1a_accumulate(). */
+#define HASH(h, value) _mesa_fnv32_1a_accumulate((h), (value))
+
+/* Folds an SSA source into `hash` by hashing its ssa pointer.  Only SSA
+ * sources are accepted (asserted).
+ */
+static uint32_t
+hash_src(uint32_t hash, const nir_src *src)
+{
+   assert(src->is_ssa);
+
+   return HASH(hash, src->ssa);
+}
+
+/* Folds one ALU source into `hash`: the abs and negate modifiers, the
+ * first `num_components` swizzle entries, and finally the source itself.
+ */
+static uint32_t
+hash_alu_src(uint32_t hash, const nir_alu_src *src, unsigned num_components)
+{
+   hash = HASH(hash, src->abs);
+   hash = HASH(hash, src->negate);
+
+   for (unsigned c = 0; c < num_components; c++) {
+      hash = HASH(hash, src->swizzle[c]);
+   }
+
+   return hash_src(hash, &src->src);
+}
+
+/* Folds an ALU instruction into `hash`: opcode, destination component
+ * count and all sources.  For commutative opcodes the two sources are
+ * hashed independently and combined so that swapping them yields the same
+ * result.
+ */
+static uint32_t
+hash_alu(uint32_t hash, const nir_alu_instr *instr)
+{
+   const unsigned num_inputs = nir_op_infos[instr->op].num_inputs;
+   const bool commutative =
+      (nir_op_infos[instr->op].algebraic_properties &
+       NIR_OP_IS_COMMUTATIVE) != 0;
+
+   hash = HASH(hash, instr->op);
+   hash = HASH(hash, instr->dest.dest.ssa.num_components);
+
+   if (!commutative) {
+      for (unsigned s = 0; s < num_inputs; s++) {
+         hash = hash_alu_src(hash, &instr->src[s],
+                             nir_ssa_alu_instr_src_components(instr, s));
+      }
+      return hash;
+   }
+
+   assert(num_inputs == 2);
+   const uint32_t first =
+      hash_alu_src(hash, &instr->src[0],
+                   nir_ssa_alu_instr_src_components(instr, 0));
+   const uint32_t second =
+      hash_alu_src(hash, &instr->src[1],
+                   nir_ssa_alu_instr_src_components(instr, 1));
+
+   /* A commutative combination of the two hashes is needed.  XOR would
+    * work, but anything with two identical sources would then hash to 0,
+    * and that's common enough that the guaranteed collision is not
+    * wanted.  Addition or multiplication both work; multiplication is
+    * used.
+    */
+   return first * second;
+}
+
+/* Folds a load_const into `hash`: its component count followed by the raw
+ * bytes of that many constant components.
+ */
+static uint32_t
+hash_load_const(uint32_t hash, const nir_load_const_instr *instr)
+{
+   const size_t payload_size =
+      instr->def.num_components * sizeof(instr->value.f[0]);
+
+   hash = HASH(hash, instr->def.num_components);
+
+   return _mesa_fnv32_1a_accumulate_block(hash, instr->value.f,
+                                          payload_size);
+}
+
+/* qsort() comparator ordering nir_phi_src pointers by the address of
+ * their predecessor block.
+ *
+ * data1 and data2 point at array elements of type `nir_phi_src *`.
+ * Returns a negative, zero or positive value as required by qsort().
+ *
+ * The predecessors are unrelated objects, so their addresses are compared
+ * as integers: subtracting the pointers directly is undefined behaviour,
+ * and narrowing the difference to int could report the wrong sign.
+ */
+static int
+cmp_phi_src(const void *data1, const void *data2)
+{
+   const nir_phi_src *src1 = *(nir_phi_src *const *)data1;
+   const nir_phi_src *src2 = *(nir_phi_src *const *)data2;
+   const uintptr_t pred1 = (uintptr_t)src1->pred;
+   const uintptr_t pred2 = (uintptr_t)src2->pred;
+
+   return (pred1 > pred2) - (pred1 < pred2);
+}
+
+/* Folds a phi into `hash`: its block, then each (source, predecessor)
+ * pair.  The pairs are visited in predecessor order so that the order in
+ * which the sources are stored does not affect the result.
+ */
+static uint32_t
+hash_phi(uint32_t hash, const nir_phi_instr *instr)
+{
+   hash = HASH(hash, instr->instr.block);
+
+   /* Collect the sources into an array sized by the block's predecessor
+    * count, then sort them by predecessor.
+    */
+   unsigned num_preds = instr->instr.block->predecessors->entries;
+   NIR_VLA(nir_phi_src *, sorted, num_preds);
+
+   unsigned filled = 0;
+   nir_foreach_phi_src(instr, src) {
+      sorted[filled++] = src;
+   }
+
+   qsort(sorted, num_preds, sizeof(nir_phi_src *), cmp_phi_src);
+
+   for (unsigned p = 0; p < num_preds; p++) {
+      hash = hash_src(hash, &sorted[p]->src);
+      hash = HASH(hash, sorted[p]->pred);
+   }
+
+   return hash;
+}
+
+/* Folds an intrinsic into `hash`: opcode, destination component count
+ * (when the intrinsic has a destination), every source, and the constant
+ * indices.
+ *
+ * Sources are included because nir_instrs_equal() compares them; without
+ * them, intrinsics that differ only in their sources would always
+ * collide.  Intrinsics with variables are not supported (asserted).
+ * Sources must be SSA, which hash_src() asserts.
+ */
+static uint32_t
+hash_intrinsic(uint32_t hash, const nir_intrinsic_instr *instr)
+{
+   const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
+   hash = HASH(hash, instr->intrinsic);
+
+   if (info->has_dest)
+      hash = HASH(hash, instr->dest.ssa.num_components);
+
+   assert(info->num_variables == 0);
+
+   for (unsigned i = 0; i < info->num_srcs; i++)
+      hash = hash_src(hash, &instr->src[i]);
+
+   hash = _mesa_fnv32_1a_accumulate_block(hash, instr->const_index,
+                                          info->num_indices
+                                             * sizeof(instr->const_index[0]));
+   return hash;
+}
+
+/* Folds a texture instruction into `hash`: opcode, every (type, source)
+ * pair, and the remaining texture parameters.  Instructions that still
+ * carry a sampler deref are not supported (asserted).
+ */
+static uint32_t
+hash_tex(uint32_t hash, const nir_tex_instr *instr)
+{
+   assert(!instr->sampler);
+
+   hash = HASH(hash, instr->op);
+   hash = HASH(hash, instr->num_srcs);
+
+   for (unsigned s = 0; s < instr->num_srcs; s++) {
+      hash = HASH(hash, instr->src[s].src_type);
+      hash = hash_src(hash, &instr->src[s].src);
+   }
+
+   hash = HASH(hash, instr->coord_components);
+   hash = HASH(hash, instr->sampler_dim);
+   hash = HASH(hash, instr->is_array);
+   hash = HASH(hash, instr->is_shadow);
+   hash = HASH(hash, instr->is_new_style_shadow);
+   hash = HASH(hash, instr->const_offset);
+
+   /* The component is hashed through a local unsigned copy rather than
+    * through the struct member itself.
+    */
+   unsigned component = instr->component;
+   hash = HASH(hash, component);
+
+   hash = HASH(hash, instr->sampler_index);
+   hash = HASH(hash, instr->sampler_array_size);
+
+   return hash;
+}
+
+/* Computes a hash of an instruction for use in a hash table.  Note that
+ * this will only work for instructions where instr_can_rewrite() returns
+ * true, and it should return identical hashes for two instructions that
+ * are the same according to nir_instrs_equal().
+ */
+
+static uint32_t
+hash_instr(const void *data)
+{
+   const nir_instr *instr = data;
+   const uint32_t seed = _mesa_fnv32_1a_offset_bias;
+
+   /* Dispatch to the per-type hasher, starting from the FNV offset bias. */
+   switch (instr->type) {
+   case nir_instr_type_alu:
+      return hash_alu(seed, nir_instr_as_alu(instr));
+   case nir_instr_type_load_const:
+      return hash_load_const(seed, nir_instr_as_load_const(instr));
+   case nir_instr_type_phi:
+      return hash_phi(seed, nir_instr_as_phi(instr));
+   case nir_instr_type_intrinsic:
+      return hash_intrinsic(seed, nir_instr_as_intrinsic(instr));
+   case nir_instr_type_tex:
+      return hash_tex(seed, nir_instr_as_tex(instr));
+   default:
+      unreachable("Invalid instruction type");
+   }
+
+   return seed;
+}
+
+/* Returns true when two sources refer to the same value.
+ *
+ * Two SSA sources are equal when they point at the same SSA def.  Two
+ * register sources are equal when they name the same register with the
+ * same base offset and either both lack an indirect or have equal
+ * indirects.  An SSA source never equals a register source.
+ */
+bool
+nir_srcs_equal(nir_src src1, nir_src src2)
+{
+   if (src1.is_ssa || src2.is_ssa)
+      return src1.is_ssa && src2.is_ssa && src1.ssa == src2.ssa;
+
+   /* Both sources are registers from here on. */
+   const bool indirect1 = src1.reg.indirect != NULL;
+   const bool indirect2 = src2.reg.indirect != NULL;
+   if (indirect1 != indirect2)
+      return false;
+
+   if (indirect1 &&
+       !nir_srcs_equal(*src1.reg.indirect, *src2.reg.indirect))
+      return false;
+
+   return src1.reg.reg == src2.reg.reg &&
+          src1.reg.base_offset == src2.reg.base_offset;
+}
+
+/* Compares source `src1` of `alu1` against source `src2` of `alu2`.
+ *
+ * The sources match when their abs/negate modifiers agree, their swizzles
+ * agree on every component that source `src1` of `alu1` uses, and the
+ * underlying sources are equal per nir_srcs_equal().
+ */
+static bool
+nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
+                   unsigned src1, unsigned src2)
+{
+   const nir_alu_src *lhs = &alu1->src[src1];
+   const nir_alu_src *rhs = &alu2->src[src2];
+
+   if (lhs->abs != rhs->abs || lhs->negate != rhs->negate)
+      return false;
+
+   const unsigned used = nir_ssa_alu_instr_src_components(alu1, src1);
+   for (unsigned c = 0; c < used; c++) {
+      if (lhs->swizzle[c] != rhs->swizzle[c])
+         return false;
+   }
+
+   return nir_srcs_equal(lhs->src, rhs->src);
+}
+
+/* Returns "true" if two instructions are equal.  Note that this will only
+ * work for the subset of instructions defined by instr_can_rewrite().
+ * Also, it should only return "true" for instructions that hash_instr()
+ * will return the same hash for (ignoring collisions, of course).
+ */
+
+static bool
+nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
+{
+   if (instr1->type != instr2->type)
+      return false;
+
+   switch (instr1->type) {
+   case nir_instr_type_alu: {
+      nir_alu_instr *lhs = nir_instr_as_alu(instr1);
+      nir_alu_instr *rhs = nir_instr_as_alu(instr2);
+
+      /* TODO: We can probably actually do something more intelligent such
+       * as allowing different numbers and taking a maximum or something
+       * here */
+      if (lhs->op != rhs->op ||
+          lhs->dest.dest.ssa.num_components !=
+          rhs->dest.dest.ssa.num_components)
+         return false;
+
+      if (!(nir_op_infos[lhs->op].algebraic_properties &
+            NIR_OP_IS_COMMUTATIVE)) {
+         for (unsigned i = 0; i < nir_op_infos[lhs->op].num_inputs; i++) {
+            if (!nir_alu_srcs_equal(lhs, rhs, i, i))
+               return false;
+         }
+         return true;
+      }
+
+      /* Commutative: accept the sources in either order. */
+      assert(nir_op_infos[lhs->op].num_inputs == 2);
+      if (nir_alu_srcs_equal(lhs, rhs, 0, 0) &&
+          nir_alu_srcs_equal(lhs, rhs, 1, 1))
+         return true;
+
+      return nir_alu_srcs_equal(lhs, rhs, 0, 1) &&
+             nir_alu_srcs_equal(lhs, rhs, 1, 0);
+   }
+   case nir_instr_type_tex: {
+      nir_tex_instr *lhs = nir_instr_as_tex(instr1);
+      nir_tex_instr *rhs = nir_instr_as_tex(instr2);
+
+      if (lhs->op != rhs->op || lhs->num_srcs != rhs->num_srcs)
+         return false;
+
+      for (unsigned i = 0; i < lhs->num_srcs; i++) {
+         if (lhs->src[i].src_type != rhs->src[i].src_type)
+            return false;
+         if (!nir_srcs_equal(lhs->src[i].src, rhs->src[i].src))
+            return false;
+      }
+
+      const bool same_params =
+         lhs->coord_components == rhs->coord_components &&
+         lhs->sampler_dim == rhs->sampler_dim &&
+         lhs->is_array == rhs->is_array &&
+         lhs->is_shadow == rhs->is_shadow &&
+         lhs->is_new_style_shadow == rhs->is_new_style_shadow &&
+         memcmp(lhs->const_offset, rhs->const_offset,
+                sizeof(lhs->const_offset)) == 0 &&
+         lhs->component == rhs->component &&
+         lhs->sampler_index == rhs->sampler_index &&
+         lhs->sampler_array_size == rhs->sampler_array_size;
+      if (!same_params)
+         return false;
+
+      /* Don't support un-lowered sampler derefs currently. */
+      assert(!lhs->sampler && !rhs->sampler);
+
+      return true;
+   }
+   case nir_instr_type_load_const: {
+      nir_load_const_instr *lhs = nir_instr_as_load_const(instr1);
+      nir_load_const_instr *rhs = nir_instr_as_load_const(instr2);
+
+      if (lhs->def.num_components != rhs->def.num_components)
+         return false;
+
+      /* Compare only the components that are actually defined. */
+      return memcmp(lhs->value.f, rhs->value.f,
+                    lhs->def.num_components * sizeof(*rhs->value.f)) == 0;
+   }
+   case nir_instr_type_phi: {
+      nir_phi_instr *phi1 = nir_instr_as_phi(instr1);
+      nir_phi_instr *phi2 = nir_instr_as_phi(instr2);
+
+      if (phi1->instr.block != phi2->instr.block)
+         return false;
+
+      /* For each source of phi1, find the source of phi2 coming from the
+       * same predecessor and compare the two.
+       */
+      nir_foreach_phi_src(phi1, src1) {
+         nir_foreach_phi_src(phi2, src2) {
+            if (src1->pred != src2->pred)
+               continue;
+
+            if (!nir_srcs_equal(src1->src, src2->src))
+               return false;
+
+            break;
+         }
+      }
+
+      return true;
+   }
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *lhs = nir_instr_as_intrinsic(instr1);
+      nir_intrinsic_instr *rhs = nir_instr_as_intrinsic(instr2);
+      const nir_intrinsic_info *info = &nir_intrinsic_infos[lhs->intrinsic];
+
+      if (lhs->intrinsic != rhs->intrinsic ||
+          lhs->num_components != rhs->num_components)
+         return false;
+
+      if (info->has_dest &&
+          lhs->dest.ssa.num_components != rhs->dest.ssa.num_components)
+         return false;
+
+      for (unsigned i = 0; i < info->num_srcs; i++) {
+         if (!nir_srcs_equal(lhs->src[i], rhs->src[i]))
+            return false;
+      }
+
+      assert(info->num_variables == 0);
+
+      for (unsigned i = 0; i < info->num_indices; i++) {
+         if (lhs->const_index[i] != rhs->const_index[i])
+            return false;
+      }
+
+      return true;
+   }
+   case nir_instr_type_call:
+   case nir_instr_type_jump:
+   case nir_instr_type_ssa_undef:
+   case nir_instr_type_parallel_copy:
+   default:
+      unreachable("Invalid instruction type");
+   }
+
+   return false;
+}
+
+/* nir_foreach_src callback: reports whether `src` is SSA.  `data` is
+ * unused.
+ */
+static bool
+src_is_ssa(nir_src *src, void *data)
+{
+   (void) data; /* unused */
+
+   return src->is_ssa;
+}
+
+/* nir_foreach_dest callback: reports whether `dest` is SSA.  `data` is
+ * unused.
+ */
+static bool
+dest_is_ssa(nir_dest *dest, void *data)
+{
+   (void) data; /* unused */
+
+   return dest->is_ssa;
+}
+
+/* This function determines if uses of an instruction can safely be
+ * rewritten to use another identical instruction instead.  Note that this
+ * function must be kept in sync with hash_instr() and nir_instrs_equal()
+ * -- only instructions that pass this test will be handed on to those
+ * functions, and conversely they must handle everything that this
+ * function returns true for.
+ */
+
+static bool
+instr_can_rewrite(nir_instr *instr)
+{
+   /* Only fully-SSA instructions are handled. */
+   if (!nir_foreach_dest(instr, dest_is_ssa, NULL) ||
+       !nir_foreach_src(instr, src_is_ssa, NULL))
+      return false;
+
+   switch (instr->type) {
+   case nir_instr_type_alu:
+   case nir_instr_type_load_const:
+   case nir_instr_type_phi:
+      return true;
+   case nir_instr_type_tex: {
+      nir_tex_instr *tex = nir_instr_as_tex(instr);
+
+      /* Don't support un-lowered sampler derefs currently. */
+      return tex->sampler == NULL;
+   }
+   case nir_instr_type_intrinsic: {
+      const nir_intrinsic_info *info =
+         &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
+
+      /* The intrinsic must be flagged as both eliminable and reorderable;
+       * intrinsics with variables are not implemented yet.
+       */
+      return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
+             (info->flags & NIR_INTRINSIC_CAN_REORDER) &&
+             info->num_variables == 0;
+   }
+   case nir_instr_type_call:
+   case nir_instr_type_jump:
+   case nir_instr_type_ssa_undef:
+      return false;
+   case nir_instr_type_parallel_copy:
+   default:
+      unreachable("Invalid instruction type");
+   }
+
+   return false;
+}
+
+/* Returns the SSA def written by `instr`.  Handles ALU, load_const, phi,
+ * intrinsic and tex instructions; any other type is unreachable.  Where
+ * the instruction has a nir_dest, it is asserted to be SSA.
+ */
+static nir_ssa_def *
+nir_instr_get_dest_ssa_def(nir_instr *instr)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu: {
+      nir_alu_instr *alu = nir_instr_as_alu(instr);
+      assert(alu->dest.dest.is_ssa);
+      return &alu->dest.dest.ssa;
+   }
+   case nir_instr_type_load_const:
+      return &nir_instr_as_load_const(instr)->def;
+   case nir_instr_type_phi: {
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      assert(phi->dest.is_ssa);
+      return &phi->dest.ssa;
+   }
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      assert(intrin->dest.is_ssa);
+      return &intrin->dest.ssa;
+   }
+   case nir_instr_type_tex: {
+      nir_tex_instr *tex = nir_instr_as_tex(instr);
+      assert(tex->dest.is_ssa);
+      return &tex->dest.ssa;
+   }
+   default:
+      unreachable("We never ask for any of these");
+   }
+}
+
+/* Key-equality callback handed to _mesa_set_create(); both keys are
+ * instructions and are compared with nir_instrs_equal().
+ */
+static bool
+cmp_func(const void *data1, const void *data2)
+{
+   const bool same = nir_instrs_equal(data1, data2);
+
+   return same;
+}
+
+/* Creates an instruction set on `mem_ctx`, keyed with hash_instr() and
+ * compared with cmp_func().
+ */
+struct set *
+nir_instr_set_create(void *mem_ctx)
+{
+   struct set *instr_set = _mesa_set_create(mem_ctx, hash_instr, cmp_func);
+
+   return instr_set;
+}
+
+/* Destroys an instruction set; no per-entry callback is passed to
+ * _mesa_set_destroy().
+ */
+void
+nir_instr_set_destroy(struct set *instr_set)
+{
+   _mesa_set_destroy(instr_set, NULL);
+}
+
+/* Adds `instr` to the set, or, if an equal instruction is already there,
+ * rewrites all uses of `instr`'s SSA def to the existing instruction's
+ * def.
+ *
+ * Returns true only when uses were rewritten.  Instructions rejected by
+ * instr_can_rewrite() are left untouched and yield false.
+ */
+bool
+nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr)
+{
+   if (!instr_can_rewrite(instr))
+      return false;
+
+   struct set_entry *match = _mesa_set_search(instr_set, instr);
+   if (match == NULL) {
+      /* First occurrence: remember it for later lookups. */
+      _mesa_set_add(instr_set, instr);
+      return false;
+   }
+
+   nir_ssa_def *def = nir_instr_get_dest_ssa_def(instr);
+   nir_ssa_def *new_def =
+      nir_instr_get_dest_ssa_def((nir_instr *) match->key);
+   nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(new_def));
+
+   return true;
+}
+
+/* Removes the set entry matching `instr`, if there is one.  Instructions
+ * rejected by instr_can_rewrite() are ignored.
+ */
+void
+nir_instr_set_remove(struct set *instr_set, nir_instr *instr)
+{
+   if (!instr_can_rewrite(instr))
+      return;
+
+   struct set_entry *match = _mesa_set_search(instr_set, instr);
+   if (match != NULL)
+      _mesa_set_remove(instr_set, match);
+}
+
diff --git a/src/compiler/nir/nir_instr_set.h b/src/compiler/nir/nir_instr_set.h
new file mode 100644
index 00000000000..939e8ddbf58
--- /dev/null
+++ b/src/compiler/nir/nir_instr_set.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include "nir.h"
+
+/**
+ * This file defines functions for creating, destroying, and manipulating an
+ * "instruction set," which is an abstraction for finding duplicate
+ * instructions using a hash set. Note that the question of whether an
+ * instruction is actually a duplicate (e.g. whether it has any side effects)
+ * is handled transparently. The user can pass any instruction to
+ * nir_instr_set_add_or_rewrite() and nir_instr_set_remove(), and if the
+ * instruction isn't safe to rewrite or isn't supported, it's silently
+ * ignored.
+ */
+
+/*@{*/
+
+/** Creates an instruction set, using a given ralloc mem_ctx */
+struct set *nir_instr_set_create(void *mem_ctx);
+
+/** Destroys an instruction set. */
+void nir_instr_set_destroy(struct set *instr_set);
+
+/**
+ * Adds an instruction to an instruction set if it doesn't exist, or if it
+ * does already exist, rewrites all uses of it to point to the other
+ * already-inserted instruction. Returns 'true' if the uses of the instruction
+ * were rewritten.
+ */
+bool nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr);
+
+/**
+ * Removes an instruction from an instruction set, so that other instructions
+ * won't be merged with it.
+ */
+void nir_instr_set_remove(struct set *instr_set, nir_instr *instr);
+
+/*@}*/
+
diff --git a/src/compiler/nir/nir_intrinsics.c b/src/compiler/nir/nir_intrinsics.c
new file mode 100644
index 00000000000..a7c868c39af
--- /dev/null
+++ b/src/compiler/nir/nir_intrinsics.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#include "nir.h"
+
+/* Maps an intrinsic name to its nir_intrinsic_<name> identifier. */
+#define OPCODE(name) nir_intrinsic_##name
+
+/* Expands one INTRINSIC() entry of nir_intrinsics.h into a designated
+ * initializer for one element of nir_intrinsic_infos[]; the trailing comma
+ * lets consecutive entries form the array initializer list.
+ */
+#define INTRINSIC(_name, _num_srcs, _src_components, _has_dest, \
+ _dest_components, _num_variables, _num_indices, _flags) \
+{ \
+ .name = #_name, \
+ .num_srcs = _num_srcs, \
+ .src_components = _src_components, \
+ .has_dest = _has_dest, \
+ .dest_components = _dest_components, \
+ .num_variables = _num_variables, \
+ .num_indices = _num_indices, \
+ .flags = _flags \
+},
+
+/* LAST_INTRINSIC() contributes nothing to the info table. */
+#define LAST_INTRINSIC(name)
+
+const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics] = {
+#include "nir_intrinsics.h"
+};
\ No newline at end of file
diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h
new file mode 100644
index 00000000000..62eead4878a
--- /dev/null
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -0,0 +1,316 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+/**
+ * This header file defines all the available intrinsics in one place. It
+ * expands to a list of macros of the form:
+ *
+ * INTRINSIC(name, num_srcs, src_components, has_dest, dest_components,
+ * num_variables, num_indices, flags)
+ *
+ * Which should correspond one-to-one with the nir_intrinsic_info structure. It
+ * is included both in nir.h to create the nir_intrinsic enum (with members of
+ * the form nir_intrinsic_(name)) and in nir_intrinsics.c to create
+ * nir_intrinsic_infos, which is a const array of nir_intrinsic_info structures
+ * for each intrinsic.
+ */
+
+#define ARR(...) { __VA_ARGS__ }
+
+
+INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, 0)
+INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
+
+/*
+ * Interpolation of input. The interp_var_at* intrinsics are similar to the
+ * load_var intrinsic acting on a shader input except that they interpolate
+ * the input differently. The at_sample and at_offset intrinsics take an
+ * additional source that is an integer sample id or a vec2 position offset
+ * respectively.
+ */
+
+INTRINSIC(interp_var_at_centroid, 0, ARR(0), true, 0, 1, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+INTRINSIC(interp_var_at_sample, 1, ARR(1), true, 0, 1, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
+/*
+ * Ask the driver for the size of a given buffer. It takes the buffer index
+ * as source.
+ */
+INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
+/*
+ * a barrier is an intrinsic with no inputs/outputs but which can't be moved
+ * around/optimized in general
+ */
+#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, 0)
+
+BARRIER(barrier)
+BARRIER(discard)
+
+/*
+ * Memory barrier with semantics analogous to the memoryBarrier() GLSL
+ * intrinsic.
+ */
+BARRIER(memory_barrier)
+
+/*
+ * Shader clock intrinsic with semantics analogous to the clock2x32ARB()
+ * GLSL intrinsic.
+ * The latter can be used as code motion barrier, which is currently not
+ * feasible with NIR.
+ */
+INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+
+/*
+ * Memory barrier with semantics analogous to the compute shader
+ * groupMemoryBarrier(), memoryBarrierAtomicCounter(), memoryBarrierBuffer(),
+ * memoryBarrierImage() and memoryBarrierShared() GLSL intrinsics.
+ */
+BARRIER(group_memory_barrier)
+BARRIER(memory_barrier_atomic_counter)
+BARRIER(memory_barrier_buffer)
+BARRIER(memory_barrier_image)
+BARRIER(memory_barrier_shared)
+
+/** A conditional discard, with a single boolean source. */
+INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)
+
+/**
+ * Basic Geometry Shader intrinsics.
+ *
+ * emit_vertex implements GLSL's EmitStreamVertex() built-in. It takes a single
+ * index, which is the stream ID to write to.
+ *
+ * end_primitive implements GLSL's EndPrimitive() built-in.
+ */
+INTRINSIC(emit_vertex, 0, ARR(), false, 0, 0, 1, 0)
+INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0)
+
+/**
+ * Geometry Shader intrinsics with a vertex count.
+ *
+ * Alternatively, drivers may implement these intrinsics, and use
+ * nir_lower_gs_intrinsics() to convert from the basic intrinsics.
+ *
+ * These maintain a count of the number of vertices emitted, as an additional
+ * unsigned integer source.
+ */
+INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
+INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
+INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0)
+
+/*
+ * Atomic counters
+ *
+ * The *_var variants take an atomic_uint nir_variable, while the other,
+ * lowered, variants take a constant buffer index and register offset.
+ */
+
+#define ATOMIC(name, flags) \
+ INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, flags) \
+ INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, flags)
+
+ATOMIC(inc, 0)
+ATOMIC(dec, 0)
+ATOMIC(read, NIR_INTRINSIC_CAN_ELIMINATE)
+
+/*
+ * Image load, store and atomic intrinsics.
+ *
+ * All image intrinsics take an image target passed as a nir_variable. Image
+ * variables contain a number of memory and layout qualifiers that influence
+ * the semantics of the intrinsic.
+ *
+ * All image intrinsics take a four-coordinate vector and a sample index as
+ * first two sources, determining the location within the image that will be
+ * accessed by the intrinsic. Components not applicable to the image target
+ * in use are undefined. Image store takes an additional four-component
+ * argument with the value to be written, and image atomic operations take
+ * either one or two additional scalar arguments with the same meaning as in
+ * the ARB_shader_image_load_store specification.
+ */
+INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, 0)
+INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
+INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0)
+INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0,
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
+/*
+ * SSBO atomic intrinsics
+ *
+ * All of the SSBO atomic memory operations read a value from memory,
+ * compute a new value using one of the operations below, write the new
+ * value to memory, and return the original value read.
+ *
+ * All operations take 3 sources except CompSwap that takes 4. These
+ * sources represent:
+ *
+ * 0: The SSBO buffer index.
+ * 1: The offset into the SSBO buffer of the variable that the atomic
+ * operation will operate on.
+ * 2: The data parameter to the atomic function (i.e. the value to add
+ * in ssbo_atomic_add, etc).
+ * 3: For CompSwap only: the second data parameter.
+ */
+INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_imin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_umin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_imax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_umax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0)
+
+/*
+ * CS shared variable atomic intrinsics
+ *
+ * All of the shared variable atomic memory operations read a value from
+ * memory, compute a new value using one of the operations below, write the
+ * new value to memory, and return the original value read.
+ *
+ * All operations take 2 sources except CompSwap that takes 3. These
+ * sources represent:
+ *
+ * 0: The offset into the shared variable storage region that the atomic
+ * operation will operate on.
+ * 1: The data parameter to the atomic function (i.e. the value to add
+ * in shared_atomic_add, etc).
+ * 2: For CompSwap only: the second data parameter.
+ */
+INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+
+#define SYSTEM_VALUE(name, components, num_indices) \
+ INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
+SYSTEM_VALUE(front_face, 1, 0)
+SYSTEM_VALUE(vertex_id, 1, 0)
+SYSTEM_VALUE(vertex_id_zero_base, 1, 0)
+SYSTEM_VALUE(base_vertex, 1, 0)
+SYSTEM_VALUE(instance_id, 1, 0)
+SYSTEM_VALUE(base_instance, 1, 0)
+SYSTEM_VALUE(draw_id, 1, 0)
+SYSTEM_VALUE(sample_id, 1, 0)
+SYSTEM_VALUE(sample_pos, 2, 0)
+SYSTEM_VALUE(sample_mask_in, 1, 0)
+SYSTEM_VALUE(primitive_id, 1, 0)
+SYSTEM_VALUE(invocation_id, 1, 0)
+SYSTEM_VALUE(tess_coord, 3, 0)
+SYSTEM_VALUE(tess_level_outer, 4, 0)
+SYSTEM_VALUE(tess_level_inner, 2, 0)
+SYSTEM_VALUE(patch_vertices_in, 1, 0)
+SYSTEM_VALUE(local_invocation_id, 3, 0)
+SYSTEM_VALUE(work_group_id, 3, 0)
+SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */
+SYSTEM_VALUE(num_work_groups, 3, 0)
+SYSTEM_VALUE(helper_invocation, 1, 0)
+
+/*
+ * Load operations pull data from some piece of GPU memory. All load
+ * operations operate in terms of offsets into some piece of theoretical
+ * memory. Loads from externally visible memory (UBO and SSBO) simply take a
+ * byte offset as a source. Loads from opaque memory (uniforms, inputs, etc.)
+ * take a base+offset pair where the base (const_index[0]) gives the location
+ * of the start of the variable being loaded and the offset source is an
+ * offset into that variable.
+ *
+ * Some load operations such as UBO/SSBO load and per_vertex loads take an
+ * additional source to specify which UBO/SSBO/vertex to load from.
+ *
+ * The exact address type depends on the lowering pass that generates the
+ * load/store intrinsics. Typically, this is vec4 units for things such as
+ * varying slots and float units for fragment shader inputs. UBO and SSBO
+ * offsets are always in bytes.
+ */
+
+#define LOAD(name, srcs, indices, flags) \
+ INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, indices, flags)
+
+/* src[] = { offset }. const_index[] = { base } */
+LOAD(uniform, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { buffer_index, offset }. No const_index */
+LOAD(ubo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { offset }. const_index[] = { base } */
+LOAD(input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { vertex, offset }. const_index[] = { base } */
+LOAD(per_vertex_input, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { buffer_index, offset }. No const_index */
+LOAD(ssbo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+/* src[] = { offset }. const_index[] = { base } */
+LOAD(output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+/* src[] = { vertex, offset }. const_index[] = { base } */
+LOAD(per_vertex_output, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+/* src[] = { offset }. const_index[] = { base } */
+LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+
+/*
+ * Stores work the same way as loads, except now the first source is the value
+ * to store and the second (and possibly third) source specify where to store
+ * the value. Every store also carries a write mask as its last const_index
+ * entry: const_index[0] for SSBO stores and const_index[1] for the others.
+ */
+
+#define STORE(name, srcs, indices, flags) \
+ INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices, flags)
+
+/* src[] = { value, offset }. const_index[] = { base, write_mask } */
+STORE(output, 2, 2, 0)
+/* src[] = { value, vertex, offset }. const_index[] = { base, write_mask } */
+STORE(per_vertex_output, 3, 2, 0)
+/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */
+STORE(ssbo, 3, 1, 0)
+/* src[] = { value, offset }. const_index[] = { base, write_mask } */
+STORE(shared, 2, 2, 0)
+
+LAST_INTRINSIC(store_shared)
diff --git a/src/compiler/nir/nir_liveness.c b/src/compiler/nir/nir_liveness.c
new file mode 100644
index 00000000000..05f79d7bc61
--- /dev/null
+++ b/src/compiler/nir/nir_liveness.c
@@ -0,0 +1,297 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ */
+
+#include "nir.h"
+#include "nir_worklist.h"
+#include "nir_vla.h"
+
+/*
+ * Basic liveness analysis. This works only in SSA form.
+ *
+ * This liveness pass treats phi nodes as being melded to the space between
+ * blocks so that the destinations of a phi are in the livein of the block
+ * in which it resides and the sources are in the liveout of the
+ * corresponding block. By formulating the liveness information in this
+ * way, we ensure that the definition of any variable dominates its entire
+ * live range. This is true because the only way that the definition of an
+ * SSA value may not dominate a use is if the use is in a phi node and the
+ * uses in phi nodes are in the live-out of the corresponding predecessor
+ * block but not in the live-in of the block containing the phi node.
+ */
+
+/* State shared by the callbacks of the SSA liveness pass. */
+struct live_ssa_defs_state {
+ /* Next live_index to hand out; index 0 is reserved for ssa_undef defs. */
+ unsigned num_ssa_defs;
+ /* Number of BITSET_WORDs in each block's live_in / live_out set. */
+ unsigned bitset_words;
+
+ /* Blocks whose liveness sets still have to be (re)computed. */
+ nir_block_worklist worklist;
+};
+
+/* nir_foreach_ssa_def() callback that assigns def->live_index.
+ *
+ * Defs produced by ssa_undef instructions all share index 0; every other def
+ * receives the next free index from the state's counter. Always returns
+ * true.
+ */
+static bool
+index_ssa_def(nir_ssa_def *def, void *void_state)
+{
+   struct live_ssa_defs_state *state = void_state;
+   const bool is_undef =
+      def->parent_instr->type == nir_instr_type_ssa_undef;
+
+   def->live_index = is_undef ? 0 : state->num_ssa_defs++;
+
+   return true;
+}
+
+/* nir_foreach_block() callback: indexes every SSA def of every instruction
+ * in the block via index_ssa_def(). Always returns true.
+ */
+static bool
+index_ssa_definitions_block(nir_block *block, void *state)
+{
+   nir_foreach_instr(block, instr) {
+      nir_foreach_ssa_def(instr, index_ssa_def, state);
+   }
+
+   return true;
+}
+
+/* nir_foreach_block() callback: (re)allocates the block's live_in and
+ * live_out bitsets at the current size, clears them, and pushes the block
+ * onto the head of the worklist. Always returns true.
+ */
+static bool
+init_liveness_block(nir_block *block, void *void_state)
+{
+   struct live_ssa_defs_state *state = void_state;
+   const unsigned words = state->bitset_words;
+
+   block->live_in = reralloc(block, block->live_in, BITSET_WORD, words);
+   memset(block->live_in, 0, words * sizeof(BITSET_WORD));
+
+   block->live_out = reralloc(block, block->live_out, BITSET_WORD, words);
+   memset(block->live_out, 0, words * sizeof(BITSET_WORD));
+
+   nir_block_worklist_push_head(&state->worklist, block);
+
+   return true;
+}
+
+/* nir_foreach_src() callback: marks the SSA def read by src as live in the
+ * bitset passed as void_live.
+ *
+ * Non-SSA sources and defs with live_index 0 (undefined values, which are
+ * never live) are skipped. Always returns true.
+ */
+static bool
+set_src_live(nir_src *src, void *void_live)
+{
+   BITSET_WORD *live_set = void_live;
+
+   if (src->is_ssa && src->ssa->live_index != 0) {
+      BITSET_SET(live_set, src->ssa->live_index);
+   }
+
+   return true;
+}
+
+/* nir_foreach_ssa_def() callback: clears def's bit in the bitset passed as
+ * void_live. Always returns true.
+ */
+static bool
+set_ssa_def_dead(nir_ssa_def *def, void *void_live)
+{
+   BITSET_WORD *live_set = void_live;
+
+   BITSET_CLEAR(live_set, def->live_index);
+
+   return true;
+}
+
+/** Propagates the live in of succ across the edge to the live out of pred
+ *
+ * Phi nodes exist "between" blocks and all the phi nodes at the start of a
+ * block act "in parallel". When we propagate from the live_in of one
+ * block to the live out of the other, we have to kill any writes from phis
+ * and make live any sources.
+ *
+ * Returns true if updating live out of pred added anything
+ */
+static bool
+propagate_across_edge(nir_block *pred, nir_block *succ,
+ struct live_ssa_defs_state *state)
+{
+ /* Work on a scratch copy; succ->live_in itself is not modified here. */
+ NIR_VLA(BITSET_WORD, live, state->bitset_words);
+ memcpy(live, succ->live_in, state->bitset_words * sizeof *live);
+
+ /* First pass: kill every phi destination. This is a separate loop from
+ * the one below so that a value which is both written by one phi and read
+ * by another ends up live, whatever the order of the phis.
+ */
+ nir_foreach_instr(succ, instr) {
+ if (instr->type != nir_instr_type_phi)
+ break;
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+ assert(phi->dest.is_ssa);
+ set_ssa_def_dead(&phi->dest.ssa, live);
+ }
+
+ /* Second pass: make live the phi source that comes in from pred. Only the
+ * first source matching pred is considered for each phi.
+ */
+ nir_foreach_instr(succ, instr) {
+ if (instr->type != nir_instr_type_phi)
+ break;
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+ nir_foreach_phi_src(phi, src) {
+ if (src->pred == pred) {
+ set_src_live(&src->src, live);
+ break;
+ }
+ }
+ }
+
+ /* Merge into pred's live-out, recording whether any new bit was added. */
+ BITSET_WORD progress = 0;
+ for (unsigned i = 0; i < state->bitset_words; ++i) {
+ progress |= live[i] & ~pred->live_out[i];
+ pred->live_out[i] |= live[i];
+ }
+ return progress != 0;
+}
+
+/* Computes the live_in and live_out bitsets of every block in impl.
+ *
+ * SSA defs are first given a live_index, then a backwards worklist
+ * algorithm is run until no predecessor's live_out set changes any more.
+ */
+void
+nir_live_ssa_defs_impl(nir_function_impl *impl)
+{
+ struct live_ssa_defs_state state;
+
+ /* We start at 1 because we reserve the index value of 0 for ssa_undef
+ * instructions. Those are never live, so their liveness information
+ * can be compacted into a single bit.
+ */
+ state.num_ssa_defs = 1;
+ nir_foreach_block(impl, index_ssa_definitions_block, &state);
+
+ nir_block_worklist_init(&state.worklist, impl->num_blocks, NULL);
+
+ /* We now know how many unique ssa definitions we have and we can go
+ * ahead and allocate live_in and live_out sets and add all of the
+ * blocks to the worklist.
+ */
+ state.bitset_words = BITSET_WORDS(state.num_ssa_defs);
+ nir_foreach_block(impl, init_liveness_block, &state);
+
+ /* We're now ready to work through the worklist and update the liveness
+ * sets of each of the blocks. By the time we get to this point, every
+ * block in the function implementation has been pushed onto the
+ * worklist in reverse order. As long as we keep the worklist
+ * up-to-date as we go, everything will get covered.
+ */
+ while (!nir_block_worklist_is_empty(&state.worklist)) {
+ /* We pop them off in the reverse order we pushed them on. This way
+ * the first walk of the instructions is backwards so we only walk
+ * once in the case of no control flow.
+ */
+ nir_block *block = nir_block_worklist_pop_head(&state.worklist);
+
+ /* Start from the live-out set and walk the block backwards to turn it
+ * into the live-in set.
+ */
+ memcpy(block->live_in, block->live_out,
+ state.bitset_words * sizeof(BITSET_WORD));
+
+ /* The condition of an if that follows the block counts as a use at the
+ * very end of the block.
+ */
+ nir_if *following_if = nir_block_get_following_if(block);
+ if (following_if)
+ set_src_live(&following_if->condition, block->live_in);
+
+ nir_foreach_instr_reverse(block, instr) {
+ /* Phi nodes are handled separately so we want to skip them. Since
+ * we are going backwards and they are at the beginning, we can just
+ * break as soon as we see one.
+ */
+ if (instr->type == nir_instr_type_phi)
+ break;
+
+ nir_foreach_ssa_def(instr, set_ssa_def_dead, block->live_in);
+ nir_foreach_src(instr, set_src_live, block->live_in);
+ }
+
+ /* Walk over all of the predecessors of the current block updating
+ * their live out with the live in of this one. If anything has
+ * changed, add the predecessor to the work list so that we ensure
+ * that the new information is used.
+ */
+ struct set_entry *entry;
+ set_foreach(block->predecessors, entry) {
+ nir_block *pred = (nir_block *)entry->key;
+ if (propagate_across_edge(pred, block, &state))
+ nir_block_worklist_push_tail(&state.worklist, pred);
+ }
+ }
+
+ nir_block_worklist_fini(&state.worklist);
+}
+
+/* nir_foreach_src() callback: returns false (which stops the iteration) as
+ * soon as src is an SSA source that reads def, true otherwise.
+ */
+static bool
+src_does_not_use_def(nir_src *src, void *def)
+{
+   return !(src->is_ssa && src->ssa == (nir_ssa_def *)def);
+}
+
+/* Returns true if def is used in start's block strictly after start.
+ *
+ * A use counts if it is a source of a later instruction in the block or the
+ * condition of the if statement that follows the block. The liveness pass
+ * adds that condition to the block's live-in set but not to its live-out
+ * set, so without the explicit check a value whose last use is the if
+ * condition would wrongly be reported as dead after start.
+ */
+static bool
+search_for_use_after_instr(nir_instr *start, nir_ssa_def *def)
+{
+   /* Only look for a use strictly after the given instruction */
+   struct exec_node *node = start->node.next;
+   while (!exec_node_is_tail_sentinel(node)) {
+      nir_instr *instr = exec_node_data(nir_instr, node, node);
+      if (!nir_foreach_src(instr, src_does_not_use_def, def))
+         return true;
+      node = node->next;
+   }
+
+   /* The condition of a following if is a use at the very end of the block,
+    * after every instruction.
+    */
+   nir_if *following_if = nir_block_get_following_if(start->block);
+   if (following_if && !src_does_not_use_def(&following_if->condition, def))
+      return true;
+
+   return false;
+}
+
+/* Returns true if def is live at instr assuming that def comes before
+ * instr in a pre DFS search of the dominance tree.
+ */
+static bool
+nir_ssa_def_is_live_at(nir_ssa_def *def, nir_instr *instr)
+{
+   nir_block *block = instr->block;
+
+   /* Since def dominates instr, if def is in the liveout of the block,
+    * it's live at instr.
+    */
+   if (BITSET_TEST(block->live_out, def->live_index))
+      return true;
+
+   /* Neither live coming into instr's block nor defined in it: def cannot
+    * be live at instr.
+    */
+   if (!BITSET_TEST(block->live_in, def->live_index) &&
+       def->parent_instr->block != block)
+      return false;
+
+   /* def reaches this block but does not leave it, so it is live at instr
+    * exactly when it is still used after instr.
+    */
+   return search_for_use_after_instr(instr, def);
+}
+
+/* Returns true if the SSA defs a and b interfere, based on the live_index
+ * values and the block live sets computed by nir_live_ssa_defs_impl().
+ */
+bool
+nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b)
+{
+   /* Two variables defined at the same time interfere assuming at least
+    * one isn't dead.
+    */
+   if (a->parent_instr == b->parent_instr)
+      return true;
+
+   /* If either variable is an ssa_undef, then there's no interference */
+   if (a->live_index == 0 || b->live_index == 0)
+      return false;
+
+   /* Ask whether the def with the lower index is still live where the
+    * other one is defined.
+    */
+   if (a->live_index < b->live_index)
+      return nir_ssa_def_is_live_at(a, b->parent_instr);
+
+   return nir_ssa_def_is_live_at(b, a->parent_instr);
+}
diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
new file mode 100644
index 00000000000..0a27e66cf0f
--- /dev/null
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/** @file nir_lower_alu_to_scalar.c
+ *
+ * Replaces nir_alu_instr operations with more than one channel used in the
+ * arguments with individual per-channel operations.
+ */
+
+/* Gives instr a fresh, unnamed SSA destination with num_components
+ * components and sets the write mask to cover exactly those components.
+ */
+static void
+nir_alu_ssa_dest_init(nir_alu_instr *instr, unsigned num_components)
+{
+   nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components,
+                     NULL);
+
+   /* Low num_components bits set. */
+   instr->dest.write_mask = (1 << num_components) - 1;
+}
+
+/* Lowers a reduction ALU op to scalar code.
+ *
+ * For each input component a single-component chan_op instruction is
+ * emitted on that component of the source(s); the per-component results are
+ * then folded left to right with merge_op. All uses of instr are rewritten
+ * to the final value and instr is removed.
+ */
+static void
+lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
+                nir_builder *builder)
+{
+   const unsigned num_components = nir_op_infos[instr->op].input_sizes[0];
+   const unsigned chan_inputs = nir_op_infos[chan_op].num_inputs;
+
+   nir_ssa_def *last = NULL;
+   for (unsigned i = 0; i < num_components; i++) {
+      nir_alu_instr *chan = nir_alu_instr_create(builder->shader, chan_op);
+      nir_alu_ssa_dest_init(chan, 1);
+
+      /* Copy the source, then move component i into the first slot. */
+      nir_alu_src_copy(&chan->src[0], &instr->src[0], chan);
+      chan->src[0].swizzle[0] = chan->src[0].swizzle[i];
+      if (chan_inputs > 1) {
+         assert(chan_inputs == 2);
+         nir_alu_src_copy(&chan->src[1], &instr->src[1], chan);
+         chan->src[1].swizzle[0] = chan->src[1].swizzle[i];
+      }
+
+      nir_builder_instr_insert(builder, &chan->instr);
+
+      /* The first component seeds the accumulator, the rest are merged. */
+      nir_ssa_def *chan_def = &chan->dest.dest.ssa;
+      last = (i == 0) ? chan_def
+                      : nir_build_alu(builder, merge_op,
+                                      last, chan_def, NULL, NULL);
+   }
+
+   assert(instr->dest.write_mask == 1);
+   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(last));
+   nir_instr_remove(&instr->instr);
+}
+
+/* Scalarizes a single ALU instruction.
+ *
+ * vecN and the unpack ops listed below are left untouched, fdph is expanded
+ * inline and the reduction ops go through lower_reduction(). Any other op
+ * with more than one destination component is replaced by one
+ * single-component copy of the op per written channel, recombined with
+ * nir_vec(). New code is emitted right before instr.
+ */
+static void
+lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
+{
+ unsigned num_src = nir_op_infos[instr->op].num_inputs;
+ unsigned i, chan;
+
+ assert(instr->dest.dest.is_ssa);
+ assert(instr->dest.write_mask != 0);
+
+ b->cursor = nir_before_instr(&instr->instr);
+
+/* Emits the case labels for the 2-, 3- and 4-component variants of a
+ * reduction op and lowers them with lower_reduction().
+ */
+#define LOWER_REDUCTION(name, chan, merge) \
+ case name##2: \
+ case name##3: \
+ case name##4: \
+ lower_reduction(instr, chan, merge, b); \
+ return;
+
+ switch (instr->op) {
+ case nir_op_vec4:
+ case nir_op_vec3:
+ case nir_op_vec2:
+ /* We don't need to scalarize these ops, they're the ones generated to
+ * group up outputs into a value that can be SSAed.
+ */
+ return;
+
+ case nir_op_unpack_unorm_4x8:
+ case nir_op_unpack_snorm_4x8:
+ case nir_op_unpack_unorm_2x16:
+ case nir_op_unpack_snorm_2x16:
+ /* There is no scalar version of these ops, unless we were to break it
+ * down to bitshifts and math (which is definitely not intended).
+ */
+ return;
+
+ case nir_op_unpack_half_2x16:
+ /* We could split this into unpack_half_2x16_split_[xy], but should
+ * we?
+ */
+ return;
+
+ case nir_op_fdph: {
+ /* Three per-channel products plus the fourth channel of src[1], which
+ * is added in unmultiplied.
+ */
+ nir_ssa_def *sum[4];
+ for (unsigned i = 0; i < 3; i++) {
+ sum[i] = nir_fmul(b, nir_channel(b, instr->src[0].src.ssa,
+ instr->src[0].swizzle[i]),
+ nir_channel(b, instr->src[1].src.ssa,
+ instr->src[1].swizzle[i]));
+ }
+ sum[3] = nir_channel(b, instr->src[1].src.ssa, instr->src[1].swizzle[3]);
+
+ nir_ssa_def *val = nir_fadd(b, nir_fadd(b, sum[0], sum[1]),
+ nir_fadd(b, sum[2], sum[3]));
+
+ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
+ nir_instr_remove(&instr->instr);
+ return;
+ }
+
+ LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
+ LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand);
+ LOWER_REDUCTION(nir_op_ball_iequal, nir_op_ieq, nir_op_iand);
+ LOWER_REDUCTION(nir_op_bany_fnequal, nir_op_fne, nir_op_ior);
+ LOWER_REDUCTION(nir_op_bany_inequal, nir_op_ine, nir_op_ior);
+ LOWER_REDUCTION(nir_op_fall_equal, nir_op_seq, nir_op_fand);
+ LOWER_REDUCTION(nir_op_fany_nequal, nir_op_sne, nir_op_for);
+
+ default:
+ break;
+ }
+
+ /* Already scalar: nothing to do. */
+ if (instr->dest.dest.ssa.num_components == 1)
+ return;
+
+ unsigned num_components = instr->dest.dest.ssa.num_components;
+ nir_ssa_def *comps[] = { NULL, NULL, NULL, NULL };
+
+ /* Emit one single-component copy of the op per written channel. */
+ for (chan = 0; chan < 4; chan++) {
+ if (!(instr->dest.write_mask & (1 << chan)))
+ continue;
+
+ nir_alu_instr *lower = nir_alu_instr_create(b->shader, instr->op);
+ for (i = 0; i < num_src; i++) {
+ /* We only handle same-size-as-dest (input_sizes[] == 0) or scalar
+ * args (input_sizes[] == 1).
+ */
+ assert(nir_op_infos[instr->op].input_sizes[i] < 2);
+ unsigned src_chan = (nir_op_infos[instr->op].input_sizes[i] == 1 ?
+ 0 : chan);
+
+ /* Every swizzle slot of the copy selects the one source channel. */
+ nir_alu_src_copy(&lower->src[i], &instr->src[i], lower);
+ for (int j = 0; j < 4; j++)
+ lower->src[i].swizzle[j] = instr->src[i].swizzle[src_chan];
+ }
+
+ nir_alu_ssa_dest_init(lower, 1);
+ lower->dest.saturate = instr->dest.saturate;
+ comps[chan] = &lower->dest.dest.ssa;
+
+ nir_builder_instr_insert(b, &lower->instr);
+ }
+
+ /* Recombine the scalar results and replace the original instruction. */
+ nir_ssa_def *vec = nir_vec(b, comps, num_components);
+
+ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec));
+
+ nir_instr_remove(&instr->instr);
+}
+
+/* nir_foreach_block() callback: runs lower_alu_instr_scalar() on every ALU
+ * instruction of the block. The "safe" iterator is used because lowering
+ * can remove the current instruction. Always returns true.
+ */
+static bool
+lower_alu_to_scalar_block(nir_block *block, void *builder)
+{
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         continue;
+
+      lower_alu_instr_scalar(nir_instr_as_alu(instr), builder);
+   }
+
+   return true;
+}
+
+/* Scalarizes the ALU instructions of one function implementation. */
+static void
+nir_lower_alu_to_scalar_impl(nir_function_impl *impl)
+{
+   nir_builder b;
+
+   nir_builder_init(&b, impl);
+   nir_foreach_block(impl, lower_alu_to_scalar_block, &b);
+}
+
+/* Scalarizes the ALU instructions of every function in the shader that has
+ * an implementation.
+ */
+void
+nir_lower_alu_to_scalar(nir_shader *shader)
+{
+   nir_foreach_function(shader, function) {
+      if (!function->impl)
+         continue;
+
+      nir_lower_alu_to_scalar_impl(function->impl);
+   }
+}
diff --git a/src/compiler/nir/nir_lower_atomics.c b/src/compiler/nir/nir_lower_atomics.c
new file mode 100644
index 00000000000..2cbc1b75348
--- /dev/null
+++ b/src/compiler/nir/nir_lower_atomics.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#include "glsl/ir_uniform.h"
+#include "nir.h"
+#include "main/config.h"
+#include <assert.h>
+
+/* State handed to the atomic-counter lowering callbacks. */
+typedef struct {
+ /* program whose UniformStorage[] supplies the counter buffer index */
+ const struct gl_shader_program *shader_program;
+ /* shader being lowered; its stage selects the opaque[] entry to read */
+ nir_shader *shader;
+} lower_atomic_state;
+
+/*
+ * replace atomic counter intrinsics that use a variable with intrinsics
+ * that directly store the buffer index and byte offset
+ *
+ * Only atomic_counter_{read,inc,dec}_var are handled; any other intrinsic,
+ * and any counter whose variable is neither a uniform nor shader storage,
+ * is left untouched.
+ *
+ * The replacement intrinsic carries the buffer index in const_index[0] and
+ * the byte offset in src[0].  The offset is a load_const that accumulates
+ * the variable's offset and all constant array offsets; for each indirect
+ * array deref an imul/iadd pair adds index * element size on top of it.
+ * All new instructions are inserted before `instr`, which is then removed.
+ */
+
+static void
+lower_instr(nir_intrinsic_instr *instr,
+ lower_atomic_state *state)
+{
+ nir_intrinsic_op op;
+ switch (instr->intrinsic) {
+ case nir_intrinsic_atomic_counter_read_var:
+ op = nir_intrinsic_atomic_counter_read;
+ break;
+
+ case nir_intrinsic_atomic_counter_inc_var:
+ op = nir_intrinsic_atomic_counter_inc;
+ break;
+
+ case nir_intrinsic_atomic_counter_dec_var:
+ op = nir_intrinsic_atomic_counter_dec;
+ break;
+
+ default:
+ return;
+ }
+
+ if (instr->variables[0]->var->data.mode != nir_var_uniform &&
+ instr->variables[0]->var->data.mode != nir_var_shader_storage)
+ return; /* atomics passed as function arguments can't be lowered */
+
+ /* new instructions share the ralloc parent of the one being replaced */
+ void *mem_ctx = ralloc_parent(instr);
+ unsigned uniform_loc = instr->variables[0]->var->data.location;
+
+ nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
+ new_instr->const_index[0] =
+ state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index;
+
+ /* constant part of the byte offset; its value keeps being updated in
+ * place by the loop below, after the instruction has been inserted
+ */
+ nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
+ offset_const->value.u[0] = instr->variables[0]->var->data.offset;
+
+ nir_instr_insert_before(&instr->instr, &offset_const->instr);
+
+ nir_ssa_def *offset_def = &offset_const->def;
+
+ /* walk the deref chain; every link is asserted to be an array deref */
+ nir_deref *tail = &instr->variables[0]->deref;
+ while (tail->child != NULL) {
+ assert(tail->child->deref_type == nir_deref_type_array);
+ nir_deref_array *deref_array = nir_deref_as_array(tail->child);
+ tail = tail->child;
+
+ /* 1 for the innermost deref; otherwise glsl_get_aoa_size() of this
+ * level's type (presumably the element count of the nested arrays —
+ * TODO confirm)
+ */
+ unsigned child_array_elements = tail->child != NULL ?
+ glsl_get_aoa_size(tail->type) : 1;
+
+ offset_const->value.u[0] += deref_array->base_offset *
+ child_array_elements * ATOMIC_COUNTER_SIZE;
+
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ /* offset_def = indirect * (elements * ATOMIC_COUNTER_SIZE) + offset_def */
+ nir_load_const_instr *atomic_counter_size =
+ nir_load_const_instr_create(mem_ctx, 1);
+ atomic_counter_size->value.u[0] = child_array_elements * ATOMIC_COUNTER_SIZE;
+ nir_instr_insert_before(&instr->instr, &atomic_counter_size->instr);
+
+ nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul);
+ nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
+ mul->dest.write_mask = 0x1;
+ nir_src_copy(&mul->src[0].src, &deref_array->indirect, mul);
+ mul->src[1].src.is_ssa = true;
+ mul->src[1].src.ssa = &atomic_counter_size->def;
+ nir_instr_insert_before(&instr->instr, &mul->instr);
+
+ nir_alu_instr *add = nir_alu_instr_create(mem_ctx, nir_op_iadd);
+ nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
+ add->dest.write_mask = 0x1;
+ add->src[0].src.is_ssa = true;
+ add->src[0].src.ssa = &mul->dest.dest.ssa;
+ add->src[1].src.is_ssa = true;
+ add->src[1].src.ssa = offset_def;
+ nir_instr_insert_before(&instr->instr, &add->instr);
+
+ offset_def = &add->dest.dest.ssa;
+ }
+ }
+
+ new_instr->src[0].is_ssa = true;
+ new_instr->src[0].ssa = offset_def;
+
+ /* take over the destination: an SSA dest gets a fresh def and all uses
+ * of the old one are rewritten; a non-SSA dest is copied
+ */
+ if (instr->dest.is_ssa) {
+ nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
+ instr->dest.ssa.num_components, NULL);
+ nir_ssa_def_rewrite_uses(&instr->dest.ssa,
+ nir_src_for_ssa(&new_instr->dest.ssa));
+ } else {
+ nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx);
+ }
+
+ nir_instr_insert_before(&instr->instr, &new_instr->instr);
+ nir_instr_remove(&instr->instr);
+}
+
+/* Block callback for nir_foreach_block(): runs lower_instr() on every
+ * intrinsic instruction of the block.  `state` is the lower_atomic_state
+ * of the pass.  Always returns true.
+ */
+static bool
+lower_block(nir_block *block, void *state)
+{
+   lower_atomic_state *atomic_state = (lower_atomic_state *) state;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      lower_instr(nir_instr_as_intrinsic(instr), atomic_state);
+   }
+
+   return true;
+}
+
+/* Pass entry point: lowers the variable-based atomic counter intrinsics of
+ * every function implementation in `shader`, looking buffer indices up in
+ * `shader_program`.  Block-index and dominance metadata are declared
+ * preserved for each implementation visited.
+ */
+void
+nir_lower_atomics(nir_shader *shader,
+                  const struct gl_shader_program *shader_program)
+{
+   lower_atomic_state state;
+
+   state.shader_program = shader_program;
+   state.shader = shader;
+
+   nir_foreach_function(shader, function) {
+      if (!function->impl)
+         continue;
+
+      nir_foreach_block(function->impl, lower_block, (void *) &state);
+      nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                            nir_metadata_dominance);
+   }
+}
diff --git a/src/compiler/nir/nir_lower_clip.c b/src/compiler/nir/nir_lower_clip.c
new file mode 100644
index 00000000000..0ca6a289396
--- /dev/null
+++ b/src/compiler/nir/nir_lower_clip.c
@@ -0,0 +1,339 @@
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <[email protected]>
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+#define MAX_CLIP_PLANES 8
+
+/* Generates the lowering code for user-clip-planes, generating CLIPDIST
+ * from UCP[n] + CLIPVERTEX or POSITION. Additionally, there is an optional
+ * pass for fragment shaders that inserts conditional kills based on the
+ * interpolated CLIPDIST.
+ *
+ * NOTE: should be run after nir_lower_outputs_to_temporaries() (or at
+ * least in scenarios where you can count on each output written once
+ * and only once).
+ */
+
+
+/* Creates a vec4 CLIPDIST variable and appends it to the shader's output
+ * or input list.
+ *
+ * shader: shader that owns the variable (also its ralloc parent)
+ * drvloc: stored in data.driver_location and used to build the name
+ * output: true for a shader output, false for a shader input
+ * slot:   stored in data.location
+ *
+ * Returns the new variable.  shader->num_outputs or shader->num_inputs is
+ * incremented by one.
+ */
+static nir_variable *
+create_clipdist_var(nir_shader *shader, unsigned drvloc,
+                    bool output, gl_varying_slot slot)
+{
+   nir_variable *var = rzalloc(shader, nir_variable);
+
+   var->data.driver_location = drvloc;
+   var->type = glsl_vec4_type();
+   var->data.mode = output ? nir_var_shader_out : nir_var_shader_in;
+   /* drvloc is unsigned, so it is formatted with %u rather than %d */
+   var->name = ralloc_asprintf(var, "clipdist_%u", drvloc);
+   var->data.index = 0;
+   var->data.location = slot;
+
+   if (output) {
+      exec_list_push_tail(&shader->outputs, &var->node);
+      shader->num_outputs++; /* TODO use type_size() */
+   }
+   else {
+      exec_list_push_tail(&shader->inputs, &var->node);
+      shader->num_inputs++; /* TODO use type_size() */
+   }
+   return var;
+}
+
+/* Emits, at the builder's cursor, a store_output intrinsic that writes
+ * val[0..3] as one vec4 to the driver location of `out`.
+ *
+ * val must point at (at least) four scalar SSA defs.
+ */
+static void
+store_clipdist_output(nir_builder *b, nir_variable *out, nir_ssa_def **val)
+{
+ nir_intrinsic_instr *store;
+
+ store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
+ store->num_components = 4;
+ store->const_index[0] = out->data.driver_location;
+ store->const_index[1] = 0xf; /* wrmask */
+ store->src[0].ssa = nir_vec4(b, val[0], val[1], val[2], val[3]);
+ store->src[0].is_ssa = true;
+ /* src[1] is a constant 0 (presumably the offset source — TODO confirm) */
+ store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
+ nir_builder_instr_insert(b, &store->instr);
+}
+
+/* Emits, at the builder's cursor, a 4-component load_input from the driver
+ * location of `in` and stores its four channels, in order, in val[0..3].
+ */
+static void
+load_clipdist_input(nir_builder *b, nir_variable *in, nir_ssa_def **val)
+{
+   nir_intrinsic_instr *load =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
+
+   load->num_components = 4;
+   load->const_index[0] = in->data.driver_location;
+   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
+   nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
+   nir_builder_instr_insert(b, &load->instr);
+
+   /* split the vec4 into scalars, channel 0 first */
+   for (unsigned chan = 0; chan < 4; chan++)
+      val[chan] = nir_channel(b, &load->dest.ssa, chan);
+}
+
+/* Search state for find_output() / find_output_in_block(). */
+struct find_output_state
+{
+ /* driver location to look for (compared with store const_index[0]) */
+ unsigned drvloc;
+ /* value stored to that location; NULL until a matching store is seen */
+ nir_ssa_def *def;
+};
+
+/* Block callback: looks for a store_output intrinsic whose const_index[0]
+ * equals state->drvloc and records its src[0] SSA def in state->def.
+ *
+ * Asserts that no def was recorded before, that src[0] is SSA and that
+ * src[1] is a constant.  When DEBUG is not defined it returns false on the
+ * first match; otherwise it returns true.
+ */
+static bool
+find_output_in_block(nir_block *block, void *void_state)
+{
+ struct find_output_state *state = void_state;
+ nir_foreach_instr(block, instr) {
+
+ if (instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if ((intr->intrinsic == nir_intrinsic_store_output) &&
+ intr->const_index[0] == state->drvloc) {
+ assert(state->def == NULL);
+ assert(intr->src[0].is_ssa);
+ assert(nir_src_as_const_value(intr->src[1]));
+ state->def = intr->src[0].ssa;
+
+#if !defined(DEBUG)
+ /* for debug builds, scan entire shader to assert
+ * if output is written multiple times. For release
+ * builds just assume all is well and bail when we
+ * find first:
+ */
+ return false;
+#endif
+ }
+ }
+ }
+
+ return true;
+}
+
+/* Returns the SSA def stored to the output at driver location `drvloc`,
+ * or NULL when no matching store_output is found.  The blocks of every
+ * function implementation are visited in reverse order.
+ *
+ * TODO: maybe this would be a useful helper?
+ * NOTE: assumes each output is written exactly once (and unconditionally),
+ * so run nir_lower_outputs_to_temporaries() first if needed.
+ */
+static nir_ssa_def *
+find_output(nir_shader *shader, unsigned drvloc)
+{
+   struct find_output_state state;
+
+   state.drvloc = drvloc;
+   state.def = NULL;
+
+   nir_foreach_function(shader, function) {
+      if (!function->impl)
+         continue;
+
+      nir_foreach_block_reverse(function->impl,
+                                find_output_in_block, &state);
+   }
+
+   return state.def;
+}
+
+/*
+ * VS lowering
+ */
+
+/* Appends the clip-distance computation to the end of `impl`.
+ *
+ * impl:        function implementation to extend
+ * ucp_enables: bitmask of enabled user clip planes (bit n = plane n)
+ * cv:          clip vertex value that is dotted with each plane
+ * out:         out[0] receives planes 0-3, out[1] planes 4-7; an entry is
+ *              only used when one of its four planes is enabled
+ *
+ * Enabled planes get dot(ucp, cv); disabled planes get constant 0.0.
+ */
+static void
+lower_clip_vs(nir_function_impl *impl, unsigned ucp_enables,
+ nir_ssa_def *cv, nir_variable **out)
+{
+ nir_ssa_def *clipdist[MAX_CLIP_PLANES];
+ nir_builder b;
+
+ nir_builder_init(&b, impl);
+
+ /* NIR should ensure that, even in case of loops/if-else, there
+ * should be only a single predecessor block to end_block, which
+ * makes the perfect place to insert the clipdist calculations.
+ *
+ * NOTE: in case of early return's, these would have to be lowered
+ * to jumps to end_block predecessor in a previous pass. Not sure
+ * if there is a good way to sanity check this, but for now the
+ * users of this pass don't support sub-routines.
+ */
+ assert(impl->end_block->predecessors->entries == 1);
+ b.cursor = nir_after_cf_list(&impl->body);
+
+ for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) {
+ if (ucp_enables & (1 << plane)) {
+ nir_ssa_def *ucp =
+ nir_load_system_value(&b, nir_intrinsic_load_user_clip_plane, plane);
+
+ /* calculate clipdist[plane] - dot(ucp, cv): */
+ clipdist[plane] = nir_fdot4(&b, ucp, cv);
+ }
+ else {
+ /* 0.0 == don't-clip == disabled: */
+ clipdist[plane] = nir_imm_float(&b, 0.0);
+ }
+ }
+
+ /* one vec4 store per group of four planes with any plane enabled */
+ if (ucp_enables & 0x0f)
+ store_clipdist_output(&b, out[0], &clipdist[0]);
+ if (ucp_enables & 0xf0)
+ store_clipdist_output(&b, out[1], &clipdist[4]);
+
+ nir_metadata_preserve(impl, nir_metadata_dominance);
+}
+
+/* ucp_enables is bitmask of enabled ucp's. Actual ucp values are
+ * passed in to shader via user_clip_plane system-values
+ *
+ * Does nothing when ucp_enables is 0, when the shader already declares a
+ * CLIP_DIST0/CLIP_DIST1 output, or when it declares neither a CLIP_VERTEX
+ * nor a POS output.  CLIP_VERTEX is preferred over POS as the clip vertex.
+ * The new CLIPDIST outputs get the driver locations following the highest
+ * one in use.
+ *
+ * NOTE(review): find_output() returns NULL when it finds no matching
+ * store, and cv is handed to lower_clip_vs() without a NULL check.
+ */
+void
+nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables)
+{
+ int clipvertex = -1;
+ int position = -1;
+ int maxloc = -1;
+ nir_ssa_def *cv;
+ nir_variable *out[2] = { NULL };
+
+ if (!ucp_enables)
+ return;
+
+ /* find clipvertex/position outputs: */
+ nir_foreach_variable(var, &shader->outputs) {
+ int loc = var->data.driver_location;
+
+ /* keep track of last used driver-location.. we'll be
+ * appending CLIP_DIST0/CLIP_DIST1 after last existing
+ * output:
+ */
+ maxloc = MAX2(maxloc, loc);
+
+ switch (var->data.location) {
+ case VARYING_SLOT_POS:
+ position = loc;
+ break;
+ case VARYING_SLOT_CLIP_VERTEX:
+ clipvertex = loc;
+ break;
+ case VARYING_SLOT_CLIP_DIST0:
+ case VARYING_SLOT_CLIP_DIST1:
+ /* if shader is already writing CLIPDIST, then
+ * there should be no user-clip-planes to deal
+ * with.
+ */
+ return;
+ }
+ }
+
+ /* pick the value to clip against: CLIP_VERTEX first, then POS */
+ if (clipvertex != -1)
+ cv = find_output(shader, clipvertex);
+ else if (position != -1)
+ cv = find_output(shader, position);
+ else
+ return;
+
+ /* insert CLIPDIST outputs: */
+ if (ucp_enables & 0x0f)
+ out[0] =
+ create_clipdist_var(shader, ++maxloc, true, VARYING_SLOT_CLIP_DIST0);
+ if (ucp_enables & 0xf0)
+ out[1] =
+ create_clipdist_var(shader, ++maxloc, true, VARYING_SLOT_CLIP_DIST1);
+
+ /* only the function named "main" is lowered */
+ nir_foreach_function(shader, function) {
+ if (!strcmp(function->name, "main"))
+ lower_clip_vs(function->impl, ucp_enables, cv, out);
+ }
+}
+
+/*
+ * FS lowering
+ */
+
+/* Inserts, at the start of `impl`, loads of the CLIPDIST inputs followed
+ * by one discard_if per enabled plane.
+ *
+ * impl:        function implementation to extend
+ * ucp_enables: bitmask of enabled user clip planes (bit n = plane n)
+ * in:          in[0] supplies planes 0-3, in[1] planes 4-7; an entry is
+ *              only read when one of its four planes is enabled
+ */
+static void
+lower_clip_fs(nir_function_impl *impl, unsigned ucp_enables,
+ nir_variable **in)
+{
+ nir_ssa_def *clipdist[MAX_CLIP_PLANES];
+ nir_builder b;
+
+ nir_builder_init(&b, impl);
+ b.cursor = nir_before_cf_list(&impl->body);
+
+ /* clipdist[] is only filled for groups with an enabled plane; the loop
+ * below only reads entries of enabled planes
+ */
+ if (ucp_enables & 0x0f)
+ load_clipdist_input(&b, in[0], &clipdist[0]);
+ if (ucp_enables & 0xf0)
+ load_clipdist_input(&b, in[1], &clipdist[4]);
+
+ for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) {
+ if (ucp_enables & (1 << plane)) {
+ nir_intrinsic_instr *discard;
+ nir_ssa_def *cond;
+
+ /* discard when the clip distance is below zero */
+ cond = nir_flt(&b, clipdist[plane], nir_imm_float(&b, 0.0));
+
+ discard = nir_intrinsic_instr_create(b.shader,
+ nir_intrinsic_discard_if);
+ discard->src[0] = nir_src_for_ssa(cond);
+ nir_builder_instr_insert(&b, &discard->instr);
+ }
+ }
+}
+
+/* insert conditional kill based on interpolated CLIPDIST
+ *
+ * ucp_enables is the bitmask of enabled user clip planes; nothing is done
+ * when it is 0.  One CLIPDIST input is created per group of four planes
+ * that has an enabled plane, at the driver locations following the highest
+ * one in use, and the function named "main" is lowered.
+ */
+void
+nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables)
+{
+   /* NULL-initialized like out[] in nir_lower_clip_vs(), so an entry that
+    * is not created below is never an indeterminate pointer.
+    */
+   nir_variable *in[2] = { NULL };
+   int maxloc = -1;
+
+   if (!ucp_enables)
+      return;
+
+   nir_foreach_variable(var, &shader->inputs) {
+      int loc = var->data.driver_location;
+
+      /* keep track of last used driver-location.. we'll be
+       * appending CLIP_DIST0/CLIP_DIST1 after last existing
+       * input:
+       */
+      maxloc = MAX2(maxloc, loc);
+   }
+
+   /* The shader won't normally have CLIPDIST inputs, so we
+    * must add our own:
+    */
+   if (ucp_enables & 0x0f)
+      in[0] =
+         create_clipdist_var(shader, ++maxloc, false,
+                             VARYING_SLOT_CLIP_DIST0);
+   if (ucp_enables & 0xf0)
+      in[1] =
+         create_clipdist_var(shader, ++maxloc, false,
+                             VARYING_SLOT_CLIP_DIST1);
+
+   nir_foreach_function(shader, function) {
+      if (!strcmp(function->name, "main"))
+         lower_clip_fs(function->impl, ucp_enables, in);
+   }
+}
diff --git a/src/compiler/nir/nir_lower_global_vars_to_local.c b/src/compiler/nir/nir_lower_global_vars_to_local.c
new file mode 100644
index 00000000000..7b4cd4ee8dc
--- /dev/null
+++ b/src/compiler/nir/nir_lower_global_vars_to_local.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ *
+ */
+
+/*
+ * This lowering pass detects when a global variable is only being used by
+ * one function and makes it local to that function
+ */
+
+#include "nir.h"
+
+/* State for the scan that records which function uses each global. */
+struct global_to_local_state {
+ /* function implementation whose blocks are currently being scanned */
+ nir_function_impl *impl;
+ /* A hash table keyed on variable pointers that stores the unique
+ * nir_function_impl that uses the given variable. If a variable is
+ * used in multiple functions, the data for the given key will be NULL.
+ */
+ struct hash_table *var_func_table;
+};
+
+/* Block callback: for every variable with mode nir_var_global referenced
+ * by an intrinsic in the block, records state->impl as its user in
+ * state->var_func_table.  A variable already recorded for a different
+ * implementation has its entry data reset to NULL.  Always returns true.
+ */
+static bool
+mark_global_var_uses_block(nir_block *block, void *void_state)
+{
+   struct global_to_local_state *state = void_state;
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      unsigned var_count =
+         nir_intrinsic_infos[intrin->intrinsic].num_variables;
+
+      for (unsigned v = 0; v < var_count; v++) {
+         nir_variable *var = intrin->variables[v]->var;
+         if (var->data.mode != nir_var_global)
+            continue;
+
+         struct hash_entry *entry =
+            _mesa_hash_table_search(state->var_func_table, var);
+
+         if (!entry) {
+            /* first use seen: remember the using implementation */
+            _mesa_hash_table_insert(state->var_func_table, var, state->impl);
+            continue;
+         }
+
+         /* seen before: clear the entry if the user differs */
+         if (entry->data != state->impl)
+            entry->data = NULL;
+      }
+   }
+
+   return true;
+}
+
+/* Pass entry point: moves every global variable that is referenced by
+ * exactly one function implementation into that implementation's locals.
+ *
+ * Returns true when at least one variable was moved.
+ */
+bool
+nir_lower_global_vars_to_local(nir_shader *shader)
+{
+ struct global_to_local_state state;
+ bool progress = false;
+
+ state.var_func_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ /* phase 1: record, per global variable, the implementation using it */
+ nir_foreach_function(shader, function) {
+ if (function->impl) {
+ state.impl = function->impl;
+ nir_foreach_block(function->impl, mark_global_var_uses_block, &state);
+ }
+ }
+
+ /* phase 2: move variables whose entry still names a single user */
+ struct hash_entry *entry;
+ hash_table_foreach(state.var_func_table, entry) {
+ nir_variable *var = (void *)entry->key;
+ nir_function_impl *impl = entry->data;
+
+ assert(var->data.mode == nir_var_global);
+
+ /* NULL means the variable is used by more than one function */
+ if (impl != NULL) {
+ exec_node_remove(&var->node);
+ var->data.mode = nir_var_local;
+ exec_list_push_tail(&impl->locals, &var->node);
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance |
+ nir_metadata_live_ssa_defs);
+ progress = true;
+ }
+ }
+
+ _mesa_hash_table_destroy(state.var_func_table, NULL);
+
+ return progress;
+}
diff --git a/src/compiler/nir/nir_lower_gs_intrinsics.c b/src/compiler/nir/nir_lower_gs_intrinsics.c
new file mode 100644
index 00000000000..fdff1656b4d
--- /dev/null
+++ b/src/compiler/nir/nir_lower_gs_intrinsics.c
@@ -0,0 +1,219 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/**
+ * \file nir_lower_gs_intrinsics.c
+ *
+ * Geometry Shaders can call EmitVertex()/EmitStreamVertex() to output an
+ * arbitrary number of vertices. However, the shader must declare the maximum
+ * number of vertices that it will ever output - further attempts to emit
+ * vertices result in undefined behavior according to the GLSL specification.
+ *
+ * Drivers might use this maximum number of vertices to allocate enough space
+ * to hold the geometry shader's output. Some drivers (such as i965) need to
+ * implement "safety checks" which ensure that the shader hasn't emitted too
+ * many vertices, to avoid overflowing that space and trashing other memory.
+ *
+ * The count of emitted vertices can also be useful in buffer offset
+ * calculations, so drivers know where to write the GS output.
+ *
+ * However, for simple geometry shaders that emit a statically determinable
+ * number of vertices, this extra bookkeeping is unnecessary and inefficient.
+ * By tracking the vertex count in NIR, we allow constant folding/propagation
+ * and dead control flow optimizations to eliminate most of it where possible.
+ *
+ * This pass introduces a new global variable which stores the current vertex
+ * count (initialized to 0), and converts emit_vertex/end_primitive intrinsics
+ * to their *_with_counter variants. emit_vertex is also wrapped in a safety
+ * check to avoid buffer overflows. Finally, it adds a set_vertex_count
+ * intrinsic at the end of the program, informing the driver of the final
+ * vertex count.
+ */
+
+/* State shared by the GS intrinsic rewriting helpers. */
+struct state {
+ /* builder for the function implementation currently being rewritten */
+ nir_builder *builder;
+ /* variable holding the running vertex count */
+ nir_variable *vertex_count_var;
+ /* set to true when an emit_vertex or end_primitive was rewritten */
+ bool progress;
+};
+
+/**
+ * Replace emit_vertex intrinsics with:
+ *
+ * if (vertex_count < max_vertices) {
+ * emit_vertex_with_counter vertex_count ...
+ * vertex_count += 1
+ * }
+ *
+ * max_vertices is the shader's info.gs.vertices_out.  const_index[0] of
+ * the original intrinsic is carried over to the replacement, the original
+ * is removed, and state->progress is set.
+ */
+static void
+rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct state *state)
+{
+ nir_builder *b = state->builder;
+
+ /* Load the vertex count */
+ b->cursor = nir_before_instr(&intrin->instr);
+ nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);
+
+ nir_ssa_def *max_vertices = nir_imm_int(b, b->shader->info.gs.vertices_out);
+
+ /* Create: if (vertex_count < max_vertices) and insert it.
+ *
+ * The new if statement needs to be hooked up to the control flow graph
+ * before we start inserting instructions into it.
+ */
+ nir_if *if_stmt = nir_if_create(b->shader);
+ if_stmt->condition = nir_src_for_ssa(nir_ilt(b, count, max_vertices));
+ nir_builder_cf_insert(b, &if_stmt->cf_node);
+
+ /* Fill out the new then-block */
+ b->cursor = nir_after_cf_list(&if_stmt->then_list);
+
+ nir_intrinsic_instr *lowered =
+ nir_intrinsic_instr_create(b->shader,
+ nir_intrinsic_emit_vertex_with_counter);
+ lowered->const_index[0] = intrin->const_index[0];
+ lowered->src[0] = nir_src_for_ssa(count);
+ nir_builder_instr_insert(b, &lowered->instr);
+
+ /* Increment the vertex count by 1 */
+ nir_store_var(b, state->vertex_count_var,
+ nir_iadd(b, count, nir_imm_int(b, 1)),
+ 0x1); /* .x */
+
+ nir_instr_remove(&intrin->instr);
+
+ state->progress = true;
+}
+
+/**
+ * Replace end_primitive with end_primitive_with_counter.
+ *
+ * The replacement is inserted before the original, takes the current
+ * vertex count as src[0] and keeps the original's const_index[0].  The
+ * original is removed and state->progress is set.
+ */
+static void
+rewrite_end_primitive(nir_intrinsic_instr *intrin, struct state *state)
+{
+ nir_builder *b = state->builder;
+
+ b->cursor = nir_before_instr(&intrin->instr);
+ nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);
+
+ nir_intrinsic_instr *lowered =
+ nir_intrinsic_instr_create(b->shader,
+ nir_intrinsic_end_primitive_with_counter);
+ lowered->const_index[0] = intrin->const_index[0];
+ lowered->src[0] = nir_src_for_ssa(count);
+ nir_builder_instr_insert(b, &lowered->instr);
+
+ nir_instr_remove(&intrin->instr);
+
+ state->progress = true;
+}
+
+/* Block callback: dispatches each emit_vertex / end_primitive intrinsic of
+ * the block to its rewrite helper.  `closure` is the pass's struct state.
+ * Always returns true.
+ */
+static bool
+rewrite_intrinsics(nir_block *block, void *closure)
+{
+   struct state *state = closure;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+      if (intrin->intrinsic == nir_intrinsic_emit_vertex)
+         rewrite_emit_vertex(intrin, state);
+      else if (intrin->intrinsic == nir_intrinsic_end_primitive)
+         rewrite_end_primitive(intrin, state);
+      /* anything else is not interesting; skip it */
+   }
+
+   return true;
+}
+
+/**
+ * Add a set_vertex_count intrinsic at the end of the program
+ * (representing the final vertex count).
+ *
+ * One load of the vertex count plus one set_vertex_count is emitted in
+ * every predecessor of end_block.
+ */
+static void
+append_set_vertex_count(nir_block *end_block, struct state *state)
+{
+ nir_builder *b = state->builder;
+ nir_shader *shader = state->builder->shader;
+
+ /* Insert the new intrinsic in all of the predecessors of the end block,
+ * but before any jump instructions (return).
+ */
+ struct set_entry *entry;
+ set_foreach(end_block->predecessors, entry) {
+ nir_block *pred = (nir_block *) entry->key;
+ b->cursor = nir_after_block_before_jump(pred);
+
+ nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);
+
+ nir_intrinsic_instr *set_vertex_count =
+ nir_intrinsic_instr_create(shader, nir_intrinsic_set_vertex_count);
+ set_vertex_count->src[0] = nir_src_for_ssa(count);
+
+ nir_builder_instr_insert(b, &set_vertex_count->instr);
+ }
+}
+
+/* Pass entry point: adds a global "vertex_count" variable to the shader,
+ * rewrites emit_vertex/end_primitive in every function implementation and
+ * appends set_vertex_count at the end of each of them.
+ *
+ * Returns true when at least one emit_vertex or end_primitive was
+ * rewritten.  The variable and the set_vertex_count intrinsics are added
+ * even when it returns false.
+ */
+bool
+nir_lower_gs_intrinsics(nir_shader *shader)
+{
+ struct state state;
+ state.progress = false;
+
+ /* Create the counter variable */
+ nir_variable *var = rzalloc(shader, nir_variable);
+ var->data.mode = nir_var_global;
+ var->type = glsl_uint_type();
+ /* note: the name points at a string literal, not a ralloc'ed copy */
+ var->name = "vertex_count";
+ var->constant_initializer = rzalloc(shader, nir_constant); /* initialize to 0 */
+
+ exec_list_push_tail(&shader->globals, &var->node);
+ state.vertex_count_var = var;
+
+ nir_foreach_function(shader, function) {
+ if (function->impl) {
+ /* b lives in this scope; state.builder is re-pointed per function */
+ nir_builder b;
+ nir_builder_init(&b, function->impl);
+ state.builder = &b;
+
+ nir_foreach_block(function->impl, rewrite_intrinsics, &state);
+
+ /* This only works because we have a single main() function. */
+ append_set_vertex_count(function->impl->end_block, &state);
+
+ /* 0: no nir_metadata_* flag is passed as preserved */
+ nir_metadata_preserve(function->impl, 0);
+ }
+ }
+
+ return state.progress;
+}
diff --git a/src/compiler/nir/nir_lower_idiv.c b/src/compiler/nir/nir_lower_idiv.c
new file mode 100644
index 00000000000..a084ad9c0e5
--- /dev/null
+++ b/src/compiler/nir/nir_lower_idiv.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <[email protected]>
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/* Lowers idiv/udiv/umod
+ * Based on NV50LegalizeSSA::handleDIV()
+ *
+ * Note that this is probably not enough precision for compute shaders.
+ * Perhaps we want a second higher precision (looping) version of this?
+ * Or perhaps we assume if you can do compute shaders you can also
+ * branch out to a pre-optimized shader library routine..
+ */
+
+/* Emits, before `alu`, a float-reciprocal based replacement for an
+ * idiv/udiv/umod instruction and rewrites all uses of alu's SSA result to
+ * the new value.  Any other opcode is left untouched.
+ *
+ * The original instruction is not removed here; only its uses are
+ * rewritten.  alu's destination must be SSA (asserted).
+ */
+static void
+convert_instr(nir_builder *bld, nir_alu_instr *alu)
+{
+ nir_ssa_def *numer, *denom, *af, *bf, *a, *b, *q, *r;
+ nir_op op = alu->op;
+ bool is_signed;
+
+ if ((op != nir_op_idiv) &&
+ (op != nir_op_udiv) &&
+ (op != nir_op_umod))
+ return;
+
+ is_signed = (op == nir_op_idiv);
+
+ bld->cursor = nir_before_instr(&alu->instr);
+
+ numer = nir_ssa_for_alu_src(bld, alu, 0);
+ denom = nir_ssa_for_alu_src(bld, alu, 1);
+
+ /* af/bf: float magnitudes of the operands; a/b: integer magnitudes */
+ if (is_signed) {
+ af = nir_i2f(bld, numer);
+ bf = nir_i2f(bld, denom);
+ af = nir_fabs(bld, af);
+ bf = nir_fabs(bld, bf);
+ a = nir_iabs(bld, numer);
+ b = nir_iabs(bld, denom);
+ } else {
+ af = nir_u2f(bld, numer);
+ bf = nir_u2f(bld, denom);
+ a = numer;
+ b = denom;
+ }
+
+ /* get first result: */
+ bf = nir_frcp(bld, bf);
+ /* integer subtract applied to the float reciprocal's bits (presumably to
+ * bias the estimate downward; see NV50LegalizeSSA::handleDIV())
+ */
+ bf = nir_isub(bld, bf, nir_imm_int(bld, 2)); /* yes, really */
+ q = nir_fmul(bld, af, bf);
+
+ if (is_signed) {
+ q = nir_f2i(bld, q);
+ } else {
+ q = nir_f2u(bld, q);
+ }
+
+ /* get error of first result: */
+ r = nir_imul(bld, q, b);
+ r = nir_isub(bld, a, r);
+ r = nir_u2f(bld, r);
+ r = nir_fmul(bld, r, bf);
+ r = nir_f2u(bld, r);
+
+ /* add quotients: */
+ q = nir_iadd(bld, q, r);
+
+ /* correction: if modulus >= divisor, add 1 */
+ r = nir_imul(bld, q, b);
+ r = nir_isub(bld, a, r);
+
+ r = nir_uge(bld, r, b);
+ r = nir_b2i(bld, r);
+
+ q = nir_iadd(bld, q, r);
+ if (is_signed) {
+ /* fix the sign: */
+ /* bit 31 of numer ^ denom is set when the operand signs differ */
+ r = nir_ixor(bld, numer, denom);
+ r = nir_ushr(bld, r, nir_imm_int(bld, 31));
+ r = nir_i2b(bld, r);
+ /* b is reused here to hold the negated quotient */
+ b = nir_ineg(bld, q);
+ q = nir_bcsel(bld, r, b, q);
+ }
+
+ if (op == nir_op_umod) {
+ /* division result in q */
+ /* remainder = a - q * b */
+ r = nir_imul(bld, q, b);
+ q = nir_isub(bld, a, r);
+ }
+
+ assert(alu->dest.dest.is_ssa);
+ nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(q));
+}
+
+/* Block callback: runs convert_instr() on every ALU instruction of the
+ * block.  `state` is the nir_builder of the pass.  Always returns true.
+ */
+static bool
+convert_block(nir_block *block, void *state)
+{
+   nir_builder *bld = state;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         continue;
+
+      convert_instr(bld, nir_instr_as_alu(instr));
+   }
+
+   return true;
+}
+
+/* Lowers the integer divisions of one function implementation and declares
+ * block-index and dominance metadata preserved afterwards.
+ */
+static void
+convert_impl(nir_function_impl *impl)
+{
+   nir_builder bld;
+
+   nir_builder_init(&bld, impl);
+   nir_foreach_block(impl, convert_block, &bld);
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+}
+
+/* Pass entry point: lowers idiv/udiv/umod in every function of the shader
+ * that has an implementation.
+ */
+void
+nir_lower_idiv(nir_shader *shader)
+{
+   nir_foreach_function(shader, function) {
+      if (!function->impl)
+         continue;
+
+      convert_impl(function->impl);
+   }
+}
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
new file mode 100644
index 00000000000..80c5151f0ea
--- /dev/null
+++ b/src/compiler/nir/nir_lower_io.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ * Jason Ekstrand ([email protected])
+ *
+ */
+
+/*
+ * This lowering pass converts references to input/output variables with
+ * loads/stores to actual input/output intrinsics.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/* State handed to the per-block callback of the I/O lowering pass. */
+struct lower_io_state {
+ /* Builder used to emit the offset computations in front of each access. */
+ nir_builder builder;
+ /* ralloc context the replacement load/store intrinsics are created in. */
+ void *mem_ctx;
+ /* Caller-supplied callback returning the size of a GLSL type; all offsets
+ * are expressed in whatever unit this callback uses.
+ */
+ int (*type_size)(const struct glsl_type *type);
+ /* Variable mode to lower; nir_var_all means "do not filter by mode". */
+ nir_variable_mode mode;
+};
+
+/**
+ * Assigns consecutive driver locations to the variables in var_list.
+ *
+ * Each variable receives the running total of the sizes (as reported by
+ * type_size) of the variables placed before it.  The final total is written
+ * to *size.  Uniform and shader-storage variables that belong to an interface
+ * block are skipped: they neither get a location nor add to the total.
+ */
+void
+nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
+                         int (*type_size)(const struct glsl_type *))
+{
+   unsigned next_location = 0;
+
+   nir_foreach_variable(var, var_list) {
+      const bool is_buffer_mode =
+         var->data.mode == nir_var_uniform ||
+         var->data.mode == nir_var_shader_storage;
+
+      /* Interface blocks (e.g. UBOs) have their own address spaces, so they
+       * must not count towards the global total.
+       */
+      if (is_buffer_mode && var->interface_type != NULL)
+         continue;
+
+      var->data.driver_location = next_location;
+      next_location += type_size(var->type);
+   }
+
+   *size = next_location;
+}
+
+/**
+ * Tells whether var is a non-patch shader input in a stage whose inputs are
+ * arrays indexed by a vertex number (tessellation control, tessellation
+ * evaluation or geometry).
+ */
+static bool
+is_per_vertex_input(struct lower_io_state *state, nir_variable *var)
+{
+   if (var->data.mode != nir_var_shader_in || var->data.patch)
+      return false;
+
+   switch (state->builder.shader->stage) {
+   case MESA_SHADER_TESS_CTRL:
+   case MESA_SHADER_TESS_EVAL:
+   case MESA_SHADER_GEOMETRY:
+      return true;
+   default:
+      return false;
+   }
+}
+
+/**
+ * Tells whether var is a non-patch shader output of a tessellation control
+ * shader.
+ */
+static bool
+is_per_vertex_output(struct lower_io_state *state, nir_variable *var)
+{
+   if (state->builder.shader->stage != MESA_SHADER_TESS_CTRL)
+      return false;
+
+   return var->data.mode == nir_var_shader_out && !var->data.patch;
+}
+
+/**
+ * Emits code that computes the offset selected by a deref chain inside its
+ * variable and returns the SSA value holding that offset.
+ *
+ * All sizes come from the type_size callback, so the result is expressed in
+ * whatever unit that callback uses.
+ *
+ * If vertex_index is non-NULL, the first deref below the variable must be an
+ * array deref.  Its index is written to *vertex_index and is not folded into
+ * the returned offset.
+ */
+static nir_ssa_def *
+get_io_offset(nir_builder *b, nir_deref_var *deref,
+ nir_ssa_def **vertex_index,
+ int (*type_size)(const struct glsl_type *))
+{
+ nir_deref *tail = &deref->deref;
+
+ /* For per-vertex arrays (e.g. geometry shader inputs), keep the
+ * outermost array index separate. Process the rest normally.
+ */
+ if (vertex_index != NULL) {
+ tail = tail->child;
+ assert(tail->deref_type == nir_deref_type_array);
+ nir_deref_array *deref_array = nir_deref_as_array(tail);
+
+ /* Vertex index = constant part, plus the indirect part if there is one. */
+ nir_ssa_def *vtx = nir_imm_int(b, deref_array->base_offset);
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ vtx = nir_iadd(b, vtx, nir_ssa_for_src(b, deref_array->indirect, 1));
+ }
+ *vertex_index = vtx;
+ }
+
+ /* Just emit code and let constant-folding go to town */
+ nir_ssa_def *offset = nir_imm_int(b, 0);
+
+ while (tail->child != NULL) {
+ const struct glsl_type *parent_type = tail->type;
+ tail = tail->child;
+
+ if (tail->deref_type == nir_deref_type_array) {
+ nir_deref_array *deref_array = nir_deref_as_array(tail);
+ /* Size of the type this array deref yields, i.e. the stride. */
+ unsigned size = type_size(tail->type);
+
+ offset = nir_iadd(b, offset,
+ nir_imm_int(b, size * deref_array->base_offset));
+
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ nir_ssa_def *mul =
+ nir_imul(b, nir_imm_int(b, size),
+ nir_ssa_for_src(b, deref_array->indirect, 1));
+
+ offset = nir_iadd(b, offset, mul);
+ }
+ } else if (tail->deref_type == nir_deref_type_struct) {
+ nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
+
+ /* A field's offset is the summed size of all fields preceding it. */
+ unsigned field_offset = 0;
+ for (unsigned i = 0; i < deref_struct->index; i++) {
+ field_offset += type_size(glsl_get_struct_field(parent_type, i));
+ }
+ offset = nir_iadd(b, offset, nir_imm_int(b, field_offset));
+ }
+ }
+
+ return offset;
+}
+
+/**
+ * Selects the load intrinsic for a variable mode.  Inputs and outputs have a
+ * dedicated per-vertex variant; uniforms have a single load opcode.  Any
+ * other mode is a programming error.
+ */
+static nir_intrinsic_op
+load_op(struct lower_io_state *state,
+        nir_variable_mode mode, bool per_vertex)
+{
+   switch (mode) {
+   case nir_var_shader_in:
+      if (per_vertex)
+         return nir_intrinsic_load_per_vertex_input;
+      return nir_intrinsic_load_input;
+
+   case nir_var_shader_out:
+      if (per_vertex)
+         return nir_intrinsic_load_per_vertex_output;
+      return nir_intrinsic_load_output;
+
+   case nir_var_uniform:
+      return nir_intrinsic_load_uniform;
+
+   default:
+      unreachable("Unknown variable mode");
+   }
+}
+
+/**
+ * Block callback of the I/O lowering pass.
+ *
+ * Replaces every load_var/store_var whose variable is a shader input, shader
+ * output or uniform (and matches state->mode, unless that is nir_var_all)
+ * with the corresponding load/store intrinsic.  The variable's
+ * driver_location goes into const_index[0] and the offset computed by
+ * get_io_offset() becomes a source.  Always returns true.
+ */
+static bool
+nir_lower_io_block(nir_block *block, void *void_state)
+{
+ struct lower_io_state *state = void_state;
+
+ nir_builder *b = &state->builder;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ if (intrin->intrinsic != nir_intrinsic_load_var &&
+ intrin->intrinsic != nir_intrinsic_store_var)
+ continue;
+
+ nir_variable_mode mode = intrin->variables[0]->var->data.mode;
+
+ /* Honor the mode filter requested by the caller. */
+ if (state->mode != nir_var_all && state->mode != mode)
+ continue;
+
+ /* Only these three modes are handled by this pass. */
+ if (mode != nir_var_shader_in &&
+ mode != nir_var_shader_out &&
+ mode != nir_var_uniform)
+ continue;
+
+ /* Offset arithmetic is emitted right before the access being lowered. */
+ b->cursor = nir_before_instr(instr);
+
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_load_var: {
+ bool per_vertex =
+ is_per_vertex_input(state, intrin->variables[0]->var) ||
+ is_per_vertex_output(state, intrin->variables[0]->var);
+
+ nir_ssa_def *offset;
+ /* Only written by get_io_offset(), and only read, when per_vertex. */
+ nir_ssa_def *vertex_index;
+
+ offset = get_io_offset(b, intrin->variables[0],
+ per_vertex ? &vertex_index : NULL,
+ state->type_size);
+
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(state->mem_ctx,
+ load_op(state, mode, per_vertex));
+ load->num_components = intrin->num_components;
+
+ load->const_index[0] =
+ intrin->variables[0]->var->data.driver_location;
+
+ /* Source layout: [vertex_index,] offset */
+ if (per_vertex)
+ load->src[0] = nir_src_for_ssa(vertex_index);
+
+ load->src[per_vertex ? 1 : 0] = nir_src_for_ssa(offset);
+
+ if (intrin->dest.is_ssa) {
+ nir_ssa_dest_init(&load->instr, &load->dest,
+ intrin->num_components, NULL);
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(&load->dest.ssa));
+ } else {
+ nir_dest_copy(&load->dest, &intrin->dest, state->mem_ctx);
+ }
+
+ nir_instr_insert_before(&intrin->instr, &load->instr);
+ nir_instr_remove(&intrin->instr);
+ break;
+ }
+
+ case nir_intrinsic_store_var: {
+ /* Of the three handled modes, only outputs may be stored to. */
+ assert(mode == nir_var_shader_out);
+
+ nir_ssa_def *offset;
+ /* Only written by get_io_offset(), and only read, when per_vertex. */
+ nir_ssa_def *vertex_index;
+
+ bool per_vertex =
+ is_per_vertex_output(state, intrin->variables[0]->var);
+
+ offset = get_io_offset(b, intrin->variables[0],
+ per_vertex ? &vertex_index : NULL,
+ state->type_size);
+
+ nir_intrinsic_op store_op =
+ per_vertex ? nir_intrinsic_store_per_vertex_output :
+ nir_intrinsic_store_output;
+
+ nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx,
+ store_op);
+ store->num_components = intrin->num_components;
+
+ /* Source layout: value, [vertex_index,] offset */
+ nir_src_copy(&store->src[0], &intrin->src[0], store);
+
+ store->const_index[0] =
+ intrin->variables[0]->var->data.driver_location;
+
+ /* Copy the writemask */
+ store->const_index[1] = intrin->const_index[0];
+
+ if (per_vertex)
+ store->src[1] = nir_src_for_ssa(vertex_index);
+
+ store->src[per_vertex ? 2 : 1] = nir_src_for_ssa(offset);
+
+ nir_instr_insert_before(&intrin->instr, &store->instr);
+ nir_instr_remove(&intrin->instr);
+ break;
+ }
+
+ default:
+ break;
+ }
+ }
+
+ return true;
+}
+
+/**
+ * Lowers the variable accesses of one function implementation.  Sets up the
+ * pass state, visits every block and finally declares block-index and
+ * dominance metadata as preserved.
+ */
+static void
+nir_lower_io_impl(nir_function_impl *impl,
+                  nir_variable_mode mode,
+                  int (*type_size)(const struct glsl_type *))
+{
+   struct lower_io_state state;
+
+   state.type_size = type_size;
+   state.mode = mode;
+   state.mem_ctx = ralloc_parent(impl);
+   nir_builder_init(&state.builder, impl);
+
+   nir_foreach_block(impl, nir_lower_io_block, &state);
+
+   nir_metadata_preserve(impl,
+                         nir_metadata_block_index | nir_metadata_dominance);
+}
+
+/**
+ * Entry point of the pass: lowers variable loads/stores of the given mode in
+ * every function of the shader that has an implementation.  type_size is
+ * used to measure types when computing offsets.
+ */
+void
+nir_lower_io(nir_shader *shader, nir_variable_mode mode,
+             int (*type_size)(const struct glsl_type *))
+{
+   nir_foreach_function(shader, function) {
+      if (function->impl == NULL)
+         continue;
+
+      nir_lower_io_impl(function->impl, mode, type_size);
+   }
+}
+
+/**
+ * Return the offset source of a load/store intrinsic, or NULL if the
+ * intrinsic is not one of the I/O intrinsics handled here.
+ */
+nir_src *
+nir_get_io_offset_src(nir_intrinsic_instr *instr)
+{
+   unsigned offset_idx;
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_output:
+   case nir_intrinsic_load_uniform:
+      offset_idx = 0;
+      break;
+
+   case nir_intrinsic_load_per_vertex_input:
+   case nir_intrinsic_load_per_vertex_output:
+   case nir_intrinsic_store_output:
+      offset_idx = 1;
+      break;
+
+   case nir_intrinsic_store_per_vertex_output:
+      offset_idx = 2;
+      break;
+
+   default:
+      return NULL;
+   }
+
+   return &instr->src[offset_idx];
+}
+
+/**
+ * Return the vertex index source of a per-vertex load/store intrinsic, or
+ * NULL for any other intrinsic.
+ */
+nir_src *
+nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
+{
+   const nir_intrinsic_op op = instr->intrinsic;
+
+   if (op == nir_intrinsic_load_per_vertex_input ||
+       op == nir_intrinsic_load_per_vertex_output)
+      return &instr->src[0];
+
+   if (op == nir_intrinsic_store_per_vertex_output)
+      return &instr->src[1];
+
+   return NULL;
+}
diff --git a/src/compiler/nir/nir_lower_load_const_to_scalar.c b/src/compiler/nir/nir_lower_load_const_to_scalar.c
new file mode 100644
index 00000000000..1eeed13cbac
--- /dev/null
+++ b/src/compiler/nir/nir_lower_load_const_to_scalar.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/macros.h"
+#include "nir.h"
+#include "nir_builder.h"
+
+/** @file nir_lower_load_const_to_scalar.c
+ *
+ * Replaces vector nir_load_const instructions with a series of loads and a
+ * vec[234] to reconstruct the original vector (on the assumption that
+ * nir_lower_alu_to_scalar() will then be used to split it up).
+ *
+ * This gives NIR a chance to CSE more operations on a scalar shader, when the
+ * same value was used in different vector constant loads.
+ */
+
+/* Splits one vector load_const into a scalar load_const per component, glues
+ * the scalars back together with a vecN, points all users of the old value
+ * at that vector and removes the old instruction.  Scalar loads are left
+ * alone.
+ */
+static void
+lower_load_const_instr_scalar(nir_load_const_instr *lower)
+{
+   const unsigned num_comps = lower->def.num_components;
+
+   if (num_comps == 1)
+      return;
+
+   nir_function_impl *impl =
+      nir_cf_node_get_function(&lower->instr.block->cf_node);
+
+   nir_builder bld;
+   nir_builder_init(&bld, impl);
+   bld.cursor = nir_before_instr(&lower->instr);
+
+   /* One single-component constant per channel of the original. */
+   nir_ssa_def *comps[4];
+   for (unsigned c = 0; c < num_comps; c++) {
+      nir_load_const_instr *scalar =
+         nir_load_const_instr_create(bld.shader, 1);
+
+      scalar->value.u[0] = lower->value.u[c];
+      nir_builder_instr_insert(&bld, &scalar->instr);
+      comps[c] = &scalar->def;
+   }
+
+   /* Rebuild the vector and retire the original load. */
+   nir_ssa_def *vec = nir_vec(&bld, comps, num_comps);
+
+   nir_ssa_def_rewrite_uses(&lower->def, nir_src_for_ssa(vec));
+   nir_instr_remove(&lower->instr);
+}
+
+/* Block callback: scalarizes every load_const instruction in the block.
+ * The data argument is unused.  Always returns true.
+ */
+static bool
+lower_load_const_to_scalar_block(nir_block *block, void *data)
+{
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_load_const)
+         continue;
+
+      lower_load_const_instr_scalar(nir_instr_as_load_const(instr));
+   }
+
+   return true;
+}
+
+/* Scalarizes the vector constant loads of one function implementation.
+ *
+ * The rewrite only inserts and removes instructions inside existing blocks,
+ * so block indices and dominance information stay valid.  Record exactly
+ * that, as the other lowering passes do, so that any other cached metadata
+ * is not mistaken for being up to date after this pass has run.
+ */
+static void
+nir_lower_load_const_to_scalar_impl(nir_function_impl *impl)
+{
+   nir_foreach_block(impl, lower_load_const_to_scalar_block, NULL);
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+}
+
+/* Entry point: scalarizes vector constant loads in every function of the
+ * shader that has an implementation.
+ */
+void
+nir_lower_load_const_to_scalar(nir_shader *shader)
+{
+   nir_foreach_function(shader, function) {
+      if (function->impl == NULL)
+         continue;
+
+      nir_lower_load_const_to_scalar_impl(function->impl);
+   }
+}
diff --git a/src/compiler/nir/nir_lower_locals_to_regs.c b/src/compiler/nir/nir_lower_locals_to_regs.c
new file mode 100644
index 00000000000..51b0fa733f2
--- /dev/null
+++ b/src/compiler/nir/nir_lower_locals_to_regs.c
@@ -0,0 +1,396 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ *
+ */
+
+#include "nir.h"
+#include "nir_array.h"
+
+/* State of the locals-to-registers pass for one function implementation. */
+struct locals_to_regs_state {
+ /* Shader that newly created instructions are allocated from. */
+ nir_shader *shader;
+ /* Function implementation being lowered; new registers are local to it. */
+ nir_function_impl *impl;
+
+ /* A hash table mapping derefs to registers */
+ struct hash_table *regs_table;
+
+ /* A growing array of derefs that we have encountered. There is exactly
+ * one element of this array per element in the hash table. This is
+ * used to make adding register initialization code deterministic.
+ */
+ nir_array derefs_array;
+
+ /* Set as soon as any instruction has been replaced or inserted. */
+ bool progress;
+};
+
+/* The following two functions implement a hash and an equality check for
+ * variable dereferences.  Array derefs contribute nothing to the hash and
+ * always compare equal to each other: neither their offset nor whether they
+ * are direct or indirect is looked at.
+ */
+static uint32_t
+hash_deref(const void *void_deref)
+{
+   const nir_deref_var *head = void_deref;
+   uint32_t hash = _mesa_fnv32_1a_offset_bias;
+
+   /* The variable itself ... */
+   hash = _mesa_fnv32_1a_accumulate(hash, head->var);
+
+   /* ... followed by the index of every struct member along the chain. */
+   const nir_deref *link = head->deref.child;
+   while (link != NULL) {
+      if (link->deref_type == nir_deref_type_struct) {
+         const nir_deref_struct *member = nir_deref_as_struct(link);
+         hash = _mesa_fnv32_1a_accumulate(hash, member->index);
+      }
+      link = link->child;
+   }
+
+   return hash;
+}
+
+/* Equality callback matching hash_deref(): two deref chains are equal when
+ * they start at the same variable, have the same deref type at every level
+ * and select the same member at every struct level.  Array levels always
+ * match.
+ */
+static bool
+derefs_equal(const void *void_a, const void *void_b)
+{
+   const nir_deref_var *lhs = void_a;
+   const nir_deref_var *rhs = void_b;
+
+   if (lhs->var != rhs->var)
+      return false;
+
+   const nir_deref *a = lhs->deref.child;
+   const nir_deref *b = rhs->deref.child;
+
+   while (a != NULL) {
+      if (a->deref_type != b->deref_type)
+         return false;
+
+      /* Only struct derefs carry something worth comparing. */
+      if (a->deref_type == nir_deref_type_struct &&
+          nir_deref_as_struct(a)->index != nir_deref_as_struct(b)->index)
+         return false;
+
+      /* Both chains are expected to end at the same depth. */
+      assert((a->child == NULL) == (b->child == NULL));
+      if ((a->child == NULL) != (b->child == NULL))
+         return false;
+
+      a = a->child;
+      b = b->child;
+   }
+
+   return true;
+}
+
+/* Returns the register standing in for the given deref, creating it on first
+ * use.  A new register has as many components as the vector/scalar at the end
+ * of the chain and as many array elements as the product of the lengths of
+ * all arrays indexed along the chain (0 if that product is 1).  New derefs
+ * are recorded in both the hash table and the derefs array.
+ */
+static nir_register *
+get_reg_for_deref(nir_deref_var *deref, struct locals_to_regs_state *state)
+{
+   const uint32_t hash = hash_deref(deref);
+
+   struct hash_entry *found =
+      _mesa_hash_table_search_pre_hashed(state->regs_table, hash, deref);
+   if (found != NULL)
+      return found->data;
+
+   /* Walk to the end of the chain, multiplying up the array lengths. */
+   unsigned num_elems = 1;
+   nir_deref *tail;
+   for (tail = &deref->deref; tail->child != NULL; tail = tail->child) {
+      if (tail->child->deref_type == nir_deref_type_array)
+         num_elems *= glsl_get_length(tail->type);
+   }
+
+   assert(glsl_type_is_vector(tail->type) || glsl_type_is_scalar(tail->type));
+
+   nir_register *reg = nir_local_reg_create(state->impl);
+   reg->num_components = glsl_get_vector_elements(tail->type);
+   if (num_elems > 1)
+      reg->num_array_elems = num_elems;
+   else
+      reg->num_array_elems = 0;
+
+   _mesa_hash_table_insert_pre_hashed(state->regs_table, hash, deref, reg);
+   nir_array_add(&state->derefs_array, nir_deref_var *, deref);
+
+   return reg;
+}
+
+/* Builds a register source that addresses the element selected by a
+ * local-variable deref.
+ *
+ * The register comes from get_reg_for_deref().  All array levels of the
+ * chain are flattened into one index: the constant parts accumulate in
+ * base_offset and, as soon as one level is indirect, the variable part
+ * accumulates in an indirect source.  The imul/iadd instructions (and the
+ * constants they need) are inserted before instr.
+ */
+static nir_src
+get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,
+ struct locals_to_regs_state *state)
+{
+ nir_src src;
+
+ src.is_ssa = false;
+ src.reg.reg = get_reg_for_deref(deref, state);
+ src.reg.base_offset = 0;
+ src.reg.indirect = NULL;
+
+ /* It is possible for a user to create a shader that has an array with a
+ * single element and then proceed to access it indirectly. Indirectly
+ * accessing a non-array register is not allowed in NIR. In order to
+ * handle this case we just convert it to a direct reference.
+ */
+ if (src.reg.reg->num_array_elems == 0)
+ return src;
+
+ nir_deref *tail = &deref->deref;
+ while (tail->child != NULL) {
+ const struct glsl_type *parent_type = tail->type;
+ tail = tail->child;
+
+ /* Struct derefs do not contribute to the flattened array index. */
+ if (tail->deref_type != nir_deref_type_array)
+ continue;
+
+ nir_deref_array *deref_array = nir_deref_as_array(tail);
+
+ /* index = index * length(this array) + index within this array */
+ src.reg.base_offset *= glsl_get_length(parent_type);
+ src.reg.base_offset += deref_array->base_offset;
+
+ /* An indirect part gathered so far must be scaled the same way. */
+ if (src.reg.indirect) {
+ nir_load_const_instr *load_const =
+ nir_load_const_instr_create(state->shader, 1);
+ load_const->value.u[0] = glsl_get_length(parent_type);
+ nir_instr_insert_before(instr, &load_const->instr);
+
+ nir_alu_instr *mul = nir_alu_instr_create(state->shader, nir_op_imul);
+ mul->src[0].src = *src.reg.indirect;
+ mul->src[1].src.is_ssa = true;
+ mul->src[1].src.ssa = &load_const->def;
+ mul->dest.write_mask = 1;
+ nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
+ nir_instr_insert_before(instr, &mul->instr);
+
+ /* From here on the indirect is the product just computed. */
+ src.reg.indirect->is_ssa = true;
+ src.reg.indirect->ssa = &mul->dest.dest.ssa;
+ }
+
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ if (src.reg.indirect == NULL) {
+ /* First indirect level: start from a copy of its index source. */
+ src.reg.indirect = ralloc(state->shader, nir_src);
+ nir_src_copy(src.reg.indirect, &deref_array->indirect,
+ state->shader);
+ } else {
+ /* Later indirect level: add its index to the running value. */
+ nir_alu_instr *add = nir_alu_instr_create(state->shader,
+ nir_op_iadd);
+ add->src[0].src = *src.reg.indirect;
+ nir_src_copy(&add->src[1].src, &deref_array->indirect, add);
+ add->dest.write_mask = 1;
+ nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
+ nir_instr_insert_before(instr, &add->instr);
+
+ src.reg.indirect->is_ssa = true;
+ src.reg.indirect->ssa = &add->dest.dest.ssa;
+ }
+ }
+ }
+
+ return src;
+}
+
+/* Block callback: replaces every load_var/store_var on a local variable by
+ * an imov from/to the register that stands in for the dereferenced element.
+ * Accesses to variables of any other mode are left untouched.  Sets
+ * state->progress when something was replaced and always returns true.
+ *
+ * Note that the "continue" statements inside the switch apply to the
+ * enclosing instruction loop: they move on to the next instruction.
+ */
+static bool
+lower_locals_to_regs_block(nir_block *block, void *void_state)
+{
+ struct locals_to_regs_state *state = void_state;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_load_var: {
+ if (intrin->variables[0]->var->data.mode != nir_var_local)
+ continue;
+
+ /* dest = imov(register element) */
+ nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov);
+ mov->src[0].src = get_deref_reg_src(intrin->variables[0],
+ &intrin->instr, state);
+ mov->dest.write_mask = (1 << intrin->num_components) - 1;
+ if (intrin->dest.is_ssa) {
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
+ intrin->num_components, NULL);
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(&mov->dest.dest.ssa));
+ } else {
+ nir_dest_copy(&mov->dest.dest, &intrin->dest, &mov->instr);
+ }
+ nir_instr_insert_before(&intrin->instr, &mov->instr);
+
+ nir_instr_remove(&intrin->instr);
+ state->progress = true;
+ break;
+ }
+
+ case nir_intrinsic_store_var: {
+ if (intrin->variables[0]->var->data.mode != nir_var_local)
+ continue;
+
+ /* Built as a source, but its fields describe the destination below. */
+ nir_src reg_src = get_deref_reg_src(intrin->variables[0],
+ &intrin->instr, state);
+
+ /* register element = imov(stored value), using the store's writemask */
+ nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov);
+ nir_src_copy(&mov->src[0].src, &intrin->src[0], mov);
+ mov->dest.write_mask = intrin->const_index[0];
+ mov->dest.dest.is_ssa = false;
+ mov->dest.dest.reg.reg = reg_src.reg.reg;
+ mov->dest.dest.reg.base_offset = reg_src.reg.base_offset;
+ mov->dest.dest.reg.indirect = reg_src.reg.indirect;
+
+ nir_instr_insert_before(&intrin->instr, &mov->instr);
+
+ nir_instr_remove(&intrin->instr);
+ state->progress = true;
+ break;
+ }
+
+ case nir_intrinsic_copy_var:
+ unreachable("There should be no copies whatsoever at this point");
+ break;
+
+ default:
+ continue;
+ }
+ }
+
+ return true;
+}
+
+/* Folds nir_dominance_lca() over every block that writes or reads the given
+ * register and returns the result (NULL if the register has no defs or
+ * uses).  A use as an if-condition counts as a use in the block that
+ * immediately precedes the if.
+ */
+static nir_block *
+compute_reg_usedef_lca(nir_register *reg)
+{
+   nir_block *common = NULL;
+
+   list_for_each_entry(nir_dest, def, &reg->defs, reg.def_link) {
+      common = nir_dominance_lca(common, def->reg.parent_instr->block);
+   }
+
+   list_for_each_entry(nir_src, use, &reg->uses, use_link) {
+      common = nir_dominance_lca(common, use->parent_instr->block);
+   }
+
+   list_for_each_entry(nir_src, if_use, &reg->if_uses, use_link) {
+      nir_cf_node *prev_node = nir_cf_node_prev(&if_use->parent_if->cf_node);
+
+      assert(prev_node->type == nir_cf_node_block);
+      common = nir_dominance_lca(common, nir_cf_node_as_block(prev_node));
+   }
+
+   return common;
+}
+
+/* Emits the code that stores a variable's constant initializer into the
+ * register standing in for deref_head.
+ *
+ * The function recurses along the deref chain starting at deref_tail.  At an
+ * array level it visits every element of the array, at a struct level it
+ * simply descends.  Once the end of the chain is reached it loads the
+ * constant for the element currently selected by deref_head and moves it
+ * into the matching register element.  The load is placed with
+ * nir_instr_insert_before_block(block, ...) and the move right after it.
+ */
+static void
+insert_constant_initializer(nir_deref_var *deref_head, nir_deref *deref_tail,
+ nir_block *block,
+ struct locals_to_regs_state *state)
+{
+ if (deref_tail->child) {
+ switch (deref_tail->child->deref_type) {
+ case nir_deref_type_array: {
+ unsigned array_elems = glsl_get_length(deref_tail->type);
+
+ /* Stack copy of the child, forced to be a direct array deref so that
+ * each element can be selected through base_offset.
+ */
+ nir_deref_array arr_deref;
+ arr_deref.deref = *deref_tail->child;
+ arr_deref.deref_array_type = nir_deref_array_type_direct;
+
+ /* Temporarily splice the copy into the chain; deref_head then
+ * addresses element i while the recursion runs.
+ */
+ nir_deref *old_child = deref_tail->child;
+ deref_tail->child = &arr_deref.deref;
+ for (unsigned i = 0; i < array_elems; i++) {
+ arr_deref.base_offset = i;
+ insert_constant_initializer(deref_head, &arr_deref.deref,
+ block, state);
+ }
+ /* Restore the original chain before returning. */
+ deref_tail->child = old_child;
+ return;
+ }
+
+ case nir_deref_type_struct:
+ insert_constant_initializer(deref_head, deref_tail->child,
+ block, state);
+ return;
+
+ default:
+ unreachable("Invalid deref child type");
+ }
+ }
+
+ assert(deref_tail->child == NULL);
+
+ nir_load_const_instr *load =
+ nir_deref_get_const_initializer_load(state->shader, deref_head);
+ nir_instr_insert_before_block(block, &load->instr);
+
+ /* Any index arithmetic for the register goes in front of the load. */
+ nir_src reg_src = get_deref_reg_src(deref_head, &load->instr, state);
+
+ /* register element = imov(constant), writing all loaded components */
+ nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov);
+ mov->src[0].src = nir_src_for_ssa(&load->def);
+ mov->dest.write_mask = (1 << load->def.num_components) - 1;
+ mov->dest.dest.is_ssa = false;
+ mov->dest.dest.reg.reg = reg_src.reg.reg;
+ mov->dest.dest.reg.base_offset = reg_src.reg.base_offset;
+ mov->dest.dest.reg.indirect = reg_src.reg.indirect;
+
+ nir_instr_insert_after(&load->instr, &mov->instr);
+ state->progress = true;
+}
+
+/* Lowers the local variable accesses of one function implementation to
+ * register accesses and then emits the constant initializers of the
+ * variables that were encountered.  Returns whether anything was changed.
+ */
+static bool
+nir_lower_locals_to_regs_impl(nir_function_impl *impl)
+{
+   struct locals_to_regs_state state;
+
+   state.impl = impl;
+   state.shader = impl->function->shader;
+   state.progress = false;
+   state.regs_table = _mesa_hash_table_create(NULL, hash_deref, derefs_equal);
+   nir_array_init(&state.derefs_array, NULL);
+
+   /* compute_reg_usedef_lca() below relies on dominance information. */
+   nir_metadata_require(impl, nir_metadata_dominance);
+
+   nir_foreach_block(impl, lower_locals_to_regs_block, &state);
+
+   /* Go through the derefs in the order they were first seen. */
+   nir_array_foreach(&state.derefs_array, nir_deref_var *, deref_ptr) {
+      nir_deref_var *deref = *deref_ptr;
+      struct hash_entry *deref_entry =
+         _mesa_hash_table_search(state.regs_table, deref);
+      assert(deref_entry && deref_entry->key == deref);
+
+      if (deref->var->constant_initializer != NULL) {
+         nir_register *reg = (nir_register *)deref_entry->data;
+         nir_block *init_block = compute_reg_usedef_lca(reg);
+
+         insert_constant_initializer(deref, &deref->deref, init_block,
+                                     &state);
+      }
+   }
+
+   nir_metadata_preserve(impl,
+                         nir_metadata_block_index | nir_metadata_dominance);
+
+   nir_array_fini(&state.derefs_array);
+   _mesa_hash_table_destroy(state.regs_table, NULL);
+
+   return state.progress;
+}
+
+/* Entry point: runs the pass on every function of the shader that has an
+ * implementation.  Returns true if at least one of them was changed.
+ */
+bool
+nir_lower_locals_to_regs(nir_shader *shader)
+{
+   bool any_progress = false;
+
+   nir_foreach_function(shader, function) {
+      if (function->impl == NULL)
+         continue;
+
+      if (nir_lower_locals_to_regs_impl(function->impl))
+         any_progress = true;
+   }
+
+   return any_progress;
+}
diff --git a/src/compiler/nir/nir_lower_outputs_to_temporaries.c b/src/compiler/nir/nir_lower_outputs_to_temporaries.c
new file mode 100644
index 00000000000..71b06b81fcc
--- /dev/null
+++ b/src/compiler/nir/nir_lower_outputs_to_temporaries.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/*
+ * Implements a pass that lowers output variables to a temporary plus an
+ * output variable with a single copy at each exit point of the shader.
+ * This way the output variable is only ever written.
+ *
+ * Because valid NIR requires that output variables are never read, this
+ * pass is more of a helper for NIR producers and must be run before the
+ * shader is ever validated.
+ */
+
+#include "nir.h"
+
+/* State shared by the helpers of the outputs-to-temporaries pass. */
+struct lower_outputs_state {
+ /* Shader being lowered; its outputs list holds the new output variables. */
+ nir_shader *shader;
+ /* The original output variables, which the pass turns into temporaries.
+ * Kept in the same order as the shader's new outputs list.
+ */
+ struct exec_list old_outputs;
+};
+
+/* Inserts one copy_var intrinsic per shader output at the given cursor.
+ * Outputs and temporaries are paired up by their position in the two lists,
+ * which therefore must have the same length.
+ */
+static void
+emit_output_copies(nir_cursor cursor, struct lower_outputs_state *state)
+{
+   nir_shader *shader = state->shader;
+
+   assert(exec_list_length(&state->shader->outputs) ==
+          exec_list_length(&state->old_outputs));
+
+   foreach_two_lists(out_node, &shader->outputs,
+                     temp_node, &state->old_outputs) {
+      nir_variable *out_var = exec_node_data(nir_variable, out_node, node);
+      nir_variable *tmp_var = exec_node_data(nir_variable, temp_node, node);
+
+      nir_intrinsic_instr *copy_instr =
+         nir_intrinsic_instr_create(shader, nir_intrinsic_copy_var);
+
+      /* variables[0] refers to the output, variables[1] to the temporary. */
+      copy_instr->variables[0] = nir_deref_var_create(copy_instr, out_var);
+      copy_instr->variables[1] = nir_deref_var_create(copy_instr, tmp_var);
+
+      nir_instr_insert(cursor, &copy_instr->instr);
+   }
+}
+
+/* Block callback: emits the output copies in front of every emit_vertex
+ * intrinsic found in the block.  Always returns true.
+ */
+static bool
+emit_output_copies_block(nir_block *block, void *state)
+{
+   nir_foreach_instr(block, instr) {
+      if (instr->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+         if (intrin->intrinsic == nir_intrinsic_emit_vertex)
+            emit_output_copies(nir_before_instr(&intrin->instr), state);
+      }
+   }
+
+   return true;
+}
+
+/* Turns every shader output into a global temporary and creates a fresh
+ * output variable for it.  Copies from the temporaries to the new outputs
+ * are emitted before each emit_vertex in geometry shaders and, for all other
+ * stages, at the end of every block that jumps to the end block of "main".
+ * Tessellation control shaders are left untouched.
+ */
+void
+nir_lower_outputs_to_temporaries(nir_shader *shader)
+{
+ struct lower_outputs_state state;
+
+ if (shader->stage == MESA_SHADER_TESS_CTRL)
+ return;
+
+ state.shader = shader;
+ /* Empties shader->outputs; it is refilled with the new variables below. */
+ exec_list_move_nodes_to(&shader->outputs, &state.old_outputs);
+
+ /* Walk over all of the outputs, turning each one into a temporary and
+ * making a new variable for the actual output.
+ */
+ nir_foreach_variable(var, &state.old_outputs) {
+ nir_variable *output = ralloc(shader, nir_variable);
+ memcpy(output, var, sizeof *output);
+
+ /* The original is now the temporary */
+ nir_variable *temp = var;
+
+ /* Reparent the name to the new variable */
+ ralloc_steal(output, output->name);
+
+ /* Give the temporary a new name with @out-temp appended */
+ temp->name = ralloc_asprintf(var, "%s@out-temp", output->name);
+ temp->data.mode = nir_var_global;
+ temp->constant_initializer = NULL;
+
+ /* Appending keeps the new outputs in the same order as old_outputs. */
+ exec_list_push_tail(&shader->outputs, &output->node);
+ }
+
+ nir_foreach_function(shader, function) {
+ if (function->impl == NULL)
+ continue;
+
+ if (shader->stage == MESA_SHADER_GEOMETRY) {
+ /* For geometry shaders, we have to emit the output copies right
+ * before each EmitVertex call.
+ */
+ nir_foreach_block(function->impl, emit_output_copies_block, &state);
+ } else if (strcmp(function->name, "main") == 0) {
+ /* For all other shader types, we need to do the copies right before
+ * the jumps to the end block.
+ */
+ struct set_entry *block_entry;
+ set_foreach(function->impl->end_block->predecessors, block_entry) {
+ struct nir_block *block = (void *)block_entry->key;
+ emit_output_copies(nir_after_block_before_jump(block), &state);
+ }
+ }
+
+ nir_metadata_preserve(function->impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+ }
+
+ /* The former outputs now live on as global variables. */
+ exec_list_append(&shader->globals, &state.old_outputs);
+}
diff --git a/src/compiler/nir/nir_lower_phis_to_scalar.c b/src/compiler/nir/nir_lower_phis_to_scalar.c
new file mode 100644
index 00000000000..dd2abcf72f8
--- /dev/null
+++ b/src/compiler/nir/nir_lower_phis_to_scalar.c
@@ -0,0 +1,293 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements a pass that lowers vector phi nodes to scalar phi nodes when
+ * we don't think it will hurt anything.
+ */
+
+/* State shared by the helpers of the phi-scalarizing pass while one function
+ * implementation is being processed.
+ */
+struct lower_phis_to_scalar_state {
+ /* Context handed to the instruction constructors for the new phi, mov and
+ * vecN instructions.
+ */
+ void *mem_ctx;
+ /* Scratch context: phi_table is created on it and replaced vector phis are
+ * reparented to it; it is freed once the implementation has been walked.
+ */
+ void *dead_ctx;
+
+ /* Hash table marking which phi nodes are scalarizable. The key is
+ * pointers to phi instructions and the entry is either NULL for not
+ * scalarizable or non-null for scalarizable.
+ */
+ struct hash_table *phi_table;
+};
+
+/* Forward declaration: should_lower_phi() and is_phi_src_scalarizable() call
+ * each other.
+ */
+static bool
+should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state);
+
+/* Returns true when the instruction producing the given phi source is one
+ * whose result can be consumed component by component: per-component or vecN
+ * ALU operations, constants, undefs, phis that will themselves be lowered,
+ * and a fixed set of load/interpolation intrinsics.  Non-SSA sources and every
+ * other kind of instruction yield false.
+ */
+static bool
+is_phi_src_scalarizable(nir_phi_src *src,
+                        struct lower_phis_to_scalar_state *state)
+{
+   /* Non-SSA sources are not handled. */
+   if (!src->src.is_ssa)
+      return false;
+
+   nir_instr *parent = src->src.ssa->parent_instr;
+
+   /* Constants and undefs are trivially scalarizable. */
+   if (parent->type == nir_instr_type_load_const ||
+       parent->type == nir_instr_type_ssa_undef)
+      return true;
+
+   /* A phi source is fine exactly when that phi is going to be lowered. */
+   if (parent->type == nir_instr_type_phi)
+      return should_lower_phi(nir_instr_as_phi(parent), state);
+
+   if (parent->type == nir_instr_type_alu) {
+      nir_op op = nir_instr_as_alu(parent)->op;
+
+      /* Operations with output_size == 0 should be scalarized anyway. */
+      if (nir_op_infos[op].output_size == 0)
+         return true;
+
+      /* vecN operations show up from scalarizing ALU operations and are
+       * easily copy-propagated, so they are acceptable too.
+       */
+      return op == nir_op_vec2 || op == nir_op_vec3 || op == nir_op_vec4;
+   }
+
+   if (parent->type == nir_instr_type_intrinsic) {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(parent);
+
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_load_var: {
+         /* Only variable loads from shader inputs or uniforms qualify. */
+         const nir_variable *var = intrin->variables[0]->var;
+         return var->data.mode == nir_var_shader_in ||
+                var->data.mode == nir_var_uniform;
+      }
+
+      case nir_intrinsic_interp_var_at_centroid:
+      case nir_intrinsic_interp_var_at_sample:
+      case nir_intrinsic_interp_var_at_offset:
+      case nir_intrinsic_load_uniform:
+      case nir_intrinsic_load_ubo:
+      case nir_intrinsic_load_ssbo:
+      case nir_intrinsic_load_input:
+         return true;
+
+      default:
+         return false;
+      }
+   }
+
+   /* Any other type of instruction cannot be scalarized. */
+   return false;
+}
+
+/**
+ * Decides whether a vector phi should be split into scalar phis.  For now a
+ * phi is only lowered when every one of its sources is scalarizable.
+ *
+ * The restriction exists because of coalescing.  Phi sources cannot carry a
+ * swizzle, so any swizzle has to be resolved with a mov placed right before
+ * the phi.  The trade-off is therefore between movs that pick components off
+ * a vector (feeding scalar phis) and movs that recombine components (feeding
+ * a vector phi).  The component-picking movs are almost uncoalescable: NIR
+ * needs them to select the component, and the backend cannot remove them
+ * because the source register is a vector while the destination is a scalar
+ * that may be used elsewhere in the program.  Scalars flowing into a vector
+ * phi are much better off: when the SSA def is produced in the predecessor
+ * block of the matching phi source, the backend emits an ALU op into a
+ * temporary followed by a mov into the vector component, and that mov can
+ * almost certainly be coalesced away.
+ *
+ * Verdicts are cached in state->phi_table; a phi that is still being
+ * evaluated is recorded as scalarizable so that cycles terminate.
+ */
+static bool
+should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state)
+{
+   /* A single-component phi is already scalar. */
+   if (phi->dest.ssa.num_components == 1)
+      return false;
+
+   /* Reuse an earlier (possibly still provisional) verdict. */
+   struct hash_entry *cached = _mesa_hash_table_search(state->phi_table, phi);
+   if (cached != NULL)
+      return cached->data != NULL;
+
+   /* Record the phi as scalarizable up front.  This stops the recursion when
+    * the dependence graph has a cycle and keeps such a cycle from making the
+    * phi fail automatically.
+    */
+   _mesa_hash_table_insert(state->phi_table, phi, (void *)(intptr_t)1);
+
+   bool all_srcs_ok = true;
+   nir_foreach_phi_src(phi, src) {
+      if (!is_phi_src_scalarizable(src, state)) {
+         all_srcs_ok = false;
+         break;
+      }
+   }
+
+   /* The recursion above may have modified the table, so look the entry up
+    * again before storing the final answer.
+    */
+   struct hash_entry *entry = _mesa_hash_table_search(state->phi_table, phi);
+   assert(entry);
+   entry->data = (void *)(intptr_t)all_srcs_ok;
+
+   return all_srcs_ok;
+}
+
+/* Block callback for nir_foreach_block(): every phi at the top of 'block'
+ * that should_lower_phi() accepts is replaced by one scalar phi per
+ * component, a mov per predecessor and component that extracts the component
+ * from the original source, and a vecN placed after the last phi that
+ * reassembles the value for the existing users. Always returns true.
+ */
+static bool
+lower_phis_to_scalar_block(nir_block *block, void *void_state)
+{
+ struct lower_phis_to_scalar_state *state = void_state;
+
+ /* Find the last phi node in the block */
+ nir_phi_instr *last_phi = NULL;
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_phi)
+ break;
+
+ last_phi = nir_instr_as_phi(instr);
+ }
+
+ /* We have to handle the phi nodes in their own pass due to the way
+ * we're modifying the linked list of instructions.
+ */
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_phi)
+ break;
+
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+ if (!should_lower_phi(phi, state))
+ continue;
+
+ /* Create a vecN operation to combine the results. Most of these
+ * will be redundant, but copy propagation should clean them up for
+ * us. No need to add the complexity here.
+ */
+ nir_op vec_op;
+ switch (phi->dest.ssa.num_components) {
+ case 2: vec_op = nir_op_vec2; break;
+ case 3: vec_op = nir_op_vec3; break;
+ case 4: vec_op = nir_op_vec4; break;
+ default: unreachable("Invalid number of components");
+ }
+
+ nir_alu_instr *vec = nir_alu_instr_create(state->mem_ctx, vec_op);
+ nir_ssa_dest_init(&vec->instr, &vec->dest.dest,
+ phi->dest.ssa.num_components, NULL);
+ vec->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
+
+ /* One new single-component phi per component of the original phi */
+ for (unsigned i = 0; i < phi->dest.ssa.num_components; i++) {
+ nir_phi_instr *new_phi = nir_phi_instr_create(state->mem_ctx);
+ nir_ssa_dest_init(&new_phi->instr, &new_phi->dest, 1, NULL);
+
+ vec->src[i].src = nir_src_for_ssa(&new_phi->dest.ssa);
+
+ nir_foreach_phi_src(phi, src) {
+ /* We need to insert a mov to grab the i'th component of src */
+ nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx,
+ nir_op_imov);
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL);
+ mov->dest.write_mask = 1;
+ nir_src_copy(&mov->src[0].src, &src->src, state->mem_ctx);
+ mov->src[0].swizzle[0] = i;
+
+ /* Insert at the end of the predecessor but before the jump */
+ nir_instr *pred_last_instr = nir_block_last_instr(src->pred);
+ if (pred_last_instr && pred_last_instr->type == nir_instr_type_jump)
+ nir_instr_insert_before(pred_last_instr, &mov->instr);
+ else
+ nir_instr_insert_after_block(src->pred, &mov->instr);
+
+ /* The scalar phi takes the mov's result from the same predecessor */
+ nir_phi_src *new_src = ralloc(new_phi, nir_phi_src);
+ new_src->pred = src->pred;
+ new_src->src = nir_src_for_ssa(&mov->dest.dest.ssa);
+
+ exec_list_push_tail(&new_phi->srcs, &new_src->node);
+ }
+
+ nir_instr_insert_before(&phi->instr, &new_phi->instr);
+ }
+
+ nir_instr_insert_after(&last_phi->instr, &vec->instr);
+
+ nir_ssa_def_rewrite_uses(&phi->dest.ssa,
+ nir_src_for_ssa(&vec->dest.dest.ssa));
+
+ /* Reparent the old phi to dead_ctx and unlink it from the block.
+ * NOTE(review): presumably it is kept allocated because its pointer is
+ * still a key in phi_table - confirm.
+ */
+ ralloc_steal(state->dead_ctx, phi);
+ nir_instr_remove(&phi->instr);
+
+ /* We're using the safe iterator and inserting all the newly
+ * scalarized phi nodes before their non-scalarized version so that's
+ * ok. However, we are also inserting vec operations after all of
+ * the last phi node so once we get here, we can't trust even the
+ * safe iterator to stop properly. We have to break manually.
+ */
+ if (instr == &last_phi->instr)
+ break;
+ }
+
+ return true;
+}
+
+/* Runs the phi scalarization over every block of one function
+ * implementation.  A temporary ralloc context holds the phi hash table and
+ * the phis removed along the way; it is released at the end.
+ */
+static void
+lower_phis_to_scalar_impl(nir_function_impl *impl)
+{
+   void *scratch = ralloc_context(NULL);
+
+   struct lower_phis_to_scalar_state state;
+   state.mem_ctx = ralloc_parent(impl);
+   state.dead_ctx = scratch;
+   state.phi_table = _mesa_hash_table_create(scratch, _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+
+   nir_foreach_block(impl, lower_phis_to_scalar_block, &state);
+
+   /* Only block indices and dominance information are kept valid. */
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+
+   ralloc_free(scratch);
+}
+
+/** A pass that lowers vector phi nodes to scalar
+ *
+ * Walks every function that has an implementation and looks for vector phi
+ * nodes that can be lowered to scalar phi nodes.  Not every phi is lowered:
+ * if, for instance, one of the sources is a non-scalarizable vector, the phi
+ * is left alone because lowering it would generate hard-to-coalesce movs.
+ */
+void
+nir_lower_phis_to_scalar(nir_shader *shader)
+{
+   nir_foreach_function(shader, function) {
+      /* Functions without an implementation have nothing to lower. */
+      if (function->impl == NULL)
+         continue;
+
+      lower_phis_to_scalar_impl(function->impl);
+   }
+}
diff --git a/src/compiler/nir/nir_lower_samplers.c b/src/compiler/nir/nir_lower_samplers.c
new file mode 100644
index 00000000000..9c912129f09
--- /dev/null
+++ b/src/compiler/nir/nir_lower_samplers.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
+ * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "program/hash_table.h"
+#include "glsl/ir_uniform.h"
+
+#include "main/compiler.h"
+#include "main/mtypes.h"
+#include "program/prog_parameter.h"
+#include "program/program.h"
+
+/* Calculate the sampler index based on array indices and also
+ * calculate the base uniform location for struct members.
+ *
+ * Walks the deref chain below 'tail' recursively. Constant array offsets are
+ * accumulated into instr->sampler_index, indirect array offsets into
+ * *indirect (as instructions emitted through 'b'), and struct member offsets
+ * into *location. *array_elements is multiplied by the length of every array
+ * type that is indexed along the chain.
+ */
+static void
+calc_sampler_offsets(nir_deref *tail, nir_tex_instr *instr,
+ unsigned *array_elements, nir_ssa_def **indirect,
+ nir_builder *b, unsigned *location)
+{
+ /* End of the deref chain */
+ if (tail->child == NULL)
+ return;
+
+ switch (tail->child->deref_type) {
+ case nir_deref_type_array: {
+ nir_deref_array *deref_array = nir_deref_as_array(tail->child);
+
+ assert(deref_array->deref_array_type != nir_deref_array_type_wildcard);
+
+ /* Recurse first: afterwards *array_elements holds the product of the
+ * lengths of the arrays indexed further down the chain, which is the
+ * factor this level's index is scaled by.
+ */
+ calc_sampler_offsets(tail->child, instr, array_elements,
+ indirect, b, location);
+ instr->sampler_index += deref_array->base_offset * *array_elements;
+
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ nir_ssa_def *mul =
+ nir_imul(b, nir_imm_int(b, *array_elements),
+ nir_ssa_for_src(b, deref_array->indirect, 1));
+
+ /* The indirect has been folded into 'mul'; reset the deref's source */
+ nir_instr_rewrite_src(&instr->instr, &deref_array->indirect,
+ NIR_SRC_INIT);
+
+ if (*indirect) {
+ *indirect = nir_iadd(b, *indirect, mul);
+ } else {
+ *indirect = mul;
+ }
+ }
+
+ *array_elements *= glsl_get_length(tail->type);
+ break;
+ }
+
+ case nir_deref_type_struct: {
+ nir_deref_struct *deref_struct = nir_deref_as_struct(tail->child);
+ *location += glsl_get_record_location_offset(tail->type, deref_struct->index);
+ calc_sampler_offsets(tail->child, instr, array_elements,
+ indirect, b, location);
+ break;
+ }
+
+ default:
+ unreachable("Invalid deref type");
+ break;
+ }
+}
+
+/* Lowers the sampler variable dereference of a texture instruction to a
+ * plain sampler index.
+ *
+ * instr->sampler_index becomes the constant offset computed from the deref
+ * chain plus the opaque index that shader_program records for the uniform
+ * and the given stage.  An indirect array offset, if any, is appended to the
+ * instruction as a nir_tex_src_sampler_offset source and
+ * instr->sampler_array_size is set.  On success instr->sampler is cleared.
+ * Instructions without a sampler deref are left untouched.
+ */
+static void
+lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program,
+              gl_shader_stage stage, nir_builder *builder)
+{
+   if (instr->sampler == NULL)
+      return;
+
+   instr->sampler_index = 0;
+   unsigned location = instr->sampler->var->data.location;
+   unsigned array_elements = 1;
+   nir_ssa_def *indirect = NULL;
+
+   builder->cursor = nir_before_instr(&instr->instr);
+   calc_sampler_offsets(&instr->sampler->deref, instr, &array_elements,
+                        &indirect, builder, &location);
+
+   if (indirect) {
+      /* First, we have to resize the array of texture sources */
+      nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src,
+                                            instr->num_srcs + 1);
+
+      for (unsigned i = 0; i < instr->num_srcs; i++) {
+         new_srcs[i].src_type = instr->src[i].src_type;
+         nir_instr_move_src(&instr->instr, &new_srcs[i].src,
+                            &instr->src[i].src);
+      }
+
+      ralloc_free(instr->src);
+      instr->src = new_srcs;
+
+      /* Now we can go ahead and move the source over to being a
+       * first-class texture source.
+       */
+      instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
+      instr->num_srcs++;
+      nir_instr_rewrite_src(&instr->instr,
+                            &instr->src[instr->num_srcs - 1].src,
+                            nir_src_for_ssa(indirect));
+
+      instr->sampler_array_size = array_elements;
+   }
+
+   /* Compare with ">=" rather than "> NumUniformStorage - 1": when there are
+    * no uniform storage entries the subtraction wraps around in the unsigned
+    * comparison, the range check passes for every location and
+    * UniformStorage would be read out of bounds.
+    */
+   if (location >= shader_program->NumUniformStorage ||
+       !shader_program->UniformStorage[location].opaque[stage].active) {
+      assert(!"cannot return a sampler");
+      return;
+   }
+
+   instr->sampler_index +=
+      shader_program->UniformStorage[location].opaque[stage].index;
+
+   instr->sampler = NULL;
+}
+
+/* State handed to lower_block_cb() through nir_foreach_block() */
+typedef struct {
+ /* Builder used to emit the index arithmetic for indirect sampler access */
+ nir_builder builder;
+ /* Linked program whose UniformStorage is consulted by lower_sampler() */
+ const struct gl_shader_program *shader_program;
+ /* Stage of the shader being lowered */
+ gl_shader_stage stage;
+} lower_state;
+
+/* Block callback: lowers the sampler of every texture instruction in the
+ * block.  Always returns true.
+ */
+static bool
+lower_block_cb(nir_block *block, void *_state)
+{
+   lower_state *state = _state;
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_tex)
+         continue;
+
+      lower_sampler(nir_instr_as_tex(instr), state->shader_program,
+                    state->stage, &state->builder);
+   }
+
+   return true;
+}
+
+/* Lowers the samplers of all texture instructions in one function
+ * implementation.
+ */
+static void
+lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program,
+           gl_shader_stage stage)
+{
+   lower_state state;
+
+   nir_builder_init(&state.builder, impl);
+   state.shader_program = shader_program;
+   state.stage = stage;
+
+   nir_foreach_block(impl, lower_block_cb, &state);
+
+   /* Lowering an indirectly indexed sampler inserts new ALU instructions.
+    * The control flow is untouched, so block indices and dominance stay
+    * valid, but any other metadata must not be reported as up to date.
+    */
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+}
+
+/* Lowers sampler derefs to sampler indices in every function of the shader
+ * that has an implementation, using the shader's own stage.
+ */
+void
+nir_lower_samplers(nir_shader *shader,
+                   const struct gl_shader_program *shader_program)
+{
+   nir_foreach_function(shader, function) {
+      if (function->impl == NULL)
+         continue;
+
+      lower_impl(function->impl, shader_program, shader->stage);
+   }
+}
diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c
new file mode 100644
index 00000000000..2bd787d3574
--- /dev/null
+++ b/src/compiler/nir/nir_lower_system_values.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/* State handed to convert_block() through nir_foreach_block() */
+struct lower_system_values_state {
+ /* Builder used to emit the replacement system value loads */
+ nir_builder builder;
+ /* Set to true once at least one load_var has been replaced */
+ bool progress;
+};
+
+/* Block callback: replaces every load_var of a system value variable with
+ * the system value load intrinsic matching the variable's location, and
+ * records progress in the state.  Always returns true.
+ */
+static bool
+convert_block(nir_block *block, void *void_state)
+{
+   struct lower_system_values_state *state = void_state;
+   nir_builder *b = &state->builder;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      if (intrin->intrinsic != nir_intrinsic_load_var)
+         continue;
+
+      nir_variable *sysval_var = intrin->variables[0]->var;
+      if (sysval_var->data.mode != nir_var_system_value)
+         continue;
+
+      /* Emit the replacement load right after the load_var ... */
+      b->cursor = nir_after_instr(&intrin->instr);
+      nir_intrinsic_op op =
+         nir_intrinsic_from_system_value(sysval_var->data.location);
+      nir_ssa_def *replacement = nir_load_system_value(b, op, 0);
+
+      /* ... redirect all users to it and drop the old instruction. */
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                               nir_src_for_ssa(replacement));
+      nir_instr_remove(&intrin->instr);
+
+      state->progress = true;
+   }
+
+   return true;
+}
+
+/* Lowers system value loads in one function implementation.  Returns true
+ * when at least one load was replaced.
+ */
+static bool
+convert_impl(nir_function_impl *impl)
+{
+   struct lower_system_values_state state;
+
+   nir_builder_init(&state.builder, impl);
+   state.progress = false;
+
+   nir_foreach_block(impl, convert_block, &state);
+
+   /* Only block indices and dominance information are kept valid. */
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+
+   return state.progress;
+}
+
+/* Replaces loads of system value variables with system value intrinsics in
+ * every function that has an implementation, then empties the shader's list
+ * of system value variables.  Returns true if any load was replaced.
+ */
+bool
+nir_lower_system_values(nir_shader *shader)
+{
+   bool any_progress = false;
+
+   nir_foreach_function(shader, function) {
+      if (function->impl == NULL)
+         continue;
+
+      if (convert_impl(function->impl))
+         any_progress = true;
+   }
+
+   exec_list_make_empty(&shader->system_values);
+
+   return any_progress;
+}
diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c
new file mode 100644
index 00000000000..ae24fb2e16a
--- /dev/null
+++ b/src/compiler/nir/nir_lower_tex.c
@@ -0,0 +1,355 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/*
+ * This lowering pass supports (as configured via nir_lower_tex_options)
+ * various texture related conversions:
+ * + texture projector lowering: converts the coordinate division for
+ * texture projection to be done in ALU instructions instead of
+ * asking the texture operation to do so.
+ * + lowering RECT: converts the un-normalized RECT texture coordinates
+ * to normalized coordinates with txs plus ALU instructions
+ * + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
+ * inserts instructions to clamp specified coordinates to [0.0, 1.0].
+ * Note that this automatically triggers texture projector lowering if
+ * needed, since clamping must happen after projector lowering.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/* State handed to nir_lower_tex_block() through nir_foreach_block() */
+typedef struct {
+ /* Builder used to emit the lowered instructions */
+ nir_builder b;
+ /* Caller-supplied selection of the lowerings to perform */
+ const nir_lower_tex_options *options;
+ /* Set to true once any lowering has been applied */
+ bool progress;
+} lower_tex_state;
+
+/* Lowers texture projection to ALU instructions: the coordinate and
+ * comparitor sources of 'tex' are multiplied by the reciprocal of the
+ * projector source, and the projector source is then removed from the
+ * instruction. Does nothing when 'tex' has no projector source.
+ */
+static void
+project_src(nir_builder *b, nir_tex_instr *tex)
+{
+ /* Find the projector in the srcs list, if present. */
+ unsigned proj_index;
+ for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) {
+ if (tex->src[proj_index].src_type == nir_tex_src_projector)
+ break;
+ }
+ if (proj_index == tex->num_srcs)
+ return;
+
+ b->cursor = nir_before_instr(&tex->instr);
+
+ nir_ssa_def *inv_proj =
+ nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
+
+ /* Walk through the sources projecting the arguments. */
+ for (unsigned i = 0; i < tex->num_srcs; i++) {
+ /* Only coordinates and the comparitor are projected */
+ switch (tex->src[i].src_type) {
+ case nir_tex_src_coord:
+ case nir_tex_src_comparitor:
+ break;
+ default:
+ continue;
+ }
+ nir_ssa_def *unprojected =
+ nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
+ nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
+
+ /* Array indices don't get projected, so make an new vector with the
+ * coordinate's array index untouched.
+ */
+ if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
+ switch (tex->coord_components) {
+ case 4:
+ projected = nir_vec4(b,
+ nir_channel(b, projected, 0),
+ nir_channel(b, projected, 1),
+ nir_channel(b, projected, 2),
+ nir_channel(b, unprojected, 3));
+ break;
+ case 3:
+ projected = nir_vec3(b,
+ nir_channel(b, projected, 0),
+ nir_channel(b, projected, 1),
+ nir_channel(b, unprojected, 2));
+ break;
+ case 2:
+ projected = nir_vec2(b,
+ nir_channel(b, projected, 0),
+ nir_channel(b, unprojected, 1));
+ break;
+ default:
+ unreachable("bad texture coord count for array");
+ break;
+ }
+ }
+
+ nir_instr_rewrite_src(&tex->instr,
+ &tex->src[i].src,
+ nir_src_for_ssa(projected));
+ }
+
+ /* Now move the later tex sources down the array so that the projector
+ * disappears.
+ */
+ nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src,
+ NIR_SRC_INIT);
+ for (unsigned i = proj_index + 1; i < tex->num_srcs; i++) {
+ tex->src[i-1].src_type = tex->src[i].src_type;
+ nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
+ }
+ tex->num_srcs--;
+}
+
+/* Emits, right before 'tex', a txs query (LOD 0) on the same sampler index
+ * and returns its two-component result converted to float.  Leaves the
+ * builder cursor positioned before 'tex' for the instructions it emits.
+ */
+static nir_ssa_def *
+get_texture_size(nir_builder *b, nir_tex_instr *tex)
+{
+   b->cursor = nir_before_instr(&tex->instr);
+
+   /* RECT textures should not be array: */
+   assert(!tex->is_array);
+
+   nir_tex_instr *size_query = nir_tex_instr_create(b->shader, 1);
+   size_query->op = nir_texop_txs;
+   size_query->sampler_dim = GLSL_SAMPLER_DIM_RECT;
+   size_query->sampler_index = tex->sampler_index;
+   size_query->dest_type = nir_type_int;
+
+   /* The query takes a single source, the LOD, which is always zero. */
+   nir_ssa_def *lod_zero = nir_imm_int(b, 0);
+   size_query->src[0].src = nir_src_for_ssa(lod_zero);
+   size_query->src[0].src_type = nir_tex_src_lod;
+
+   nir_ssa_dest_init(&size_query->instr, &size_query->dest, 2, NULL);
+   nir_builder_instr_insert(b, &size_query->instr);
+
+   return nir_i2f(b, &size_query->dest.ssa);
+}
+
+/* Converts a RECT texture access into a 2D one: every coordinate source is
+ * scaled by the reciprocal of the texture size and the instruction's sampler
+ * dimension is switched to 2D.
+ */
+static void
+lower_rect(nir_builder *b, nir_tex_instr *tex)
+{
+   nir_ssa_def *inv_size = nir_frcp(b, get_texture_size(b, tex));
+
+   /* Normalize each coordinate source; other sources are left alone. */
+   for (unsigned s = 0; s < tex->num_srcs; s++) {
+      if (tex->src[s].src_type == nir_tex_src_coord) {
+         nir_ssa_def *texel_coord =
+            nir_ssa_for_src(b, tex->src[s].src, tex->coord_components);
+         nir_ssa_def *normalized = nir_fmul(b, texel_coord, inv_size);
+
+         nir_instr_rewrite_src(&tex->instr, &tex->src[s].src,
+                               nir_src_for_ssa(normalized));
+      }
+   }
+
+   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+}
+
+/* Clamps components of the coordinate sources of 'tex'. Bit j of sat_mask
+ * selects coordinate component j. Selected components are clamped to
+ * [0.0, 1.0], or to [0.0, texture size] for RECT textures; the array index
+ * of an array texture is never clamped.
+ */
+static void
+saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
+{
+ b->cursor = nir_before_instr(&tex->instr);
+
+ /* Walk through the sources saturating the requested arguments. */
+ for (unsigned i = 0; i < tex->num_srcs; i++) {
+ if (tex->src[i].src_type != nir_tex_src_coord)
+ continue;
+
+ nir_ssa_def *src =
+ nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
+
+ /* split src into components: */
+ nir_ssa_def *comp[4];
+
+ for (unsigned j = 0; j < tex->coord_components; j++)
+ comp[j] = nir_channel(b, src, j);
+
+ /* clamp requested components, array index does not get clamped: */
+ unsigned ncomp = tex->coord_components;
+ if (tex->is_array)
+ ncomp--;
+
+ for (unsigned j = 0; j < ncomp; j++) {
+ if ((1 << j) & sat_mask) {
+ if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
+ /* non-normalized texture coords, so clamp to texture
+ * size rather than [0.0, 1.0]
+ *
+ * NOTE(review): get_texture_size() is called here once per
+ * clamped component, so a separate txs is emitted for each.
+ */
+ nir_ssa_def *txs = get_texture_size(b, tex);
+ comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
+ comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
+ } else {
+ comp[j] = nir_fsat(b, comp[j]);
+ }
+ }
+ }
+
+ /* and move the result back into a single vecN: */
+ src = nir_vec(b, comp, tex->coord_components);
+
+ nir_instr_rewrite_src(&tex->instr,
+ &tex->src[i].src,
+ nir_src_for_ssa(src));
+ }
+}
+
+/* Builds a four-component immediate for a constant swizzle selector:
+ * all zeros for swizzle_val 4, all ones for swizzle_val 5 (1.0 when 'type'
+ * is nir_type_float, integer 1 otherwise).
+ */
+static nir_ssa_def *
+get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
+{
+   nir_const_value value;
+
+   /* Zero everything first; this already is the answer for selector 4. */
+   memset(&value, 0, sizeof(value));
+
+   if (swizzle_val != 4) {
+      assert(swizzle_val == 5);
+
+      for (unsigned c = 0; c < 4; c++) {
+         if (type == nir_type_float)
+            value.f[c] = 1.0;
+         else
+            value.u[c] = 1;
+      }
+   }
+
+   return nir_build_imm(b, 4, value);
+}
+
+/* Applies 'swizzle' to the result of 'tex'. Swizzle values below 4 select a
+ * channel of the texture result; the values 4 and 5 are replaced by the
+ * constants produced by get_zero_or_one(). Uses of the original result that
+ * come after the newly built value are rewritten to it.
+ */
+static void
+swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
+{
+ assert(tex->dest.is_ssa);
+
+ b->cursor = nir_after_instr(&tex->instr);
+
+ nir_ssa_def *swizzled;
+ if (tex->op == nir_texop_tg4) {
+ /* For tg4 only the swizzle entry of the gathered component matters */
+ if (swizzle[tex->component] < 4) {
+ /* This one's easy */
+ tex->component = swizzle[tex->component];
+ return;
+ } else {
+ swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
+ }
+ } else {
+ assert(nir_tex_instr_dest_size(tex) == 4);
+ if (swizzle[0] < 4 && swizzle[1] < 4 &&
+ swizzle[2] < 4 && swizzle[3] < 4) {
+ unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
+ /* We have no 0's or 1's, just emit a swizzling MOV */
+ swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4, false);
+ } else {
+ /* Mix of result channels and constants: build the vector by hand */
+ nir_ssa_def *srcs[4];
+ for (unsigned i = 0; i < 4; i++) {
+ if (swizzle[i] < 4) {
+ srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]);
+ } else {
+ srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]);
+ }
+ }
+ swizzled = nir_vec(b, srcs, 4);
+ }
+ }
+
+ nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
+ swizzled->parent_instr);
+}
+
+/* Block callback: applies the lowerings selected in state->options to every
+ * texture instruction of the block, in this order: projector lowering,
+ * RECT lowering, coordinate saturation, result swizzling.  Sets
+ * state->progress whenever a lowering is applied.  Always returns true.
+ */
+static bool
+nir_lower_tex_block(nir_block *block, void *void_state)
+{
+   lower_tex_state *state = void_state;
+   nir_builder *b = &state->b;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_tex)
+         continue;
+
+      nir_tex_instr *tex = nir_instr_as_tex(instr);
+      bool lower_txp = !!(state->options->lower_txp & (1u << tex->sampler_dim));
+
+      /* Bit of this instruction's sampler in the per-sampler option masks.
+       * The shift is done on an unsigned value: with a plain int, sampler
+       * index 31 would shift into the sign bit, which is undefined.
+       */
+      const unsigned sampler_bit = 1u << tex->sampler_index;
+
+      /* mask of src coords to saturate (clamp): */
+      unsigned sat_mask = 0;
+
+      if (sampler_bit & state->options->saturate_r)
+         sat_mask |= (1 << 2);    /* .z */
+      if (sampler_bit & state->options->saturate_t)
+         sat_mask |= (1 << 1);    /* .y */
+      if (sampler_bit & state->options->saturate_s)
+         sat_mask |= (1 << 0);    /* .x */
+
+      /* If we are clamping any coords, we must lower projector first
+       * as clamping happens *after* projection:
+       */
+      if (lower_txp || sat_mask) {
+         project_src(b, tex);
+         state->progress = true;
+      }
+
+      if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) &&
+          state->options->lower_rect) {
+         lower_rect(b, tex);
+         state->progress = true;
+      }
+
+      if (sat_mask) {
+         saturate_src(b, tex, sat_mask);
+         state->progress = true;
+      }
+
+      /* Query results and new-style shadow results are not swizzled. */
+      if ((sampler_bit & state->options->swizzle_result) &&
+          !nir_tex_instr_is_query(tex) &&
+          !(tex->is_shadow && tex->is_new_style_shadow)) {
+         swizzle_result(b, tex, state->options->swizzles[tex->sampler_index]);
+         state->progress = true;
+      }
+   }
+
+   return true;
+}
+
+/* Runs the texture lowering over every block of one function implementation,
+ * using (and re-initializing) the builder stored in 'state'.
+ */
+static void
+nir_lower_tex_impl(nir_function_impl *impl, lower_tex_state *state)
+{
+ nir_builder_init(&state->b, impl);
+
+ nir_foreach_block(impl, nir_lower_tex_block, state);
+
+ /* Only block indices and dominance information are kept valid */
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+}
+
+/* Applies the texture lowerings selected by 'options' to every function of
+ * the shader that has an implementation.  Returns true if any lowering was
+ * applied.
+ */
+bool
+nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
+{
+   lower_tex_state state = {
+      .options = options,
+      .progress = false,
+   };
+
+   nir_foreach_function(shader, function) {
+      if (function->impl == NULL)
+         continue;
+
+      nir_lower_tex_impl(function->impl, &state);
+   }
+
+   return state.progress;
+}
diff --git a/src/compiler/nir/nir_lower_to_source_mods.c b/src/compiler/nir/nir_lower_to_source_mods.c
new file mode 100644
index 00000000000..6c4e1f0d3f3
--- /dev/null
+++ b/src/compiler/nir/nir_lower_to_source_mods.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * This pass lowers the neg, abs, and sat operations to source modifiers on
+ * ALU operations to make things nicer for the backend. It's just much
+ * easier to not have them when we're doing optimizations.
+ */
+
+/* Per-block worker for nir_lower_to_source_mods.
+ *
+ * For every ALU instruction in the block this does three things, in order:
+ *  1. folds fmov/imov instructions that feed its sources into the sources'
+ *     negate/abs/swizzle fields,
+ *  2. rewrites fsat/fneg/ineg/fabs/iabs into a mov carrying the equivalent
+ *     modifier,
+ *  3. moves saturation from the users of its result onto its own
+ *     destination when every user is a saturate.
+ *
+ * The state argument is not used.  Always returns true.
+ */
+static bool
+nir_lower_to_source_mods_block(nir_block *block, void *state)
+{
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_alu)
+ continue;
+
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+ /* Step 1: try to fold a producing mov into each source. */
+ for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
+ if (!alu->src[i].src.is_ssa)
+ continue;
+
+ if (alu->src[i].src.ssa->parent_instr->type != nir_instr_type_alu)
+ continue;
+
+ nir_alu_instr *parent = nir_instr_as_alu(alu->src[i].src.ssa->parent_instr);
+
+ /* A saturating producer cannot be expressed as a source modifier. */
+ if (parent->dest.saturate)
+ continue;
+
+ /* The producer must be the mov that matches the type this input is
+ * read as; inputs of any other type are left alone.
+ */
+ switch (nir_op_infos[alu->op].input_types[i]) {
+ case nir_type_float:
+ if (parent->op != nir_op_fmov)
+ continue;
+ break;
+ case nir_type_int:
+ if (parent->op != nir_op_imov)
+ continue;
+ break;
+ default:
+ continue;
+ }
+
+ /* We can only do a rewrite if the source we are copying is SSA.
+ * Otherwise, moving the read might invalidly reorder reads/writes
+ * on a register.
+ */
+ if (!parent->src[0].src.is_ssa)
+ continue;
+
+ nir_instr_rewrite_src(instr, &alu->src[i].src, parent->src[0].src);
+ if (alu->src[i].abs) {
+ /* abs trumps both neg and abs, do nothing */
+ } else {
+ /* Two negates cancel, so the combined negate is the XOR of both. */
+ alu->src[i].negate = (alu->src[i].negate != parent->src[0].negate);
+ alu->src[i].abs |= parent->src[0].abs;
+ }
+
+ /* Compose our swizzle with the mov's swizzle for every channel that
+ * this instruction actually reads.
+ */
+ for (int j = 0; j < 4; ++j) {
+ if (!nir_alu_instr_channel_used(alu, i, j))
+ continue;
+ alu->src[i].swizzle[j] = parent->src[0].swizzle[alu->src[i].swizzle[j]];
+ }
+
+ /* If that was the last use of the mov, it is dead now. */
+ if (list_empty(&parent->dest.dest.ssa.uses) &&
+ list_empty(&parent->dest.dest.ssa.if_uses))
+ nir_instr_remove(&parent->instr);
+ }
+
+ /* Step 2: turn the modifier-like opcodes into a mov plus modifier on
+ * src[0] (or on the destination for fsat).
+ */
+ switch (alu->op) {
+ case nir_op_fsat:
+ alu->op = nir_op_fmov;
+ alu->dest.saturate = true;
+ break;
+ case nir_op_ineg:
+ alu->op = nir_op_imov;
+ alu->src[0].negate = !alu->src[0].negate;
+ break;
+ case nir_op_fneg:
+ alu->op = nir_op_fmov;
+ alu->src[0].negate = !alu->src[0].negate;
+ break;
+ case nir_op_iabs:
+ alu->op = nir_op_imov;
+ alu->src[0].abs = true;
+ alu->src[0].negate = false;
+ break;
+ case nir_op_fabs:
+ alu->op = nir_op_fmov;
+ alu->src[0].abs = true;
+ alu->src[0].negate = false;
+ break;
+ default:
+ break;
+ }
+
+ /* We've covered sources. Now we're going to try and saturate the
+ * destination if we can.
+ */
+
+ if (!alu->dest.dest.is_ssa)
+ continue;
+
+ /* We can only saturate float destinations */
+ if (nir_op_infos[alu->op].output_type != nir_type_float)
+ continue;
+
+ /* A value consumed by an if-condition cannot be saturated here. */
+ if (!list_empty(&alu->dest.dest.ssa.if_uses))
+ continue;
+
+ /* Check that every user is an fsat (or an fmov that saturates) reading
+ * the value without negate/abs.  Only src[0] of each user is inspected.
+ */
+ bool all_children_are_sat = true;
+ nir_foreach_use(&alu->dest.dest.ssa, child_src) {
+ assert(child_src->is_ssa);
+ nir_instr *child = child_src->parent_instr;
+ if (child->type != nir_instr_type_alu) {
+ all_children_are_sat = false;
+ continue;
+ }
+
+ nir_alu_instr *child_alu = nir_instr_as_alu(child);
+ if (child_alu->src[0].negate || child_alu->src[0].abs) {
+ all_children_are_sat = false;
+ continue;
+ }
+
+ if (child_alu->op != nir_op_fsat &&
+ !(child_alu->op == nir_op_fmov && child_alu->dest.saturate)) {
+ all_children_are_sat = false;
+ continue;
+ }
+ }
+
+ if (!all_children_are_sat)
+ continue;
+
+ alu->dest.saturate = true;
+
+ /* The saturate now happens on this instruction, so every user becomes
+ * a plain, non-saturating fmov.
+ */
+ nir_foreach_use(&alu->dest.dest.ssa, child_src) {
+ assert(child_src->is_ssa);
+ nir_instr *child = child_src->parent_instr;
+ assert(child->type == nir_instr_type_alu);
+ nir_alu_instr *child_alu = nir_instr_as_alu(child);
+
+ child_alu->op = nir_op_fmov;
+ child_alu->dest.saturate = false;
+ /* We could propagate the dest of our instruction to the
+ * destinations of the uses here. However, one quick round of
+ * copy propagation will clean that all up and then we don't have
+ * the complexity.
+ */
+ }
+ }
+
+ return true;
+}
+
+/* Runs the source-modifier lowering over every block of one function
+ * implementation.  The block worker takes no state, so NULL is passed.
+ */
+static void
+nir_lower_to_source_mods_impl(nir_function_impl *impl)
+{
+   void *no_state = NULL;
+
+   nir_foreach_block(impl, nir_lower_to_source_mods_block, no_state);
+}
+
+/* Entry point: lowers neg/abs/sat to source and destination modifiers in
+ * every function of the shader that has an implementation.
+ */
+void
+nir_lower_to_source_mods(nir_shader *shader)
+{
+   nir_foreach_function(shader, function) {
+      if (!function->impl)
+         continue;
+
+      nir_lower_to_source_mods_impl(function->impl);
+   }
+}
diff --git a/src/compiler/nir/nir_lower_two_sided_color.c b/src/compiler/nir/nir_lower_two_sided_color.c
new file mode 100644
index 00000000000..1294cb89004
--- /dev/null
+++ b/src/compiler/nir/nir_lower_two_sided_color.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <[email protected]>
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+#define MAX_COLORS 2 /* VARYING_SLOT_COL0/COL1 */
+
+/* State shared by the two-sided-color lowering across all functions of a
+ * shader.
+ */
+typedef struct {
+ /* Builder used to emit the replacement instructions; re-initialised for
+ * each function implementation.
+ */
+ nir_builder b;
+ /* Shader being lowered; new input variables are appended to it. */
+ nir_shader *shader;
+ /* The VARYING_SLOT_FACE input, either found among the existing inputs or
+ * created by setup_inputs().
+ */
+ nir_variable *face;
+ struct {
+ nir_variable *front; /* COLn */
+ nir_variable *back; /* BFCn */
+ } colors[MAX_COLORS];
+ /* Number of entries of colors[] that are in use. */
+ int colors_count;
+} lower_2side_state;
+
+
+/* Lowering pass for fragment shaders to emulate two-sided color. For
+ * each COLOR input, a corresponding BCOLOR input is created, and bcsel
+ * instruction used to select front or back color based on FACE.
+ */
+
+/* Allocates a new vec4 shader input, appends it to the shader's input list
+ * and bumps the shader's input count by one.
+ *
+ * drvloc becomes the variable's driver_location (and is used to build its
+ * "in_N" name); slot becomes its location.  Returns the new variable, which
+ * is allocated with the shader as its ralloc parent.
+ */
+static nir_variable *
+create_input(nir_shader *shader, unsigned drvloc, gl_varying_slot slot)
+{
+   nir_variable *input = rzalloc(shader, nir_variable);
+
+   input->name = ralloc_asprintf(input, "in_%d", drvloc);
+   input->type = glsl_vec4_type();
+
+   input->data.mode = nir_var_shader_in;
+   input->data.location = slot;
+   input->data.driver_location = drvloc;
+   input->data.index = 0;
+
+   shader->num_inputs++; /* TODO use type_size() */
+   exec_list_push_tail(&shader->inputs, &input->node);
+
+   return input;
+}
+
+/* Emits a four-component load_input intrinsic for the given input variable
+ * at the builder's cursor and returns its SSA result.
+ *
+ * The load is addressed by the variable's driver_location with a constant
+ * zero as its source operand.
+ */
+static nir_ssa_def *
+load_input(nir_builder *b, nir_variable *in)
+{
+   const unsigned num_comps = 4;
+   nir_intrinsic_instr *instr =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
+
+   instr->num_components = num_comps;
+   instr->const_index[0] = in->data.driver_location;
+   instr->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
+   nir_ssa_dest_init(&instr->instr, &instr->dest, num_comps, NULL);
+
+   nir_builder_instr_insert(b, &instr->instr);
+
+   return &instr->dest.ssa;
+}
+
+/* Scans the shader inputs for COL0/COL1 and FACE, then creates whatever is
+ * missing: a FACE input if none exists and one back-face color input per
+ * front color found.  New inputs get driver locations following the
+ * highest one already in use.
+ *
+ * Returns 0 on success, or -1 if the shader has no color inputs (in which
+ * case nothing is created).
+ */
+static int
+setup_inputs(lower_2side_state *state)
+{
+   int last_loc = -1;
+
+   /* find color/face inputs: */
+   nir_foreach_variable(var, &state->shader->inputs) {
+      const int loc = var->data.driver_location;
+      const int slot = var->data.location;
+
+      /* remember the highest driver-location in use; the new inputs are
+       * appended after it:
+       */
+      if (loc > last_loc)
+         last_loc = loc;
+
+      if (slot == VARYING_SLOT_COL0 || slot == VARYING_SLOT_COL1) {
+         assert(state->colors_count < ARRAY_SIZE(state->colors));
+         state->colors[state->colors_count].front = var;
+         state->colors_count++;
+      } else if (slot == VARYING_SLOT_FACE) {
+         state->face = var;
+      }
+   }
+
+   /* without color inputs there is nothing to lower: */
+   if (state->colors_count == 0)
+      return -1;
+
+   /* make sure there is a FACE input to select on: */
+   if (state->face == NULL) {
+      last_loc++;
+      state->face = create_input(state->shader, last_loc, VARYING_SLOT_FACE);
+      state->face->data.interpolation = INTERP_QUALIFIER_FLAT;
+   }
+
+   /* one back-face color input for every front color: */
+   for (int i = 0; i < state->colors_count; i++) {
+      const bool is_col0 =
+         state->colors[i].front->data.location == VARYING_SLOT_COL0;
+      gl_varying_slot back_slot =
+         is_col0 ? VARYING_SLOT_BFC0 : VARYING_SLOT_BFC1;
+
+      last_loc++;
+      state->colors[i].back =
+         create_input(state->shader, last_loc, back_slot);
+   }
+
+   return 0;
+}
+
+/* Per-block worker: rewrites every load_input of a front color so that its
+ * users instead see a bcsel between the back and front color, selected by
+ * the sign of the FACE input.
+ *
+ * void_state is the lower_2side_state of the pass.  Always returns true.
+ */
+static bool
+nir_lower_two_sided_color_block(nir_block *block, void *void_state)
+{
+   lower_2side_state *state = void_state;
+   nir_builder *b = &state->b;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
+      if (load->intrinsic != nir_intrinsic_load_input)
+         continue;
+
+      /* Is this a load of one of the front colors we track? */
+      int match = 0;
+      while (match < state->colors_count) {
+         unsigned drvloc =
+            state->colors[match].front->data.driver_location;
+         if (load->const_index[0] == drvloc) {
+            assert(nir_src_as_const_value(load->src[0]));
+            break;
+         }
+         match++;
+      }
+
+      if (match == state->colors_count)
+         continue;
+
+      /* replace load_input(COLn) with
+       * bcsel(load_input(FACE), load_input(COLn), load_input(BFCn))
+       */
+      b->cursor = nir_before_instr(&load->instr);
+
+      nir_ssa_def *face_vec = load_input(b, state->face);
+      nir_ssa_def *face = nir_channel(b, face_vec, 0);
+      nir_ssa_def *front = load_input(b, state->colors[match].front);
+      nir_ssa_def *back = load_input(b, state->colors[match].back);
+
+      /* a negative face value selects the back color: */
+      nir_ssa_def *zero = nir_imm_float(b, 0.0);
+      nir_ssa_def *is_back = nir_flt(b, face, zero);
+      nir_ssa_def *color = nir_bcsel(b, is_back, back, front);
+
+      assert(load->dest.is_ssa);
+      nir_ssa_def_rewrite_uses(&load->dest.ssa, nir_src_for_ssa(color));
+   }
+
+   return true;
+}
+
+/* Lowers the color loads of a single function implementation.  The builder
+ * in the pass state is initialised for this impl before the blocks are
+ * visited.
+ */
+static void
+nir_lower_two_sided_color_impl(nir_function_impl *impl,
+                               lower_2side_state *state)
+{
+   nir_builder_init(&state->b, impl);
+
+   nir_foreach_block(impl, nir_lower_two_sided_color_block, state);
+
+   /* Block indices and dominance are reported as preserved. */
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+}
+
+/* Entry point of the two-sided-color lowering.
+ *
+ * Does nothing for non-fragment shaders or for fragment shaders without
+ * color inputs; otherwise creates the needed inputs and lowers every
+ * function that has an implementation.
+ */
+void
+nir_lower_two_sided_color(nir_shader *shader)
+{
+   lower_2side_state state = {
+      .shader = shader,
+   };
+
+   /* setup_inputs() is only reached for fragment shaders. */
+   if (shader->stage != MESA_SHADER_FRAGMENT || setup_inputs(&state) != 0)
+      return;
+
+   nir_foreach_function(shader, function) {
+      if (!function->impl)
+         continue;
+
+      nir_lower_two_sided_color_impl(function->impl, &state);
+   }
+}
diff --git a/src/compiler/nir/nir_lower_var_copies.c b/src/compiler/nir/nir_lower_var_copies.c
new file mode 100644
index 00000000000..8cb3edd0a84
--- /dev/null
+++ b/src/compiler/nir/nir_lower_var_copies.c
@@ -0,0 +1,190 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ *
+ */
+
+#include "nir.h"
+#include "compiler/nir_types.h"
+
+/*
+ * Lowers all copy intrinsics to sequences of load/store intrinsics.
+ */
+
+/* Finds the first link in the chain, starting at deref itself, whose child
+ * is a wildcard array deref, and returns that link (the parent of the
+ * wildcard).  For a[1].foo[*].bar this is the deref for foo; starting from
+ * the [*] link, with only .bar below it, the result is NULL.
+ */
+static nir_deref *
+deref_next_wildcard_parent(nir_deref *deref)
+{
+   nir_deref *cur = deref;
+
+   while (cur->child != NULL) {
+      nir_deref *next = cur->child;
+
+      if (next->deref_type == nir_deref_type_array &&
+          nir_deref_as_array(next)->deref_array_type ==
+             nir_deref_array_type_wildcard)
+         return cur;
+
+      cur = next;
+   }
+
+   return NULL;
+}
+
+/* This function recursively walks the given deref chains and replaces the
+ * given copy instruction with an equivalent sequence of load/store
+ * operations.
+ *
+ * Each matched pair of wildcards is expanded by temporarily turning both
+ * wildcard derefs into direct derefs, iterating over every array element,
+ * and restoring them afterwards.  Once no wildcards remain, one load_var
+ * and one store_var (with a full write mask) are emitted.
+ *
+ * @copy_instr  The copy instruction to replace; new instructions will be
+ *              inserted before this one
+ *
+ * @dest_head   The head of the destination variable deref chain
+ *
+ * @src_head    The head of the source variable deref chain
+ *
+ * @dest_tail   The current tail of the destination variable deref chain;
+ *              this is used for recursion and external callers of this
+ *              function should call it with tail == head
+ *
+ * @src_tail    The current tail of the source variable deref chain;
+ *              this is used for recursion and external callers of this
+ *              function should call it with tail == head
+ *
+ * @mem_ctx     The memory context passed to nir_intrinsic_instr_create for
+ *              the new load/store instructions
+ */
+static void
+emit_copy_load_store(nir_intrinsic_instr *copy_instr,
+                     nir_deref_var *dest_head, nir_deref_var *src_head,
+                     nir_deref *dest_tail, nir_deref *src_tail, void *mem_ctx)
+{
+   /* Find the next pair of wildcards */
+   nir_deref *src_arr_parent = deref_next_wildcard_parent(src_tail);
+   nir_deref *dest_arr_parent = deref_next_wildcard_parent(dest_tail);
+
+   if (src_arr_parent || dest_arr_parent) {
+      /* Wildcards had better come in matched pairs.  Both parents are
+       * dereferenced below, so both must be checked.
+       */
+      assert(src_arr_parent && dest_arr_parent);
+
+      nir_deref_array *src_arr = nir_deref_as_array(src_arr_parent->child);
+      nir_deref_array *dest_arr = nir_deref_as_array(dest_arr_parent->child);
+
+      unsigned length = glsl_get_length(src_arr_parent->type);
+      /* The wildcards should represent the same number of elements */
+      assert(length == glsl_get_length(dest_arr_parent->type));
+      assert(length > 0);
+
+      /* Walk over all of the elements that this wildcard refers to and
+       * call emit_copy_load_store on each one of them */
+      src_arr->deref_array_type = nir_deref_array_type_direct;
+      dest_arr->deref_array_type = nir_deref_array_type_direct;
+      for (unsigned i = 0; i < length; i++) {
+         src_arr->base_offset = i;
+         dest_arr->base_offset = i;
+         emit_copy_load_store(copy_instr, dest_head, src_head,
+                              &dest_arr->deref, &src_arr->deref, mem_ctx);
+      }
+      /* Put the wildcards back so the copy's derefs are left unchanged. */
+      src_arr->deref_array_type = nir_deref_array_type_wildcard;
+      dest_arr->deref_array_type = nir_deref_array_type_wildcard;
+   } else {
+      /* In this case, we have no wildcards anymore, so all we have to do
+       * is just emit the load and store operations. */
+      src_tail = nir_deref_tail(src_tail);
+      dest_tail = nir_deref_tail(dest_tail);
+
+      assert(src_tail->type == dest_tail->type);
+
+      unsigned num_components = glsl_get_vector_elements(src_tail->type);
+
+      nir_intrinsic_instr *load =
+         nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_load_var);
+      load->num_components = num_components;
+      load->variables[0] =
+         nir_deref_as_var(nir_copy_deref(load, &src_head->deref));
+      nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
+
+      nir_instr_insert_before(&copy_instr->instr, &load->instr);
+
+      nir_intrinsic_instr *store =
+         nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var);
+      store->num_components = num_components;
+      /* Write mask covering every component of the value. */
+      store->const_index[0] = (1 << num_components) - 1;
+      store->variables[0] =
+         nir_deref_as_var(nir_copy_deref(store, &dest_head->deref));
+
+      store->src[0].is_ssa = true;
+      store->src[0].ssa = &load->dest.ssa;
+
+      nir_instr_insert_before(&copy_instr->instr, &store->instr);
+   }
+}
+
+/* Expands one copy_var intrinsic into load_var/store_var pairs.
+ *
+ * The replacement instructions are inserted in front of the copy; the copy
+ * itself is left in place for the caller to remove.  mem_ctx is the memory
+ * context used to create the new instructions.
+ */
+void
+nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx)
+{
+   assert(copy->intrinsic == nir_intrinsic_copy_var);
+
+   /* variables[0] is the destination of the copy, variables[1] its source */
+   nir_deref_var *dest = copy->variables[0];
+   nir_deref_var *src = copy->variables[1];
+
+   emit_copy_load_store(copy, dest, src, &dest->deref, &src->deref, mem_ctx);
+}
+
+/* Per-block worker: lowers every copy_var intrinsic in the block to
+ * load/store instructions, then unlinks and frees the copy.
+ *
+ * mem_ctx is forwarded to nir_lower_var_copy_instr.  Always returns true.
+ */
+static bool
+lower_var_copies_block(nir_block *block, void *mem_ctx)
+{
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+      if (intrin->intrinsic == nir_intrinsic_copy_var) {
+         nir_lower_var_copy_instr(intrin, mem_ctx);
+
+         nir_instr_remove(&intrin->instr);
+         ralloc_free(intrin);
+      }
+   }
+
+   return true;
+}
+
+/* Lowers the copies of one function implementation, using the impl's
+ * ralloc parent as the memory context for the new instructions.
+ */
+static void
+lower_var_copies_impl(nir_function_impl *impl)
+{
+   void *mem_ctx = ralloc_parent(impl);
+
+   nir_foreach_block(impl, lower_var_copies_block, mem_ctx);
+}
+
+/* Entry point: replaces each copy_var instruction in the shader with an
+ * equivalent sequence of load/store instructions.  Functions without an
+ * implementation are skipped.
+ */
+void
+nir_lower_var_copies(nir_shader *shader)
+{
+   nir_foreach_function(shader, function) {
+      if (!function->impl)
+         continue;
+
+      lower_var_copies_impl(function->impl);
+   }
+}
diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c b/src/compiler/nir/nir_lower_vars_to_ssa.c
new file mode 100644
index 00000000000..75d31ff60af
--- /dev/null
+++ b/src/compiler/nir/nir_lower_vars_to_ssa.c
@@ -0,0 +1,973 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ *
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_vla.h"
+
+
+/* One node of the tree that mirrors all dereferences seen for a variable.
+ * The root corresponds to the variable itself; each level below it
+ * corresponds to one struct-member or array-element dereference.
+ */
+struct deref_node {
+ /* Node one level up in the tree; NULL for the root of a variable. */
+ struct deref_node *parent;
+ /* GLSL type of the value this node refers to. */
+ const struct glsl_type *type;
+
+ /* NOTE(review): not written by any code visible in this part of the file;
+ * presumably set once the node is chosen for SSA lowering -- confirm.
+ */
+ bool lower_to_ssa;
+
+ /* Only valid for things that end up in the direct list.
+ * Note that multiple nir_deref_vars may correspond to this node, but they
+ * will all be equivalent, so any is as good as the other.
+ */
+ nir_deref_var *deref;
+ /* Link in lower_variables_state::direct_deref_nodes. */
+ struct exec_node direct_derefs_link;
+
+ /* Sets of the load_var / store_var / copy_var intrinsics that reference
+ * this node.  Each set is created lazily when the first such instruction
+ * is registered and is NULL until then.
+ */
+ struct set *loads;
+ struct set *stores;
+ struct set *copies;
+
+ /* Stack of reaching SSA definitions, see def_stack_push().
+ * def_stack_tail points at the top element and sits one slot before
+ * def_stack while the stack is empty.
+ */
+ nir_ssa_def **def_stack;
+ nir_ssa_def **def_stack_tail;
+
+ /* Children reached through a wildcard or an indirect array dereference,
+ * created on demand.
+ */
+ struct deref_node *wildcard;
+ struct deref_node *indirect;
+ /* Children reached through a struct member or a direct array index;
+ * deref_node_create() allocates glsl_get_length(type) trailing slots.
+ */
+ struct deref_node *children[0];
+};
+
+/* State of the vars-to-SSA lowering for one function implementation. */
+struct lower_variables_state {
+ /* Shader being lowered; used as the memory context when copies are
+ * lowered to load/store instructions.
+ */
+ nir_shader *shader;
+ /* ralloc context that owns the deref_nodes, their instruction sets and
+ * their def stacks.
+ */
+ void *dead_ctx;
+ /* Function implementation being processed; its block count sizes the
+ * per-node def stacks.
+ */
+ nir_function_impl *impl;
+
+ /* A hash table mapping variables to deref_node data */
+ struct hash_table *deref_var_nodes;
+
+ /* A list of the deref_nodes for fully-qualified direct dereferences,
+ * i.e. dereferences with no indirect or wildcard array dereferences.
+ *
+ * At the moment, we only lower loads, stores, and copies that can be
+ * trivially lowered to loads and stores, i.e. copies with no indirects
+ * and no wildcards. If a part of a variable that is being loaded from
+ * and/or stored into is also involved in a copy operation with
+ * wildcards, then we lower that copy operation to loads and stores, but
+ * otherwise we leave copies with wildcards alone. Since the only derefs
+ * used in these loads, stores, and trivial copies are ones with no
+ * wildcards and no indirects, these are precisely the derefs that we
+ * can actually consider lowering.
+ */
+ struct exec_list direct_deref_nodes;
+
+ /* Controls whether get_deref_node will add variables to the
+ * direct_deref_nodes list. This is turned on when we are initially
+ * scanning for load/store instructions. It is then turned off so we
+ * don't accidentally change the direct_deref_nodes list while we're
+ * iterating through it.
+ */
+ bool add_to_direct_deref_nodes;
+
+ /* A hash table mapping phi nodes to deref_state data */
+ struct hash_table *phi_table;
+};
+
+/* Allocates a zero-initialised deref_node with one trailing child slot per
+ * element/member of type (as reported by glsl_get_length).
+ *
+ * parent is the node one level up (NULL for a root).  The shader argument
+ * is only used as the ralloc parent of the allocation; the callers in this
+ * file pass their dead_ctx here.
+ */
+static struct deref_node *
+deref_node_create(struct deref_node *parent,
+                  const struct glsl_type *type, nir_shader *shader)
+{
+   const size_t children_size =
+      glsl_get_length(type) * sizeof(struct deref_node *);
+
+   struct deref_node *node =
+      rzalloc_size(shader, sizeof(struct deref_node) + children_size);
+
+   node->parent = parent;
+   node->type = type;
+   node->deref = NULL;
+   exec_node_init(&node->direct_derefs_link);
+
+   return node;
+}
+
+/* Looks up the root deref_node of a variable, creating and registering it
+ * in state->deref_var_nodes on first use.  The root is the top of the tree
+ * that represents every deref of that variable.
+ */
+static struct deref_node *
+get_deref_node_for_var(nir_variable *var, struct lower_variables_state *state)
+{
+   struct hash_entry *entry =
+      _mesa_hash_table_search(state->deref_var_nodes, var);
+
+   if (entry != NULL)
+      return entry->data;
+
+   /* First time we see this variable: make a parentless root for it. */
+   struct deref_node *root =
+      deref_node_create(NULL, var->type, state->dead_ctx);
+   _mesa_hash_table_insert(state->deref_var_nodes, var, root);
+
+   return root;
+}
+
+/* Gets the deref_node for the given deref chain and creates it if it
+ * doesn't yet exist. If the deref is fully-qualified and direct and
+ * state->add_to_direct_deref_nodes is true, it will be added to the list
+ * of fully-qualified direct derefs.
+ *
+ * Returns NULL if the chain contains a direct array index that is out of
+ * bounds for the array it indexes.
+ */
+static struct deref_node *
+get_deref_node(nir_deref_var *deref, struct lower_variables_state *state)
+{
+ /* Cleared as soon as an indirect or wildcard link is walked through. */
+ bool is_direct = true;
+
+ /* Start at the base of the chain. */
+ struct deref_node *node = get_deref_node_for_var(deref->var, state);
+ assert(deref->deref.type == node->type);
+
+ /* Walk the chain one link at a time, creating missing nodes on the way. */
+ for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) {
+ switch (tail->deref_type) {
+ case nir_deref_type_struct: {
+ nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
+
+ assert(deref_struct->index < glsl_get_length(node->type));
+
+ if (node->children[deref_struct->index] == NULL)
+ node->children[deref_struct->index] =
+ deref_node_create(node, tail->type, state->dead_ctx);
+
+ node = node->children[deref_struct->index];
+ break;
+ }
+
+ case nir_deref_type_array: {
+ nir_deref_array *arr = nir_deref_as_array(tail);
+
+ switch (arr->deref_array_type) {
+ case nir_deref_array_type_direct:
+ /* This is possible if a loop unrolls and generates an
+ * out-of-bounds offset. We need to handle this at least
+ * somewhat gracefully.
+ */
+ if (arr->base_offset >= glsl_get_length(node->type))
+ return NULL;
+
+ if (node->children[arr->base_offset] == NULL)
+ node->children[arr->base_offset] =
+ deref_node_create(node, tail->type, state->dead_ctx);
+
+ node = node->children[arr->base_offset];
+ break;
+
+ case nir_deref_array_type_indirect:
+ if (node->indirect == NULL)
+ node->indirect = deref_node_create(node, tail->type,
+ state->dead_ctx);
+
+ node = node->indirect;
+ is_direct = false;
+ break;
+
+ case nir_deref_array_type_wildcard:
+ if (node->wildcard == NULL)
+ node->wildcard = deref_node_create(node, tail->type,
+ state->dead_ctx);
+
+ node = node->wildcard;
+ is_direct = false;
+ break;
+
+ default:
+ unreachable("Invalid array deref type");
+ }
+ break;
+ }
+ default:
+ unreachable("Invalid deref type");
+ }
+ }
+
+ assert(node);
+
+ /* Only insert if it isn't already in the list. */
+ if (is_direct && state->add_to_direct_deref_nodes &&
+ node->direct_derefs_link.next == NULL) {
+ /* Remember one representative deref for this node. */
+ node->deref = deref;
+ assert(deref->var != NULL);
+ exec_list_push_tail(&state->direct_deref_nodes,
+ &node->direct_derefs_link);
+ }
+
+ return node;
+}
+
+/* \sa foreach_deref_node_match
+ *
+ * Recursive helper: node is the deref_node that corresponds to deref, and
+ * the remaining chain below deref is matched against node's subtree.  At an
+ * array level both the exact-index child and the wildcard child are
+ * followed.  Subtrees that do not exist are skipped.
+ *
+ * The callback is invoked on each node reached at the end of the chain.
+ * Returns false as soon as a callback returns false, true otherwise.
+ */
+static bool
+foreach_deref_node_worker(struct deref_node *node, nir_deref *deref,
+                          bool (* cb)(struct deref_node *node,
+                                      struct lower_variables_state *state),
+                          struct lower_variables_state *state)
+{
+   if (deref->child == NULL) {
+      return cb(node, state);
+   } else {
+      switch (deref->child->deref_type) {
+      case nir_deref_type_array: {
+         nir_deref_array *arr = nir_deref_as_array(deref->child);
+         assert(arr->deref_array_type == nir_deref_array_type_direct);
+         if (node->children[arr->base_offset] &&
+             !foreach_deref_node_worker(node->children[arr->base_offset],
+                                        deref->child, cb, state))
+            return false;
+
+         if (node->wildcard &&
+             !foreach_deref_node_worker(node->wildcard,
+                                        deref->child, cb, state))
+            return false;
+
+         return true;
+      }
+
+      case nir_deref_type_struct: {
+         nir_deref_struct *str = nir_deref_as_struct(deref->child);
+         /* The member node may be missing, e.g. below a wildcard that was
+          * only ever used with a different member.  There is nothing to
+          * visit in that case; recursing would hand a NULL node to the
+          * callback.
+          */
+         if (node->children[str->index] &&
+             !foreach_deref_node_worker(node->children[str->index],
+                                        deref->child, cb, state))
+            return false;
+
+         return true;
+      }
+
+      default:
+         unreachable("Invalid deref child type");
+      }
+   }
+}
+
+/* Calls the callback on every deref_node that "matches" the given deref.
+ * A node matches if it either refers to exactly that deref or differs from
+ * it only by having a wildcard in place of an array index.  For example,
+ * all of the following match a[6].foo[3].bar:
+ *
+ *    a[6].foo[3].bar
+ *    a[*].foo[3].bar
+ *    a[6].foo[*].bar
+ *    a[*].foo[*].bar
+ *
+ * The given deref must be a full-length and fully qualified (no wildcards
+ * or indirects) deref chain.
+ *
+ * Returns false if no root node could be obtained for the variable or if a
+ * callback returned false; true otherwise.
+ */
+static bool
+foreach_deref_node_match(nir_deref_var *deref,
+                         bool (* cb)(struct deref_node *node,
+                                     struct lower_variables_state *state),
+                         struct lower_variables_state *state)
+{
+   /* Look up the root node through a childless copy of the chain head. */
+   nir_deref_var head = *deref;
+   head.deref.child = NULL;
+
+   struct deref_node *root = get_deref_node(&head, state);
+
+   return root != NULL &&
+          foreach_deref_node_worker(root, &deref->deref, cb, state);
+}
+
+/* \sa deref_may_be_aliased
+ *
+ * Recursive helper: node is the deref_node for deref.  Returns true if the
+ * rest of the chain contains an indirect array deref, or if an indirect
+ * child exists at any array level along the matching path (following both
+ * the exact index and the wildcard child).
+ */
+static bool
+deref_may_be_aliased_node(struct deref_node *node, nir_deref *deref,
+                          struct lower_variables_state *state)
+{
+   nir_deref *child = deref->child;
+
+   /* End of the chain without meeting an indirect. */
+   if (child == NULL)
+      return false;
+
+   switch (child->deref_type) {
+   case nir_deref_type_array: {
+      nir_deref_array *arr = nir_deref_as_array(child);
+
+      /* Either the deref itself is indirect here, or some other deref of
+       * this array was: in both cases we're aliased.
+       */
+      if (arr->deref_array_type == nir_deref_array_type_indirect ||
+          node->indirect)
+         return true;
+
+      assert(arr->deref_array_type == nir_deref_array_type_direct);
+
+      struct deref_node *elem = node->children[arr->base_offset];
+      if (elem && deref_may_be_aliased_node(elem, child, state))
+         return true;
+
+      return node->wildcard &&
+             deref_may_be_aliased_node(node->wildcard, child, state);
+   }
+
+   case nir_deref_type_struct: {
+      nir_deref_struct *str = nir_deref_as_struct(child);
+      struct deref_node *member = node->children[str->index];
+
+      /* A member nobody has dereferenced cannot be aliased. */
+      return member != NULL &&
+             deref_may_be_aliased_node(member, child, state);
+   }
+
+   default:
+      unreachable("Invalid nir_deref child type");
+   }
+}
+
+/* Returns true if an indirect array dereference may touch this deref, and
+ * false if no indirect can ever reach it.
+ *
+ * For example, if the given deref is a[6].foo, then any use of a[i].foo
+ * causes this to return true.  The check is made at the array level, before
+ * the struct member is looked at, so it is conservative: a use of a[i].bar
+ * also makes a[6].foo count as possibly aliased.  A var_copy involving
+ * a[*].bar does not by itself affect the result, because that can be
+ * lowered to entirely direct load/stores.
+ *
+ * We only support asking this question about fully-qualified derefs.
+ * Obviously, it's pointless to ask this about indirects, but we also
+ * rule out wildcards.  Handling wildcard dereferences would involve
+ * checking each array index to make sure that there aren't any indirect
+ * references.
+ */
+static bool
+deref_may_be_aliased(nir_deref_var *deref,
+                     struct lower_variables_state *state)
+{
+   struct deref_node *root = get_deref_node_for_var(deref->var, state);
+
+   return deref_may_be_aliased_node(root, &deref->deref, state);
+}
+
+/* Records a load_var intrinsic in the loads set of the deref_node it reads
+ * from, creating the set on first use.  Loads whose deref has no node
+ * (get_deref_node returned NULL) are ignored.
+ */
+static void
+register_load_instr(nir_intrinsic_instr *load_instr,
+                    struct lower_variables_state *state)
+{
+   struct deref_node *node = get_deref_node(load_instr->variables[0], state);
+
+   if (node != NULL) {
+      if (node->loads == NULL) {
+         node->loads = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer,
+                                        _mesa_key_pointer_equal);
+      }
+
+      _mesa_set_add(node->loads, load_instr);
+   }
+}
+
+/* Records a store_var intrinsic in the stores set of the deref_node it
+ * writes to, creating the set on first use.  Stores whose deref has no node
+ * (get_deref_node returned NULL) are ignored.
+ */
+static void
+register_store_instr(nir_intrinsic_instr *store_instr,
+                     struct lower_variables_state *state)
+{
+   struct deref_node *node = get_deref_node(store_instr->variables[0], state);
+
+   if (node != NULL) {
+      if (node->stores == NULL) {
+         node->stores = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
+      }
+
+      _mesa_set_add(node->stores, store_instr);
+   }
+}
+
+/* Records a copy_var intrinsic in the copies set of both deref_nodes it
+ * involves (variables[0] and variables[1]), creating each set on first
+ * use.  A side whose deref has no node is skipped.
+ */
+static void
+register_copy_instr(nir_intrinsic_instr *copy_instr,
+                    struct lower_variables_state *state)
+{
+   for (unsigned side = 0; side < 2; side++) {
+      struct deref_node *node =
+         get_deref_node(copy_instr->variables[side], state);
+
+      if (node != NULL) {
+         if (node->copies == NULL) {
+            node->copies = _mesa_set_create(state->dead_ctx,
+                                            _mesa_hash_pointer,
+                                            _mesa_key_pointer_equal);
+         }
+
+         _mesa_set_add(node->copies, copy_instr);
+      }
+   }
+}
+
+/* Per-block worker that registers every load_var, store_var and copy_var
+ * intrinsic in the block with the deref_node(s) it references.
+ *
+ * void_state is the lower_variables_state.  Always returns true.
+ */
+static bool
+register_variable_uses_block(nir_block *block, void *void_state)
+{
+   struct lower_variables_state *state = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+      /* Any other intrinsic does not touch variables and is ignored. */
+      if (intrin->intrinsic == nir_intrinsic_load_var)
+         register_load_instr(intrin, state);
+      else if (intrin->intrinsic == nir_intrinsic_store_var)
+         register_store_instr(intrin, state);
+      else if (intrin->intrinsic == nir_intrinsic_copy_var)
+         register_copy_instr(intrin, state);
+   }
+
+   return true;
+}
+
+/* Walks over all of the copy instructions to or from the given deref_node
+ * and lowers them to load/store intrinsics.
+ *
+ * Each lowered copy is also dropped from the copies set of the node on its
+ * other side, so that no set keeps an instruction that has been removed.
+ * Afterwards the node's own copies set is cleared.
+ *
+ * Always returns true, so it can be used as a foreach_deref_node_match
+ * callback without stopping the walk.
+ */
+static bool
+lower_copies_to_load_store(struct deref_node *node,
+                           struct lower_variables_state *state)
+{
+   if (!node->copies)
+      return true;
+
+   struct set_entry *copy_entry;
+   set_foreach(node->copies, copy_entry) {
+      nir_intrinsic_instr *copy = (void *)copy_entry->key;
+
+      nir_lower_var_copy_instr(copy, state->shader);
+
+      for (unsigned i = 0; i < 2; ++i) {
+         struct deref_node *arg_node =
+            get_deref_node(copy->variables[i], state);
+
+         /* Only bother removing copy entries for other nodes */
+         if (arg_node == NULL || arg_node == node)
+            continue;
+
+         /* The entry belongs to arg_node's set, so it must be removed from
+          * that set and not from the one we are iterating over.
+          */
+         struct set_entry *arg_entry = _mesa_set_search(arg_node->copies, copy);
+         assert(arg_entry);
+         _mesa_set_remove(arg_node->copies, arg_entry);
+      }
+
+      nir_instr_remove(&copy->instr);
+   }
+
+   node->copies = NULL;
+
+   return true;
+}
+
+/** Pushes an SSA def onto the per-node definition stack
+ *
+ * The stack records which SSA definition of the node reaches the current
+ * point during variable renaming.  It is allocated on first use with one
+ * slot per block of the function and holds at most one definition per
+ * block: when the current top of the stack was defined in the same block as
+ * the new def, the top is overwritten instead of growing the stack.
+ */
+static void
+def_stack_push(struct deref_node *node, nir_ssa_def *def,
+               struct lower_variables_state *state)
+{
+   /* Lazily allocate the stack; an empty stack has its tail one slot before
+    * the first element.
+    */
+   if (!node->def_stack) {
+      node->def_stack = ralloc_array(state->dead_ctx, nir_ssa_def *,
+                                     state->impl->num_blocks);
+      node->def_stack_tail = node->def_stack - 1;
+   }
+
+   const bool stack_has_entries = node->def_stack_tail >= node->def_stack;
+
+   if (stack_has_entries &&
+       (*node->def_stack_tail)->parent_instr->block ==
+       def->parent_instr->block) {
+      /* Same block as the current top: replace rather than push. */
+      *node->def_stack_tail = def;
+      return;
+   }
+
+   node->def_stack_tail++;
+   *node->def_stack_tail = def;
+}
+
+/* Drops the top entry of the node's def stack when that entry was defined
+ * in the given block.  An empty stack is left untouched.
+ */
+static void
+def_stack_pop_if_in_block(struct deref_node *node, nir_block *block)
+{
+   /* A pop is only expected after some earlier push, which is what
+    * allocates the stack.
+    */
+   assert(node->def_stack != NULL);
+
+   const bool stack_has_entries = node->def_stack_tail >= node->def_stack;
+
+   if (stack_has_entries &&
+       (*node->def_stack_tail)->parent_instr->block == block)
+      node->def_stack_tail--;
+}
+
+/** Returns the SSA definition currently on top of the node's def stack.
+ *
+ * The top of the stack is assumed to dominate the given block.  When the
+ * stack is empty (or was never allocated), no definition reaches the block:
+ * an SSA undef with as many components as the node's type is inserted at
+ * the start of the function body, pushed onto the stack and returned.
+ */
+static nir_ssa_def *
+get_ssa_def_for_block(struct deref_node *node, nir_block *block,
+                      struct lower_variables_state *state)
+{
+   const bool have_def =
+      node->def_stack && node->def_stack_tail >= node->def_stack;
+
+   if (!have_def) {
+      /* Nothing dominates this block, so manufacture an undef. */
+      nir_ssa_undef_instr *undef_instr =
+         nir_ssa_undef_instr_create(state->shader,
+                                    glsl_get_vector_elements(node->type));
+
+      nir_instr_insert_before_cf_list(&state->impl->body,
+                                      &undef_instr->instr);
+      def_stack_push(node, &undef_instr->def, state);
+
+      return &undef_instr->def;
+   }
+
+   return *node->def_stack_tail;
+}
+
+/* Given a block and one of its predecessors, fills in the sources of the
+ * block's phi nodes so that they take their SSA defs from that predecessor.
+ * Only phis recorded in state->phi_table are touched.  This function must be
+ * called exactly once per block/predecessor pair.
+ */
+static void
+add_phi_sources(nir_block *block, nir_block *pred,
+                struct lower_variables_state *state)
+{
+   nir_foreach_instr(block, instr) {
+      /* The walk stops at the first non-phi instruction. */
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi_instr = nir_instr_as_phi(instr);
+      struct hash_entry *found =
+         _mesa_hash_table_search(state->phi_table, phi_instr);
+
+      /* Phis without an entry in phi_table are left alone. */
+      if (found != NULL) {
+         struct deref_node *var_node = found->data;
+         nir_phi_src *phi_src = ralloc(phi_instr, nir_phi_src);
+
+         phi_src->pred = pred;
+         phi_src->src.parent_instr = &phi_instr->instr;
+         phi_src->src.is_ssa = true;
+         phi_src->src.ssa = get_ssa_def_for_block(var_node, pred, state);
+
+         /* Hook the new source into the use list of the def it reads. */
+         list_addtail(&phi_src->src.use_link, &phi_src->src.ssa->uses);
+
+         exec_list_push_tail(&phi_instr->srcs, &phi_src->node);
+      }
+   }
+}
+
+/* Performs variable renaming by doing a DFS of the dominance tree
+ *
+ * This algorithm is very similar to the one outlined in "Efficiently
+ * Computing Static Single Assignment Form and the Control Dependence
+ * Graph" by Cytron et. al. The primary difference is that we only put one
+ * SSA def on the stack per block.
+ *
+ * For the given block this runs three steps:
+ *
+ * 1) Walk the instructions, pushing the defs of phis created by this pass,
+ * replacing load_var intrinsics with moves of the reaching def and
+ * pushing a new def for every store_var.
+ *
+ * 2) Fill in the phi sources of the successor blocks and recurse into the
+ * dominance-tree children.
+ *
+ * 3) Walk the instructions again, popping the defs pushed in step 1 and
+ * removing the lowered store_var intrinsics.
+ *
+ * Always returns true.
+ */
+static bool
+rename_variables_block(nir_block *block, struct lower_variables_state *state)
+{
+ nir_builder b;
+ nir_builder_init(&b, state->impl);
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type == nir_instr_type_phi) {
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+ struct hash_entry *entry =
+ _mesa_hash_table_search(state->phi_table, phi);
+
+ /* This can happen if we already have phi nodes in the program
+ * that were not created in this pass.
+ */
+ if (!entry)
+ continue;
+
+ struct deref_node *node = entry->data;
+
+ def_stack_push(node, &phi->dest.ssa, state);
+ } else if (instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_load_var: {
+ struct deref_node *node =
+ get_deref_node(intrin->variables[0], state);
+
+ if (node == NULL) {
+ /* If we hit this path then we are referencing an invalid
+ * value. Most likely, we unrolled something and are
+ * reading past the end of some array. In any case, this
+ * should result in an undefined value.
+ */
+ nir_ssa_undef_instr *undef =
+ nir_ssa_undef_instr_create(state->shader,
+ intrin->num_components);
+
+ nir_instr_insert_before(&intrin->instr, &undef->instr);
+ nir_instr_remove(&intrin->instr);
+
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(&undef->def));
+ continue;
+ }
+
+ /* Variables that are not being lowered keep their load_var. */
+ if (!node->lower_to_ssa)
+ continue;
+
+ /* Replace the load with a move of the def that reaches this point. */
+ nir_alu_instr *mov = nir_alu_instr_create(state->shader,
+ nir_op_imov);
+ mov->src[0].src.is_ssa = true;
+ mov->src[0].src.ssa = get_ssa_def_for_block(node, block, state);
+ /* Channels beyond num_components all read component 0. */
+ for (unsigned i = intrin->num_components; i < 4; i++)
+ mov->src[0].swizzle[i] = 0;
+
+ assert(intrin->dest.is_ssa);
+
+ mov->dest.write_mask = (1 << intrin->num_components) - 1;
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
+ intrin->num_components, NULL);
+
+ nir_instr_insert_before(&intrin->instr, &mov->instr);
+ nir_instr_remove(&intrin->instr);
+
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(&mov->dest.dest.ssa));
+ break;
+ }
+
+ case nir_intrinsic_store_var: {
+ struct deref_node *node =
+ get_deref_node(intrin->variables[0], state);
+
+ if (node == NULL) {
+ /* Probably an out-of-bounds array store. That should be a
+ * no-op. */
+ nir_instr_remove(&intrin->instr);
+ continue;
+ }
+
+ /* Variables that are not being lowered keep their store_var. */
+ if (!node->lower_to_ssa)
+ continue;
+
+ assert(intrin->num_components ==
+ glsl_get_vector_elements(node->type));
+
+ assert(intrin->src[0].is_ssa);
+
+ nir_ssa_def *new_def;
+ b.cursor = nir_before_instr(&intrin->instr);
+
+ /* const_index[0] is used as the store's per-channel write mask. */
+ if (intrin->const_index[0] == (1 << intrin->num_components) - 1) {
+ /* Whole variable store - just copy the source. Note that
+ * intrin->num_components and intrin->src[0].ssa->num_components
+ * may differ.
+ */
+ unsigned swiz[4];
+ for (unsigned i = 0; i < 4; i++)
+ swiz[i] = i < intrin->num_components ? i : 0;
+
+ new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
+ intrin->num_components, false);
+ } else {
+ nir_ssa_def *old_def = get_ssa_def_for_block(node, block, state);
+ /* For writemasked store_var intrinsics, we combine the newly
+ * written values with the existing contents of unwritten
+ * channels, creating a new SSA value for the whole vector.
+ */
+ nir_ssa_def *srcs[4];
+ for (unsigned i = 0; i < intrin->num_components; i++) {
+ if (intrin->const_index[0] & (1 << i)) {
+ srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
+ } else {
+ srcs[i] = nir_channel(&b, old_def, i);
+ }
+ }
+ new_def = nir_vec(&b, srcs, intrin->num_components);
+ }
+
+ assert(new_def->num_components == intrin->num_components);
+
+ def_stack_push(node, new_def, state);
+
+ /* We'll wait to remove the instruction until the next pass
+ * where we pop the node we just pushed back off the stack.
+ */
+ break;
+ }
+
+ default:
+ break;
+ }
+ }
+ }
+
+ /* The defs on the stacks are now the ones live at the end of this block;
+ * hand them to the phis of each successor.
+ */
+ if (block->successors[0])
+ add_phi_sources(block->successors[0], block, state);
+ if (block->successors[1])
+ add_phi_sources(block->successors[1], block, state);
+
+ /* Recurse into the dominance-tree children. */
+ for (unsigned i = 0; i < block->num_dom_children; ++i)
+ rename_variables_block(block->dom_children[i], state);
+
+ /* Now we iterate over the instructions and pop off any SSA defs that we
+ * pushed in the first loop.
+ */
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type == nir_instr_type_phi) {
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+ struct hash_entry *entry =
+ _mesa_hash_table_search(state->phi_table, phi);
+
+ /* This can happen if we already have phi nodes in the program
+ * that were not created in this pass.
+ */
+ if (!entry)
+ continue;
+
+ struct deref_node *node = entry->data;
+
+ def_stack_pop_if_in_block(node, block);
+ } else if (instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ if (intrin->intrinsic != nir_intrinsic_store_var)
+ continue;
+
+ struct deref_node *node = get_deref_node(intrin->variables[0], state);
+ if (!node)
+ continue;
+
+ if (!node->lower_to_ssa)
+ continue;
+
+ /* The store was left in place by the first loop; drop it now. */
+ def_stack_pop_if_in_block(node, block);
+ nir_instr_remove(&intrin->instr);
+ }
+ }
+
+ return true;
+}
+
+/* Inserts phi nodes for all variables marked lower_to_ssa
+ *
+ * This is the same algorithm as presented in "Efficiently Computing Static
+ * Single Assignment Form and the Control Dependence Graph" by Cytron et.
+ * al.
+ *
+ * Every phi created here is recorded in state->phi_table, keyed by the phi
+ * and mapping to the deref_node it was created for.
+ */
+static void
+insert_phi_nodes(struct lower_variables_state *state)
+{
+ /* Per-block stamps, indexed by block->index. A block counts as "in W"
+ * (work) or "already has a phi" (has_already) for the current node when
+ * its stamp equals iter_count, so neither array needs to be cleared
+ * between nodes.
+ */
+ NIR_VLA_ZERO(unsigned, work, state->impl->num_blocks);
+ NIR_VLA_ZERO(unsigned, has_already, state->impl->num_blocks);
+
+ /*
+ * Since the work flags already prevent us from inserting a node that has
+ * ever been inserted into W, we don't need to use a set to represent W.
+ * Also, since no block can ever be inserted into W more than once, we know
+ * that the maximum size of W is the number of basic blocks in the
+ * function. So all we need to handle W is an array and a pointer to the
+ * next element to be inserted and the next element to be removed.
+ */
+ NIR_VLA(nir_block *, W, state->impl->num_blocks);
+
+ unsigned w_start, w_end;
+ unsigned iter_count = 0;
+
+ foreach_list_typed(struct deref_node, node, direct_derefs_link,
+ &state->direct_deref_nodes) {
+ /* A node with no recorded stores is skipped. */
+ if (node->stores == NULL)
+ continue;
+
+ if (!node->lower_to_ssa)
+ continue;
+
+ /* Start a fresh worklist and a fresh stamp value for this node. */
+ w_start = w_end = 0;
+ iter_count++;
+
+ /* Seed W with every block that contains a store to the node. */
+ struct set_entry *store_entry;
+ set_foreach(node->stores, store_entry) {
+ nir_intrinsic_instr *store = (nir_intrinsic_instr *)store_entry->key;
+ if (work[store->instr.block->index] < iter_count)
+ W[w_end++] = store->instr.block;
+ work[store->instr.block->index] = iter_count;
+ }
+
+ while (w_start != w_end) {
+ nir_block *cur = W[w_start++];
+ struct set_entry *dom_entry;
+ set_foreach(cur->dom_frontier, dom_entry) {
+ nir_block *next = (nir_block *) dom_entry->key;
+
+ /*
+ * If there's more than one return statement, then the end block
+ * can be a join point for some definitions. However, there are
+ * no instructions in the end block, so nothing would use those
+ * phi nodes. Of course, we couldn't place those phi nodes
+ * anyways due to the restriction of having no instructions in the
+ * end block...
+ */
+ if (next == state->impl->end_block)
+ continue;
+
+ if (has_already[next->index] < iter_count) {
+ nir_phi_instr *phi = nir_phi_instr_create(state->shader);
+ nir_ssa_dest_init(&phi->instr, &phi->dest,
+ glsl_get_vector_elements(node->type), NULL);
+ nir_instr_insert_before_block(next, &phi->instr);
+
+ _mesa_hash_table_insert(state->phi_table, phi, node);
+
+ has_already[next->index] = iter_count;
+ /* The new phi is itself a definition, so its block has to be
+ * processed as well unless it is already queued.
+ */
+ if (work[next->index] < iter_count) {
+ work[next->index] = iter_count;
+ W[w_end++] = next;
+ }
+ }
+ }
+ }
+ }
+}
+
+
+/** Implements a pass to lower variable uses to SSA values
+ *
+ * This pass walks the list of instructions and tries to lower as many
+ * local variable load/store operations to SSA defs and uses as it can.
+ * The process involves four passes:
+ *
+ *  1) Iterate over all of the instructions and mark where each local
+ *     variable deref is used in a load, store, or copy.  While we're at
+ *     it, we keep track of all of the fully-qualified (no wildcards) and
+ *     fully-direct references we see and store them in the
+ *     direct_deref_nodes list.
+ *
+ *  2) Walk over the list of fully-qualified direct derefs generated in
+ *     the previous pass.  For each deref, we determine if it can ever be
+ *     aliased, i.e. if there is an indirect reference anywhere that may
+ *     refer to it.  If it cannot be aliased, we mark it for lowering to an
+ *     SSA value.  At this point, we lower any var_copy instructions that
+ *     use the given deref to load/store operations and, if the deref has a
+ *     constant initializer, we go ahead and add a load_const value at the
+ *     beginning of the function with the initialized value.
+ *
+ *  3) Walk over the list of derefs we plan to lower to SSA values and
+ *     insert phi nodes as needed.
+ *
+ *  4) Perform "variable renaming" by replacing the load/store instructions
+ *     with SSA definitions and SSA uses.
+ *
+ * Returns true if at least one deref was marked for lowering to SSA and
+ * false otherwise.  All of the pass-local bookkeeping is allocated out of
+ * state.dead_ctx, which is freed on every exit path.
+ */
+static bool
+nir_lower_vars_to_ssa_impl(nir_function_impl *impl)
+{
+   struct lower_variables_state state;
+
+   state.shader = impl->function->shader;
+   state.dead_ctx = ralloc_context(state.shader);
+   state.impl = impl;
+
+   state.deref_var_nodes = _mesa_hash_table_create(state.dead_ctx,
+                                                   _mesa_hash_pointer,
+                                                   _mesa_key_pointer_equal);
+   exec_list_make_empty(&state.direct_deref_nodes);
+   state.phi_table = _mesa_hash_table_create(state.dead_ctx,
+                                             _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+
+   /* Build the initial deref structures and direct_deref_nodes list */
+   state.add_to_direct_deref_nodes = true;
+   nir_foreach_block(impl, register_variable_uses_block, &state);
+
+   bool progress = false;
+
+   nir_metadata_require(impl, nir_metadata_block_index);
+
+   /* We're about to iterate through direct_deref_nodes.  Don't modify it. */
+   state.add_to_direct_deref_nodes = false;
+
+   foreach_list_typed_safe(struct deref_node, node, direct_derefs_link,
+                           &state.direct_deref_nodes) {
+      nir_deref_var *deref = node->deref;
+
+      /* Only local variables are candidates for lowering. */
+      if (deref->var->data.mode != nir_var_local) {
+         exec_node_remove(&node->direct_derefs_link);
+         continue;
+      }
+
+      if (deref_may_be_aliased(deref, &state)) {
+         exec_node_remove(&node->direct_derefs_link);
+         continue;
+      }
+
+      node->lower_to_ssa = true;
+      progress = true;
+
+      if (deref->var->constant_initializer) {
+         /* Seed the def stack with the initializer so that loads which are
+          * not preceded by a store see it.
+          */
+         nir_load_const_instr *load =
+            nir_deref_get_const_initializer_load(state.shader, deref);
+         nir_ssa_def_init(&load->instr, &load->def,
+                          glsl_get_vector_elements(node->type), NULL);
+         nir_instr_insert_before_cf_list(&impl->body, &load->instr);
+         def_stack_push(node, &load->def, &state);
+      }
+
+      foreach_deref_node_match(deref, lower_copies_to_load_store, &state);
+   }
+
+   if (!progress) {
+      /* Nothing is going to be lowered.  Release the bookkeeping now
+       * instead of leaving it attached to the shader.
+       */
+      ralloc_free(state.dead_ctx);
+      return false;
+   }
+
+   nir_metadata_require(impl, nir_metadata_dominance);
+
+   /* We may have lowered some copy instructions to load/store
+    * instructions.  The uses from the copy instructions have already been
+    * removed but we need to rescan to ensure that the uses from the newly
+    * added load/store instructions are registered.  We need this
+    * information for phi node insertion below.
+    */
+   nir_foreach_block(impl, register_variable_uses_block, &state);
+
+   insert_phi_nodes(&state);
+   rename_variables_block(nir_start_block(impl), &state);
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+
+   ralloc_free(state.dead_ctx);
+
+   return progress;
+}
+
+/* Runs the variables-to-SSA lowering on every function of the shader that
+ * has an implementation.
+ */
+void
+nir_lower_vars_to_ssa(nir_shader *shader)
+{
+   nir_foreach_function(shader, function) {
+      if (function->impl == NULL)
+         continue;
+
+      nir_lower_vars_to_ssa_impl(function->impl);
+   }
+}
diff --git a/src/compiler/nir/nir_lower_vec_to_movs.c b/src/compiler/nir/nir_lower_vec_to_movs.c
new file mode 100644
index 00000000000..06d627900c6
--- /dev/null
+++ b/src/compiler/nir/nir_lower_vec_to_movs.c
@@ -0,0 +1,310 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements a simple pass that lowers vecN instructions to a series of
+ * moves with partial writes.
+ */
+
+/* State handed to lower_vec_to_movs_block() through nir_foreach_block(). */
+struct vec_to_movs_state {
+ /* Function implementation being processed. */
+ nir_function_impl *impl;
+ /* Set to true once at least one vecN instruction has been lowered. */
+ bool progress;
+};
+
+/* Returns true when dest and src are both non-SSA, neither is accessed
+ * indirectly, and they name the same register at the same base offset.
+ */
+static bool
+src_matches_dest_reg(nir_dest *dest, nir_src *src)
+{
+   const bool both_are_regs = !dest->is_ssa && !src->is_ssa;
+
+   if (!both_are_regs)
+      return false;
+
+   if (dest->reg.reg != src->reg.reg)
+      return false;
+
+   if (dest->reg.base_offset != src->reg.base_offset)
+      return false;
+
+   return !dest->reg.indirect && !src->reg.indirect;
+}
+
+/**
+ * For a given starting writemask channel and corresponding source index in
+ * the vec instruction, insert a MOV to the vec instruction's dest of all the
+ * writemask channels that get read from the same src reg.
+ *
+ * Returns the mask of channels this call took care of, so the parent loop
+ * calling this knows which ones have been processed.  That includes channels
+ * that turned out to be a move of a register component onto itself: no
+ * instruction is emitted for them, but they need no further handling.
+ */
+static unsigned
+insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
+{
+   assert(start_idx < nir_op_infos[vec->op].num_inputs);
+
+   nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_imov);
+   nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov);
+   nir_alu_dest_copy(&mov->dest, &vec->dest, mov);
+
+   mov->dest.write_mask = (1u << start_idx);
+   mov->src[0].swizzle[start_idx] = vec->src[start_idx].swizzle[0];
+   mov->src[0].negate = vec->src[start_idx].negate;
+   mov->src[0].abs = vec->src[start_idx].abs;
+
+   /* Fold in every later channel that reads the same source with the same
+    * modifiers.
+    */
+   for (unsigned i = start_idx + 1; i < 4; i++) {
+      if (!(vec->dest.write_mask & (1u << i)))
+         continue;
+
+      if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src) &&
+          vec->src[i].negate == vec->src[start_idx].negate &&
+          vec->src[i].abs == vec->src[start_idx].abs) {
+         mov->dest.write_mask |= (1u << i);
+         mov->src[0].swizzle[i] = vec->src[i].swizzle[0];
+      }
+   }
+
+   /* Remember which channels this call is responsible for before any of
+    * them get stripped below.  This must not be read back from the mov
+    * later: the mov may have been freed by then.
+    */
+   const unsigned channels_handled = mov->dest.write_mask;
+
+   /* In some situations (if the vecN is involved in a phi-web), we can end
+    * up with a mov from a register to itself.  Some of those channels may end
+    * up doing nothing and there's no reason to have them as part of the mov.
+    */
+   if (src_matches_dest_reg(&mov->dest.dest, &mov->src[0].src) &&
+       !mov->src[0].abs && !mov->src[0].negate) {
+      for (unsigned i = 0; i < 4; i++) {
+         if (mov->src[0].swizzle[i] == i) {
+            mov->dest.write_mask &= ~(1u << i);
+         }
+      }
+   }
+
+   /* Only emit the instruction if it actually does something */
+   if (mov->dest.write_mask) {
+      nir_instr_insert_before(&vec->instr, &mov->instr);
+   } else {
+      ralloc_free(mov);
+   }
+
+   return channels_handled;
+}
+
+/* Returns true for the fdot_replicated2/3/4 and fdph_replicated opcodes. */
+static bool
+has_replicated_dest(nir_alu_instr *alu)
+{
+   switch (alu->op) {
+   case nir_op_fdot_replicated2:
+   case nir_op_fdot_replicated3:
+   case nir_op_fdot_replicated4:
+   case nir_op_fdph_replicated:
+      return true;
+   default:
+      return false;
+   }
+}
+
+/* Attempts to coalesce the "move" from the given source of the vec to the
+ * destination of the instruction generating the value. If, for whatever
+ * reason, we cannot coalesce the move, it does nothing and returns 0. We
+ * can then call insert_mov as normal.
+ *
+ * On success the producing ALU instruction is rewritten to write the vec's
+ * destination directly, and the mask of vec channels that were absorbed is
+ * returned.
+ */
+static unsigned
+try_coalesce(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
+{
+ assert(start_idx < nir_op_infos[vec->op].num_inputs);
+
+ /* We will only even try if the source is SSA */
+ if (!vec->src[start_idx].src.is_ssa)
+ return 0;
+
+ assert(vec->src[start_idx].src.ssa);
+
+ /* If we are going to do a reswizzle, then the vecN operation must be the
+ * only use of the source value. We also can't have any source modifiers.
+ */
+ nir_foreach_use(vec->src[start_idx].src.ssa, src) {
+ if (src->parent_instr != &vec->instr)
+ return 0;
+
+ nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
+ if (alu_src->abs || alu_src->negate)
+ return 0;
+ }
+
+ /* The value must not be used as an if condition either. */
+ if (!list_empty(&vec->src[start_idx].src.ssa->if_uses))
+ return 0;
+
+ /* Only values produced by an ALU instruction can be coalesced. */
+ if (vec->src[start_idx].src.ssa->parent_instr->type != nir_instr_type_alu)
+ return 0;
+
+ nir_alu_instr *src_alu =
+ nir_instr_as_alu(vec->src[start_idx].src.ssa->parent_instr);
+
+ if (has_replicated_dest(src_alu)) {
+ /* The fdot instruction is special: It replicates its result to all
+ * components. This means that we can always rewrite its destination
+ * and we don't need to swizzle anything.
+ */
+ } else {
+ /* We only care about being able to re-swizzle the instruction if it is
+ * something that we can reswizzle. It must be per-component. The one
+ * exception to this is the fdotN instructions which implicitly splat
+ * their result out to all channels.
+ */
+ if (nir_op_infos[src_alu->op].output_size != 0)
+ return 0;
+
+ /* If we are going to reswizzle the instruction, we can't have any
+ * non-per-component sources either.
+ */
+ for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+ if (nir_op_infos[src_alu->op].input_sizes[j] != 0)
+ return 0;
+ }
+
+ /* Stash off all of the ALU instruction's swizzles. */
+ uint8_t swizzles[4][4];
+ for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+ for (unsigned i = 0; i < 4; i++)
+ swizzles[j][i] = src_alu->src[j].swizzle[i];
+
+ unsigned write_mask = 0;
+ for (unsigned i = start_idx; i < 4; i++) {
+ if (!(vec->dest.write_mask & (1 << i)))
+ continue;
+
+ if (!vec->src[i].src.is_ssa ||
+ vec->src[i].src.ssa != &src_alu->dest.dest.ssa)
+ continue;
+
+ /* At this point, the given vec source matches up with the ALU
+ * instruction so we can re-swizzle that component to match.
+ */
+ write_mask |= 1 << i;
+ if (has_replicated_dest(src_alu)) {
+ /* Since the destination is a single replicated value, we don't need
+ * to do any reswizzling
+ */
+ } else {
+ for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+ src_alu->src[j].swizzle[i] = swizzles[j][vec->src[i].swizzle[0]];
+ }
+
+ /* Clear the no longer needed vec source */
+ nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, NIR_SRC_INIT);
+ }
+
+ /* Make the producing instruction write straight into the vec's
+ * destination, limited to the channels collected above.
+ */
+ nir_instr_rewrite_dest(&src_alu->instr, &src_alu->dest.dest, vec->dest.dest);
+ src_alu->dest.write_mask = write_mask;
+
+ return write_mask;
+}
+
+/* Block callback that replaces every vec2/vec3/vec4 instruction in the block
+ * with writes to a register: first through try_coalesce(), and through
+ * insert_mov() for whatever channels remain. The vec instruction itself is
+ * removed and freed afterwards. void_state is a struct vec_to_movs_state;
+ * its progress flag is set for every vec that is lowered. Always returns
+ * true.
+ */
+static bool
+lower_vec_to_movs_block(nir_block *block, void *void_state)
+{
+ struct vec_to_movs_state *state = void_state;
+ nir_function_impl *impl = state->impl;
+ nir_shader *shader = impl->function->shader;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_alu)
+ continue;
+
+ nir_alu_instr *vec = nir_instr_as_alu(instr);
+
+ switch (vec->op) {
+ case nir_op_vec2:
+ case nir_op_vec3:
+ case nir_op_vec4:
+ break;
+ default:
+ continue; /* The loop */
+ }
+
+ if (vec->dest.dest.is_ssa) {
+ /* Since we insert multiple MOVs, we have a register destination. */
+ nir_register *reg = nir_local_reg_create(impl);
+ reg->num_components = vec->dest.dest.ssa.num_components;
+
+ nir_ssa_def_rewrite_uses(&vec->dest.dest.ssa, nir_src_for_reg(reg));
+
+ nir_instr_rewrite_dest(&vec->instr, &vec->dest.dest,
+ nir_dest_for_reg(reg));
+ }
+
+ /* Mask of destination channels that have been taken care of so far. */
+ unsigned finished_write_mask = 0;
+
+ /* First, emit a MOV for all the src channels that are in the
+ * destination reg, in case other values we're populating in the dest
+ * might overwrite them.
+ */
+ for (unsigned i = 0; i < 4; i++) {
+ if (!(vec->dest.write_mask & (1 << i)))
+ continue;
+
+ if (src_matches_dest_reg(&vec->dest.dest, &vec->src[i].src)) {
+ finished_write_mask |= insert_mov(vec, i, shader);
+ break;
+ }
+ }
+
+ /* Now, emit MOVs for all the other src channels. */
+ for (unsigned i = 0; i < 4; i++) {
+ if (!(vec->dest.write_mask & (1 << i)))
+ continue;
+
+ /* Prefer coalescing into the producing instruction ... */
+ if (!(finished_write_mask & (1 << i)))
+ finished_write_mask |= try_coalesce(vec, i, shader);
+
+ /* ... and fall back to an explicit MOV if that did not cover it. */
+ if (!(finished_write_mask & (1 << i)))
+ finished_write_mask |= insert_mov(vec, i, shader);
+ }
+
+ nir_instr_remove(&vec->instr);
+ ralloc_free(vec);
+ state->progress = true;
+ }
+
+ return true;
+}
+
+/* Lowers the vecN instructions of one function implementation.  When any
+ * instruction was lowered, block-index and dominance metadata are marked as
+ * preserved.  Returns whether anything was lowered.
+ */
+static bool
+nir_lower_vec_to_movs_impl(nir_function_impl *impl)
+{
+   struct vec_to_movs_state state;
+
+   state.impl = impl;
+   state.progress = false;
+
+   nir_foreach_block(impl, lower_vec_to_movs_block, &state);
+
+   if (!state.progress)
+      return false;
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+
+   return true;
+}
+
+/* Lowers vecN instructions in every function of the shader that has an
+ * implementation.  Returns true if any of them reported progress.
+ */
+bool
+nir_lower_vec_to_movs(nir_shader *shader)
+{
+   bool any_progress = false;
+
+   nir_foreach_function(shader, function) {
+      if (function->impl == NULL)
+         continue;
+
+      if (nir_lower_vec_to_movs_impl(function->impl))
+         any_progress = true;
+   }
+
+   return any_progress;
+}
diff --git a/src/compiler/nir/nir_metadata.c b/src/compiler/nir/nir_metadata.c
new file mode 100644
index 00000000000..61aae73221e
--- /dev/null
+++ b/src/compiler/nir/nir_metadata.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ */
+
+#include "nir.h"
+
+/*
+ * Handles management of the metadata.
+ */
+
+/*
+ * Computes each of the requested metadata kinds (block indices, dominance,
+ * live SSA defs) that is not currently marked valid on the impl, then marks
+ * everything that was requested as valid.
+ */
+void
+nir_metadata_require(nir_function_impl *impl, nir_metadata required)
+{
+   /* valid_metadata is deliberately re-read for every check. */
+   if ((required & ~impl->valid_metadata) & nir_metadata_block_index)
+      nir_index_blocks(impl);
+
+   if ((required & ~impl->valid_metadata) & nir_metadata_dominance)
+      nir_calc_dominance_impl(impl);
+
+   if ((required & ~impl->valid_metadata) & nir_metadata_live_ssa_defs)
+      nir_live_ssa_defs_impl(impl);
+
+   impl->valid_metadata = impl->valid_metadata | required;
+}
+
+/*
+ * Restricts the impl's valid metadata to the kinds listed in "preserved";
+ * every other kind is dropped from valid_metadata.
+ */
+void
+nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved)
+{
+   impl->valid_metadata = impl->valid_metadata & preserved;
+}
+
+#ifdef DEBUG
+/**
+ * Make sure passes properly invalidate metadata (part 1).
+ *
+ * Run this ahead of a pass.  It plants the bogus
+ * nir_metadata_not_properly_reset flag on every function implementation;
+ * the flag only survives if the pass forgets to call
+ * nir_metadata_preserve().
+ */
+void
+nir_metadata_set_validation_flag(nir_shader *shader)
+{
+   nir_foreach_function(shader, function) {
+      if (function->impl == NULL)
+         continue;
+
+      function->impl->valid_metadata |= nir_metadata_not_properly_reset;
+   }
+}
+
+/**
+ * Make sure passes properly invalidate metadata (part 2).
+ *
+ * Run this after a pass has made progress.  It asserts that the bogus flag
+ * planted by nir_metadata_set_validation_flag() is gone from every function
+ * implementation.  Note that passes may not call nir_metadata_preserve() if
+ * they don't actually make any changes at all.
+ */
+void
+nir_metadata_check_validation_flag(nir_shader *shader)
+{
+   nir_foreach_function(shader, function) {
+      if (function->impl == NULL)
+         continue;
+
+      assert(!(function->impl->valid_metadata &
+               nir_metadata_not_properly_reset));
+   }
+}
+#endif
diff --git a/src/compiler/nir/nir_move_vec_src_uses_to_dest.c b/src/compiler/nir/nir_move_vec_src_uses_to_dest.c
new file mode 100644
index 00000000000..b5186e6e944
--- /dev/null
+++ b/src/compiler/nir/nir_move_vec_src_uses_to_dest.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements a pass that tries to move uses of vecN sources to their
+ * destinations. This is kind of like an inverse copy-propagation pass.
+ * For instance, if you have
+ *
+ * ssa_1 = vec4(a, b, c, d)
+ * ssa_2 = fadd(a, b)
+ *
+ * This will be turned into
+ *
+ * ssa_1 = vec4(a, b, c, d)
+ * ssa_2 = fadd(ssa_1.x, ssa_1.y)
+ *
+ * While this is "worse" because it adds a bunch of unneeded dependencies, it
+ * actually makes it much easier for vec4-based backends to coalesce the MOV's
+ * that result from the vec4 operation because it doesn't have to worry about
+ * quite as many reads.
+ */
+
+/* Tells whether the SSA def is available at (dominates) the instruction.
+ *
+ * The defining instruction itself, and anything whose index is not greater
+ * than it, is never considered dominated.  Within one block the instruction
+ * indices decide; across blocks the answer comes from block dominance.
+ */
+static bool
+ssa_def_dominates_instr(nir_ssa_def *def, nir_instr *instr)
+{
+   nir_instr *def_instr = def->parent_instr;
+
+   if (instr->index <= def_instr->index)
+      return false;
+
+   if (def_instr->block != instr->block)
+      return nir_block_dominates(def_instr->block, instr->block);
+
+   return def_instr->index < instr->index;
+}
+
+/* Per-block worker of the pass (the callback handed to nir_foreach_block).
+ *
+ * For each SSA vec2/vec3/vec4 ALU instruction in the block, walks the other
+ * uses of its source values and, when the use is an ALU instruction that the
+ * vec's destination dominates and every channel the use reads is present in
+ * the vec, rewrites the use to read the vec's destination instead.
+ *
+ * The "shader" argument is not referenced here.  Always returns true.
+ */
+static bool
+move_vec_src_uses_to_dest_block(nir_block *block, void *shader)
+{
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_alu)
+ continue;
+
+ nir_alu_instr *vec = nir_instr_as_alu(instr);
+
+ switch (vec->op) {
+ case nir_op_vec2:
+ case nir_op_vec3:
+ case nir_op_vec4:
+ break;
+ default:
+ continue; /* The loop */
+ }
+
+ /* Can't handle non-SSA vec operations */
+ if (!vec->dest.dest.is_ssa)
+ continue;
+
+ /* Can't handle saturation */
+ if (vec->dest.saturate)
+ continue;
+
+ /* First, mark all of the sources we are going to consider for rewriting
+ * to the destination.  srcs_remaining is a bitmask: bit i is set while
+ * source i is eligible and has not been handled yet.
+ */
+ int srcs_remaining = 0;
+ for (unsigned i = 0; i < nir_op_infos[vec->op].num_inputs; i++) {
+ /* We can't rewrite a source if it's not in SSA form */
+ if (!vec->src[i].src.is_ssa)
+ continue;
+
+ /* We can't rewrite a source if it has modifiers */
+ if (vec->src[i].abs || vec->src[i].negate)
+ continue;
+
+ srcs_remaining |= 1 << i;
+ }
+
+ /* We can't actually do anything with this instruction */
+ if (srcs_remaining == 0)
+ continue;
+
+ /* Each iteration picks the lowest set bit of srcs_remaining as i (via
+ * ffs) and the loop ends once the mask is empty.  The inner loop below
+ * always clears bit i, because it starts at j == i.
+ */
+ for (unsigned i; i = ffs(srcs_remaining) - 1, srcs_remaining;) {
+ /* swizzle[c] == j means component c of the shared source value ends
+ * up in channel j of the vec; -1 means the vec does not contain it.
+ */
+ int8_t swizzle[4] = { -1, -1, -1, -1 };
+
+ /* NOTE(review): sources are matched by SSA value only; source j is
+ * not re-checked against the is_ssa/modifier filters above -- confirm
+ * that a modified or non-SSA source j can never match here.
+ */
+ for (unsigned j = i; j < nir_op_infos[vec->op].num_inputs; j++) {
+ if (vec->src[j].src.ssa != vec->src[i].src.ssa)
+ continue;
+
+ /* Mark the given channel as having been handled */
+ srcs_remaining &= ~(1 << j);
+
+ /* Mark the appropriate channel as coming from src j */
+ swizzle[vec->src[j].swizzle[0]] = j;
+ }
+
+ nir_foreach_use_safe(vec->src[i].src.ssa, use) {
+ /* The vec's own read of the value is left alone */
+ if (use->parent_instr == &vec->instr)
+ continue;
+
+ /* We need to dominate the use if we are going to rewrite it */
+ if (!ssa_def_dominates_instr(&vec->dest.dest.ssa, use->parent_instr))
+ continue;
+
+ /* For now, we'll just rewrite ALU instructions */
+ if (use->parent_instr->type != nir_instr_type_alu)
+ continue;
+
+ assert(use->is_ssa);
+
+ nir_alu_instr *use_alu = nir_instr_as_alu(use->parent_instr);
+
+ /* Figure out which source we're actually looking at.
+ * (exec_node_data presumably recovers the nir_alu_src that embeds
+ * this nir_src, container-of style -- TODO confirm.)  The pointer
+ * difference against the start of the src array is the index.
+ */
+ nir_alu_src *use_alu_src = exec_node_data(nir_alu_src, use, src);
+ unsigned src_idx = use_alu_src - use_alu->src;
+ assert(src_idx < nir_op_infos[use_alu->op].num_inputs);
+
+ /* The use can only be redirected if every channel it actually reads
+ * is one that the vec contains.
+ */
+ bool can_reswizzle = true;
+ for (unsigned j = 0; j < 4; j++) {
+ if (!nir_alu_instr_channel_used(use_alu, src_idx, j))
+ continue;
+
+ if (swizzle[use_alu_src->swizzle[j]] == -1) {
+ can_reswizzle = false;
+ break;
+ }
+ }
+
+ if (!can_reswizzle)
+ continue;
+
+ /* At this point, we have determined that the given use can be
+ * reswizzled to actually use the destination of the vecN operation.
+ * Go ahead and rewrite it as needed.
+ */
+ nir_instr_rewrite_src(use->parent_instr, use,
+ nir_src_for_ssa(&vec->dest.dest.ssa));
+ /* Translate each used channel from "component of the old source"
+ * to "channel of the vec destination".
+ */
+ for (unsigned j = 0; j < 4; j++) {
+ if (!nir_alu_instr_channel_used(use_alu, src_idx, j))
+ continue;
+
+ use_alu_src->swizzle[j] = swizzle[use_alu_src->swizzle[j]];
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+/* Runs the pass over one function implementation.
+ *
+ * Dominance information is required up front and the instructions are
+ * (re)indexed, since the per-block worker compares instruction indices and
+ * block dominance.  Block indices and dominance are declared preserved.
+ */
+static void
+nir_move_vec_src_uses_to_dest_impl(nir_shader *shader, nir_function_impl *impl)
+{
+   nir_metadata_require(impl, nir_metadata_dominance);
+   nir_index_instrs(impl);
+
+   /* The shader pointer is only forwarded as the callback's opaque state. */
+   nir_foreach_block(impl, move_vec_src_uses_to_dest_block, shader);
+
+   nir_metadata_preserve(impl,
+                         nir_metadata_block_index | nir_metadata_dominance);
+}
+
+/* Public entry point: applies the pass to every function of the shader that
+ * has an implementation.
+ */
+void
+nir_move_vec_src_uses_to_dest(nir_shader *shader)
+{
+   nir_foreach_function(shader, function) {
+      nir_function_impl *impl = function->impl;
+
+      if (impl)
+         nir_move_vec_src_uses_to_dest_impl(shader, impl);
+   }
+}
diff --git a/src/compiler/nir/nir_normalize_cubemap_coords.c b/src/compiler/nir/nir_normalize_cubemap_coords.c
new file mode 100644
index 00000000000..9c15eb8c15c
--- /dev/null
+++ b/src/compiler/nir/nir_normalize_cubemap_coords.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand <[email protected]>
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/**
+ * This file implements a NIR lowering pass to perform the normalization of
+ * the cubemap coordinates to have the largest magnitude component be -1.0
+ * or 1.0. This is based on the old GLSL IR based pass by Eric.
+ */
+
+/* State shared by the per-block callback of this pass. */
+struct normalize_cubemap_state {
+ /* Builder used to emit the normalization instructions */
+ nir_builder b;
+ /* Set to true once any texture coordinate has been rewritten */
+ bool progress;
+};
+
+/* Per-block callback: for every texture instruction with a cube sampler
+ * dimension, replaces its coordinate source by the coordinate divided by
+ * the largest absolute value of its first three components, and records
+ * progress in the state.  Always returns true.
+ */
+static bool
+normalize_cubemap_coords_block(nir_block *block, void *void_state)
+{
+ struct normalize_cubemap_state *state = void_state;
+ nir_builder *b = &state->b;
+
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_tex)
+ continue;
+
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE)
+ continue;
+
+ /* Point the builder cursor in front of the texture instruction */
+ b->cursor = nir_before_instr(&tex->instr);
+
+ for (unsigned i = 0; i < tex->num_srcs; i++) {
+ if (tex->src[i].src_type != nir_tex_src_coord)
+ continue;
+
+ nir_ssa_def *orig_coord =
+ nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
+ assert(orig_coord->num_components >= 3);
+
+ /* norm = max(|x|, |y|, |z|) */
+ nir_ssa_def *abs = nir_fabs(b, orig_coord);
+ nir_ssa_def *norm = nir_fmax(b, nir_channel(b, abs, 0),
+ nir_fmax(b, nir_channel(b, abs, 1),
+ nir_channel(b, abs, 2)));
+
+ /* The multiply covers the whole coordinate vector, including a
+ * possible fourth component.
+ */
+ nir_ssa_def *normalized = nir_fmul(b, orig_coord, nir_frcp(b, norm));
+
+ /* Array indices don't have to be normalized, so make a new vector
+ * with the coordinate's array index untouched.
+ */
+ if (tex->coord_components == 4) {
+ normalized = nir_vec4(b,
+ nir_channel(b, normalized, 0),
+ nir_channel(b, normalized, 1),
+ nir_channel(b, normalized, 2),
+ nir_channel(b, orig_coord, 3));
+ }
+
+ nir_instr_rewrite_src(&tex->instr,
+ &tex->src[i].src,
+ nir_src_for_ssa(normalized));
+
+ state->progress = true;
+ }
+ }
+
+ return true;
+}
+
+/* Runs the normalization over one function implementation and returns
+ * whether any texture coordinate was rewritten.  Block indices and
+ * dominance are declared preserved whether or not anything changed.
+ */
+static bool
+normalize_cubemap_coords_impl(nir_function_impl *impl)
+{
+   struct normalize_cubemap_state state;
+
+   nir_builder_init(&state.b, impl);
+   state.progress = false;
+
+   nir_foreach_block(impl, normalize_cubemap_coords_block, &state);
+
+   nir_metadata_preserve(impl,
+                         nir_metadata_block_index | nir_metadata_dominance);
+
+   return state.progress;
+}
+
+/* Public entry point: normalizes cube map coordinates in every function of
+ * the shader that has an implementation.  Returns true if at least one
+ * function was changed.
+ */
+bool
+nir_normalize_cubemap_coords(nir_shader *shader)
+{
+   bool any_progress = false;
+
+   nir_foreach_function(shader, function) {
+      if (!function->impl)
+         ;
+      else if (normalize_cubemap_coords_impl(function->impl))
+         any_progress = true;
+   }
+
+   return any_progress;
+}
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
new file mode 100644
index 00000000000..e79810c1991
--- /dev/null
+++ b/src/compiler/nir/nir_opcodes.py
@@ -0,0 +1,668 @@
+#! /usr/bin/env python
+#
+# Copyright (C) 2014 Connor Abbott
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+# Authors:
+# Connor Abbott ([email protected])
+
+
+# Class that represents all the information we have about the opcode
+# NOTE: this must be kept in sync with nir_op_info
+
+class Opcode(object):
+   """Record of everything the generators know about one opcode.
+
+   NOTE: this must be kept in sync with nir_op_info
+   """
+   def __init__(self, name, output_size, output_type, input_sizes,
+                input_types, algebraic_properties, const_expr):
+      """Parameters:
+
+      - name: opcode name (the enum name is this with nir_op_ prepended)
+      - output_size / input_sizes: sizes in the range 0..4; when the output
+        size is non-zero, no input size may be zero
+      - output_type / input_types: type strings that get nir_type_ prepended;
+        input_types is a list with one entry per input
+      - algebraic_properties: space-separated string; nir_op_is_ is
+        prepended before each entry
+      - const_expr: an expression or series of statements that computes the
+        constant value of the opcode from the constant values of its inputs
+
+      Constant expressions are written in terms of the variables src0,
+      src1, ..., src(N-1), N being the number of arguments, and store their
+      result in the variable dst.  Per-component inputs and outputs are
+      scalars; non-per-component ones are structs with fields x, y, z and w
+      of the correct type.  All variables already have the correct type, so
+      no conversion is needed; in particular the conversion between the C
+      bool type and NIR_TRUE / NIR_FALSE happens automatically.
+
+      For per-component instructions the whole expression is executed once
+      per component.  For non-per-component instructions the expression is
+      expected to store the correct values in dst.x, dst.y, etc.  If "dst"
+      does not appear anywhere in the expression, an assignment is added
+      automatically: "dst = <expression>" for per-component instructions and
+      "dst.x = dst.y = ... = <expression>" for non-per-component ones.
+      """
+      # Type checks; the lists are probed through their first element only.
+      assert isinstance(name, str)
+      assert isinstance(output_size, int)
+      assert isinstance(output_type, str)
+      assert isinstance(input_sizes, list)
+      assert isinstance(input_sizes[0], int)
+      assert isinstance(input_types, list)
+      assert isinstance(input_types[0], str)
+      assert isinstance(algebraic_properties, str)
+      assert isinstance(const_expr, str)
+
+      # Shape checks.
+      assert len(input_sizes) == len(input_types)
+      assert 0 <= output_size <= 4
+      zero_sizes_allowed = (output_size == 0)
+      for size in input_sizes:
+         assert 0 <= size <= 4
+         assert zero_sizes_allowed or size != 0
+
+      self.name = name
+      self.num_inputs = len(input_sizes)
+      self.output_size = output_size
+      self.output_type = output_type
+      self.input_sizes = input_sizes
+      self.input_types = input_types
+      self.algebraic_properties = algebraic_properties
+      self.const_expr = const_expr
+
+# helper variables for strings
+# Type names; nir_type_ gets prepended to them by the generators.
+tfloat = "float"
+tint = "int"
+tbool = "bool"
+tuint = "uint"
+
+# Algebraic property names.  Each one ends in a space so that several can be
+# concatenated with "+" into the space-separated algebraic_properties string.
+commutative = "commutative "
+associative = "associative "
+
+# global dictionary of opcodes
+# Maps the opcode name to its Opcode object; filled in by opcode().
+opcodes = {}
+
+def opcode(name, output_size, output_type, input_sizes, input_types,
+           algebraic_properties, const_expr):
+   """Build an Opcode from the arguments and register it in the global
+   opcodes dictionary under its name.  A name may only be registered once.
+   """
+   assert name not in opcodes
+   info = Opcode(name, output_size, output_type, input_sizes,
+                 input_types, algebraic_properties, const_expr)
+   opcodes[name] = info
+
+def unop_convert(name, in_type, out_type, const_expr):
+   """Register a one-input opcode whose input and output types differ.
+   Both sizes are 0 and no algebraic properties are set.
+   """
+   no_properties = ""
+   opcode(name, 0, out_type, [0], [in_type], no_properties, const_expr)
+
+def unop(name, ty, const_expr):
+   """Register a one-input opcode whose input and output share the type ty.
+   Both sizes are 0 and no algebraic properties are set.
+   """
+   no_properties = ""
+   opcode(name, 0, ty, [0], [ty], no_properties, const_expr)
+
+def unop_horiz(name, output_size, output_type, input_size, input_type,
+               const_expr):
+   """Register a one-input opcode with explicitly given input and output
+   sizes and types; no algebraic properties are set.
+   """
+   input_sizes = [input_size]
+   input_types = [input_type]
+   opcode(name, output_size, output_type, input_sizes, input_types, "",
+          const_expr)
+
+def unop_reduce(name, output_size, output_type, input_type, prereduce_expr,
+                reduce_expr, final_expr):
+   """Register the 2-, 3- and 4-component variants (name + "2"/"3"/"4") of a
+   one-input reduction.
+
+   prereduce_expr is applied to each component ({src}), reduce_expr combines
+   two intermediate values ({src0}, {src1}) and final_expr post-processes the
+   reduced value ({src}).  Four components are combined pairwise:
+   reduce(reduce(x, y), reduce(z, w)).
+   """
+   def combine(lhs, rhs):
+      return reduce_expr.format(src0=lhs, src1=rhs)
+   def finish(expr):
+      return final_expr.format(src="(" + expr + ")")
+
+   x, y, z, w = ["(" + prereduce_expr.format(src="src0." + chan) + ")"
+                 for chan in "xyzw"]
+
+   unop_horiz(name + "2", output_size, output_type, 2, input_type,
+              finish(combine(x, y)))
+   unop_horiz(name + "3", output_size, output_type, 3, input_type,
+              finish(combine(combine(x, y), z)))
+   unop_horiz(name + "4", output_size, output_type, 4, input_type,
+              finish(combine(combine(x, y), combine(z, w))))
+
+
+# These two move instructions differ in what modifiers they support and what
+# the negate modifier means. Otherwise, they are identical.
+unop("fmov", tfloat, "src0")
+unop("imov", tint, "src0")
+
+unop("ineg", tint, "-src0")
+unop("fneg", tfloat, "-src0")
+unop("inot", tint, "~src0") # invert every bit of the integer
+unop("fnot", tfloat, "(src0 == 0.0f) ? 1.0f : 0.0f")
+unop("fsign", tfloat, "(src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)")
+unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)")
+unop("iabs", tint, "(src0 < 0) ? -src0 : src0")
+unop("fabs", tfloat, "fabsf(src0)")
+unop("fsat", tfloat, "(src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)")
+unop("frcp", tfloat, "1.0f / src0")
+unop("frsq", tfloat, "1.0f / sqrtf(src0)")
+unop("fsqrt", tfloat, "sqrtf(src0)")
+unop("fexp2", tfloat, "exp2f(src0)")
+unop("flog2", tfloat, "log2f(src0)")
+unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion.
+unop_convert("f2u", tfloat, tuint, "src0") # Float-to-unsigned conversion
+unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion.
+# Float-to-boolean conversion
+unop_convert("f2b", tfloat, tbool, "src0 != 0.0f")
+# Boolean-to-float conversion
+unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f")
+# Int-to-boolean conversion
+unop_convert("i2b", tint, tbool, "src0 != 0")
+unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion
+unop_convert("u2f", tuint, tfloat, "src0") # Unsigned-to-float conversion.
+
+# Unary floating-point rounding operations.
+
+
+unop("ftrunc", tfloat, "truncf(src0)")
+unop("fceil", tfloat, "ceilf(src0)")
+unop("ffloor", tfloat, "floorf(src0)")
+unop("ffract", tfloat, "src0 - floorf(src0)")
+unop("fround_even", tfloat, "_mesa_roundevenf(src0)")
+
+
+# Trigonometric operations.
+
+
+unop("fsin", tfloat, "sinf(src0)")
+unop("fcos", tfloat, "cosf(src0)")
+
+
+# Partial derivatives.
+
+
+unop("fddx", tfloat, "0.0f") # the derivative of a constant is 0.
+unop("fddy", tfloat, "0.0f")
+unop("fddx_fine", tfloat, "0.0f")
+unop("fddy_fine", tfloat, "0.0f")
+unop("fddx_coarse", tfloat, "0.0f")
+unop("fddy_coarse", tfloat, "0.0f")
+
+
+# Floating point pack and unpack operations.
+
+def pack_2x16(fmt):
+   """Register pack_<fmt>_2x16: two floats packed into one uint, src0.x in
+   the low 16 bits and src0.y in the high 16 bits.  Every "fmt" in the
+   expression text is replaced by the format name.
+   """
+   op_name = "pack_" + fmt + "_2x16"
+   expr = """
+dst.x = (uint32_t) pack_fmt_1x16(src0.x);
+dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16;
+""".replace("fmt", fmt)
+   unop_horiz(op_name, 1, tuint, 2, tfloat, expr)
+
+def pack_4x8(fmt):
+   """Register pack_<fmt>_4x8: four floats packed into one uint, one byte
+   each, src0.x in the lowest byte and src0.w in the highest.  Every "fmt"
+   in the expression text is replaced by the format name.
+   """
+   op_name = "pack_" + fmt + "_4x8"
+   expr = """
+dst.x = (uint32_t) pack_fmt_1x8(src0.x);
+dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8;
+dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16;
+dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24;
+""".replace("fmt", fmt)
+   unop_horiz(op_name, 1, tuint, 4, tfloat, expr)
+
+def unpack_2x16(fmt):
+   """Register unpack_<fmt>_2x16: one uint unpacked into two floats, dst.x
+   from the low 16 bits and dst.y from the high 16 bits.  Every "fmt" in the
+   expression text is replaced by the format name.
+   """
+   # The high half has to be shifted *down* before the cast to uint16_t;
+   # shifting left by 16 leaves zeros in the low 16 bits, so the cast would
+   # always produce 0.
+   unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tuint, """
+dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff));
+dst.y = unpack_fmt_1x16((uint16_t)(src0.x >> 16));
+""".replace("fmt", fmt))
+
+def unpack_4x8(fmt):
+   """Register unpack_<fmt>_4x8: one uint unpacked into four floats, dst.x
+   from the lowest byte and dst.w from the highest.  Every "fmt" in the
+   expression text is replaced by the format name.
+   """
+   op_name = "unpack_" + fmt + "_4x8"
+   expr = """
+dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff));
+dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff));
+dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff));
+dst.w = unpack_fmt_1x8((uint8_t)(src0.x >> 24));
+""".replace("fmt", fmt)
+   unop_horiz(op_name, 4, tfloat, 1, tuint, expr)
+
+
+pack_2x16("snorm")
+pack_4x8("snorm")
+pack_2x16("unorm")
+pack_4x8("unorm")
+pack_2x16("half")
+unpack_2x16("snorm")
+unpack_4x8("snorm")
+unpack_2x16("unorm")
+unpack_4x8("unorm")
+unpack_2x16("half")
+
+
+# Lowered floating point unpacking operations.
+
+
+unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tuint,
+ "unpack_half_1x16((uint16_t)(src0.x & 0xffff))")
+unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tuint,
+ "unpack_half_1x16((uint16_t)(src0.x >> 16))")
+
+
+# Bit operations, part of ARB_gpu_shader5.
+
+
+unop("bitfield_reverse", tuint, """
+/* we're not winning any awards for speed here, but that's ok */
+dst = 0;
+for (unsigned bit = 0; bit < 32; bit++)
+ dst |= ((src0 >> bit) & 1) << (31 - bit);
+""")
+unop("bit_count", tuint, """
+dst = 0;
+for (unsigned bit = 0; bit < 32; bit++) {
+ if ((src0 >> bit) & 1)
+ dst++;
+}
+""")
+
+# Index of the most significant set bit of an unsigned value, or -1 when no
+# bit is set.  The scan has to include bit 0, otherwise an input of 1 would
+# be folded to -1 instead of 0.
+unop_convert("ufind_msb", tuint, tint, """
+dst = -1;
+for (int bit = 31; bit >= 0; bit--) {
+   if ((src0 >> bit) & 1) {
+      dst = bit;
+      break;
+   }
+}
+""")
+
+unop("ifind_msb", tint, """
+dst = -1;
+for (int bit = 31; bit >= 0; bit--) {
+ /* If src0 < 0, we're looking for the first 0 bit.
+ * if src0 >= 0, we're looking for the first 1 bit.
+ */
+ if ((((src0 >> bit) & 1) && (src0 >= 0)) ||
+ (!((src0 >> bit) & 1) && (src0 < 0))) {
+ dst = bit;
+ break;
+ }
+}
+""")
+
+unop("find_lsb", tint, """
+dst = -1;
+for (unsigned bit = 0; bit < 32; bit++) {
+ if ((src0 >> bit) & 1) {
+ dst = bit;
+ break;
+ }
+}
+""")
+
+
+for i in xrange(1, 5):
+ for j in xrange(1, 5):
+ unop_horiz("fnoise{0}_{1}".format(i, j), i, tfloat, j, tfloat, "0.0f")
+
+def binop_convert(name, out_type, in_type, alg_props, const_expr):
+   """Register a two-input opcode whose inputs share in_type and whose
+   output has out_type.  All sizes are 0.
+   """
+   opcode(name, 0, out_type, [0] * 2, [in_type] * 2, alg_props, const_expr)
+
+def binop(name, ty, alg_props, const_expr):
+   """Register a two-input opcode whose inputs and output all have type ty."""
+   binop_convert(name, out_type=ty, in_type=ty, alg_props=alg_props,
+                 const_expr=const_expr)
+
+def binop_compare(name, ty, alg_props, const_expr):
+   """Register a two-input comparison: inputs of type ty, bool output."""
+   binop_convert(name, out_type=tbool, in_type=ty, alg_props=alg_props,
+                 const_expr=const_expr)
+
+def binop_horiz(name, out_size, out_type, src1_size, src1_type, src2_size,
+                src2_type, const_expr):
+   """Register a two-input opcode with explicitly given sizes and types for
+   the output and for each input; no algebraic properties are set.
+   """
+   sizes = [src1_size, src2_size]
+   types = [src1_type, src2_type]
+   opcode(name, out_size, out_type, sizes, types, "", const_expr)
+
+def binop_reduce(name, output_size, output_type, src_type, prereduce_expr,
+                 reduce_expr, final_expr):
+   """Register the 2-, 3- and 4-component variants (name + "2"/"3"/"4") of a
+   two-input reduction; all of them are marked commutative.
+
+   prereduce_expr is applied to each pair of matching components ({src0},
+   {src1}), reduce_expr combines two intermediate values ({src0}, {src1})
+   and final_expr post-processes the reduced value ({src}).  Four components
+   are combined pairwise: reduce(reduce(x, y), reduce(z, w)).
+   """
+   def combine(lhs, rhs):
+      return reduce_expr.format(src0=lhs, src1=rhs)
+   def finish(expr):
+      return final_expr.format(src= "(" + expr + ")")
+
+   x, y, z, w = ["(" + prereduce_expr.format(src0="src0." + chan,
+                                             src1="src1." + chan) + ")"
+                 for chan in "xyzw"]
+
+   opcode(name + "2", output_size, output_type,
+          [2, 2], [src_type, src_type], commutative,
+          finish(combine(x, y)))
+   opcode(name + "3", output_size, output_type,
+          [3, 3], [src_type, src_type], commutative,
+          finish(combine(combine(x, y), z)))
+   opcode(name + "4", output_size, output_type,
+          [4, 4], [src_type, src_type], commutative,
+          finish(combine(combine(x, y), combine(z, w))))
+
+binop("fadd", tfloat, commutative + associative, "src0 + src1")
+binop("iadd", tint, commutative + associative, "src0 + src1")
+binop("fsub", tfloat, "", "src0 - src1")
+binop("isub", tint, "", "src0 - src1")
+
+binop("fmul", tfloat, commutative + associative, "src0 * src1")
+# low 32-bits of signed/unsigned integer multiply
+binop("imul", tint, commutative + associative, "src0 * src1")
+# high 32-bits of signed integer multiply
+binop("imul_high", tint, commutative,
+ "(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)")
+# high 32-bits of unsigned integer multiply
+binop("umul_high", tuint, commutative,
+ "(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)")
+
+binop("fdiv", tfloat, "", "src0 / src1")
+# Integer division by zero is undefined behaviour in C, so constant folding
+# must not evaluate it on the host; fold to 0 instead, in the same way umod
+# guards its modulo.
+binop("idiv", tint, "", "src1 == 0 ? 0 : (src0 / src1)")
+binop("udiv", tuint, "", "src1 == 0 ? 0 : (src0 / src1)")
+
+# returns a boolean representing the carry resulting from the addition of
+# the two unsigned arguments.
+
+binop_convert("uadd_carry", tuint, tuint, commutative, "src0 + src1 < src0")
+
+# returns a boolean representing the borrow resulting from the subtraction
+# of the two unsigned arguments.
+
+binop_convert("usub_borrow", tuint, tuint, "", "src0 < src1")
+
+binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)")
+binop("umod", tuint, "", "src1 == 0 ? 0 : src0 % src1")
+
+#
+# Comparisons
+#
+
+
+# these integer-aware comparisons return a boolean (0 or ~0)
+
+binop_compare("flt", tfloat, "", "src0 < src1")
+binop_compare("fge", tfloat, "", "src0 >= src1")
+binop_compare("feq", tfloat, commutative, "src0 == src1")
+binop_compare("fne", tfloat, commutative, "src0 != src1")
+binop_compare("ilt", tint, "", "src0 < src1")
+binop_compare("ige", tint, "", "src0 >= src1")
+binop_compare("ieq", tint, commutative, "src0 == src1")
+binop_compare("ine", tint, commutative, "src0 != src1")
+binop_compare("ult", tuint, "", "src0 < src1")
+binop_compare("uge", tuint, "", "src0 >= src1")
+
+# integer-aware GLSL-style comparisons that compare floats and ints
+
+binop_reduce("ball_fequal", 1, tbool, tfloat, "{src0} == {src1}",
+ "{src0} && {src1}", "{src}")
+binop_reduce("bany_fnequal", 1, tbool, tfloat, "{src0} != {src1}",
+ "{src0} || {src1}", "{src}")
+binop_reduce("ball_iequal", 1, tbool, tint, "{src0} == {src1}",
+ "{src0} && {src1}", "{src}")
+binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}",
+ "{src0} || {src1}", "{src}")
+
+# non-integer-aware GLSL-style comparisons that return 0.0 or 1.0
+
+binop_reduce("fall_equal", 1, tfloat, tfloat, "{src0} == {src1}",
+ "{src0} && {src1}", "{src} ? 1.0f : 0.0f")
+binop_reduce("fany_nequal", 1, tfloat, tfloat, "{src0} != {src1}",
+ "{src0} || {src1}", "{src} ? 1.0f : 0.0f")
+
+# These comparisons for integer-less hardware return 1.0 and 0.0 for true
+# and false respectively
+
+binop("slt", tfloat, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than
+binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal
+binop("seq", tfloat, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
+binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
+
+
+binop("ishl", tint, "", "src0 << src1")
+binop("ishr", tint, "", "src0 >> src1")
+binop("ushr", tuint, "", "src0 >> src1")
+
+# bitwise logic operators
+#
+# These are also used as boolean and, or, xor for hardware supporting
+# integers.
+
+
+binop("iand", tuint, commutative + associative, "src0 & src1")
+binop("ior", tuint, commutative + associative, "src0 | src1")
+binop("ixor", tuint, commutative + associative, "src0 ^ src1")
+
+
+# floating point logic operators
+#
+# These use (src != 0.0) for testing the truth of the input, and output 1.0
+# for true and 0.0 for false
+
+binop("fand", tfloat, commutative,
+ "((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f")
+binop("for", tfloat, commutative,
+ "((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f")
+binop("fxor", tfloat, commutative,
+ "(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f")
+
+binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
+ "{src}")
+
+binop_reduce("fdot_replicated", 4, tfloat, tfloat,
+ "{src0} * {src1}", "{src0} + {src1}", "{src}")
+
+opcode("fdph", 1, tfloat, [3, 4], [tfloat, tfloat], "",
+ "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")
+opcode("fdph_replicated", 4, tfloat, [3, 4], [tfloat, tfloat], "",
+ "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")
+
+binop("fmin", tfloat, "", "fminf(src0, src1)")
+binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
+binop("umin", tuint, commutative + associative, "src1 > src0 ? src0 : src1")
+binop("fmax", tfloat, "", "fmaxf(src0, src1)")
+binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
+binop("umax", tuint, commutative + associative, "src1 > src0 ? src1 : src0")
+
+# Saturated vector add for 4 8bit ints.
+binop("usadd_4x8", tint, commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+ dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i;
+}
+""")
+
+# Saturated vector subtract for 4 8bit ints.
+binop("ussub_4x8", tint, "", """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+ int src0_chan = (src0 >> i) & 0xff;
+ int src1_chan = (src1 >> i) & 0xff;
+ if (src0_chan > src1_chan)
+ dst |= (src0_chan - src1_chan) << i;
+}
+""")
+
+# vector min for 4 8bit ints.
+binop("umin_4x8", tint, commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+ dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
+}
+""")
+
+# vector max for 4 8bit ints.
+binop("umax_4x8", tint, commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+ dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
+}
+""")
+
+# unorm multiply: (a * b) / 255.
+binop("umul_unorm_4x8", tint, commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+ int src0_chan = (src0 >> i) & 0xff;
+ int src1_chan = (src1 >> i) & 0xff;
+ dst |= ((src0_chan * src1_chan) / 255) << i;
+}
+""")
+
+binop("fpow", tfloat, "", "powf(src0, src1)")
+
+binop_horiz("pack_half_2x16_split", 1, tuint, 1, tfloat, 1, tfloat,
+ "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)")
+
+# bfm implements the behavior of the first operation of the SM5 "bfi" assembly
+# and that of the "bfi1" i965 instruction. That is, it has undefined behavior
+# if either of its arguments is 32.
+binop_convert("bfm", tuint, tint, "", """
+int bits = src0, offset = src1;
+if (offset < 0 || bits < 0 || offset > 31 || bits > 31 || offset + bits > 32)
+ dst = 0; /* undefined */
+else
+ dst = ((1u << bits) - 1) << offset;
+""")
+
+opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint], "", """
+dst = ldexpf(src0, src1);
+/* flush denormals to zero. */
+if (!isnormal(dst))
+ dst = copysignf(0.0f, src0);
+""")
+
+# Combines the first component of each input to make a 2-component vector.
+
+binop_horiz("vec2", 2, tuint, 1, tuint, 1, tuint, """
+dst.x = src0.x;
+dst.y = src1.x;
+""")
+
+def triop(name, ty, const_expr):
+   """Register a three-input opcode whose inputs and output all have type
+   ty.  All sizes are 0 and no algebraic properties are set.
+   """
+   opcode(name, 0, ty, [0] * 3, [ty] * 3, "", const_expr)
+def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr):
+   """Register a three-input opcode with explicitly given sizes; the output
+   and all inputs are uint and no algebraic properties are set.
+   """
+   sizes = [src1_size, src2_size, src3_size]
+   opcode(name, output_size, tuint, sizes, [tuint] * 3, "", const_expr)
+
+triop("ffma", tfloat, "src0 * src1 + src2")
+
+triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
+
+# Conditional Select
+#
+# A vector conditional select instruction (like ?:, but operating per-
+# component on vectors). There are two versions, one for floating point
+# bools (0.0 vs 1.0) and one for integer bools (0 vs ~0).
+
+
+triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2")
+opcode("bcsel", 0, tuint, [0, 0, 0],
+ [tbool, tuint, tuint], "", "src0 ? src1 : src2")
+
+# SM5 bfi assembly
+triop("bfi", tuint, """
+unsigned mask = src0, insert = src1, base = src2;
+if (mask == 0) {
+ dst = base;
+} else {
+ unsigned tmp = mask;
+ while (!(tmp & 1)) {
+ tmp >>= 1;
+ insert <<= 1;
+ }
+ dst = (base & ~mask) | (insert & mask);
+}
+""")
+
+# SM5 ubfe/ibfe assembly
+opcode("ubfe", 0, tuint,
+ [0, 0, 0], [tuint, tint, tint], "", """
+unsigned base = src0;
+int offset = src1, bits = src2;
+if (bits == 0) {
+ dst = 0;
+} else if (bits < 0 || offset < 0) {
+ dst = 0; /* undefined */
+} else if (offset + bits < 32) {
+ dst = (base << (32 - bits - offset)) >> (32 - bits);
+} else {
+ dst = base >> offset;
+}
+""")
+opcode("ibfe", 0, tint,
+ [0, 0, 0], [tint, tint, tint], "", """
+int base = src0;
+int offset = src1, bits = src2;
+if (bits == 0) {
+ dst = 0;
+} else if (bits < 0 || offset < 0) {
+ dst = 0; /* undefined */
+} else if (offset + bits < 32) {
+ dst = (base << (32 - bits - offset)) >> (32 - bits);
+} else {
+ dst = base >> offset;
+}
+""")
+
+# GLSL bitfieldExtract()
+opcode("ubitfield_extract", 0, tuint,
+ [0, 0, 0], [tuint, tint, tint], "", """
+unsigned base = src0;
+int offset = src1, bits = src2;
+if (bits == 0) {
+ dst = 0;
+} else if (bits < 0 || offset < 0 || offset + bits > 32) {
+ dst = 0; /* undefined per the spec */
+} else {
+ dst = (base >> offset) & ((1ull << bits) - 1);
+}
+""")
+# Signed GLSL bitfieldExtract(): "bits" bits of base starting at "offset",
+# sign-extended.  The left shift moves the top bit of the field to bit 31;
+# the right shift then has to be by (32 - bits) so that the field lands in
+# the low bits.  This is the same shift pair that ibfe uses.
+opcode("ibitfield_extract", 0, tint,
+       [0, 0, 0], [tint, tint, tint], "", """
+int base = src0;
+int offset = src1, bits = src2;
+if (bits == 0) {
+   dst = 0;
+} else if (offset < 0 || bits < 0 || offset + bits > 32) {
+   dst = 0;
+} else {
+   dst = (base << (32 - offset - bits)) >> (32 - bits); /* use sign-extending shift */
+}
+""")
+
+# Combines the first component of each input to make a 3-component vector.
+
+triop_horiz("vec3", 3, 1, 1, 1, """
+dst.x = src0.x;
+dst.y = src1.x;
+dst.z = src2.x;
+""")
+
+def quadop_horiz(name, output_size, src1_size, src2_size, src3_size,
+                 src4_size, const_expr):
+   """Register a four-input opcode with explicitly given sizes; the output
+   and all inputs are uint and no algebraic properties are set.
+   """
+   sizes = [src1_size, src2_size, src3_size, src4_size]
+   opcode(name, output_size, tuint, sizes, [tuint] * 4, "", const_expr)
+
+# GLSL bitfieldInsert(): replaces "bits" bits of base, starting at "offset",
+# with the low "bits" bits of insert.  With bits == 0 nothing is replaced, so
+# the result is base.  The inserted value has to be moved up by "offset" (not
+# by "bits") to line up with the mask.
+opcode("bitfield_insert", 0, tuint, [0, 0, 0, 0],
+       [tuint, tuint, tint, tint], "", """
+unsigned base = src0, insert = src1;
+int offset = src2, bits = src3;
+if (bits == 0) {
+   dst = base;
+} else if (offset < 0 || bits < 0 || bits + offset > 32) {
+   dst = 0;
+} else {
+   unsigned mask = ((1ull << bits) - 1) << offset;
+   dst = (base & ~mask) | ((insert << offset) & mask);
+}
+""")
+
+quadop_horiz("vec4", 4, 1, 1, 1, 1, """
+dst.x = src0.x;
+dst.y = src1.x;
+dst.z = src2.x;
+dst.w = src3.x;
+""")
+
+
diff --git a/src/compiler/nir/nir_opcodes_c.py b/src/compiler/nir/nir_opcodes_c.py
new file mode 100644
index 00000000000..7049c5be676
--- /dev/null
+++ b/src/compiler/nir/nir_opcodes_c.py
@@ -0,0 +1,55 @@
+#! /usr/bin/env python
+#
+# Copyright (C) 2014 Connor Abbott
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+# Authors:
+# Connor Abbott ([email protected])
+
+from nir_opcodes import opcodes
+from mako.template import Template
+
+template = Template("""
+#include "nir.h"
+
+const nir_op_info nir_op_infos[nir_num_opcodes] = {
+% for name, opcode in sorted(opcodes.iteritems()):
+{
+ .name = "${name}",
+ .num_inputs = ${opcode.num_inputs},
+ .output_size = ${opcode.output_size},
+ .output_type = ${"nir_type_" + opcode.output_type},
+ .input_sizes = {
+ ${ ", ".join(str(size) for size in opcode.input_sizes) }
+ },
+ .input_types = {
+ ${ ", ".join("nir_type_" + type for type in opcode.input_types) }
+ },
+ .algebraic_properties =
+ ${ "0" if opcode.algebraic_properties == "" else " | ".join(
+ "NIR_OP_IS_" + prop.upper() for prop in
+ opcode.algebraic_properties.strip().split(" ")) }
+},
+% endfor
+};
+""")
+
+print template.render(opcodes=opcodes)
diff --git a/src/compiler/nir/nir_opcodes_h.py b/src/compiler/nir/nir_opcodes_h.py
new file mode 100644
index 00000000000..be15a96d236
--- /dev/null
+++ b/src/compiler/nir/nir_opcodes_h.py
@@ -0,0 +1,47 @@
+#! /usr/bin/env python
+
+template = """\
+/* Copyright (C) 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ */
+
+#ifndef _NIR_OPCODES_
+#define _NIR_OPCODES_
+
+<% opcode_names = sorted(opcodes.iterkeys()) %>
+
+typedef enum {
+% for name in opcode_names:
+ nir_op_${name},
+% endfor
+ nir_last_opcode = nir_op_${opcode_names[-1]},
+ nir_num_opcodes = nir_last_opcode + 1
+} nir_op;
+
+#endif /* _NIR_OPCODES_ */"""
+
+from nir_opcodes import opcodes
+from mako.template import Template
+
+print Template(template).render(opcodes=opcodes)
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
new file mode 100644
index 00000000000..7745b76f7ce
--- /dev/null
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -0,0 +1,285 @@
+#! /usr/bin/env python
+#
+# Copyright (C) 2014 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+# Authors:
+# Jason Ekstrand ([email protected])
+
+import nir_algebraic
+
+# Convenience variables: each name is bound to the string of the same name,
+# so the patterns below can write a instead of 'a' for a pattern variable.
+a = 'a'
+b = 'b'
+c = 'c'
+d = 'd'
+
+# Written in the form (<search>, <replace>) where <search> is an expression
+# and <replace> is either an expression or a value. An expression is
+# defined as a tuple of the form (<op>, <src0>, <src1>, <src2>, <src3>)
+# where each source is either an expression or a value. A value can be
+# either a numeric constant or a string representing a variable name.
+#
+# Variable names are specified as "[#]name[@type]" where "#" indicates that
+# the given variable will only match constants and the type indicates that
+# the given variable will only match values from ALU instructions with the
+# given output type.
+#
+# For constants, you have to be careful to make sure that it is the right
+# type because python is unaware of the source and destination types of the
+# opcodes.
+
+# Table of (<search>, <replace>) rewrites handed to the "nir_opt_algebraic"
+# pass at the bottom of this file.  Some entries carry an optional third
+# element: a string holding a C expression over "options" (for example
+# 'options->lower_flrp').
+# NOTE(review): presumably nir_algebraic only applies such an entry when that
+# expression is true for the shader's compiler options -- confirm against
+# nir_algebraic.py.
+optimizations = [
+ (('fneg', ('fneg', a)), a),
+ (('ineg', ('ineg', a)), a),
+ (('fabs', ('fabs', a)), ('fabs', a)),
+ (('fabs', ('fneg', a)), ('fabs', a)),
+ (('iabs', ('iabs', a)), ('iabs', a)),
+ (('iabs', ('ineg', a)), ('iabs', a)),
+ (('fadd', a, 0.0), a),
+ (('iadd', a, 0), a),
+ (('usadd_4x8', a, 0), a),
+ (('usadd_4x8', a, ~0), ~0),
+ (('fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
+ (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
+ (('fadd', ('fneg', a), a), 0.0),
+ (('iadd', ('ineg', a), a), 0),
+ (('iadd', ('ineg', a), ('iadd', a, b)), b),
+ (('iadd', a, ('iadd', ('ineg', a), b)), b),
+ (('fadd', ('fneg', a), ('fadd', a, b)), b),
+ (('fadd', a, ('fadd', ('fneg', a), b)), b),
+ (('fmul', a, 0.0), 0.0),
+ (('imul', a, 0), 0),
+ (('umul_unorm_4x8', a, 0), 0),
+ (('umul_unorm_4x8', a, ~0), a),
+ (('fmul', a, 1.0), a),
+ (('imul', a, 1), a),
+ (('fmul', a, -1.0), ('fneg', a)),
+ (('imul', a, -1), ('ineg', a)),
+ (('ffma', 0.0, a, b), b),
+ (('ffma', a, 0.0, b), b),
+ (('ffma', a, b, 0.0), ('fmul', a, b)),
+ (('ffma', a, 1.0, b), ('fadd', a, b)),
+ (('ffma', 1.0, a, b), ('fadd', a, b)),
+ (('flrp', a, b, 0.0), a),
+ (('flrp', a, b, 1.0), b),
+ (('flrp', a, a, b), a),
+ (('flrp', 0.0, a, b), ('fmul', a, b)),
+ (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
+ (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
+ (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
+ (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
+ (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
+ (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
+ # Comparison simplifications
+ (('inot', ('flt', a, b)), ('fge', a, b)),
+ (('inot', ('fge', a, b)), ('flt', a, b)),
+ (('inot', ('feq', a, b)), ('fne', a, b)),
+ (('inot', ('fne', a, b)), ('feq', a, b)),
+ (('inot', ('ilt', a, b)), ('ige', a, b)),
+ (('inot', ('ige', a, b)), ('ilt', a, b)),
+ (('inot', ('ieq', a, b)), ('ine', a, b)),
+ (('inot', ('ine', a, b)), ('ieq', a, b)),
+ (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
+ (('bcsel', ('flt', a, b), a, b), ('fmin', a, b)),
+ (('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
+ (('bcsel', ('inot', 'a@bool'), b, c), ('bcsel', a, c, b)),
+ (('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)),
+ (('fmin', a, a), a),
+ (('fmax', a, a), a),
+ (('imin', a, a), a),
+ (('imax', a, a), a),
+ (('umin', a, a), a),
+ (('umax', a, a), a),
+ (('fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'),
+ (('fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'),
+ (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
+ (('fsat', ('fsat', a)), ('fsat', a)),
+ (('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)),
+ (('ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
+ (('ior', ('flt', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)),
+ (('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
+ (('ior', ('fge', a, c), ('fge', b, c)), ('fge', ('fmax', a, b), c)),
+ (('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'),
+ (('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'),
+ (('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'),
+ (('sne', a, b), ('b2f', ('fne', a, b)), 'options->lower_scmp'),
+ (('fne', ('fneg', a), a), ('fne', a, 0.0)),
+ (('feq', ('fneg', a), a), ('feq', a, 0.0)),
+ # Emulating booleans
+ (('imul', ('b2i', a), ('b2i', b)), ('b2i', ('iand', a, b))),
+ (('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))),
+ (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))),
+ (('iand', 'a@bool', 1.0), ('b2f', a)),
+ (('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
+ (('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
+ # Comparison with the same args. Note that these are not done for
+ # the float versions because NaN always returns false on float
+ # inequalities.
+ (('ilt', a, a), False),
+ (('ige', a, a), True),
+ (('ieq', a, a), True),
+ (('ine', a, a), False),
+ (('ult', a, a), False),
+ (('uge', a, a), True),
+ # Logical and bit operations
+ (('fand', a, 0.0), 0.0),
+ (('iand', a, a), a),
+ (('iand', a, ~0), a),
+ (('iand', a, 0), 0),
+ (('ior', a, a), a),
+ (('ior', a, 0), a),
+ (('fxor', a, a), 0.0),
+ (('ixor', a, a), 0),
+ (('inot', ('inot', a)), a),
+ # DeMorgan's Laws
+ (('iand', ('inot', a), ('inot', b)), ('inot', ('ior', a, b))),
+ (('ior', ('inot', a), ('inot', b)), ('inot', ('iand', a, b))),
+ # Shift optimizations
+ (('ishl', 0, a), 0),
+ (('ishl', a, 0), a),
+ (('ishr', 0, a), 0),
+ (('ishr', a, 0), a),
+ (('ushr', 0, a), 0),
+ (('ushr', a, 0), a),
+ # Exponential/logarithmic identities
+ (('fexp2', ('flog2', a)), a), # 2^lg2(a) = a
+ (('flog2', ('fexp2', a)), a), # lg2(2^a) = a
+ (('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b)
+ (('fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
+ (('fpow', a, 1.0), a),
+ (('fpow', a, 2.0), ('fmul', a, a)),
+ (('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))),
+ (('fpow', 2.0, a), ('fexp2', a)),
+ (('fpow', ('fpow', a, 2.2), 0.454545), a),
+ (('fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)),
+ (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
+ (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
+ (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))),
+ (('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))),
+ (('flog2', ('frcp', a)), ('fneg', ('flog2', a))),
+ (('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
+ (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
+ (('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))),
+ (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),
+ (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
+ # Division and reciprocal
+ (('fdiv', 1.0, a), ('frcp', a)),
+ (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
+ (('frcp', ('frcp', a)), a),
+ (('frcp', ('fsqrt', a)), ('frsq', a)),
+ (('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'),
+ (('frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'),
+ # Boolean simplifications
+ (('ieq', 'a@bool', True), a),
+ (('ine', 'a@bool', True), ('inot', a)),
+ (('ine', 'a@bool', False), a),
+ (('ieq', 'a@bool', False), ('inot', 'a')),
+ (('bcsel', a, True, False), ('ine', a, 0)),
+ (('bcsel', a, False, True), ('ieq', a, 0)),
+ (('bcsel', True, b, c), b),
+ (('bcsel', False, b, c), c),
+ # The result of this should be hit by constant propagation and, in the
+ # next round of opt_algebraic, get picked up by one of the above two.
+ (('bcsel', '#a', b, c), ('bcsel', ('ine', 'a', 0), b, c)),
+
+ (('bcsel', a, b, b), b),
+ (('fcsel', a, b, b), b),
+
+ # Conversions
+ (('i2b', ('b2i', a)), a),
+ (('f2i', ('ftrunc', a)), ('f2i', a)),
+ (('f2u', ('ftrunc', a)), ('f2u', a)),
+
+ # Subtracts
+ (('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
+ (('isub', a, ('isub', 0, b)), ('iadd', a, b)),
+ (('ussub_4x8', a, 0), a),
+ (('ussub_4x8', a, ~0), 0),
+ (('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'),
+ (('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'),
+ (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
+ (('ineg', a), ('isub', 0, a), 'options->lower_negate'),
+ (('fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)),
+ (('iadd', a, ('isub', 0, b)), ('isub', a, b)),
+ (('fabs', ('fsub', 0.0, a)), ('fabs', a)),
+ (('iabs', ('isub', 0, a)), ('iabs', a)),
+
+ # Misc. lowering
+ (('fmod', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), 'options->lower_fmod'),
+ (('uadd_carry', a, b), ('b2i', ('ult', ('iadd', a, b), a)), 'options->lower_uadd_carry'),
+ (('usub_borrow', a, b), ('b2i', ('ult', a, b)), 'options->lower_usub_borrow'),
+
+ # Lower the GLSL-level bitfield opcodes to bfm/bfi and ibfe/ubfe.  The
+ # bcsel picks the unmodified 'insert' / 'value' operand when bits > 31.
+ (('bitfield_insert', 'base', 'insert', 'offset', 'bits'),
+ ('bcsel', ('ilt', 31, 'bits'), 'insert',
+ ('bfi', ('bfm', 'bits', 'offset'), 'insert', 'base')),
+ 'options->lower_bitfield_insert'),
+
+ (('ibitfield_extract', 'value', 'offset', 'bits'),
+ ('bcsel', ('ilt', 31, 'bits'), 'value',
+ ('ibfe', 'value', 'offset', 'bits')),
+ 'options->lower_bitfield_extract'),
+
+ (('ubitfield_extract', 'value', 'offset', 'bits'),
+ ('bcsel', ('ult', 31, 'bits'), 'value',
+ ('ubfe', 'value', 'offset', 'bits')),
+ 'options->lower_bitfield_extract'),
+]
+
+# Add optimizations to handle the case where the result of a ternary is
+# compared to a constant. This way we can take things like
+#
+# (a ? 0 : 1) > 0
+#
+# and turn it into
+#
+# a ? (0 > 0) : (1 > 0)
+#
+# which constant folding will eat for lunch. The resulting ternary will
+# further get cleaned up by the boolean reductions above and we will be
+# left with just the original variable "a".
+for op in ['flt', 'fge', 'feq', 'fne',
+ 'ilt', 'ige', 'ieq', 'ine', 'ult', 'uge']:
+ optimizations += [
+ ((op, ('bcsel', 'a', '#b', '#c'), '#d'),
+ ('bcsel', 'a', (op, 'b', 'd'), (op, 'c', 'd'))),
+ ((op, '#d', ('bcsel', a, '#b', '#c')),
+ ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
+ ]
+
+# This section contains "late" optimizations that should be run after the
+# regular optimizations have finished. Optimizations should go here if
+# they help code generation but do not necessarily produce code that is
+# more easily optimizable.
+late_optimizations = [
+ # Turn "a + b <cmp> 0.0" into "a <cmp> -b", removing the add.
+ (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
+ (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
+ (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
+ (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+ # Switch to the *_replicated dot-product opcodes, gated on
+ # options->fdot_replicates.
+ (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
+ (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
+ (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
+ (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'),
+]
+
+print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
+print nir_algebraic.AlgebraicPass("nir_opt_algebraic_late",
+ late_optimizations).render()
diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c
new file mode 100644
index 00000000000..28a73f86f95
--- /dev/null
+++ b/src/compiler/nir/nir_opt_constant_folding.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ *
+ */
+
+#include "nir_constant_expressions.h"
+#include <math.h>
+
+/*
+ * Implements SSA-based constant folding.
+ */
+
+/* State shared by constant_fold_block() across all blocks of one impl. */
+struct constant_fold_state {
+ /* Context passed to nir_load_const_instr_create() for folded constants. */
+ void *mem_ctx;
+ /* Function implementation currently being processed. */
+ nir_function_impl *impl;
+ /* Set to true once any instruction or dereference has been folded. */
+ bool progress;
+};
+
+/**
+ * Folds an ALU instruction whose sources all come from load_const
+ * instructions.
+ *
+ * On success a new load_const holding the evaluated result is inserted
+ * before instr, all uses of instr's SSA destination are redirected to it,
+ * and instr is removed and freed.
+ *
+ * \param instr    ALU instruction to try to fold
+ * \param mem_ctx  context passed to nir_load_const_instr_create()
+ * \return true if instr was folded away, false if it was left untouched
+ */
+static bool
+constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
+{
+   nir_const_value src[4];
+
+   /* Only SSA destinations can have their uses rewritten below. */
+   if (!instr->dest.dest.is_ssa)
+      return false;
+
+   const unsigned num_inputs = nir_op_infos[instr->op].num_inputs;
+   for (unsigned s = 0; s < num_inputs; s++) {
+      nir_alu_src *alu_src = &instr->src[s];
+
+      if (!alu_src->src.is_ssa)
+         return false;
+
+      nir_instr *producer = alu_src->src.ssa->parent_instr;
+      if (producer->type != nir_instr_type_load_const)
+         return false;
+
+      nir_load_const_instr *constant = nir_instr_as_load_const(producer);
+
+      /* Gather the constant's components in the order the swizzle reads
+       * them.
+       */
+      for (unsigned c = 0; c < nir_ssa_alu_instr_src_components(instr, s);
+           c++) {
+         src[s].u[c] = constant->value.u[alu_src->swizzle[c]];
+      }
+
+      /* We shouldn't have any source modifiers in the optimization loop. */
+      assert(!alu_src->abs && !alu_src->negate);
+   }
+
+   /* We shouldn't have any saturate modifiers in the optimization loop. */
+   assert(!instr->dest.saturate);
+
+   const unsigned num_components = instr->dest.dest.ssa.num_components;
+   nir_const_value result =
+      nir_eval_const_opcode(instr->op, num_components, src);
+
+   nir_load_const_instr *folded =
+      nir_load_const_instr_create(mem_ctx, num_components);
+   folded->value = result;
+
+   nir_instr_insert_before(&instr->instr, &folded->instr);
+   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
+                            nir_src_for_ssa(&folded->def));
+
+   nir_instr_remove(&instr->instr);
+   ralloc_free(instr);
+
+   return true;
+}
+
+/**
+ * Converts indirect array dereferences whose index is a load_const into
+ * direct dereferences.
+ *
+ * For every such link in the chain below deref, the constant's first
+ * component is added to base_offset, the indirect source is cleared, and the
+ * link is marked direct.
+ *
+ * \param instr  instruction that owns the dereference chain
+ * \param deref  head of the dereference chain
+ * \return true if at least one link was converted
+ */
+static bool
+constant_fold_deref(nir_instr *instr, nir_deref_var *deref)
+{
+   bool changed = false;
+
+   for (nir_deref *link = deref->deref.child; link != NULL;
+        link = link->child) {
+      if (link->deref_type != nir_deref_type_array)
+         continue;
+
+      nir_deref_array *arr = nir_deref_as_array(link);
+
+      if (arr->deref_array_type != nir_deref_array_type_indirect)
+         continue;
+      if (!arr->indirect.is_ssa)
+         continue;
+
+      nir_instr *index_instr = arr->indirect.ssa->parent_instr;
+      if (index_instr->type != nir_instr_type_load_const)
+         continue;
+
+      nir_load_const_instr *index = nir_instr_as_load_const(index_instr);
+      arr->base_offset += index->value.u[0];
+
+      /* Clear out the source */
+      nir_instr_rewrite_src(instr, &arr->indirect, nir_src_for_ssa(NULL));
+
+      arr->deref_array_type = nir_deref_array_type_direct;
+      changed = true;
+   }
+
+   return changed;
+}
+
+/**
+ * Folds constant indirect array indices in every variable dereference of an
+ * intrinsic.  Returns true if any dereference was changed.
+ */
+static bool
+constant_fold_intrinsic_instr(nir_intrinsic_instr *instr)
+{
+   const unsigned var_count =
+      nir_intrinsic_infos[instr->intrinsic].num_variables;
+   bool changed = false;
+
+   for (unsigned v = 0; v < var_count; v++) {
+      if (constant_fold_deref(&instr->instr, instr->variables[v]))
+         changed = true;
+   }
+
+   return changed;
+}
+
+/**
+ * Folds constant indirect array indices in a texture instruction's sampler
+ * dereference, if it has one.  Returns true if the dereference was changed.
+ */
+static bool
+constant_fold_tex_instr(nir_tex_instr *instr)
+{
+   if (!instr->sampler)
+      return false;
+
+   return constant_fold_deref(&instr->instr, instr->sampler);
+}
+
+/**
+ * Block callback: runs the folder matching each instruction's type (ALU,
+ * intrinsic or texture) and records progress in the constant_fold_state
+ * passed as void_state.  Always returns true.
+ */
+static bool
+constant_fold_block(nir_block *block, void *void_state)
+{
+   struct constant_fold_state *state = void_state;
+
+   /* The _safe iterator is needed because folding an ALU instruction
+    * removes and frees it.
+    */
+   nir_foreach_instr_safe(block, instr) {
+      bool folded = false;
+
+      if (instr->type == nir_instr_type_alu) {
+         folded = constant_fold_alu_instr(nir_instr_as_alu(instr),
+                                          state->mem_ctx);
+      } else if (instr->type == nir_instr_type_intrinsic) {
+         folded = constant_fold_intrinsic_instr(nir_instr_as_intrinsic(instr));
+      } else if (instr->type == nir_instr_type_tex) {
+         folded = constant_fold_tex_instr(nir_instr_as_tex(instr));
+      }
+      /* Any other instruction type: don't know how to constant fold. */
+
+      if (folded)
+         state->progress = true;
+   }
+
+   return true;
+}
+
+/**
+ * Runs constant folding over every block of one function implementation.
+ * When anything changed, block-index and dominance metadata are declared
+ * preserved.  Returns true on progress.
+ */
+static bool
+nir_opt_constant_folding_impl(nir_function_impl *impl)
+{
+   struct constant_fold_state state = {
+      .mem_ctx = ralloc_parent(impl),
+      .impl = impl,
+      .progress = false,
+   };
+
+   nir_foreach_block(impl, constant_fold_block, &state);
+
+   if (!state.progress)
+      return false;
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+   return true;
+}
+
+/**
+ * Entry point: constant-folds every function in the shader that has an
+ * implementation.  Returns true if any function made progress.
+ */
+bool
+nir_opt_constant_folding(nir_shader *shader)
+{
+   bool any_progress = false;
+
+   nir_foreach_function(shader, function) {
+      if (function->impl && nir_opt_constant_folding_impl(function->impl))
+         any_progress = true;
+   }
+
+   return any_progress;
+}
diff --git a/src/compiler/nir/nir_opt_copy_propagate.c b/src/compiler/nir/nir_opt_copy_propagate.c
new file mode 100644
index 00000000000..d99f78ddb36
--- /dev/null
+++ b/src/compiler/nir/nir_opt_copy_propagate.c
@@ -0,0 +1,290 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#include "nir.h"
+#include <main/imports.h>
+
+/**
+ * SSA-based copy propagation
+ */
+
+/**
+ * Returns true if instr is a plain fmov/imov: no saturate on the
+ * destination, no abs/negate on the source (modifiers are handled in a
+ * separate pass), and an SSA source.
+ */
+static bool is_move(nir_alu_instr *instr)
+{
+   const bool is_mov_op = instr->op == nir_op_fmov ||
+                          instr->op == nir_op_imov;
+
+   return is_mov_op &&
+          !instr->dest.saturate &&
+          !instr->src[0].abs &&
+          !instr->src[0].negate &&
+          instr->src[0].src.is_ssa;
+}
+
+/**
+ * Returns true if instr is a vec2/vec3/vec4 whose sources are all SSA and
+ * carry no abs/negate modifiers (modifiers are handled in a separate pass).
+ */
+static bool is_vec(nir_alu_instr *instr)
+{
+   if (instr->op != nir_op_vec2 &&
+       instr->op != nir_op_vec3 &&
+       instr->op != nir_op_vec4)
+      return false;
+
+   const unsigned num_inputs = nir_op_infos[instr->op].num_inputs;
+   for (unsigned s = 0; s < num_inputs; s++) {
+      if (!instr->src[s].src.is_ssa)
+         return false;
+
+      if (instr->src[s].abs || instr->src[s].negate)
+         return false;
+   }
+
+   return true;
+}
+
+/**
+ * Returns true if instr simply forwards one SSA value unchanged:
+ *
+ *  - a move whose swizzle is the identity on the leading run of channels
+ *    enabled in the write mask, or
+ *  - a vecN whose i-th source reads component i and whose sources all refer
+ *    to the same SSA def.
+ */
+static bool
+is_swizzleless_move(nir_alu_instr *instr)
+{
+   if (is_move(instr)) {
+      /* Stop at the first channel that is not written. */
+      for (unsigned chan = 0;
+           chan < 4 && ((instr->dest.write_mask >> chan) & 1);
+           chan++) {
+         if (instr->src[0].swizzle[chan] != chan)
+            return false;
+      }
+      return true;
+   }
+
+   if (!is_vec(instr))
+      return false;
+
+   nir_ssa_def *common = NULL;
+   const unsigned num_inputs = nir_op_infos[instr->op].num_inputs;
+   for (unsigned s = 0; s < num_inputs; s++) {
+      if (instr->src[s].swizzle[0] != s)
+         return false;
+
+      nir_ssa_def *current = instr->src[s].src.ssa;
+      if (common == NULL)
+         common = current;
+      else if (current != common)
+         return false;
+   }
+
+   return true;
+}
+
+/**
+ * Tries to replace a single source with the value forwarded by the
+ * swizzle-free move (or equivalent vecN) that produced it.
+ *
+ * The user of src is identified by parent_instr for instruction sources, or
+ * by parent_if (with parent_instr == NULL) for an if-condition.
+ *
+ * \return true if a source was rewritten, false otherwise
+ */
+static bool
+copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
+{
+   if (!src->is_ssa) {
+      /* A register source cannot be propagated, but its indirect offset is
+       * a source in its own right.  Recurse on the indirect; recursing on
+       * src itself would never terminate.  This is only done for
+       * instruction sources: the if path below can only rewrite the
+       * condition itself, not an indirect hanging off it.
+       */
+      if (src->reg.indirect && parent_instr)
+         return copy_prop_src(src->reg.indirect, parent_instr, parent_if);
+      return false;
+   }
+
+   nir_instr *src_instr = src->ssa->parent_instr;
+   if (src_instr->type != nir_instr_type_alu)
+      return false;
+
+   nir_alu_instr *alu_instr = nir_instr_as_alu(src_instr);
+   if (!is_swizzleless_move(alu_instr))
+      return false;
+
+   /* Don't let copy propagation land us with a phi that has more
+    * components in its source than it has in its destination. That badly
+    * messes up out-of-ssa.
+    */
+   if (parent_instr && parent_instr->type == nir_instr_type_phi) {
+      nir_phi_instr *phi = nir_instr_as_phi(parent_instr);
+      assert(phi->dest.is_ssa);
+      if (phi->dest.ssa.num_components !=
+          alu_instr->src[0].src.ssa->num_components)
+         return false;
+   }
+
+   if (parent_instr) {
+      nir_instr_rewrite_src(parent_instr, src,
+                            nir_src_for_ssa(alu_instr->src[0].src.ssa));
+   } else {
+      assert(src == &parent_if->condition);
+      nir_if_rewrite_condition(parent_if,
+                               nir_src_for_ssa(alu_instr->src[0].src.ssa));
+   }
+
+   return true;
+}
+
+/**
+ * Tries to propagate through the move or vecN that feeds one ALU source,
+ * composing swizzles so that arbitrary swizzles can be looked through.
+ *
+ * \param parent_alu_instr  ALU instruction whose source is examined
+ * \param index             index of that source in parent_alu_instr->src
+ * \return true if the source (or its register indirect) was rewritten
+ */
+static bool
+copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index)
+{
+ nir_alu_src *src = &parent_alu_instr->src[index];
+ if (!src->src.is_ssa) {
+ /* Register source: only its indirect, if any, can be propagated. */
+ if (src->src.reg.indirect)
+ return copy_prop_src(src->src.reg.indirect, &parent_alu_instr->instr,
+ NULL);
+ return false;
+ }
+
+ nir_instr *src_instr = src->src.ssa->parent_instr;
+ if (src_instr->type != nir_instr_type_alu)
+ return false;
+
+ nir_alu_instr *alu_instr = nir_instr_as_alu(src_instr);
+ if (!is_move(alu_instr) && !is_vec(alu_instr))
+ return false;
+
+ nir_ssa_def *def;
+ unsigned new_swizzle[4] = {0, 0, 0, 0};
+
+ if (alu_instr->op == nir_op_fmov ||
+ alu_instr->op == nir_op_imov) {
+ /* Move: compose our swizzle with the move's own source swizzle. */
+ for (unsigned i = 0; i < 4; i++)
+ new_swizzle[i] = alu_instr->src[0].swizzle[src->swizzle[i]];
+ def = alu_instr->src[0].src.ssa;
+ } else {
+ /* vecN: every channel we actually use must come from the same SSA def,
+ * otherwise a single source cannot express the result.
+ */
+ def = NULL;
+
+ for (unsigned i = 0; i < 4; i++) {
+ if (!nir_alu_instr_channel_used(parent_alu_instr, index, i))
+ continue;
+
+ nir_ssa_def *new_def = alu_instr->src[src->swizzle[i]].src.ssa;
+ if (def == NULL)
+ def = new_def;
+ else {
+ if (def != new_def)
+ return false;
+ }
+ new_swizzle[i] = alu_instr->src[src->swizzle[i]].swizzle[0];
+ }
+ }
+
+ /* Commit the composed swizzle, then point the source at the new def. */
+ for (unsigned i = 0; i < 4; i++)
+ src->swizzle[i] = new_swizzle[i];
+
+ nir_instr_rewrite_src(&parent_alu_instr->instr, &src->src,
+ nir_src_for_ssa(def));
+
+ return true;
+}
+
+/* State handed to copy_prop_src_cb() through nir_foreach_src(). */
+typedef struct {
+ /* Instruction whose sources are being visited. */
+ nir_instr *parent_instr;
+ /* Set to true once any source has been rewritten. */
+ bool progress;
+} copy_prop_state;
+
+/**
+ * nir_foreach_src() callback: keeps propagating into src until nothing more
+ * changes, recording progress in the copy_prop_state passed as _state.
+ * Always returns true.
+ */
+static bool
+copy_prop_src_cb(nir_src *src, void *_state)
+{
+   copy_prop_state *cb_state = _state;
+
+   while (copy_prop_src(src, cb_state->parent_instr, NULL)) {
+      cb_state->progress = true;
+   }
+
+   return true;
+}
+
+/**
+ * Copy-propagates into every source of one instruction.
+ *
+ * ALU instructions use copy_prop_alu_src() for their sources, plus
+ * copy_prop_src() for a register destination's indirect.  All other
+ * instruction types are handled generically through nir_foreach_src().
+ *
+ * \return true if any source was rewritten
+ */
+static bool
+copy_prop_instr(nir_instr *instr)
+{
+   if (instr->type != nir_instr_type_alu) {
+      copy_prop_state state = {
+         .parent_instr = instr,
+         .progress = false,
+      };
+      nir_foreach_src(instr, copy_prop_src_cb, &state);
+      return state.progress;
+   }
+
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+   const unsigned num_inputs = nir_op_infos[alu->op].num_inputs;
+   bool changed = false;
+
+   for (unsigned s = 0; s < num_inputs; s++) {
+      while (copy_prop_alu_src(alu, s))
+         changed = true;
+   }
+
+   if (!alu->dest.dest.is_ssa && alu->dest.dest.reg.indirect) {
+      while (copy_prop_src(alu->dest.dest.reg.indirect, instr, NULL))
+         changed = true;
+   }
+
+   return changed;
+}
+
+/**
+ * Copy-propagates once into the condition of an if statement.  Returns true
+ * if the condition was rewritten.
+ */
+static bool
+copy_prop_if(nir_if *if_stmt)
+{
+   nir_src *condition = &if_stmt->condition;
+
+   return copy_prop_src(condition, NULL, if_stmt);
+}
+
+/**
+ * Block callback: copy-propagates into every instruction of the block and,
+ * when the block is directly followed by an if, into that if's condition.
+ * _state points at the bool that receives the progress flag.  Always
+ * returns true.
+ */
+static bool
+copy_prop_block(nir_block *block, void *_state)
+{
+   bool *progress = _state;
+
+   nir_foreach_instr(block, instr) {
+      if (copy_prop_instr(instr))
+         *progress = true;
+   }
+
+   /* check that we aren't the end node */
+   if (block->cf_node.node.next == NULL)
+      return true;
+
+   if (nir_cf_node_is_last(&block->cf_node))
+      return true;
+
+   if (nir_cf_node_next(&block->cf_node)->type != nir_cf_node_if)
+      return true;
+
+   nir_if *following_if =
+      nir_cf_node_as_if(nir_cf_node_next(&block->cf_node));
+   if (copy_prop_if(following_if))
+      *progress = true;
+
+   return true;
+}
+
+/**
+ * Runs copy propagation over every block of one function implementation.
+ * When anything changed, block-index and dominance metadata are declared
+ * preserved.  Returns true on progress.
+ */
+static bool
+nir_copy_prop_impl(nir_function_impl *impl)
+{
+   bool changed = false;
+
+   nir_foreach_block(impl, copy_prop_block, &changed);
+
+   if (!changed)
+      return false;
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+   return true;
+}
+
+/**
+ * Entry point: runs copy propagation on every function in the shader that
+ * has an implementation.  Returns true if any function made progress.
+ */
+bool
+nir_copy_prop(nir_shader *shader)
+{
+   bool any_progress = false;
+
+   nir_foreach_function(shader, function) {
+      if (function->impl)
+         any_progress |= nir_copy_prop_impl(function->impl);
+   }
+
+   return any_progress;
+}
diff --git a/src/compiler/nir/nir_opt_cse.c b/src/compiler/nir/nir_opt_cse.c
new file mode 100644
index 00000000000..364fb023dce
--- /dev/null
+++ b/src/compiler/nir/nir_opt_cse.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#include "nir_instr_set.h"
+
+/*
+ * Implements common subexpression elimination
+ */
+
+/*
+ * Visits and CSE's the given block and all its descendants in the dominance
+ * tree recursively. Note that the instr_set is guaranteed to only ever
+ * contain instructions that dominate the current block.
+ */
+
+/**
+ * CSEs one block and then, recursively, its children in the dominance tree.
+ *
+ * An instruction for which nir_instr_set_add_or_rewrite() returns true is
+ * removed from the block.  Before returning, the block's remaining
+ * instructions are taken out of instr_set again.
+ *
+ * \return true if any instruction was eliminated in this subtree
+ */
+static bool
+cse_block(nir_block *block, struct set *instr_set)
+{
+   bool eliminated = false;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (nir_instr_set_add_or_rewrite(instr_set, instr)) {
+         nir_instr_remove(instr);
+         eliminated = true;
+      }
+   }
+
+   for (unsigned c = 0; c < block->num_dom_children; c++) {
+      if (cse_block(block->dom_children[c], instr_set))
+         eliminated = true;
+   }
+
+   nir_foreach_instr(block, instr) {
+      nir_instr_set_remove(instr_set, instr);
+   }
+
+   return eliminated;
+}
+
+/**
+ * Runs CSE on one function implementation, walking the dominance tree from
+ * the start block with a freshly created instruction set that is destroyed
+ * before returning.  When anything changed, block-index and dominance
+ * metadata are declared preserved.  Returns true on progress.
+ */
+static bool
+nir_opt_cse_impl(nir_function_impl *impl)
+{
+   struct set *seen = nir_instr_set_create(NULL);
+
+   nir_metadata_require(impl, nir_metadata_dominance);
+
+   const bool changed = cse_block(nir_start_block(impl), seen);
+
+   if (changed) {
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+   }
+
+   nir_instr_set_destroy(seen);
+   return changed;
+}
+
+/**
+ * Entry point: runs common subexpression elimination on every function in
+ * the shader that has an implementation.  Returns true if any function made
+ * progress.
+ */
+bool
+nir_opt_cse(nir_shader *shader)
+{
+   bool any_progress = false;
+
+   nir_foreach_function(shader, function) {
+      if (function->impl && nir_opt_cse_impl(function->impl))
+         any_progress = true;
+   }
+
+   return any_progress;
+}
+
diff --git a/src/compiler/nir/nir_opt_dce.c b/src/compiler/nir/nir_opt_dce.c
new file mode 100644
index 00000000000..32436c18b60
--- /dev/null
+++ b/src/compiler/nir/nir_opt_dce.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#include "nir.h"
+
+/* SSA-based mark-and-sweep dead code elimination */
+
+/* One entry of the DCE worklist: the list link plus the instruction that is
+ * waiting to have its sources visited.
+ */
+typedef struct {
+ struct exec_node node;
+ nir_instr *instr;
+} worklist_elem;
+
+/* Marks `instr` live (pass_flags = 1) and appends it to the worklist.  The
+ * list element is ralloc'ed with the worklist as its parent context.
+ */
+static void
+worklist_push(struct exec_list *worklist, nir_instr *instr)
+{
+   worklist_elem *entry = ralloc(worklist, worklist_elem);
+
+   instr->pass_flags = 1;
+   entry->instr = instr;
+   exec_list_push_tail(worklist, &entry->node);
+}
+
+/* Removes the head entry of the worklist and returns its instruction.  The
+ * callers in this file only pop after checking the list is non-empty.
+ */
+static nir_instr *
+worklist_pop(struct exec_list *worklist)
+{
+   struct exec_node *head = exec_list_pop_head(worklist);
+
+   return exec_node_data(worklist_elem, head, node)->instr;
+}
+
+/* nir_foreach_src() callback: if the source is an SSA value whose defining
+ * instruction has not been marked live yet, queue that instruction.  Always
+ * returns true so that every source gets visited.
+ */
+static bool
+mark_live_cb(nir_src *src, void *_state)
+{
+   struct exec_list *worklist = _state;
+
+   if (!src->is_ssa)
+      return true;
+
+   nir_instr *def_instr = src->ssa->parent_instr;
+   if (def_instr->pass_flags == 0)
+      worklist_push(worklist, def_instr);
+
+   return true;
+}
+
+/* Resets the liveness flag of `instr` and, if the instruction must be kept
+ * regardless of whether its result is used, seeds the worklist with it.
+ *
+ * Always kept:
+ *  - calls and jumps,
+ *  - ALU and texture instructions whose destination is not SSA,
+ *  - intrinsics without NIR_INTRINSIC_CAN_ELIMINATE, and eliminable
+ *    intrinsics that have a non-SSA destination.
+ * Every other instruction starts out dead.
+ */
+static void
+init_instr(nir_instr *instr, struct exec_list *worklist)
+{
+   bool is_root;
+
+   switch (instr->type) {
+   case nir_instr_type_call:
+   case nir_instr_type_jump:
+      is_root = true;
+      break;
+
+   case nir_instr_type_alu:
+      is_root = !nir_instr_as_alu(instr)->dest.dest.is_ssa;
+      break;
+
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      const nir_intrinsic_info *info =
+         &nir_intrinsic_infos[intrin->intrinsic];
+
+      if (info->flags & NIR_INTRINSIC_CAN_ELIMINATE)
+         is_root = info->has_dest && !intrin->dest.is_ssa;
+      else
+         is_root = true;
+      break;
+   }
+
+   case nir_instr_type_tex:
+      is_root = !nir_instr_as_tex(instr)->dest.is_ssa;
+      break;
+
+   default:
+      is_root = false;
+      break;
+   }
+
+   /* We use the pass_flags to store the live/dead information.  In DCE, we
+    * just treat it as a zero/non-zero boolean for whether or not the
+    * instruction is live.  worklist_push() sets it to non-zero.
+    */
+   instr->pass_flags = 0;
+
+   if (is_root)
+      worklist_push(worklist, instr);
+}
+
+/* nir_foreach_block() callback for the mark phase: initializes every
+ * instruction in the block and, when the block is followed by an if whose
+ * condition is an SSA value, marks the instruction producing that condition
+ * as live.  Always returns true to keep iterating.
+ */
+static bool
+init_block_cb(nir_block *block, void *_state)
+{
+   struct exec_list *worklist = _state;
+
+   nir_foreach_instr(block, instr) {
+      init_instr(instr, worklist);
+   }
+
+   nir_if *nif = nir_block_get_following_if(block);
+   if (nif && nif->condition.is_ssa) {
+      nir_instr *cond_instr = nif->condition.ssa->parent_instr;
+
+      if (!cond_instr->pass_flags)
+         worklist_push(worklist, cond_instr);
+   }
+
+   return true;
+}
+
+/* nir_foreach_block() callback for the sweep phase: removes every
+ * instruction that was never marked live and records through `_state`
+ * (a bool *) that something was deleted.  Always returns true.
+ */
+static bool
+delete_block_cb(nir_block *block, void *_state)
+{
+   bool *progress = _state;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->pass_flags)
+         continue;
+
+      nir_instr_remove(instr);
+      *progress = true;
+   }
+
+   return true;
+}
+
+/* Mark-and-sweep DCE for one function implementation.
+ *
+ * 1. Seed the worklist with the instructions that must be kept.
+ * 2. Propagate liveness backwards through SSA sources until the worklist
+ *    is drained.
+ * 3. Delete whatever is still unmarked.
+ *
+ * Returns true if any instruction was deleted.
+ */
+static bool
+nir_opt_dce_impl(nir_function_impl *impl)
+{
+   struct exec_list *pending = ralloc(NULL, struct exec_list);
+   exec_list_make_empty(pending);
+
+   nir_foreach_block(impl, init_block_cb, pending);
+
+   while (!exec_list_is_empty(pending)) {
+      nir_instr *live = worklist_pop(pending);
+      nir_foreach_src(live, mark_live_cb, pending);
+   }
+
+   /* Frees the list head together with every element ralloc'ed off it. */
+   ralloc_free(pending);
+
+   bool removed_any = false;
+   nir_foreach_block(impl, delete_block_cb, &removed_any);
+
+   if (!removed_any)
+      return false;
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+   return true;
+}
+
+/* Entry point: runs DCE on every function in the shader that has an
+ * implementation.  Returns true if any function made progress.
+ */
+bool
+nir_opt_dce(nir_shader *shader)
+{
+   bool any_progress = false;
+
+   nir_foreach_function(shader, func) {
+      if (!func->impl)
+         continue;
+
+      if (nir_opt_dce_impl(func->impl))
+         any_progress = true;
+   }
+
+   return any_progress;
+}
diff --git a/src/compiler/nir/nir_opt_dead_cf.c b/src/compiler/nir/nir_opt_dead_cf.c
new file mode 100644
index 00000000000..4cc6798702b
--- /dev/null
+++ b/src/compiler/nir/nir_opt_dead_cf.c
@@ -0,0 +1,358 @@
+/*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#include "nir.h"
+#include "nir_control_flow.h"
+
+/*
+ * This file implements an optimization that deletes statically
+ * unreachable/dead code. In NIR, one way this can happen is when an if
+ * statement has a constant condition:
+ *
+ * if (true) {
+ * ...
+ * }
+ *
+ * We delete the if statement and paste the contents of the always-executed
+ * branch into the surrounding control flow, possibly removing more code if
+ * the branch had a jump at the end.
+ *
+ * Another way is that control flow can end in a jump so that code after it
+ * never gets executed. In particular, this can happen after optimizing
+ * something like:
+ *
+ * if (true) {
+ * ...
+ * break;
+ * }
+ * ...
+ *
+ * We also consider the case where both branches of an if end in a jump, e.g.:
+ *
+ * if (...) {
+ * break;
+ * } else {
+ * continue;
+ * }
+ * ...
+ *
+ * Finally, we also handle removing useless loops, i.e. loops with no side
+ * effects and without any definitions that are used elsewhere. This case is a
+ * little different from the first two in that the code is actually run (it
+ * just never does anything), but there are similar issues with needing to
+ * be careful with restarting after deleting the cf_node (see dead_cf_list())
+ * so this is a convenient place to remove them.
+ */
+
+/* Deletes everything that follows `node` in the control flow list that
+ * contains it, up to and including the last node of that list.
+ */
+static void
+remove_after_cf_node(nir_cf_node *node)
+{
+   nir_cf_node *tail;
+
+   for (tail = node; !nir_cf_node_is_last(tail);
+        tail = nir_cf_node_next(tail)) {
+      /* just walking to the final node of the list */
+   }
+
+   nir_cf_list dead;
+   nir_cf_extract(&dead, nir_after_cf_node(node), nir_after_cf_node(tail));
+   nir_cf_delete(&dead);
+}
+
+/* Replaces an if statement whose condition is known at compile time with the
+ * contents of the branch that is always taken.
+ *
+ * if_stmt:   the if to fold away.
+ * condition: the constant value of its condition; true selects the then
+ *            list, false selects the else list.
+ *
+ * The steps below are order dependent: phis after the if are resolved first,
+ * then any now-unreachable trailing control flow is deleted, and only then
+ * is the selected branch spliced in after the if and the if itself removed.
+ */
+static void
+opt_constant_if(nir_if *if_stmt, bool condition)
+{
+ /* First, we need to remove any phi nodes after the if by rewriting uses to
+ * point to the correct source.
+ */
+ nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&if_stmt->cf_node));
+ nir_block *last_block =
+ nir_cf_node_as_block(condition ? nir_if_last_then_node(if_stmt)
+ : nir_if_last_else_node(if_stmt));
+
+ nir_foreach_instr_safe(after, instr) {
+ /* Stop at the first non-phi instruction in the block. */
+ if (instr->type != nir_instr_type_phi)
+ break;
+
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+ nir_ssa_def *def = NULL;
+ /* Pick the phi source that comes from the end of the taken branch. */
+ nir_foreach_phi_src(phi, phi_src) {
+ if (phi_src->pred != last_block)
+ continue;
+
+ assert(phi_src->src.is_ssa);
+ def = phi_src->src.ssa;
+ }
+
+ assert(def);
+ assert(phi->dest.is_ssa);
+ nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def));
+ nir_instr_remove(instr);
+ }
+
+ /* The control flow list we're about to paste in may include a jump at the
+ * end, and in that case we have to delete the rest of the control flow
+ * list after the if since it's unreachable and the validator will balk if
+ * we don't.
+ */
+
+ if (!exec_list_is_empty(&last_block->instr_list)) {
+ nir_instr *last_instr = nir_block_last_instr(last_block);
+ if (last_instr->type == nir_instr_type_jump)
+ remove_after_cf_node(&if_stmt->cf_node);
+ }
+
+ /* Finally, actually paste in the then or else branch and delete the if. */
+ struct exec_list *cf_list = condition ? &if_stmt->then_list
+ : &if_stmt->else_list;
+
+ nir_cf_list list;
+ nir_cf_extract(&list, nir_before_cf_list(cf_list),
+ nir_after_cf_list(cf_list));
+ nir_cf_reinsert(&list, nir_after_cf_node(&if_stmt->cf_node));
+ nir_cf_node_remove(&if_stmt->cf_node);
+}
+
+/* Block callback used by loop_is_dead(): returns true if no instruction in
+ * `block` has an effect beyond producing a value, i.e. the block contains
+ *  - no call,
+ *  - no return jump, and
+ *  - no intrinsic that lacks NIR_INTRINSIC_CAN_ELIMINATE.
+ * Returning false stops the enclosing block iteration.  `state` is unused.
+ */
+static bool
+block_has_no_side_effects(nir_block *block, void *state)
+{
+   (void) state;
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type == nir_instr_type_call)
+         return false;
+
+      /* Return instructions can cause us to skip over other side-effecting
+       * instructions after the loop, so consider them to have side effects
+       * here.
+       */
+
+      if (instr->type == nir_instr_type_jump &&
+          nir_instr_as_jump(instr)->type == nir_jump_return)
+         return false;
+
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      /* The parentheses matter: unary `!` binds tighter than binary `&`, so
+       * `!flags & CAN_ELIMINATE` would negate the whole flags word first and
+       * only fire when flags is exactly zero.
+       */
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      if (!(nir_intrinsic_infos[intrin->intrinsic].flags &
+            NIR_INTRINSIC_CAN_ELIMINATE))
+         return false;
+   }
+
+   return true;
+}
+
+/* nir_foreach_ssa_def() callback: `state` is the block following the loop.
+ * Returns true when `def` is not in that block's live-in set.
+ */
+static bool
+def_not_live_out(nir_ssa_def *def, void *state)
+{
+   nir_block *succ = state;
+
+   if (BITSET_TEST(succ->live_in, def->live_index))
+      return false;
+
+   return true;
+}
+
+/*
+ * Decides whether a loop can be deleted outright.  A loop is dead when all
+ * of the following hold:
+ *
+ * 1) The block after the loop does not start with a phi node, since a phi
+ * there means a value from inside the loop is used after it.
+ *
+ * 2) Nothing in the loop has side effects (calls, returns, or intrinsics
+ * that could affect program state aside from producing an SSA value,
+ * indicated by a lack of NIR_INTRINSIC_CAN_ELIMINATE).
+ *
+ * 3) With no phi nodes after the loop, the only way a value defined inside
+ * the loop can be used outside of it is if its definition dominates the
+ * block after the loop.  If none of the definitions that dominate the loop
+ * exit are live into the block after the loop, the loop is dead and can be
+ * deleted.
+ */
+
+static bool
+loop_is_dead(nir_loop *loop)
+{
+   nir_cf_node *loop_node = &loop->cf_node;
+   nir_block *before = nir_cf_node_as_block(nir_cf_node_prev(loop_node));
+   nir_block *after = nir_cf_node_as_block(nir_cf_node_next(loop_node));
+
+   const bool after_starts_with_phi =
+      !exec_list_is_empty(&after->instr_list) &&
+      nir_block_first_instr(after)->type == nir_instr_type_phi;
+   if (after_starts_with_phi)
+      return false;
+
+   const bool side_effect_free =
+      nir_foreach_block_in_cf_node(loop_node, block_has_no_side_effects,
+                                   NULL);
+   if (!side_effect_free)
+      return false;
+
+   nir_function_impl *impl = nir_cf_node_get_function(loop_node);
+   nir_metadata_require(impl, nir_metadata_live_ssa_defs |
+                              nir_metadata_dominance);
+
+   /* Walk up the dominance tree from the block after the loop until we hit
+    * the block before it, checking every definition on the way.
+    */
+   nir_block *dom = after->imm_dom;
+   while (dom != before) {
+      nir_foreach_instr(dom, instr) {
+         if (!nir_foreach_ssa_def(instr, def_not_live_out, after))
+            return false;
+      }
+      dom = dom->imm_dom;
+   }
+
+   return true;
+}
+
+/* Tries to eliminate the control flow node that directly follows `block`:
+ *  - an if with a constant condition is folded into the taken branch;
+ *  - a dead loop (see loop_is_dead()) is removed.
+ * Returns true if the following node was eliminated.
+ */
+static bool
+dead_cf_block(nir_block *block)
+{
+   nir_if *nif = nir_block_get_following_if(block);
+
+   if (nif != NULL) {
+      nir_const_value *cond = nir_src_as_const_value(nif->condition);
+      if (cond == NULL)
+         return false;
+
+      opt_constant_if(nif, cond->u[0] != 0);
+      return true;
+   }
+
+   nir_loop *loop = nir_block_get_following_loop(block);
+   if (loop == NULL || !loop_is_dead(loop))
+      return false;
+
+   nir_cf_node_remove(&loop->cf_node);
+   return true;
+}
+
+/* Returns true if `block` is non-empty and its last instruction is a jump. */
+static bool
+ends_in_jump(nir_block *block)
+{
+   return !exec_list_is_empty(&block->instr_list) &&
+          nir_block_last_instr(block)->type == nir_instr_type_jump;
+}
+
+/* Walks one control flow list and removes dead control flow from it,
+ * recursing into the bodies of ifs and loops.
+ *
+ * list:              the control flow list to process.
+ * list_ends_in_jump: out parameter; set to true when a block in the list
+ *                    ends in a jump, or when both branches of an if in the
+ *                    list end in a jump.  It is reset to false on entry.
+ *
+ * Returns true if anything was changed.  Note that after deleting the tail
+ * of the list the function returns immediately instead of continuing the
+ * iteration.
+ */
+static bool
+dead_cf_list(struct exec_list *list, bool *list_ends_in_jump)
+{
+ bool progress = false;
+ *list_ends_in_jump = false;
+
+ /* The node visited in the previous iteration; used to find our place
+ * again after dead_cf_block() has modified the list.
+ */
+ nir_cf_node *prev = NULL;
+
+ foreach_list_typed(nir_cf_node, cur, node, list) {
+ switch (cur->type) {
+ case nir_cf_node_block: {
+ nir_block *block = nir_cf_node_as_block(cur);
+ if (dead_cf_block(block)) {
+ /* We just deleted the if or loop after this block, so we may have
+ * deleted the block before or after it -- which one is an
+ * implementation detail. Therefore, to recover the place we were
+ * at, we have to use the previous cf_node.
+ */
+
+ if (prev) {
+ cur = nir_cf_node_next(prev);
+ } else {
+ cur = exec_node_data(nir_cf_node, exec_list_get_head(list),
+ node);
+ }
+
+ block = nir_cf_node_as_block(cur);
+
+ progress = true;
+ }
+
+ if (ends_in_jump(block)) {
+ *list_ends_in_jump = true;
+
+ /* Anything after a block that ends in a jump is unreachable. */
+ if (!exec_node_is_tail_sentinel(cur->node.next)) {
+ remove_after_cf_node(cur);
+ return true;
+ }
+ }
+
+ break;
+ }
+
+ case nir_cf_node_if: {
+ nir_if *if_stmt = nir_cf_node_as_if(cur);
+ bool then_ends_in_jump, else_ends_in_jump;
+ progress |= dead_cf_list(&if_stmt->then_list, &then_ends_in_jump);
+ progress |= dead_cf_list(&if_stmt->else_list, &else_ends_in_jump);
+
+ if (then_ends_in_jump && else_ends_in_jump) {
+ *list_ends_in_jump = true;
+ /* Only delete when there is something to delete: the following
+ * block has instructions, or it is not the last node of the list.
+ */
+ nir_block *next = nir_cf_node_as_block(nir_cf_node_next(cur));
+ if (!exec_list_is_empty(&next->instr_list) ||
+ !exec_node_is_tail_sentinel(next->cf_node.node.next)) {
+ remove_after_cf_node(cur);
+ return true;
+ }
+ }
+
+ break;
+ }
+
+ case nir_cf_node_loop: {
+ nir_loop *loop = nir_cf_node_as_loop(cur);
+ /* Whether the loop body ends in a jump is not used here. */
+ bool dummy;
+ progress |= dead_cf_list(&loop->body, &dummy);
+
+ break;
+ }
+
+ default:
+ unreachable("unknown cf node type");
+ }
+
+ prev = cur;
+ }
+
+ return progress;
+}
+
+/* Runs dead control flow removal on one function implementation.  When
+ * anything changed, no metadata is declared as preserved.
+ */
+static bool
+opt_dead_cf_impl(nir_function_impl *impl)
+{
+   bool body_ends_in_jump; /* required by dead_cf_list(), not used here */
+
+   if (!dead_cf_list(&impl->body, &body_ends_in_jump))
+      return false;
+
+   nir_metadata_preserve(impl, nir_metadata_none);
+   return true;
+}
+
+/* Entry point: removes dead control flow from every function in the shader
+ * that has an implementation.  Returns true if any function was changed.
+ */
+bool
+nir_opt_dead_cf(nir_shader *shader)
+{
+   bool any_progress = false;
+
+   nir_foreach_function(shader, func) {
+      if (!func->impl)
+         continue;
+
+      if (opt_dead_cf_impl(func->impl))
+         any_progress = true;
+   }
+
+   return any_progress;
+}
diff --git a/src/compiler/nir/nir_opt_gcm.c b/src/compiler/nir/nir_opt_gcm.c
new file mode 100644
index 00000000000..a8779ce5b84
--- /dev/null
+++ b/src/compiler/nir/nir_opt_gcm.c
@@ -0,0 +1,494 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements Global Code Motion. A description of GCM can be found in
+ * "Global Code Motion; Global Value Numbering" by Cliff Click.
+ * Unfortunately, the algorithm presented in the paper is broken in a
+ * number of ways. The algorithm used here differs substantially from the
+ * one in the paper but it is, in my opinion, much easier to read and
+ * verify correctness.
+ */
+
+/* Per-block bookkeeping for GCM; stored in an array indexed by
+ * nir_block::index (see gcm_state::blocks).
+ */
+struct gcm_block_info {
+ /* Number of loops this block is inside */
+ unsigned loop_depth;
+
+ /* The last instruction inserted into this block. This is used as we
+ * traverse the instructions and insert them back into the program to
+ * put them in the right order.
+ */
+ nir_instr *last_instr;
+};
+
+/* Flags used in the instr->pass_flags field for various instruction states */
+enum {
+ /* The instruction must stay in its block (set while pinning) */
+ GCM_INSTR_PINNED = (1 << 0),
+ /* Already visited by gcm_schedule_early_instr() */
+ GCM_INSTR_SCHEDULED_EARLY = (1 << 1),
+ /* Already visited by gcm_schedule_late_instr() */
+ GCM_INSTR_SCHEDULED_LATE = (1 << 2),
+ /* Already visited by gcm_place_instr() */
+ GCM_INSTR_PLACED = (1 << 3),
+};
+
+/* State shared by all phases of the GCM pass for one function. */
+struct gcm_state {
+ /* The function implementation being processed */
+ nir_function_impl *impl;
+
+ /* The instruction whose sources are currently being walked during early
+ * scheduling; read by gcm_schedule_early_src().
+ */
+ nir_instr *instr;
+
+ /* The list of non-pinned instructions. As we do the late scheduling,
+ * we pull non-pinned instructions out of their blocks and place them in
+ * this list. This saves us from having linked-list problems when we go
+ * to put instructions back in their blocks.
+ */
+ struct exec_list instrs;
+
+ /* Per-block info, indexed by nir_block::index */
+ struct gcm_block_info *blocks;
+};
+
+/* Recursively walks a control flow list and records, for every block found,
+ * how many loops enclose it.  `loop_depth` is the depth of `cf_list` itself;
+ * loop bodies are visited with depth + 1, if branches with the same depth.
+ */
+static void
+gcm_build_block_info(struct exec_list *cf_list, struct gcm_state *state,
+                     unsigned loop_depth)
+{
+   foreach_list_typed(nir_cf_node, cf, node, cf_list) {
+      if (cf->type == nir_cf_node_block) {
+         nir_block *block = nir_cf_node_as_block(cf);
+         state->blocks[block->index].loop_depth = loop_depth;
+      } else if (cf->type == nir_cf_node_if) {
+         nir_if *nif = nir_cf_node_as_if(cf);
+         gcm_build_block_info(&nif->then_list, state, loop_depth);
+         gcm_build_block_info(&nif->else_list, state, loop_depth);
+      } else if (cf->type == nir_cf_node_loop) {
+         nir_loop *loop = nir_cf_node_as_loop(cf);
+         gcm_build_block_info(&loop->body, state, loop_depth + 1);
+      } else {
+         unreachable("Invalid CF node type");
+      }
+   }
+}
+
+/* Walks the instruction list and marks immovable instructions as pinned
+ *
+ * This function also serves to initialize the instr->pass_flags field.
+ * After this is completed, all instructions' pass_flags fields will be set
+ * to either GCM_INSTR_PINNED or 0.
+ *
+ * Instructions that are not pinned are additionally unlinked from the block
+ * and appended to state->instrs.
+ *
+ * This is a block-iteration callback: void_state is the struct gcm_state
+ * and the return value is always true.
+ */
+static bool
+gcm_pin_instructions_block(nir_block *block, void *void_state)
+{
+ struct gcm_state *state = void_state;
+
+ nir_foreach_instr_safe(block, instr) {
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ switch (nir_instr_as_alu(instr)->op) {
+ case nir_op_fddx:
+ case nir_op_fddy:
+ case nir_op_fddx_fine:
+ case nir_op_fddy_fine:
+ case nir_op_fddx_coarse:
+ case nir_op_fddy_coarse:
+ /* These can only go in uniform control flow; pin them for now */
+ instr->pass_flags = GCM_INSTR_PINNED;
+ break;
+
+ default:
+ instr->pass_flags = 0;
+ break;
+ }
+ break;
+
+ case nir_instr_type_tex:
+ switch (nir_instr_as_tex(instr)->op) {
+ case nir_texop_tex:
+ case nir_texop_txb:
+ case nir_texop_lod:
+ /* These take implicit derivatives so they need to be pinned */
+ instr->pass_flags = GCM_INSTR_PINNED;
+ break;
+
+ default:
+ instr->pass_flags = 0;
+ break;
+ }
+ break;
+
+ case nir_instr_type_load_const:
+ instr->pass_flags = 0;
+ break;
+
+ case nir_instr_type_intrinsic: {
+ const nir_intrinsic_info *info =
+ &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
+
+ /* Only intrinsics that may be both eliminated and reordered can move */
+ if ((info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
+ (info->flags & NIR_INTRINSIC_CAN_REORDER)) {
+ instr->pass_flags = 0;
+ } else {
+ instr->pass_flags = GCM_INSTR_PINNED;
+ }
+ break;
+ }
+
+ case nir_instr_type_jump:
+ case nir_instr_type_ssa_undef:
+ case nir_instr_type_phi:
+ instr->pass_flags = GCM_INSTR_PINNED;
+ break;
+
+ default:
+ unreachable("Invalid instruction type in GCM");
+ }
+
+ if (!(instr->pass_flags & GCM_INSTR_PINNED)) {
+ /* If this is an unpinned instruction, go ahead and pull it out of
+ * the program and put it on the instrs list. This has a couple
+ * of benefits. First, it makes the scheduling algorithm more
+ * efficient because we can avoid walking over basic blocks and
+ * pinned instructions. Second, it keeps us from causing linked
+ * list confusion when we're trying to put everything in its
+ * proper place at the end of the pass.
+ *
+ * Note that we don't use nir_instr_remove here because that also
+ * cleans up uses and defs and we want to keep that information.
+ */
+ exec_node_remove(&instr->node);
+ exec_list_push_tail(&state->instrs, &instr->node);
+ }
+ }
+
+ return true;
+}
+
+static void
+gcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state);
+
+/** Updates an instruction's early schedule for one of its sources
+ *
+ * Called once per source of the instruction stored in state->instr.  It
+ * makes sure the instruction producing the source has been scheduled early
+ * itself and then moves the current instruction down to the producer's
+ * block if that block is lower in the dominance tree.
+ *
+ * Always returns true so that every source is visited.
+ */
+static bool
+gcm_schedule_early_src(nir_src *src, void *void_state)
+{
+   struct gcm_state *state = void_state;
+   nir_instr *user = state->instr;
+
+   assert(src->is_ssa);
+
+   nir_instr *producer = src->ssa->parent_instr;
+   gcm_schedule_early_instr(producer, state);
+
+   /* While the index isn't a proper dominance depth, it does have the
+    * property that if A dominates B then A->index <= B->index.  Since we
+    * know that this instruction must have been dominated by all of its
+    * sources at some point (even if it's gone through value-numbering),
+    * all of the sources must lie on the same branch of the dominance tree.
+    * Therefore, comparing indices is enough.  The producer's block must be
+    * read only now, after it has been scheduled.
+    */
+   nir_block *producer_block = producer->block;
+   if (user->block->index < producer_block->index)
+      user->block = producer_block;
+
+   /* The recursive call above may have overwritten state->instr.  Put it
+    * back, since later calls to this function for the remaining sources of
+    * the same instruction still rely on it.
+    */
+   state->instr = user;
+
+   return true;
+}
+
+/** Schedules an instruction early
+ *
+ * Depth-first search from the given instruction through its sources that
+ * assigns each instruction the earliest block it can live in within the
+ * dominance tree.  "Scheduling" only means updating instr->block; nothing
+ * is moved yet.
+ */
+static void
+gcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state)
+{
+   if (instr->pass_flags & GCM_INSTR_SCHEDULED_EARLY)
+      return;
+
+   instr->pass_flags |= GCM_INSTR_SCHEDULED_EARLY;
+
+   /* Pinned instructions already sit where they have to be.  Not descending
+    * into them also means we never follow the sources of phi nodes, which
+    * can be back-edges.
+    */
+   if (!(instr->pass_flags & GCM_INSTR_PINNED)) {
+      /* Begin at the very top; visiting the sources pushes the instruction
+       * down as far as needed.
+       */
+      state->instr = instr;
+      instr->block = nir_start_block(state->impl);
+
+      nir_foreach_src(instr, gcm_schedule_early_src, state);
+   }
+}
+
+static void
+gcm_schedule_late_instr(nir_instr *instr, struct gcm_state *state);
+
+/** Schedules the instruction associated with the given SSA def late
+ *
+ * This function works by first walking all of the uses of the given SSA
+ * definition, ensuring that they are scheduled, and then computing the LCA
+ * (least common ancestor) of its uses. It then schedules this instruction
+ * as close to the LCA as possible while trying to stay out of loops.
+ *
+ * This is a callback for nir_foreach_ssa_def(); void_state is the
+ * struct gcm_state and the return value is always true.
+ */
+static bool
+gcm_schedule_late_def(nir_ssa_def *def, void *void_state)
+{
+ struct gcm_state *state = void_state;
+
+ nir_block *lca = NULL;
+
+ nir_foreach_use(def, use_src) {
+ nir_instr *use_instr = use_src->parent_instr;
+
+ gcm_schedule_late_instr(use_instr, state);
+
+ /* Phi instructions are a bit special. SSA definitions don't have to
+ * dominate the sources of the phi nodes that use them; instead, they
+ * have to dominate the predecessor block corresponding to the phi
+ * source. We handle this by looking through the sources, finding
+ * any that are using this SSA def, and using those blocks instead
+ * of the one the phi lives in.
+ */
+ if (use_instr->type == nir_instr_type_phi) {
+ nir_phi_instr *phi = nir_instr_as_phi(use_instr);
+
+ nir_foreach_phi_src(phi, phi_src) {
+ if (phi_src->src.ssa == def)
+ lca = nir_dominance_lca(lca, phi_src->pred);
+ }
+ } else {
+ lca = nir_dominance_lca(lca, use_instr->block);
+ }
+ }
+
+ nir_foreach_if_use(def, use_src) {
+ nir_if *if_stmt = use_src->parent_if;
+
+ /* For if statements, we consider the block to be the one immediately
+ * preceding the if CF node.
+ */
+ nir_block *pred_block =
+ nir_cf_node_as_block(nir_cf_node_prev(&if_stmt->cf_node));
+
+ lca = nir_dominance_lca(lca, pred_block);
+ }
+
+ /* Some instructions may never be used. We'll just leave them scheduled
+ * early and let dead code clean them up.
+ */
+ if (lca == NULL)
+ return true;
+
+ /* We now have the LCA of all of the uses. If our invariants hold,
+ * this is dominated by the block that we chose when scheduling early.
+ * We now walk up the dominance tree and pick the lowest block that is
+ * as far outside loops as we can get.
+ */
+ nir_block *best = lca;
+ /* def->parent_instr->block still holds the early-scheduled block here;
+ * the walk stops when it reaches that block, without considering it.
+ */
+ while (lca != def->parent_instr->block) {
+ assert(lca);
+ if (state->blocks[lca->index].loop_depth <
+ state->blocks[best->index].loop_depth)
+ best = lca;
+ lca = lca->imm_dom;
+ }
+ def->parent_instr->block = best;
+
+ return true;
+}
+
+/** Schedules an instruction late
+ *
+ * Depth-first search from the given instruction through its uses that
+ * assigns each instruction a block as late as it can reasonably go in the
+ * dominance tree.  "Scheduling" only means updating instr->block.
+ *
+ * The name is a bit of a misnomer: instructions are not scheduled "as late
+ * as possible" as the paper implies.  The latest possible place is found
+ * first, and the instruction may then be scheduled earlier than that.  The
+ * final location is as far down the tree as we can go while trying to stay
+ * out of loops.
+ */
+static void
+gcm_schedule_late_instr(nir_instr *instr, struct gcm_state *state)
+{
+   if (instr->pass_flags & GCM_INSTR_SCHEDULED_LATE)
+      return;
+
+   instr->pass_flags |= GCM_INSTR_SCHEDULED_LATE;
+
+   /* Pinned instructions already sit where they have to be.  Skipping them
+    * also keeps us from ever following phi nodes, which can be back-edges.
+    */
+   if (!(instr->pass_flags & GCM_INSTR_PINNED))
+      nir_foreach_ssa_def(instr, gcm_schedule_late_def, state);
+}
+
+static void
+gcm_place_instr(nir_instr *instr, struct gcm_state *state);
+
+/* nir_foreach_ssa_def() callback: places every instruction that uses `def`.
+ * `state` is the struct gcm_state.
+ *
+ * NOTE(review): this returns false; if nir_foreach_ssa_def() stops on a
+ * false return, only the first definition of an instruction is visited --
+ * confirm that this is intended.
+ */
+static bool
+gcm_place_instr_def(nir_ssa_def *def, void *state)
+{
+   struct gcm_state *gcm = state;
+
+   nir_foreach_use(def, use_src) {
+      gcm_place_instr(use_src->parent_instr, gcm);
+   }
+
+   return false;
+}
+
+/** Places an instruction back into the program
+ *
+ * The earlier passes of GCM simply choose blocks for each instruction and
+ * otherwise leave them alone. This pass actually places the instructions
+ * into their chosen blocks.
+ *
+ * To do so, we use a standard post-order depth-first search linearization
+ * algorithm. We walk over the uses of the given instruction and ensure
+ * that they are placed and then place this instruction. Because we are
+ * working on multiple blocks at a time, we keep track of the last inserted
+ * instruction per-block in the state structure's block_info array. When
+ * we insert an instruction in a block we insert it before the last
+ * instruction inserted in that block rather than the last instruction
+ * inserted globally.
+ */
+static void
+gcm_place_instr(nir_instr *instr, struct gcm_state *state)
+{
+ if (instr->pass_flags & GCM_INSTR_PLACED)
+ return;
+
+ instr->pass_flags |= GCM_INSTR_PLACED;
+
+ /* Phi nodes are our one source of back-edges. Since right now we are
+ * only doing scheduling within blocks, we don't need to worry about
+ * them since they are always at the top. Just skip them completely.
+ */
+ if (instr->type == nir_instr_type_phi) {
+ assert(instr->pass_flags & GCM_INSTR_PINNED);
+ return;
+ }
+
+ /* Place the users of this instruction first (post-order). */
+ nir_foreach_ssa_def(instr, gcm_place_instr_def, state);
+
+ if (instr->pass_flags & GCM_INSTR_PINNED) {
+ /* Pinned instructions have an implicit dependence on the pinned
+ * instructions that come after them in the block. Since the pinned
+ * instructions will naturally "chain" together, we only need to
+ * explicitly visit one of them.
+ */
+ for (nir_instr *after = nir_instr_next(instr);
+ after;
+ after = nir_instr_next(after)) {
+ if (after->pass_flags & GCM_INSTR_PINNED) {
+ gcm_place_instr(after, state);
+ break;
+ }
+ }
+ }
+
+ struct gcm_block_info *block_info = &state->blocks[instr->block->index];
+ /* Pinned instructions never left their block, so only unpinned ones have
+ * to be unlinked from wherever they currently are and re-inserted.
+ */
+ if (!(instr->pass_flags & GCM_INSTR_PINNED)) {
+ exec_node_remove(&instr->node);
+
+ if (block_info->last_instr) {
+ exec_node_insert_node_before(&block_info->last_instr->node,
+ &instr->node);
+ } else {
+ /* Schedule it at the end of the block */
+ nir_instr *jump_instr = nir_block_last_instr(instr->block);
+ if (jump_instr && jump_instr->type == nir_instr_type_jump) {
+ exec_node_insert_node_before(&jump_instr->node, &instr->node);
+ } else {
+ exec_list_push_tail(&instr->block->instr_list, &instr->node);
+ }
+ }
+ }
+
+ /* Pinned or not, this is now the insertion point for this block. */
+ block_info->last_instr = instr;
+}
+
+/* Runs global code motion on one function implementation:
+ *
+ * 1. record the loop depth of every block,
+ * 2. pin the immovable instructions and pull all others out of their
+ *    blocks onto state.instrs,
+ * 3. schedule the unpinned instructions early, then late,
+ * 4. place them back into their chosen blocks.
+ */
+static void
+opt_gcm_impl(nir_function_impl *impl)
+{
+   /* Require the metadata before anything depends on it: the per-block
+    * array below is sized from impl->num_blocks and indexed by
+    * block->index, so block indices have to be up to date first.
+    * NOTE(review): num_blocks is presumably refreshed together with the
+    * block indices -- confirm.
+    */
+   nir_metadata_require(impl, nir_metadata_block_index |
+                              nir_metadata_dominance);
+
+   struct gcm_state state;
+
+   state.impl = impl;
+   state.instr = NULL;
+   exec_list_make_empty(&state.instrs);
+   state.blocks = rzalloc_array(NULL, struct gcm_block_info, impl->num_blocks);
+
+   gcm_build_block_info(&impl->body, &state, 0);
+   nir_foreach_block(impl, gcm_pin_instructions_block, &state);
+
+   foreach_list_typed(nir_instr, instr, node, &state.instrs)
+      gcm_schedule_early_instr(instr, &state);
+
+   foreach_list_typed(nir_instr, instr, node, &state.instrs)
+      gcm_schedule_late_instr(instr, &state);
+
+   /* Placing an instruction unlinks it from state.instrs, so keep taking
+    * the current tail until the list has been drained.
+    */
+   while (!exec_list_is_empty(&state.instrs)) {
+      nir_instr *instr = exec_node_data(nir_instr,
+                                        state.instrs.tail_pred, node);
+      gcm_place_instr(instr, &state);
+   }
+
+   ralloc_free(state.blocks);
+
+   /* Instructions were moved between blocks but the CFG is untouched, so
+    * only block indices and dominance are still valid.
+    */
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+}
+
+/* Entry point: runs global code motion on every function in the shader that
+ * has an implementation.
+ */
+void
+nir_opt_gcm(nir_shader *shader)
+{
+   nir_foreach_function(shader, func) {
+      if (func->impl == NULL)
+         continue;
+
+      opt_gcm_impl(func->impl);
+   }
+}
diff --git a/src/compiler/nir/nir_opt_global_to_local.c b/src/compiler/nir/nir_opt_global_to_local.c
new file mode 100644
index 00000000000..bccb45b6237
--- /dev/null
+++ b/src/compiler/nir/nir_opt_global_to_local.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Tries to turn one global register into a register local to a single
+ * function implementation.
+ *
+ * Returns true when the register was either removed (it has no defs and no
+ * uses at all) or moved into the one implementation that touches it, and
+ * false when defs/uses are spread over more than one implementation.
+ */
+static bool
+global_to_local(nir_register *reg)
+{
+   /* The single implementation seen so far, or NULL if none yet. */
+   nir_function_impl *owner = NULL;
+
+   assert(reg->is_global);
+
+   nir_foreach_def(reg, def) {
+      nir_function_impl *def_impl =
+         nir_cf_node_get_function(&def->reg.parent_instr->block->cf_node);
+
+      if (owner == NULL)
+         owner = def_impl;
+      else if (owner != def_impl)
+         return false;
+   }
+
+   nir_foreach_use(reg, use) {
+      nir_function_impl *use_impl =
+         nir_cf_node_get_function(&use->parent_instr->block->cf_node);
+
+      if (owner == NULL)
+         owner = use_impl;
+      else if (owner != use_impl)
+         return false;
+   }
+
+   nir_foreach_if_use(reg, if_use) {
+      nir_function_impl *if_impl =
+         nir_cf_node_get_function(&if_use->parent_if->cf_node);
+
+      if (owner == NULL)
+         owner = if_impl;
+      else if (owner != if_impl)
+         return false;
+   }
+
+   if (owner != NULL) {
+      /* Every def and use lives in the same implementation: relink the
+       * register into that implementation's list and give it a fresh
+       * local index.
+       */
+      exec_node_remove(&reg->node);
+      exec_list_push_tail(&owner->registers, &reg->node);
+      reg->index = owner->reg_alloc++;
+      reg->is_global = false;
+      return true;
+   }
+
+   /* This register is never used or defined, so delete it. */
+   nir_reg_remove(reg);
+   return true;
+}
+
+/*
+ * Runs global_to_local() on every register in the shader's register list.
+ * Returns true if at least one register was removed or made local.
+ */
+bool
+nir_opt_global_to_local(nir_shader *shader)
+{
+   bool any_changed = false;
+
+   /* The _safe iterator is needed because global_to_local() may unlink the
+    * current register from this list.
+    */
+   foreach_list_typed_safe(nir_register, reg, node, &shader->registers) {
+      const bool changed = global_to_local(reg);
+      any_changed = any_changed || changed;
+   }
+
+   return any_changed;
+}
diff --git a/src/compiler/nir/nir_opt_peephole_select.c b/src/compiler/nir/nir_opt_peephole_select.c
new file mode 100644
index 00000000000..0fc658df861
--- /dev/null
+++ b/src/compiler/nir/nir_opt_peephole_select.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ *
+ */
+
+#include "nir.h"
+#include "nir_control_flow.h"
+
+/*
+ * Implements a small peephole optimization that looks for
+ *
+ * if (cond) {
+ * <empty>
+ * } else {
+ * <empty>
+ * }
+ * phi
+ * ...
+ * phi
+ *
+ * and replaces it with a series of selects. It can also handle the case
+ * where, instead of being empty, the if may contain some move operations
+ * whose only use is one of the following phi nodes. This happens all the
+ * time when the SSA form comes from a conditional assignment with a
+ * swizzle.
+ */
+
+struct peephole_select_state { /* state handed to nir_opt_peephole_select_block() through its void pointer */
+ void *mem_ctx; /* allocation context passed to nir_alu_instr_create() for the new bcsel instructions */
+ bool progress; /* set to true once an if statement has been replaced by selects */
+};
+
+/*
+ * Returns true if every instruction in the block is one that the peephole
+ * may hoist out of the if: a load_const, a load_var intrinsic reading a
+ * shader input or uniform, or a move-like ALU instruction whose SSA result
+ * feeds only phi nodes in the block's first successor.
+ */
+static bool
+block_check_for_allowed_instrs(nir_block *block)
+{
+   nir_foreach_instr(block, instr) {
+      if (instr->type == nir_instr_type_load_const)
+         continue;
+
+      if (instr->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+         /* Only variable loads are acceptable ... */
+         if (intrin->intrinsic != nir_intrinsic_load_var)
+            return false;
+
+         /* ... and only from shader inputs or uniforms. */
+         nir_variable *var = intrin->variables[0]->var;
+         if (var->data.mode != nir_var_shader_in &&
+             var->data.mode != nir_var_uniform)
+            return false;
+
+         continue;
+      }
+
+      if (instr->type != nir_instr_type_alu)
+         return false;
+
+      nir_alu_instr *mov = nir_instr_as_alu(instr);
+
+      /* It must be a move-like operation. */
+      switch (mov->op) {
+      case nir_op_fmov:
+      case nir_op_imov:
+      case nir_op_fneg:
+      case nir_op_ineg:
+      case nir_op_fabs:
+      case nir_op_iabs:
+      case nir_op_vec2:
+      case nir_op_vec3:
+      case nir_op_vec4:
+         break;
+      default:
+         return false;
+      }
+
+      /* Saturate can't be handled, the destination must be SSA, and that
+       * SSA value must have no if-uses.  The checks short-circuit in this
+       * order, so the SSA def is only inspected once is_ssa is known.
+       */
+      if (mov->dest.saturate ||
+          !mov->dest.dest.is_ssa ||
+          !list_empty(&mov->dest.dest.ssa.if_uses))
+         return false;
+
+      /* The only uses of this definition must be phis in the successor. */
+      nir_foreach_use(&mov->dest.dest.ssa, use) {
+         const bool is_successor_phi =
+            use->parent_instr->type == nir_instr_type_phi &&
+            use->parent_instr->block == block->successors[0];
+
+         if (!is_successor_phi)
+            return false;
+      }
+   }
+
+   return true;
+}
+
+/*
+ * Per-block callback for nir_foreach_block().  If the block directly follows
+ * an if statement whose two arms are single blocks holding only "allowed"
+ * instructions, those instructions are hoisted into the block preceding the
+ * if, every phi at the top of this block is replaced by a bcsel on the if
+ * condition, and the if is removed.  Always returns true.
+ */
+static bool
+nir_opt_peephole_select_block(nir_block *block, void *void_state)
+{
+   struct peephole_select_state *state = void_state;
+
+   /* An empty block certainly has no phi nodes, so it can be skipped; this
+    * also gives an early out for the function's end block, which isn't
+    * actually attached to the CFG.  A block that is first in its list has
+    * no preceding if either.
+    */
+   if (exec_list_is_empty(&block->instr_list) ||
+       nir_cf_node_is_first(&block->cf_node))
+      return true;
+
+   nir_cf_node *prev_node = nir_cf_node_prev(&block->cf_node);
+   if (prev_node->type != nir_cf_node_if)
+      return true;
+
+   nir_if *if_stmt = nir_cf_node_as_if(prev_node);
+   nir_cf_node *then_node = nir_if_first_then_node(if_stmt);
+   nir_cf_node *else_node = nir_if_first_else_node(if_stmt);
+
+   /* Each side may contain only one CF node ... */
+   if (then_node != nir_if_last_then_node(if_stmt) ||
+       else_node != nir_if_last_else_node(if_stmt))
+      return true;
+
+   nir_block *then_block = nir_cf_node_as_block(then_node);
+   nir_block *else_block = nir_cf_node_as_block(else_node);
+
+   /* ... and those blocks must hold nothing but "allowed" instructions. */
+   if (!(block_check_for_allowed_instrs(then_block) &&
+         block_check_for_allowed_instrs(else_block)))
+      return true;
+
+   /* From here on the previous CF node is known to be an if whose arms
+    * only feed phi nodes in this block, so the whole if can go away and the
+    * phis can become selects.
+    */
+
+   nir_block *prev_block = nir_cf_node_as_block(nir_cf_node_prev(prev_node));
+   assert(prev_block->cf_node.type == nir_cf_node_block);
+
+   /* Hoist what is left in the arms to the end of the block before the if,
+    * then-arm first and else-arm second.  block_check_for_allowed_instrs()
+    * has already established that this is safe.
+    */
+   nir_block *arms[2] = { then_block, else_block };
+   for (unsigned a = 0; a < 2; a++) {
+      nir_block *arm = arms[a];
+
+      nir_foreach_instr_safe(arm, instr) {
+         exec_node_remove(&instr->node);
+         instr->block = prev_block;
+         exec_list_push_tail(&prev_block->instr_list, &instr->node);
+      }
+   }
+
+   nir_foreach_instr_safe(block, instr) {
+      /* Stop at the first instruction that is not a phi. */
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      nir_alu_instr *sel = nir_alu_instr_create(state->mem_ctx, nir_op_bcsel);
+
+      nir_src_copy(&sel->src[0].src, &if_stmt->condition, sel);
+      /* Splat the condition to all channels */
+      memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle);
+
+      assert(exec_list_length(&phi->srcs) == 2);
+      nir_foreach_phi_src(phi, src) {
+         assert(src->pred == then_block || src->pred == else_block);
+         assert(src->src.is_ssa);
+
+         /* The then-value is bcsel source 1, the else-value source 2. */
+         unsigned idx;
+         if (src->pred == then_block)
+            idx = 1;
+         else
+            idx = 2;
+
+         nir_src_copy(&sel->src[idx].src, &src->src, sel);
+      }
+
+      const unsigned num_comps = phi->dest.ssa.num_components;
+      nir_ssa_dest_init(&sel->instr, &sel->dest.dest,
+                        num_comps, phi->dest.ssa.name);
+      sel->dest.write_mask = (1 << num_comps) - 1;
+
+      nir_ssa_def_rewrite_uses(&phi->dest.ssa,
+                               nir_src_for_ssa(&sel->dest.dest.ssa));
+
+      nir_instr_insert_before(&phi->instr, &sel->instr);
+      nir_instr_remove(&phi->instr);
+   }
+
+   nir_cf_node_remove(&if_stmt->cf_node);
+   state->progress = true;
+
+   return true;
+}
+
+/*
+ * Runs the select peephole over every block of one implementation and
+ * reports whether anything changed.  When something did, no metadata is
+ * kept.
+ */
+static bool
+nir_opt_peephole_select_impl(nir_function_impl *impl)
+{
+   struct peephole_select_state state = {
+      .mem_ctx = ralloc_parent(impl),
+      .progress = false,
+   };
+
+   nir_foreach_block(impl, nir_opt_peephole_select_block, &state);
+
+   if (!state.progress)
+      return false;
+
+   nir_metadata_preserve(impl, nir_metadata_none);
+   return true;
+}
+
+/*
+ * Shader-level entry point: applies the select peephole to every function
+ * that has an implementation.  Returns true if any of them changed.
+ */
+bool
+nir_opt_peephole_select(nir_shader *shader)
+{
+   bool any_progress = false;
+
+   nir_foreach_function(shader, function) {
+      if (function->impl == NULL)
+         continue;
+
+      if (nir_opt_peephole_select_impl(function->impl))
+         any_progress = true;
+   }
+
+   return any_progress;
+}
diff --git a/src/compiler/nir/nir_opt_remove_phis.c b/src/compiler/nir/nir_opt_remove_phis.c
new file mode 100644
index 00000000000..646183707bd
--- /dev/null
+++ b/src/compiler/nir/nir_opt_remove_phis.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright © 2015 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * This is a pass for removing phi nodes that look like:
+ * a = phi(b, b, b, ...)
+ *
+ * Note that we can't ignore undef sources here, or else we may create a
+ * situation where a use of b isn't dominated by the definition of b. We're
+ * allowed to do this since the definition of b must dominate all of the
+ * phi node's predecessors, which means it must dominate the phi node as well
+ * as all of the phi node's uses. In essence, the phi node acts as a copy
+ * instruction. b can't be another phi node in the same block, since the only
+ * time when phi nodes can source other phi nodes defined in the same block is
+ * at the loop header, and in that case one of the sources of the phi has to
+ * be from before the loop and that source can't be b.
+ */
+
+/*
+ * Per-block callback: removes every leading phi whose sources (ignoring
+ * sources that refer back to the phi itself) all name the same SSA
+ * definition, rewriting the phi's uses to that definition.  `state` points
+ * at a bool that is set to true when a phi is removed.  Always returns true.
+ */
+static bool
+remove_phis_block(nir_block *block, void *state)
+{
+   bool *progress = state;
+
+   nir_foreach_instr_safe(block, instr) {
+      /* Stop at the first instruction that is not a phi. */
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+      nir_ssa_def *common = NULL;
+      bool all_same = true;
+
+      nir_foreach_phi_src(phi, src) {
+         assert(src->src.is_ssa);
+         nir_ssa_def *src_def = src->src.ssa;
+
+         /* For phi nodes at the beginning of loops, some sources may come
+          * from back edges that point at the phi's own destination, i.e.
+          *
+          *    a = phi(a, b, ...)
+          *
+          * Those can be skipped: if all the normal sources name the same
+          * definition, that definition must still dominate the phi, and
+          * the phi will always take its value.
+          */
+         if (src_def == &phi->dest.ssa)
+            continue;
+
+         if (common == NULL) {
+            common = src_def;
+            continue;
+         }
+
+         if (src_def != common) {
+            all_same = false;
+            break;
+         }
+      }
+
+      if (all_same) {
+         /* At least one definition must have been found, since there must
+          * be at least one forward edge.
+          */
+         assert(common != NULL);
+
+         assert(phi->dest.is_ssa);
+         nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(common));
+         nir_instr_remove(instr);
+
+         *progress = true;
+      }
+   }
+
+   return true;
+}
+
+/*
+ * Runs remove_phis_block() over one implementation.  If any phi was removed,
+ * block indices and dominance are declared preserved.  Returns whether
+ * anything changed.
+ */
+static bool
+remove_phis_impl(nir_function_impl *impl)
+{
+   bool changed = false;
+
+   nir_foreach_block(impl, remove_phis_block, &changed);
+
+   if (!changed)
+      return false;
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+   return true;
+}
+
+/*
+ * Shader-level entry point: removes redundant phis from every function that
+ * has an implementation.  Returns true if any phi was removed.
+ */
+bool
+nir_opt_remove_phis(nir_shader *shader)
+{
+   bool any_progress = false;
+
+   nir_foreach_function(shader, function) {
+      if (function->impl == NULL)
+         continue;
+
+      if (remove_phis_impl(function->impl))
+         any_progress = true;
+   }
+
+   return any_progress;
+}
+
diff --git a/src/compiler/nir/nir_opt_undef.c b/src/compiler/nir/nir_opt_undef.c
new file mode 100644
index 00000000000..374564d34c5
--- /dev/null
+++ b/src/compiler/nir/nir_opt_undef.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+
+/** @file nir_opt_undef.c
+ *
+ * Handles optimization of operations involving ssa_undef. For now, we just
+ * make sure that csels between undef and some other value just give the other
+ * value (on the assumption that the condition's going to be choosing the
+ * defined value). This reduces work after if flattening when each side of
+ * the if is defining a variable.
+ *
+ * Some day, we may find some use for making other operations consuming an
+ * undef arg output undef, but I don't know of any cases currently.
+ */
+
+/*
+ * If `instr` is a bcsel/fcsel and one of its two value sources (src[1] or
+ * src[2]) is an SSA undef, rewrites the instruction in place into an imov
+ * of the other value source.  Returns true if the instruction was changed.
+ */
+static bool
+opt_undef_alu(nir_alu_instr *instr)
+{
+   const bool is_select = instr->op == nir_op_bcsel ||
+                          instr->op == nir_op_fcsel;
+   if (!is_select)
+      return false;
+
+   assert(instr->dest.dest.is_ssa);
+
+   for (int i = 1; i <= 2; i++) {
+      if (!instr->src[i].src.is_ssa ||
+          instr->src[i].src.ssa->parent_instr->type != nir_instr_type_ssa_undef)
+         continue;
+
+      /* Index of the value source that is kept. */
+      const int other = (i == 1) ? 2 : 1;
+
+      /* nir_alu_src_copy alone is not enough because the def/use lists
+       * have to be updated, so the plain source is rewritten first and the
+       * ALU source is then copied on top of it.
+       */
+      nir_instr_rewrite_src(&instr->instr, &instr->src[0].src,
+                            instr->src[other].src);
+      nir_alu_src_copy(&instr->src[0], &instr->src[other],
+                       ralloc_parent(instr));
+
+      /* Clear both former value sources through the rewrite helper. */
+      nir_src empty_src;
+      memset(&empty_src, 0, sizeof(empty_src));
+      nir_instr_rewrite_src(&instr->instr, &instr->src[1].src, empty_src);
+      nir_instr_rewrite_src(&instr->instr, &instr->src[2].src, empty_src);
+      instr->op = nir_op_imov;
+
+      return true;
+   }
+
+   return false;
+}
+
+/*
+ * Per-block callback: runs opt_undef_alu() on every ALU instruction in the
+ * block.  `data` points at a bool that is set to true when any instruction
+ * was rewritten.  Always returns true.
+ */
+static bool
+opt_undef_block(nir_block *block, void *data)
+{
+   bool *progress = data;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         continue;
+
+      if (opt_undef_alu(nir_instr_as_alu(instr)))
+         *progress = true;
+   }
+
+   return true;
+}
+
+/*
+ * Shader-level entry point for the undef optimization.  Runs
+ * opt_undef_block() over every function that has an implementation and
+ * returns true if any instruction in the shader was rewritten.
+ */
+bool
+nir_opt_undef(nir_shader *shader)
+{
+   bool progress = false;
+
+   nir_foreach_function(shader, function) {
+      if (!function->impl)
+         continue;
+
+      /* Progress is tracked per implementation so that a change in an
+       * earlier function does not cause metadata to be dropped for later
+       * functions that were left untouched.
+       */
+      bool impl_progress = false;
+      nir_foreach_block(function->impl, opt_undef_block, &impl_progress);
+
+      if (impl_progress) {
+         nir_metadata_preserve(function->impl,
+                               nir_metadata_block_index |
+                               nir_metadata_dominance);
+         progress = true;
+      }
+   }
+
+   return progress;
+}
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
new file mode 100644
index 00000000000..48ecb48a620
--- /dev/null
+++ b/src/compiler/nir/nir_print.c
@@ -0,0 +1,1069 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#include "nir.h"
+#include "compiler/shader_enums.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Writes `num_tabs` tab characters to `fp`. */
+static void
+print_tabs(unsigned num_tabs, FILE *fp)
+{
+   unsigned remaining = num_tabs;
+
+   while (remaining > 0) {
+      fprintf(fp, "\t");
+      remaining--;
+   }
+}
+
+typedef struct {
+ FILE *fp; /* stream every print_* helper writes to */
+ nir_shader *shader; /* shader being printed; print_intrinsic_instr() tolerates NULL here */
+ /** map from nir_variable -> printable name; when NULL, print_var() prints var->name directly */
+ struct hash_table *ht;
+
+ /** set of names used so far for nir_variables; when NULL, print_var_decl() does no name de-duplication */
+ struct set *syms;
+
+ /* an index used to make new non-conflicting names */
+ unsigned index;
+} print_state;
+
+/*
+ * Prints a register reference: an optional name comment followed by "gr<N>"
+ * for global registers or "r<N>" for local ones.
+ */
+static void
+print_register(nir_register *reg, print_state *state)
+{
+   FILE *fp = state->fp;
+
+   if (reg->name != NULL)
+      fprintf(fp, "/* %s */ ", reg->name);
+
+   if (!reg->is_global) {
+      fprintf(fp, "r%u", reg->index);
+      return;
+   }
+
+   fprintf(fp, "gr%u", reg->index);
+}
+
+/* Type names indexed by component count; slot 0 is a placeholder. */
+static const char *sizes[] = { "error", "vec1", "vec2", "vec3", "vec4" };
+
+/*
+ * Prints one "decl_reg" line: the vector size, an optional "(packed)"
+ * marker, the register itself and, for array registers, the element count.
+ */
+static void
+print_register_decl(nir_register *reg, print_state *state)
+{
+   FILE *fp = state->fp;
+   const char *size_name = sizes[reg->num_components];
+
+   fprintf(fp, "decl_reg %s ", size_name);
+
+   if (reg->is_packed)
+      fprintf(fp, "(packed) ");
+
+   print_register(reg, state);
+
+   if (reg->num_array_elems != 0)
+      fprintf(fp, "[%u]", reg->num_array_elems);
+
+   fprintf(fp, "\n");
+}
+
+/*
+ * Prints an SSA definition: an optional name comment, then the vector size
+ * and "ssa_<index>".
+ */
+static void
+print_ssa_def(nir_ssa_def *def, print_state *state)
+{
+   FILE *fp = state->fp;
+   const char *size_name = sizes[def->num_components];
+
+   if (def->name != NULL)
+      fprintf(fp, "/* %s */ ", def->name);
+
+   fprintf(fp, "%s ssa_%u", size_name, def->index);
+}
+
+/*
+ * Prints a use of an SSA value: an optional name comment followed by
+ * "ssa_<index>", with no size prefix.
+ */
+static void
+print_ssa_use(nir_ssa_def *def, print_state *state)
+{
+   FILE *out = state->fp;
+
+   if (def->name != NULL)
+      fprintf(out, "/* %s */ ", def->name);
+
+   fprintf(out, "ssa_%u", def->index);
+}
+
+static void print_src(nir_src *src, print_state *state);
+
+/*
+ * Prints a register source.  For array registers the access is followed by
+ * "[base_offset]" or "[base_offset + <indirect>]".
+ */
+static void
+print_reg_src(nir_reg_src *src, print_state *state)
+{
+   FILE *fp = state->fp;
+
+   print_register(src->reg, state);
+
+   /* Non-array registers have no subscript. */
+   if (src->reg->num_array_elems == 0)
+      return;
+
+   fprintf(fp, "[%u", src->base_offset);
+   if (src->indirect != NULL) {
+      fprintf(fp, " + ");
+      print_src(src->indirect, state);
+   }
+   fprintf(fp, "]");
+}
+
+/*
+ * Prints a register destination.  For array registers the access is
+ * followed by "[base_offset]" or "[base_offset + <indirect>]".
+ */
+static void
+print_reg_dest(nir_reg_dest *dest, print_state *state)
+{
+   FILE *fp = state->fp;
+
+   print_register(dest->reg, state);
+
+   /* Non-array registers have no subscript. */
+   if (dest->reg->num_array_elems == 0)
+      return;
+
+   fprintf(fp, "[%u", dest->base_offset);
+   if (dest->indirect != NULL) {
+      fprintf(fp, " + ");
+      print_src(dest->indirect, state);
+   }
+   fprintf(fp, "]");
+}
+
+/* Prints a source, dispatching on whether it is an SSA value or a register. */
+static void
+print_src(nir_src *src, print_state *state)
+{
+   if (!src->is_ssa) {
+      print_reg_src(&src->reg, state);
+      return;
+   }
+
+   print_ssa_use(src->ssa, state);
+}
+
+/* Prints a destination, dispatching on whether it is SSA or a register. */
+static void
+print_dest(nir_dest *dest, print_state *state)
+{
+   if (!dest->is_ssa) {
+      print_reg_dest(&dest->reg, state);
+      return;
+   }
+
+   print_ssa_def(&dest->ssa, state);
+}
+
+/*
+ * Prints ALU source number `src` of `instr`, including its negate/abs
+ * modifiers and, when any used channel is not the identity, its swizzle
+ * restricted to the channels the instruction actually uses.
+ */
+static void
+print_alu_src(nir_alu_instr *instr, unsigned src, print_state *state)
+{
+   FILE *fp = state->fp;
+   nir_alu_src *alu_src = &instr->src[src];
+
+   if (alu_src->negate)
+      fprintf(fp, "-");
+   if (alu_src->abs)
+      fprintf(fp, "abs(");
+
+   print_src(&alu_src->src, state);
+
+   /* The swizzle is only printed if some used channel is remapped. */
+   bool identity = true;
+   for (unsigned c = 0; c < 4 && identity; c++) {
+      if (nir_alu_instr_channel_used(instr, src, c) &&
+          alu_src->swizzle[c] != c)
+         identity = false;
+   }
+
+   if (!identity) {
+      fprintf(fp, ".");
+      for (unsigned c = 0; c < 4; c++) {
+         if (nir_alu_instr_channel_used(instr, src, c))
+            fprintf(fp, "%c", "xyzw"[alu_src->swizzle[c]]);
+      }
+   }
+
+   if (alu_src->abs)
+      fprintf(fp, ")");
+}
+
+/*
+ * Prints an ALU destination.  For register destinations whose write mask
+ * does not cover every component of the register, the written channels are
+ * appended as ".xyzw"-style letters.  The saturate modifier is not printed
+ * here; print_alu_instr() emits it after the opcode.
+ */
+static void
+print_alu_dest(nir_alu_dest *dest, print_state *state)
+{
+   FILE *fp = state->fp;
+
+   print_dest(&dest->dest, state);
+
+   /* SSA destinations never get a write-mask suffix. */
+   if (dest->dest.is_ssa)
+      return;
+
+   /* Neither do full write masks. */
+   if (dest->write_mask == (1 << dest->dest.reg.reg->num_components) - 1)
+      return;
+
+   fprintf(fp, ".");
+   for (unsigned chan = 0; chan < 4; chan++) {
+      if ((dest->write_mask >> chan) & 1)
+         fprintf(fp, "%c", "xyzw"[chan]);
+   }
+}
+
+/*
+ * Prints an ALU instruction as "<dest> = <op>[.sat] <src0>, <src1>, ...".
+ */
+static void
+print_alu_instr(nir_alu_instr *instr, print_state *state)
+{
+   FILE *fp = state->fp;
+   const unsigned num_inputs = nir_op_infos[instr->op].num_inputs;
+
+   print_alu_dest(&instr->dest, state);
+
+   fprintf(fp, " = %s", nir_op_infos[instr->op].name);
+   if (instr->dest.saturate)
+      fprintf(fp, ".sat");
+   fprintf(fp, " ");
+
+   for (unsigned s = 0; s < num_inputs; s++) {
+      if (s > 0)
+         fprintf(fp, ", ");
+
+      print_alu_src(instr, s, state);
+   }
+}
+
+/*
+ * Prints the value of constant `c`, interpreted according to `type`.
+ * Integer and boolean components are printed in hex, floats with "%f", and
+ * struct/array members recursively, each wrapped in "{ ... }".  Elements
+ * are separated by ", ".
+ */
+static void
+print_constant(nir_constant *c, const struct glsl_type *type, print_state *state)
+{
+   FILE *fp = state->fp;
+   unsigned total_elems = glsl_get_components(type);
+   unsigned i;
+
+   switch (glsl_get_base_type(type)) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_BOOL:
+      for (i = 0; i < total_elems; i++)
+         fprintf(fp, i > 0 ? ", 0x%08x" : "0x%08x", c->value.u[i]);
+      break;
+
+   case GLSL_TYPE_FLOAT:
+      for (i = 0; i < total_elems; i++)
+         fprintf(fp, i > 0 ? ", %f" : "%f", c->value.f[i]);
+      break;
+
+   case GLSL_TYPE_STRUCT:
+      for (i = 0; i < c->num_elements; i++) {
+         const struct glsl_type *field_type = glsl_get_struct_field(type, i);
+
+         fprintf(fp, i > 0 ? ", { " : "{ ");
+         print_constant(c->elements[i], field_type, state);
+         fprintf(fp, " }");
+      }
+      break;
+
+   case GLSL_TYPE_ARRAY:
+      for (i = 0; i < c->num_elements; i++) {
+         const struct glsl_type *elem_type = glsl_get_array_element(type);
+
+         fprintf(fp, i > 0 ? ", { " : "{ ");
+         print_constant(c->elements[i], elem_type, state);
+         fprintf(fp, " }");
+      }
+      break;
+
+   default:
+      unreachable("not reached");
+   }
+}
+
+/*
+ * Prints one "decl_var" line for `var`: its qualifiers, mode, interpolation
+ * qualifier, type and name, followed for in/out/uniform/shader-storage
+ * variables by "(location, driver_location)" and, if present, the constant
+ * initializer.
+ *
+ * When state->syms is set, a name that is already in the set gets "@<index>"
+ * appended to make it unique, and the name finally chosen is recorded in
+ * both state->syms and state->ht.
+ */
+static void
+print_var_decl(nir_variable *var, print_state *state)
+{
+   FILE *fp = state->fp;
+
+   fprintf(fp, "decl_var ");
+
+   const char *const cent = (var->data.centroid) ? "centroid " : "";
+   const char *const samp = (var->data.sample) ? "sample " : "";
+   const char *const patch = (var->data.patch) ? "patch " : "";
+   const char *const inv = (var->data.invariant) ? "invariant " : "";
+   /* Indexed by var->data.mode.  Every non-empty entry carries its own
+    * trailing space, because the format string below puts no separator
+    * between the mode and the interpolation qualifier.
+    */
+   const char *const mode[] = { "shader_in ", "shader_out ", "", "",
+                                "uniform ", "shader_storage ", "system " };
+
+   fprintf(fp, "%s%s%s%s%s%s ",
+           cent, samp, patch, inv, mode[var->data.mode],
+           glsl_interp_qualifier_name(var->data.interpolation));
+
+   glsl_print_type(var->type, fp);
+
+   struct set_entry *entry = NULL;
+   if (state->syms)
+      entry = _mesa_set_search(state->syms, var->name);
+
+   char *name;
+
+   if (entry != NULL) {
+      /* we have a collision with another name, append an @ + a unique index */
+      name = ralloc_asprintf(state->syms, "%s@%u", var->name, state->index++);
+   } else {
+      name = var->name;
+   }
+
+   fprintf(fp, " %s", name);
+
+   if (var->data.mode == nir_var_shader_in ||
+       var->data.mode == nir_var_shader_out ||
+       var->data.mode == nir_var_uniform ||
+       var->data.mode == nir_var_shader_storage) {
+      const char *loc = NULL;
+      /* Large enough for any 32-bit value in decimal plus the terminating
+       * NUL, so a numeric location is never truncated.
+       */
+      char buf[16];
+
+      /* Use the symbolic slot name where the stage and mode identify one. */
+      switch (state->shader->stage) {
+      case MESA_SHADER_VERTEX:
+         if (var->data.mode == nir_var_shader_in)
+            loc = gl_vert_attrib_name(var->data.location);
+         else if (var->data.mode == nir_var_shader_out)
+            loc = gl_varying_slot_name(var->data.location);
+         break;
+      case MESA_SHADER_GEOMETRY:
+         if ((var->data.mode == nir_var_shader_in) ||
+             (var->data.mode == nir_var_shader_out))
+            loc = gl_varying_slot_name(var->data.location);
+         break;
+      case MESA_SHADER_FRAGMENT:
+         if (var->data.mode == nir_var_shader_in)
+            loc = gl_varying_slot_name(var->data.location);
+         else if (var->data.mode == nir_var_shader_out)
+            loc = gl_frag_result_name(var->data.location);
+         break;
+      case MESA_SHADER_TESS_CTRL:
+      case MESA_SHADER_TESS_EVAL:
+      case MESA_SHADER_COMPUTE:
+      default:
+         /* TODO */
+         break;
+      }
+
+      /* Otherwise fall back to the raw number. */
+      if (!loc) {
+         snprintf(buf, sizeof(buf), "%u", (unsigned)var->data.location);
+         loc = buf;
+      }
+
+      fprintf(fp, " (%s, %u)", loc, var->data.driver_location);
+   }
+
+   if (var->constant_initializer) {
+      fprintf(fp, " = { ");
+      print_constant(var->constant_initializer, var->type, state);
+      fprintf(fp, " }");
+   }
+
+   fprintf(fp, "\n");
+
+   if (state->syms) {
+      _mesa_set_add(state->syms, name);
+      _mesa_hash_table_insert(state->ht, var, name);
+   }
+}
+
+/*
+ * Prints the name of a variable.  With a name table (state->ht) present the
+ * name recorded there is used and must exist; otherwise var->name is
+ * printed as is.
+ */
+static void
+print_var(nir_variable *var, print_state *state)
+{
+   FILE *fp = state->fp;
+   const char *name = var->name;
+
+   if (state->ht) {
+      struct hash_entry *entry = _mesa_hash_table_search(state->ht, var);
+
+      assert(entry != NULL);
+      name = entry->data;
+   }
+
+   fprintf(fp, "%s", name);
+}
+
+/* Prints the head of a deref chain, i.e. the variable it starts from. */
+static void
+print_deref_var(nir_deref_var *deref, print_state *state)
+{
+   nir_variable *var = deref->var;
+
+   print_var(var, state);
+}
+
+/*
+ * Prints an array dereference in brackets: "[N]" for a direct index,
+ * "[N + <src>]" or "[<src>]" for an indirect one, and "[*]" for a wildcard.
+ */
+static void
+print_deref_array(nir_deref_array *deref, print_state *state)
+{
+   FILE *fp = state->fp;
+
+   fprintf(fp, "[");
+
+   if (deref->deref_array_type == nir_deref_array_type_direct) {
+      fprintf(fp, "%u", deref->base_offset);
+   } else if (deref->deref_array_type == nir_deref_array_type_indirect) {
+      /* A zero base offset is left out of the printed expression. */
+      if (deref->base_offset != 0)
+         fprintf(fp, "%u + ", deref->base_offset);
+      print_src(&deref->indirect, state);
+   } else if (deref->deref_array_type == nir_deref_array_type_wildcard) {
+      fprintf(fp, "*");
+   }
+
+   fprintf(fp, "]");
+}
+
+/*
+ * Prints a struct member dereference as ".<field>", looking the field name
+ * up in `parent_type` by the deref's index.
+ */
+static void
+print_deref_struct(nir_deref_struct *deref, const struct glsl_type *parent_type,
+                   print_state *state)
+{
+   const char *field_name =
+      glsl_get_struct_elem_name(parent_type, deref->index);
+
+   fprintf(state->fp, ".%s", field_name);
+}
+
+/*
+ * Prints a whole deref chain by walking the child links from the variable
+ * at the head down to the last element.
+ */
+static void
+print_deref(nir_deref_var *deref, print_state *state)
+{
+   nir_deref *prev = NULL;
+
+   for (nir_deref *cur = &deref->deref; cur != NULL;
+        prev = cur, cur = cur->child) {
+      switch (cur->deref_type) {
+      case nir_deref_type_var:
+         /* The variable can only be the first link of the chain. */
+         assert(prev == NULL);
+         assert(cur == &deref->deref);
+         print_deref_var(deref, state);
+         break;
+
+      case nir_deref_type_array:
+         assert(prev != NULL);
+         print_deref_array(nir_deref_as_array(cur), state);
+         break;
+
+      case nir_deref_type_struct:
+         /* The field name is looked up in the type of the previous link. */
+         assert(prev != NULL);
+         print_deref_struct(nir_deref_as_struct(cur), prev->type, state);
+         break;
+
+      default:
+         unreachable("Invalid deref type");
+      }
+   }
+}
+
+/*
+ * Prints an intrinsic as
+ *
+ *    [<dest> = ]intrinsic <name> (<srcs>) (<variables>) (<const indices>)
+ *
+ * For the uniform/input/output load and store intrinsics, and only when a
+ * shader is available, the name of the first variable in the matching list
+ * whose driver_location equals const_index[0] is appended as a comment.
+ */
+static void
+print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
+{
+   const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
+   FILE *fp = state->fp;
+
+   if (info->has_dest) {
+      print_dest(&instr->dest, state);
+      fprintf(fp, " = ");
+   }
+
+   fprintf(fp, "intrinsic %s (", info->name);
+
+   const unsigned num_srcs = info->num_srcs;
+   for (unsigned s = 0; s < num_srcs; s++) {
+      if (s > 0)
+         fprintf(fp, ", ");
+
+      print_src(&instr->src[s], state);
+   }
+
+   fprintf(fp, ") (");
+
+   const unsigned num_vars = info->num_variables;
+   for (unsigned v = 0; v < num_vars; v++) {
+      if (v > 0)
+         fprintf(fp, ", ");
+
+      print_deref(instr->variables[v], state);
+   }
+
+   fprintf(fp, ") (");
+
+   const unsigned num_indices = info->num_indices;
+   for (unsigned c = 0; c < num_indices; c++) {
+      if (c > 0)
+         fprintf(fp, ", ");
+
+      fprintf(fp, "%d", instr->const_index[c]);
+   }
+
+   fprintf(fp, ")");
+
+   /* The variable-name annotation needs the shader's variable lists. */
+   if (!state->shader)
+      return;
+
+   struct exec_list *var_list = NULL;
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_uniform:
+      var_list = &state->shader->uniforms;
+      break;
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_per_vertex_input:
+      var_list = &state->shader->inputs;
+      break;
+   case nir_intrinsic_load_output:
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_store_per_vertex_output:
+      var_list = &state->shader->outputs;
+      break;
+   default:
+      return;
+   }
+
+   nir_foreach_variable(var, var_list) {
+      if (var->name &&
+          (var->data.driver_location == instr->const_index[0])) {
+         fprintf(fp, "\t/* %s */", var->name);
+         break;
+      }
+   }
+}
+
+/*
+ * Prints a texture instruction as
+ *
+ *    <dest> = <op> <src> (<src type>), ... [offsets] [gather comp] <sampler> (sampler)
+ *
+ * The constant offset vector is only printed when one of its four entries
+ * is non-zero, and the gather component only for tg4.
+ */
+static void
+print_tex_instr(nir_tex_instr *instr, print_state *state)
+{
+   FILE *fp = state->fp;
+
+   print_dest(&instr->dest, state);
+
+   fprintf(fp, " = ");
+
+   const char *op_name;
+   switch (instr->op) {
+   case nir_texop_tex:               op_name = "tex";               break;
+   case nir_texop_txb:               op_name = "txb";               break;
+   case nir_texop_txl:               op_name = "txl";               break;
+   case nir_texop_txd:               op_name = "txd";               break;
+   case nir_texop_txf:               op_name = "txf";               break;
+   case nir_texop_txf_ms:            op_name = "txf_ms";            break;
+   case nir_texop_txs:               op_name = "txs";               break;
+   case nir_texop_lod:               op_name = "lod";               break;
+   case nir_texop_tg4:               op_name = "tg4";               break;
+   case nir_texop_query_levels:      op_name = "query_levels";      break;
+   case nir_texop_texture_samples:   op_name = "texture_samples";   break;
+   case nir_texop_samples_identical: op_name = "samples_identical"; break;
+   default:
+      unreachable("Invalid texture operation");
+      break;
+   }
+   fprintf(fp, "%s ", op_name);
+
+   for (unsigned i = 0; i < instr->num_srcs; i++) {
+      print_src(&instr->src[i].src, state);
+
+      fprintf(fp, " ");
+
+      const char *type_name;
+      switch (instr->src[i].src_type) {
+      case nir_tex_src_coord:          type_name = "coord";          break;
+      case nir_tex_src_projector:      type_name = "projector";      break;
+      case nir_tex_src_comparitor:     type_name = "comparitor";     break;
+      case nir_tex_src_offset:         type_name = "offset";         break;
+      case nir_tex_src_bias:           type_name = "bias";           break;
+      case nir_tex_src_lod:            type_name = "lod";            break;
+      case nir_tex_src_ms_index:       type_name = "ms_index";       break;
+      case nir_tex_src_ddx:            type_name = "ddx";            break;
+      case nir_tex_src_ddy:            type_name = "ddy";            break;
+      case nir_tex_src_sampler_offset: type_name = "sampler_offset"; break;
+      default:
+         unreachable("Invalid texture source type");
+         break;
+      }
+      fprintf(fp, "(%s)", type_name);
+
+      fprintf(fp, ", ");
+   }
+
+   const bool has_nonzero_offset = instr->const_offset[0] != 0 ||
+                                   instr->const_offset[1] != 0 ||
+                                   instr->const_offset[2] != 0 ||
+                                   instr->const_offset[3] != 0;
+
+   if (has_nonzero_offset) {
+      fprintf(fp, "[%i %i %i %i] (offset), ",
+              instr->const_offset[0], instr->const_offset[1],
+              instr->const_offset[2], instr->const_offset[3]);
+   }
+
+   if (instr->op == nir_texop_tg4)
+      fprintf(fp, "%u (gather_component), ", instr->component);
+
+   /* A sampler deref, when present, is printed instead of the index. */
+   if (instr->sampler)
+      print_deref(instr->sampler, state);
+   else
+      fprintf(fp, "%u", instr->sampler_index);
+
+   fprintf(fp, " (sampler)");
+}
+
+/*
+ * Print a call instruction as "call <callee> <param>, ..." followed by
+ * "returning <deref>" when the call has a return deref.
+ */
+static void
+print_call_instr(nir_call_instr *instr, print_state *state)
+{
+   FILE *fp = state->fp;
+
+   fprintf(fp, "call %s ", instr->callee->name);
+
+   for (unsigned p = 0; p < instr->num_params; p++) {
+      if (p > 0)
+         fprintf(fp, ", ");
+
+      print_deref(instr->params[p], state);
+   }
+
+   if (instr->return_deref == NULL)
+      return;
+
+   /* Separate the return deref from the parameter list, if there is one. */
+   if (instr->num_params != 0)
+      fprintf(fp, ", ");
+   fprintf(fp, "returning ");
+   print_deref(instr->return_deref, state);
+}
+
+/*
+ * Print a load_const instruction. The type a constant will be used as (float
+ * or int) is not known here, so every component is printed as raw hex for
+ * fidelity with its float interpretation in a trailing comment for
+ * readability.
+ */
+static void
+print_load_const_instr(nir_load_const_instr *instr, print_state *state)
+{
+   FILE *fp = state->fp;
+   const unsigned count = instr->def.num_components;
+
+   print_ssa_def(&instr->def, state);
+
+   fprintf(fp, " = load_const (");
+
+   for (unsigned c = 0; c < count; c++) {
+      if (c > 0)
+         fprintf(fp, ", ");
+
+      fprintf(fp, "0x%08x /* %f */", instr->value.u[c], instr->value.f[c]);
+   }
+
+   fprintf(fp, ")");
+}
+
+/*
+ * Print the keyword for a jump instruction ("break", "continue" or
+ * "return"). Nothing is printed for any other jump type.
+ */
+static void
+print_jump_instr(nir_jump_instr *instr, print_state *state)
+{
+   const char *keyword = NULL;
+
+   switch (instr->type) {
+   case nir_jump_break:
+      keyword = "break";
+      break;
+
+   case nir_jump_continue:
+      keyword = "continue";
+      break;
+
+   case nir_jump_return:
+      keyword = "return";
+      break;
+   }
+
+   if (keyword != NULL)
+      fprintf(state->fp, "%s", keyword);
+}
+
+static void
+print_ssa_undef_instr(nir_ssa_undef_instr* instr, print_state *state)
+{
+ FILE *fp = state->fp;
+ print_ssa_def(&instr->def, state);
+ fprintf(fp, " = undefined");
+}
+
+/*
+ * Print a phi instruction as "<dest> = phi block_N: <src>, block_M: <src>",
+ * one "block: source" pair per phi source.
+ */
+static void
+print_phi_instr(nir_phi_instr *instr, print_state *state)
+{
+   FILE *fp = state->fp;
+   bool first = true;
+
+   print_dest(&instr->dest, state);
+   fprintf(fp, " = phi ");
+
+   nir_foreach_phi_src(instr, src) {
+      /* Every source except the first one is preceded by a separator. */
+      if (!first)
+         fprintf(fp, ", ");
+      first = false;
+
+      fprintf(fp, "block_%u: ", src->pred->index);
+      print_src(&src->src, state);
+   }
+}
+
+/*
+ * Print a parallel copy as a "; "-separated list of "<dest> = <src>"
+ * entries.
+ */
+static void
+print_parallel_copy_instr(nir_parallel_copy_instr *instr, print_state *state)
+{
+   FILE *fp = state->fp;
+   bool first = true;
+
+   nir_foreach_parallel_copy_entry(instr, entry) {
+      /* Every entry except the first one is preceded by a separator. */
+      if (!first)
+         fprintf(fp, "; ");
+      first = false;
+
+      print_dest(&entry->dest, state);
+      fprintf(fp, " = ");
+      print_src(&entry->src, state);
+   }
+}
+
+/*
+ * Print a single instruction, indented by `tabs` tab stops, by dispatching
+ * on the instruction type to the matching type-specific printer. No
+ * trailing newline is emitted.
+ */
+static void
+print_instr(const nir_instr *instr, print_state *state, unsigned tabs)
+{
+   print_tabs(tabs, state->fp);
+
+   switch (instr->type) {
+   case nir_instr_type_alu:
+      print_alu_instr(nir_instr_as_alu(instr), state);
+      return;
+
+   case nir_instr_type_call:
+      print_call_instr(nir_instr_as_call(instr), state);
+      return;
+
+   case nir_instr_type_intrinsic:
+      print_intrinsic_instr(nir_instr_as_intrinsic(instr), state);
+      return;
+
+   case nir_instr_type_tex:
+      print_tex_instr(nir_instr_as_tex(instr), state);
+      return;
+
+   case nir_instr_type_load_const:
+      print_load_const_instr(nir_instr_as_load_const(instr), state);
+      return;
+
+   case nir_instr_type_jump:
+      print_jump_instr(nir_instr_as_jump(instr), state);
+      return;
+
+   case nir_instr_type_ssa_undef:
+      print_ssa_undef_instr(nir_instr_as_ssa_undef(instr), state);
+      return;
+
+   case nir_instr_type_phi:
+      print_phi_instr(nir_instr_as_phi(instr), state);
+      return;
+
+   case nir_instr_type_parallel_copy:
+      print_parallel_copy_instr(nir_instr_as_parallel_copy(instr), state);
+      return;
+
+   default:
+      unreachable("Invalid instruction type");
+      return;
+   }
+}
+
+/*
+ * qsort() comparator ordering nir_block pointers by ascending block index.
+ *
+ * p1 and p2 point at array elements of type nir_block *. The result is
+ * negative, zero or positive as required by qsort(). The indices are
+ * compared directly rather than subtracted: the index is printed with %u
+ * elsewhere in this file, and subtracting two values cast to int can
+ * overflow or yield the wrong sign for large indices.
+ */
+static int
+compare_block_index(const void *p1, const void *p2)
+{
+   const nir_block *block1 = *(const nir_block *const *) p1;
+   const nir_block *block2 = *(const nir_block *const *) p2;
+
+   return (block1->index > block2->index) - (block1->index < block2->index);
+}
+
+static void print_cf_node(nir_cf_node *node, print_state *state,
+ unsigned tabs);
+
+/*
+ * Print one basic block, indented by `tabs` tab stops: a "block block_N:"
+ * header, a comment listing the predecessors, every instruction of the
+ * block on its own line, and a comment listing the successors.
+ */
+static void
+print_block(nir_block *block, print_state *state, unsigned tabs)
+{
+   FILE *fp = state->fp;
+   const unsigned num_preds = block->predecessors->entries;
+   struct set_entry *entry;
+
+   print_tabs(tabs, fp);
+   fprintf(fp, "block block_%u:\n", block->index);
+
+   /* Sort the predecessors by index so we consistently print the same thing.
+    * Sorting needs a scratch array; do not request a zero-sized allocation.
+    */
+   nir_block **preds =
+      num_preds != 0 ? malloc(num_preds * sizeof(*preds)) : NULL;
+
+   print_tabs(tabs, fp);
+   fprintf(fp, "/* preds: ");
+   if (preds != NULL) {
+      unsigned count = 0;
+      set_foreach(block->predecessors, entry) {
+         preds[count++] = (nir_block *) entry->key;
+      }
+
+      qsort(preds, num_preds, sizeof(*preds), compare_block_index);
+
+      for (unsigned p = 0; p < num_preds; p++) {
+         fprintf(fp, "block_%u ", preds[p]->index);
+      }
+
+      free(preds);
+   } else {
+      /* Either there are no predecessors or the scratch allocation failed.
+       * Fall back to printing in set iteration order instead of writing
+       * through a NULL pointer; for an empty set this prints nothing.
+       */
+      set_foreach(block->predecessors, entry) {
+         fprintf(fp, "block_%u ", ((const nir_block *) entry->key)->index);
+      }
+   }
+   fprintf(fp, "*/\n");
+
+   nir_foreach_instr(block, instr) {
+      print_instr(instr, state, tabs);
+      fprintf(fp, "\n");
+   }
+
+   print_tabs(tabs, fp);
+   fprintf(fp, "/* succs: ");
+   for (unsigned s = 0; s < 2; s++) {
+      if (block->successors[s]) {
+         fprintf(fp, "block_%u ", block->successors[s]->index);
+      }
+   }
+   fprintf(fp, "*/\n");
+}
+
+/*
+ * Print an if statement: the condition source, then the then-list and the
+ * else-list, each printed one indentation level deeper than the "if".
+ */
+static void
+print_if(nir_if *if_stmt, print_state *state, unsigned tabs)
+{
+   FILE *fp = state->fp;
+   const unsigned inner = tabs + 1;
+
+   print_tabs(tabs, fp);
+   fprintf(fp, "if ");
+   print_src(&if_stmt->condition, state);
+   fprintf(fp, " {\n");
+
+   foreach_list_typed(nir_cf_node, child, node, &if_stmt->then_list)
+      print_cf_node(child, state, inner);
+
+   print_tabs(tabs, fp);
+   fprintf(fp, "} else {\n");
+
+   foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list)
+      print_cf_node(child, state, inner);
+
+   print_tabs(tabs, fp);
+   fprintf(fp, "}\n");
+}
+
+/*
+ * Print a loop as "loop { ... }" with every control-flow node of the body
+ * printed one indentation level deeper.
+ */
+static void
+print_loop(nir_loop *loop, print_state *state, unsigned tabs)
+{
+   FILE *fp = state->fp;
+
+   print_tabs(tabs, fp);
+   fprintf(fp, "loop {\n");
+
+   foreach_list_typed(nir_cf_node, child, node, &loop->body)
+      print_cf_node(child, state, tabs + 1);
+
+   print_tabs(tabs, fp);
+   fprintf(fp, "}\n");
+}
+
+/*
+ * Print a control-flow node by dispatching on its type to the block, if or
+ * loop printer.
+ */
+static void
+print_cf_node(nir_cf_node *node, print_state *state, unsigned int tabs)
+{
+   switch (node->type) {
+   case nir_cf_node_block:
+      print_block(nir_cf_node_as_block(node), state, tabs);
+      return;
+
+   case nir_cf_node_if:
+      print_if(nir_cf_node_as_if(node), state, tabs);
+      return;
+
+   case nir_cf_node_loop:
+      print_loop(nir_cf_node_as_loop(node), state, tabs);
+      return;
+
+   default:
+      unreachable("Invalid CFG node type");
+   }
+}
+
+/*
+ * Print a function implementation: an "impl <name>" header with the
+ * parameter and return variables, the local variable and register
+ * declarations, the body, and finally the end block.
+ *
+ * Blocks are (re)indexed via nir_index_blocks() before the body is printed.
+ */
+static void
+print_function_impl(nir_function_impl *impl, print_state *state)
+{
+   FILE *fp = state->fp;
+
+   fprintf(fp, "\nimpl %s ", impl->function->name);
+
+   for (unsigned p = 0; p < impl->num_params; p++) {
+      if (p > 0)
+         fprintf(fp, ", ");
+
+      print_var(impl->params[p], state);
+   }
+
+   if (impl->return_var != NULL) {
+      /* Separate the return variable from the parameters, if any. */
+      if (impl->num_params != 0)
+         fprintf(fp, ", ");
+      fprintf(fp, "returning ");
+      print_var(impl->return_var, state);
+   }
+
+   fprintf(fp, "{\n");
+
+   nir_foreach_variable(local, &impl->locals) {
+      fprintf(fp, "\t");
+      print_var_decl(local, state);
+   }
+
+   foreach_list_typed(nir_register, local_reg, node, &impl->registers) {
+      fprintf(fp, "\t");
+      print_register_decl(local_reg, state);
+   }
+
+   nir_index_blocks(impl);
+
+   foreach_list_typed(nir_cf_node, child, node, &impl->body)
+      print_cf_node(child, state, 1);
+
+   fprintf(fp, "\tblock block_%u:\n}\n\n", impl->end_block->index);
+}
+
+/*
+ * Print a function declaration ("decl_function <name>" with the direction
+ * and type of each parameter and the return type, if any), followed by the
+ * implementation when the function has one.
+ */
+static void
+print_function(nir_function *function, print_state *state)
+{
+   FILE *fp = state->fp;
+
+   fprintf(fp, "decl_function %s ", function->name);
+
+   for (unsigned p = 0; p < function->num_params; p++) {
+      const char *direction = "";
+
+      if (p > 0)
+         fprintf(fp, ", ");
+
+      switch (function->params[p].param_type) {
+      case nir_parameter_in:
+         direction = "in ";
+         break;
+      case nir_parameter_out:
+         direction = "out ";
+         break;
+      case nir_parameter_inout:
+         direction = "inout ";
+         break;
+      default:
+         unreachable("Invalid parameter type");
+      }
+      fprintf(fp, "%s", direction);
+
+      glsl_print_type(function->params[p].type, fp);
+   }
+
+   if (function->return_type != NULL) {
+      /* Separate the return type from the parameters, if any. */
+      if (function->num_params != 0)
+         fprintf(fp, ", ");
+      fprintf(fp, "returning ");
+      glsl_print_type(function->return_type, fp);
+   }
+
+   fprintf(fp, "\n");
+
+   if (function->impl != NULL)
+      print_function_impl(function->impl, state);
+}
+
+/*
+ * Initialize a print_state for printing `shader` to `fp`: create the
+ * pointer-keyed hash table and the string-keyed symbol set and reset the
+ * running index to zero. Pair with destroy_print_state().
+ */
+static void
+init_print_state(print_state *state, nir_shader *shader, FILE *fp)
+{
+   state->shader = shader;
+   state->fp = fp;
+   state->index = 0;
+
+   state->ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                       _mesa_key_pointer_equal);
+   state->syms = _mesa_set_create(NULL, _mesa_key_hash_string,
+                                  _mesa_key_string_equal);
+}
+
+/*
+ * Release the hash table and symbol set owned by a print_state. No
+ * per-entry delete callback is passed to either destructor.
+ */
+static void
+destroy_print_state(print_state *state)
+{
+   struct hash_table *names = state->ht;
+   struct set *symbols = state->syms;
+
+   _mesa_hash_table_destroy(names, NULL);
+   _mesa_set_destroy(symbols, NULL);
+}
+
+/*
+ * Print a whole shader to `fp`: the stage, the optional name and label, the
+ * input/output/uniform counts, the declarations of all shader-level
+ * variables and registers, and then every function.
+ */
+void
+nir_print_shader(nir_shader *shader, FILE *fp)
+{
+   print_state state;
+   init_print_state(&state, shader, fp);
+
+   fprintf(fp, "shader: %s\n", gl_shader_stage_name(shader->stage));
+
+   if (shader->info.name)
+      fprintf(fp, "name: %s\n", shader->info.name);
+
+   if (shader->info.label)
+      fprintf(fp, "label: %s\n", shader->info.label);
+
+   fprintf(fp, "inputs: %u\n", shader->num_inputs);
+   fprintf(fp, "outputs: %u\n", shader->num_outputs);
+   fprintf(fp, "uniforms: %u\n", shader->num_uniforms);
+
+   /* Variable lists, in the order their declarations are printed. */
+   struct exec_list *const var_lists[] = {
+      &shader->uniforms,
+      &shader->inputs,
+      &shader->outputs,
+      &shader->globals,
+      &shader->system_values,
+   };
+
+   for (unsigned l = 0; l < sizeof(var_lists) / sizeof(var_lists[0]); l++) {
+      nir_foreach_variable(var, var_lists[l]) {
+         print_var_decl(var, &state);
+      }
+   }
+
+   foreach_list_typed(nir_register, global_reg, node, &shader->registers)
+      print_register_decl(global_reg, &state);
+
+   foreach_list_typed(nir_function, func, node, &shader->functions)
+      print_function(func, &state);
+
+   destroy_print_state(&state);
+}
+
+/*
+ * Print a single instruction to `fp` with no indentation and no trailing
+ * newline.
+ *
+ * Only the output stream of the temporary print_state is set; every other
+ * member is zero-initialized, so no shader, hash table or symbol set is
+ * available to the printers.
+ * NOTE(review): printers that dereference state->shader (the variable-name
+ * lookup above reads state->shader->uniforms) presumably must not be
+ * reached with this state -- confirm.
+ */
+void
+nir_print_instr(const nir_instr *instr, FILE *fp)
+{
+   print_state state = { .fp = fp };
+
+   print_instr(instr, &state, 0);
+}
diff --git a/src/compiler/nir/nir_remove_dead_variables.c b/src/compiler/nir/nir_remove_dead_variables.c
new file mode 100644
index 00000000000..db754e56b1c
--- /dev/null
+++ b/src/compiler/nir/nir_remove_dead_variables.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#include "nir.h"
+
+/* Add the variable behind each of an intrinsic's variable derefs to `live`. */
+static void
+add_var_use_intrinsic(nir_intrinsic_instr *instr, struct set *live)
+{
+   const unsigned count = nir_intrinsic_infos[instr->intrinsic].num_variables;
+
+   for (unsigned v = 0; v != count; ++v)
+      _mesa_set_add(live, instr->variables[v]->var);
+}
+
+/*
+ * Add the variables referenced by a call to `live`: the return deref's
+ * variable (when present) and the variable of every parameter deref.
+ */
+static void
+add_var_use_call(nir_call_instr *instr, struct set *live)
+{
+   if (instr->return_deref != NULL)
+      _mesa_set_add(live, instr->return_deref->var);
+
+   for (unsigned p = 0; p < instr->num_params; p++)
+      _mesa_set_add(live, instr->params[p]->var);
+}
+
+/* Add a texture instruction's sampler variable, if it has one, to `live`. */
+static void
+add_var_use_tex(nir_tex_instr *instr, struct set *live)
+{
+   if (instr->sampler == NULL)
+      return;
+
+   _mesa_set_add(live, instr->sampler->var);
+}
+
+/*
+ * Per-block callback: record the variables used by every intrinsic, call
+ * and texture instruction of `block` in the live set passed as `state`.
+ * Other instruction types are ignored. Always returns true.
+ */
+static bool
+add_var_use_block(nir_block *block, void *state)
+{
+   struct set *live = state;
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type == nir_instr_type_intrinsic)
+         add_var_use_intrinsic(nir_instr_as_intrinsic(instr), live);
+      else if (instr->type == nir_instr_type_call)
+         add_var_use_call(nir_instr_as_call(instr), live);
+      else if (instr->type == nir_instr_type_tex)
+         add_var_use_tex(nir_instr_as_tex(instr), live);
+   }
+
+   return true;
+}
+
+/*
+ * Collect into `live` every variable used by any block of any function of
+ * `shader` that has an implementation.
+ */
+static void
+add_var_use_shader(nir_shader *shader, struct set *live)
+{
+   nir_foreach_function(shader, func) {
+      if (func->impl != NULL)
+         nir_foreach_block(func->impl, add_var_use_block, live);
+   }
+}
+
+/*
+ * Unlink and free every variable of `var_list` that is not in the `live`
+ * set. Returns true when at least one variable was removed.
+ */
+static bool
+remove_dead_vars(struct exec_list *var_list, struct set *live)
+{
+   bool removed_any = false;
+
+   /* The safe iterator is required because entries are unlinked and freed
+    * while walking the list.
+    */
+   foreach_list_typed_safe(nir_variable, candidate, node, var_list) {
+      if (_mesa_set_search(live, candidate) == NULL) {
+         exec_node_remove(&candidate->node);
+         ralloc_free(candidate);
+         removed_any = true;
+      }
+   }
+
+   return removed_any;
+}
+
+/*
+ * Remove shader globals and function locals that are not referenced by any
+ * intrinsic, call or texture instruction.
+ *
+ * For each function implementation that lost a local, block-index,
+ * dominance and live-SSA-def metadata are marked as preserved. Returns true
+ * when any variable was removed.
+ */
+bool
+nir_remove_dead_variables(nir_shader *shader)
+{
+   struct set *live =
+      _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+
+   add_var_use_shader(shader, live);
+
+   bool progress = remove_dead_vars(&shader->globals, live);
+
+   nir_foreach_function(shader, func) {
+      if (func->impl && remove_dead_vars(&func->impl->locals, live)) {
+         nir_metadata_preserve(func->impl, nir_metadata_block_index |
+                                           nir_metadata_dominance |
+                                           nir_metadata_live_ssa_defs);
+         progress = true;
+      }
+   }
+
+   _mesa_set_destroy(live, NULL);
+   return progress;
+}
diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c
new file mode 100644
index 00000000000..56d7e8162f3
--- /dev/null
+++ b/src/compiler/nir/nir_search.c
@@ -0,0 +1,379 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ *
+ */
+
+#include "nir_search.h"
+
+/*
+ * Bookkeeping for one match attempt: which search variables have been bound
+ * so far and the ALU source each of them was bound to.
+ */
+struct match_state {
+ /* Bitmask; bit N is set once search variable N has been bound. */
+ unsigned variables_seen;
+ /* Source and swizzle bound to each variable; only meaningful for
+  * variables whose bit is set in variables_seen.
+  */
+ nir_alu_src variables[NIR_SEARCH_MAX_VARIABLES];
+};
+
+static bool
+match_expression(const nir_search_expression *expr, nir_alu_instr *instr,
+ unsigned num_components, const uint8_t *swizzle,
+ struct match_state *state);
+
+static const uint8_t identity_swizzle[] = { 0, 1, 2, 3 };
+
+static bool alu_instr_is_bool(nir_alu_instr *instr);
+
+/*
+ * Returns true when `src` is an SSA value produced by an ALU instruction
+ * that alu_instr_is_bool() considers boolean.
+ */
+static bool
+src_is_bool(nir_src src)
+{
+   return src.is_ssa &&
+          src.ssa->parent_instr->type == nir_instr_type_alu &&
+          alu_instr_is_bool(nir_instr_as_alu(src.ssa->parent_instr));
+}
+
+/*
+ * Returns true when an ALU instruction produces a boolean. iand/ior/ixor
+ * count as boolean when both of their sources are boolean, inot when its
+ * single source is; any other opcode is judged by its declared output type.
+ */
+static bool
+alu_instr_is_bool(nir_alu_instr *instr)
+{
+   const nir_op op = instr->op;
+
+   if (op == nir_op_inot)
+      return src_is_bool(instr->src[0].src);
+
+   if (op == nir_op_iand || op == nir_op_ior || op == nir_op_ixor)
+      return src_is_bool(instr->src[0].src) && src_is_bool(instr->src[1].src);
+
+   return nir_op_infos[op].output_type == nir_type_bool;
+}
+
+/*
+ * Try to match source number `src` of `instr` against the search value
+ * `value`.
+ *
+ * `num_components` and `swizzle` describe which components of the source
+ * are of interest; both are reset when the opcode gives that source an
+ * explicit size. `state` records variable bindings made so far and is
+ * updated when a variable is bound for the first time.
+ *
+ * Returns true on a match. A source that is not an SSA value never matches
+ * an expression, a constant, or a first-time variable binding that is
+ * restricted to constants or to a type, because all of those have to
+ * inspect the instruction that produced the value.
+ */
+static bool
+match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
+            unsigned num_components, const uint8_t *swizzle,
+            struct match_state *state)
+{
+   uint8_t new_swizzle[4];
+
+   /* If the source is an explicitly sized source, then we need to reset
+    * both the number of components and the swizzle.
+    */
+   if (nir_op_infos[instr->op].input_sizes[src] != 0) {
+      num_components = nir_op_infos[instr->op].input_sizes[src];
+      swizzle = identity_swizzle;
+   }
+
+   /* Compose the incoming swizzle with the source's own swizzle. */
+   for (unsigned i = 0; i < num_components; ++i)
+      new_swizzle[i] = instr->src[src].swizzle[swizzle[i]];
+
+   switch (value->type) {
+   case nir_search_value_expression:
+      if (!instr->src[src].src.is_ssa)
+         return false;
+
+      if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
+         return false;
+
+      return match_expression(nir_search_value_as_expression(value),
+                              nir_instr_as_alu(instr->src[src].src.ssa->parent_instr),
+                              num_components, new_swizzle, state);
+
+   case nir_search_value_variable: {
+      nir_search_variable *var = nir_search_value_as_variable(value);
+      assert(var->variable < NIR_SEARCH_MAX_VARIABLES);
+
+      if (state->variables_seen & (1 << var->variable)) {
+         /* Already bound: this occurrence must refer to the same source
+          * with the same swizzle.
+          */
+         if (!nir_srcs_equal(state->variables[var->variable].src,
+                             instr->src[src].src))
+            return false;
+
+         assert(!instr->src[src].abs && !instr->src[src].negate);
+
+         for (unsigned i = 0; i < num_components; ++i) {
+            if (state->variables[var->variable].swizzle[i] != new_swizzle[i])
+               return false;
+         }
+
+         return true;
+      } else {
+         /* The constant and type restrictions look at the producing
+          * instruction, which only exists for SSA sources; never read the
+          * ssa member of a non-SSA source.
+          */
+         const bool is_ssa = instr->src[src].src.is_ssa;
+
+         if (var->is_constant &&
+             (!is_ssa ||
+              instr->src[src].src.ssa->parent_instr->type != nir_instr_type_load_const))
+            return false;
+
+         if (var->type != nir_type_invalid) {
+            if (!is_ssa ||
+                instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
+               return false;
+
+            nir_alu_instr *src_alu =
+               nir_instr_as_alu(instr->src[src].src.ssa->parent_instr);
+
+            if (nir_op_infos[src_alu->op].output_type != var->type &&
+                !(var->type == nir_type_bool && alu_instr_is_bool(src_alu)))
+               return false;
+         }
+
+         /* First occurrence: bind the variable to this source. */
+         state->variables_seen |= (1 << var->variable);
+         state->variables[var->variable].src = instr->src[src].src;
+         state->variables[var->variable].abs = false;
+         state->variables[var->variable].negate = false;
+
+         for (unsigned i = 0; i < 4; ++i) {
+            if (i < num_components)
+               state->variables[var->variable].swizzle[i] = new_swizzle[i];
+            else
+               state->variables[var->variable].swizzle[i] = 0;
+         }
+
+         return true;
+      }
+   }
+
+   case nir_search_value_constant: {
+      nir_search_constant *const_val = nir_search_value_as_constant(value);
+
+      if (!instr->src[src].src.is_ssa)
+         return false;
+
+      if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_load_const)
+         return false;
+
+      nir_load_const_instr *load =
+         nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr);
+
+      /* Every selected component must equal the searched-for constant,
+       * compared according to the type the opcode expects for this source.
+       */
+      switch (nir_op_infos[instr->op].input_types[src]) {
+      case nir_type_float:
+         for (unsigned i = 0; i < num_components; ++i) {
+            if (load->value.f[new_swizzle[i]] != const_val->data.f)
+               return false;
+         }
+         return true;
+      case nir_type_int:
+      case nir_type_uint:
+      case nir_type_bool:
+         for (unsigned i = 0; i < num_components; ++i) {
+            if (load->value.i[new_swizzle[i]] != const_val->data.i)
+               return false;
+         }
+         return true;
+      default:
+         unreachable("Invalid alu source type");
+      }
+   }
+
+   default:
+      unreachable("Invalid search value type");
+   }
+}
+
+/*
+ * Try to match the ALU instruction `instr` against the search expression
+ * `expr`.
+ *
+ * The opcodes must be equal and every source must match the corresponding
+ * expression source via match_value(). For opcodes flagged commutative, a
+ * failed first attempt is retried with the two sources swapped.
+ *
+ * `num_components`/`swizzle` describe the components of the result that are
+ * of interest. `state` accumulates variable bindings; only its
+ * variables_seen bitmask is rolled back before the commutative retry.
+ *
+ * Returns true when the instruction matches.
+ */
+static bool
+match_expression(const nir_search_expression *expr, nir_alu_instr *instr,
+ unsigned num_components, const uint8_t *swizzle,
+ struct match_state *state)
+{
+ if (instr->op != expr->opcode)
+ return false;
+
+ assert(!instr->dest.saturate);
+ assert(nir_op_infos[instr->op].num_inputs > 0);
+
+ /* If we have an explicitly sized destination, we can only handle the
+ * identity swizzle. While dot(vec3(a, b, c).zxy) is a valid
+ * expression, we don't have the information right now to propagate that
+ * swizzle through. We can only properly propagate swizzles if the
+ * instruction is vectorized.
+ */
+ if (nir_op_infos[instr->op].output_size != 0) {
+ for (unsigned i = 0; i < num_components; i++) {
+ if (swizzle[i] != i)
+ return false;
+ }
+ }
+
+ /* Stash off the current variables_seen bitmask. This way we can
+ * restore it prior to matching in the commutative case below.
+ */
+ unsigned variables_seen_stash = state->variables_seen;
+
+ /* First attempt: sources in their written order. */
+ bool matched = true;
+ for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+ if (!match_value(expr->srcs[i], instr, i, num_components,
+ swizzle, state)) {
+ matched = false;
+ break;
+ }
+ }
+
+ if (matched)
+ return true;
+
+ if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
+ assert(nir_op_infos[instr->op].num_inputs == 2);
+
+ /* Restore the variables_seen bitmask. If we don't do this, then we
+ * could end up with an erroneous failure due to variables found in the
+ * first match attempt above not matching those in the second.
+ */
+ state->variables_seen = variables_seen_stash;
+
+ /* Second attempt: expression source 0 against instruction source 1
+ * and vice versa.
+ */
+ if (!match_value(expr->srcs[0], instr, 1, num_components,
+ swizzle, state))
+ return false;
+
+ return match_value(expr->srcs[1], instr, 0, num_components,
+ swizzle, state);
+ } else {
+ return false;
+ }
+}
+
+/*
+ * Build the replacement value described by `value` and return an ALU source
+ * that refers to it.
+ *
+ *  - expression: a new ALU instruction is created (recursively constructing
+ *    its sources) and inserted before `instr`; the returned source reads
+ *    its SSA destination with the identity swizzle.
+ *  - variable: the source bound to that variable in `state` is copied.
+ *  - constant: a one-component load_const is created and inserted before
+ *    `instr`; the returned source reads component 0 in every lane.
+ *
+ * `type` selects how a constant is stored (float, int, or uint/bool) and is
+ * only used for constants. `num_components` is the number of components the
+ * consumer expects. New instructions are allocated from `mem_ctx`.
+ */
+static nir_alu_src
+construct_value(const nir_search_value *value, nir_alu_type type,
+                unsigned num_components, struct match_state *state,
+                nir_instr *instr, void *mem_ctx)
+{
+   switch (value->type) {
+   case nir_search_value_expression: {
+      const nir_search_expression *expr = nir_search_value_as_expression(value);
+
+      if (nir_op_infos[expr->opcode].output_size != 0)
+         num_components = nir_op_infos[expr->opcode].output_size;
+
+      nir_alu_instr *alu = nir_alu_instr_create(mem_ctx, expr->opcode);
+      nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components, NULL);
+      alu->dest.write_mask = (1 << num_components) - 1;
+      alu->dest.saturate = false;
+
+      for (unsigned i = 0; i < nir_op_infos[expr->opcode].num_inputs; i++) {
+         /* If the source is an explicitly sized source, then it uses that
+          * size; otherwise it uses the destination's component count. The
+          * size is tracked per source so that an explicitly sized source
+          * does not change the size used for a later unsized one.
+          */
+         unsigned src_components = num_components;
+         if (nir_op_infos[alu->op].input_sizes[i] != 0)
+            src_components = nir_op_infos[alu->op].input_sizes[i];
+
+         alu->src[i] = construct_value(expr->srcs[i],
+                                       nir_op_infos[alu->op].input_types[i],
+                                       src_components,
+                                       state, instr, mem_ctx);
+      }
+
+      nir_instr_insert_before(instr, &alu->instr);
+
+      nir_alu_src val;
+      val.src = nir_src_for_ssa(&alu->dest.dest.ssa);
+      val.negate = false;
+      val.abs = false;
+      memcpy(val.swizzle, identity_swizzle, sizeof val.swizzle);
+
+      return val;
+   }
+
+   case nir_search_value_variable: {
+      const nir_search_variable *var = nir_search_value_as_variable(value);
+      assert(state->variables_seen & (1 << var->variable));
+
+      nir_alu_src val = { NIR_SRC_INIT };
+      nir_alu_src_copy(&val, &state->variables[var->variable], mem_ctx);
+
+      assert(!var->is_constant);
+
+      return val;
+   }
+
+   case nir_search_value_constant: {
+      const nir_search_constant *c = nir_search_value_as_constant(value);
+      nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, 1);
+
+      /* Float and int constants also get a printable name; uint and bool
+       * constants are left unnamed.
+       */
+      switch (type) {
+      case nir_type_float:
+         load->def.name = ralloc_asprintf(mem_ctx, "%f", c->data.f);
+         load->value.f[0] = c->data.f;
+         break;
+      case nir_type_int:
+         load->def.name = ralloc_asprintf(mem_ctx, "%d", c->data.i);
+         load->value.i[0] = c->data.i;
+         break;
+      case nir_type_uint:
+      case nir_type_bool:
+         load->value.u[0] = c->data.u;
+         break;
+      default:
+         unreachable("Invalid alu source type");
+      }
+
+      nir_instr_insert_before(instr, &load->instr);
+
+      nir_alu_src val;
+      val.src = nir_src_for_ssa(&load->def);
+      val.negate = false;
+      val.abs = false;
+      /* The constant has a single component, so every lane reads lane 0. */
+      memset(val.swizzle, 0, sizeof val.swizzle);
+
+      return val;
+   }
+
+   default:
+      unreachable("Invalid search value type");
+   }
+}
+
+/*
+ * Try to match `instr` against the `search` expression and, on success,
+ * replace it with the value described by `replace`.
+ *
+ * On a match, the replacement is constructed in front of `instr`, a mov of
+ * the replacement is inserted, all uses of `instr`'s SSA destination are
+ * rewritten to the mov, and `instr` is removed. New instructions are
+ * allocated from `mem_ctx`.
+ *
+ * `instr` must have an SSA destination. Returns the new mov instruction, or
+ * NULL when `instr` does not match.
+ */
+nir_alu_instr *
+nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
+                  const nir_search_value *replace, void *mem_ctx)
+{
+   /* Check the precondition before the SSA destination is read. */
+   assert(instr->dest.dest.is_ssa);
+
+   const unsigned num_components = instr->dest.dest.ssa.num_components;
+   uint8_t swizzle[4] = { 0, 0, 0, 0 };
+
+   for (unsigned i = 0; i < num_components; ++i)
+      swizzle[i] = i;
+
+   struct match_state state;
+   state.variables_seen = 0;
+
+   if (!match_expression(search, instr, num_components, swizzle, &state))
+      return NULL;
+
+   /* Inserting a mov may be unnecessary.  However, it's much easier to
+    * simply let copy propagation clean this up than to try to go through
+    * and rewrite swizzles ourselves.
+    */
+   nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
+   mov->dest.write_mask = instr->dest.write_mask;
+   nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL);
+
+   mov->src[0] = construct_value(replace, nir_op_infos[instr->op].output_type,
+                                 num_components, &state,
+                                 &instr->instr, mem_ctx);
+   nir_instr_insert_before(&instr->instr, &mov->instr);
+
+   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
+                            nir_src_for_ssa(&mov->dest.dest.ssa));
+
+   /* We know this one has no more uses because we just rewrote them all,
+    * so we can remove it.  The rest of the matched expression, however, we
+    * don't know so much about.  We'll just let dead code clean them up.
+    */
+   nir_instr_remove(&instr->instr);
+
+   return mov;
+}
diff --git a/src/compiler/nir/nir_search.h b/src/compiler/nir/nir_search.h
new file mode 100644
index 00000000000..7d47792945e
--- /dev/null
+++ b/src/compiler/nir/nir_search.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ *
+ */
+
+#ifndef _NIR_SEARCH_
+#define _NIR_SEARCH_
+
+#include "nir.h"
+
+#define NIR_SEARCH_MAX_VARIABLES 16
+
+/** Discriminator stored in every nir_search_value: tells whether the value
+ * is an expression, a variable or a constant.
+ */
+typedef enum {
+ nir_search_value_expression,
+ nir_search_value_variable,
+ nir_search_value_constant,
+} nir_search_value_type;
+
+/** Common header embedded as the first member ("value") of
+ * nir_search_variable, nir_search_constant and nir_search_expression.
+ */
+typedef struct {
+ /** Which of the three concrete kinds this value is */
+ nir_search_value_type type;
+} nir_search_value;
+
+/** A search value that binds to, or refers back to, a matched source */
+typedef struct {
+ nir_search_value value;
+
+ /** The variable index; Must be less than NIR_SEARCH_MAX_VARIABLES */
+ unsigned variable;
+
+ /** Indicates that the given variable must be a constant
+ *
+ * This is only allowed in search expressions and indicates that the
+ * given variable is only allowed to match constant values.
+ */
+ bool is_constant;
+
+ /** Indicates that the given variable must have a certain type
+ *
+ * This is only allowed in search expressions and indicates that the
+ * given variable is only allowed to match values that come from an ALU
+ * instruction with the given output type. A type of nir_type_invalid
+ * means it can match any type.
+ *
+ * Note: A variable that is both constant and has a type other than
+ * nir_type_invalid will never match anything.
+ */
+ nir_alu_type type;
+} nir_search_variable;
+
+/** A search value that stands for one 32-bit constant */
+typedef struct {
+ nir_search_value value;
+
+ /** The constant, viewed as unsigned, signed or float */
+ union {
+ uint32_t u;
+ int32_t i;
+ float f;
+ } data;
+} nir_search_constant;
+
+/** A search value that stands for an ALU operation applied to sub-values */
+typedef struct {
+ nir_search_value value;
+
+ /** The ALU opcode of the expression */
+ nir_op opcode;
+ /** One search value per input of the opcode; at most four */
+ const nir_search_value *srcs[4];
+} nir_search_expression;
+
+/* Downcast helpers from a nir_search_value to the struct that embeds it as
+ * its "value" member. NOTE(review): the exact semantics come from
+ * NIR_DEFINE_CAST, which is defined outside this file -- presumably a
+ * container_of-style cast; confirm against nir.h.
+ */
+NIR_DEFINE_CAST(nir_search_value_as_variable, nir_search_value,
+ nir_search_variable, value)
+NIR_DEFINE_CAST(nir_search_value_as_constant, nir_search_value,
+ nir_search_constant, value)
+NIR_DEFINE_CAST(nir_search_value_as_expression, nir_search_value,
+ nir_search_expression, value)
+
+/** Matches instr against search and, on success, replaces it
+ *
+ * On a match, the value described by replace is built in front of instr
+ * (allocating from mem_ctx), all uses of instr are rewritten to a new mov
+ * of that value, and instr is removed. Returns the new mov, or NULL if
+ * instr does not match search.
+ */
+nir_alu_instr *
+nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
+ const nir_search_value *replace, void *mem_ctx);
+
+#endif /* _NIR_SEARCH_ */
diff --git a/src/compiler/nir/nir_split_var_copies.c b/src/compiler/nir/nir_split_var_copies.c
new file mode 100644
index 00000000000..6fdaefa32c8
--- /dev/null
+++ b/src/compiler/nir/nir_split_var_copies.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements "copy splitting" which is similar to structure splitting only
+ * it works on copy operations rather than the datatypes themselves. The
+ * GLSL language allows you to copy an entire structure (which may contain
+ * arrays or other structures) from one variable to another at a time.
+ * Normally, in a language such as C this would be handled by a "structure
+ * splitting" pass that breaks up the structures. Unfortunately for us,
+ * structures used in inputs or outputs can't be split. Therefore,
+ * regardless of what we do, we have to be able to copy to/from
+ * structures.
+ *
+ * The primary purpose of structure splitting is to allow you to better
+ * optimize variable access and lower things to registers where you can.
+ * The primary issue here is that, if you lower the copy to a bunch of
+ * loads and stores, you lose a lot of information about the copy
+ * operation that you would like to keep around. To solve this problem, we
+ * have a "copy splitting" pass that, instead of splitting the structures
+ * or lowering the copy into loads and stores, splits the copy operation
+ * into a bunch of copy operations one for each leaf of the structure tree.
+ * If an intermediate array is encountered, it is referenced with a
+ * wildcard reference to indicate that the entire array is to be copied.
+ *
+ * As things become direct, array copies may be able to be losslessly
+ * lowered to having fewer and fewer wildcards. However, until that
+ * happens we want to keep the information about the arrays intact.
+ *
+ * Prior to the copy splitting pass, there are no wildcard references but
+ * there may be incomplete references where the tail of the deref chain is
+ * an array or a structure and not a specific element. After the copy
+ * splitting pass has completed, every variable deref will be a full-length
+ * dereference pointing to a single leaf in the structure type tree with
+ * possibly a few wildcard array dereferences.
+ */
+
+struct split_var_copies_state {
+ void *mem_ctx;
+ void *dead_ctx;
+ bool progress;
+};
+
+/* Recursively constructs deref chains to split a copy instruction into
+ * multiple (if needed) copy instructions with full-length deref chains.
+ * External callers of this function should pass the tail and head of the
+ * deref chains found as the source and destination of the copy instruction
+ * into this function.
+ *
+ * \param old_copy The copy instruction we are splitting
+ * \param dest_head The head of the destination deref chain we are building
+ * \param src_head The head of the source deref chain we are building
+ * \param dest_tail The tail of the destination deref chain we are building
+ * \param src_tail The tail of the source deref chain we are building
+ * \param state The current split_var_copies_state object
+ */
+static void
+split_var_copy_instr(nir_intrinsic_instr *old_copy,
+ nir_deref *dest_head, nir_deref *src_head,
+ nir_deref *dest_tail, nir_deref *src_tail,
+ struct split_var_copies_state *state)
+{
+ assert(src_tail->type == dest_tail->type);
+
+ /* Make sure these really are the tails of the deref chains */
+ assert(dest_tail->child == NULL);
+ assert(src_tail->child == NULL);
+
+ switch (glsl_get_base_type(src_tail->type)) {
+ case GLSL_TYPE_ARRAY: {
+ /* Make a wildcard dereference */
+ nir_deref_array *deref = nir_deref_array_create(state->dead_ctx);
+ deref->deref.type = glsl_get_array_element(src_tail->type);
+ deref->deref_array_type = nir_deref_array_type_wildcard;
+
+ /* Set the tail of both as the newly created wildcard deref. It is
+ * safe to use the same wildcard in both places because a) we will be
+ * copying it before we put it in an actual instruction and b)
+ * everything that will potentially add another link in the deref
+ * chain will also add the same thing to both chains.
+ */
+ src_tail->child = &deref->deref;
+ dest_tail->child = &deref->deref;
+
+ split_var_copy_instr(old_copy, dest_head, src_head,
+ dest_tail->child, src_tail->child, state);
+
+ /* Set it back to the way we found it */
+ src_tail->child = NULL;
+ dest_tail->child = NULL;
+ break;
+ }
+
+ case GLSL_TYPE_STRUCT:
+ /* This is the only part that actually does any interesting
+ * splitting. For array types, we just use wildcards and resolve
+ * them later. For structure types, we need to emit one copy
+ * instruction for every structure element. Because we may have
+ * structs inside structs, we just recurse and let the next level
+ * take care of any additional structures.
+ */
+ for (unsigned i = 0; i < glsl_get_length(src_tail->type); i++) {
+ nir_deref_struct *deref = nir_deref_struct_create(state->dead_ctx, i);
+ deref->deref.type = glsl_get_struct_field(src_tail->type, i);
+
+ /* Set the tail of both as the newly created structure deref. It
+ * is safe to use the same deref in both places because a) we
+ * will be copying it before we put it in an actual instruction
+ * and b) everything that will potentially add another link in the
+ * deref chain will also add the same thing to both chains.
+ */
+ src_tail->child = &deref->deref;
+ dest_tail->child = &deref->deref;
+
+ split_var_copy_instr(old_copy, dest_head, src_head,
+ dest_tail->child, src_tail->child, state);
+ }
+ /* Set it back to the way we found it */
+ src_tail->child = NULL;
+ dest_tail->child = NULL;
+ break;
+
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_BOOL:
+ if (glsl_type_is_matrix(src_tail->type)) {
+ nir_deref_array *deref = nir_deref_array_create(state->dead_ctx);
+ deref->deref.type = glsl_get_column_type(src_tail->type);
+ deref->deref_array_type = nir_deref_array_type_wildcard;
+
+ /* Set the tail of both as the newly created wildcard deref. It
+ * is safe to use the same wildcard in both places because a) we
+ * will be copying it before we put it in an actual instruction
+ * and b) everything that will potentially add another link in the
+ * deref chain will also add the same thing to both chains.
+ */
+ src_tail->child = &deref->deref;
+ dest_tail->child = &deref->deref;
+
+ split_var_copy_instr(old_copy, dest_head, src_head,
+ dest_tail->child, src_tail->child, state);
+
+ /* Set it back to the way we found it */
+ src_tail->child = NULL;
+ dest_tail->child = NULL;
+ } else {
+ /* At this point, we have fully built our deref chains and can
+ * actually add the new copy instruction.
+ */
+ nir_intrinsic_instr *new_copy =
+ nir_intrinsic_instr_create(state->mem_ctx, nir_intrinsic_copy_var);
+
+ /* We need to make copies because a) this deref chain actually
+ * belongs to the copy instruction and b) the deref chains may
+ * have some of the same links due to the way we constructed them
+ */
+ nir_deref *src = nir_copy_deref(new_copy, src_head);
+ nir_deref *dest = nir_copy_deref(new_copy, dest_head);
+
+ new_copy->variables[0] = nir_deref_as_var(dest);
+ new_copy->variables[1] = nir_deref_as_var(src);
+
+ /* Emit the copy instruction after the old instruction. We'll
+ * remove the old one later.
+ */
+ nir_instr_insert_after(&old_copy->instr, &new_copy->instr);
+ state->progress = true;
+ }
+ break;
+
+ case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_ATOMIC_UINT:
+ case GLSL_TYPE_INTERFACE:
+ default:
+ unreachable("Cannot copy these types");
+ }
+}
+
+static bool
+split_var_copies_block(nir_block *block, void *void_state)
+{
+ struct split_var_copies_state *state = void_state;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
+ if (intrinsic->intrinsic != nir_intrinsic_copy_var)
+ continue;
+
+ nir_deref *dest_head = &intrinsic->variables[0]->deref;
+ nir_deref *src_head = &intrinsic->variables[1]->deref;
+ nir_deref *dest_tail = nir_deref_tail(dest_head);
+ nir_deref *src_tail = nir_deref_tail(src_head);
+
+ switch (glsl_get_base_type(src_tail->type)) {
+ case GLSL_TYPE_ARRAY:
+ case GLSL_TYPE_STRUCT:
+ split_var_copy_instr(intrinsic, dest_head, src_head,
+ dest_tail, src_tail, state);
+ nir_instr_remove(&intrinsic->instr);
+ ralloc_steal(state->dead_ctx, instr);
+ break;
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_BOOL:
+ if (glsl_type_is_matrix(src_tail->type)) {
+ split_var_copy_instr(intrinsic, dest_head, src_head,
+ dest_tail, src_tail, state);
+ nir_instr_remove(&intrinsic->instr);
+ ralloc_steal(state->dead_ctx, instr);
+ }
+ break;
+ default:
+ unreachable("Invalid type");
+ break;
+ }
+ }
+
+ return true;
+}
+
+static bool
+split_var_copies_impl(nir_function_impl *impl)
+{
+ struct split_var_copies_state state;
+
+ state.mem_ctx = ralloc_parent(impl);
+ state.dead_ctx = ralloc_context(NULL);
+ state.progress = false;
+
+ nir_foreach_block(impl, split_var_copies_block, &state);
+
+ ralloc_free(state.dead_ctx);
+
+ if (state.progress) {
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+ }
+
+ return state.progress;
+}
+
+bool
+nir_split_var_copies(nir_shader *shader)
+{
+ bool progress = false;
+
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ progress = split_var_copies_impl(function->impl) || progress;
+ }
+
+ return progress;
+}
diff --git a/src/compiler/nir/nir_sweep.c b/src/compiler/nir/nir_sweep.c
new file mode 100644
index 00000000000..0710bdba7c7
--- /dev/null
+++ b/src/compiler/nir/nir_sweep.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+
+/**
+ * \file nir_sweep.c
+ *
+ * The nir_sweep() pass performs a mark and sweep pass over a nir_shader's associated
+ * memory - anything still connected to the program will be kept, and any dead memory
+ * we dropped on the floor will be freed.
+ *
+ * The expectation is that drivers should call this when finished compiling the shader
+ * (after any optimization, lowering, and so on). However, it's also fine to call it
+ * earlier, and even many times, trading CPU cycles for memory savings.
+ */
+
+#define steal_list(mem_ctx, type, list) \
+ foreach_list_typed(type, obj, node, list) { ralloc_steal(mem_ctx, obj); }
+
+static void sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node);
+
+static bool
+sweep_src_indirect(nir_src *src, void *nir)
+{
+ if (!src->is_ssa && src->reg.indirect)
+ ralloc_steal(nir, src->reg.indirect);
+
+ return true;
+}
+
+static bool
+sweep_dest_indirect(nir_dest *dest, void *nir)
+{
+ if (!dest->is_ssa && dest->reg.indirect)
+ ralloc_steal(nir, dest->reg.indirect);
+
+ return true;
+}
+
+static void
+sweep_block(nir_shader *nir, nir_block *block)
+{
+ ralloc_steal(nir, block);
+
+ nir_foreach_instr(block, instr) {
+ ralloc_steal(nir, instr);
+
+ nir_foreach_src(instr, sweep_src_indirect, nir);
+ nir_foreach_dest(instr, sweep_dest_indirect, nir);
+ }
+}
+
+static void
+sweep_if(nir_shader *nir, nir_if *iff)
+{
+ ralloc_steal(nir, iff);
+
+ foreach_list_typed(nir_cf_node, cf_node, node, &iff->then_list) {
+ sweep_cf_node(nir, cf_node);
+ }
+
+ foreach_list_typed(nir_cf_node, cf_node, node, &iff->else_list) {
+ sweep_cf_node(nir, cf_node);
+ }
+}
+
+static void
+sweep_loop(nir_shader *nir, nir_loop *loop)
+{
+ ralloc_steal(nir, loop);
+
+ foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
+ sweep_cf_node(nir, cf_node);
+ }
+}
+
+static void
+sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node)
+{
+ switch (cf_node->type) {
+ case nir_cf_node_block:
+ sweep_block(nir, nir_cf_node_as_block(cf_node));
+ break;
+ case nir_cf_node_if:
+ sweep_if(nir, nir_cf_node_as_if(cf_node));
+ break;
+ case nir_cf_node_loop:
+ sweep_loop(nir, nir_cf_node_as_loop(cf_node));
+ break;
+ default:
+ unreachable("Invalid CF node type");
+ }
+}
+
+static void
+sweep_impl(nir_shader *nir, nir_function_impl *impl)
+{
+ ralloc_steal(nir, impl);
+
+ ralloc_steal(nir, impl->params);
+ ralloc_steal(nir, impl->return_var);
+ steal_list(nir, nir_variable, &impl->locals);
+ steal_list(nir, nir_register, &impl->registers);
+
+ foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) {
+ sweep_cf_node(nir, cf_node);
+ }
+
+ sweep_block(nir, impl->end_block);
+
+ /* Wipe out all the metadata, if any. */
+ nir_metadata_preserve(impl, nir_metadata_none);
+}
+
+static void
+sweep_function(nir_shader *nir, nir_function *f)
+{
+ ralloc_steal(nir, f);
+ ralloc_steal(nir, f->params);
+
+ if (f->impl)
+ sweep_impl(nir, f->impl);
+}
+
+void
+nir_sweep(nir_shader *nir)
+{
+ void *rubbish = ralloc_context(NULL);
+
+ /* First, move ownership of all the memory to a temporary context; assume dead. */
+ ralloc_adopt(rubbish, nir);
+
+ ralloc_steal(nir, (char *)nir->info.name);
+ if (nir->info.label)
+ ralloc_steal(nir, (char *)nir->info.label);
+
+ /* Variables and registers are not dead. Steal them back. */
+ steal_list(nir, nir_variable, &nir->uniforms);
+ steal_list(nir, nir_variable, &nir->inputs);
+ steal_list(nir, nir_variable, &nir->outputs);
+ steal_list(nir, nir_variable, &nir->globals);
+ steal_list(nir, nir_variable, &nir->system_values);
+ steal_list(nir, nir_register, &nir->registers);
+
+ /* Recurse into functions, stealing their contents back. */
+ foreach_list_typed(nir_function, func, node, &nir->functions) {
+ sweep_function(nir, func);
+ }
+
+ /* Free everything we didn't steal back. */
+ ralloc_free(rubbish);
+}
diff --git a/src/compiler/nir/nir_to_ssa.c b/src/compiler/nir/nir_to_ssa.c
new file mode 100644
index 00000000000..44a50547738
--- /dev/null
+++ b/src/compiler/nir/nir_to_ssa.c
@@ -0,0 +1,536 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#include "nir.h"
+#include <stdlib.h>
+#include <unistd.h>
+
+/*
+ * Implements the classic to-SSA algorithm described by Cytron et al. in
+ * "Efficiently Computing Static Single Assignment Form and the Control
+ * Dependence Graph."
+ */
+
+/* inserts a phi node of the form reg = phi(reg, reg, reg, ...) */
+
+static void
+insert_trivial_phi(nir_register *reg, nir_block *block, void *mem_ctx)
+{
+ nir_phi_instr *instr = nir_phi_instr_create(mem_ctx);
+
+ instr->dest.reg.reg = reg;
+ struct set_entry *entry;
+ set_foreach(block->predecessors, entry) {
+ nir_block *pred = (nir_block *) entry->key;
+
+ nir_phi_src *src = ralloc(instr, nir_phi_src);
+ src->pred = pred;
+ src->src.is_ssa = false;
+ src->src.reg.base_offset = 0;
+ src->src.reg.indirect = NULL;
+ src->src.reg.reg = reg;
+ exec_list_push_tail(&instr->srcs, &src->node);
+ }
+
+ nir_instr_insert_before_block(block, &instr->instr);
+}
+
+static void
+insert_phi_nodes(nir_function_impl *impl)
+{
+ void *mem_ctx = ralloc_parent(impl);
+
+ unsigned *work = calloc(impl->num_blocks, sizeof(unsigned));
+ unsigned *has_already = calloc(impl->num_blocks, sizeof(unsigned));
+
+ /*
+ * Since the work flags already prevent us from inserting a node that has
+ * ever been inserted into W, we don't need to use a set to represent W.
+ * Also, since no block can ever be inserted into W more than once, we know
+ * that the maximum size of W is the number of basic blocks in the
+ * function. So all we need to handle W is an array and a pointer to the
+ * next element to be inserted and the next element to be removed.
+ */
+ nir_block **W = malloc(impl->num_blocks * sizeof(nir_block *));
+ unsigned w_start, w_end;
+
+ unsigned iter_count = 0;
+
+ nir_index_blocks(impl);
+
+ foreach_list_typed(nir_register, reg, node, &impl->registers) {
+ if (reg->num_array_elems != 0)
+ continue;
+
+ w_start = w_end = 0;
+ iter_count++;
+
+ nir_foreach_def(reg, dest) {
+ nir_instr *def = dest->reg.parent_instr;
+ if (work[def->block->index] < iter_count)
+ W[w_end++] = def->block;
+ work[def->block->index] = iter_count;
+ }
+
+ while (w_start != w_end) {
+ nir_block *cur = W[w_start++];
+ struct set_entry *entry;
+ set_foreach(cur->dom_frontier, entry) {
+ nir_block *next = (nir_block *) entry->key;
+
+ /*
+ * If there's more than one return statement, then the end block
+ * can be a join point for some definitions. However, there are
+ * no instructions in the end block, so nothing would use those
+ * phi nodes. Of course, we couldn't place those phi nodes
+ * anyways due to the restriction of having no instructions in the
+ * end block...
+ */
+ if (next == impl->end_block)
+ continue;
+
+ if (has_already[next->index] < iter_count) {
+ insert_trivial_phi(reg, next, mem_ctx);
+ has_already[next->index] = iter_count;
+ if (work[next->index] < iter_count) {
+ work[next->index] = iter_count;
+ W[w_end++] = next;
+ }
+ }
+ }
+ }
+ }
+
+ free(work);
+ free(has_already);
+ free(W);
+}
+
+typedef struct {
+ nir_ssa_def **stack;
+ int index;
+ unsigned num_defs; /** < used to add indices to debug names */
+#ifndef NDEBUG
+ unsigned stack_size;
+#endif
+} reg_state;
+
+typedef struct {
+ reg_state *states;
+ void *mem_ctx;
+ nir_instr *parent_instr;
+ nir_if *parent_if;
+ nir_function_impl *impl;
+
+ /* map from SSA value -> original register */
+ struct hash_table *ssa_map;
+} rewrite_state;
+
+static nir_ssa_def *get_ssa_src(nir_register *reg, rewrite_state *state)
+{
+ unsigned index = reg->index;
+
+ if (state->states[index].index == -1) {
+ /*
+ * We're using an undefined register, create a new undefined SSA value
+ * to preserve the information that this source is undefined
+ */
+ nir_ssa_undef_instr *instr =
+ nir_ssa_undef_instr_create(state->mem_ctx, reg->num_components);
+
+ /*
+ * We could just insert the undefined instruction before the instruction
+ * we're rewriting, but we could be rewriting a phi source in which case
+ * we can't do that, so do the next easiest thing - insert it at the
+ * beginning of the program. In the end, it doesn't really matter where
+ * the undefined instructions are because they're going to be ignored
+ * in the backend.
+ */
+ nir_instr_insert_before_cf_list(&state->impl->body, &instr->instr);
+ return &instr->def;
+ }
+
+ return state->states[index].stack[state->states[index].index];
+}
+
+static bool
+rewrite_use(nir_src *src, void *_state)
+{
+ rewrite_state *state = (rewrite_state *) _state;
+
+ if (src->is_ssa)
+ return true;
+
+ unsigned index = src->reg.reg->index;
+
+ if (state->states[index].stack == NULL)
+ return true;
+
+ nir_ssa_def *def = get_ssa_src(src->reg.reg, state);
+ if (state->parent_instr)
+ nir_instr_rewrite_src(state->parent_instr, src, nir_src_for_ssa(def));
+ else
+ nir_if_rewrite_condition(state->parent_if, nir_src_for_ssa(def));
+
+ return true;
+}
+
+static bool
+rewrite_def_forwards(nir_dest *dest, void *_state)
+{
+ rewrite_state *state = (rewrite_state *) _state;
+
+ if (dest->is_ssa)
+ return true;
+
+ nir_register *reg = dest->reg.reg;
+ unsigned index = reg->index;
+
+ if (state->states[index].stack == NULL)
+ return true;
+
+ char *name = NULL;
+ if (dest->reg.reg->name)
+ name = ralloc_asprintf(state->mem_ctx, "%s_%u", dest->reg.reg->name,
+ state->states[index].num_defs);
+
+ list_del(&dest->reg.def_link);
+ nir_ssa_dest_init(state->parent_instr, dest, reg->num_components, name);
+
+ /* push our SSA destination on the stack */
+ state->states[index].index++;
+ assert(state->states[index].index < state->states[index].stack_size);
+ state->states[index].stack[state->states[index].index] = &dest->ssa;
+ state->states[index].num_defs++;
+
+ _mesa_hash_table_insert(state->ssa_map, &dest->ssa, reg);
+
+ return true;
+}
+
+static void
+rewrite_alu_instr_forward(nir_alu_instr *instr, rewrite_state *state)
+{
+ state->parent_instr = &instr->instr;
+
+ nir_foreach_src(&instr->instr, rewrite_use, state);
+
+ if (instr->dest.dest.is_ssa)
+ return;
+
+ nir_register *reg = instr->dest.dest.reg.reg;
+ unsigned index = reg->index;
+
+ if (state->states[index].stack == NULL)
+ return;
+
+ unsigned write_mask = instr->dest.write_mask;
+ if (write_mask != (1 << instr->dest.dest.reg.reg->num_components) - 1) {
+ /*
+ * Calculate the number of components of the final instruction, which
+ * for per-component things is the number of enabled channels in the
+ * write mask and for non-per-component things is the number of output
+ * components of the instruction.
+ */
+ unsigned num_components;
+ if (nir_op_infos[instr->op].output_size == 0) {
+ unsigned temp = (write_mask & 0x5) + ((write_mask >> 1) & 0x5);
+ num_components = (temp & 0x3) + ((temp >> 2) & 0x3);
+ } else {
+ num_components = nir_op_infos[instr->op].output_size;
+ }
+
+ char *name = NULL;
+ if (instr->dest.dest.reg.reg->name)
+ name = ralloc_asprintf(state->mem_ctx, "%s_%u",
+ reg->name, state->states[index].num_defs);
+
+ instr->dest.write_mask = (1 << num_components) - 1;
+ list_del(&instr->dest.dest.reg.def_link);
+ nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, name);
+
+ if (nir_op_infos[instr->op].output_size == 0) {
+ /*
+ * When we change the output writemask, we need to change the
+ * swizzles for per-component inputs too
+ */
+ for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+ if (nir_op_infos[instr->op].input_sizes[i] != 0)
+ continue;
+
+ unsigned new_swizzle[4] = {0, 0, 0, 0};
+
+ /*
+ * We keep two indices:
+ * 1. The index of the original (non-SSA) component
+ * 2. The index of the post-SSA, compacted, component
+ *
+ * We need to map the swizzle component at index 1 to the swizzle
+ * component at index 2.
+ */
+
+ unsigned ssa_index = 0;
+ for (unsigned index = 0; index < 4; index++) {
+ if (!((write_mask >> index) & 1))
+ continue;
+
+ new_swizzle[ssa_index] = instr->src[i].swizzle[index];
+ ssa_index++;
+ }
+
+ for (unsigned j = 0; j < 4; j++)
+ instr->src[i].swizzle[j] = new_swizzle[j];
+ }
+ }
+
+ nir_op op;
+ switch (reg->num_components) {
+ case 2: op = nir_op_vec2; break;
+ case 3: op = nir_op_vec3; break;
+ case 4: op = nir_op_vec4; break;
+ default: unreachable("not reached");
+ }
+
+ nir_alu_instr *vec = nir_alu_instr_create(state->mem_ctx, op);
+
+ vec->dest.dest.reg.reg = reg;
+ vec->dest.write_mask = (1 << reg->num_components) - 1;
+
+ nir_ssa_def *old_src = get_ssa_src(reg, state);
+ nir_ssa_def *new_src = &instr->dest.dest.ssa;
+
+ unsigned ssa_index = 0;
+ for (unsigned i = 0; i < reg->num_components; i++) {
+ vec->src[i].src.is_ssa = true;
+ if ((write_mask >> i) & 1) {
+ vec->src[i].src.ssa = new_src;
+ if (nir_op_infos[instr->op].output_size == 0)
+ vec->src[i].swizzle[0] = ssa_index;
+ else
+ vec->src[i].swizzle[0] = i;
+ ssa_index++;
+ } else {
+ vec->src[i].src.ssa = old_src;
+ vec->src[i].swizzle[0] = i;
+ }
+ }
+
+ nir_instr_insert_after(&instr->instr, &vec->instr);
+
+ state->parent_instr = &vec->instr;
+ rewrite_def_forwards(&vec->dest.dest, state);
+ } else {
+ rewrite_def_forwards(&instr->dest.dest, state);
+ }
+}
+
+static void
+rewrite_phi_instr(nir_phi_instr *instr, rewrite_state *state)
+{
+ state->parent_instr = &instr->instr;
+ rewrite_def_forwards(&instr->dest, state);
+}
+
+static void
+rewrite_instr_forward(nir_instr *instr, rewrite_state *state)
+{
+ if (instr->type == nir_instr_type_alu) {
+ rewrite_alu_instr_forward(nir_instr_as_alu(instr), state);
+ return;
+ }
+
+ if (instr->type == nir_instr_type_phi) {
+ rewrite_phi_instr(nir_instr_as_phi(instr), state);
+ return;
+ }
+
+ state->parent_instr = instr;
+
+ nir_foreach_src(instr, rewrite_use, state);
+ nir_foreach_dest(instr, rewrite_def_forwards, state);
+}
+
+static void
+rewrite_phi_sources(nir_block *block, nir_block *pred, rewrite_state *state)
+{
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_phi)
+ break;
+
+ nir_phi_instr *phi_instr = nir_instr_as_phi(instr);
+
+ state->parent_instr = instr;
+
+ nir_foreach_phi_src(phi_instr, src) {
+ if (src->pred == pred) {
+ rewrite_use(&src->src, state);
+ break;
+ }
+ }
+ }
+}
+
+static bool
+rewrite_def_backwards(nir_dest *dest, void *_state)
+{
+ rewrite_state *state = (rewrite_state *) _state;
+
+ if (!dest->is_ssa)
+ return true;
+
+ struct hash_entry *entry =
+ _mesa_hash_table_search(state->ssa_map, &dest->ssa);
+
+ if (!entry)
+ return true;
+
+ nir_register *reg = (nir_register *) entry->data;
+ unsigned index = reg->index;
+
+ state->states[index].index--;
+ assert(state->states[index].index >= -1);
+
+ return true;
+}
+
+static void
+rewrite_instr_backwards(nir_instr *instr, rewrite_state *state)
+{
+ nir_foreach_dest(instr, rewrite_def_backwards, state);
+}
+
+static void
+rewrite_block(nir_block *block, rewrite_state *state)
+{
+ /* This will skip over any instructions after the current one, which is
+ * what we want because those instructions (vector gather, conditional
+ * select) will already be in SSA form.
+ */
+ nir_foreach_instr_safe(block, instr) {
+ rewrite_instr_forward(instr, state);
+ }
+
+ if (block != state->impl->end_block &&
+ !nir_cf_node_is_last(&block->cf_node) &&
+ nir_cf_node_next(&block->cf_node)->type == nir_cf_node_if) {
+ nir_if *if_stmt = nir_cf_node_as_if(nir_cf_node_next(&block->cf_node));
+ state->parent_instr = NULL;
+ state->parent_if = if_stmt;
+ rewrite_use(&if_stmt->condition, state);
+ }
+
+ if (block->successors[0])
+ rewrite_phi_sources(block->successors[0], block, state);
+ if (block->successors[1])
+ rewrite_phi_sources(block->successors[1], block, state);
+
+ for (unsigned i = 0; i < block->num_dom_children; i++)
+ rewrite_block(block->dom_children[i], state);
+
+ nir_foreach_instr_reverse(block, instr) {
+ rewrite_instr_backwards(instr, state);
+ }
+}
+
+static void
+remove_unused_regs(nir_function_impl *impl, rewrite_state *state)
+{
+ foreach_list_typed_safe(nir_register, reg, node, &impl->registers) {
+ if (state->states[reg->index].stack != NULL)
+ exec_node_remove(&reg->node);
+ }
+}
+
+static void
+init_rewrite_state(nir_function_impl *impl, rewrite_state *state)
+{
+ state->impl = impl;
+ state->mem_ctx = ralloc_parent(impl);
+ state->ssa_map = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ state->states = ralloc_array(NULL, reg_state, impl->reg_alloc);
+
+ foreach_list_typed(nir_register, reg, node, &impl->registers) {
+ assert(reg->index < impl->reg_alloc);
+ if (reg->num_array_elems > 0) {
+ state->states[reg->index].stack = NULL;
+ } else {
+ /*
+ * Calculate a conservative estimate of the stack size based on the
+ * number of definitions there are. Note that this function *must* be
+ * called after phi nodes are inserted so we can count phi node
+ * definitions too.
+ */
+ unsigned stack_size = list_length(&reg->defs);
+
+ state->states[reg->index].stack = ralloc_array(state->states,
+ nir_ssa_def *,
+ stack_size);
+#ifndef NDEBUG
+ state->states[reg->index].stack_size = stack_size;
+#endif
+ state->states[reg->index].index = -1;
+ state->states[reg->index].num_defs = 0;
+ }
+ }
+}
+
+static void
+destroy_rewrite_state(rewrite_state *state)
+{
+ _mesa_hash_table_destroy(state->ssa_map, NULL);
+ ralloc_free(state->states);
+}
+
+void
+nir_convert_to_ssa_impl(nir_function_impl *impl)
+{
+ nir_metadata_require(impl, nir_metadata_dominance);
+
+ insert_phi_nodes(impl);
+
+ rewrite_state state;
+ init_rewrite_state(impl, &state);
+
+ rewrite_block(nir_start_block(impl), &state);
+
+ remove_unused_regs(impl, &state);
+
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+
+ destroy_rewrite_state(&state);
+}
+
+void
+nir_convert_to_ssa(nir_shader *shader)
+{
+ nir_foreach_function(shader, function) {
+ if (function->impl)
+ nir_convert_to_ssa_impl(function->impl);
+ }
+}
diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c
new file mode 100644
index 00000000000..e4db68db3c0
--- /dev/null
+++ b/src/compiler/nir/nir_validate.c
@@ -0,0 +1,1071 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Connor Abbott ([email protected])
+ *
+ */
+
+#include "nir.h"
+#include <assert.h>
+
+/*
+ * This file checks for invalid IR indicating a bug somewhere in the compiler.
+ */
+
+/* Since this file is just a pile of asserts, don't bother compiling it if
+ * we're not building a debug build.
+ */
+#ifdef DEBUG
+
+/*
+ * Per-register validation state.
+ */
+
+typedef struct {
+ /*
+ * equivalent to the uses, if_uses and defs in nir_register, but built up
+ * by the validator. At the end, we verify that the sets have the same entries.
+ */
+ struct set *uses, *if_uses, *defs;
+ nir_function_impl *where_defined; /* NULL for global registers */
+} reg_validate_state;
+
+typedef struct {
+ /*
+ * equivalent to the uses and if_uses in nir_ssa_def, but built up by the
+ * validator. At the end, we verify that the sets have the same entries.
+ */
+ struct set *uses, *if_uses;
+ nir_function_impl *where_defined; /* impl being validated when the def was seen */
+} ssa_def_validate_state;
+
+typedef struct {
+ /* map of register -> reg_validate_state */
+ struct hash_table *regs;
+
+ /* the current shader being validated */
+ nir_shader *shader;
+
+ /* the current instruction being validated; NULL outside of instructions */
+ nir_instr *instr;
+
+ /* the current basic block being validated */
+ nir_block *block;
+
+ /* the current if statement being validated */
+ nir_if *if_stmt;
+
+ /* the innermost loop currently being visited; NULL outside of loops */
+ nir_loop *loop;
+
+ /* the parent of the current cf node being visited */
+ nir_cf_node *parent_node;
+
+ /* the current function implementation being validated */
+ nir_function_impl *impl;
+
+ /* map of SSA def -> ssa_def_validate_state */
+ struct hash_table *ssa_defs;
+
+ /* bitset of ssa definitions we have found; used to check uniqueness */
+ BITSET_WORD *ssa_defs_found;
+
+ /* bitset of registers we have currently found; used to check uniqueness */
+ BITSET_WORD *regs_found;
+
+ /* map of local variable -> function implementation where it is defined */
+ struct hash_table *var_defs;
+} validate_state;
+
+static void validate_src(nir_src *src, validate_state *state);
+
+/*
+ * Validate a register source: record it in the validator's use sets for the
+ * register, check ownership and array bounds, and validate any indirect.
+ */
+static void
+validate_reg_src(nir_src *src, validate_state *state)
+{
+   nir_register *reg = src->reg.reg;
+   assert(reg != NULL);
+
+   struct hash_entry *found = _mesa_hash_table_search(state->regs, reg);
+   assert(found);
+
+   reg_validate_state *reg_state = (reg_validate_state *) found->data;
+
+   /* With no current instruction, the source must belong to an if. */
+   if (state->instr) {
+      _mesa_set_add(reg_state->uses, src);
+   } else {
+      assert(state->if_stmt);
+      _mesa_set_add(reg_state->if_uses, src);
+   }
+
+   if (!reg->is_global) {
+      assert(reg_state->where_defined == state->impl &&
+             "using a register declared in a different function");
+   }
+
+   assert((reg->num_array_elems == 0 ||
+           src->reg.base_offset < reg->num_array_elems) &&
+          "definitely out-of-bounds array access");
+
+   nir_src *indirect = src->reg.indirect;
+   if (indirect) {
+      assert(reg->num_array_elems != 0);
+      assert((indirect->is_ssa || indirect->reg.indirect == NULL) &&
+             "only one level of indirection allowed");
+      validate_src(indirect, state);
+   }
+}
+
+/*
+ * Validate an SSA source: the def must already be known to the validator,
+ * belong to the current impl, and the use is recorded for later comparison.
+ */
+static void
+validate_ssa_src(nir_src *src, validate_state *state)
+{
+   assert(src->ssa != NULL);
+
+   struct hash_entry *found =
+      _mesa_hash_table_search(state->ssa_defs, src->ssa);
+   assert(found);
+
+   ssa_def_validate_state *def_state = (ssa_def_validate_state *)found->data;
+
+   assert(def_state->where_defined == state->impl &&
+          "using an SSA value defined in a different function");
+
+   /* Pick the use set: instruction uses vs. if-condition uses. */
+   struct set *use_set;
+   if (state->instr) {
+      use_set = def_state->uses;
+   } else {
+      assert(state->if_stmt);
+      use_set = def_state->if_uses;
+   }
+   _mesa_set_add(use_set, src);
+
+   /* TODO validate that the use is dominated by the definition */
+}
+
+/*
+ * Validate a source of either kind. The source's parent pointer must match
+ * the instruction (or, failing that, the if statement) being validated.
+ */
+static void
+validate_src(nir_src *src, validate_state *state)
+{
+   if (state->instr) {
+      assert(src->parent_instr == state->instr);
+   } else {
+      assert(src->parent_if == state->if_stmt);
+   }
+
+   if (!src->is_ssa) {
+      validate_reg_src(src, state);
+      return;
+   }
+
+   validate_ssa_src(src, state);
+}
+
+/*
+ * Validate source `index` of an ALU instruction: every swizzle entry must be
+ * below 4, and entries for channels the instruction actually reads must be
+ * below the component count of the value being read.
+ */
+static void
+validate_alu_src(nir_alu_instr *instr, unsigned index, validate_state *state)
+{
+   nir_alu_src *alu_src = &instr->src[index];
+
+   unsigned max_components;
+   if (alu_src->src.is_ssa)
+      max_components = alu_src->src.ssa->num_components;
+   else if (alu_src->src.reg.reg->is_packed)
+      max_components = 4; /* can't check anything */
+   else
+      max_components = alu_src->src.reg.reg->num_components;
+
+   for (unsigned chan = 0; chan < 4; chan++) {
+      assert(alu_src->swizzle[chan] < 4);
+
+      if (nir_alu_instr_channel_used(instr, index, chan)) {
+         assert(alu_src->swizzle[chan] < max_components);
+      }
+   }
+
+   validate_src(&alu_src->src, state);
+}
+
+/*
+ * Validate a register destination: record it in the validator's def set for
+ * the register, check ownership and array bounds, and validate any indirect.
+ */
+static void
+validate_reg_dest(nir_reg_dest *dest, validate_state *state)
+{
+   nir_register *reg = dest->reg;
+   assert(reg != NULL);
+
+   assert(dest->parent_instr == state->instr);
+
+   struct hash_entry *found = _mesa_hash_table_search(state->regs, reg);
+   assert(found);
+
+   reg_validate_state *reg_state = (reg_validate_state *) found->data;
+   _mesa_set_add(reg_state->defs, dest);
+
+   if (!reg->is_global) {
+      assert(reg_state->where_defined == state->impl &&
+             "writing to a register declared in a different function");
+   }
+
+   assert((reg->num_array_elems == 0 ||
+           dest->base_offset < reg->num_array_elems) &&
+          "definitely out-of-bounds array access");
+
+   nir_src *indirect = dest->indirect;
+   if (indirect) {
+      assert(reg->num_array_elems != 0);
+      assert((indirect->is_ssa || indirect->reg.indirect == NULL) &&
+             "only one level of indirection allowed");
+      validate_src(indirect, state);
+   }
+}
+
+/*
+ * Validate an SSA definition and register it with the validator: its index
+ * must be in range and not seen before in this impl, and a fresh
+ * ssa_def_validate_state is created to collect the uses found later.
+ */
+static void
+validate_ssa_def(nir_ssa_def *def, validate_state *state)
+{
+   assert(def->index < state->impl->ssa_alloc);
+   assert(!BITSET_TEST(state->ssa_defs_found, def->index));
+   BITSET_SET(state->ssa_defs_found, def->index);
+
+   assert(def->parent_instr == state->instr);
+   assert(def->num_components <= 4);
+
+   list_validate(&def->uses);
+   list_validate(&def->if_uses);
+
+   /* The tracking record is a ralloc child of the ssa_defs table. */
+   ssa_def_validate_state *tracked = ralloc(state->ssa_defs,
+                                            ssa_def_validate_state);
+   tracked->where_defined = state->impl;
+   tracked->uses = _mesa_set_create(tracked, _mesa_hash_pointer,
+                                    _mesa_key_pointer_equal);
+   tracked->if_uses = _mesa_set_create(tracked, _mesa_hash_pointer,
+                                       _mesa_key_pointer_equal);
+   _mesa_hash_table_insert(state->ssa_defs, def, tracked);
+}
+
+/* Validate a destination, dispatching on whether it is SSA or a register. */
+static void
+validate_dest(nir_dest *dest, validate_state *state)
+{
+   if (!dest->is_ssa) {
+      validate_reg_dest(&dest->reg, state);
+      return;
+   }
+
+   validate_ssa_def(&dest->ssa, state);
+}
+
+/*
+ * Validate an ALU destination: the write mask must stay within the
+ * destination's components (unchecked for packed registers) and saturate
+ * is only allowed on opcodes whose output type is float.
+ */
+static void
+validate_alu_dest(nir_alu_dest *dest, validate_state *state)
+{
+   unsigned dest_size;
+   bool is_packed;
+
+   if (dest->dest.is_ssa) {
+      dest_size = dest->dest.ssa.num_components;
+      is_packed = false;
+   } else {
+      dest_size = dest->dest.reg.reg->num_components;
+      is_packed = dest->dest.reg.reg->is_packed;
+   }
+
+   /*
+    * validate that the instruction doesn't write to components not in the
+    * register/SSA value
+    */
+   assert(is_packed || !(dest->write_mask & ~((1 << dest_size) - 1)));
+
+   /* validate that saturate is only ever used on instructions with
+    * destinations of type float
+    */
+   nir_alu_instr *alu = nir_instr_as_alu(state->instr);
+   if (dest->saturate) {
+      assert(nir_op_infos[alu->op].output_type == nir_type_float);
+   }
+
+   validate_dest(&dest->dest, state);
+}
+
+/* Validate an ALU instruction: opcode range, each input, then the dest. */
+static void
+validate_alu_instr(nir_alu_instr *instr, validate_state *state)
+{
+   assert(instr->op < nir_num_opcodes);
+
+   const unsigned num_inputs = nir_op_infos[instr->op].num_inputs;
+   for (unsigned src_idx = 0; src_idx < num_inputs; src_idx++)
+      validate_alu_src(instr, src_idx, state);
+
+   validate_alu_dest(&instr->dest, state);
+}
+
+/*
+ * Walk a deref chain from `deref` through its children and check that each
+ * array/struct link has the type implied by its parent's type. Indirect
+ * array indices are validated as sources.
+ *
+ * Array and struct links read parent->type, so they must not be the first
+ * link of the chain; this is asserted rather than left to crash on a NULL
+ * parent.
+ */
+static void
+validate_deref_chain(nir_deref *deref, validate_state *state)
+{
+   assert(deref->child == NULL || ralloc_parent(deref->child) == deref);
+
+   nir_deref *parent = NULL;
+   while (deref != NULL) {
+      switch (deref->deref_type) {
+      case nir_deref_type_array:
+         assert(parent != NULL && "array deref at the head of a chain");
+         assert(deref->type == glsl_get_array_element(parent->type));
+         if (nir_deref_as_array(deref)->deref_array_type ==
+             nir_deref_array_type_indirect)
+            validate_src(&nir_deref_as_array(deref)->indirect, state);
+         break;
+
+      case nir_deref_type_struct:
+         assert(parent != NULL && "struct deref at the head of a chain");
+         assert(deref->type ==
+                glsl_get_struct_field(parent->type,
+                                      nir_deref_as_struct(deref)->index));
+         break;
+
+      case nir_deref_type_var:
+         break;
+
+      default:
+         assert(!"Invalid deref type");
+         break;
+      }
+
+      parent = deref;
+      deref = deref->child;
+   }
+}
+
+/*
+ * Check a variable use. Only local variables are checked: they must have
+ * been recorded in var_defs for the impl currently being validated.
+ */
+static void
+validate_var_use(nir_variable *var, validate_state *state)
+{
+   if (var->data.mode != nir_var_local)
+      return;
+
+   struct hash_entry *found = _mesa_hash_table_search(state->var_defs, var);
+
+   assert(found);
+   assert((nir_function_impl *) found->data == state->impl);
+}
+
+/*
+ * Validate a variable deref: it must be non-NULL, be a ralloc child of
+ * parent_mem_ctx, and carry the same type as the variable it names. The
+ * variable use and the rest of the chain are then validated.
+ */
+static void
+validate_deref_var(void *parent_mem_ctx, nir_deref_var *deref, validate_state *state)
+{
+   assert(deref != NULL);
+   assert(ralloc_parent(deref) == parent_mem_ctx);
+   assert(deref->deref.type == deref->var->type);
+
+   nir_variable *var = deref->var;
+   validate_var_use(var, state);
+
+   nir_deref *head = &deref->deref;
+   validate_deref_chain(head, state);
+}
+
+/*
+ * Validate an intrinsic: component counts of each source and of the
+ * destination against the nir_intrinsic_infos table, every variable deref,
+ * and extra rules for the load_var/store_var/copy_var intrinsics.
+ */
+static void
+validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
+{
+   const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
+
+   for (unsigned i = 0; i < info->num_srcs; i++) {
+      nir_src *src = &instr->src[i];
+
+      /* A table entry of 0 falls back to the instruction's num_components. */
+      unsigned components_read = info->src_components[i];
+      if (components_read == 0)
+         components_read = instr->num_components;
+
+      assert(components_read > 0);
+
+      if (src->is_ssa) {
+         assert(components_read <= src->ssa->num_components);
+      } else if (!src->reg.reg->is_packed) {
+         assert(components_read <= src->reg.reg->num_components);
+      }
+
+      validate_src(src, state);
+   }
+
+   for (unsigned i = 0; i < info->num_variables; i++)
+      validate_deref_var(instr, instr->variables[i], state);
+
+   if (info->has_dest) {
+      /* Same fallback as for sources. */
+      unsigned components_written = info->dest_components;
+      if (components_written == 0)
+         components_written = instr->num_components;
+
+      assert(components_written > 0);
+
+      if (instr->dest.is_ssa) {
+         assert(components_written <= instr->dest.ssa.num_components);
+      } else if (!instr->dest.reg.reg->is_packed) {
+         assert(components_written <= instr->dest.reg.reg->num_components);
+      }
+
+      validate_dest(&instr->dest, state);
+   }
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_var: {
+      nir_deref_var *loaded = instr->variables[0];
+      const struct glsl_type *type = nir_deref_tail(&loaded->deref)->type;
+      assert(glsl_type_is_vector_or_scalar(type) ||
+             (loaded->var->data.mode == nir_var_uniform &&
+              glsl_get_base_type(type) == GLSL_TYPE_SUBROUTINE));
+      assert(instr->num_components == glsl_get_vector_elements(type));
+      break;
+   }
+   case nir_intrinsic_store_var: {
+      nir_deref_var *stored = instr->variables[0];
+      const struct glsl_type *type = nir_deref_tail(&stored->deref)->type;
+      assert(glsl_type_is_vector_or_scalar(type) ||
+             (stored->var->data.mode == nir_var_uniform &&
+              glsl_get_base_type(type) == GLSL_TYPE_SUBROUTINE));
+      assert(instr->num_components == glsl_get_vector_elements(type));
+      /* The store target must not be an input, uniform or SSBO variable. */
+      assert(stored->var->data.mode != nir_var_shader_in &&
+             stored->var->data.mode != nir_var_uniform &&
+             stored->var->data.mode != nir_var_shader_storage);
+      /* const_index[0] may only have bits for components being stored. */
+      assert((instr->const_index[0] & ~((1 << instr->num_components) - 1)) == 0);
+      break;
+   }
+   case nir_intrinsic_copy_var: {
+      nir_deref_var *copy_dest = instr->variables[0];
+      nir_deref_var *copy_src = instr->variables[1];
+      assert(nir_deref_tail(&copy_dest->deref)->type ==
+             nir_deref_tail(&copy_src->deref)->type);
+      assert(copy_dest->var->data.mode != nir_var_shader_in &&
+             copy_dest->var->data.mode != nir_var_uniform &&
+             copy_dest->var->data.mode != nir_var_shader_storage);
+      break;
+   }
+   default:
+      break;
+   }
+}
+
+/*
+ * Validate a texture instruction: each source type may appear at most once,
+ * every source is validated, then the optional sampler deref and the dest.
+ */
+static void
+validate_tex_instr(nir_tex_instr *instr, validate_state *state)
+{
+   bool src_type_seen[nir_num_tex_src_types];
+   unsigned type_idx = 0;
+   while (type_idx < nir_num_tex_src_types)
+      src_type_seen[type_idx++] = false;
+
+   for (unsigned src_idx = 0; src_idx < instr->num_srcs; src_idx++) {
+      assert(!src_type_seen[instr->src[src_idx].src_type]);
+      src_type_seen[instr->src[src_idx].src_type] = true;
+      validate_src(&instr->src[src_idx].src, state);
+   }
+
+   if (instr->sampler != NULL) {
+      validate_deref_var(instr, instr->sampler, state);
+   }
+
+   validate_dest(&instr->dest, state);
+}
+
+/*
+ * Validate a call instruction against its callee's signature.
+ *
+ * A NULL return_deref is only legal when the callee returns void; otherwise
+ * the return deref's type must match the callee's return type. The parameter
+ * count and each parameter's type must match as well. Every deref that is
+ * present must be a ralloc child of the call instruction.
+ */
+static void
+validate_call_instr(nir_call_instr *instr, validate_state *state)
+{
+   if (instr->return_deref == NULL)
+      assert(glsl_type_is_void(instr->callee->return_type));
+   else
+      assert(instr->return_deref->deref.type == instr->callee->return_type);
+
+   assert(instr->num_params == instr->callee->num_params);
+
+   for (unsigned i = 0; i < instr->num_params; i++) {
+      assert(instr->callee->params[i].type == instr->params[i]->deref.type);
+      validate_deref_var(instr, instr->params[i], state);
+   }
+
+   /* validate_deref_var() asserts a non-NULL deref, so calls to void
+    * functions (which carry no return deref) must skip it.
+    */
+   if (instr->return_deref != NULL)
+      validate_deref_var(instr, instr->return_deref, state);
+}
+
+/* A load_const only needs its SSA definition validated. */
+static void
+validate_load_const_instr(nir_load_const_instr *instr, validate_state *state)
+{
+   nir_ssa_def *def = &instr->def;
+
+   validate_ssa_def(def, state);
+}
+
+/* An SSA undef only needs its SSA definition validated. */
+static void
+validate_ssa_undef_instr(nir_ssa_undef_instr *instr, validate_state *state)
+{
+   nir_ssa_def *def = &instr->def;
+
+   validate_ssa_def(def, state);
+}
+
+/*
+ * Validate a phi's destination and check that it has exactly one source per
+ * predecessor of the current block.
+ */
+static void
+validate_phi_instr(nir_phi_instr *instr, validate_state *state)
+{
+   /*
+    * The sources themselves are not validated here; that happens when we
+    * reach them from their predecessor basic blocks, to avoid validating an
+    * SSA use before its definition.
+    */
+   validate_dest(&instr->dest, state);
+
+   exec_list_validate(&instr->srcs);
+
+   assert(exec_list_length(&instr->srcs) ==
+          state->block->predecessors->entries);
+}
+
+/*
+ * Validate one instruction by dispatching on its type.
+ *
+ * state->instr points at the instruction while it is being validated and is
+ * reset to NULL afterwards. Jump instructions need no per-instruction
+ * checks here.
+ */
+static void
+validate_instr(nir_instr *instr, validate_state *state)
+{
+   assert(instr->block == state->block);
+
+   state->instr = instr;
+
+   switch (instr->type) {
+   case nir_instr_type_alu:
+      validate_alu_instr(nir_instr_as_alu(instr), state);
+      break;
+
+   case nir_instr_type_call:
+      validate_call_instr(nir_instr_as_call(instr), state);
+      break;
+
+   case nir_instr_type_intrinsic:
+      validate_intrinsic_instr(nir_instr_as_intrinsic(instr), state);
+      break;
+
+   case nir_instr_type_tex:
+      validate_tex_instr(nir_instr_as_tex(instr), state);
+      break;
+
+   case nir_instr_type_load_const:
+      validate_load_const_instr(nir_instr_as_load_const(instr), state);
+      break;
+
+   case nir_instr_type_phi:
+      validate_phi_instr(nir_instr_as_phi(instr), state);
+      break;
+
+   case nir_instr_type_ssa_undef:
+      validate_ssa_undef_instr(nir_instr_as_ssa_undef(instr), state);
+      break;
+
+   case nir_instr_type_jump:
+      break;
+
+   default:
+      /* This switch covers every instruction type, not just ALU ones. */
+      assert(!"Invalid instruction type");
+      break;
+   }
+
+   state->instr = NULL;
+}
+
+/*
+ * Validate the source of phi `instr` that comes from predecessor `pred`.
+ * Aborts if the phi has no source for that predecessor.
+ */
+static void
+validate_phi_src(nir_phi_instr *instr, nir_block *pred, validate_state *state)
+{
+   state->instr = &instr->instr;
+
+   assert(instr->dest.is_ssa);
+
+   exec_list_validate(&instr->srcs);
+   nir_foreach_phi_src(instr, phi_src) {
+      if (phi_src->pred != pred)
+         continue;
+
+      assert(phi_src->src.is_ssa);
+      assert(phi_src->src.ssa->num_components ==
+             instr->dest.ssa.num_components);
+
+      validate_src(&phi_src->src, state);
+      state->instr = NULL;
+      return;
+   }
+
+   /* No source matched this predecessor. */
+   abort();
+}
+
+/*
+ * For each phi in the leading run of phis in `succ`, validate the source
+ * that corresponds to predecessor `block`.
+ */
+static void
+validate_phi_srcs(nir_block *block, nir_block *succ, validate_state *state)
+{
+   nir_foreach_instr(succ, instr) {
+      /* Stop at the first non-phi instruction. */
+      if (instr->type != nir_instr_type_phi) {
+         break;
+      }
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      validate_phi_src(phi, block, state);
+   }
+}
+
+static void validate_cf_node(nir_cf_node *node, validate_state *state);
+
+/*
+ * Validate a basic block: its instructions, the consistency of its
+ * successor/predecessor links, the phi sources in its successors that
+ * correspond to this block, and that its successors match what the
+ * surrounding control flow (or a trailing jump) dictates.
+ *
+ * break and continue jumps are only meaningful inside a loop, so they
+ * assert that a loop is currently being visited before using state->loop.
+ */
+static void
+validate_block(nir_block *block, validate_state *state)
+{
+   assert(block->cf_node.parent == state->parent_node);
+
+   state->block = block;
+
+   exec_list_validate(&block->instr_list);
+   nir_foreach_instr(block, instr) {
+      /* phis must form a contiguous run at the top of the block */
+      if (instr->type == nir_instr_type_phi) {
+         assert(instr == nir_block_first_instr(block) ||
+                nir_instr_prev(instr)->type == nir_instr_type_phi);
+      }
+
+      /* a jump may only be the last instruction */
+      if (instr->type == nir_instr_type_jump) {
+         assert(instr == nir_block_last_instr(block));
+      }
+
+      validate_instr(instr, state);
+   }
+
+   assert(block->successors[0] != NULL);
+   assert(block->successors[0] != block->successors[1]);
+
+   for (unsigned i = 0; i < 2; i++) {
+      if (block->successors[i] != NULL) {
+         struct set_entry *entry =
+            _mesa_set_search(block->successors[i]->predecessors, block);
+         assert(entry);
+
+         validate_phi_srcs(block, block->successors[i], state);
+      }
+   }
+
+   struct set_entry *entry;
+   set_foreach(block->predecessors, entry) {
+      const nir_block *pred = entry->key;
+      assert(pred->successors[0] == block ||
+             pred->successors[1] == block);
+   }
+
+   if (!exec_list_is_empty(&block->instr_list) &&
+       nir_block_last_instr(block)->type == nir_instr_type_jump) {
+      assert(block->successors[1] == NULL);
+      nir_jump_instr *jump = nir_instr_as_jump(nir_block_last_instr(block));
+      switch (jump->type) {
+      case nir_jump_break: {
+         assert(state->loop != NULL && "break outside of a loop");
+         nir_block *after =
+            nir_cf_node_as_block(nir_cf_node_next(&state->loop->cf_node));
+         assert(block->successors[0] == after);
+         break;
+      }
+
+      case nir_jump_continue: {
+         assert(state->loop != NULL && "continue outside of a loop");
+         nir_block *first =
+            nir_cf_node_as_block(nir_loop_first_cf_node(state->loop));
+         assert(block->successors[0] == first);
+         break;
+      }
+
+      case nir_jump_return:
+         assert(block->successors[0] == state->impl->end_block);
+         break;
+
+      default:
+         unreachable("bad jump type");
+      }
+   } else {
+      nir_cf_node *next = nir_cf_node_next(&block->cf_node);
+      if (next == NULL) {
+         /* last block of its list: the successor depends on the parent */
+         switch (state->parent_node->type) {
+         case nir_cf_node_loop: {
+            nir_block *first =
+               nir_cf_node_as_block(nir_loop_first_cf_node(state->loop));
+            assert(block->successors[0] == first);
+            /* due to the hack for infinite loops, block->successors[1] may
+             * point to the block after the loop.
+             */
+            break;
+         }
+
+         case nir_cf_node_if: {
+            nir_block *after =
+               nir_cf_node_as_block(nir_cf_node_next(state->parent_node));
+            assert(block->successors[0] == after);
+            assert(block->successors[1] == NULL);
+            break;
+         }
+
+         case nir_cf_node_function:
+            assert(block->successors[0] == state->impl->end_block);
+            assert(block->successors[1] == NULL);
+            break;
+
+         default:
+            unreachable("unknown control flow node type");
+         }
+      } else {
+         if (next->type == nir_cf_node_if) {
+            nir_if *if_stmt = nir_cf_node_as_if(next);
+            assert(&block->successors[0]->cf_node ==
+                   nir_if_first_then_node(if_stmt));
+            assert(&block->successors[1]->cf_node ==
+                   nir_if_first_else_node(if_stmt));
+         } else {
+            assert(next->type == nir_cf_node_loop);
+            nir_loop *loop = nir_cf_node_as_loop(next);
+            assert(&block->successors[0]->cf_node ==
+                   nir_loop_first_cf_node(loop));
+            assert(block->successors[1] == NULL);
+         }
+      }
+   }
+}
+
+/*
+ * Validate an if statement: it must sit between two blocks, its condition
+ * is validated as a source, and both branches must be non-empty. The
+ * branches are validated with the if as their parent node.
+ */
+static void
+validate_if(nir_if *if_stmt, validate_state *state)
+{
+   nir_cf_node *self = &if_stmt->cf_node;
+
+   state->if_stmt = if_stmt;
+
+   assert(!exec_node_is_head_sentinel(self->node.prev));
+   nir_cf_node *before = nir_cf_node_prev(self);
+   assert(before->type == nir_cf_node_block);
+
+   assert(!exec_node_is_tail_sentinel(self->node.next));
+   nir_cf_node *after = nir_cf_node_next(self);
+   assert(after->type == nir_cf_node_block);
+
+   validate_src(&if_stmt->condition, state);
+
+   assert(!exec_list_is_empty(&if_stmt->then_list));
+   assert(!exec_list_is_empty(&if_stmt->else_list));
+
+   nir_cf_node *saved_parent = state->parent_node;
+   state->parent_node = self;
+
+   exec_list_validate(&if_stmt->then_list);
+   foreach_list_typed(nir_cf_node, child, node, &if_stmt->then_list)
+      validate_cf_node(child, state);
+
+   exec_list_validate(&if_stmt->else_list);
+   foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list)
+      validate_cf_node(child, state);
+
+   state->parent_node = saved_parent;
+   state->if_stmt = NULL;
+}
+
+/*
+ * Validate a loop: it must sit between two blocks and have a non-empty
+ * body. The body is validated with the loop as both the parent node and
+ * the current loop; the previous values are restored afterwards.
+ */
+static void
+validate_loop(nir_loop *loop, validate_state *state)
+{
+   nir_cf_node *self = &loop->cf_node;
+
+   assert(!exec_node_is_head_sentinel(self->node.prev));
+   nir_cf_node *before = nir_cf_node_prev(self);
+   assert(before->type == nir_cf_node_block);
+
+   assert(!exec_node_is_tail_sentinel(self->node.next));
+   nir_cf_node *after = nir_cf_node_next(self);
+   assert(after->type == nir_cf_node_block);
+
+   assert(!exec_list_is_empty(&loop->body));
+
+   nir_cf_node *saved_parent = state->parent_node;
+   nir_loop *saved_loop = state->loop;
+   state->parent_node = self;
+   state->loop = loop;
+
+   exec_list_validate(&loop->body);
+   foreach_list_typed(nir_cf_node, child, node, &loop->body)
+      validate_cf_node(child, state);
+
+   state->parent_node = saved_parent;
+   state->loop = saved_loop;
+}
+
+/*
+ * Validate one control flow node: check its parent pointer, then hand it to
+ * the validator for its concrete kind (block, if or loop).
+ */
+static void
+validate_cf_node(nir_cf_node *node, validate_state *state)
+{
+   assert(node->parent == state->parent_node);
+
+   switch (node->type) {
+   case nir_cf_node_block: {
+      nir_block *block = nir_cf_node_as_block(node);
+      validate_block(block, state);
+      break;
+   }
+
+   case nir_cf_node_if: {
+      nir_if *if_stmt = nir_cf_node_as_if(node);
+      validate_if(if_stmt, state);
+      break;
+   }
+
+   case nir_cf_node_loop: {
+      nir_loop *loop = nir_cf_node_as_loop(node);
+      validate_loop(loop, state);
+      break;
+   }
+
+   default:
+      unreachable("Invalid CF node type");
+   }
+}
+
+/*
+ * Check a register declaration before any code is walked and create the
+ * reg_validate_state that collects the uses and defs found during the walk.
+ * The register's index must be in range for its scope and not seen before.
+ */
+static void
+prevalidate_reg_decl(nir_register *reg, bool is_global, validate_state *state)
+{
+   assert(reg->is_global == is_global);
+
+   if (is_global) {
+      assert(reg->index < state->shader->reg_alloc);
+   } else {
+      assert(reg->index < state->impl->reg_alloc);
+   }
+
+   assert(!BITSET_TEST(state->regs_found, reg->index));
+   BITSET_SET(state->regs_found, reg->index);
+
+   list_validate(&reg->uses);
+   list_validate(&reg->defs);
+   list_validate(&reg->if_uses);
+
+   /* The record and its sets are ralloc descendants of the regs table. */
+   reg_validate_state *tracked = ralloc(state->regs, reg_validate_state);
+   tracked->uses = _mesa_set_create(tracked, _mesa_hash_pointer,
+                                    _mesa_key_pointer_equal);
+   tracked->if_uses = _mesa_set_create(tracked, _mesa_hash_pointer,
+                                       _mesa_key_pointer_equal);
+   tracked->defs = _mesa_set_create(tracked, _mesa_hash_pointer,
+                                    _mesa_key_pointer_equal);
+
+   if (is_global)
+      tracked->where_defined = NULL;
+   else
+      tracked->where_defined = state->impl;
+
+   _mesa_hash_table_insert(state->regs, reg, tracked);
+}
+
+/*
+ * After the code has been walked, compare the uses, if_uses and defs the
+ * validator collected for a register with the register's own lists. Every
+ * list entry must have been collected; anything collected that is not on
+ * the lists is printed and the process aborts.
+ */
+static void
+postvalidate_reg_decl(nir_register *reg, validate_state *state)
+{
+   struct hash_entry *reg_entry = _mesa_hash_table_search(state->regs, reg);
+
+   reg_validate_state *reg_state = (reg_validate_state *) reg_entry->data;
+
+   nir_foreach_use(reg, src) {
+      struct set_entry *found = _mesa_set_search(reg_state->uses, src);
+      assert(found);
+      _mesa_set_remove(reg_state->uses, found);
+   }
+
+   if (reg_state->uses->entries != 0) {
+      printf("extra entries in register uses:\n");
+      struct set_entry *extra;
+      set_foreach(reg_state->uses, extra)
+         printf("%p\n", extra->key);
+
+      abort();
+   }
+
+   nir_foreach_if_use(reg, src) {
+      struct set_entry *found = _mesa_set_search(reg_state->if_uses, src);
+      assert(found);
+      _mesa_set_remove(reg_state->if_uses, found);
+   }
+
+   if (reg_state->if_uses->entries != 0) {
+      printf("extra entries in register if_uses:\n");
+      struct set_entry *extra;
+      set_foreach(reg_state->if_uses, extra)
+         printf("%p\n", extra->key);
+
+      abort();
+   }
+
+   nir_foreach_def(reg, dest) {
+      struct set_entry *found = _mesa_set_search(reg_state->defs, dest);
+      assert(found);
+      _mesa_set_remove(reg_state->defs, found);
+   }
+
+   if (reg_state->defs->entries != 0) {
+      printf("extra entries in register defs:\n");
+      struct set_entry *extra;
+      set_foreach(reg_state->defs, extra)
+         printf("%p\n", extra->key);
+
+      abort();
+   }
+}
+
+/*
+ * Check a variable declaration: a variable has mode nir_var_local exactly
+ * when it is not declared globally. Local variables are recorded in
+ * var_defs against the impl being validated.
+ */
+static void
+validate_var_decl(nir_variable *var, bool is_global, validate_state *state)
+{
+   assert(is_global != (var->data.mode == nir_var_local));
+
+   /*
+    * TODO validate some things ir_validate.cpp does (requires more GLSL type
+    * support)
+    */
+
+   if (is_global)
+      return;
+
+   _mesa_hash_table_insert(state->var_defs, var, state->impl);
+}
+
+/*
+ * After the code has been walked, compare the uses and if_uses the validator
+ * collected for an SSA def with the def's own lists. Every list entry must
+ * have been collected; anything collected that is not on the lists is
+ * printed and the process aborts.
+ *
+ * void_state is the validate_state. Always returns true.
+ */
+static bool
+postvalidate_ssa_def(nir_ssa_def *def, void *void_state)
+{
+   validate_state *state = void_state;
+
+   struct hash_entry *def_entry = _mesa_hash_table_search(state->ssa_defs, def);
+   /* every def reached here must have been registered by validate_ssa_def() */
+   assert(def_entry);
+   ssa_def_validate_state *def_state =
+      (ssa_def_validate_state *)def_entry->data;
+
+   nir_foreach_use(def, src) {
+      struct set_entry *found = _mesa_set_search(def_state->uses, src);
+      assert(found);
+      _mesa_set_remove(def_state->uses, found);
+   }
+
+   if (def_state->uses->entries != 0) {
+      printf("extra entries in SSA def uses:\n");
+      struct set_entry *extra;
+      set_foreach(def_state->uses, extra)
+         printf("%p\n", extra->key);
+
+      abort();
+   }
+
+   nir_foreach_if_use(def, src) {
+      struct set_entry *found = _mesa_set_search(def_state->if_uses, src);
+      assert(found);
+      _mesa_set_remove(def_state->if_uses, found);
+   }
+
+   if (def_state->if_uses->entries != 0) {
+      printf("extra entries in SSA def if_uses:\n");
+      struct set_entry *extra;
+      set_foreach(def_state->if_uses, extra)
+         printf("%p\n", extra->key);
+
+      abort();
+   }
+
+   return true;
+}
+
+/*
+ * Block callback: run postvalidate_ssa_def() on every SSA def of every
+ * instruction in the block. Always returns true.
+ */
+static bool
+postvalidate_ssa_defs_block(nir_block *block, void *state)
+{
+   nir_foreach_instr(block, instr) {
+      nir_foreach_ssa_def(instr, postvalidate_ssa_def, state);
+   }
+
+   return true;
+}
+
+/*
+ * Validate one function implementation: its link to the function, its
+ * parameters and return variable, the end block, local variables, local
+ * registers and the body. Registers and SSA defs are cross-checked against
+ * their use/def lists once the body has been walked.
+ */
+static void
+validate_function_impl(nir_function_impl *impl, validate_state *state)
+{
+   assert(impl->function->impl == impl);
+   assert(impl->cf_node.parent == NULL);
+
+   assert(impl->num_params == impl->function->num_params);
+   for (unsigned param = 0; param < impl->num_params; param++) {
+      assert(impl->params[param]->type == impl->function->params[param].type);
+   }
+
+   if (glsl_type_is_void(impl->function->return_type)) {
+      assert(impl->return_var == NULL);
+   } else {
+      assert(impl->return_var->type == impl->function->return_type);
+   }
+
+   assert(exec_list_is_empty(&impl->end_block->instr_list));
+   assert(impl->end_block->successors[0] == NULL);
+   assert(impl->end_block->successors[1] == NULL);
+
+   state->impl = impl;
+   state->parent_node = &impl->cf_node;
+
+   exec_list_validate(&impl->locals);
+   nir_foreach_variable(var, &impl->locals) {
+      validate_var_decl(var, false, state);
+   }
+
+   /* Resize and clear the "register index seen" bitset for this impl. */
+   state->regs_found = realloc(state->regs_found,
+                               BITSET_WORDS(impl->reg_alloc) *
+                               sizeof(BITSET_WORD));
+   memset(state->regs_found, 0, BITSET_WORDS(impl->reg_alloc) *
+                                sizeof(BITSET_WORD));
+   exec_list_validate(&impl->registers);
+   foreach_list_typed(nir_register, reg, node, &impl->registers) {
+      prevalidate_reg_decl(reg, false, state);
+   }
+
+   /* Likewise for the "SSA index seen" bitset. */
+   state->ssa_defs_found = realloc(state->ssa_defs_found,
+                                   BITSET_WORDS(impl->ssa_alloc) *
+                                   sizeof(BITSET_WORD));
+   memset(state->ssa_defs_found, 0, BITSET_WORDS(impl->ssa_alloc) *
+                                    sizeof(BITSET_WORD));
+   exec_list_validate(&impl->body);
+   foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) {
+      validate_cf_node(cf_node, state);
+   }
+
+   foreach_list_typed(nir_register, reg, node, &impl->registers) {
+      postvalidate_reg_decl(reg, state);
+   }
+
+   nir_foreach_block(impl, postvalidate_ssa_defs_block, state);
+}
+
+/* Validate a function; functions without an implementation are skipped. */
+static void
+validate_function(nir_function *func, validate_state *state)
+{
+   if (func->impl == NULL)
+      return;
+
+   assert(func->impl->function == func);
+   validate_function_impl(func->impl, state);
+}
+
+/*
+ * Initialize a validate_state: create the lookup tables and put every
+ * "current position" pointer into a known NULL state.
+ *
+ * validate_src() reads state->instr to decide whether a source belongs to
+ * an instruction or to an if condition, and an if can be reached before any
+ * instruction has been validated, so these fields must not be left
+ * uninitialized.
+ */
+static void
+init_validate_state(validate_state *state)
+{
+   state->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
+   state->ssa_defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+   state->ssa_defs_found = NULL;
+   state->regs_found = NULL;
+   state->var_defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+
+   state->shader = NULL;
+   state->impl = NULL;
+   state->parent_node = NULL;
+   state->block = NULL;
+   state->instr = NULL;
+   state->if_stmt = NULL;
+   state->loop = NULL;
+}
+
+/* Free the bitsets and hash tables owned by a validate_state. */
+static void
+destroy_validate_state(validate_state *state)
+{
+   /* malloc-family bitsets */
+   free(state->regs_found);
+   free(state->ssa_defs_found);
+
+   /* hash tables */
+   _mesa_hash_table_destroy(state->regs, NULL);
+   _mesa_hash_table_destroy(state->ssa_defs, NULL);
+   _mesa_hash_table_destroy(state->var_defs, NULL);
+}
+
+/*
+ * Validate a whole shader: its global variable lists, global registers and
+ * every function. Global registers are cross-checked against their use/def
+ * lists after all functions have been walked.
+ */
+void
+nir_validate_shader(nir_shader *shader)
+{
+   validate_state state;
+   init_validate_state(&state);
+
+   state.shader = shader;
+
+   /* Shader-level variable lists, validated in this order. */
+   struct exec_list *global_var_lists[] = {
+      &shader->uniforms,
+      &shader->inputs,
+      &shader->outputs,
+      &shader->globals,
+      &shader->system_values,
+   };
+   const unsigned num_lists =
+      sizeof(global_var_lists) / sizeof(global_var_lists[0]);
+
+   for (unsigned i = 0; i < num_lists; i++) {
+      exec_list_validate(global_var_lists[i]);
+      nir_foreach_variable(var, global_var_lists[i]) {
+         validate_var_decl(var, true, &state);
+      }
+   }
+
+   /* Resize and clear the "register index seen" bitset for global regs. */
+   state.regs_found = realloc(state.regs_found,
+                              BITSET_WORDS(shader->reg_alloc) *
+                              sizeof(BITSET_WORD));
+   memset(state.regs_found, 0, BITSET_WORDS(shader->reg_alloc) *
+                               sizeof(BITSET_WORD));
+   exec_list_validate(&shader->registers);
+   foreach_list_typed(nir_register, reg, node, &shader->registers) {
+      prevalidate_reg_decl(reg, true, &state);
+   }
+
+   exec_list_validate(&shader->functions);
+   foreach_list_typed(nir_function, func, node, &shader->functions) {
+      validate_function(func, &state);
+   }
+
+   foreach_list_typed(nir_register, reg, node, &shader->registers) {
+      postvalidate_reg_decl(reg, &state);
+   }
+
+   destroy_validate_state(&state);
+}
+
+#endif /* DEBUG */
diff --git a/src/compiler/nir/nir_vla.h b/src/compiler/nir/nir_vla.h
new file mode 100644
index 00000000000..753783316a2
--- /dev/null
+++ b/src/compiler/nir/nir_vla.h
@@ -0,0 +1,54 @@
+/**************************************************************************
+ *
+ * Copyright 2015 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#pragma once
+
+
+#include "c99_alloca.h"
+
+
+/* Declare an uninitialized variable length array, allocated with alloca() */
+#define NIR_VLA(_type, _name, _length) \
+ _type *_name = alloca((_length) * sizeof *_name)
+
+
+/* Declare a variable length array, and initialize it with the given byte.
+ *
+ * _length is evaluated twice, so expressions with side-effects must be
+ * avoided.
+ */
+#define NIR_VLA_FILL(_type, _name, _length, _byte) \
+ _type *_name = memset(alloca((_length) * sizeof *_name), _byte, (_length) * sizeof *_name)
+
+
+/* Declare a variable length array, and zero it.
+ *
+ * Just like NIR_VLA_FILL, _length is evaluated twice, so expressions with
+ * side-effects must be avoided.
+ */
+#define NIR_VLA_ZERO(_type, _name, _length) \
+ NIR_VLA_FILL(_type, _name, _length, 0)
diff --git a/src/compiler/nir/nir_worklist.c b/src/compiler/nir/nir_worklist.c
new file mode 100644
index 00000000000..3087a1d2354
--- /dev/null
+++ b/src/compiler/nir/nir_worklist.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ *
+ */
+
+#include "nir_worklist.h"
+
+/**
+ * Set up w as an empty worklist with room for num_blocks blocks.
+ *
+ * Two arrays are allocated with mem_ctx as their ralloc context: the
+ * presence bitset (via rzalloc_array(), so that no block starts out marked
+ * as queued) and the array of block pointers that backs the queue.
+ */
+void
+nir_block_worklist_init(nir_block_worklist *w, unsigned num_blocks,
+                        void *mem_ctx)
+{
+   w->blocks_present = rzalloc_array(mem_ctx, BITSET_WORD,
+                                     BITSET_WORDS(num_blocks));
+   w->blocks = ralloc_array(mem_ctx, nir_block *, num_blocks);
+
+   /* Empty queue whose head sits at slot 0. */
+   w->start = 0;
+   w->count = 0;
+   w->size = num_blocks;
+}
+
+/**
+ * Free the two arrays owned by w.
+ *
+ * The nir_block_worklist structure itself is not freed, and its fields are
+ * not reset, so the worklist must be initialized again before any further
+ * use.
+ */
+void
+nir_block_worklist_fini(nir_block_worklist *w)
+{
+   ralloc_free(w->blocks_present);
+   ralloc_free(w->blocks);
+}
+
+/* Per-block callback used by nir_block_worklist_add_all(): pushes block onto
+ * the tail of the worklist that is passed through the untyped w argument.
+ *
+ * Always returns true (presumably "keep iterating" for nir_foreach_block();
+ * the iterator's contract is not visible from this file).
+ */
+static bool
+worklist_add_block(nir_block *block, void *w)
+{
+   nir_block_worklist *worklist = w;
+
+   nir_block_worklist_push_tail(worklist, block);
+   return true;
+}
+
+/**
+ * Queue every block that nir_foreach_block() visits in impl.
+ *
+ * Each block is pushed onto the tail in visiting order; a block that is
+ * already in the worklist is skipped by the push itself.
+ */
+void
+nir_block_worklist_add_all(nir_block_worklist *w, nir_function_impl *impl)
+{
+   nir_foreach_block(impl, worklist_add_block, w);
+}
+
+/**
+ * Insert block at the front of the queue.
+ *
+ * Nothing happens if the block is already queued.  Otherwise the worklist
+ * must still have a free slot (asserted).
+ */
+void
+nir_block_worklist_push_head(nir_block_worklist *w, nir_block *block)
+{
+   /* A block that is already queued keeps its current position. */
+   if (BITSET_TEST(w->blocks_present, block->index))
+      return;
+
+   assert(w->count < w->size);
+
+   /* Move the head back by one slot, wrapping from slot 0 to the last one. */
+   w->start = (w->start == 0 ? w->size : w->start) - 1;
+   w->count++;
+
+   BITSET_SET(w->blocks_present, block->index);
+   w->blocks[w->start] = block;
+}
+
+/**
+ * Return the block at the front of the queue without removing it.
+ *
+ * The worklist must not be empty (asserted).
+ */
+nir_block *
+nir_block_worklist_peek_head(const nir_block_worklist *w)
+{
+   assert(w->count > 0);
+
+   nir_block *front = w->blocks[w->start];
+
+   return front;
+}
+
+/**
+ * Remove the block at the front of the queue and return it.
+ *
+ * The worklist must not be empty (asserted).  The block's bit in the
+ * presence bitset is cleared, so it can be pushed again later.
+ */
+nir_block *
+nir_block_worklist_pop_head(nir_block_worklist *w)
+{
+   assert(w->count > 0);
+
+   nir_block *block = w->blocks[w->start];
+
+   /* Advance the head by one slot, wrapping at the end of the array. */
+   w->start = (w->start + 1) % w->size;
+   w->count--;
+
+   BITSET_CLEAR(w->blocks_present, block->index);
+
+   return block;
+}
+
+/**
+ * Append block at the back of the queue.
+ *
+ * Nothing happens if the block is already queued.  Otherwise the worklist
+ * must still have a free slot (asserted).
+ */
+void
+nir_block_worklist_push_tail(nir_block_worklist *w, nir_block *block)
+{
+   /* A block that is already queued keeps its current position. */
+   if (BITSET_TEST(w->blocks_present, block->index))
+      return;
+
+   assert(w->count < w->size);
+
+   /* Before count is bumped, start + count names the first free slot after
+    * the current tail (modulo the array size).
+    */
+   const unsigned slot = (w->start + w->count) % w->size;
+
+   w->count++;
+
+   BITSET_SET(w->blocks_present, block->index);
+   w->blocks[slot] = block;
+}
+
+/**
+ * Return the block at the back of the queue without removing it.
+ *
+ * The worklist must not be empty (asserted).
+ */
+nir_block *
+nir_block_worklist_peek_tail(const nir_block_worklist *w)
+{
+   assert(w->count > 0);
+
+   /* The last occupied slot is count - 1 places after the head. */
+   return w->blocks[(w->start + w->count - 1) % w->size];
+}
+
+/**
+ * Remove the block at the back of the queue and return it.
+ *
+ * The worklist must not be empty (asserted).  The block's bit in the
+ * presence bitset is cleared, so it can be pushed again later.
+ */
+nir_block *
+nir_block_worklist_pop_tail(nir_block_worklist *w)
+{
+   assert(w->count > 0);
+
+   /* Once count has been decremented, start + count is the slot that held
+    * the old tail (modulo the array size).
+    */
+   w->count--;
+
+   nir_block *block = w->blocks[(w->start + w->count) % w->size];
+
+   BITSET_CLEAR(w->blocks_present, block->index);
+
+   return block;
+}
diff --git a/src/compiler/nir/nir_worklist.h b/src/compiler/nir/nir_worklist.h
new file mode 100644
index 00000000000..829bff24a55
--- /dev/null
+++ b/src/compiler/nir/nir_worklist.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand ([email protected])
+ *
+ */
+
+#pragma once
+
+#ifndef _NIR_WORKLIST_
+#define _NIR_WORKLIST_
+
+#include "nir.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Represents a double-ended queue of unique blocks
+ *
+ * The worklist data structure guarantees that each block is in the queue at
+ * most once.  Pushing a block onto either end of the queue is a no-op if
+ * the block is already in the queue.  In order for this to work, the
+ * caller must ensure that the blocks are properly indexed: membership is
+ * tracked in a bitset addressed by the block's index field.
+ */
+typedef struct {
+ /* The total size of the worklist, i.e. the number of slots in blocks */
+ unsigned size;
+
+ /* The number of blocks currently in the worklist */
+ unsigned count;
+
+ /* The offset in the array of blocks at which the list starts (the head).
+ * The queue wraps around the end of the array, so the tail may sit at a
+ * lower offset than the head.
+ */
+ unsigned start;
+
+ /* A bitset of all of the blocks currently present in the worklist,
+ * indexed by block index
+ */
+ BITSET_WORD *blocks_present;
+
+ /* The actual worklist: an array of size block pointers */
+ nir_block **blocks;
+} nir_block_worklist;
+/* Lifetime management: init sets w up as an empty worklist with room for
+ * num_blocks blocks, allocating its arrays from mem_ctx; fini frees those
+ * arrays again.
+ */
+void nir_block_worklist_init(nir_block_worklist *w, unsigned num_blocks,
+ void *mem_ctx);
+void nir_block_worklist_fini(nir_block_worklist *w);
+/* Push every block of impl onto the tail of the worklist. */
+void nir_block_worklist_add_all(nir_block_worklist *w, nir_function_impl *impl);
+
+/** Returns true when no blocks are currently queued in w. */
+static inline bool
+nir_block_worklist_is_empty(const nir_block_worklist *w)
+{
+   return !w->count;
+}
+/* Add block at the head of the queue; a no-op if it is already queued.
+ * The worklist must not be full.
+ */
+void nir_block_worklist_push_head(nir_block_worklist *w, nir_block *block);
+/* Return the block at the head without removing it.  The worklist must not
+ * be empty.
+ */
+nir_block *nir_block_worklist_peek_head(const nir_block_worklist *w);
+/* Remove and return the block at the head.  The worklist must not be
+ * empty.
+ */
+nir_block *nir_block_worklist_pop_head(nir_block_worklist *w);
+/* Add block at the tail of the queue; a no-op if it is already queued.
+ * The worklist must not be full.
+ */
+void nir_block_worklist_push_tail(nir_block_worklist *w, nir_block *block);
+/* Return the block at the tail without removing it.  The worklist must not
+ * be empty.
+ */
+nir_block *nir_block_worklist_peek_tail(const nir_block_worklist *w);
+/* Remove and return the block at the tail.  The worklist must not be
+ * empty.
+ */
+nir_block *nir_block_worklist_pop_tail(nir_block_worklist *w);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* _NIR_WORKLIST_ */
diff --git a/src/compiler/nir/tests/control_flow_tests.cpp b/src/compiler/nir/tests/control_flow_tests.cpp
new file mode 100644
index 00000000000..b9379ef3b06
--- /dev/null
+++ b/src/compiler/nir/tests/control_flow_tests.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include <gtest/gtest.h>
+#include "nir.h"
+#include "nir_builder.h"
+
+/* Fixture for the control-flow tests.
+ *
+ * Construction initializes the builder b through
+ * nir_builder_init_simple_shader() for a MESA_SHADER_VERTEX shader with
+ * default-initialized compiler options; destruction releases b.shader with
+ * ralloc_free().
+ */
+class nir_cf_test : public ::testing::Test {
+protected:
+   nir_cf_test()
+   {
+      /* Static so the options object outlives the constructor call. */
+      static const nir_shader_compiler_options options = { };
+      nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, &options);
+   }
+
+   ~nir_cf_test()
+   {
+      ralloc_free(b.shader);
+   }
+
+   nir_builder b;
+};
+
+TEST_F(nir_cf_test, delete_break_in_loop)
+{
+ /* Checks how block successors and predecessors are updated when the
+ * "break" that is the only instruction of a loop body is removed with
+ * nir_instr_remove().
+ *
+ * Create IR:
+ *
+ *    while (...) { break; }
+ */
+ nir_loop *loop = nir_loop_create(b.shader);
+ nir_cf_node_insert(nir_after_cf_list(&b.impl->body), &loop->cf_node);
+
+ b.cursor = nir_after_cf_list(&loop->body);
+
+ nir_jump_instr *jump = nir_jump_instr_create(b.shader, nir_jump_break);
+ nir_builder_instr_insert(&b, &jump->instr);
+
+ /* At this point, we should have:
+ *
+ * impl main {
+ *         block block_0:
+ *         // preds:
+ *         // succs: block_1
+ *         loop {
+ *                 block block_1:
+ *                 // preds: block_0
+ *                 break
+ *                 // succs: block_2
+ *         }
+ *         block block_2:
+ *         // preds: block_1
+ *         // succs: block_3
+ *         block block_3:
+ * }
+ */
+ nir_block *block_0 = nir_start_block(b.impl);
+ nir_block *block_1 = nir_cf_node_as_block(nir_loop_first_cf_node(loop));
+ nir_block *block_2 = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
+ nir_block *block_3 = b.impl->end_block;
+ ASSERT_EQ(nir_cf_node_block, block_0->cf_node.type);
+ ASSERT_EQ(nir_cf_node_block, block_1->cf_node.type);
+ ASSERT_EQ(nir_cf_node_block, block_2->cf_node.type);
+ ASSERT_EQ(nir_cf_node_block, block_3->cf_node.type);
+
+ /* Verify the successors and predecessors. */
+ EXPECT_EQ(block_1, block_0->successors[0]);
+ EXPECT_EQ(NULL, block_0->successors[1]);
+ EXPECT_EQ(block_2, block_1->successors[0]);
+ EXPECT_EQ(NULL, block_1->successors[1]);
+ EXPECT_EQ(block_3, block_2->successors[0]);
+ EXPECT_EQ(NULL, block_2->successors[1]);
+ EXPECT_EQ(NULL, block_3->successors[0]);
+ EXPECT_EQ(NULL, block_3->successors[1]);
+ EXPECT_EQ(0, block_0->predecessors->entries);
+ EXPECT_EQ(1, block_1->predecessors->entries);
+ EXPECT_EQ(1, block_2->predecessors->entries);
+ EXPECT_EQ(1, block_3->predecessors->entries);
+ EXPECT_TRUE(_mesa_set_search(block_1->predecessors, block_0));
+ EXPECT_TRUE(_mesa_set_search(block_2->predecessors, block_1));
+ EXPECT_TRUE(_mesa_set_search(block_3->predecessors, block_2));
+
+ nir_print_shader(b.shader, stderr);
+
+ /* Now remove the break.  Without it the loop body falls through to the
+ * top of the loop again.
+ */
+ nir_instr_remove(&jump->instr);
+
+ nir_print_shader(b.shader, stderr);
+
+ /* At this point, we should have:
+ *
+ * impl main {
+ *         block block_0:
+ *         // preds:
+ *         // succs: block_1
+ *         loop {
+ *                 block block_1:
+ *                 // preds: block_0 block_1
+ *                 // succs: block_1
+ *         }
+ *         block block_2:
+ *         // preds: block_1
+ *         // succs: block_3
+ *         block block_3:
+ * }
+ *
+ * Note that the diagram lists only block_1 as successor of block_1, while
+ * the checks below also expect block_2 in successors[1] (the "fake
+ * successor"); that is why block_2 still has block_1 as a predecessor.
+ *
+ * Re-verify the predecessors and successors.
+ */
+ EXPECT_EQ(block_1, block_0->successors[0]);
+ EXPECT_EQ(NULL, block_0->successors[1]);
+ EXPECT_EQ(block_1, block_1->successors[0]); /* back to itself */
+ EXPECT_EQ(block_2, block_1->successors[1]); /* fake successor */
+ EXPECT_EQ(block_3, block_2->successors[0]);
+ EXPECT_EQ(NULL, block_2->successors[1]);
+ EXPECT_EQ(NULL, block_3->successors[0]);
+ EXPECT_EQ(NULL, block_3->successors[1]);
+ EXPECT_EQ(0, block_0->predecessors->entries);
+ EXPECT_EQ(2, block_1->predecessors->entries);
+ EXPECT_EQ(1, block_2->predecessors->entries);
+ EXPECT_EQ(1, block_3->predecessors->entries);
+ EXPECT_TRUE(_mesa_set_search(block_1->predecessors, block_0));
+ EXPECT_TRUE(_mesa_set_search(block_1->predecessors, block_1));
+ EXPECT_TRUE(_mesa_set_search(block_2->predecessors, block_1));
+ EXPECT_TRUE(_mesa_set_search(block_3->predecessors, block_2));
+ /* Finally request dominance metadata for the modified impl; nothing is
+ * asserted about the result.  NOTE(review): presumably this only checks
+ * that the dominance analysis copes with the CFG above -- confirm.
+ */
+ nir_metadata_require(b.impl, nir_metadata_dominance);
+}