summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r--src/mesa/drivers/dri/Makefile.defines34
-rw-r--r--src/mesa/drivers/dri/Makefile.targets (renamed from src/mesa/drivers/dri/Makefile.template)37
-rw-r--r--src/mesa/drivers/dri/i810/Makefile4
-rw-r--r--src/mesa/drivers/dri/i915/Makefile4
-rw-r--r--src/mesa/drivers/dri/i915/i915_vtbl.c9
-rw-r--r--src/mesa/drivers/dri/i965/Makefile9
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h15
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h44
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp2747
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h16
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_emit.cpp875
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp71
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_visitor.cpp1734
-rw-r--r--src/mesa/drivers/dri/i965/brw_sf.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_sf.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_sf_emit.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.cpp194
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.h25
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_dump.c22
-rw-r--r--src/mesa/drivers/dri/i965/brw_structs.h74
-rw-r--r--src/mesa/drivers/dri/i965/brw_tex_layout.c5
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_constval.c12
-rw-r--r--src/mesa/drivers/dri/i965/brw_vtbl.c11
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.c35
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.h13
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_fp.c27
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c222
-rw-r--r--src/mesa/drivers/dri/i965/gen7_vs_state.c1
-rw-r--r--src/mesa/drivers/dri/intel/intel_buffers.c28
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.c62
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.h7
-rw-r--r--src/mesa/drivers/dri/intel/intel_fbo.c130
-rw-r--r--src/mesa/drivers/dri/intel/intel_fbo.h27
-rw-r--r--src/mesa/drivers/dri/intel/intel_mipmap_tree.c1
-rw-r--r--src/mesa/drivers/dri/intel/intel_mipmap_tree.h14
-rw-r--r--src/mesa/drivers/dri/intel/intel_span.c3
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_format.c1
-rw-r--r--src/mesa/drivers/dri/mach64/Makefile4
-rw-r--r--src/mesa/drivers/dri/mga/Makefile4
-rw-r--r--src/mesa/drivers/dri/nouveau/Makefile9
-rw-r--r--src/mesa/drivers/dri/r128/Makefile4
-rw-r--r--src/mesa/drivers/dri/r200/Makefile7
-rw-r--r--src/mesa/drivers/dri/r300/Makefile9
-rw-r--r--src/mesa/drivers/dri/r300/compiler/Makefile5
-rwxr-xr-xsrc/mesa/drivers/dri/r300/compiler/SConscript3
-rw-r--r--src/mesa/drivers/dri/r300/r300_state.c1
-rw-r--r--src/mesa/drivers/dri/r600/Makefile9
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_blit.c44
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_chip.c4
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_state.c16
-rw-r--r--src/mesa/drivers/dri/radeon/Makefile7
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_chipset.h14
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common_context.c2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_screen.c19
-rw-r--r--src/mesa/drivers/dri/savage/Makefile4
-rw-r--r--src/mesa/drivers/dri/sis/Makefile4
-rw-r--r--src/mesa/drivers/dri/swrast/Makefile4
-rw-r--r--src/mesa/drivers/dri/tdfx/Makefile4
-rw-r--r--src/mesa/drivers/dri/unichrome/Makefile4
62 files changed, 3674 insertions, 3039 deletions
diff --git a/src/mesa/drivers/dri/Makefile.defines b/src/mesa/drivers/dri/Makefile.defines
new file mode 100644
index 00000000000..19b6de8b85a
--- /dev/null
+++ b/src/mesa/drivers/dri/Makefile.defines
@@ -0,0 +1,34 @@
+# -*-makefile-*-
+
+COMMON_GALLIUM_SOURCES = \
+ ../common/utils.c \
+ ../common/vblank.c \
+ ../common/dri_util.c \
+ ../common/xmlconfig.c
+
+COMMON_SOURCES = $(COMMON_GALLIUM_SOURCES) \
+ ../../common/driverfuncs.c \
+ ../common/texmem.c \
+ ../common/drirenderbuffer.c
+
+INCLUDES = $(SHARED_INCLUDES) $(EXPAT_INCLUDES)
+
+OBJECTS = $(C_SOURCES:.c=.o) \
+ $(CXX_SOURCES:.cpp=.o) \
+ $(ASM_SOURCES:.S=.o)
+
+
+### Include directories
+SHARED_INCLUDES = \
+ -I. \
+ -I$(TOP)/src/mesa/drivers/dri/common \
+ -Iserver \
+ -I$(TOP)/include \
+ -I$(TOP)/src/mapi \
+ -I$(TOP)/src/mesa \
+ -I$(TOP)/src/egl/main \
+ -I$(TOP)/src/egl/drivers/dri \
+ $(LIBDRM_CFLAGS)
+
+INCLUDES += $(API_DEFINES)
+CXXFLAGS += $(API_DEFINES)
diff --git a/src/mesa/drivers/dri/Makefile.template b/src/mesa/drivers/dri/Makefile.targets
index d1a119379ed..436b2a3c477 100644
--- a/src/mesa/drivers/dri/Makefile.template
+++ b/src/mesa/drivers/dri/Makefile.targets
@@ -1,38 +1,5 @@
# -*-makefile-*-
-COMMON_GALLIUM_SOURCES = \
- ../common/utils.c \
- ../common/vblank.c \
- ../common/dri_util.c \
- ../common/xmlconfig.c
-
-COMMON_SOURCES = $(COMMON_GALLIUM_SOURCES) \
- ../../common/driverfuncs.c \
- ../common/texmem.c \
- ../common/drirenderbuffer.c
-
-INCLUDES = $(SHARED_INCLUDES) $(EXPAT_INCLUDES)
-
-OBJECTS = $(C_SOURCES:.c=.o) \
- $(CXX_SOURCES:.cpp=.o) \
- $(ASM_SOURCES:.S=.o)
-
-
-### Include directories
-SHARED_INCLUDES = \
- -I. \
- -I$(TOP)/src/mesa/drivers/dri/common \
- -Iserver \
- -I$(TOP)/include \
- -I$(TOP)/src/mapi \
- -I$(TOP)/src/mesa \
- -I$(TOP)/src/egl/main \
- -I$(TOP)/src/egl/drivers/dri \
- $(LIBDRM_CFLAGS)
-
-CFLAGS += $(API_DEFINES)
-CXXFLAGS += $(API_DEFINES)
-
##### RULES #####
.c.o:
@@ -45,7 +12,7 @@ CXXFLAGS += $(API_DEFINES)
$(CC) -c $(INCLUDES) $(DRI_CFLAGS) $(DRIVER_DEFINES) $< -o $@
-##### TARGETS #####
+#### TARGETS #####
default: subdirs lib
@@ -55,7 +22,7 @@ lib: symlinks subdirs depend
@$(MAKE) $(LIBNAME) $(TOP)/$(LIB_DIR)/$(LIBNAME)
$(LIBNAME): $(OBJECTS) $(EXTRA_MODULES) $(MESA_MODULES) Makefile \
- $(TOP)/src/mesa/drivers/dri/Makefile.template $(TOP)/src/mesa/drivers/dri/common/dri_test.o
+ $(TOP)/src/mesa/drivers/dri/Makefile.targets $(TOP)/src/mesa/drivers/dri/common/dri_test.o
$(MKLIB) -o [email protected] -noprefix -linker '$(CXX)' -ldflags '$(LDFLAGS)' \
$(OBJECTS) $(EXTRA_MODULES) $(DRI_LIB_DEPS)
$(CXX) $(CFLAGS) -o [email protected] $(TOP)/src/mesa/drivers/dri/common/dri_test.o [email protected] $(DRI_LIB_DEPS)
diff --git a/src/mesa/drivers/dri/i810/Makefile b/src/mesa/drivers/dri/i810/Makefile
index 54a837d5ea9..edc6dd21732 100644
--- a/src/mesa/drivers/dri/i810/Makefile
+++ b/src/mesa/drivers/dri/i810/Makefile
@@ -5,6 +5,8 @@ include $(TOP)/configs/current
LIBNAME = i810_dri.so
+include ../Makefile.defines
+
DRIVER_SOURCES = \
i810context.c \
i810ioctl.c \
@@ -24,5 +26,5 @@ C_SOURCES = \
ASM_SOURCES =
-include ../Makefile.template
+include ../Makefile.targets
diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile
index 65fd658c047..79e03f2f1d1 100644
--- a/src/mesa/drivers/dri/i915/Makefile
+++ b/src/mesa/drivers/dri/i915/Makefile
@@ -4,6 +4,8 @@ include $(TOP)/configs/current
LIBNAME = i915_dri.so
+include ../Makefile.defines
+
DRIVER_SOURCES = \
i830_context.c \
i830_state.c \
@@ -63,7 +65,7 @@ DRIVER_DEFINES = -I../intel -DI915 \
INCLUDES += $(INTEL_CFLAGS)
DRI_LIB_DEPS += $(INTEL_LIBS)
-include ../Makefile.template
+include ../Makefile.targets
intel_decode.o: ../intel/intel_decode.c
diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c
index 89650b618e4..820feba04ba 100644
--- a/src/mesa/drivers/dri/i915/i915_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i915_vtbl.c
@@ -687,6 +687,14 @@ i915_assert_not_dirty( struct intel_context *intel )
(void) dirty;
}
+/** Return false; i915 does not support HiZ. */
+static bool
+i915_is_hiz_depth_format(struct intel_context *intel,
+ gl_format format)
+{
+ return false;
+}
+
void
i915InitVtbl(struct i915_context *i915)
{
@@ -702,4 +710,5 @@ i915InitVtbl(struct i915_context *i915)
i915->intel.vtbl.assert_not_dirty = i915_assert_not_dirty;
i915->intel.vtbl.finish_batch = intel_finish_vb;
i915->intel.vtbl.render_target_supported = i915_render_target_supported;
+ i915->intel.vtbl.is_hiz_depth_format = i915_is_hiz_depth_format;
}
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index b96f42bfe88..44f28cd9d15 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -4,6 +4,8 @@ include $(TOP)/configs/current
LIBNAME = i965_dri.so
+include ../Makefile.defines
+
DRIVER_SOURCES = \
intel_batchbuffer.c \
intel_blit.c \
@@ -116,10 +118,13 @@ C_SOURCES = \
CXX_SOURCES = \
brw_cubemap_normalize.cpp \
brw_fs.cpp \
+ brw_fs_emit.cpp \
+ brw_fs_visitor.cpp \
brw_fs_channel_expressions.cpp \
brw_fs_reg_allocate.cpp \
brw_fs_schedule_instructions.cpp \
- brw_fs_vector_splitting.cpp
+ brw_fs_vector_splitting.cpp \
+ brw_shader.cpp
ASM_SOURCES =
@@ -128,7 +133,7 @@ DRIVER_DEFINES = -I../intel
INCLUDES += $(INTEL_CFLAGS)
DRI_LIB_DEPS += $(INTEL_LIBS)
-include ../Makefile.template
+include ../Makefile.targets
intel_decode.o: ../intel/intel_decode.c
intel_tex_layout.o: ../intel/intel_tex_layout.c
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index df753abed02..0256ab9061f 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -94,9 +94,9 @@ GLboolean brwCreateContext( int api,
ctx->Const.MaxVertexTextureImageUnits +
ctx->Const.MaxTextureImageUnits;
- /* Mesa limits textures to 4kx4k; it would be nice to fix that someday
- */
- ctx->Const.MaxTextureLevels = 13;
+ ctx->Const.MaxTextureLevels = 14; /* 8192 */
+ if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
+ ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
ctx->Const.Max3DTextureLevels = 9;
ctx->Const.MaxCubeTextureLevels = 12;
ctx->Const.MaxTextureRectSize = (1<<12);
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 1d2ef066db2..621b6f8990b 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -231,8 +231,8 @@ struct brw_wm_prog_data {
GLuint first_curbe_grf;
GLuint first_curbe_grf_16;
- GLuint total_grf;
- GLuint total_grf_16;
+ GLuint reg_blocks;
+ GLuint reg_blocks_16;
GLuint total_scratch;
GLuint nr_params; /**< number of float params/constants */
@@ -863,6 +863,17 @@ float convert_param(enum param_conversion conversion, float param)
}
}
+/**
+ * Pre-gen6, the register file of the EUs was shared between threads,
+ * and each thread used some subset allocated on a 16-register block
+ * granularity. The unit states wanted these block counts.
+ */
+static inline int
+brw_register_blocks(int reg_count)
+{
+ return ALIGN(reg_count, 16) / 16 - 1;
+}
+
GLboolean brw_do_cubemap_normalize(struct exec_list *instructions);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 5eb7892bb08..6d41b1e69d3 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -28,7 +28,10 @@
* Authors:
* Keith Whitwell <[email protected]>
*/
-
+
+#define INTEL_MASK(high, low) (((1<<((high)-(low)+1))-1)<<(low))
+#define SET_FIELD(value, field) (((value) << field ## _SHIFT) & field ## _MASK)
+#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT)
#ifndef BRW_DEFINES_H
#define BRW_DEFINES_H
@@ -243,8 +246,17 @@
#define BRW_STENCILOP_DECR 6
#define BRW_STENCILOP_INVERT 7
+/* Surface state DW0 */
+#define BRW_SURFACE_RC_READ_WRITE (1 << 8)
+#define BRW_SURFACE_MIPLAYOUT_SHIFT 10
#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0
#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1
+#define BRW_SURFACE_CUBEFACE_ENABLES 0x3f
+#define BRW_SURFACE_BLEND_ENABLED (1 << 13)
+#define BRW_SURFACE_WRITEDISABLE_B_SHIFT 14
+#define BRW_SURFACE_WRITEDISABLE_G_SHIFT 15
+#define BRW_SURFACE_WRITEDISABLE_R_SHIFT 16
+#define BRW_SURFACE_WRITEDISABLE_A_SHIFT 17
#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001
@@ -403,10 +415,14 @@
#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D
#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F
+#define BRW_SURFACE_FORMAT_SHIFT 18
+#define BRW_SURFACE_FORMAT_MASK INTEL_MASK(26, 18)
#define BRW_SURFACERETURNFORMAT_FLOAT32 0
#define BRW_SURFACERETURNFORMAT_S1 1
+#define BRW_SURFACE_TYPE_SHIFT 29
+#define BRW_SURFACE_TYPE_MASK INTEL_MASK(31, 29)
#define BRW_SURFACE_1D 0
#define BRW_SURFACE_2D 1
#define BRW_SURFACE_3D 2
@@ -414,6 +430,32 @@
#define BRW_SURFACE_BUFFER 4
#define BRW_SURFACE_NULL 7
+/* Surface state DW2 */
+#define BRW_SURFACE_HEIGHT_SHIFT 19
+#define BRW_SURFACE_HEIGHT_MASK INTEL_MASK(31, 19)
+#define BRW_SURFACE_WIDTH_SHIFT 6
+#define BRW_SURFACE_WIDTH_MASK INTEL_MASK(18, 6)
+#define BRW_SURFACE_LOD_SHIFT 2
+#define BRW_SURFACE_LOD_MASK INTEL_MASK(5, 2)
+
+/* Surface state DW3 */
+#define BRW_SURFACE_DEPTH_SHIFT 21
+#define BRW_SURFACE_DEPTH_MASK INTEL_MASK(31, 21)
+#define BRW_SURFACE_PITCH_SHIFT 3
+#define BRW_SURFACE_PITCH_MASK INTEL_MASK(19, 3)
+#define BRW_SURFACE_TILED (1 << 1)
+#define BRW_SURFACE_TILED_Y (1 << 0)
+
+/* Surface state DW4 */
+#define BRW_SURFACE_MIN_LOD_SHIFT 28
+#define BRW_SURFACE_MIN_LOD_MASK INTEL_MASK(31, 28)
+
+/* Surface state DW5 */
+#define BRW_SURFACE_X_OFFSET_SHIFT 25
+#define BRW_SURFACE_X_OFFSET_MASK INTEL_MASK(31, 25)
+#define BRW_SURFACE_Y_OFFSET_SHIFT 20
+#define BRW_SURFACE_Y_OFFSET_MASK INTEL_MASK(23, 20)
+
#define BRW_TEXCOORDMODE_WRAP 0
#define BRW_TEXCOORDMODE_MIRROR 1
#define BRW_TEXCOORDMODE_CLAMP 2
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 2d41302d15a..e7370f36064 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -93,8 +93,6 @@ static void brw_set_dest(struct brw_compile *p,
struct brw_instruction *insn,
struct brw_reg dest)
{
- struct intel_context *intel = &p->brw->intel;
-
if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
dest.file != BRW_MESSAGE_REGISTER_FILE)
assert(dest.nr < 128);
@@ -1254,7 +1252,6 @@ struct brw_instruction *gen6_CONT(struct brw_compile *p,
struct brw_instruction *do_insn)
{
struct brw_instruction *insn;
- int br = 2;
insn = next_insn(p, BRW_OPCODE_CONTINUE);
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 6d545afab64..09033aecd7c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -19,10 +19,13 @@
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
+ */
+
+/** @file brw_fs.cpp
*
- * Authors:
- * Eric Anholt <[email protected]>
- *
+ * This file drives the GLSL IR -> LIR translation, contains the
+ * optimizations on the LIR, and drives the generation of native code
+ * from the LIR.
*/
extern "C" {
@@ -34,7 +37,6 @@ extern "C" {
#include "main/uniforms.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
-#include "program/prog_optimize.h"
#include "program/register_allocate.h"
#include "program/sampler.h"
#include "program/hash_table.h"
@@ -42,113 +44,15 @@ extern "C" {
#include "brw_eu.h"
#include "brw_wm.h"
}
+#include "brw_shader.h"
#include "brw_fs.h"
#include "../glsl/glsl_types.h"
-#include "../glsl/ir_optimization.h"
#include "../glsl/ir_print_visitor.h"
#define MAX_INSTRUCTION (1 << 30)
-static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);
-
-struct gl_shader *
-brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
-{
- struct brw_shader *shader;
-
- shader = rzalloc(NULL, struct brw_shader);
- if (shader) {
- shader->base.Type = type;
- shader->base.Name = name;
- _mesa_init_shader(ctx, &shader->base);
- }
-
- return &shader->base;
-}
-
-struct gl_shader_program *
-brw_new_shader_program(struct gl_context *ctx, GLuint name)
-{
- struct brw_shader_program *prog;
- prog = rzalloc(NULL, struct brw_shader_program);
- if (prog) {
- prog->base.Name = name;
- _mesa_init_shader_program(ctx, &prog->base);
- }
- return &prog->base;
-}
-
-GLboolean
-brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
-{
- struct brw_context *brw = brw_context(ctx);
- struct intel_context *intel = &brw->intel;
-
- struct brw_shader *shader =
- (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
- if (shader != NULL) {
- void *mem_ctx = ralloc_context(NULL);
- bool progress;
-
- if (shader->ir)
- ralloc_free(shader->ir);
- shader->ir = new(shader) exec_list;
- clone_ir_list(mem_ctx, shader->ir, shader->base.ir);
-
- do_mat_op_to_vec(shader->ir);
- lower_instructions(shader->ir,
- MOD_TO_FRACT |
- DIV_TO_MUL_RCP |
- SUB_TO_ADD_NEG |
- EXP_TO_EXP2 |
- LOG_TO_LOG2);
-
- /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this,
- * if-statements need to be flattened.
- */
- if (intel->gen < 6)
- lower_if_to_cond_assign(shader->ir, 16);
-
- do_lower_texture_projection(shader->ir);
- do_vec_index_to_cond_assign(shader->ir);
- brw_do_cubemap_normalize(shader->ir);
- lower_noise(shader->ir);
- lower_quadop_vector(shader->ir, false);
- lower_variable_index_to_cond_assign(shader->ir,
- GL_TRUE, /* input */
- GL_TRUE, /* output */
- GL_TRUE, /* temp */
- GL_TRUE /* uniform */
- );
-
- do {
- progress = false;
-
- brw_do_channel_expressions(shader->ir);
- brw_do_vector_splitting(shader->ir);
-
- progress = do_lower_jumps(shader->ir, true, true,
- true, /* main return */
- false, /* continue */
- false /* loops */
- ) || progress;
-
- progress = do_common_optimization(shader->ir, true, 32) || progress;
- } while (progress);
-
- validate_ir_tree(shader->ir);
- reparent_ir(shader->ir, shader->ir);
- ralloc_free(mem_ctx);
- }
-
- if (!_mesa_ir_link_shader(ctx, prog))
- return GL_FALSE;
-
- return GL_TRUE;
-}
-
-static int
-type_size(const struct glsl_type *type)
+int
+fs_visitor::type_size(const struct glsl_type *type)
{
unsigned int size, i;
@@ -180,17 +84,23 @@ type_size(const struct glsl_type *type)
void
fs_visitor::fail(const char *format, ...)
{
- if (!failed) {
- failed = true;
+ va_list va;
+ char *msg;
- if (INTEL_DEBUG & DEBUG_WM) {
- fprintf(stderr, "FS compile failed: ");
+ if (failed)
+ return;
- va_list va;
- va_start(va, format);
- vfprintf(stderr, format, va);
- va_end(va);
- }
+ failed = true;
+
+ va_start(va, format);
+ msg = ralloc_vasprintf(mem_ctx, format, va);
+ va_end(va);
+ msg = ralloc_asprintf(mem_ctx, "FS compile failed: %s\n", msg);
+
+ this->fail_msg = msg;
+
+ if (INTEL_DEBUG & DEBUG_WM) {
+ fprintf(stderr, msg);
}
}
@@ -297,38 +207,13 @@ fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type)
this->type = type;
}
-int
-brw_type_for_base_type(const struct glsl_type *type)
-{
- switch (type->base_type) {
- case GLSL_TYPE_FLOAT:
- return BRW_REGISTER_TYPE_F;
- case GLSL_TYPE_INT:
- case GLSL_TYPE_BOOL:
- return BRW_REGISTER_TYPE_D;
- case GLSL_TYPE_UINT:
- return BRW_REGISTER_TYPE_UD;
- case GLSL_TYPE_ARRAY:
- case GLSL_TYPE_STRUCT:
- case GLSL_TYPE_SAMPLER:
- /* These should be overridden with the type of the member when
- * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely
- * way to trip up if we don't.
- */
- return BRW_REGISTER_TYPE_UD;
- default:
- assert(!"not reached");
- return BRW_REGISTER_TYPE_F;
- }
-}
-
/** Automatic reg constructor. */
fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
{
init();
this->file = GRF;
- this->reg = v->virtual_grf_alloc(type_size(type));
+ this->reg = v->virtual_grf_alloc(v->type_size(type));
this->reg_offset = 0;
this->type = brw_type_for_base_type(type);
}
@@ -584,7 +469,7 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
attr.reg_offset++;
}
- if (intel->gen < 6 && !(is_gl_Color && c->key.linear_color)) {
+ if (intel->gen < 6) {
attr.reg_offset -= type->vector_elements;
for (unsigned int k = 0; k < type->vector_elements; k++) {
emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w);
@@ -706,2134 +591,6 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1)
return inst;
}
-void
-fs_visitor::visit(ir_variable *ir)
-{
- fs_reg *reg = NULL;
-
- if (variable_storage(ir))
- return;
-
- if (strcmp(ir->name, "gl_FragColor") == 0) {
- this->frag_color = ir;
- } else if (strcmp(ir->name, "gl_FragData") == 0) {
- this->frag_data = ir;
- } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
- this->frag_depth = ir;
- }
-
- if (ir->mode == ir_var_in) {
- if (!strcmp(ir->name, "gl_FragCoord")) {
- reg = emit_fragcoord_interpolation(ir);
- } else if (!strcmp(ir->name, "gl_FrontFacing")) {
- reg = emit_frontfacing_interpolation(ir);
- } else {
- reg = emit_general_interpolation(ir);
- }
- assert(reg);
- hash_table_insert(this->variable_ht, reg, ir);
- return;
- }
-
- if (ir->mode == ir_var_uniform) {
- int param_index = c->prog_data.nr_params;
-
- if (c->dispatch_width == 16) {
- if (!variable_storage(ir)) {
- fail("Failed to find uniform '%s' in 16-wide\n", ir->name);
- }
- return;
- }
-
- if (!strncmp(ir->name, "gl_", 3)) {
- setup_builtin_uniform_values(ir);
- } else {
- setup_uniform_values(ir->location, ir->type);
- }
-
- reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
- reg->type = brw_type_for_base_type(ir->type);
- }
-
- if (!reg)
- reg = new(this->mem_ctx) fs_reg(this, ir->type);
-
- hash_table_insert(this->variable_ht, reg, ir);
-}
-
-void
-fs_visitor::visit(ir_dereference_variable *ir)
-{
- fs_reg *reg = variable_storage(ir->var);
- this->result = *reg;
-}
-
-void
-fs_visitor::visit(ir_dereference_record *ir)
-{
- const glsl_type *struct_type = ir->record->type;
-
- ir->record->accept(this);
-
- unsigned int offset = 0;
- for (unsigned int i = 0; i < struct_type->length; i++) {
- if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
- break;
- offset += type_size(struct_type->fields.structure[i].type);
- }
- this->result.reg_offset += offset;
- this->result.type = brw_type_for_base_type(ir->type);
-}
-
-void
-fs_visitor::visit(ir_dereference_array *ir)
-{
- ir_constant *index;
- int element_size;
-
- ir->array->accept(this);
- index = ir->array_index->as_constant();
-
- element_size = type_size(ir->type);
- this->result.type = brw_type_for_base_type(ir->type);
-
- if (index) {
- assert(this->result.file == UNIFORM ||
- (this->result.file == GRF &&
- this->result.reg != 0));
- this->result.reg_offset += index->value.i[0] * element_size;
- } else {
- assert(!"FINISHME: non-constant array element");
- }
-}
-
-/* Instruction selection: Produce a MOV.sat instead of
- * MIN(MAX(val, 0), 1) when possible.
- */
-bool
-fs_visitor::try_emit_saturate(ir_expression *ir)
-{
- ir_rvalue *sat_val = ir->as_rvalue_to_saturate();
-
- if (!sat_val)
- return false;
-
- sat_val->accept(this);
- fs_reg src = this->result;
-
- this->result = fs_reg(this, ir->type);
- fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src);
- inst->saturate = true;
-
- return true;
-}
-
-static uint32_t
-brw_conditional_for_comparison(unsigned int op)
-{
- switch (op) {
- case ir_binop_less:
- return BRW_CONDITIONAL_L;
- case ir_binop_greater:
- return BRW_CONDITIONAL_G;
- case ir_binop_lequal:
- return BRW_CONDITIONAL_LE;
- case ir_binop_gequal:
- return BRW_CONDITIONAL_GE;
- case ir_binop_equal:
- case ir_binop_all_equal: /* same as equal for scalars */
- return BRW_CONDITIONAL_Z;
- case ir_binop_nequal:
- case ir_binop_any_nequal: /* same as nequal for scalars */
- return BRW_CONDITIONAL_NZ;
- default:
- assert(!"not reached: bad operation for comparison");
- return BRW_CONDITIONAL_NZ;
- }
-}
-
-void
-fs_visitor::visit(ir_expression *ir)
-{
- unsigned int operand;
- fs_reg op[2], temp;
- fs_inst *inst;
-
- assert(ir->get_num_operands() <= 2);
-
- if (try_emit_saturate(ir))
- return;
-
- for (operand = 0; operand < ir->get_num_operands(); operand++) {
- ir->operands[operand]->accept(this);
- if (this->result.file == BAD_FILE) {
- ir_print_visitor v;
- fail("Failed to get tree for expression operand:\n");
- ir->operands[operand]->accept(&v);
- }
- op[operand] = this->result;
-
- /* Matrix expression operands should have been broken down to vector
- * operations already.
- */
- assert(!ir->operands[operand]->type->is_matrix());
- /* And then those vector operands should have been broken down to scalar.
- */
- assert(!ir->operands[operand]->type->is_vector());
- }
-
- /* Storage for our result. If our result goes into an assignment, it will
- * just get copy-propagated out, so no worries.
- */
- this->result = fs_reg(this, ir->type);
-
- switch (ir->operation) {
- case ir_unop_logic_not:
- /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
- * ones complement of the whole register, not just bit 0.
- */
- emit(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1));
- break;
- case ir_unop_neg:
- op[0].negate = !op[0].negate;
- this->result = op[0];
- break;
- case ir_unop_abs:
- op[0].abs = true;
- op[0].negate = false;
- this->result = op[0];
- break;
- case ir_unop_sign:
- temp = fs_reg(this, ir->type);
-
- emit(BRW_OPCODE_MOV, this->result, fs_reg(0.0f));
-
- inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f));
- inst->conditional_mod = BRW_CONDITIONAL_G;
- inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(1.0f));
- inst->predicated = true;
-
- inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f));
- inst->conditional_mod = BRW_CONDITIONAL_L;
- inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f));
- inst->predicated = true;
-
- break;
- case ir_unop_rcp:
- emit_math(FS_OPCODE_RCP, this->result, op[0]);
- break;
-
- case ir_unop_exp2:
- emit_math(FS_OPCODE_EXP2, this->result, op[0]);
- break;
- case ir_unop_log2:
- emit_math(FS_OPCODE_LOG2, this->result, op[0]);
- break;
- case ir_unop_exp:
- case ir_unop_log:
- assert(!"not reached: should be handled by ir_explog_to_explog2");
- break;
- case ir_unop_sin:
- case ir_unop_sin_reduced:
- emit_math(FS_OPCODE_SIN, this->result, op[0]);
- break;
- case ir_unop_cos:
- case ir_unop_cos_reduced:
- emit_math(FS_OPCODE_COS, this->result, op[0]);
- break;
-
- case ir_unop_dFdx:
- emit(FS_OPCODE_DDX, this->result, op[0]);
- break;
- case ir_unop_dFdy:
- emit(FS_OPCODE_DDY, this->result, op[0]);
- break;
-
- case ir_binop_add:
- emit(BRW_OPCODE_ADD, this->result, op[0], op[1]);
- break;
- case ir_binop_sub:
- assert(!"not reached: should be handled by ir_sub_to_add_neg");
- break;
-
- case ir_binop_mul:
- emit(BRW_OPCODE_MUL, this->result, op[0], op[1]);
- break;
- case ir_binop_div:
- assert(!"not reached: should be handled by ir_div_to_mul_rcp");
- break;
- case ir_binop_mod:
- assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
- break;
-
- case ir_binop_less:
- case ir_binop_greater:
- case ir_binop_lequal:
- case ir_binop_gequal:
- case ir_binop_equal:
- case ir_binop_all_equal:
- case ir_binop_nequal:
- case ir_binop_any_nequal:
- temp = this->result;
- /* original gen4 does implicit conversion before comparison. */
- if (intel->gen < 5)
- temp.type = op[0].type;
-
- inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
- inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
- emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1));
- break;
-
- case ir_binop_logic_xor:
- emit(BRW_OPCODE_XOR, this->result, op[0], op[1]);
- break;
-
- case ir_binop_logic_or:
- emit(BRW_OPCODE_OR, this->result, op[0], op[1]);
- break;
-
- case ir_binop_logic_and:
- emit(BRW_OPCODE_AND, this->result, op[0], op[1]);
- break;
-
- case ir_binop_dot:
- case ir_unop_any:
- assert(!"not reached: should be handled by brw_fs_channel_expressions");
- break;
-
- case ir_unop_noise:
- assert(!"not reached: should be handled by lower_noise");
- break;
-
- case ir_quadop_vector:
- assert(!"not reached: should be handled by lower_quadop_vector");
- break;
-
- case ir_unop_sqrt:
- emit_math(FS_OPCODE_SQRT, this->result, op[0]);
- break;
-
- case ir_unop_rsq:
- emit_math(FS_OPCODE_RSQ, this->result, op[0]);
- break;
-
- case ir_unop_i2f:
- case ir_unop_b2f:
- case ir_unop_b2i:
- case ir_unop_f2i:
- emit(BRW_OPCODE_MOV, this->result, op[0]);
- break;
- case ir_unop_f2b:
- case ir_unop_i2b:
- temp = this->result;
- /* original gen4 does implicit conversion before comparison. */
- if (intel->gen < 5)
- temp.type = op[0].type;
-
- inst = emit(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- inst = emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(1));
- break;
-
- case ir_unop_trunc:
- emit(BRW_OPCODE_RNDZ, this->result, op[0]);
- break;
- case ir_unop_ceil:
- op[0].negate = !op[0].negate;
- inst = emit(BRW_OPCODE_RNDD, this->result, op[0]);
- this->result.negate = true;
- break;
- case ir_unop_floor:
- inst = emit(BRW_OPCODE_RNDD, this->result, op[0]);
- break;
- case ir_unop_fract:
- inst = emit(BRW_OPCODE_FRC, this->result, op[0]);
- break;
- case ir_unop_round_even:
- emit(BRW_OPCODE_RNDE, this->result, op[0]);
- break;
-
- case ir_binop_min:
- inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]);
- inst->conditional_mod = BRW_CONDITIONAL_L;
-
- inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
- inst->predicated = true;
- break;
- case ir_binop_max:
- inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]);
- inst->conditional_mod = BRW_CONDITIONAL_G;
-
- inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
- inst->predicated = true;
- break;
-
- case ir_binop_pow:
- emit_math(FS_OPCODE_POW, this->result, op[0], op[1]);
- break;
-
- case ir_unop_bit_not:
- inst = emit(BRW_OPCODE_NOT, this->result, op[0]);
- break;
- case ir_binop_bit_and:
- inst = emit(BRW_OPCODE_AND, this->result, op[0], op[1]);
- break;
- case ir_binop_bit_xor:
- inst = emit(BRW_OPCODE_XOR, this->result, op[0], op[1]);
- break;
- case ir_binop_bit_or:
- inst = emit(BRW_OPCODE_OR, this->result, op[0], op[1]);
- break;
-
- case ir_unop_u2f:
- case ir_binop_lshift:
- case ir_binop_rshift:
- assert(!"GLSL 1.30 features unsupported");
- break;
- }
-}
-
-void
-fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
- const glsl_type *type, bool predicated)
-{
- switch (type->base_type) {
- case GLSL_TYPE_FLOAT:
- case GLSL_TYPE_UINT:
- case GLSL_TYPE_INT:
- case GLSL_TYPE_BOOL:
- for (unsigned int i = 0; i < type->components(); i++) {
- l.type = brw_type_for_base_type(type);
- r.type = brw_type_for_base_type(type);
-
- fs_inst *inst = emit(BRW_OPCODE_MOV, l, r);
- inst->predicated = predicated;
-
- l.reg_offset++;
- r.reg_offset++;
- }
- break;
- case GLSL_TYPE_ARRAY:
- for (unsigned int i = 0; i < type->length; i++) {
- emit_assignment_writes(l, r, type->fields.array, predicated);
- }
- break;
-
- case GLSL_TYPE_STRUCT:
- for (unsigned int i = 0; i < type->length; i++) {
- emit_assignment_writes(l, r, type->fields.structure[i].type,
- predicated);
- }
- break;
-
- case GLSL_TYPE_SAMPLER:
- break;
-
- default:
- assert(!"not reached");
- break;
- }
-}
-
-void
-fs_visitor::visit(ir_assignment *ir)
-{
- struct fs_reg l, r;
- fs_inst *inst;
-
- /* FINISHME: arrays on the lhs */
- ir->lhs->accept(this);
- l = this->result;
-
- ir->rhs->accept(this);
- r = this->result;
-
- assert(l.file != BAD_FILE);
- assert(r.file != BAD_FILE);
-
- if (ir->condition) {
- emit_bool_to_cond_code(ir->condition);
- }
-
- if (ir->lhs->type->is_scalar() ||
- ir->lhs->type->is_vector()) {
- for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
- if (ir->write_mask & (1 << i)) {
- inst = emit(BRW_OPCODE_MOV, l, r);
- if (ir->condition)
- inst->predicated = true;
- r.reg_offset++;
- }
- l.reg_offset++;
- }
- } else {
- emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL);
- }
-}
-
-fs_inst *
-fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
- int sampler)
-{
- int mlen;
- int base_mrf = 1;
- bool simd16 = false;
- fs_reg orig_dst;
-
- /* g0 header. */
- mlen = 1;
-
- if (ir->shadow_comparitor) {
- for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- fs_inst *inst = emit(BRW_OPCODE_MOV,
- fs_reg(MRF, base_mrf + mlen + i), coordinate);
- if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
- inst->saturate = true;
-
- coordinate.reg_offset++;
- }
- /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
- mlen += 3;
-
- if (ir->op == ir_tex) {
- /* There's no plain shadow compare message, so we use shadow
- * compare with a bias of 0.0.
- */
- emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f));
- mlen++;
- } else if (ir->op == ir_txb) {
- ir->lod_info.bias->accept(this);
- emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
- mlen++;
- } else {
- assert(ir->op == ir_txl);
- ir->lod_info.lod->accept(this);
- emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
- mlen++;
- }
-
- ir->shadow_comparitor->accept(this);
- emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
- mlen++;
- } else if (ir->op == ir_tex) {
- for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
- coordinate);
- if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
- inst->saturate = true;
- coordinate.reg_offset++;
- }
- /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
- mlen += 3;
- } else if (ir->op == ir_txd) {
- assert(!"TXD isn't supported on gen4 yet.");
- } else {
- /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod
- * instructions. We'll need to do SIMD16 here.
- */
- assert(ir->op == ir_txb || ir->op == ir_txl);
-
- for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF,
- base_mrf + mlen + i * 2),
- coordinate);
- if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
- inst->saturate = true;
- coordinate.reg_offset++;
- }
-
- /* lod/bias appears after u/v/r. */
- mlen += 6;
-
- if (ir->op == ir_txb) {
- ir->lod_info.bias->accept(this);
- emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
- mlen++;
- } else {
- ir->lod_info.lod->accept(this);
- emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
- mlen++;
- }
-
- /* The unused upper half. */
- mlen++;
-
- /* Now, since we're doing simd16, the return is 2 interleaved
- * vec4s where the odd-indexed ones are junk. We'll need to move
- * this weirdness around to the expected layout.
- */
- simd16 = true;
- orig_dst = dst;
- dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type,
- 2));
- dst.type = BRW_REGISTER_TYPE_F;
- }
-
- fs_inst *inst = NULL;
- switch (ir->op) {
- case ir_tex:
- inst = emit(FS_OPCODE_TEX, dst);
- break;
- case ir_txb:
- inst = emit(FS_OPCODE_TXB, dst);
- break;
- case ir_txl:
- inst = emit(FS_OPCODE_TXL, dst);
- break;
- case ir_txd:
- inst = emit(FS_OPCODE_TXD, dst);
- break;
- case ir_txf:
- assert(!"GLSL 1.30 features unsupported");
- break;
- }
- inst->base_mrf = base_mrf;
- inst->mlen = mlen;
- inst->header_present = true;
-
- if (simd16) {
- for (int i = 0; i < 4; i++) {
- emit(BRW_OPCODE_MOV, orig_dst, dst);
- orig_dst.reg_offset++;
- dst.reg_offset += 2;
- }
- }
-
- return inst;
-}
-
-/* gen5's sampler has slots for u, v, r, array index, then optional
- * parameters like shadow comparitor or LOD bias. If optional
- * parameters aren't present, those base slots are optional and don't
- * need to be included in the message.
- *
- * We don't fill in the unnecessary slots regardless, which may look
- * surprising in the disassembly.
- */
-fs_inst *
-fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
- int sampler)
-{
- int mlen = 0;
- int base_mrf = 2;
- int reg_width = c->dispatch_width / 8;
- bool header_present = false;
-
- if (ir->offset) {
- /* The offsets set up by the ir_texture visitor are in the
- * m1 header, so we can't go headerless.
- */
- header_present = true;
- mlen++;
- base_mrf--;
- }
-
- for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- fs_inst *inst = emit(BRW_OPCODE_MOV,
- fs_reg(MRF, base_mrf + mlen + i * reg_width),
- coordinate);
- if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
- inst->saturate = true;
- coordinate.reg_offset++;
- }
- mlen += ir->coordinate->type->vector_elements * reg_width;
-
- if (ir->shadow_comparitor) {
- mlen = MAX2(mlen, header_present + 4 * reg_width);
-
- ir->shadow_comparitor->accept(this);
- emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
- mlen += reg_width;
- }
-
- fs_inst *inst = NULL;
- switch (ir->op) {
- case ir_tex:
- inst = emit(FS_OPCODE_TEX, dst);
- break;
- case ir_txb:
- ir->lod_info.bias->accept(this);
- mlen = MAX2(mlen, header_present + 4 * reg_width);
- emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
- mlen += reg_width;
-
- inst = emit(FS_OPCODE_TXB, dst);
-
- break;
- case ir_txl:
- ir->lod_info.lod->accept(this);
- mlen = MAX2(mlen, header_present + 4 * reg_width);
- emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
- mlen += reg_width;
-
- inst = emit(FS_OPCODE_TXL, dst);
- break;
- case ir_txd:
- case ir_txf:
- assert(!"GLSL 1.30 features unsupported");
- break;
- }
- inst->base_mrf = base_mrf;
- inst->mlen = mlen;
- inst->header_present = header_present;
-
- if (mlen > 11) {
- fail("Message length >11 disallowed by hardware\n");
- }
-
- return inst;
-}
-
-fs_inst *
-fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
- int sampler)
-{
- int mlen = 0;
- int base_mrf = 2;
- int reg_width = c->dispatch_width / 8;
- bool header_present = false;
-
- if (ir->offset) {
- /* The offsets set up by the ir_texture visitor are in the
- * m1 header, so we can't go headerless.
- */
- header_present = true;
- mlen++;
- base_mrf--;
- }
-
- if (ir->shadow_comparitor) {
- ir->shadow_comparitor->accept(this);
- emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
- mlen += reg_width;
- }
-
- /* Set up the LOD info */
- switch (ir->op) {
- case ir_tex:
- break;
- case ir_txb:
- ir->lod_info.bias->accept(this);
- emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
- mlen += reg_width;
- break;
- case ir_txl:
- ir->lod_info.lod->accept(this);
- emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
- mlen += reg_width;
- break;
- case ir_txd:
- case ir_txf:
- assert(!"GLSL 1.30 features unsupported");
- break;
- }
-
- /* Set up the coordinate */
- for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
- coordinate);
- if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
- inst->saturate = true;
- coordinate.reg_offset++;
- mlen += reg_width;
- }
-
- /* Generate the SEND */
- fs_inst *inst = NULL;
- switch (ir->op) {
- case ir_tex: inst = emit(FS_OPCODE_TEX, dst); break;
- case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
- case ir_txl: inst = emit(FS_OPCODE_TXL, dst); break;
- case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break;
- case ir_txf: assert(!"TXF unsupported.");
- }
- inst->base_mrf = base_mrf;
- inst->mlen = mlen;
- inst->header_present = header_present;
-
- if (mlen > 11) {
- fail("Message length >11 disallowed by hardware\n");
- }
-
- return inst;
-}
-
-void
-fs_visitor::visit(ir_texture *ir)
-{
- int sampler;
- fs_inst *inst = NULL;
-
- ir->coordinate->accept(this);
- fs_reg coordinate = this->result;
-
- if (ir->offset != NULL) {
- ir_constant *offset = ir->offset->as_constant();
- assert(offset != NULL);
-
- signed char offsets[3];
- for (unsigned i = 0; i < ir->offset->type->vector_elements; i++)
- offsets[i] = (signed char) offset->value.i[i];
-
- /* Combine all three offsets into a single unsigned dword:
- *
- * bits 11:8 - U Offset (X component)
- * bits 7:4 - V Offset (Y component)
- * bits 3:0 - R Offset (Z component)
- */
- unsigned offset_bits = 0;
- for (unsigned i = 0; i < ir->offset->type->vector_elements; i++) {
- const unsigned shift = 4 * (2 - i);
- offset_bits |= (offsets[i] << shift) & (0xF << shift);
- }
-
- /* Explicitly set up the message header by copying g0 to msg reg m1. */
- emit(BRW_OPCODE_MOV, fs_reg(MRF, 1, BRW_REGISTER_TYPE_UD),
- fs_reg(GRF, 0, BRW_REGISTER_TYPE_UD));
-
- /* Then set the offset bits in DWord 2 of the message header. */
- emit(BRW_OPCODE_MOV,
- fs_reg(retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1, 2),
- BRW_REGISTER_TYPE_UD)),
- fs_reg(brw_imm_uw(offset_bits)));
- }
-
- /* Should be lowered by do_lower_texture_projection */
- assert(!ir->projector);
-
- sampler = _mesa_get_sampler_uniform_value(ir->sampler,
- ctx->Shader.CurrentFragmentProgram,
- &brw->fragment_program->Base);
- sampler = c->fp->program.Base.SamplerUnits[sampler];
-
- /* The 965 requires the EU to do the normalization of GL rectangle
- * texture coordinates. We use the program parameter state
- * tracking to get the scaling factor.
- */
- if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
- struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
- int tokens[STATE_LENGTH] = {
- STATE_INTERNAL,
- STATE_TEXRECT_SCALE,
- sampler,
- 0,
- 0
- };
-
- if (c->dispatch_width == 16) {
- fail("rectangle scale uniform setup not supported on 16-wide\n");
- this->result = fs_reg(this, ir->type);
- return;
- }
-
- c->prog_data.param_convert[c->prog_data.nr_params] =
- PARAM_NO_CONVERT;
- c->prog_data.param_convert[c->prog_data.nr_params + 1] =
- PARAM_NO_CONVERT;
-
- fs_reg scale_x = fs_reg(UNIFORM, c->prog_data.nr_params);
- fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1);
- GLuint index = _mesa_add_state_reference(params,
- (gl_state_index *)tokens);
-
- this->param_index[c->prog_data.nr_params] = index;
- this->param_offset[c->prog_data.nr_params] = 0;
- c->prog_data.nr_params++;
- this->param_index[c->prog_data.nr_params] = index;
- this->param_offset[c->prog_data.nr_params] = 1;
- c->prog_data.nr_params++;
-
- fs_reg dst = fs_reg(this, ir->coordinate->type);
- fs_reg src = coordinate;
- coordinate = dst;
-
- emit(BRW_OPCODE_MUL, dst, src, scale_x);
- dst.reg_offset++;
- src.reg_offset++;
- emit(BRW_OPCODE_MUL, dst, src, scale_y);
- }
-
- /* Writemasking doesn't eliminate channels on SIMD8 texture
- * samples, so don't worry about them.
- */
- fs_reg dst = fs_reg(this, glsl_type::vec4_type);
-
- if (intel->gen >= 7) {
- inst = emit_texture_gen7(ir, dst, coordinate, sampler);
- } else if (intel->gen >= 5) {
- inst = emit_texture_gen5(ir, dst, coordinate, sampler);
- } else {
- inst = emit_texture_gen4(ir, dst, coordinate, sampler);
- }
-
- /* If there's an offset, we already set up m1. To avoid the implied move,
- * use the null register. Otherwise, we want an implied move from g0.
- */
- if (ir->offset != NULL || !inst->header_present)
- inst->src[0] = fs_reg(brw_null_reg());
- else
- inst->src[0] = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
-
- inst->sampler = sampler;
-
- this->result = dst;
-
- if (ir->shadow_comparitor)
- inst->shadow_compare = true;
-
- if (ir->type == glsl_type::float_type) {
- /* Ignore DEPTH_TEXTURE_MODE swizzling. */
- assert(ir->sampler->type->sampler_shadow);
- } else if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) {
- fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type);
-
- for (int i = 0; i < 4; i++) {
- int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
- fs_reg l = swizzle_dst;
- l.reg_offset += i;
-
- if (swiz == SWIZZLE_ZERO) {
- emit(BRW_OPCODE_MOV, l, fs_reg(0.0f));
- } else if (swiz == SWIZZLE_ONE) {
- emit(BRW_OPCODE_MOV, l, fs_reg(1.0f));
- } else {
- fs_reg r = dst;
- r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
- emit(BRW_OPCODE_MOV, l, r);
- }
- }
- this->result = swizzle_dst;
- }
-}
-
-void
-fs_visitor::visit(ir_swizzle *ir)
-{
- ir->val->accept(this);
- fs_reg val = this->result;
-
- if (ir->type->vector_elements == 1) {
- this->result.reg_offset += ir->mask.x;
- return;
- }
-
- fs_reg result = fs_reg(this, ir->type);
- this->result = result;
-
- for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
- fs_reg channel = val;
- int swiz = 0;
-
- switch (i) {
- case 0:
- swiz = ir->mask.x;
- break;
- case 1:
- swiz = ir->mask.y;
- break;
- case 2:
- swiz = ir->mask.z;
- break;
- case 3:
- swiz = ir->mask.w;
- break;
- }
-
- channel.reg_offset += swiz;
- emit(BRW_OPCODE_MOV, result, channel);
- result.reg_offset++;
- }
-}
-
-void
-fs_visitor::visit(ir_discard *ir)
-{
- assert(ir->condition == NULL); /* FINISHME */
-
- emit(FS_OPCODE_DISCARD);
- kill_emitted = true;
-}
-
-void
-fs_visitor::visit(ir_constant *ir)
-{
- /* Set this->result to reg at the bottom of the function because some code
- * paths will cause this visitor to be applied to other fields. This will
- * cause the value stored in this->result to be modified.
- *
- * Make reg constant so that it doesn't get accidentally modified along the
- * way. Yes, I actually had this problem. :(
- */
- const fs_reg reg(this, ir->type);
- fs_reg dst_reg = reg;
-
- if (ir->type->is_array()) {
- const unsigned size = type_size(ir->type->fields.array);
-
- for (unsigned i = 0; i < ir->type->length; i++) {
- ir->array_elements[i]->accept(this);
- fs_reg src_reg = this->result;
-
- dst_reg.type = src_reg.type;
- for (unsigned j = 0; j < size; j++) {
- emit(BRW_OPCODE_MOV, dst_reg, src_reg);
- src_reg.reg_offset++;
- dst_reg.reg_offset++;
- }
- }
- } else if (ir->type->is_record()) {
- foreach_list(node, &ir->components) {
- ir_instruction *const field = (ir_instruction *) node;
- const unsigned size = type_size(field->type);
-
- field->accept(this);
- fs_reg src_reg = this->result;
-
- dst_reg.type = src_reg.type;
- for (unsigned j = 0; j < size; j++) {
- emit(BRW_OPCODE_MOV, dst_reg, src_reg);
- src_reg.reg_offset++;
- dst_reg.reg_offset++;
- }
- }
- } else {
- const unsigned size = type_size(ir->type);
-
- for (unsigned i = 0; i < size; i++) {
- switch (ir->type->base_type) {
- case GLSL_TYPE_FLOAT:
- emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i]));
- break;
- case GLSL_TYPE_UINT:
- emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i]));
- break;
- case GLSL_TYPE_INT:
- emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i]));
- break;
- case GLSL_TYPE_BOOL:
- emit(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i]));
- break;
- default:
- assert(!"Non-float/uint/int/bool constant");
- }
- dst_reg.reg_offset++;
- }
- }
-
- this->result = reg;
-}
-
-void
-fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
-{
- ir_expression *expr = ir->as_expression();
-
- if (expr) {
- fs_reg op[2];
- fs_inst *inst;
-
- assert(expr->get_num_operands() <= 2);
- for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
- assert(expr->operands[i]->type->is_scalar());
-
- expr->operands[i]->accept(this);
- op[i] = this->result;
- }
-
- switch (expr->operation) {
- case ir_unop_logic_not:
- inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1));
- inst->conditional_mod = BRW_CONDITIONAL_Z;
- break;
-
- case ir_binop_logic_xor:
- inst = emit(BRW_OPCODE_XOR, reg_null_d, op[0], op[1]);
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- break;
-
- case ir_binop_logic_or:
- inst = emit(BRW_OPCODE_OR, reg_null_d, op[0], op[1]);
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- break;
-
- case ir_binop_logic_and:
- inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], op[1]);
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- break;
-
- case ir_unop_f2b:
- if (intel->gen >= 6) {
- inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0.0f));
- } else {
- inst = emit(BRW_OPCODE_MOV, reg_null_f, op[0]);
- }
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- break;
-
- case ir_unop_i2b:
- if (intel->gen >= 6) {
- inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0));
- } else {
- inst = emit(BRW_OPCODE_MOV, reg_null_d, op[0]);
- }
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- break;
-
- case ir_binop_greater:
- case ir_binop_gequal:
- case ir_binop_less:
- case ir_binop_lequal:
- case ir_binop_equal:
- case ir_binop_all_equal:
- case ir_binop_nequal:
- case ir_binop_any_nequal:
- inst = emit(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1]);
- inst->conditional_mod =
- brw_conditional_for_comparison(expr->operation);
- break;
-
- default:
- assert(!"not reached");
- fail("bad cond code\n");
- break;
- }
- return;
- }
-
- ir->accept(this);
-
- if (intel->gen >= 6) {
- fs_inst *inst = emit(BRW_OPCODE_AND, reg_null_d, this->result, fs_reg(1));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- } else {
- fs_inst *inst = emit(BRW_OPCODE_MOV, reg_null_d, this->result);
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- }
-}
-
-/**
- * Emit a gen6 IF statement with the comparison folded into the IF
- * instruction.
- */
-void
-fs_visitor::emit_if_gen6(ir_if *ir)
-{
- ir_expression *expr = ir->condition->as_expression();
-
- if (expr) {
- fs_reg op[2];
- fs_inst *inst;
- fs_reg temp;
-
- assert(expr->get_num_operands() <= 2);
- for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
- assert(expr->operands[i]->type->is_scalar());
-
- expr->operands[i]->accept(this);
- op[i] = this->result;
- }
-
- switch (expr->operation) {
- case ir_unop_logic_not:
- inst = emit(BRW_OPCODE_IF, temp, op[0], fs_reg(0));
- inst->conditional_mod = BRW_CONDITIONAL_Z;
- return;
-
- case ir_binop_logic_xor:
- inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]);
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- return;
-
- case ir_binop_logic_or:
- temp = fs_reg(this, glsl_type::bool_type);
- emit(BRW_OPCODE_OR, temp, op[0], op[1]);
- inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- return;
-
- case ir_binop_logic_and:
- temp = fs_reg(this, glsl_type::bool_type);
- emit(BRW_OPCODE_AND, temp, op[0], op[1]);
- inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- return;
-
- case ir_unop_f2b:
- inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- return;
-
- case ir_unop_i2b:
- inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- return;
-
- case ir_binop_greater:
- case ir_binop_gequal:
- case ir_binop_less:
- case ir_binop_lequal:
- case ir_binop_equal:
- case ir_binop_all_equal:
- case ir_binop_nequal:
- case ir_binop_any_nequal:
- inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]);
- inst->conditional_mod =
- brw_conditional_for_comparison(expr->operation);
- return;
- default:
- assert(!"not reached");
- inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- fail("bad condition\n");
- return;
- }
- return;
- }
-
- ir->condition->accept(this);
-
- fs_inst *inst = emit(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
-}
-
-void
-fs_visitor::visit(ir_if *ir)
-{
- fs_inst *inst;
-
- if (intel->gen != 6 && c->dispatch_width == 16) {
- fail("Can't support (non-uniform) control flow on 16-wide\n");
- }
-
- /* Don't point the annotation at the if statement, because then it plus
- * the then and else blocks get printed.
- */
- this->base_ir = ir->condition;
-
- if (intel->gen == 6) {
- emit_if_gen6(ir);
- } else {
- emit_bool_to_cond_code(ir->condition);
-
- inst = emit(BRW_OPCODE_IF);
- inst->predicated = true;
- }
-
- foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
- ir_instruction *ir = (ir_instruction *)iter.get();
- this->base_ir = ir;
-
- ir->accept(this);
- }
-
- if (!ir->else_instructions.is_empty()) {
- emit(BRW_OPCODE_ELSE);
-
- foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
- ir_instruction *ir = (ir_instruction *)iter.get();
- this->base_ir = ir;
-
- ir->accept(this);
- }
- }
-
- emit(BRW_OPCODE_ENDIF);
-}
-
-void
-fs_visitor::visit(ir_loop *ir)
-{
- fs_reg counter = reg_undef;
-
- if (c->dispatch_width == 16) {
- fail("Can't support (non-uniform) control flow on 16-wide\n");
- }
-
- if (ir->counter) {
- this->base_ir = ir->counter;
- ir->counter->accept(this);
- counter = *(variable_storage(ir->counter));
-
- if (ir->from) {
- this->base_ir = ir->from;
- ir->from->accept(this);
-
- emit(BRW_OPCODE_MOV, counter, this->result);
- }
- }
-
- emit(BRW_OPCODE_DO);
-
- if (ir->to) {
- this->base_ir = ir->to;
- ir->to->accept(this);
-
- fs_inst *inst = emit(BRW_OPCODE_CMP, reg_null_cmp, counter, this->result);
- inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);
-
- inst = emit(BRW_OPCODE_BREAK);
- inst->predicated = true;
- }
-
- foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
- ir_instruction *ir = (ir_instruction *)iter.get();
-
- this->base_ir = ir;
- ir->accept(this);
- }
-
- if (ir->increment) {
- this->base_ir = ir->increment;
- ir->increment->accept(this);
- emit(BRW_OPCODE_ADD, counter, counter, this->result);
- }
-
- emit(BRW_OPCODE_WHILE);
-}
-
-void
-fs_visitor::visit(ir_loop_jump *ir)
-{
- switch (ir->mode) {
- case ir_loop_jump::jump_break:
- emit(BRW_OPCODE_BREAK);
- break;
- case ir_loop_jump::jump_continue:
- emit(BRW_OPCODE_CONTINUE);
- break;
- }
-}
-
-void
-fs_visitor::visit(ir_call *ir)
-{
- assert(!"FINISHME");
-}
-
-void
-fs_visitor::visit(ir_return *ir)
-{
- assert(!"FINISHME");
-}
-
-void
-fs_visitor::visit(ir_function *ir)
-{
- /* Ignore function bodies other than main() -- we shouldn't see calls to
- * them since they should all be inlined before we get to ir_to_mesa.
- */
- if (strcmp(ir->name, "main") == 0) {
- const ir_function_signature *sig;
- exec_list empty;
-
- sig = ir->matching_signature(&empty);
-
- assert(sig);
-
- foreach_iter(exec_list_iterator, iter, sig->body) {
- ir_instruction *ir = (ir_instruction *)iter.get();
- this->base_ir = ir;
-
- ir->accept(this);
- }
- }
-}
-
-void
-fs_visitor::visit(ir_function_signature *ir)
-{
- assert(!"not reached");
- (void)ir;
-}
-
-fs_inst *
-fs_visitor::emit(fs_inst inst)
-{
- fs_inst *list_inst = new(mem_ctx) fs_inst;
- *list_inst = inst;
-
- if (force_uncompressed_stack > 0)
- list_inst->force_uncompressed = true;
- else if (force_sechalf_stack > 0)
- list_inst->force_sechalf = true;
-
- list_inst->annotation = this->current_annotation;
- list_inst->ir = this->base_ir;
-
- this->instructions.push_tail(list_inst);
-
- return list_inst;
-}
-
-/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
-void
-fs_visitor::emit_dummy_fs()
-{
- /* Everyone's favorite color. */
- emit(BRW_OPCODE_MOV, fs_reg(MRF, 2), fs_reg(1.0f));
- emit(BRW_OPCODE_MOV, fs_reg(MRF, 3), fs_reg(0.0f));
- emit(BRW_OPCODE_MOV, fs_reg(MRF, 4), fs_reg(1.0f));
- emit(BRW_OPCODE_MOV, fs_reg(MRF, 5), fs_reg(0.0f));
-
- fs_inst *write;
- write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0));
- write->base_mrf = 0;
-}
-
-/* The register location here is relative to the start of the URB
- * data. It will get adjusted to be a real location before
- * generate_code() time.
- */
-struct brw_reg
-fs_visitor::interp_reg(int location, int channel)
-{
- int regnr = urb_setup[location] * 2 + channel / 2;
- int stride = (channel & 1) * 4;
-
- assert(urb_setup[location] != -1);
-
- return brw_vec1_grf(regnr, stride);
-}
-
-/** Emits the interpolation for the varying inputs. */
-void
-fs_visitor::emit_interpolation_setup_gen4()
-{
- this->current_annotation = "compute pixel centers";
- this->pixel_x = fs_reg(this, glsl_type::uint_type);
- this->pixel_y = fs_reg(this, glsl_type::uint_type);
- this->pixel_x.type = BRW_REGISTER_TYPE_UW;
- this->pixel_y.type = BRW_REGISTER_TYPE_UW;
-
- emit(FS_OPCODE_PIXEL_X, this->pixel_x);
- emit(FS_OPCODE_PIXEL_Y, this->pixel_y);
-
- this->current_annotation = "compute pixel deltas from v0";
- if (brw->has_pln) {
- this->delta_x = fs_reg(this, glsl_type::vec2_type);
- this->delta_y = this->delta_x;
- this->delta_y.reg_offset++;
- } else {
- this->delta_x = fs_reg(this, glsl_type::float_type);
- this->delta_y = fs_reg(this, glsl_type::float_type);
- }
- emit(BRW_OPCODE_ADD, this->delta_x,
- this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0))));
- emit(BRW_OPCODE_ADD, this->delta_y,
- this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1))));
-
- this->current_annotation = "compute pos.w and 1/pos.w";
- /* Compute wpos.w. It's always in our setup, since it's needed to
- * interpolate the other attributes.
- */
- this->wpos_w = fs_reg(this, glsl_type::float_type);
- emit(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
- interp_reg(FRAG_ATTRIB_WPOS, 3));
- /* Compute the pixel 1/W value from wpos.w. */
- this->pixel_w = fs_reg(this, glsl_type::float_type);
- emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
- this->current_annotation = NULL;
-}
-
-/** Emits the interpolation for the varying inputs. */
-void
-fs_visitor::emit_interpolation_setup_gen6()
-{
- struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
-
- /* If the pixel centers end up used, the setup is the same as for gen4. */
- this->current_annotation = "compute pixel centers";
- fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type);
- fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
- int_pixel_x.type = BRW_REGISTER_TYPE_UW;
- int_pixel_y.type = BRW_REGISTER_TYPE_UW;
- emit(BRW_OPCODE_ADD,
- int_pixel_x,
- fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
- fs_reg(brw_imm_v(0x10101010)));
- emit(BRW_OPCODE_ADD,
- int_pixel_y,
- fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
- fs_reg(brw_imm_v(0x11001100)));
-
- /* As of gen6, we can no longer mix float and int sources. We have
- * to turn the integer pixel centers into floats for their actual
- * use.
- */
- this->pixel_x = fs_reg(this, glsl_type::float_type);
- this->pixel_y = fs_reg(this, glsl_type::float_type);
- emit(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x);
- emit(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y);
-
- this->current_annotation = "compute pos.w";
- this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
- this->wpos_w = fs_reg(this, glsl_type::float_type);
- emit_math(FS_OPCODE_RCP, this->wpos_w, this->pixel_w);
-
- this->delta_x = fs_reg(brw_vec8_grf(2, 0));
- this->delta_y = fs_reg(brw_vec8_grf(3, 0));
-
- this->current_annotation = NULL;
-}
-
-void
-fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
-{
- int reg_width = c->dispatch_width / 8;
-
- if (c->dispatch_width == 8 || intel->gen == 6) {
- /* SIMD8 write looks like:
- * m + 0: r0
- * m + 1: r1
- * m + 2: g0
- * m + 3: g1
- *
- * gen6 SIMD16 DP write looks like:
- * m + 0: r0
- * m + 1: r1
- * m + 2: g0
- * m + 3: g1
- * m + 4: b0
- * m + 5: b1
- * m + 6: a0
- * m + 7: a1
- */
- emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index * reg_width),
- color);
- } else {
- /* pre-gen6 SIMD16 single source DP write looks like:
- * m + 0: r0
- * m + 1: g0
- * m + 2: b0
- * m + 3: a0
- * m + 4: r1
- * m + 5: g1
- * m + 6: b1
- * m + 7: a1
- */
- if (brw->has_compr4) {
- /* By setting the high bit of the MRF register number, we
- * indicate that we want COMPR4 mode - instead of doing the
- * usual destination + 1 for the second half we get
- * destination + 4.
- */
- emit(BRW_OPCODE_MOV,
- fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), color);
- } else {
- push_force_uncompressed();
- emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), color);
- pop_force_uncompressed();
-
- push_force_sechalf();
- color.sechalf = true;
- emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), color);
- pop_force_sechalf();
- color.sechalf = false;
- }
- }
-}
-
-void
-fs_visitor::emit_fb_writes()
-{
- this->current_annotation = "FB write header";
- GLboolean header_present = GL_TRUE;
- int nr = 0;
- int reg_width = c->dispatch_width / 8;
-
- if (intel->gen >= 6 &&
- !this->kill_emitted &&
- c->key.nr_color_regions == 1) {
- header_present = false;
- }
-
- if (header_present) {
- /* m0, m1 header */
- nr += 2;
- }
-
- if (c->aa_dest_stencil_reg) {
- push_force_uncompressed();
- emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
- fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)));
- pop_force_uncompressed();
- }
-
- /* Reserve space for color. It'll be filled in per MRT below. */
- int color_mrf = nr;
- nr += 4 * reg_width;
-
- if (c->source_depth_to_render_target) {
- if (intel->gen == 6 && c->dispatch_width == 16) {
- /* For outputting oDepth on gen6, SIMD8 writes have to be
- * used. This would require 8-wide moves of each half to
- * message regs, kind of like pre-gen5 SIMD16 FB writes.
- * Just bail on doing so for now.
- */
- fail("Missing support for simd16 depth writes on gen6\n");
- }
-
- if (c->computes_depth) {
- /* Hand over gl_FragDepth. */
- assert(this->frag_depth);
- fs_reg depth = *(variable_storage(this->frag_depth));
-
- emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), depth);
- } else {
- /* Pass through the payload depth. */
- emit(BRW_OPCODE_MOV, fs_reg(MRF, nr),
- fs_reg(brw_vec8_grf(c->source_depth_reg, 0)));
- }
- nr += reg_width;
- }
-
- if (c->dest_depth_reg) {
- emit(BRW_OPCODE_MOV, fs_reg(MRF, nr),
- fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)));
- nr += reg_width;
- }
-
- fs_reg color = reg_undef;
- if (this->frag_color)
- color = *(variable_storage(this->frag_color));
- else if (this->frag_data) {
- color = *(variable_storage(this->frag_data));
- color.type = BRW_REGISTER_TYPE_F;
- }
-
- for (int target = 0; target < c->key.nr_color_regions; target++) {
- this->current_annotation = ralloc_asprintf(this->mem_ctx,
- "FB write target %d",
- target);
- if (this->frag_color || this->frag_data) {
- for (int i = 0; i < 4; i++) {
- emit_color_write(i, color_mrf, color);
- color.reg_offset++;
- }
- }
-
- if (this->frag_color)
- color.reg_offset -= 4;
-
- fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
- inst->target = target;
- inst->base_mrf = 0;
- inst->mlen = nr;
- if (target == c->key.nr_color_regions - 1)
- inst->eot = true;
- inst->header_present = header_present;
- }
-
- if (c->key.nr_color_regions == 0) {
- if (c->key.alpha_test && (this->frag_color || this->frag_data)) {
- /* If the alpha test is enabled but there's no color buffer,
- * we still need to send alpha out the pipeline to our null
- * renderbuffer.
- */
- color.reg_offset += 3;
- emit_color_write(3, color_mrf, color);
- }
-
- fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
- inst->base_mrf = 0;
- inst->mlen = nr;
- inst->eot = true;
- inst->header_present = header_present;
- }
-
- this->current_annotation = NULL;
-}
-
-void
-fs_visitor::generate_fb_write(fs_inst *inst)
-{
- GLboolean eot = inst->eot;
- struct brw_reg implied_header;
-
- /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
- * move, here's g1.
- */
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-
- if (inst->header_present) {
- if (intel->gen >= 6) {
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
- brw_MOV(p,
- retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
- retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-
- if (inst->target > 0) {
- /* Set the render target index for choosing BLEND_STATE. */
- brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2),
- BRW_REGISTER_TYPE_UD),
- brw_imm_ud(inst->target));
- }
-
- implied_header = brw_null_reg();
- } else {
- implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
-
- brw_MOV(p,
- brw_message_reg(inst->base_mrf + 1),
- brw_vec8_grf(1, 0));
- }
- } else {
- implied_header = brw_null_reg();
- }
-
- brw_pop_insn_state(p);
-
- brw_fb_WRITE(p,
- c->dispatch_width,
- inst->base_mrf,
- implied_header,
- inst->target,
- inst->mlen,
- 0,
- eot,
- inst->header_present);
-}
-
-/* Computes the integer pixel x,y values from the origin.
- *
- * This is the basis of gl_FragCoord computation, but is also used
- * pre-gen6 for computing the deltas from v0 for computing
- * interpolation.
- */
-void
-fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x)
-{
- struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
- struct brw_reg src;
- struct brw_reg deltas;
-
- if (is_x) {
- src = stride(suboffset(g1_uw, 4), 2, 4, 0);
- deltas = brw_imm_v(0x10101010);
- } else {
- src = stride(suboffset(g1_uw, 5), 2, 4, 0);
- deltas = brw_imm_v(0x11001100);
- }
-
- if (c->dispatch_width == 16) {
- dst = vec16(dst);
- }
-
- /* We do this 8 or 16-wide, but since the destination is UW we
- * don't do compression in the 16-wide case.
- */
- brw_push_insn_state(p);
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_ADD(p, dst, src, deltas);
- brw_pop_insn_state(p);
-}
-
-void
-fs_visitor::generate_linterp(fs_inst *inst,
- struct brw_reg dst, struct brw_reg *src)
-{
- struct brw_reg delta_x = src[0];
- struct brw_reg delta_y = src[1];
- struct brw_reg interp = src[2];
-
- if (brw->has_pln &&
- delta_y.nr == delta_x.nr + 1 &&
- (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
- brw_PLN(p, dst, interp, delta_x);
- } else {
- brw_LINE(p, brw_null_reg(), interp, delta_x);
- brw_MAC(p, dst, suboffset(interp, 1), delta_y);
- }
-}
-
-void
-fs_visitor::generate_math(fs_inst *inst,
- struct brw_reg dst, struct brw_reg *src)
-{
- int op;
-
- switch (inst->opcode) {
- case FS_OPCODE_RCP:
- op = BRW_MATH_FUNCTION_INV;
- break;
- case FS_OPCODE_RSQ:
- op = BRW_MATH_FUNCTION_RSQ;
- break;
- case FS_OPCODE_SQRT:
- op = BRW_MATH_FUNCTION_SQRT;
- break;
- case FS_OPCODE_EXP2:
- op = BRW_MATH_FUNCTION_EXP;
- break;
- case FS_OPCODE_LOG2:
- op = BRW_MATH_FUNCTION_LOG;
- break;
- case FS_OPCODE_POW:
- op = BRW_MATH_FUNCTION_POW;
- break;
- case FS_OPCODE_SIN:
- op = BRW_MATH_FUNCTION_SIN;
- break;
- case FS_OPCODE_COS:
- op = BRW_MATH_FUNCTION_COS;
- break;
- default:
- assert(!"not reached: unknown math function");
- op = 0;
- break;
- }
-
- if (intel->gen >= 6) {
- assert(inst->mlen == 0);
-
- if (inst->opcode == FS_OPCODE_POW) {
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_math2(p, dst, op, src[0], src[1]);
-
- if (c->dispatch_width == 16) {
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_math2(p, sechalf(dst), op, sechalf(src[0]), sechalf(src[1]));
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
- }
- } else {
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_math(p, dst,
- op,
- inst->saturate ? BRW_MATH_SATURATE_SATURATE :
- BRW_MATH_SATURATE_NONE,
- 0, src[0],
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
-
- if (c->dispatch_width == 16) {
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_math(p, sechalf(dst),
- op,
- inst->saturate ? BRW_MATH_SATURATE_SATURATE :
- BRW_MATH_SATURATE_NONE,
- 0, sechalf(src[0]),
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
- }
- }
- } else /* gen <= 5 */{
- assert(inst->mlen >= 1);
-
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_math(p, dst,
- op,
- inst->saturate ? BRW_MATH_SATURATE_SATURATE :
- BRW_MATH_SATURATE_NONE,
- inst->base_mrf, src[0],
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
-
- if (c->dispatch_width == 16) {
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_math(p, sechalf(dst),
- op,
- inst->saturate ? BRW_MATH_SATURATE_SATURATE :
- BRW_MATH_SATURATE_NONE,
- inst->base_mrf + 1, sechalf(src[0]),
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
-
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
- }
- }
-}
-
-void
-fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
-{
- int msg_type = -1;
- int rlen = 4;
- uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
-
- if (c->dispatch_width == 16)
- simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
-
- if (intel->gen >= 5) {
- switch (inst->opcode) {
- case FS_OPCODE_TEX:
- if (inst->shadow_compare) {
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
- } else {
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
- }
- break;
- case FS_OPCODE_TXB:
- if (inst->shadow_compare) {
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
- } else {
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
- }
- break;
- case FS_OPCODE_TXL:
- if (inst->shadow_compare) {
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
- } else {
- msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
- }
- break;
- case FS_OPCODE_TXD:
- assert(!"TXD isn't supported on gen5+ yet.");
- break;
- }
- } else {
- switch (inst->opcode) {
- case FS_OPCODE_TEX:
- /* Note that G45 and older determines shadow compare and dispatch width
- * from message length for most messages.
- */
- assert(c->dispatch_width == 8);
- msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
- if (inst->shadow_compare) {
- assert(inst->mlen == 6);
- } else {
- assert(inst->mlen <= 4);
- }
- break;
- case FS_OPCODE_TXB:
- if (inst->shadow_compare) {
- assert(inst->mlen == 6);
- msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
- } else {
- assert(inst->mlen == 9);
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
- simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
- }
- break;
- case FS_OPCODE_TXL:
- if (inst->shadow_compare) {
- assert(inst->mlen == 6);
- msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
- } else {
- assert(inst->mlen == 9);
- msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
- simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
- }
- break;
- case FS_OPCODE_TXD:
- assert(!"TXD isn't supported on gen4 yet.");
- break;
- }
- }
- assert(msg_type != -1);
-
- if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
- rlen = 8;
- dst = vec16(dst);
- }
-
- brw_SAMPLE(p,
- retype(dst, BRW_REGISTER_TYPE_UW),
- inst->base_mrf,
- src,
- SURF_INDEX_TEXTURE(inst->sampler),
- inst->sampler,
- WRITEMASK_XYZW,
- msg_type,
- rlen,
- inst->mlen,
- 0,
- inst->header_present,
- simd_mode);
-}
-
-
-/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
- * looking like:
- *
- * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
- *
- * and we're trying to produce:
- *
- * DDX DDY
- * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
- * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
- * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
- * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
- * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
- * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
- * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
- * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
- *
- * and add another set of two more subspans if in 16-pixel dispatch mode.
- *
- * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
- * for each pair, and vertstride = 2 jumps us 2 elements after processing a
- * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
- * between each other. We could probably do it like ddx and swizzle the right
- * order later, but bail for now and just produce
- * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
- */
-void
-fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
-{
- struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
- BRW_REGISTER_TYPE_F,
- BRW_VERTICAL_STRIDE_2,
- BRW_WIDTH_2,
- BRW_HORIZONTAL_STRIDE_0,
- BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
- struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
- BRW_REGISTER_TYPE_F,
- BRW_VERTICAL_STRIDE_2,
- BRW_WIDTH_2,
- BRW_HORIZONTAL_STRIDE_0,
- BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
- brw_ADD(p, dst, src0, negate(src1));
-}
-
-void
-fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
-{
- struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
- BRW_REGISTER_TYPE_F,
- BRW_VERTICAL_STRIDE_4,
- BRW_WIDTH_4,
- BRW_HORIZONTAL_STRIDE_0,
- BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
- struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
- BRW_REGISTER_TYPE_F,
- BRW_VERTICAL_STRIDE_4,
- BRW_WIDTH_4,
- BRW_HORIZONTAL_STRIDE_0,
- BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
- brw_ADD(p, dst, src0, negate(src1));
-}
-
-void
-fs_visitor::generate_discard(fs_inst *inst)
-{
- struct brw_reg f0 = brw_flag_reg();
-
- if (intel->gen >= 6) {
- struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
- struct brw_reg some_register;
-
- /* As of gen6, we no longer have the mask register to look at,
- * so life gets a bit more complicated.
- */
-
- /* Load the flag register with all ones. */
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_MOV(p, f0, brw_imm_uw(0xffff));
- brw_pop_insn_state(p);
-
- /* Do a comparison that should always fail, to produce 0s in the flag
- * reg where we have active channels.
- */
- some_register = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
- brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
- BRW_CONDITIONAL_NZ, some_register, some_register);
-
- /* Undo CMP's whacking of predication*/
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_AND(p, g1, f0, g1);
- brw_pop_insn_state(p);
- } else {
- struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
- struct brw_reg mask = brw_uw1_reg(mask.file, mask.nr, 0);
-
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-
- /* Unlike the 965, we have the mask reg, so we just need
- * somewhere to invert that (containing channels to be disabled)
- * so it can be ANDed with the mask of pixels still to be
- * written. Use the flag reg for consistency with gen6+.
- */
- brw_NOT(p, f0, brw_mask_reg(1)); /* IMASK */
- brw_AND(p, g0, f0, g0);
-
- brw_pop_insn_state(p);
- }
-}
-
-void
-fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src)
-{
- assert(inst->mlen != 0);
-
- brw_MOV(p,
- retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),
- retype(src, BRW_REGISTER_TYPE_UD));
- brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1,
- inst->offset);
-}
-
-void
-fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)
-{
- assert(inst->mlen != 0);
-
- /* Clear any post destination dependencies that would be ignored by
- * the block read. See the B-Spec for pre-gen5 send instruction.
- *
- * This could use a better solution, since texture sampling and
- * math reads could potentially run into it as well -- anywhere
- * that we have a SEND with a destination that is a register that
- * was written but not read within the last N instructions (what's
- * N? unsure). This is rare because of dead code elimination, but
- * not impossible.
- */
- if (intel->gen == 4 && !intel->is_g4x)
- brw_MOV(p, brw_null_reg(), dst);
-
- brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
- inst->offset);
-
- if (intel->gen == 4 && !intel->is_g4x) {
- /* gen4 errata: destination from a send can't be used as a
- * destination until it's been read. Just read it so we don't
- * have to worry.
- */
- brw_MOV(p, brw_null_reg(), dst);
- }
-}
-
-
-void
-fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
-{
- assert(inst->mlen != 0);
-
- /* Clear any post destination dependencies that would be ignored by
- * the block read. See the B-Spec for pre-gen5 send instruction.
- *
- * This could use a better solution, since texture sampling and
- * math reads could potentially run into it as well -- anywhere
- * that we have a SEND with a destination that is a register that
- * was written but not read within the last N instructions (what's
- * N? unsure). This is rare because of dead code elimination, but
- * not impossible.
- */
- if (intel->gen == 4 && !intel->is_g4x)
- brw_MOV(p, brw_null_reg(), dst);
-
- brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
- inst->offset, SURF_INDEX_FRAG_CONST_BUFFER);
-
- if (intel->gen == 4 && !intel->is_g4x) {
- /* gen4 errata: destination from a send can't be used as a
- * destination until it's been read. Just read it so we don't
- * have to worry.
- */
- brw_MOV(p, brw_null_reg(), dst);
- }
-}
-
/**
* To be called after the last _mesa_add_state_reference() call, to
* set up prog_data.param[] for assign_curb_setup() and
@@ -2892,7 +649,7 @@ fs_visitor::calculate_urb_setup()
/* Figure out where each of the incoming setup attributes lands. */
if (intel->gen >= 6) {
for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
- if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)) {
+ if (fp->Base.InputsRead & BITFIELD64_BIT(i)) {
urb_setup[i] = urb_next++;
}
}
@@ -3276,12 +1033,16 @@ fs_visitor::propagate_constants()
scan_inst->src[i] = inst->src[0];
progress = true;
} else if (i == 0 && scan_inst->src[1].file != IMM) {
- /* Fit this constant in by swapping the operands and
- * flipping the predicate
- */
scan_inst->src[0] = scan_inst->src[1];
scan_inst->src[1] = inst->src[0];
- scan_inst->predicate_inverse = !scan_inst->predicate_inverse;
+
+ /* If this was predicated, flipping operands means
+ * we also need to flip the predicate.
+ */
+ if (scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) {
+ scan_inst->predicate_inverse =
+ !scan_inst->predicate_inverse;
+ }
progress = true;
}
break;
@@ -3734,355 +1495,6 @@ fs_visitor::virtual_grf_interferes(int a, int b)
return start < end;
}
-static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
-{
- struct brw_reg brw_reg;
-
- switch (reg->file) {
- case GRF:
- case ARF:
- case MRF:
- if (reg->smear == -1) {
- brw_reg = brw_vec8_reg(reg->file,
- reg->hw_reg, 0);
- } else {
- brw_reg = brw_vec1_reg(reg->file,
- reg->hw_reg, reg->smear);
- }
- brw_reg = retype(brw_reg, reg->type);
- if (reg->sechalf)
- brw_reg = sechalf(brw_reg);
- break;
- case IMM:
- switch (reg->type) {
- case BRW_REGISTER_TYPE_F:
- brw_reg = brw_imm_f(reg->imm.f);
- break;
- case BRW_REGISTER_TYPE_D:
- brw_reg = brw_imm_d(reg->imm.i);
- break;
- case BRW_REGISTER_TYPE_UD:
- brw_reg = brw_imm_ud(reg->imm.u);
- break;
- default:
- assert(!"not reached");
- brw_reg = brw_null_reg();
- break;
- }
- break;
- case FIXED_HW_REG:
- brw_reg = reg->fixed_hw_reg;
- break;
- case BAD_FILE:
- /* Probably unused. */
- brw_reg = brw_null_reg();
- break;
- case UNIFORM:
- assert(!"not reached");
- brw_reg = brw_null_reg();
- break;
- default:
- assert(!"not reached");
- brw_reg = brw_null_reg();
- break;
- }
- if (reg->abs)
- brw_reg = brw_abs(brw_reg);
- if (reg->negate)
- brw_reg = negate(brw_reg);
-
- return brw_reg;
-}
-
-void
-fs_visitor::generate_code()
-{
- int last_native_inst = p->nr_insn;
- const char *last_annotation_string = NULL;
- ir_instruction *last_annotation_ir = NULL;
-
- int loop_stack_array_size = 16;
- int loop_stack_depth = 0;
- brw_instruction **loop_stack =
- rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
- int *if_depth_in_loop =
- rzalloc_array(this->mem_ctx, int, loop_stack_array_size);
-
-
- if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- printf("Native code for fragment shader %d (%d-wide dispatch):\n",
- ctx->Shader.CurrentFragmentProgram->Name, c->dispatch_width);
- }
-
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
- struct brw_reg src[3], dst;
-
- if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- if (last_annotation_ir != inst->ir) {
- last_annotation_ir = inst->ir;
- if (last_annotation_ir) {
- printf(" ");
- last_annotation_ir->print();
- printf("\n");
- }
- }
- if (last_annotation_string != inst->annotation) {
- last_annotation_string = inst->annotation;
- if (last_annotation_string)
- printf(" %s\n", last_annotation_string);
- }
- }
-
- for (unsigned int i = 0; i < 3; i++) {
- src[i] = brw_reg_from_fs_reg(&inst->src[i]);
- }
- dst = brw_reg_from_fs_reg(&inst->dst);
-
- brw_set_conditionalmod(p, inst->conditional_mod);
- brw_set_predicate_control(p, inst->predicated);
- brw_set_predicate_inverse(p, inst->predicate_inverse);
- brw_set_saturate(p, inst->saturate);
-
- if (inst->force_uncompressed || c->dispatch_width == 8) {
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- } else if (inst->force_sechalf) {
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- } else {
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
- }
-
- switch (inst->opcode) {
- case BRW_OPCODE_MOV:
- brw_MOV(p, dst, src[0]);
- break;
- case BRW_OPCODE_ADD:
- brw_ADD(p, dst, src[0], src[1]);
- break;
- case BRW_OPCODE_MUL:
- brw_MUL(p, dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_FRC:
- brw_FRC(p, dst, src[0]);
- break;
- case BRW_OPCODE_RNDD:
- brw_RNDD(p, dst, src[0]);
- break;
- case BRW_OPCODE_RNDE:
- brw_RNDE(p, dst, src[0]);
- break;
- case BRW_OPCODE_RNDZ:
- brw_RNDZ(p, dst, src[0]);
- break;
-
- case BRW_OPCODE_AND:
- brw_AND(p, dst, src[0], src[1]);
- break;
- case BRW_OPCODE_OR:
- brw_OR(p, dst, src[0], src[1]);
- break;
- case BRW_OPCODE_XOR:
- brw_XOR(p, dst, src[0], src[1]);
- break;
- case BRW_OPCODE_NOT:
- brw_NOT(p, dst, src[0]);
- break;
- case BRW_OPCODE_ASR:
- brw_ASR(p, dst, src[0], src[1]);
- break;
- case BRW_OPCODE_SHR:
- brw_SHR(p, dst, src[0], src[1]);
- break;
- case BRW_OPCODE_SHL:
- brw_SHL(p, dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_CMP:
- brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
- break;
- case BRW_OPCODE_SEL:
- brw_SEL(p, dst, src[0], src[1]);
- break;
-
- case BRW_OPCODE_IF:
- if (inst->src[0].file != BAD_FILE) {
- /* The instruction has an embedded compare (only allowed on gen6) */
- assert(intel->gen == 6);
- gen6_IF(p, inst->conditional_mod, src[0], src[1]);
- } else {
- brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
- }
- if_depth_in_loop[loop_stack_depth]++;
- break;
-
- case BRW_OPCODE_ELSE:
- brw_ELSE(p);
- break;
- case BRW_OPCODE_ENDIF:
- brw_ENDIF(p);
- if_depth_in_loop[loop_stack_depth]--;
- break;
-
- case BRW_OPCODE_DO:
- loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
- if (loop_stack_array_size <= loop_stack_depth) {
- loop_stack_array_size *= 2;
- loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
- loop_stack_array_size);
- if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
- loop_stack_array_size);
- }
- if_depth_in_loop[loop_stack_depth] = 0;
- break;
-
- case BRW_OPCODE_BREAK:
- brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- break;
- case BRW_OPCODE_CONTINUE:
- /* FINISHME: We need to write the loop instruction support still. */
- if (intel->gen >= 6)
- gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
- else
- brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- break;
-
- case BRW_OPCODE_WHILE: {
- struct brw_instruction *inst0, *inst1;
- GLuint br = 1;
-
- if (intel->gen >= 5)
- br = 2;
-
- assert(loop_stack_depth > 0);
- loop_stack_depth--;
- inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
- if (intel->gen < 6) {
- /* patch all the BREAK/CONT instructions from last BGNLOOP */
- while (inst0 > loop_stack[loop_stack_depth]) {
- inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
- }
- else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
- }
- }
- }
- }
- break;
-
- case FS_OPCODE_RCP:
- case FS_OPCODE_RSQ:
- case FS_OPCODE_SQRT:
- case FS_OPCODE_EXP2:
- case FS_OPCODE_LOG2:
- case FS_OPCODE_POW:
- case FS_OPCODE_SIN:
- case FS_OPCODE_COS:
- generate_math(inst, dst, src);
- break;
- case FS_OPCODE_PIXEL_X:
- generate_pixel_xy(dst, true);
- break;
- case FS_OPCODE_PIXEL_Y:
- generate_pixel_xy(dst, false);
- break;
- case FS_OPCODE_CINTERP:
- brw_MOV(p, dst, src[0]);
- break;
- case FS_OPCODE_LINTERP:
- generate_linterp(inst, dst, src);
- break;
- case FS_OPCODE_TEX:
- case FS_OPCODE_TXB:
- case FS_OPCODE_TXD:
- case FS_OPCODE_TXL:
- generate_tex(inst, dst, src[0]);
- break;
- case FS_OPCODE_DISCARD:
- generate_discard(inst);
- break;
- case FS_OPCODE_DDX:
- generate_ddx(inst, dst, src[0]);
- break;
- case FS_OPCODE_DDY:
- generate_ddy(inst, dst, src[0]);
- break;
-
- case FS_OPCODE_SPILL:
- generate_spill(inst, src[0]);
- break;
-
- case FS_OPCODE_UNSPILL:
- generate_unspill(inst, dst);
- break;
-
- case FS_OPCODE_PULL_CONSTANT_LOAD:
- generate_pull_constant_load(inst, dst);
- break;
-
- case FS_OPCODE_FB_WRITE:
- generate_fb_write(inst);
- break;
- default:
- if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
- _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
- brw_opcodes[inst->opcode].name);
- } else {
- _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
- }
- fail("unsupported opcode in FS\n");
- }
-
- if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
- if (0) {
- printf("0x%08x 0x%08x 0x%08x 0x%08x ",
- ((uint32_t *)&p->store[i])[3],
- ((uint32_t *)&p->store[i])[2],
- ((uint32_t *)&p->store[i])[1],
- ((uint32_t *)&p->store[i])[0]);
- }
- brw_disasm(stdout, &p->store[i], intel->gen);
- }
- }
-
- last_native_inst = p->nr_insn;
- }
-
- if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- printf("\n");
- }
-
- ralloc_free(loop_stack);
- ralloc_free(if_depth_in_loop);
-
- brw_set_uip_jip(p);
-
- /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
- * emit issues, it doesn't get the jump distances into the output,
- * which is often something we want to debug. So this is here in
- * case you're doing that.
- */
- if (0) {
- if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- for (unsigned int i = 0; i < p->nr_insn; i++) {
- printf("0x%08x 0x%08x 0x%08x 0x%08x ",
- ((uint32_t *)&p->store[i])[3],
- ((uint32_t *)&p->store[i])[2],
- ((uint32_t *)&p->store[i])[1],
- ((uint32_t *)&p->store[i])[0]);
- brw_disasm(stdout, &p->store[i], intel->gen);
- }
- }
- }
-}
-
bool
fs_visitor::run()
{
@@ -4118,6 +1530,7 @@ fs_visitor::run()
foreach_iter(exec_list_iterator, iter, *shader->ir) {
ir_instruction *ir = (ir_instruction *)iter.get();
base_ir = ir;
+ this->result = reg_undef;
ir->accept(this);
}
@@ -4171,9 +1584,9 @@ fs_visitor::run()
generate_code();
if (c->dispatch_width == 8) {
- c->prog_data.total_grf = grf_used;
+ c->prog_data.reg_blocks = brw_register_blocks(grf_used);
} else {
- c->prog_data.total_grf_16 = grf_used;
+ c->prog_data.reg_blocks_16 = brw_register_blocks(grf_used);
c->prog_data.prog_offset_16 = prog_offset_16;
/* Make sure we didn't try to sneak in an extra uniform */
@@ -4184,11 +1597,10 @@ fs_visitor::run()
}
bool
-brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
+brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
+ struct gl_shader_program *prog)
{
struct intel_context *intel = &brw->intel;
- struct gl_context *ctx = &intel->ctx;
- struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram;
if (!prog)
return false;
@@ -4208,16 +1620,17 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
*/
c->dispatch_width = 8;
- fs_visitor v(c, shader);
+ fs_visitor v(c, prog, shader);
if (!v.run()) {
- /* FINISHME: Cleanly fail, test at link time, etc. */
- assert(!"not reached");
+ prog->LinkStatus = GL_FALSE;
+ prog->InfoLog = ralloc_strdup(prog, v.fail_msg);
+
return false;
}
if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) {
c->dispatch_width = 16;
- fs_visitor v2(c, shader);
+ fs_visitor v2(c, prog, shader);
v2.import_uniforms(v.variable_ht);
v2.run();
}
@@ -4226,3 +1639,73 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
return true;
}
+
+bool
+brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_wm_prog_key key;
+ struct gl_fragment_program *fp = prog->FragmentProgram;
+ struct brw_fragment_program *bfp = brw_fragment_program(fp);
+
+ if (!fp)
+ return true;
+
+ memset(&key, 0, sizeof(key));
+
+ if (fp->UsesKill)
+ key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+ if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+ key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+
+ /* Just assume depth testing. */
+ key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
+ key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+
+ key.vp_outputs_written |= BITFIELD64_BIT(FRAG_ATTRIB_WPOS);
+ for (int i = 0; i < FRAG_ATTRIB_MAX; i++) {
+ int vp_index = -1;
+
+ if (!(fp->Base.InputsRead & BITFIELD64_BIT(i)))
+ continue;
+
+ key.proj_attrib_mask |= 1 << i;
+
+ if (i <= FRAG_ATTRIB_TEX7)
+ vp_index = i;
+ else if (i >= FRAG_ATTRIB_VAR0)
+ vp_index = i - FRAG_ATTRIB_VAR0 + VERT_RESULT_VAR0;
+
+ if (vp_index >= 0)
+ key.vp_outputs_written |= BITFIELD64_BIT(vp_index);
+ }
+
+ key.clamp_fragment_color = true;
+
+ for (int i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+ /* FINISHME: depth compares might use (0,0,0,W) for example */
+ key.tex_swizzles[i] = SWIZZLE_XYZW;
+ }
+
+ if (fp->Base.InputsRead & FRAG_BIT_WPOS) {
+ key.drawable_height = ctx->DrawBuffer->Height;
+ key.render_to_fbo = ctx->DrawBuffer->Name != 0;
+ }
+
+ key.nr_color_regions = 1;
+
+ key.program_string_id = bfp->id;
+
+ drm_intel_bo *old_prog_bo = brw->wm.prog_bo;
+ struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data;
+ brw->wm.prog_bo = NULL;
+
+ bool success = do_wm_prog(brw, prog, bfp, &key);
+
+ drm_intel_bo_unreference(brw->wm.prog_bo);
+ brw->wm.prog_bo = old_prog_bo;
+ brw->wm.prog_data = old_prog_data;
+
+ return success;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 4b355c979eb..7570dda1024 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -359,12 +359,14 @@ class fs_visitor : public ir_visitor
{
public:
- fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
+ fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog,
+ struct brw_shader *shader)
{
this->c = c;
this->p = &c->func;
this->brw = p->brw;
- this->fp = brw->fragment_program;
+ this->fp = prog->FragmentProgram;
+ this->prog = prog;
this->intel = &brw->intel;
this->ctx = &intel->ctx;
this->mem_ctx = ralloc_context(NULL);
@@ -466,6 +468,8 @@ public:
return emit(fs_inst(opcode, dst, src0, src1, src2));
}
+ int type_size(const struct glsl_type *type);
+
bool run();
void setup_paramvalues_refs();
void assign_curb_setup();
@@ -542,6 +546,7 @@ public:
struct brw_wm_compile *c;
struct brw_compile *p;
struct brw_shader *shader;
+ struct gl_shader_program *prog;
void *mem_ctx;
exec_list instructions;
@@ -570,8 +575,12 @@ public:
/** @} */
bool failed;
+ char *fail_msg;
- /* Result of last visit() method. */
+ /* On entry to a visit() method, this is the storage for the
+ * result. On exit, the visit() called may have changed it, in
+ * which case the parent must use the new storage instead.
+ */
fs_reg result;
fs_reg pixel_x;
@@ -590,3 +599,4 @@ public:
GLboolean brw_do_channel_expressions(struct exec_list *instructions);
GLboolean brw_do_vector_splitting(struct exec_list *instructions);
+bool brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
new file mode 100644
index 00000000000..6b7c434949c
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -0,0 +1,875 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file brw_fs_emit.cpp
+ *
+ * This file supports emitting code from the FS LIR to the actual
+ * native instructions.
+ */
+
+extern "C" {
+#include "main/macros.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+} /* extern "C" */
+
+#include "brw_fs.h"
+#include "../glsl/ir_print_visitor.h"
+
+void
+fs_visitor::generate_fb_write(fs_inst *inst)
+{
+ GLboolean eot = inst->eot;
+ struct brw_reg implied_header;
+
+ /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
+ * move, here's g1.
+ */
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ if (inst->header_present) {
+ if (intel->gen >= 6) {
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ brw_MOV(p,
+ retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
+ retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ if (inst->target > 0) {
+ /* Set the render target index for choosing BLEND_STATE. */
+ brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2),
+ BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(inst->target));
+ }
+
+ implied_header = brw_null_reg();
+ } else {
+ implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
+ brw_MOV(p,
+ brw_message_reg(inst->base_mrf + 1),
+ brw_vec8_grf(1, 0));
+ }
+ } else {
+ implied_header = brw_null_reg();
+ }
+
+ brw_pop_insn_state(p);
+
+ brw_fb_WRITE(p,
+ c->dispatch_width,
+ inst->base_mrf,
+ implied_header,
+ inst->target,
+ inst->mlen,
+ 0,
+ eot,
+ inst->header_present);
+}
+
+/* Computes the integer pixel x,y values from the origin.
+ *
+ * This is the basis of gl_FragCoord computation, but is also used
+ * pre-gen6 for computing the deltas from v0 for computing
+ * interpolation.
+ */
+void
+fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x)
+{
+ struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
+ struct brw_reg src;
+ struct brw_reg deltas;
+
+ if (is_x) {
+ src = stride(suboffset(g1_uw, 4), 2, 4, 0);
+ deltas = brw_imm_v(0x10101010);
+ } else {
+ src = stride(suboffset(g1_uw, 5), 2, 4, 0);
+ deltas = brw_imm_v(0x11001100);
+ }
+
+ if (c->dispatch_width == 16) {
+ dst = vec16(dst);
+ }
+
+ /* We do this 8 or 16-wide, but since the destination is UW we
+ * don't do compression in the 16-wide case.
+ */
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_ADD(p, dst, src, deltas);
+ brw_pop_insn_state(p);
+}
+
+void
+fs_visitor::generate_linterp(fs_inst *inst,
+ struct brw_reg dst, struct brw_reg *src)
+{
+ struct brw_reg delta_x = src[0];
+ struct brw_reg delta_y = src[1];
+ struct brw_reg interp = src[2];
+
+ if (brw->has_pln &&
+ delta_y.nr == delta_x.nr + 1 &&
+ (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
+ brw_PLN(p, dst, interp, delta_x);
+ } else {
+ brw_LINE(p, brw_null_reg(), interp, delta_x);
+ brw_MAC(p, dst, suboffset(interp, 1), delta_y);
+ }
+}
+
+void
+fs_visitor::generate_math(fs_inst *inst,
+ struct brw_reg dst, struct brw_reg *src)
+{
+ int op;
+
+ switch (inst->opcode) {
+ case FS_OPCODE_RCP:
+ op = BRW_MATH_FUNCTION_INV;
+ break;
+ case FS_OPCODE_RSQ:
+ op = BRW_MATH_FUNCTION_RSQ;
+ break;
+ case FS_OPCODE_SQRT:
+ op = BRW_MATH_FUNCTION_SQRT;
+ break;
+ case FS_OPCODE_EXP2:
+ op = BRW_MATH_FUNCTION_EXP;
+ break;
+ case FS_OPCODE_LOG2:
+ op = BRW_MATH_FUNCTION_LOG;
+ break;
+ case FS_OPCODE_POW:
+ op = BRW_MATH_FUNCTION_POW;
+ break;
+ case FS_OPCODE_SIN:
+ op = BRW_MATH_FUNCTION_SIN;
+ break;
+ case FS_OPCODE_COS:
+ op = BRW_MATH_FUNCTION_COS;
+ break;
+ default:
+ assert(!"not reached: unknown math function");
+ op = 0;
+ break;
+ }
+
+ if (intel->gen >= 6) {
+ assert(inst->mlen == 0);
+
+ if (inst->opcode == FS_OPCODE_POW) {
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_math2(p, dst, op, src[0], src[1]);
+
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math2(p, sechalf(dst), op, sechalf(src[0]), sechalf(src[1]));
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ }
+ } else {
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_math(p, dst,
+ op,
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE,
+ 0, src[0],
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p, sechalf(dst),
+ op,
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE,
+ 0, sechalf(src[0]),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ }
+ }
+ } else /* gen <= 5 */{
+ assert(inst->mlen >= 1);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_math(p, dst,
+ op,
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE,
+ inst->base_mrf, src[0],
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p, sechalf(dst),
+ op,
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE,
+ inst->base_mrf + 1, sechalf(src[0]),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ }
+ }
+}
+
+void
+fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
+{
+ int msg_type = -1;
+ int rlen = 4;
+ uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
+
+ if (c->dispatch_width == 16)
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+
+ if (intel->gen >= 5) {
+ switch (inst->opcode) {
+ case FS_OPCODE_TEX:
+ if (inst->shadow_compare) {
+ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
+ } else {
+ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
+ }
+ break;
+ case FS_OPCODE_TXB:
+ if (inst->shadow_compare) {
+ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
+ } else {
+ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
+ }
+ break;
+ case FS_OPCODE_TXL:
+ if (inst->shadow_compare) {
+ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
+ } else {
+ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
+ }
+ break;
+ case FS_OPCODE_TXD:
+ assert(!"TXD isn't supported on gen5+ yet.");
+ break;
+ }
+ } else {
+ switch (inst->opcode) {
+ case FS_OPCODE_TEX:
+ /* Note that G45 and older determines shadow compare and dispatch width
+ * from message length for most messages.
+ */
+ assert(c->dispatch_width == 8);
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
+ if (inst->shadow_compare) {
+ assert(inst->mlen == 6);
+ } else {
+ assert(inst->mlen <= 4);
+ }
+ break;
+ case FS_OPCODE_TXB:
+ if (inst->shadow_compare) {
+ assert(inst->mlen == 6);
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
+ } else {
+ assert(inst->mlen == 9);
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+ }
+ break;
+ case FS_OPCODE_TXL:
+ if (inst->shadow_compare) {
+ assert(inst->mlen == 6);
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
+ } else {
+ assert(inst->mlen == 9);
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+ }
+ break;
+ case FS_OPCODE_TXD:
+ assert(!"TXD isn't supported on gen4 yet.");
+ break;
+ }
+ }
+ assert(msg_type != -1);
+
+ if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
+ rlen = 8;
+ dst = vec16(dst);
+ }
+
+ brw_SAMPLE(p,
+ retype(dst, BRW_REGISTER_TYPE_UW),
+ inst->base_mrf,
+ src,
+ SURF_INDEX_TEXTURE(inst->sampler),
+ inst->sampler,
+ WRITEMASK_XYZW,
+ msg_type,
+ rlen,
+ inst->mlen,
+ 0,
+ inst->header_present,
+ simd_mode);
+}
+
+
+/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
+ * looking like:
+ *
+ * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
+ *
+ * and we're trying to produce:
+ *
+ * DDX DDY
+ * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
+ * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
+ * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
+ * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
+ * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
+ * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
+ * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
+ * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
+ *
+ * and add another set of two more subspans if in 16-pixel dispatch mode.
+ *
+ * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
+ * for each pair, and vertstride = 2 jumps us 2 elements after processing a
+ * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
+ * between each other. We could probably do it like ddx and swizzle the right
+ * order later, but bail for now and just produce
+ * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
+ */
+void
+fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
+{
+ struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ brw_ADD(p, dst, src0, negate(src1));
+}
+
+void
+fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
+{
+ struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ brw_ADD(p, dst, src0, negate(src1));
+}
+
+void
+fs_visitor::generate_discard(fs_inst *inst)
+{
+ struct brw_reg f0 = brw_flag_reg();
+
+ if (intel->gen >= 6) {
+ struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+ struct brw_reg some_register;
+
+ /* As of gen6, we no longer have the mask register to look at,
+ * so life gets a bit more complicated.
+ */
+
+ /* Load the flag register with all ones. */
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p, f0, brw_imm_uw(0xffff));
+ brw_pop_insn_state(p);
+
+ /* Do a comparison that should always fail, to produce 0s in the flag
+ * reg where we have active channels.
+ */
+ some_register = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
+ brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+ BRW_CONDITIONAL_NZ, some_register, some_register);
+
+ /* Undo CMP's whacking of predication*/
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_AND(p, g1, f0, g1);
+ brw_pop_insn_state(p);
+ } else {
+ struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ /* Unlike the 965, we have the mask reg, so we just need
+ * somewhere to invert that (containing channels to be disabled)
+ * so it can be ANDed with the mask of pixels still to be
+ * written. Use the flag reg for consistency with gen6+.
+ */
+ brw_NOT(p, f0, brw_mask_reg(1)); /* IMASK */
+ brw_AND(p, g0, f0, g0);
+
+ brw_pop_insn_state(p);
+ }
+}
+
+void
+fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src)
+{
+ assert(inst->mlen != 0);
+
+ brw_MOV(p,
+ retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),
+ retype(src, BRW_REGISTER_TYPE_UD));
+ brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1,
+ inst->offset);
+}
+
+void
+fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)
+{
+ assert(inst->mlen != 0);
+
+ /* Clear any post destination dependencies that would be ignored by
+ * the block read. See the B-Spec for pre-gen5 send instruction.
+ *
+ * This could use a better solution, since texture sampling and
+ * math reads could potentially run into it as well -- anywhere
+ * that we have a SEND with a destination that is a register that
+ * was written but not read within the last N instructions (what's
+ * N? unsure). This is rare because of dead code elimination, but
+ * not impossible.
+ */
+ if (intel->gen == 4 && !intel->is_g4x)
+ brw_MOV(p, brw_null_reg(), dst);
+
+ brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
+ inst->offset);
+
+ if (intel->gen == 4 && !intel->is_g4x) {
+ /* gen4 errata: destination from a send can't be used as a
+ * destination until it's been read. Just read it so we don't
+ * have to worry.
+ */
+ brw_MOV(p, brw_null_reg(), dst);
+ }
+}
+
+void
+fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
+{
+ assert(inst->mlen != 0);
+
+ /* Clear any post destination dependencies that would be ignored by
+ * the block read. See the B-Spec for pre-gen5 send instruction.
+ *
+ * This could use a better solution, since texture sampling and
+ * math reads could potentially run into it as well -- anywhere
+ * that we have a SEND with a destination that is a register that
+ * was written but not read within the last N instructions (what's
+ * N? unsure). This is rare because of dead code elimination, but
+ * not impossible.
+ */
+ if (intel->gen == 4 && !intel->is_g4x)
+ brw_MOV(p, brw_null_reg(), dst);
+
+ brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
+ inst->offset, SURF_INDEX_FRAG_CONST_BUFFER);
+
+ if (intel->gen == 4 && !intel->is_g4x) {
+ /* gen4 errata: destination from a send can't be used as a
+ * destination until it's been read. Just read it so we don't
+ * have to worry.
+ */
+ brw_MOV(p, brw_null_reg(), dst);
+ }
+}
+
+static struct brw_reg
+brw_reg_from_fs_reg(fs_reg *reg)
+{
+ struct brw_reg brw_reg;
+
+ switch (reg->file) {
+ case GRF:
+ case ARF:
+ case MRF:
+ if (reg->smear == -1) {
+ brw_reg = brw_vec8_reg(reg->file,
+ reg->hw_reg, 0);
+ } else {
+ brw_reg = brw_vec1_reg(reg->file,
+ reg->hw_reg, reg->smear);
+ }
+ brw_reg = retype(brw_reg, reg->type);
+ if (reg->sechalf)
+ brw_reg = sechalf(brw_reg);
+ break;
+ case IMM:
+ switch (reg->type) {
+ case BRW_REGISTER_TYPE_F:
+ brw_reg = brw_imm_f(reg->imm.f);
+ break;
+ case BRW_REGISTER_TYPE_D:
+ brw_reg = brw_imm_d(reg->imm.i);
+ break;
+ case BRW_REGISTER_TYPE_UD:
+ brw_reg = brw_imm_ud(reg->imm.u);
+ break;
+ default:
+ assert(!"not reached");
+ brw_reg = brw_null_reg();
+ break;
+ }
+ break;
+ case FIXED_HW_REG:
+ brw_reg = reg->fixed_hw_reg;
+ break;
+ case BAD_FILE:
+ /* Probably unused. */
+ brw_reg = brw_null_reg();
+ break;
+ case UNIFORM:
+ assert(!"not reached");
+ brw_reg = brw_null_reg();
+ break;
+ default:
+ assert(!"not reached");
+ brw_reg = brw_null_reg();
+ break;
+ }
+ if (reg->abs)
+ brw_reg = brw_abs(brw_reg);
+ if (reg->negate)
+ brw_reg = negate(brw_reg);
+
+ return brw_reg;
+}
+
+void
+fs_visitor::generate_code()
+{
+ int last_native_inst = p->nr_insn;
+ const char *last_annotation_string = NULL;
+ ir_instruction *last_annotation_ir = NULL;
+
+ int loop_stack_array_size = 16;
+ int loop_stack_depth = 0;
+ brw_instruction **loop_stack =
+ rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
+ int *if_depth_in_loop =
+ rzalloc_array(this->mem_ctx, int, loop_stack_array_size);
+
+
+ if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
+ printf("Native code for fragment shader %d (%d-wide dispatch):\n",
+ prog->Name, c->dispatch_width);
+ }
+
+ foreach_iter(exec_list_iterator, iter, this->instructions) {
+ fs_inst *inst = (fs_inst *)iter.get();
+ struct brw_reg src[3], dst;
+
+ if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
+ if (last_annotation_ir != inst->ir) {
+ last_annotation_ir = inst->ir;
+ if (last_annotation_ir) {
+ printf(" ");
+ last_annotation_ir->print();
+ printf("\n");
+ }
+ }
+ if (last_annotation_string != inst->annotation) {
+ last_annotation_string = inst->annotation;
+ if (last_annotation_string)
+ printf(" %s\n", last_annotation_string);
+ }
+ }
+
+ for (unsigned int i = 0; i < 3; i++) {
+ src[i] = brw_reg_from_fs_reg(&inst->src[i]);
+ }
+ dst = brw_reg_from_fs_reg(&inst->dst);
+
+ brw_set_conditionalmod(p, inst->conditional_mod);
+ brw_set_predicate_control(p, inst->predicated);
+ brw_set_predicate_inverse(p, inst->predicate_inverse);
+ brw_set_saturate(p, inst->saturate);
+
+ if (inst->force_uncompressed || c->dispatch_width == 8) {
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ } else if (inst->force_sechalf) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ } else {
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ }
+
+ switch (inst->opcode) {
+ case BRW_OPCODE_MOV:
+ brw_MOV(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_ADD:
+ brw_ADD(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_MUL:
+ brw_MUL(p, dst, src[0], src[1]);
+ break;
+
+ case BRW_OPCODE_FRC:
+ brw_FRC(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_RNDD:
+ brw_RNDD(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_RNDE:
+ brw_RNDE(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_RNDZ:
+ brw_RNDZ(p, dst, src[0]);
+ break;
+
+ case BRW_OPCODE_AND:
+ brw_AND(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_OR:
+ brw_OR(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_XOR:
+ brw_XOR(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_NOT:
+ brw_NOT(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_ASR:
+ brw_ASR(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_SHR:
+ brw_SHR(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_SHL:
+ brw_SHL(p, dst, src[0], src[1]);
+ break;
+
+ case BRW_OPCODE_CMP:
+ brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
+ break;
+ case BRW_OPCODE_SEL:
+ brw_SEL(p, dst, src[0], src[1]);
+ break;
+
+ case BRW_OPCODE_IF:
+ if (inst->src[0].file != BAD_FILE) {
+ /* The instruction has an embedded compare (only allowed on gen6) */
+ assert(intel->gen == 6);
+ gen6_IF(p, inst->conditional_mod, src[0], src[1]);
+ } else {
+ brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
+ }
+ if_depth_in_loop[loop_stack_depth]++;
+ break;
+
+ case BRW_OPCODE_ELSE:
+ brw_ELSE(p);
+ break;
+ case BRW_OPCODE_ENDIF:
+ brw_ENDIF(p);
+ if_depth_in_loop[loop_stack_depth]--;
+ break;
+
+ case BRW_OPCODE_DO:
+ loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
+ if (loop_stack_array_size <= loop_stack_depth) {
+ loop_stack_array_size *= 2;
+ loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
+ loop_stack_array_size);
+ if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
+ loop_stack_array_size);
+ }
+ if_depth_in_loop[loop_stack_depth] = 0;
+ break;
+
+ case BRW_OPCODE_BREAK:
+ brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case BRW_OPCODE_CONTINUE:
+ /* FINISHME: We need to write the loop instruction support still. */
+ if (intel->gen >= 6)
+ gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
+ else
+ brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+
+ case BRW_OPCODE_WHILE: {
+ struct brw_instruction *inst0, *inst1;
+ GLuint br = 1;
+
+ if (intel->gen >= 5)
+ br = 2;
+
+ assert(loop_stack_depth > 0);
+ loop_stack_depth--;
+ inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
+ if (intel->gen < 6) {
+ /* patch all the BREAK/CONT instructions from last BGNLOOP */
+ while (inst0 > loop_stack[loop_stack_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+ }
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ }
+ }
+ }
+ }
+ break;
+
+ case FS_OPCODE_RCP:
+ case FS_OPCODE_RSQ:
+ case FS_OPCODE_SQRT:
+ case FS_OPCODE_EXP2:
+ case FS_OPCODE_LOG2:
+ case FS_OPCODE_POW:
+ case FS_OPCODE_SIN:
+ case FS_OPCODE_COS:
+ generate_math(inst, dst, src);
+ break;
+ case FS_OPCODE_PIXEL_X:
+ generate_pixel_xy(dst, true);
+ break;
+ case FS_OPCODE_PIXEL_Y:
+ generate_pixel_xy(dst, false);
+ break;
+ case FS_OPCODE_CINTERP:
+ brw_MOV(p, dst, src[0]);
+ break;
+ case FS_OPCODE_LINTERP:
+ generate_linterp(inst, dst, src);
+ break;
+ case FS_OPCODE_TEX:
+ case FS_OPCODE_TXB:
+ case FS_OPCODE_TXD:
+ case FS_OPCODE_TXL:
+ generate_tex(inst, dst, src[0]);
+ break;
+ case FS_OPCODE_DISCARD:
+ generate_discard(inst);
+ break;
+ case FS_OPCODE_DDX:
+ generate_ddx(inst, dst, src[0]);
+ break;
+ case FS_OPCODE_DDY:
+ generate_ddy(inst, dst, src[0]);
+ break;
+
+ case FS_OPCODE_SPILL:
+ generate_spill(inst, src[0]);
+ break;
+
+ case FS_OPCODE_UNSPILL:
+ generate_unspill(inst, dst);
+ break;
+
+ case FS_OPCODE_PULL_CONSTANT_LOAD:
+ generate_pull_constant_load(inst, dst);
+ break;
+
+ case FS_OPCODE_FB_WRITE:
+ generate_fb_write(inst);
+ break;
+ default:
+ if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
+ _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
+ brw_opcodes[inst->opcode].name);
+ } else {
+ _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
+ }
+ fail("unsupported opcode in FS\n");
+ }
+
+ if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
+ for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
+ if (0) {
+ printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+ ((uint32_t *)&p->store[i])[3],
+ ((uint32_t *)&p->store[i])[2],
+ ((uint32_t *)&p->store[i])[1],
+ ((uint32_t *)&p->store[i])[0]);
+ }
+ brw_disasm(stdout, &p->store[i], intel->gen);
+ }
+ }
+
+ last_native_inst = p->nr_insn;
+ }
+
+ if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
+ printf("\n");
+ }
+
+ ralloc_free(loop_stack);
+ ralloc_free(if_depth_in_loop);
+
+ brw_set_uip_jip(p);
+
+ /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
+ * emit issues, it doesn't get the jump distances into the output,
+ * which is often something we want to debug. So this is here in
+ * case you're doing that.
+ */
+ if (0) {
+ if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
+ for (unsigned int i = 0; i < p->nr_insn; i++) {
+ printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+ ((uint32_t *)&p->store[i])[3],
+ ((uint32_t *)&p->store[i])[2],
+ ((uint32_t *)&p->store[i])[1],
+ ((uint32_t *)&p->store[i])[0]);
+ brw_disasm(stdout, &p->store[i], intel->gen);
+ }
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index fb1192c810a..d8218c26edb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -140,6 +140,7 @@ public:
}
void add_barrier_deps(schedule_node *n);
void add_dep(schedule_node *before, schedule_node *after, int latency);
+ void add_dep(schedule_node *before, schedule_node *after);
void add_inst(fs_inst *inst);
void calculate_deps();
@@ -210,6 +211,15 @@ instruction_scheduler::add_dep(schedule_node *before, schedule_node *after,
after->parent_count++;
}
+void
+instruction_scheduler::add_dep(schedule_node *before, schedule_node *after)
+{
+ if (!before)
+ return;
+
+ add_dep(before, after, before->latency);
+}
+
/**
* Sometimes we really want this node to execute after everything that
* was before it and before everything that followed it. This adds
@@ -253,6 +263,12 @@ instruction_scheduler::calculate_deps()
schedule_node *last_grf_write[virtual_grf_count];
schedule_node *last_mrf_write[BRW_MAX_MRF];
schedule_node *last_conditional_mod = NULL;
+ /* Fixed HW registers are assumed to be separate from the virtual
+ * GRFs, so they can be tracked separately. We don't really write
+ * to fixed GRFs much, so don't bother tracking them on a more
+ * granular level.
+ */
+ schedule_node *last_fixed_grf_write = NULL;
/* The last instruction always needs to still be the last
* instruction. Either it's flow control (IF, ELSE, ENDIF, DO,
@@ -274,10 +290,11 @@ instruction_scheduler::calculate_deps()
/* read-after-write deps. */
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
- if (last_grf_write[inst->src[i].reg]) {
- add_dep(last_grf_write[inst->src[i].reg], n,
- last_grf_write[inst->src[i].reg]->latency);
- }
+ add_dep(last_grf_write[inst->src[i].reg], n);
+ } else if (inst->src[i].file == FIXED_HW_REG &&
+ (inst->src[i].fixed_hw_reg.file ==
+ BRW_GENERAL_REGISTER_FILE)) {
+ add_dep(last_fixed_grf_write, n);
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM) {
@@ -291,53 +308,41 @@ instruction_scheduler::calculate_deps()
* instruction once it's sent, not when the result comes
* back.
*/
- if (last_mrf_write[inst->base_mrf + i]) {
- add_dep(last_mrf_write[inst->base_mrf + i], n,
- last_mrf_write[inst->base_mrf + i]->latency);
- }
+ add_dep(last_mrf_write[inst->base_mrf + i], n);
}
if (inst->predicated) {
assert(last_conditional_mod);
- add_dep(last_conditional_mod, n, last_conditional_mod->latency);
+ add_dep(last_conditional_mod, n);
}
/* write-after-write deps. */
if (inst->dst.file == GRF) {
- if (last_grf_write[inst->dst.reg]) {
- add_dep(last_grf_write[inst->dst.reg], n,
- last_grf_write[inst->dst.reg]->latency);
- }
+ add_dep(last_grf_write[inst->dst.reg], n);
last_grf_write[inst->dst.reg] = n;
} else if (inst->dst.file == MRF) {
int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
- if (last_mrf_write[reg]) {
- add_dep(last_mrf_write[reg], n,
- last_mrf_write[reg]->latency);
- }
+ add_dep(last_mrf_write[reg], n);
last_mrf_write[reg] = n;
if (is_compressed(inst)) {
if (inst->dst.hw_reg & BRW_MRF_COMPR4)
reg += 4;
else
reg++;
- if (last_mrf_write[reg]) {
- add_dep(last_mrf_write[reg], n,
- last_mrf_write[reg]->latency);
- }
+ add_dep(last_mrf_write[reg], n);
last_mrf_write[reg] = n;
}
+ } else if (inst->dst.file == FIXED_HW_REG &&
+ inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
+ last_fixed_grf_write = n;
} else if (inst->dst.file != BAD_FILE) {
add_barrier_deps(n);
}
if (inst->mlen > 0) {
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
- if (last_mrf_write[inst->base_mrf + i]) {
- add_dep(last_mrf_write[inst->base_mrf + i], n,
- last_mrf_write[inst->base_mrf + i]->latency);
- }
+ add_dep(last_mrf_write[inst->base_mrf + i], n);
last_mrf_write[inst->base_mrf + i] = n;
}
}
@@ -352,6 +357,7 @@ instruction_scheduler::calculate_deps()
memset(last_grf_write, 0, sizeof(last_grf_write));
memset(last_mrf_write, 0, sizeof(last_mrf_write));
last_conditional_mod = NULL;
+ last_fixed_grf_write = NULL;
exec_node *node;
exec_node *prev;
@@ -364,9 +370,11 @@ instruction_scheduler::calculate_deps()
/* write-after-read deps. */
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
- if (last_grf_write[inst->src[i].reg]) {
- add_dep(n, last_grf_write[inst->src[i].reg], n->latency);
- }
+ add_dep(n, last_grf_write[inst->src[i].reg]);
+ } else if (inst->src[i].file == FIXED_HW_REG &&
+ (inst->src[i].fixed_hw_reg.file ==
+ BRW_GENERAL_REGISTER_FILE)) {
+ add_dep(n, last_fixed_grf_write);
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM) {
@@ -384,9 +392,7 @@ instruction_scheduler::calculate_deps()
}
if (inst->predicated) {
- if (last_conditional_mod) {
- add_dep(n, last_conditional_mod, n->latency);
- }
+ add_dep(n, last_conditional_mod);
}
/* Update the things this instruction wrote, so earlier reads
@@ -407,6 +413,9 @@ instruction_scheduler::calculate_deps()
last_mrf_write[reg] = n;
}
+ } else if (inst->dst.file == FIXED_HW_REG &&
+ inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
+ last_fixed_grf_write = n;
} else if (inst->dst.file != BAD_FILE) {
add_barrier_deps(n);
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
new file mode 100644
index 00000000000..b4857871c78
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -0,0 +1,1734 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file brw_fs_visitor.cpp
+ *
+ * This file supports generating the FS LIR from the GLSL IR. The LIR
+ * makes it easier to do backend-specific optimizations than doing so
+ * in the GLSL IR or in the native code.
+ */
+extern "C" {
+
+#include <sys/types.h>
+
+#include "main/macros.h"
+#include "main/shaderobj.h"
+#include "main/uniforms.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
+#include "program/prog_optimize.h"
+#include "program/register_allocate.h"
+#include "program/sampler.h"
+#include "program/hash_table.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+}
+#include "brw_shader.h"
+#include "brw_fs.h"
+#include "../glsl/glsl_types.h"
+#include "../glsl/ir_optimization.h"
+#include "../glsl/ir_print_visitor.h"
+
+void
+fs_visitor::visit(ir_variable *ir)
+{
+ fs_reg *reg = NULL;
+
+ if (variable_storage(ir))
+ return;
+
+ if (strcmp(ir->name, "gl_FragColor") == 0) {
+ this->frag_color = ir;
+ } else if (strcmp(ir->name, "gl_FragData") == 0) {
+ this->frag_data = ir;
+ } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
+ this->frag_depth = ir;
+ }
+
+ if (ir->mode == ir_var_in) {
+ if (!strcmp(ir->name, "gl_FragCoord")) {
+ reg = emit_fragcoord_interpolation(ir);
+ } else if (!strcmp(ir->name, "gl_FrontFacing")) {
+ reg = emit_frontfacing_interpolation(ir);
+ } else {
+ reg = emit_general_interpolation(ir);
+ }
+ assert(reg);
+ hash_table_insert(this->variable_ht, reg, ir);
+ return;
+ }
+
+ if (ir->mode == ir_var_uniform) {
+ int param_index = c->prog_data.nr_params;
+
+ if (c->dispatch_width == 16) {
+ if (!variable_storage(ir)) {
+ fail("Failed to find uniform '%s' in 16-wide\n", ir->name);
+ }
+ return;
+ }
+
+ if (!strncmp(ir->name, "gl_", 3)) {
+ setup_builtin_uniform_values(ir);
+ } else {
+ setup_uniform_values(ir->location, ir->type);
+ }
+
+ reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
+ reg->type = brw_type_for_base_type(ir->type);
+ }
+
+ if (!reg)
+ reg = new(this->mem_ctx) fs_reg(this, ir->type);
+
+ hash_table_insert(this->variable_ht, reg, ir);
+}
+
+void
+fs_visitor::visit(ir_dereference_variable *ir)
+{
+ fs_reg *reg = variable_storage(ir->var);
+ this->result = *reg;
+}
+
+void
+fs_visitor::visit(ir_dereference_record *ir)
+{
+ const glsl_type *struct_type = ir->record->type;
+
+ ir->record->accept(this);
+
+ unsigned int offset = 0;
+ for (unsigned int i = 0; i < struct_type->length; i++) {
+ if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
+ break;
+ offset += type_size(struct_type->fields.structure[i].type);
+ }
+ this->result.reg_offset += offset;
+ this->result.type = brw_type_for_base_type(ir->type);
+}
+
+void
+fs_visitor::visit(ir_dereference_array *ir)
+{
+ ir_constant *index;
+ int element_size;
+
+ ir->array->accept(this);
+ index = ir->array_index->as_constant();
+
+ element_size = type_size(ir->type);
+ this->result.type = brw_type_for_base_type(ir->type);
+
+ if (index) {
+ assert(this->result.file == UNIFORM ||
+ (this->result.file == GRF &&
+ this->result.reg != 0));
+ this->result.reg_offset += index->value.i[0] * element_size;
+ } else {
+ assert(!"FINISHME: non-constant array element");
+ }
+}
+
+/* Instruction selection: Produce a MOV.sat instead of
+ * MIN(MAX(val, 0), 1) when possible.
+ */
+bool
+fs_visitor::try_emit_saturate(ir_expression *ir)
+{
+ ir_rvalue *sat_val = ir->as_rvalue_to_saturate();
+
+ if (!sat_val)
+ return false;
+
+ this->result = reg_undef;
+ sat_val->accept(this);
+ fs_reg src = this->result;
+
+ this->result = fs_reg(this, ir->type);
+ fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src);
+ inst->saturate = true;
+
+ return true;
+}
+
+void
+fs_visitor::visit(ir_expression *ir)
+{
+ unsigned int operand;
+ fs_reg op[2], temp;
+ fs_inst *inst;
+
+ assert(ir->get_num_operands() <= 2);
+
+ if (try_emit_saturate(ir))
+ return;
+
+ /* This is where our caller would like us to put the result, if possible. */
+ fs_reg saved_result_storage = this->result;
+
+ for (operand = 0; operand < ir->get_num_operands(); operand++) {
+ this->result = reg_undef;
+ ir->operands[operand]->accept(this);
+ if (this->result.file == BAD_FILE) {
+ ir_print_visitor v;
+ fail("Failed to get tree for expression operand:\n");
+ ir->operands[operand]->accept(&v);
+ }
+ op[operand] = this->result;
+
+ /* Matrix expression operands should have been broken down to vector
+ * operations already.
+ */
+ assert(!ir->operands[operand]->type->is_matrix());
+ /* And then those vector operands should have been broken down to scalar.
+ */
+ assert(!ir->operands[operand]->type->is_vector());
+ }
+
+ /* Inherit storage from our parent if possible, and otherwise we
+ * alloc a temporary.
+ */
+ if (saved_result_storage.file == BAD_FILE) {
+ this->result = fs_reg(this, ir->type);
+ } else {
+ this->result = saved_result_storage;
+ }
+
+ switch (ir->operation) {
+ case ir_unop_logic_not:
+ /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
+ * ones complement of the whole register, not just bit 0.
+ */
+ emit(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1));
+ break;
+ case ir_unop_neg:
+ op[0].negate = !op[0].negate;
+ this->result = op[0];
+ break;
+ case ir_unop_abs:
+ op[0].abs = true;
+ op[0].negate = false;
+ this->result = op[0];
+ break;
+ case ir_unop_sign:
+ temp = fs_reg(this, ir->type);
+
+ /* Unalias the destination. (imagine a = sign(a)) */
+ this->result = fs_reg(this, ir->type);
+
+ emit(BRW_OPCODE_MOV, this->result, fs_reg(0.0f));
+
+ inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f));
+ inst->conditional_mod = BRW_CONDITIONAL_G;
+ inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(1.0f));
+ inst->predicated = true;
+
+ inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f));
+ inst->conditional_mod = BRW_CONDITIONAL_L;
+ inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f));
+ inst->predicated = true;
+
+ break;
+ case ir_unop_rcp:
+ emit_math(FS_OPCODE_RCP, this->result, op[0]);
+ break;
+
+ case ir_unop_exp2:
+ emit_math(FS_OPCODE_EXP2, this->result, op[0]);
+ break;
+ case ir_unop_log2:
+ emit_math(FS_OPCODE_LOG2, this->result, op[0]);
+ break;
+ case ir_unop_exp:
+ case ir_unop_log:
+ assert(!"not reached: should be handled by ir_explog_to_explog2");
+ break;
+ case ir_unop_sin:
+ case ir_unop_sin_reduced:
+ emit_math(FS_OPCODE_SIN, this->result, op[0]);
+ break;
+ case ir_unop_cos:
+ case ir_unop_cos_reduced:
+ emit_math(FS_OPCODE_COS, this->result, op[0]);
+ break;
+
+ case ir_unop_dFdx:
+ emit(FS_OPCODE_DDX, this->result, op[0]);
+ break;
+ case ir_unop_dFdy:
+ emit(FS_OPCODE_DDY, this->result, op[0]);
+ break;
+
+ case ir_binop_add:
+ emit(BRW_OPCODE_ADD, this->result, op[0], op[1]);
+ break;
+ case ir_binop_sub:
+ assert(!"not reached: should be handled by ir_sub_to_add_neg");
+ break;
+
+ case ir_binop_mul:
+ emit(BRW_OPCODE_MUL, this->result, op[0], op[1]);
+ break;
+ case ir_binop_div:
+ assert(!"not reached: should be handled by ir_div_to_mul_rcp");
+ break;
+ case ir_binop_mod:
+ assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
+ break;
+
+ case ir_binop_less:
+ case ir_binop_greater:
+ case ir_binop_lequal:
+ case ir_binop_gequal:
+ case ir_binop_equal:
+ case ir_binop_all_equal:
+ case ir_binop_nequal:
+ case ir_binop_any_nequal:
+ temp = this->result;
+ /* original gen4 does implicit conversion before comparison. */
+ if (intel->gen < 5)
+ temp.type = op[0].type;
+
+ inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
+ inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
+ emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1));
+ break;
+
+ case ir_binop_logic_xor:
+ emit(BRW_OPCODE_XOR, this->result, op[0], op[1]);
+ break;
+
+ case ir_binop_logic_or:
+ emit(BRW_OPCODE_OR, this->result, op[0], op[1]);
+ break;
+
+ case ir_binop_logic_and:
+ emit(BRW_OPCODE_AND, this->result, op[0], op[1]);
+ break;
+
+ case ir_binop_dot:
+ case ir_unop_any:
+ assert(!"not reached: should be handled by brw_fs_channel_expressions");
+ break;
+
+ case ir_unop_noise:
+ assert(!"not reached: should be handled by lower_noise");
+ break;
+
+ case ir_quadop_vector:
+ assert(!"not reached: should be handled by lower_quadop_vector");
+ break;
+
+ case ir_unop_sqrt:
+ emit_math(FS_OPCODE_SQRT, this->result, op[0]);
+ break;
+
+ case ir_unop_rsq:
+ emit_math(FS_OPCODE_RSQ, this->result, op[0]);
+ break;
+
+ case ir_unop_i2f:
+ case ir_unop_b2f:
+ case ir_unop_b2i:
+ case ir_unop_f2i:
+ emit(BRW_OPCODE_MOV, this->result, op[0]);
+ break;
+ case ir_unop_f2b:
+ case ir_unop_i2b:
+ temp = this->result;
+ /* original gen4 does implicit conversion before comparison. */
+ if (intel->gen < 5)
+ temp.type = op[0].type;
+
+ inst = emit(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ inst = emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(1));
+ break;
+
+ case ir_unop_trunc:
+ emit(BRW_OPCODE_RNDZ, this->result, op[0]);
+ break;
+ case ir_unop_ceil:
+ op[0].negate = !op[0].negate;
+ inst = emit(BRW_OPCODE_RNDD, this->result, op[0]);
+ this->result.negate = true;
+ break;
+ case ir_unop_floor:
+ inst = emit(BRW_OPCODE_RNDD, this->result, op[0]);
+ break;
+ case ir_unop_fract:
+ inst = emit(BRW_OPCODE_FRC, this->result, op[0]);
+ break;
+ case ir_unop_round_even:
+ emit(BRW_OPCODE_RNDE, this->result, op[0]);
+ break;
+
+ case ir_binop_min:
+ if (intel->gen >= 6) {
+ inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_L;
+ } else {
+ /* Unalias the destination */
+ this->result = fs_reg(this, ir->type);
+
+ inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_L;
+
+ inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
+ inst->predicated = true;
+ }
+ break;
+ case ir_binop_max:
+ if (intel->gen >= 6) {
+ inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_GE;
+ } else {
+ /* Unalias the destination */
+ this->result = fs_reg(this, ir->type);
+
+ inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_G;
+
+ inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
+ inst->predicated = true;
+ }
+ break;
+
+ case ir_binop_pow:
+ emit_math(FS_OPCODE_POW, this->result, op[0], op[1]);
+ break;
+
+ case ir_unop_bit_not:
+ inst = emit(BRW_OPCODE_NOT, this->result, op[0]);
+ break;
+ case ir_binop_bit_and:
+ inst = emit(BRW_OPCODE_AND, this->result, op[0], op[1]);
+ break;
+ case ir_binop_bit_xor:
+ inst = emit(BRW_OPCODE_XOR, this->result, op[0], op[1]);
+ break;
+ case ir_binop_bit_or:
+ inst = emit(BRW_OPCODE_OR, this->result, op[0], op[1]);
+ break;
+
+ case ir_unop_u2f:
+ case ir_binop_lshift:
+ case ir_binop_rshift:
+ assert(!"GLSL 1.30 features unsupported");
+ break;
+ }
+}
+
+void
+fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
+ const glsl_type *type, bool predicated)
+{
+ switch (type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_BOOL:
+ for (unsigned int i = 0; i < type->components(); i++) {
+ l.type = brw_type_for_base_type(type);
+ r.type = brw_type_for_base_type(type);
+
+ if (predicated || !l.equals(&r)) {
+ fs_inst *inst = emit(BRW_OPCODE_MOV, l, r);
+ inst->predicated = predicated;
+ }
+
+ l.reg_offset++;
+ r.reg_offset++;
+ }
+ break;
+ case GLSL_TYPE_ARRAY:
+ for (unsigned int i = 0; i < type->length; i++) {
+ emit_assignment_writes(l, r, type->fields.array, predicated);
+ }
+ break;
+
+ case GLSL_TYPE_STRUCT:
+ for (unsigned int i = 0; i < type->length; i++) {
+ emit_assignment_writes(l, r, type->fields.structure[i].type,
+ predicated);
+ }
+ break;
+
+ case GLSL_TYPE_SAMPLER:
+ break;
+
+ default:
+ assert(!"not reached");
+ break;
+ }
+}
+
+void
+fs_visitor::visit(ir_assignment *ir)
+{
+ struct fs_reg l, r;
+ fs_inst *inst;
+
+ /* FINISHME: arrays on the lhs */
+ this->result = reg_undef;
+ ir->lhs->accept(this);
+ l = this->result;
+
+ /* If we're doing a direct assignment, an RHS expression could
+ * drop its result right into our destination. Otherwise, tell it
+ * not to.
+ */
+ if (ir->condition ||
+ !(ir->lhs->type->is_scalar() ||
+ (ir->lhs->type->is_vector() &&
+ ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1))) {
+ this->result = reg_undef;
+ }
+
+ ir->rhs->accept(this);
+ r = this->result;
+
+ assert(l.file != BAD_FILE);
+ assert(r.file != BAD_FILE);
+
+ if (ir->condition) {
+ emit_bool_to_cond_code(ir->condition);
+ }
+
+ if (ir->lhs->type->is_scalar() ||
+ ir->lhs->type->is_vector()) {
+ for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
+ if (ir->write_mask & (1 << i)) {
+ if (ir->condition) {
+ inst = emit(BRW_OPCODE_MOV, l, r);
+ inst->predicated = true;
+ } else if (!l.equals(&r)) {
+ inst = emit(BRW_OPCODE_MOV, l, r);
+ }
+
+ r.reg_offset++;
+ }
+ l.reg_offset++;
+ }
+ } else {
+ emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL);
+ }
+}
+
+fs_inst *
+fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+ int sampler)
+{
+ int mlen;
+ int base_mrf = 1;
+ bool simd16 = false;
+ fs_reg orig_dst;
+
+ /* g0 header. */
+ mlen = 1;
+
+ if (ir->shadow_comparitor) {
+ for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
+ fs_inst *inst = emit(BRW_OPCODE_MOV,
+ fs_reg(MRF, base_mrf + mlen + i), coordinate);
+ if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+ inst->saturate = true;
+
+ coordinate.reg_offset++;
+ }
+ /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
+ mlen += 3;
+
+ if (ir->op == ir_tex) {
+ /* There's no plain shadow compare message, so we use shadow
+ * compare with a bias of 0.0.
+ */
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f));
+ mlen++;
+ } else if (ir->op == ir_txb) {
+ this->result = reg_undef;
+ ir->lod_info.bias->accept(this);
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
+ mlen++;
+ } else {
+ assert(ir->op == ir_txl);
+ this->result = reg_undef;
+ ir->lod_info.lod->accept(this);
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
+ mlen++;
+ }
+
+ this->result = reg_undef;
+ ir->shadow_comparitor->accept(this);
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
+ mlen++;
+ } else if (ir->op == ir_tex) {
+ for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
+ fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
+ coordinate);
+ if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+ inst->saturate = true;
+ coordinate.reg_offset++;
+ }
+ /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
+ mlen += 3;
+ } else if (ir->op == ir_txd) {
+ assert(!"TXD isn't supported on gen4 yet.");
+ } else {
+ /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod
+ * instructions. We'll need to do SIMD16 here.
+ */
+ assert(ir->op == ir_txb || ir->op == ir_txl);
+
+ for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
+ fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF,
+ base_mrf + mlen + i * 2),
+ coordinate);
+ if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+ inst->saturate = true;
+ coordinate.reg_offset++;
+ }
+
+ /* lod/bias appears after u/v/r. */
+ mlen += 6;
+
+ if (ir->op == ir_txb) {
+ this->result = reg_undef;
+ ir->lod_info.bias->accept(this);
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
+ mlen++;
+ } else {
+ this->result = reg_undef;
+ ir->lod_info.lod->accept(this);
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
+ mlen++;
+ }
+
+ /* The unused upper half. */
+ mlen++;
+
+ /* Now, since we're doing simd16, the return is 2 interleaved
+ * vec4s where the odd-indexed ones are junk. We'll need to move
+ * this weirdness around to the expected layout.
+ */
+ simd16 = true;
+ orig_dst = dst;
+ dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type,
+ 2));
+ dst.type = BRW_REGISTER_TYPE_F;
+ }
+
+ fs_inst *inst = NULL;
+ switch (ir->op) {
+ case ir_tex:
+ inst = emit(FS_OPCODE_TEX, dst);
+ break;
+ case ir_txb:
+ inst = emit(FS_OPCODE_TXB, dst);
+ break;
+ case ir_txl:
+ inst = emit(FS_OPCODE_TXL, dst);
+ break;
+ case ir_txd:
+ inst = emit(FS_OPCODE_TXD, dst);
+ break;
+ case ir_txf:
+ assert(!"GLSL 1.30 features unsupported");
+ break;
+ }
+ inst->base_mrf = base_mrf;
+ inst->mlen = mlen;
+ inst->header_present = true;
+
+ if (simd16) {
+ for (int i = 0; i < 4; i++) {
+ emit(BRW_OPCODE_MOV, orig_dst, dst);
+ orig_dst.reg_offset++;
+ dst.reg_offset += 2;
+ }
+ }
+
+ return inst;
+}
+
+/* gen5's sampler has slots for u, v, r, array index, then optional
+ * parameters like shadow comparitor or LOD bias. If optional
+ * parameters aren't present, those base slots are optional and don't
+ * need to be included in the message.
+ *
+ * We don't fill in the unnecessary slots regardless, which may look
+ * surprising in the disassembly.
+ */
+fs_inst *
+fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+ int sampler)
+{
+ int mlen = 0;
+ int base_mrf = 2;
+ int reg_width = c->dispatch_width / 8;
+ bool header_present = false;
+
+ if (ir->offset) {
+ /* The offsets set up by the ir_texture visitor are in the
+ * m1 header, so we can't go headerless.
+ */
+ header_present = true;
+ mlen++;
+ base_mrf--;
+ }
+
+ for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
+ fs_inst *inst = emit(BRW_OPCODE_MOV,
+ fs_reg(MRF, base_mrf + mlen + i * reg_width),
+ coordinate);
+ if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+ inst->saturate = true;
+ coordinate.reg_offset++;
+ }
+ mlen += ir->coordinate->type->vector_elements * reg_width;
+
+ if (ir->shadow_comparitor) {
+ mlen = MAX2(mlen, header_present + 4 * reg_width);
+
+ this->result = reg_undef;
+ ir->shadow_comparitor->accept(this);
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
+ mlen += reg_width;
+ }
+
+ fs_inst *inst = NULL;
+ switch (ir->op) {
+ case ir_tex:
+ inst = emit(FS_OPCODE_TEX, dst);
+ break;
+ case ir_txb:
+ this->result = reg_undef;
+ ir->lod_info.bias->accept(this);
+ mlen = MAX2(mlen, header_present + 4 * reg_width);
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
+ mlen += reg_width;
+
+ inst = emit(FS_OPCODE_TXB, dst);
+
+ break;
+ case ir_txl:
+ this->result = reg_undef;
+ ir->lod_info.lod->accept(this);
+ mlen = MAX2(mlen, header_present + 4 * reg_width);
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
+ mlen += reg_width;
+
+ inst = emit(FS_OPCODE_TXL, dst);
+ break;
+ case ir_txd:
+ case ir_txf:
+ assert(!"GLSL 1.30 features unsupported");
+ break;
+ }
+ inst->base_mrf = base_mrf;
+ inst->mlen = mlen;
+ inst->header_present = header_present;
+
+ if (mlen > 11) {
+ fail("Message length >11 disallowed by hardware\n");
+ }
+
+ return inst;
+}
+
+fs_inst *
+fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+ int sampler)
+{
+ int mlen = 0;
+ int base_mrf = 2;
+ int reg_width = c->dispatch_width / 8;
+ bool header_present = false;
+
+ if (ir->offset) {
+ /* The offsets set up by the ir_texture visitor are in the
+ * m1 header, so we can't go headerless.
+ */
+ header_present = true;
+ mlen++;
+ base_mrf--;
+ }
+
+ if (ir->shadow_comparitor) {
+ ir->shadow_comparitor->accept(this);
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
+ mlen += reg_width;
+ }
+
+ /* Set up the LOD info */
+ switch (ir->op) {
+ case ir_tex:
+ break;
+ case ir_txb:
+ ir->lod_info.bias->accept(this);
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
+ mlen += reg_width;
+ break;
+ case ir_txl:
+ ir->lod_info.lod->accept(this);
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
+ mlen += reg_width;
+ break;
+ case ir_txd:
+ case ir_txf:
+ assert(!"GLSL 1.30 features unsupported");
+ break;
+ }
+
+ /* Set up the coordinate */
+ for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
+ fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
+ coordinate);
+ if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+ inst->saturate = true;
+ coordinate.reg_offset++;
+ mlen += reg_width;
+ }
+
+ /* Generate the SEND */
+ fs_inst *inst = NULL;
+ switch (ir->op) {
+ case ir_tex: inst = emit(FS_OPCODE_TEX, dst); break;
+ case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
+ case ir_txl: inst = emit(FS_OPCODE_TXL, dst); break;
+ case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break;
+ case ir_txf: assert(!"TXF unsupported.");
+ }
+ inst->base_mrf = base_mrf;
+ inst->mlen = mlen;
+ inst->header_present = header_present;
+
+ if (mlen > 11) {
+ fail("Message length >11 disallowed by hardware\n");
+ }
+
+ return inst;
+}
+
+void
+fs_visitor::visit(ir_texture *ir)
+{
+ int sampler;
+ fs_inst *inst = NULL;
+
+ this->result = reg_undef;
+ ir->coordinate->accept(this);
+ fs_reg coordinate = this->result;
+
+ if (ir->offset != NULL) {
+ ir_constant *offset = ir->offset->as_constant();
+ assert(offset != NULL);
+
+ signed char offsets[3];
+ for (unsigned i = 0; i < ir->offset->type->vector_elements; i++)
+ offsets[i] = (signed char) offset->value.i[i];
+
+ /* Combine all three offsets into a single unsigned dword:
+ *
+ * bits 11:8 - U Offset (X component)
+ * bits 7:4 - V Offset (Y component)
+ * bits 3:0 - R Offset (Z component)
+ */
+ unsigned offset_bits = 0;
+ for (unsigned i = 0; i < ir->offset->type->vector_elements; i++) {
+ const unsigned shift = 4 * (2 - i);
+ offset_bits |= (offsets[i] << shift) & (0xF << shift);
+ }
+
+ /* Explicitly set up the message header by copying g0 to msg reg m1. */
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, 1, BRW_REGISTER_TYPE_UD),
+ fs_reg(GRF, 0, BRW_REGISTER_TYPE_UD));
+
+ /* Then set the offset bits in DWord 2 of the message header. */
+ emit(BRW_OPCODE_MOV,
+ fs_reg(retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1, 2),
+ BRW_REGISTER_TYPE_UD)),
+ fs_reg(brw_imm_uw(offset_bits)));
+ }
+
+ /* Should be lowered by do_lower_texture_projection */
+ assert(!ir->projector);
+
+ sampler = _mesa_get_sampler_uniform_value(ir->sampler,
+ prog,
+ &fp->Base);
+ sampler = fp->Base.SamplerUnits[sampler];
+
+ /* The 965 requires the EU to do the normalization of GL rectangle
+ * texture coordinates. We use the program parameter state
+ * tracking to get the scaling factor.
+ */
+ if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
+ struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
+ int tokens[STATE_LENGTH] = {
+ STATE_INTERNAL,
+ STATE_TEXRECT_SCALE,
+ sampler,
+ 0,
+ 0
+ };
+
+ if (c->dispatch_width == 16) {
+ fail("rectangle scale uniform setup not supported on 16-wide\n");
+ this->result = fs_reg(this, ir->type);
+ return;
+ }
+
+ c->prog_data.param_convert[c->prog_data.nr_params] =
+ PARAM_NO_CONVERT;
+ c->prog_data.param_convert[c->prog_data.nr_params + 1] =
+ PARAM_NO_CONVERT;
+
+ fs_reg scale_x = fs_reg(UNIFORM, c->prog_data.nr_params);
+ fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1);
+ GLuint index = _mesa_add_state_reference(params,
+ (gl_state_index *)tokens);
+
+ this->param_index[c->prog_data.nr_params] = index;
+ this->param_offset[c->prog_data.nr_params] = 0;
+ c->prog_data.nr_params++;
+ this->param_index[c->prog_data.nr_params] = index;
+ this->param_offset[c->prog_data.nr_params] = 1;
+ c->prog_data.nr_params++;
+
+ fs_reg dst = fs_reg(this, ir->coordinate->type);
+ fs_reg src = coordinate;
+ coordinate = dst;
+
+ emit(BRW_OPCODE_MUL, dst, src, scale_x);
+ dst.reg_offset++;
+ src.reg_offset++;
+ emit(BRW_OPCODE_MUL, dst, src, scale_y);
+ }
+
+ /* Writemasking doesn't eliminate channels on SIMD8 texture
+ * samples, so don't worry about them.
+ */
+ fs_reg dst = fs_reg(this, glsl_type::vec4_type);
+
+ if (intel->gen >= 7) {
+ inst = emit_texture_gen7(ir, dst, coordinate, sampler);
+ } else if (intel->gen >= 5) {
+ inst = emit_texture_gen5(ir, dst, coordinate, sampler);
+ } else {
+ inst = emit_texture_gen4(ir, dst, coordinate, sampler);
+ }
+
+ /* If there's an offset, we already set up m1. To avoid the implied move,
+ * use the null register. Otherwise, we want an implied move from g0.
+ */
+ if (ir->offset != NULL || !inst->header_present)
+ inst->src[0] = reg_undef;
+ else
+ inst->src[0] = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
+
+ inst->sampler = sampler;
+
+ this->result = dst;
+
+ if (ir->shadow_comparitor)
+ inst->shadow_compare = true;
+
+ if (ir->type == glsl_type::float_type) {
+ /* Ignore DEPTH_TEXTURE_MODE swizzling. */
+ assert(ir->sampler->type->sampler_shadow);
+ } else if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) {
+ fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type);
+
+ for (int i = 0; i < 4; i++) {
+ int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
+ fs_reg l = swizzle_dst;
+ l.reg_offset += i;
+
+ if (swiz == SWIZZLE_ZERO) {
+ emit(BRW_OPCODE_MOV, l, fs_reg(0.0f));
+ } else if (swiz == SWIZZLE_ONE) {
+ emit(BRW_OPCODE_MOV, l, fs_reg(1.0f));
+ } else {
+ fs_reg r = dst;
+ r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
+ emit(BRW_OPCODE_MOV, l, r);
+ }
+ }
+ this->result = swizzle_dst;
+ }
+}
+
+void
+fs_visitor::visit(ir_swizzle *ir)
+{
+ this->result = reg_undef;
+ ir->val->accept(this);
+ fs_reg val = this->result;
+
+ if (ir->type->vector_elements == 1) {
+ this->result.reg_offset += ir->mask.x;
+ return;
+ }
+
+ fs_reg result = fs_reg(this, ir->type);
+ this->result = result;
+
+ for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
+ fs_reg channel = val;
+ int swiz = 0;
+
+ switch (i) {
+ case 0:
+ swiz = ir->mask.x;
+ break;
+ case 1:
+ swiz = ir->mask.y;
+ break;
+ case 2:
+ swiz = ir->mask.z;
+ break;
+ case 3:
+ swiz = ir->mask.w;
+ break;
+ }
+
+ channel.reg_offset += swiz;
+ emit(BRW_OPCODE_MOV, result, channel);
+ result.reg_offset++;
+ }
+}
+
+void
+fs_visitor::visit(ir_discard *ir)
+{
+ assert(ir->condition == NULL); /* FINISHME */
+
+ emit(FS_OPCODE_DISCARD);
+ kill_emitted = true;
+}
+
+void
+fs_visitor::visit(ir_constant *ir)
+{
+ /* Set this->result to reg at the bottom of the function because some code
+ * paths will cause this visitor to be applied to other fields. This will
+ * cause the value stored in this->result to be modified.
+ *
+ * Make reg constant so that it doesn't get accidentally modified along the
+ * way. Yes, I actually had this problem. :(
+ */
+ const fs_reg reg(this, ir->type);
+ fs_reg dst_reg = reg;
+
+ if (ir->type->is_array()) {
+ const unsigned size = type_size(ir->type->fields.array);
+
+ for (unsigned i = 0; i < ir->type->length; i++) {
+ this->result = reg_undef;
+ ir->array_elements[i]->accept(this);
+ fs_reg src_reg = this->result;
+
+ dst_reg.type = src_reg.type;
+ for (unsigned j = 0; j < size; j++) {
+ emit(BRW_OPCODE_MOV, dst_reg, src_reg);
+ src_reg.reg_offset++;
+ dst_reg.reg_offset++;
+ }
+ }
+ } else if (ir->type->is_record()) {
+ foreach_list(node, &ir->components) {
+ ir_instruction *const field = (ir_instruction *) node;
+ const unsigned size = type_size(field->type);
+
+ this->result = reg_undef;
+ field->accept(this);
+ fs_reg src_reg = this->result;
+
+ dst_reg.type = src_reg.type;
+ for (unsigned j = 0; j < size; j++) {
+ emit(BRW_OPCODE_MOV, dst_reg, src_reg);
+ src_reg.reg_offset++;
+ dst_reg.reg_offset++;
+ }
+ }
+ } else {
+ const unsigned size = type_size(ir->type);
+
+ for (unsigned i = 0; i < size; i++) {
+ switch (ir->type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i]));
+ break;
+ case GLSL_TYPE_UINT:
+ emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i]));
+ break;
+ case GLSL_TYPE_INT:
+ emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i]));
+ break;
+ case GLSL_TYPE_BOOL:
+ emit(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i]));
+ break;
+ default:
+ assert(!"Non-float/uint/int/bool constant");
+ }
+ dst_reg.reg_offset++;
+ }
+ }
+
+ this->result = reg;
+}
+
+void
+fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
+{
+ ir_expression *expr = ir->as_expression();
+
+ if (expr) {
+ fs_reg op[2];
+ fs_inst *inst;
+
+ assert(expr->get_num_operands() <= 2);
+ for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
+ assert(expr->operands[i]->type->is_scalar());
+
+ this->result = reg_undef;
+ expr->operands[i]->accept(this);
+ op[i] = this->result;
+ }
+
+ switch (expr->operation) {
+ case ir_unop_logic_not:
+ inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1));
+ inst->conditional_mod = BRW_CONDITIONAL_Z;
+ break;
+
+ case ir_binop_logic_xor:
+ inst = emit(BRW_OPCODE_XOR, reg_null_d, op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ break;
+
+ case ir_binop_logic_or:
+ inst = emit(BRW_OPCODE_OR, reg_null_d, op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ break;
+
+ case ir_binop_logic_and:
+ inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ break;
+
+ case ir_unop_f2b:
+ if (intel->gen >= 6) {
+ inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0.0f));
+ } else {
+ inst = emit(BRW_OPCODE_MOV, reg_null_f, op[0]);
+ }
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ break;
+
+ case ir_unop_i2b:
+ if (intel->gen >= 6) {
+ inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0));
+ } else {
+ inst = emit(BRW_OPCODE_MOV, reg_null_d, op[0]);
+ }
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ break;
+
+ case ir_binop_greater:
+ case ir_binop_gequal:
+ case ir_binop_less:
+ case ir_binop_lequal:
+ case ir_binop_equal:
+ case ir_binop_all_equal:
+ case ir_binop_nequal:
+ case ir_binop_any_nequal:
+ inst = emit(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1]);
+ inst->conditional_mod =
+ brw_conditional_for_comparison(expr->operation);
+ break;
+
+ default:
+ assert(!"not reached");
+ fail("bad cond code\n");
+ break;
+ }
+ return;
+ }
+
+ this->result = reg_undef;
+ ir->accept(this);
+
+ if (intel->gen >= 6) {
+ fs_inst *inst = emit(BRW_OPCODE_AND, reg_null_d, this->result, fs_reg(1));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ } else {
+ fs_inst *inst = emit(BRW_OPCODE_MOV, reg_null_d, this->result);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ }
+}
+
+/**
+ * Emit a gen6 IF statement with the comparison folded into the IF
+ * instruction.
+ */
+void
+fs_visitor::emit_if_gen6(ir_if *ir)
+{
+ ir_expression *expr = ir->condition->as_expression();
+
+ if (expr) {
+ fs_reg op[2];
+ fs_inst *inst;
+ fs_reg temp;
+
+ assert(expr->get_num_operands() <= 2);
+ for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
+ assert(expr->operands[i]->type->is_scalar());
+
+ this->result = reg_undef;
+ expr->operands[i]->accept(this);
+ op[i] = this->result;
+ }
+
+ switch (expr->operation) {
+ case ir_unop_logic_not:
+ inst = emit(BRW_OPCODE_IF, temp, op[0], fs_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_Z;
+ return;
+
+ case ir_binop_logic_xor:
+ inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+
+ case ir_binop_logic_or:
+ temp = fs_reg(this, glsl_type::bool_type);
+ emit(BRW_OPCODE_OR, temp, op[0], op[1]);
+ inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+
+ case ir_binop_logic_and:
+ temp = fs_reg(this, glsl_type::bool_type);
+ emit(BRW_OPCODE_AND, temp, op[0], op[1]);
+ inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+
+ case ir_unop_f2b:
+ inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+
+ case ir_unop_i2b:
+ inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+
+ case ir_binop_greater:
+ case ir_binop_gequal:
+ case ir_binop_less:
+ case ir_binop_lequal:
+ case ir_binop_equal:
+ case ir_binop_all_equal:
+ case ir_binop_nequal:
+ case ir_binop_any_nequal:
+ inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]);
+ inst->conditional_mod =
+ brw_conditional_for_comparison(expr->operation);
+ return;
+ default:
+ assert(!"not reached");
+ inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ fail("bad condition\n");
+ return;
+ }
+ return;
+ }
+
+ this->result = reg_undef;
+ ir->condition->accept(this);
+
+ fs_inst *inst = emit(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+}
+
+void
+fs_visitor::visit(ir_if *ir)
+{
+ fs_inst *inst;
+
+ if (intel->gen != 6 && c->dispatch_width == 16) {
+ fail("Can't support (non-uniform) control flow on 16-wide\n");
+ }
+
+ /* Don't point the annotation at the if statement, because then it plus
+ * the then and else blocks get printed.
+ */
+ this->base_ir = ir->condition;
+
+ if (intel->gen == 6) {
+ emit_if_gen6(ir);
+ } else {
+ emit_bool_to_cond_code(ir->condition);
+
+ inst = emit(BRW_OPCODE_IF);
+ inst->predicated = true;
+ }
+
+ foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
+ ir_instruction *ir = (ir_instruction *)iter.get();
+ this->base_ir = ir;
+ this->result = reg_undef;
+ ir->accept(this);
+ }
+
+ if (!ir->else_instructions.is_empty()) {
+ emit(BRW_OPCODE_ELSE);
+
+ foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
+ ir_instruction *ir = (ir_instruction *)iter.get();
+ this->base_ir = ir;
+ this->result = reg_undef;
+ ir->accept(this);
+ }
+ }
+
+ emit(BRW_OPCODE_ENDIF);
+}
+
+void
+fs_visitor::visit(ir_loop *ir)
+{
+ fs_reg counter = reg_undef;
+
+ if (c->dispatch_width == 16) {
+ fail("Can't support (non-uniform) control flow on 16-wide\n");
+ }
+
+ if (ir->counter) {
+ this->base_ir = ir->counter;
+ ir->counter->accept(this);
+ counter = *(variable_storage(ir->counter));
+
+ if (ir->from) {
+ this->result = counter;
+
+ this->base_ir = ir->from;
+ this->result = counter;
+ ir->from->accept(this);
+
+ if (!this->result.equals(&counter))
+ emit(BRW_OPCODE_MOV, counter, this->result);
+ }
+ }
+
+ emit(BRW_OPCODE_DO);
+
+ if (ir->to) {
+ this->base_ir = ir->to;
+ this->result = reg_undef;
+ ir->to->accept(this);
+
+ fs_inst *inst = emit(BRW_OPCODE_CMP, reg_null_cmp, counter, this->result);
+ inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);
+
+ inst = emit(BRW_OPCODE_BREAK);
+ inst->predicated = true;
+ }
+
+ foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
+ ir_instruction *ir = (ir_instruction *)iter.get();
+
+ this->base_ir = ir;
+ this->result = reg_undef;
+ ir->accept(this);
+ }
+
+ if (ir->increment) {
+ this->base_ir = ir->increment;
+ this->result = reg_undef;
+ ir->increment->accept(this);
+ emit(BRW_OPCODE_ADD, counter, counter, this->result);
+ }
+
+ emit(BRW_OPCODE_WHILE);
+}
+
+void
+fs_visitor::visit(ir_loop_jump *ir)
+{
+ switch (ir->mode) {
+ case ir_loop_jump::jump_break:
+ emit(BRW_OPCODE_BREAK);
+ break;
+ case ir_loop_jump::jump_continue:
+ emit(BRW_OPCODE_CONTINUE);
+ break;
+ }
+}
+
+void
+fs_visitor::visit(ir_call *ir)
+{
+ assert(!"FINISHME");
+}
+
+void
+fs_visitor::visit(ir_return *ir)
+{
+ assert(!"FINISHME");
+}
+
+void
+fs_visitor::visit(ir_function *ir)
+{
+ /* Ignore function bodies other than main() -- we shouldn't see calls to
+ * them since they should all be inlined before we get to ir_to_mesa.
+ */
+ if (strcmp(ir->name, "main") == 0) {
+ const ir_function_signature *sig;
+ exec_list empty;
+
+ sig = ir->matching_signature(&empty);
+
+ assert(sig);
+
+ foreach_iter(exec_list_iterator, iter, sig->body) {
+ ir_instruction *ir = (ir_instruction *)iter.get();
+ this->base_ir = ir;
+ this->result = reg_undef;
+ ir->accept(this);
+ }
+ }
+}
+
+void
+fs_visitor::visit(ir_function_signature *ir)
+{
+ assert(!"not reached");
+ (void)ir;
+}
+
+fs_inst *
+fs_visitor::emit(fs_inst inst)
+{
+ fs_inst *list_inst = new(mem_ctx) fs_inst;
+ *list_inst = inst;
+
+ if (force_uncompressed_stack > 0)
+ list_inst->force_uncompressed = true;
+ else if (force_sechalf_stack > 0)
+ list_inst->force_sechalf = true;
+
+ list_inst->annotation = this->current_annotation;
+ list_inst->ir = this->base_ir;
+
+ this->instructions.push_tail(list_inst);
+
+ return list_inst;
+}
+
+/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
+void
+fs_visitor::emit_dummy_fs()
+{
+ /* Everyone's favorite color. */
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, 2), fs_reg(1.0f));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, 3), fs_reg(0.0f));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, 4), fs_reg(1.0f));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, 5), fs_reg(0.0f));
+
+ fs_inst *write;
+ write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0));
+ write->base_mrf = 0;
+}
+
+/* The register location here is relative to the start of the URB
+ * data. It will get adjusted to be a real location before
+ * generate_code() time.
+ */
+struct brw_reg
+fs_visitor::interp_reg(int location, int channel)
+{
+ int regnr = urb_setup[location] * 2 + channel / 2;
+ int stride = (channel & 1) * 4;
+
+ assert(urb_setup[location] != -1);
+
+ return brw_vec1_grf(regnr, stride);
+}
+
+/** Emits the interpolation for the varying inputs. */
+void
+fs_visitor::emit_interpolation_setup_gen4()
+{
+ this->current_annotation = "compute pixel centers";
+ this->pixel_x = fs_reg(this, glsl_type::uint_type);
+ this->pixel_y = fs_reg(this, glsl_type::uint_type);
+ this->pixel_x.type = BRW_REGISTER_TYPE_UW;
+ this->pixel_y.type = BRW_REGISTER_TYPE_UW;
+
+ emit(FS_OPCODE_PIXEL_X, this->pixel_x);
+ emit(FS_OPCODE_PIXEL_Y, this->pixel_y);
+
+ this->current_annotation = "compute pixel deltas from v0";
+ if (brw->has_pln) {
+ this->delta_x = fs_reg(this, glsl_type::vec2_type);
+ this->delta_y = this->delta_x;
+ this->delta_y.reg_offset++;
+ } else {
+ this->delta_x = fs_reg(this, glsl_type::float_type);
+ this->delta_y = fs_reg(this, glsl_type::float_type);
+ }
+ emit(BRW_OPCODE_ADD, this->delta_x,
+ this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0))));
+ emit(BRW_OPCODE_ADD, this->delta_y,
+ this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1))));
+
+ this->current_annotation = "compute pos.w and 1/pos.w";
+ /* Compute wpos.w. It's always in our setup, since it's needed to
+ * interpolate the other attributes.
+ */
+ this->wpos_w = fs_reg(this, glsl_type::float_type);
+ emit(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
+ interp_reg(FRAG_ATTRIB_WPOS, 3));
+ /* Compute the pixel 1/W value from wpos.w. */
+ this->pixel_w = fs_reg(this, glsl_type::float_type);
+ emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
+ this->current_annotation = NULL;
+}
+
+/** Emits the interpolation for the varying inputs. */
+void
+fs_visitor::emit_interpolation_setup_gen6()
+{
+ struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
+
+ /* If the pixel centers end up used, the setup is the same as for gen4. */
+ this->current_annotation = "compute pixel centers";
+ fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type);
+ fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
+ int_pixel_x.type = BRW_REGISTER_TYPE_UW;
+ int_pixel_y.type = BRW_REGISTER_TYPE_UW;
+ emit(BRW_OPCODE_ADD,
+ int_pixel_x,
+ fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
+ fs_reg(brw_imm_v(0x10101010)));
+ emit(BRW_OPCODE_ADD,
+ int_pixel_y,
+ fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
+ fs_reg(brw_imm_v(0x11001100)));
+
+ /* As of gen6, we can no longer mix float and int sources. We have
+ * to turn the integer pixel centers into floats for their actual
+ * use.
+ */
+ this->pixel_x = fs_reg(this, glsl_type::float_type);
+ this->pixel_y = fs_reg(this, glsl_type::float_type);
+ emit(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x);
+ emit(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y);
+
+ this->current_annotation = "compute pos.w";
+ this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
+ this->wpos_w = fs_reg(this, glsl_type::float_type);
+ emit_math(FS_OPCODE_RCP, this->wpos_w, this->pixel_w);
+
+ this->delta_x = fs_reg(brw_vec8_grf(2, 0));
+ this->delta_y = fs_reg(brw_vec8_grf(3, 0));
+
+ this->current_annotation = NULL;
+}
+
+void
+fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
+{
+ int reg_width = c->dispatch_width / 8;
+
+ if (c->dispatch_width == 8 || intel->gen == 6) {
+ /* SIMD8 write looks like:
+ * m + 0: r0
+ * m + 1: r1
+ * m + 2: g0
+ * m + 3: g1
+ *
+ * gen6 SIMD16 DP write looks like:
+ * m + 0: r0
+ * m + 1: r1
+ * m + 2: g0
+ * m + 3: g1
+ * m + 4: b0
+ * m + 5: b1
+ * m + 6: a0
+ * m + 7: a1
+ */
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index * reg_width),
+ color);
+ } else {
+ /* pre-gen6 SIMD16 single source DP write looks like:
+ * m + 0: r0
+ * m + 1: g0
+ * m + 2: b0
+ * m + 3: a0
+ * m + 4: r1
+ * m + 5: g1
+ * m + 6: b1
+ * m + 7: a1
+ */
+ if (brw->has_compr4) {
+ /* By setting the high bit of the MRF register number, we
+ * indicate that we want COMPR4 mode - instead of doing the
+ * usual destination + 1 for the second half we get
+ * destination + 4.
+ */
+ emit(BRW_OPCODE_MOV,
+ fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), color);
+ } else {
+ push_force_uncompressed();
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), color);
+ pop_force_uncompressed();
+
+ push_force_sechalf();
+ color.sechalf = true;
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), color);
+ pop_force_sechalf();
+ color.sechalf = false;
+ }
+ }
+}
+
+void
+fs_visitor::emit_fb_writes()
+{
+ this->current_annotation = "FB write header";
+ GLboolean header_present = GL_TRUE;
+ int nr = 0;
+ int reg_width = c->dispatch_width / 8;
+
+ if (intel->gen >= 6 &&
+ !this->kill_emitted &&
+ c->key.nr_color_regions == 1) {
+ header_present = false;
+ }
+
+ if (header_present) {
+ /* m0, m1 header */
+ nr += 2;
+ }
+
+ if (c->aa_dest_stencil_reg) {
+ push_force_uncompressed();
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
+ fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)));
+ pop_force_uncompressed();
+ }
+
+ /* Reserve space for color. It'll be filled in per MRT below. */
+ int color_mrf = nr;
+ nr += 4 * reg_width;
+
+ if (c->source_depth_to_render_target) {
+ if (intel->gen == 6 && c->dispatch_width == 16) {
+ /* For outputting oDepth on gen6, SIMD8 writes have to be
+ * used. This would require 8-wide moves of each half to
+ * message regs, kind of like pre-gen5 SIMD16 FB writes.
+ * Just bail on doing so for now.
+ */
+ fail("Missing support for simd16 depth writes on gen6\n");
+ }
+
+ if (c->computes_depth) {
+ /* Hand over gl_FragDepth. */
+ assert(this->frag_depth);
+ fs_reg depth = *(variable_storage(this->frag_depth));
+
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), depth);
+ } else {
+ /* Pass through the payload depth. */
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, nr),
+ fs_reg(brw_vec8_grf(c->source_depth_reg, 0)));
+ }
+ nr += reg_width;
+ }
+
+ if (c->dest_depth_reg) {
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, nr),
+ fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)));
+ nr += reg_width;
+ }
+
+ fs_reg color = reg_undef;
+ if (this->frag_color)
+ color = *(variable_storage(this->frag_color));
+ else if (this->frag_data) {
+ color = *(variable_storage(this->frag_data));
+ color.type = BRW_REGISTER_TYPE_F;
+ }
+
+ for (int target = 0; target < c->key.nr_color_regions; target++) {
+ this->current_annotation = ralloc_asprintf(this->mem_ctx,
+ "FB write target %d",
+ target);
+ if (this->frag_color || this->frag_data) {
+ for (int i = 0; i < 4; i++) {
+ emit_color_write(i, color_mrf, color);
+ color.reg_offset++;
+ }
+ }
+
+ if (this->frag_color)
+ color.reg_offset -= 4;
+
+ fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
+ inst->target = target;
+ inst->base_mrf = 0;
+ inst->mlen = nr;
+ if (target == c->key.nr_color_regions - 1)
+ inst->eot = true;
+ inst->header_present = header_present;
+ }
+
+ if (c->key.nr_color_regions == 0) {
+ if (c->key.alpha_test && (this->frag_color || this->frag_data)) {
+ /* If the alpha test is enabled but there's no color buffer,
+ * we still need to send alpha out the pipeline to our null
+ * renderbuffer.
+ */
+ color.reg_offset += 3;
+ emit_color_write(3, color_mrf, color);
+ }
+
+ fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
+ inst->base_mrf = 0;
+ inst->mlen = nr;
+ inst->eot = true;
+ inst->header_present = header_present;
+ }
+
+ this->current_annotation = NULL;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c
index 9b82fe159f9..c2227777cfb 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.c
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@@ -181,9 +181,6 @@ static void upload_sf_prog(struct brw_context *brw)
key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
- /* _NEW_HINT */
- key.linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
-
/* _NEW_POLYGON */
if (key.do_twoside_color) {
/* If we're rendering to a FBO, we have to invert the polygon
diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h
index e525c730d3f..be32085c697 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.h
+++ b/src/mesa/drivers/dri/i965/brw_sf.h
@@ -52,7 +52,6 @@ struct brw_sf_prog_key {
GLuint do_flat_shading:1;
GLuint frontface_ccw:1;
GLuint do_point_sprite:1;
- GLuint linear_color:1; /**< linear interp vs. perspective interp */
GLuint sprite_origin_lower_left:1;
GLuint pad:24;
};
diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c
index 4b2e26cbed7..52a3fb3893d 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c
@@ -316,7 +316,7 @@ static GLboolean calculate_masks( struct brw_sf_compile *c,
GLbitfield64 persp_mask;
GLbitfield64 linear_mask;
- if (c->key.do_flat_shading || c->key.linear_color)
+ if (c->key.do_flat_shading)
persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS |
FRAG_BIT_COL0 |
FRAG_BIT_COL1);
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
new file mode 100644
index 00000000000..9471883fb2b
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -0,0 +1,194 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+extern "C" {
+#include "main/macros.h"
+#include "brw_context.h"
+}
+#include "brw_fs.h"
+#include "../glsl/ir_optimization.h"
+#include "../glsl/ir_print_visitor.h"
+
+struct gl_shader *
+brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
+{
+ struct brw_shader *shader;
+
+ shader = rzalloc(NULL, struct brw_shader);
+ if (shader) {
+ shader->base.Type = type;
+ shader->base.Name = name;
+ _mesa_init_shader(ctx, &shader->base);
+ }
+
+ return &shader->base;
+}
+
+struct gl_shader_program *
+brw_new_shader_program(struct gl_context *ctx, GLuint name)
+{
+ struct brw_shader_program *prog;
+ prog = rzalloc(NULL, struct brw_shader_program);
+ if (prog) {
+ prog->base.Name = name;
+ _mesa_init_shader_program(ctx, &prog->base);
+ }
+ return &prog->base;
+}
+
+/**
+ * Performs a compile of the shader stages even when we don't know
+ * what non-orthogonal state will be set, in the hope that it reflects
+ * the eventual NOS used, and thus allows us to produce link failures.
+ */
+bool
+brw_shader_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+ if (!brw_fs_precompile(ctx, prog))
+ return false;
+
+ return true;
+}
+
+GLboolean
+brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct intel_context *intel = &brw->intel;
+
+ struct brw_shader *shader =
+ (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
+ if (shader != NULL) {
+ void *mem_ctx = ralloc_context(NULL);
+ bool progress;
+
+ if (shader->ir)
+ ralloc_free(shader->ir);
+ shader->ir = new(shader) exec_list;
+ clone_ir_list(mem_ctx, shader->ir, shader->base.ir);
+
+ do_mat_op_to_vec(shader->ir);
+ lower_instructions(shader->ir,
+ MOD_TO_FRACT |
+ DIV_TO_MUL_RCP |
+ SUB_TO_ADD_NEG |
+ EXP_TO_EXP2 |
+ LOG_TO_LOG2);
+
+ /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this,
+ * if-statements need to be flattened.
+ */
+ if (intel->gen < 6)
+ lower_if_to_cond_assign(shader->ir, 16);
+
+ do_lower_texture_projection(shader->ir);
+ do_vec_index_to_cond_assign(shader->ir);
+ brw_do_cubemap_normalize(shader->ir);
+ lower_noise(shader->ir);
+ lower_quadop_vector(shader->ir, false);
+ lower_variable_index_to_cond_assign(shader->ir,
+ GL_TRUE, /* input */
+ GL_TRUE, /* output */
+ GL_TRUE, /* temp */
+ GL_TRUE /* uniform */
+ );
+
+ do {
+ progress = false;
+
+ brw_do_channel_expressions(shader->ir);
+ brw_do_vector_splitting(shader->ir);
+
+ progress = do_lower_jumps(shader->ir, true, true,
+ true, /* main return */
+ false, /* continue */
+ false /* loops */
+ ) || progress;
+
+ progress = do_common_optimization(shader->ir, true, 32) || progress;
+ } while (progress);
+
+ validate_ir_tree(shader->ir);
+
+ reparent_ir(shader->ir, shader->ir);
+ ralloc_free(mem_ctx);
+ }
+
+ if (!_mesa_ir_link_shader(ctx, prog))
+ return GL_FALSE;
+
+ if (!brw_shader_precompile(ctx, prog))
+ return GL_FALSE;
+
+ return GL_TRUE;
+}
+
+
+int
+brw_type_for_base_type(const struct glsl_type *type)
+{
+ switch (type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ return BRW_REGISTER_TYPE_F;
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_BOOL:
+ return BRW_REGISTER_TYPE_D;
+ case GLSL_TYPE_UINT:
+ return BRW_REGISTER_TYPE_UD;
+ case GLSL_TYPE_ARRAY:
+ case GLSL_TYPE_STRUCT:
+ case GLSL_TYPE_SAMPLER:
+ /* These should be overridden with the type of the member when
+ * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely
+ * way to trip up if we don't.
+ */
+ return BRW_REGISTER_TYPE_UD;
+ default:
+ assert(!"not reached");
+ return BRW_REGISTER_TYPE_F;
+ }
+}
+
+uint32_t
+brw_conditional_for_comparison(unsigned int op)
+{
+ switch (op) {
+ case ir_binop_less:
+ return BRW_CONDITIONAL_L;
+ case ir_binop_greater:
+ return BRW_CONDITIONAL_G;
+ case ir_binop_lequal:
+ return BRW_CONDITIONAL_LE;
+ case ir_binop_gequal:
+ return BRW_CONDITIONAL_GE;
+ case ir_binop_equal:
+ case ir_binop_all_equal: /* same as equal for scalars */
+ return BRW_CONDITIONAL_Z;
+ case ir_binop_nequal:
+ case ir_binop_any_nequal: /* same as nequal for scalars */
+ return BRW_CONDITIONAL_NZ;
+ default:
+ assert(!"not reached: bad operation for comparison");
+ return BRW_CONDITIONAL_NZ;
+ }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
new file mode 100644
index 00000000000..4c568a26caa
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+int brw_type_for_base_type(const struct glsl_type *type);
+uint32_t brw_conditional_for_comparison(unsigned int op);
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
index 3a3aa8c0346..ef58619702d 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -110,7 +110,7 @@ static void dump_wm_surface_state(struct brw_context *brw)
for (i = 0; i < brw->wm.nr_surfaces; i++) {
unsigned int surfoff;
- struct brw_surface_state *surf;
+ uint32_t *surf;
char name[20];
if (brw->wm.surf_offset[i] == 0) {
@@ -118,21 +118,25 @@ static void dump_wm_surface_state(struct brw_context *brw)
continue;
}
surfoff = bo->offset + brw->wm.surf_offset[i];
- surf = (struct brw_surface_state *)(base + brw->wm.surf_offset[i]);
+ surf = (uint32_t *)(base + brw->wm.surf_offset[i]);
sprintf(name, "WM SURF%d", i);
state_out(name, surf, surfoff, 0, "%s %s\n",
- get_965_surfacetype(surf->ss0.surface_type),
- get_965_surface_format(surf->ss0.surface_format));
+ get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)),
+ get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT)));
state_out(name, surf, surfoff, 1, "offset\n");
state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n",
- surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count);
- state_out(name, surf, surfoff, 3, "pitch %d, %stiled\n",
- surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not ");
+ GET_FIELD(surf[2], BRW_SURFACE_WIDTH) + 1,
+ GET_FIELD(surf[2], BRW_SURFACE_HEIGHT) + 1);
+ state_out(name, surf, surfoff, 3, "pitch %d, %s tiled\n",
+ GET_FIELD(surf[3], BRW_SURFACE_PITCH) + 1,
+ (surf[3] & BRW_SURFACE_TILED) ?
+ ((surf[3] & BRW_SURFACE_TILED_Y) ? "Y" : "X") : "not");
state_out(name, surf, surfoff, 4, "mip base %d\n",
- surf->ss4.min_lod);
+ GET_FIELD(surf[4], BRW_SURFACE_MIN_LOD));
state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n",
- surf->ss5.x_offset, surf->ss5.y_offset);
+ GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET),
+ GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET));
}
drm_intel_bo_unmap(bo);
}
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index ad31222e9ec..7b9cdba4cbf 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -1225,80 +1225,6 @@ struct gen7_sf_clip_viewport {
GLfloat pad1[4];
};
-/* Documented in the subsystem/shared-functions/sampler chapter...
- *
- * vol5c Shared Functions - 1.13.4.1.1
- */
-struct brw_surface_state
-{
- struct {
- GLuint cube_pos_z:1;
- GLuint cube_neg_z:1;
- GLuint cube_pos_y:1;
- GLuint cube_neg_y:1;
- GLuint cube_pos_x:1;
- GLuint cube_neg_x:1;
- GLuint pad:2;
- /* Required on gen6 for surfaces accessed through render cache messages.
- */
- GLuint render_cache_read_write:1;
- /* Ironlake and newer: instead of replicating one of the texels */
- GLuint cube_corner_average:1;
- GLuint mipmap_layout_mode:1;
- GLuint vert_line_stride_ofs:1;
- GLuint vert_line_stride:1;
- GLuint color_blend:1;
- GLuint writedisable_blue:1;
- GLuint writedisable_green:1;
- GLuint writedisable_red:1;
- GLuint writedisable_alpha:1;
- GLuint surface_format:9; /**< BRW_SURFACEFORMAT_x */
- GLuint data_return_format:1;
- GLuint pad0:1;
- GLuint surface_type:3; /**< BRW_SURFACE_1D/2D/3D/CUBE */
- } ss0;
-
- struct {
- GLuint base_addr;
- } ss1;
-
- struct {
- GLuint pad:2;
- GLuint mip_count:4;
- GLuint width:13;
- GLuint height:13;
- } ss2;
-
- struct {
- GLuint tile_walk:1;
- GLuint tiled_surface:1;
- GLuint pad:1;
- GLuint pitch:18;
- GLuint depth:11;
- } ss3;
-
- struct {
- GLuint multisample_position_palette_index:3;
- GLuint pad1:1;
- GLuint num_multisamples:3;
- GLuint pad0:1;
- GLuint render_target_view_extent:9;
- GLuint min_array_elt:11;
- GLuint min_lod:4;
- } ss4;
-
- struct {
- GLuint pad1:16;
- GLuint cache_control:2;
- GLuint gfdt:1;
- GLuint encrypt:1;
- GLuint y_offset:4;
- GLuint pad0:1;
- GLuint x_offset:7;
- } ss5; /* New in G4X */
-
-};
-
/* volume 5c Shared Functions - 1.13.4.1.2 */
struct gen7_surface_state
{
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 9ac0713a1d3..4a3a2bfada2 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -61,7 +61,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel,
*/
h0 = ALIGN(mt->height0, align_h);
h1 = ALIGN(minify(h0), align_h);
- qpitch = (h0 + h1 + 11 * align_h);
+ qpitch = (h0 + h1 + (intel->gen >= 7 ? 12 : 11) * align_h);
if (mt->compressed)
qpitch /= 4;
@@ -152,9 +152,6 @@ GLboolean brw_miptree_layout(struct intel_context *intel,
* in the texture surfaces run, so they may be "vertical" through
* memory. As a result, the docs say in Surface Padding Requirements:
* Sampling Engine Surfaces that two extra rows of padding are required.
- * We don't know of similar requirements for pre-965, but given that
- * those docs are silent on padding requirements in general, let's play
- * it safe.
*/
if (mt->target == GL_TEXTURE_CUBE_MAP)
mt->total_height += 2;
diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c
index 47cc0a7da7a..9fdfebe9f76 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_constval.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c
@@ -194,11 +194,19 @@ static void calc_wm_input_sizes( struct brw_context *brw )
/* BRW_NEW_VERTEX_PROGRAM */
const struct brw_vertex_program *vp =
brw_vertex_program_const(brw->vertex_program);
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram;
/* BRW_NEW_INPUT_DIMENSIONS */
struct tracker t;
GLuint insn;
GLuint i;
+ /* If we're going to go through brw_fs.cpp, we don't end up using
+ * brw->wm.input_size_masks.
+ */
+ if (prog && prog->_LinkedShaders[MESA_SHADER_FRAGMENT])
+ return;
+
memset(&t, 0, sizeof(t));
/* _NEW_LIGHT */
@@ -238,7 +246,9 @@ static void calc_wm_input_sizes( struct brw_context *brw )
const struct brw_tracked_state brw_wm_input_sizes = {
.dirty = {
.mesa = _NEW_LIGHT,
- .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS,
+ .brw = (BRW_NEW_FRAGMENT_PROGRAM |
+ BRW_NEW_VERTEX_PROGRAM |
+ BRW_NEW_INPUT_DIMENSIONS),
.cache = 0
},
.prepare = calc_wm_input_sizes
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index 9f99ef57214..69650e1df77 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -136,6 +136,16 @@ static void brw_invalidate_state( struct intel_context *intel, GLuint new_state
/* nothing */
}
+/**
+ * \see intel_context.vtbl.is_hiz_depth_format
+ */
+static bool brw_is_hiz_depth_format(struct intel_context *intel,
+ gl_format format)
+{
+ /* In the future, this will support Z_FLOAT32. */
+ return intel->has_hiz && (format == MESA_FORMAT_X8_Z24);
+}
+
void brwInitVtbl( struct brw_context *brw )
{
@@ -152,4 +162,5 @@ void brwInitVtbl( struct brw_context *brw )
brw->intel.vtbl.set_draw_region = brw_set_draw_region;
brw->intel.vtbl.debug_batch = brw_debug_batch;
brw->intel.vtbl.render_target_supported = brw_render_target_supported;
+ brw->intel.vtbl.is_hiz_depth_format = brw_is_hiz_depth_format;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 907976295ab..1aebd12df49 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -34,6 +34,7 @@
#include "brw_state.h"
#include "main/formats.h"
#include "main/samplerobj.h"
+#include "program/prog_parameter.h"
#include "../glsl/ralloc.h"
@@ -115,7 +116,7 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
brw_wm_pass2(c);
/* how many general-purpose registers are used */
- c->prog_data.total_grf = c->max_wm_grf;
+ c->prog_data.reg_blocks = brw_register_blocks(c->max_wm_grf);
/* Emit GEN4 code.
*/
@@ -184,9 +185,10 @@ brw_wm_payload_setup(struct brw_context *brw,
* Depending on the instructions used (i.e. flow control instructions)
* we'll use one of two code generators.
*/
-static void do_wm_prog( struct brw_context *brw,
- struct brw_fragment_program *fp,
- struct brw_wm_prog_key *key)
+bool do_wm_prog(struct brw_context *brw,
+ struct gl_shader_program *prog,
+ struct brw_fragment_program *fp,
+ struct brw_wm_prog_key *key)
{
struct intel_context *intel = &brw->intel;
struct brw_wm_compile *c;
@@ -202,7 +204,7 @@ static void do_wm_prog( struct brw_context *brw,
* without triggering a segfault, no way to signal,
* so just return.
*/
- return;
+ return false;
}
c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN);
c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN);
@@ -226,7 +228,10 @@ static void do_wm_prog( struct brw_context *brw,
brw_init_compile(brw, &c->func, c);
- if (!brw_wm_fs_emit(brw, c)) {
+ if (prog && prog->FragmentProgram) {
+ if (!brw_wm_fs_emit(brw, c, prog))
+ return false;
+ } else {
/* Fallback for fixed function and ARB_fp shaders. */
c->dispatch_width = 16;
brw_wm_payload_setup(brw, c);
@@ -274,6 +279,8 @@ static void do_wm_prog( struct brw_context *brw,
program, program_size,
&c->prog_data, sizeof(c->prog_data),
&brw->wm.prog_data);
+
+ return true;
}
@@ -355,9 +362,6 @@ static void brw_wm_populate_key( struct brw_context *brw,
/* _NEW_LIGHT */
key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
- /* _NEW_HINT */
- key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
-
/* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
key->clamp_fragment_color = ctx->Color._ClampFragmentColor;
@@ -426,9 +430,6 @@ static void brw_wm_populate_key( struct brw_context *brw,
}
}
- /* Shadow */
- key->shadowtex_mask = fp->program.Base.ShadowSamplers;
-
/* _NEW_BUFFERS */
/*
* Include the draw buffer origin and height so that we can calculate
@@ -468,6 +469,8 @@ static void brw_wm_populate_key( struct brw_context *brw,
static void brw_prepare_wm_prog(struct brw_context *brw)
{
+ struct intel_context *intel = &brw->intel;
+ struct gl_context *ctx = &intel->ctx;
struct brw_wm_prog_key key;
struct brw_fragment_program *fp = (struct brw_fragment_program *)
brw->fragment_program;
@@ -480,8 +483,11 @@ static void brw_prepare_wm_prog(struct brw_context *brw)
brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG,
&key, sizeof(key),
&brw->wm.prog_data);
- if (brw->wm.prog_bo == NULL)
- do_wm_prog(brw, fp, &key);
+ if (brw->wm.prog_bo == NULL) {
+ bool success = do_wm_prog(brw, ctx->Shader.CurrentFragmentProgram, fp,
+ &key);
+ assert(success);
+ }
}
@@ -489,7 +495,6 @@ const struct brw_tracked_state brw_wm_prog = {
.dirty = {
.mesa = (_NEW_COLOR |
_NEW_DEPTH |
- _NEW_HINT |
_NEW_STENCIL |
_NEW_POLYGON |
_NEW_LINE |
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 8ab531bdf87..e244b55a083 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -59,16 +59,16 @@
#define AA_ALWAYS 2
struct brw_wm_prog_key {
+ uint8_t iz_lookup;
GLuint stats_wm:1;
GLuint flat_shade:1;
- GLuint linear_color:1; /**< linear interpolation vs perspective interp */
GLuint nr_color_regions:5;
GLuint render_to_fbo:1;
GLuint alpha_test:1;
GLuint clamp_fragment_color:1;
+ GLuint line_aa:2;
GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */
- GLuint shadowtex_mask:16;
GLuint yuvtex_mask:16;
GLuint yuvtex_swap_mask:16; /* UV swaped */
uint16_t gl_clamp_mask[3];
@@ -76,8 +76,6 @@ struct brw_wm_prog_key {
GLushort tex_swizzles[BRW_MAX_TEX_UNIT];
GLushort drawable_height;
GLbitfield64 vp_outputs_written;
- GLuint iz_lookup;
- GLuint line_aa;
GLuint program_string_id:32;
};
@@ -314,7 +312,8 @@ void brw_wm_print_program( struct brw_wm_compile *c,
void brw_wm_lookup_iz(struct intel_context *intel,
struct brw_wm_compile *c);
-bool brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c);
+bool brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
+ struct gl_shader_program *prog);
/* brw_wm_emit.c */
void emit_alu1(struct brw_compile *p,
@@ -476,5 +475,9 @@ bool brw_color_buffer_write_enabled(struct brw_context *brw);
bool brw_render_target_supported(gl_format format);
void brw_wm_payload_setup(struct brw_context *brw,
struct brw_wm_compile *c);
+bool do_wm_prog(struct brw_context *brw,
+ struct gl_shader_program *prog,
+ struct brw_fragment_program *fp,
+ struct brw_wm_prog_key *key);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index 9ddbee2edf4..59dcda7b414 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -417,25 +417,14 @@ static void emit_interp( struct brw_wm_compile *c,
src_undef());
}
else {
- if (c->key.linear_color) {
- emit_op(c,
- WM_LINTERP,
- dst,
- 0,
- interp,
- deltas,
- src_undef());
- }
- else {
- /* perspective-corrected color interpolation */
- emit_op(c,
- WM_PINTERP,
- dst,
- 0,
- interp,
- deltas,
- get_pixel_w(c));
- }
+ /* perspective-corrected color interpolation */
+ emit_op(c,
+ WM_PINTERP,
+ dst,
+ 0,
+ interp,
+ deltas,
+ get_pixel_w(c));
}
break;
case FRAG_ATTRIB_FOGC:
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index a356711470a..ef98f8126dc 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -91,8 +91,8 @@ brw_prepare_wm_unit(struct brw_context *brw)
}
/* CACHE_NEW_WM_PROG */
- wm->thread0.grf_reg_count = ALIGN(brw->wm.prog_data->total_grf, 16) / 16 - 1;
- wm->wm9.grf_reg_count_2 = ALIGN(brw->wm.prog_data->total_grf_16, 16) / 16 - 1;
+ wm->thread0.grf_reg_count = brw->wm.prog_data->reg_blocks;
+ wm->wm9.grf_reg_count_2 = brw->wm.prog_data->reg_blocks_16;
wm->thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
/* reloc */
wm->wm9.kernel_start_pointer_2 = (brw->wm.prog_bo->offset +
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index de1953ed600..6c1eba69d4b 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -200,22 +200,16 @@ translate_tex_format(gl_format mesa_format,
}
}
-static void
-brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
+static uint32_t
+brw_get_surface_tiling_bits(uint32_t tiling)
{
switch (tiling) {
- case I915_TILING_NONE:
- surf->ss3.tiled_surface = 0;
- surf->ss3.tile_walk = 0;
- break;
case I915_TILING_X:
- surf->ss3.tiled_surface = 1;
- surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR;
- break;
+ return BRW_SURFACE_TILED;
case I915_TILING_Y:
- surf->ss3.tiled_surface = 1;
- surf->ss3.tile_walk = BRW_TILEWALK_YMAJOR;
- break;
+ return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
+ default:
+ return 0;
}
}
@@ -228,46 +222,36 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit )
struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
const GLuint surf_index = SURF_INDEX_TEXTURE(unit);
- struct brw_surface_state *surf;
+ uint32_t *surf;
- surf = brw_state_batch(brw, sizeof(*surf), 32,
- &brw->wm.surf_offset[surf_index]);
- memset(surf, 0, sizeof(*surf));
+ surf = brw_state_batch(brw, 6 * 4, 32, &brw->wm.surf_offset[surf_index]);
- surf->ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
- surf->ss0.surface_type = translate_tex_target(tObj->Target);
- surf->ss0.surface_format = translate_tex_format(firstImage->TexFormat,
- firstImage->InternalFormat,
- sampler->DepthMode,
- sampler->sRGBDecode);
+ surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
+ BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
+ BRW_SURFACE_CUBEFACE_ENABLES |
+ (translate_tex_format(firstImage->TexFormat,
+ firstImage->InternalFormat,
+ sampler->DepthMode,
+ sampler->sRGBDecode) <<
+ BRW_SURFACE_FORMAT_SHIFT));
- /* This is ok for all textures with channel width 8bit or less:
- */
-/* surf->ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
- surf->ss1.base_addr = intelObj->mt->region->buffer->offset; /* reloc */
+ surf[1] = intelObj->mt->region->buffer->offset; /* reloc */
- surf->ss2.mip_count = intelObj->_MaxLevel - tObj->BaseLevel;
- surf->ss2.width = firstImage->Width - 1;
- surf->ss2.height = firstImage->Height - 1;
- brw_set_surface_tiling(surf, intelObj->mt->region->tiling);
- surf->ss3.pitch = (intelObj->mt->region->pitch * intelObj->mt->cpp) - 1;
- surf->ss3.depth = firstImage->Depth - 1;
+ surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
+ (firstImage->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
+ (firstImage->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
- surf->ss4.min_lod = 0;
-
- if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
- surf->ss0.cube_pos_x = 1;
- surf->ss0.cube_pos_y = 1;
- surf->ss0.cube_pos_z = 1;
- surf->ss0.cube_neg_x = 1;
- surf->ss0.cube_neg_y = 1;
- surf->ss0.cube_neg_z = 1;
- }
+ surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
+ (firstImage->Depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
+ ((intelObj->mt->region->pitch * intelObj->mt->cpp) - 1) <<
+ BRW_SURFACE_PITCH_SHIFT);
+
+ surf[4] = 0;
+ surf[5] = 0;
/* Emit relocation to surface contents */
drm_intel_bo_emit_reloc(brw->intel.batch.bo,
- brw->wm.surf_offset[surf_index] +
- offsetof(struct brw_surface_state, ss1),
+ brw->wm.surf_offset[surf_index] + 4,
intelObj->mt->region->buffer, 0,
I915_GEM_DOMAIN_SAMPLER, 0);
}
@@ -284,34 +268,34 @@ brw_create_constant_surface(struct brw_context *brw,
{
struct intel_context *intel = &brw->intel;
const GLint w = width - 1;
- struct brw_surface_state *surf;
+ uint32_t *surf;
- surf = brw_state_batch(brw, sizeof(*surf), 32, out_offset);
- memset(surf, 0, sizeof(*surf));
+ surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
- surf->ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
- surf->ss0.surface_type = BRW_SURFACE_BUFFER;
- surf->ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+ surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
+ BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
+ BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT);
if (intel->gen >= 6)
- surf->ss0.render_cache_read_write = 1;
+ surf[0] |= BRW_SURFACE_RC_READ_WRITE;
+
+ surf[1] = bo->offset; /* reloc */
- assert(bo);
- surf->ss1.base_addr = bo->offset; /* reloc */
+ surf[2] = (((w & 0x7f) - 1) << BRW_SURFACE_WIDTH_SHIFT |
+ (((w >> 7) & 0x1fff) - 1) << BRW_SURFACE_HEIGHT_SHIFT);
- surf->ss2.width = w & 0x7f; /* bits 6:0 of size or width */
- surf->ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */
- surf->ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */
- surf->ss3.pitch = (width * 16) - 1; /* ignored?? */
- brw_set_surface_tiling(surf, I915_TILING_NONE); /* tiling now allowed */
+ surf[3] = ((((w >> 20) & 0x7f) - 1) << BRW_SURFACE_DEPTH_SHIFT |
+ (width * 16 - 1) << BRW_SURFACE_PITCH_SHIFT);
+
+ surf[4] = 0;
+ surf[5] = 0;
/* Emit relocation to surface contents. Section 5.1.1 of the gen4
* bspec ("Data Cache") says that the data cache does not exist as
* a separate cache and is just the sampler cache.
*/
drm_intel_bo_emit_reloc(brw->intel.batch.bo,
- (*out_offset +
- offsetof(struct brw_surface_state, ss1)),
+ *out_offset + 4,
bo, 0,
I915_GEM_DOMAIN_SAMPLER, 0);
}
@@ -416,23 +400,23 @@ static void
brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
{
struct intel_context *intel = &brw->intel;
- struct brw_surface_state *surf;
-
- surf = brw_state_batch(brw, sizeof(*surf), 32,
- &brw->wm.surf_offset[unit]);
- memset(surf, 0, sizeof(*surf));
+ uint32_t *surf;
- surf->ss0.surface_type = BRW_SURFACE_NULL;
- surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ surf = brw_state_batch(brw, 6 * 4, 32, &brw->wm.surf_offset[unit]);
+ surf[0] = (BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
+ BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
if (intel->gen < 6) {
- /* _NEW_COLOR */
- surf->ss0.color_blend = 0;
- surf->ss0.writedisable_red = 1;
- surf->ss0.writedisable_green = 1;
- surf->ss0.writedisable_blue = 1;
- surf->ss0.writedisable_alpha = 1;
+ surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
+ 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
+ 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
+ 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
}
+ surf[1] = 0;
+ surf[2] = 0;
+ surf[3] = 0;
+ surf[4] = 0;
+ surf[5] = 0;
}
/**
@@ -449,12 +433,11 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
struct gl_context *ctx = &intel->ctx;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct intel_region *region = irb->region;
- struct brw_surface_state *surf;
+ uint32_t *surf;
uint32_t tile_x, tile_y;
+ uint32_t format = 0;
- surf = brw_state_batch(brw, sizeof(*surf), 32,
- &brw->wm.surf_offset[unit]);
- memset(surf, 0, sizeof(*surf));
+ surf = brw_state_batch(brw, 6 * 4, 32, &brw->wm.surf_offset[unit]);
switch (irb->Base.Format) {
case MESA_FORMAT_XRGB8888:
@@ -465,7 +448,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
* cases where GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is
* used.
*/
- surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
break;
case MESA_FORMAT_INTENSITY_FLOAT32:
case MESA_FORMAT_LUMINANCE_FLOAT32:
@@ -473,25 +456,35 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
* channel into R, which is to say that we just treat them as
* GL_RED.
*/
- surf->ss0.surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
+ format = BRW_SURFACEFORMAT_R32_FLOAT;
break;
case MESA_FORMAT_SARGB8:
/* without GL_EXT_framebuffer_sRGB we shouldn't bind sRGB
surfaces to the blend/update as sRGB */
if (ctx->Color.sRGBEnabled)
- surf->ss0.surface_format = brw_format_for_mesa_format(irb->Base.Format);
+ format = brw_format_for_mesa_format(irb->Base.Format);
else
- surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
break;
default:
assert(brw_render_target_supported(irb->Base.Format));
- surf->ss0.surface_format = brw_format_for_mesa_format(irb->Base.Format);
+ format = brw_format_for_mesa_format(irb->Base.Format);
}
- surf->ss0.surface_type = BRW_SURFACE_2D;
+ surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
+ format << BRW_SURFACE_FORMAT_SHIFT);
+
/* reloc */
- surf->ss1.base_addr = intel_region_tile_offsets(region, &tile_x, &tile_y);
- surf->ss1.base_addr += region->buffer->offset; /* reloc */
+ surf[1] = (intel_region_tile_offsets(region, &tile_x, &tile_y) +
+ region->buffer->offset);
+
+ surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
+ (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
+
+ surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
+ ((region->pitch * region->cpp) - 1) << BRW_SURFACE_PITCH_SHIFT);
+
+ surf[4] = 0;
assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
/* Note that the low bits of these fields are missing, so
@@ -499,35 +492,35 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
*/
assert(tile_x % 4 == 0);
assert(tile_y % 2 == 0);
- surf->ss5.x_offset = tile_x / 4;
- surf->ss5.y_offset = tile_y / 2;
-
- surf->ss2.width = rb->Width - 1;
- surf->ss2.height = rb->Height - 1;
- brw_set_surface_tiling(surf, region->tiling);
- surf->ss3.pitch = (region->pitch * region->cpp) - 1;
+ surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
+ (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT);
if (intel->gen < 6) {
/* _NEW_COLOR */
- surf->ss0.color_blend = (!ctx->Color._LogicOpEnabled &&
- (ctx->Color.BlendEnabled & (1 << unit)));
- surf->ss0.writedisable_red = !ctx->Color.ColorMask[unit][0];
- surf->ss0.writedisable_green = !ctx->Color.ColorMask[unit][1];
- surf->ss0.writedisable_blue = !ctx->Color.ColorMask[unit][2];
+ if (!ctx->Color._LogicOpEnabled &&
+ (ctx->Color.BlendEnabled & (1 << unit)))
+ surf[0] |= BRW_SURFACE_BLEND_ENABLED;
+
+ if (!ctx->Color.ColorMask[unit][0])
+ surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
+ if (!ctx->Color.ColorMask[unit][1])
+ surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
+ if (!ctx->Color.ColorMask[unit][2])
+ surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
+
/* As mentioned above, disable writes to the alpha component when the
* renderbuffer is XRGB.
*/
- if (ctx->DrawBuffer->Visual.alphaBits == 0)
- surf->ss0.writedisable_alpha = 1;
- else
- surf->ss0.writedisable_alpha = !ctx->Color.ColorMask[unit][3];
+ if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
+ !ctx->Color.ColorMask[unit][3]) {
+ surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
+ }
}
drm_intel_bo_emit_reloc(brw->intel.batch.bo,
- brw->wm.surf_offset[unit] +
- offsetof(struct brw_surface_state, ss1),
+ brw->wm.surf_offset[unit] + 4,
region->buffer,
- surf->ss1.base_addr - region->buffer->offset,
+ surf[1] - region->buffer->offset,
I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER);
}
@@ -539,16 +532,14 @@ prepare_wm_surfaces(struct brw_context *brw)
int i;
int nr_surfaces = 0;
- if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
- for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
- struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
- struct intel_renderbuffer *irb = intel_renderbuffer(rb);
- struct intel_region *region = irb ? irb->region : NULL;
+ for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+ struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+ struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+ struct intel_region *region = irb ? irb->region : NULL;
- if (region)
- brw_add_validated_bo(brw, region->buffer);
- nr_surfaces = SURF_INDEX_DRAW(i) + 1;
- }
+ if (region)
+ brw_add_validated_bo(brw, region->buffer);
+ nr_surfaces = SURF_INDEX_DRAW(i) + 1;
}
if (brw->wm.const_bo) {
@@ -558,10 +549,11 @@ prepare_wm_surfaces(struct brw_context *brw)
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
- struct gl_texture_object *tObj = texUnit->_Current;
- struct intel_texture_object *intelObj = intel_texture_object(tObj);
if (texUnit->_ReallyEnabled) {
+ struct gl_texture_object *tObj = texUnit->_Current;
+ struct intel_texture_object *intelObj = intel_texture_object(tObj);
+
brw_add_validated_bo(brw, intelObj->mt->region->buffer);
nr_surfaces = SURF_INDEX_TEXTURE(i) + 1;
}
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index 6a7add8e562..ae7a1d6c35c 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -33,7 +33,6 @@ static void
upload_vs_state(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
- struct gl_context *ctx = &intel->ctx;
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_VS << 16 | (2 - 2));
diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c
index ee551ef60d4..7eb50edc6b4 100644
--- a/src/mesa/drivers/dri/intel/intel_buffers.c
+++ b/src/mesa/drivers/dri/intel/intel_buffers.c
@@ -93,6 +93,7 @@ intel_draw_buffer(struct gl_context * ctx, struct gl_framebuffer *fb)
struct intel_context *intel = intel_context(ctx);
struct intel_region *colorRegions[MAX_DRAW_BUFFERS], *depthRegion = NULL;
struct intel_renderbuffer *irbDepth = NULL, *irbStencil = NULL;
+ bool fb_has_hiz = intel_framebuffer_has_hiz(fb);
if (!fb) {
/* this can happen during the initial context initialization */
@@ -166,11 +167,11 @@ intel_draw_buffer(struct gl_context * ctx, struct gl_framebuffer *fb)
/***
*** Get depth buffer region and check if we need a software fallback.
- *** Note that the depth buffer is usually a DEPTH_STENCIL buffer.
***/
if (fb->_DepthBuffer && fb->_DepthBuffer->Wrapped) {
irbDepth = intel_renderbuffer(fb->_DepthBuffer->Wrapped);
if (irbDepth && irbDepth->region) {
+ assert(!fb_has_hiz || irbDepth->Base.Format != MESA_FORMAT_S8_Z24);
FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_FALSE);
depthRegion = irbDepth->region;
}
@@ -187,13 +188,16 @@ intel_draw_buffer(struct gl_context * ctx, struct gl_framebuffer *fb)
/***
*** Stencil buffer
- *** This can only be hardware accelerated if we're using a
- *** combined DEPTH_STENCIL buffer.
***/
if (fb->_StencilBuffer && fb->_StencilBuffer->Wrapped) {
irbStencil = intel_renderbuffer(fb->_StencilBuffer->Wrapped);
if (irbStencil && irbStencil->region) {
- ASSERT(irbStencil->Base.Format == MESA_FORMAT_S8_Z24);
+ if (!intel->has_separate_stencil)
+ assert(irbStencil->Base.Format == MESA_FORMAT_S8_Z24);
+ if (fb_has_hiz || intel->must_use_separate_stencil)
+ assert(irbStencil->Base.Format == MESA_FORMAT_S8);
+ if (irbStencil->Base.Format == MESA_FORMAT_S8)
+ assert(intel->has_separate_stencil);
FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE);
}
else {
@@ -208,8 +212,10 @@ intel_draw_buffer(struct gl_context * ctx, struct gl_framebuffer *fb)
/* If we have a (packed) stencil buffer attached but no depth buffer,
* we still need to set up the shared depth/stencil state so we can use it.
*/
- if (depthRegion == NULL && irbStencil && irbStencil->region)
+ if (depthRegion == NULL && irbStencil && irbStencil->region
+ && irbStencil->Base.Format == MESA_FORMAT_S8_Z24) {
depthRegion = irbStencil->region;
+ }
/*
* Update depth and stencil test state
@@ -302,18 +308,6 @@ intelReadBuffer(struct gl_context * ctx, GLenum mode)
if (!was_front_buffer_reading && intel->is_front_buffer_reading)
dri2InvalidateDrawable(intel->driContext->driReadablePriv);
}
-
- if (ctx->ReadBuffer == ctx->DrawBuffer) {
- /* This will update FBO completeness status.
- * A framebuffer will be incomplete if the GL_READ_BUFFER setting
- * refers to a missing renderbuffer. Calling glReadBuffer can set
- * that straight and can make the drawing buffer complete.
- */
- intel_draw_buffer(ctx, ctx->DrawBuffer);
- }
- /* Generally, functions which read pixels (glReadPixels, glCopyPixels, etc)
- * reference ctx->ReadBuffer and do appropriate state checks.
- */
}
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 4516db20ffc..2ea52c26106 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -620,6 +620,53 @@ intelInitDriverFunctions(struct dd_function_table *functions)
intel_init_syncobj_functions(functions);
}
+/**
+ * Override intel->has_hiz with environment variable INTEL_HIZ.
+ *
+ * Valid values for INTEL_HIZ are "0" and "1". If an invalid valid value is
+ * encountered, a warning is emitted and INTEL_HIZ is ignored.
+ */
+static void
+intel_override_hiz(struct intel_context *intel)
+{
+ const char *s = getenv("INTEL_HIZ");
+ if (!s) {
+ return;
+ } else if (!strncmp("0", s, 2)) {
+ intel->has_hiz = false;
+ } else if (!strncmp("1", s, 2)) {
+ intel->has_hiz = true;
+ } else {
+ _mesa_warning(&intel->ctx,
+ "env variable INTEL_HIZ=\"%s\" has invalid value and "
+ "is ignored", s);
+ }
+}
+
+/**
+ * Override intel->has_separate_stencil with environment variable
+ * INTEL_SEPARATE_STENCIL.
+ *
+ * Valid values for INTEL_SEPARATE_STENCIL are "0" and "1". If an invalid
+ * value is encountered, a warning is emitted and INTEL_SEPARATE_STENCIL is
+ * ignored.
+ */
+static void
+intel_override_separate_stencil(struct intel_context *intel)
+{
+ const char *s = getenv("INTEL_SEPARATE_STENCIL");
+ if (!s) {
+ return;
+ } else if (!strncmp("0", s, 2)) {
+ intel->has_separate_stencil = false;
+ } else if (!strncmp("1", s, 2)) {
+ intel->has_separate_stencil = true;
+ } else {
+ _mesa_warning(&intel->ctx,
+ "env variable INTEL_SEPARATE_STENCIL=\"%s\" has invalid "
+ "value and is ignored", s);
+ }
+}
GLboolean
intelInitContext(struct intel_context *intel,
@@ -667,9 +714,14 @@ intelInitContext(struct intel_context *intel,
if (IS_GEN7(intel->intelScreen->deviceID)) {
intel->needs_ff_sync = GL_TRUE;
intel->has_luminance_srgb = GL_TRUE;
+ /* FINISHME: Enable intel->has_separate_stencil on Gen7. */
+ /* FINISHME: Enable intel->must_use_separate_stencil on Gen7. */
+ /* FINISHME: Enable intel->has_hiz on Gen7. */
} else if (IS_GEN6(intel->intelScreen->deviceID)) {
intel->needs_ff_sync = GL_TRUE;
intel->has_luminance_srgb = GL_TRUE;
+ /* FINISHME: Enable intel->has_separate_stencil on Gen6. */
+ /* FINISHME: Enable intel->has_hiz on Gen6. */
} else if (IS_GEN5(intel->intelScreen->deviceID)) {
intel->needs_ff_sync = GL_TRUE;
intel->has_luminance_srgb = GL_TRUE;
@@ -689,6 +741,9 @@ intelInitContext(struct intel_context *intel,
}
}
+ intel_override_hiz(intel);
+ intel_override_separate_stencil(intel);
+
memset(&ctx->TextureFormatSupported, 0,
sizeof(ctx->TextureFormatSupported));
ctx->TextureFormatSupported[MESA_FORMAT_ARGB8888] = GL_TRUE;
@@ -703,7 +758,12 @@ intelInitContext(struct intel_context *intel,
ctx->TextureFormatSupported[MESA_FORMAT_AL88] = GL_TRUE;
if (intel->gen >= 4)
ctx->TextureFormatSupported[MESA_FORMAT_AL1616] = GL_TRUE;
- ctx->TextureFormatSupported[MESA_FORMAT_S8_Z24] = GL_TRUE;
+
+ /* Depth and stencil */
+ ctx->TextureFormatSupported[MESA_FORMAT_S8_Z24] = !intel->must_use_separate_stencil;
+ ctx->TextureFormatSupported[MESA_FORMAT_X8_Z24] = intel->has_separate_stencil;
+ ctx->TextureFormatSupported[MESA_FORMAT_S8] = intel->has_separate_stencil;
+
/*
* This was disabled in initial FBO enabling to avoid combinations
* of depth+stencil that wouldn't work together. We since decided
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index d3a8a659caa..f599861cba8 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -149,6 +149,10 @@ struct intel_context
void (*debug_batch)(struct intel_context *intel);
bool (*render_target_supported)(gl_format format);
+
+ /** Can HiZ be enabled on a depthbuffer of the given format? */
+ bool (*is_hiz_depth_format)(struct intel_context *intel,
+ gl_format format);
} vtbl;
GLbitfield Fallback; /**< mask of INTEL_FALLBACK_x bits */
@@ -166,6 +170,9 @@ struct intel_context
GLboolean is_945;
GLboolean has_luminance_srgb;
GLboolean has_xrgb_textures;
+ GLboolean has_separate_stencil;
+ GLboolean must_use_separate_stencil;
+ GLboolean has_hiz;
int urb_size;
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index ad2468a3237..7434e0efff6 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -79,6 +79,9 @@ intel_delete_renderbuffer(struct gl_renderbuffer *rb)
if (intel && irb->region) {
intel_region_release(&irb->region);
}
+ if (intel && irb->hiz_region) {
+ intel_region_release(&irb->hiz_region);
+ }
free(irb);
}
@@ -129,7 +132,12 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer
case GL_STENCIL_INDEX8_EXT:
case GL_STENCIL_INDEX16_EXT:
/* These aren't actual texture formats, so force them here. */
- rb->Format = MESA_FORMAT_S8_Z24;
+ if (intel->has_separate_stencil) {
+ rb->Format = MESA_FORMAT_S8;
+ } else {
+ assert(!intel->must_use_separate_stencil);
+ rb->Format = MESA_FORMAT_S8_Z24;
+ }
break;
}
@@ -143,6 +151,9 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer
if (irb->region) {
intel_region_release(&irb->region);
}
+ if (irb->hiz_region) {
+ intel_region_release(&irb->hiz_region);
+ }
/* allocate new memory region/renderbuffer */
@@ -154,19 +165,54 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer
GLenum base_format = _mesa_get_format_base_format(rb->Format);
if (intel->gen >= 4 && (base_format == GL_DEPTH_COMPONENT ||
+ base_format == GL_STENCIL_INDEX ||
base_format == GL_DEPTH_STENCIL))
tiling = I915_TILING_Y;
else
tiling = I915_TILING_X;
}
- irb->region = intel_region_alloc(intel->intelScreen, tiling, cpp,
- width, height, GL_TRUE);
+ if (irb->Base.Format == MESA_FORMAT_S8) {
+ /*
+ * The stencil buffer has quirky pitch requirements. From Vol 2a,
+ * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
+ * The pitch must be set to 2x the value computed based on width, as
+ * the stencil buffer is stored with two rows interleaved.
+ * To accomplish this, we resort to the nasty hack of doubling the drm
+ * region's cpp and halving its height.
+ *
+ * If we neglect to double the pitch, then drm_intel_gem_bo_map_gtt()
+ * maps the memory incorrectly.
+ */
+ irb->region = intel_region_alloc(intel->intelScreen,
+ I915_TILING_Y,
+ cpp * 2,
+ width,
+ height / 2,
+ GL_TRUE);
+ } else {
+ irb->region = intel_region_alloc(intel->intelScreen, tiling, cpp,
+ width, height, GL_TRUE);
+ }
+
if (!irb->region)
return GL_FALSE; /* out of memory? */
ASSERT(irb->region->buffer);
+ if (intel->vtbl.is_hiz_depth_format(intel, rb->Format)) {
+ irb->hiz_region = intel_region_alloc(intel->intelScreen,
+ I915_TILING_Y,
+ irb->region->cpp,
+ irb->region->width,
+ irb->region->height,
+ GL_TRUE);
+ if (!irb->hiz_region) {
+ intel_region_release(&irb->region);
+ return GL_FALSE;
+ }
+ }
+
rb->Width = width;
rb->Height = height;
@@ -374,6 +420,9 @@ static GLboolean
intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb,
struct gl_texture_image *texImage)
{
+ struct intel_context *intel = intel_context(ctx);
+ struct intel_texture_image *intel_image = intel_texture_image(texImage);
+
if (!intel_span_supports_format(texImage->TexFormat)) {
DBG("Render to texture BAD FORMAT %s\n",
_mesa_get_format_name(texImage->TexFormat));
@@ -392,6 +441,32 @@ intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb,
irb->Base.Delete = intel_delete_renderbuffer;
irb->Base.AllocStorage = intel_nop_alloc_storage;
+ /* Point the renderbuffer's region to the texture's region. */
+ if (irb->region != intel_image->mt->region) {
+ intel_region_release(&irb->region);
+ intel_region_reference(&irb->region, intel_image->mt->region);
+ }
+
+ /* Allocate the texture's hiz region if necessary. */
+ if (intel->vtbl.is_hiz_depth_format(intel, texImage->TexFormat)
+ && !intel_image->mt->hiz_region) {
+ intel_image->mt->hiz_region =
+ intel_region_alloc(intel->intelScreen,
+ I915_TILING_Y,
+ _mesa_get_format_bytes(texImage->TexFormat),
+ texImage->Width,
+ texImage->Height,
+ GL_TRUE);
+ if (!intel_image->mt->hiz_region)
+ return GL_FALSE;
+ }
+
+ /* Point the renderbuffer's hiz region to the texture's hiz region. */
+ if (irb->hiz_region != intel_image->mt->hiz_region) {
+ intel_region_release(&irb->hiz_region);
+ intel_region_reference(&irb->hiz_region, intel_image->mt->hiz_region);
+ }
+
return GL_TRUE;
}
@@ -497,13 +572,6 @@ intel_render_texture(struct gl_context * ctx,
att->Texture->Name, newImage->Width, newImage->Height,
irb->Base.RefCount);
- /* point the renderbufer's region to the texture image region */
- if (irb->region != intel_image->mt->region) {
- if (irb->region)
- intel_region_release(&irb->region);
- intel_region_reference(&irb->region, intel_image->mt->region);
- }
-
intel_set_draw_offset_for_image(intel_image, att->Zoffset);
intel_image->used_as_render_target = GL_TRUE;
@@ -597,21 +665,33 @@ intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
intel_get_renderbuffer(fb, BUFFER_STENCIL);
int i;
- if (depthRb && stencilRb && stencilRb != depthRb) {
- if (fb->Attachment[BUFFER_DEPTH].Type == GL_TEXTURE &&
- fb->Attachment[BUFFER_STENCIL].Type == GL_TEXTURE &&
- (fb->Attachment[BUFFER_DEPTH].Texture->Name ==
- fb->Attachment[BUFFER_STENCIL].Texture->Name)) {
- /* OK */
- } else {
- /* we only support combined depth/stencil buffers, not separate
- * stencil buffers.
- */
- DBG("Only supports combined depth/stencil (found %s, %s)\n",
- depthRb ? _mesa_get_format_name(depthRb->Base.Format): "NULL",
- stencilRb ? _mesa_get_format_name(stencilRb->Base.Format): "NULL");
- fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT;
- }
+ /*
+ * The depth and stencil renderbuffers are the same renderbuffer or wrap
+ * the same texture.
+ */
+ bool depth_stencil_are_same;
+ if (depthRb && stencilRb && depthRb == stencilRb)
+ depth_stencil_are_same = true;
+ else if (depthRb && stencilRb && depthRb != stencilRb
+ && (fb->Attachment[BUFFER_DEPTH].Type == GL_TEXTURE)
+ && (fb->Attachment[BUFFER_STENCIL].Type == GL_TEXTURE)
+ && (fb->Attachment[BUFFER_DEPTH].Texture->Name
+ == fb->Attachment[BUFFER_STENCIL].Texture->Name))
+ depth_stencil_are_same = true;
+ else
+ depth_stencil_are_same = false;
+
+ bool fb_has_combined_depth_stencil_format =
+ (depthRb && depthRb->Base.Format == MESA_FORMAT_S8_Z24) ||
+ (stencilRb && stencilRb->Base.Format == MESA_FORMAT_S8_Z24);
+
+ bool fb_has_hiz = intel_framebuffer_has_hiz(fb);
+
+ if ((intel->must_use_separate_stencil || fb_has_hiz)
+ && (depth_stencil_are_same || fb_has_combined_depth_stencil_format)) {
+ fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT;
+ } else if (!intel->has_separate_stencil && depthRb && stencilRb && !depth_stencil_are_same) {
+ fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT;
}
for (i = 0; i < Elements(fb->Attachment); i++) {
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h
index 028f657d12d..212dd9aadc8 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.h
+++ b/src/mesa/drivers/dri/intel/intel_fbo.h
@@ -28,6 +28,7 @@
#ifndef INTEL_FBO_H
#define INTEL_FBO_H
+#include <stdbool.h>
#include "main/formats.h"
#include "intel_screen.h"
@@ -40,6 +41,9 @@ struct intel_renderbuffer
{
struct gl_renderbuffer Base;
struct intel_region *region;
+
+ /** Only used by depth renderbuffers for which HiZ is enabled. */
+ struct intel_region *hiz_region;
};
@@ -80,6 +84,29 @@ intel_get_renderbuffer(struct gl_framebuffer *fb, int attIndex)
return NULL;
}
+/**
+ * If the framebuffer has a depth buffer attached, then return its HiZ region.
+ * The HiZ region may be null.
+ */
+static INLINE struct intel_region*
+intel_framebuffer_get_hiz_region(struct gl_framebuffer *fb)
+{
+ struct intel_renderbuffer *rb = NULL;
+ if (fb)
+ rb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
+
+ if (rb)
+ return rb->hiz_region;
+ else
+ return NULL;
+}
+
+static INLINE bool
+intel_framebuffer_has_hiz(struct gl_framebuffer *fb)
+{
+ return intel_framebuffer_get_hiz_region(fb) != NULL;
+}
+
extern void
intel_renderbuffer_set_region(struct intel_context *intel,
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index a3409274fb7..e62905de7c3 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -200,6 +200,7 @@ intel_miptree_release(struct intel_context *intel,
DBG("%s deleting %p\n", __FUNCTION__, *mt);
intel_region_release(&((*mt)->region));
+ intel_region_release(&((*mt)->hiz_region));
for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
free((*mt)->level[i].x_offset);
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
index 760a8bce601..325e3916981 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
@@ -113,6 +113,20 @@ struct intel_mipmap_tree
*/
struct intel_region *region;
+ /**
+ * This points to an auxillary hiz region if all of the following hold:
+ * 1. The texture has been attached to an FBO as a depthbuffer.
+ * 2. The texture format is hiz compatible.
+ * 3. The intel context supports hiz.
+ *
+ * When a texture is attached to multiple FBO's, a separate renderbuffer
+ * wrapper is created for each attachment. This necessitates storing the
+ * hiz region in the texture itself instead of the renderbuffer wrapper.
+ *
+ * \see intel_fbo.c:intel_wrap_texture()
+ */
+ struct intel_region *hiz_region;
+
/* These are also refcounted:
*/
GLuint refcount;
diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c
index 16bce20317e..5290342c3e1 100644
--- a/src/mesa/drivers/dri/intel/intel_span.c
+++ b/src/mesa/drivers/dri/intel/intel_span.c
@@ -70,9 +70,6 @@ intel_set_span_functions(struct intel_context *intel,
#define HW_UNLOCK()
-/* Convenience macros to avoid typing the address argument over and over */
-#define NO_TILE(_X, _Y) (((_Y) * irb->region->pitch + (_X)) * irb->region->cpp)
-
/* r5g6b5 color span and pixel functions */
#define SPANTMP_PIXEL_FMT GL_RGB
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c
index befa615d1e6..6890a690ab1 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_format.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_format.c
@@ -22,6 +22,7 @@ intel_mesa_format_to_rb_datatype(gl_format format)
case MESA_FORMAT_RGB565:
case MESA_FORMAT_ARGB1555:
case MESA_FORMAT_ARGB4444:
+ case MESA_FORMAT_S8:
return GL_UNSIGNED_BYTE;
case MESA_FORMAT_R16:
case MESA_FORMAT_RG1616:
diff --git a/src/mesa/drivers/dri/mach64/Makefile b/src/mesa/drivers/dri/mach64/Makefile
index c20fdece297..0474c1a165f 100644
--- a/src/mesa/drivers/dri/mach64/Makefile
+++ b/src/mesa/drivers/dri/mach64/Makefile
@@ -5,6 +5,8 @@ include $(TOP)/configs/current
LIBNAME = mach64_dri.so
+include ../Makefile.defines
+
DRIVER_SOURCES = \
mach64_context.c \
mach64_ioctl.c \
@@ -25,5 +27,5 @@ C_SOURCES = \
ASM_SOURCES =
-include ../Makefile.template
+include ../Makefile.targets
diff --git a/src/mesa/drivers/dri/mga/Makefile b/src/mesa/drivers/dri/mga/Makefile
index 92533bccc29..9948ee767ac 100644
--- a/src/mesa/drivers/dri/mga/Makefile
+++ b/src/mesa/drivers/dri/mga/Makefile
@@ -5,6 +5,8 @@ include $(TOP)/configs/current
LIBNAME = mga_dri.so
+include ../Makefile.defines
+
DRIVER_SOURCES = \
mgadd.c \
mgaioctl.c \
@@ -27,5 +29,5 @@ C_SOURCES = \
ASM_SOURCES =
-include ../Makefile.template
+include ../Makefile.targets
diff --git a/src/mesa/drivers/dri/nouveau/Makefile b/src/mesa/drivers/dri/nouveau/Makefile
index 3b506a91ffa..e485a98761f 100644
--- a/src/mesa/drivers/dri/nouveau/Makefile
+++ b/src/mesa/drivers/dri/nouveau/Makefile
@@ -3,11 +3,10 @@
TOP = ../../../../..
include $(TOP)/configs/current
-CFLAGS += $(shell pkg-config libdrm libdrm_nouveau --cflags)
-DRI_LIB_DEPS += $(shell pkg-config libdrm_nouveau --libs)
-
LIBNAME = nouveau_vieux_dri.so
+include ../Makefile.defines
+
DRIVER_SOURCES = \
nouveau_screen.c \
nouveau_context.c \
@@ -51,7 +50,9 @@ C_SOURCES = \
ASM_SOURCES =
+INCLUDES += $(NOUVEAU_CFLAGS)
+DRI_LIB_DEPS += $(NOUVEAU_LIBS)
-include ../Makefile.template
+include ../Makefile.targets
symlinks:
diff --git a/src/mesa/drivers/dri/r128/Makefile b/src/mesa/drivers/dri/r128/Makefile
index 8144c9b43ff..8b23ccc8cbe 100644
--- a/src/mesa/drivers/dri/r128/Makefile
+++ b/src/mesa/drivers/dri/r128/Makefile
@@ -5,6 +5,8 @@ include $(TOP)/configs/current
LIBNAME = r128_dri.so
+include ../Makefile.defines
+
DRIVER_SOURCES = \
r128_context.c \
r128_lock.c \
@@ -25,5 +27,5 @@ C_SOURCES = \
ASM_SOURCES =
-include ../Makefile.template
+include ../Makefile.targets
diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile
index 8013768e9fb..4547f7e2ee0 100644
--- a/src/mesa/drivers/dri/r200/Makefile
+++ b/src/mesa/drivers/dri/r200/Makefile
@@ -3,10 +3,10 @@
TOP = ../../../../..
include $(TOP)/configs/current
-CFLAGS += $(RADEON_CFLAGS)
-
LIBNAME = r200_dri.so
+include ../Makefile.defines
+
ifeq ($(RADEON_LDFLAGS),)
CS_SOURCES = radeon_cs_space_drm.c radeon_bo.c radeon_cs.c
endif
@@ -55,12 +55,13 @@ X86_SOURCES =
DRIVER_DEFINES = -DRADEON_R200
+INCLUDES += $(RADEON_CFLAGS)
DRI_LIB_DEPS += $(RADEON_LDFLAGS)
##### TARGETS #####
-include ../Makefile.template
+include ../Makefile.targets
#INCLUDES += -I../radeon/server
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
index 2245998c952..9f23a8496aa 100644
--- a/src/mesa/drivers/dri/r300/Makefile
+++ b/src/mesa/drivers/dri/r300/Makefile
@@ -3,17 +3,17 @@
TOP = ../../../../..
include $(TOP)/configs/current
-CFLAGS += $(RADEON_CFLAGS)
-
LIBNAME = r300_dri.so
+include ../Makefile.defines
+
ifeq ($(RADEON_LDFLAGS),)
CS_SOURCES = radeon_cs_space_drm.c radeon_bo.c radeon_cs.c
endif
COMMON_SOURCES = \
../../common/driverfuncs.c \
- ../common/mm.c \
+ ../common/drirenderbuffer.c \
../common/utils.c \
../common/texmem.c \
../common/vblank.c \
@@ -64,6 +64,7 @@ C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
DRIVER_DEFINES = -DRADEON_R300
# -DRADEON_BO_TRACK \
+INCLUDES += $(RADEON_CFLAGS)
DRI_LIB_DEPS += $(RADEON_LDFLAGS)
SUBDIRS = compiler
@@ -73,5 +74,5 @@ EXTRA_MODULES = compiler/libr300compiler.a
##### TARGETS #####
-include ../Makefile.template
+include ../Makefile.targets
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
index 4bedfacd632..5aa13329ac2 100644
--- a/src/mesa/drivers/dri/r300/compiler/Makefile
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -38,7 +38,9 @@ C_SOURCES = \
r3xx_vertprog.c \
r3xx_vertprog_dump.c \
\
- memory_pool.c
+ memory_pool.c \
+ $(TOP)/src/glsl/ralloc.c \
+ $(TOP)/src/mesa/program/register_allocate.c
### Basic defines ###
@@ -52,6 +54,7 @@ INCLUDES = \
-I$(TOP)/include \
-I$(TOP)/src/mesa \
-I$(TOP)/src/glsl \
+ -I$(TOP)/src/mapi
##### TARGETS #####
diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript
index 9931537492e..2c748b6e214 100755
--- a/src/mesa/drivers/dri/r300/compiler/SConscript
+++ b/src/mesa/drivers/dri/r300/compiler/SConscript
@@ -4,6 +4,7 @@ env = env.Clone()
env.Append(CPPPATH = '#/include')
env.Append(CPPPATH = '#/src/mesa')
env.Append(CPPPATH = '#/src/glsl')
+env.Append(CPPPATH = '#/src/mapi')
# temporary fix
env['CFLAGS'] = str(env['CFLAGS']).replace('-Werror=declaration-after-statement', '')
@@ -43,6 +44,8 @@ r300compiler = env.ConvenienceLibrary(
'r3xx_vertprog.c',
'r3xx_vertprog_dump.c',
'memory_pool.c',
+ '#/src/glsl/ralloc.c',
+ '#/src/mesa/program/register_allocate.c'
])
Return('r300compiler')
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index da6c8b602e1..93a0dd168f4 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -1619,7 +1619,6 @@ static void r500SetupRSUnit(struct gl_context * ctx)
WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
}
-#define MIN3(a, b, c) ((a) < (b) ? MIN2(a, c) : MIN2(b, c))
void r300VapCntl(r300ContextPtr rmesa, GLuint input_count,
GLuint output_count, GLuint temp_count)
diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile
index 200bc20f647..bec0b5a53fe 100644
--- a/src/mesa/drivers/dri/r600/Makefile
+++ b/src/mesa/drivers/dri/r600/Makefile
@@ -3,17 +3,17 @@
TOP = ../../../../..
include $(TOP)/configs/current
-CFLAGS += $(RADEON_CFLAGS)
-
LIBNAME = r600_dri.so
+include ../Makefile.defines
+
ifeq ($(RADEON_LDFLAGS),)
CS_SOURCES = radeon_cs_space_drm.c radeon_bo.c radeon_cs.c
endif
COMMON_SOURCES = \
../../common/driverfuncs.c \
- ../common/mm.c \
+ ../common/drirenderbuffer.c \
../common/utils.c \
../common/texmem.c \
../common/vblank.c \
@@ -78,9 +78,10 @@ C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
DRIVER_DEFINES = -DRADEON_R600
# -DRADEON_BO_TRACK \
+INCLUDES += $(RADEON_CFLAGS)
DRI_LIB_DEPS += $(RADEON_LDFLAGS)
##### TARGETS #####
-include ../Makefile.template
+include ../Makefile.targets
diff --git a/src/mesa/drivers/dri/r600/evergreen_blit.c b/src/mesa/drivers/dri/r600/evergreen_blit.c
index 0e4da5499ba..95ac4aee5ce 100644
--- a/src/mesa/drivers/dri/r600/evergreen_blit.c
+++ b/src/mesa/drivers/dri/r600/evergreen_blit.c
@@ -1525,6 +1525,48 @@ eg_set_default_state(context_t *context)
num_hs_stack_entries = 42;
num_ls_stack_entries = 42;
break;
+ case CHIP_FAMILY_SUMO:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ num_ps_threads = 96;
+ num_vs_threads = 25;
+ num_gs_threads = 25;
+ num_es_threads = 25;
+ num_hs_threads = 25;
+ num_ls_threads = 25;
+ num_ps_stack_entries = 42;
+ num_vs_stack_entries = 42;
+ num_gs_stack_entries = 42;
+ num_es_stack_entries = 42;
+ num_hs_stack_entries = 42;
+ num_ls_stack_entries = 42;
+ break;
+ case CHIP_FAMILY_SUMO2:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ num_ps_threads = 96;
+ num_vs_threads = 25;
+ num_gs_threads = 25;
+ num_es_threads = 25;
+ num_hs_threads = 25;
+ num_ls_threads = 25;
+ num_ps_stack_entries = 85;
+ num_vs_stack_entries = 85;
+ num_gs_stack_entries = 85;
+ num_es_stack_entries = 85;
+ num_hs_stack_entries = 85;
+ num_ls_stack_entries = 85;
+ break;
case CHIP_FAMILY_BARTS:
num_ps_gprs = 93;
num_vs_gprs = 46;
@@ -1592,6 +1634,8 @@ eg_set_default_state(context_t *context)
if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_CEDAR) ||
(context->radeon.radeonScreen->chip_family == CHIP_FAMILY_PALM) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_SUMO) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_SUMO2) ||
(context->radeon.radeonScreen->chip_family == CHIP_FAMILY_CAICOS))
CLEARbit(sq_config, EG_SQ_CONFIG__VC_ENABLE_bit);
else
diff --git a/src/mesa/drivers/dri/r600/evergreen_chip.c b/src/mesa/drivers/dri/r600/evergreen_chip.c
index 42566e537a5..388a96ff067 100644
--- a/src/mesa/drivers/dri/r600/evergreen_chip.c
+++ b/src/mesa/drivers/dri/r600/evergreen_chip.c
@@ -287,7 +287,9 @@ static void evergreenSetupVTXConstants(struct gl_context * ctx,
return;
if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_CEDAR) ||
- (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_PALM))
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_PALM) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_SUMO) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_SUMO2))
r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit);
else
r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit);
diff --git a/src/mesa/drivers/dri/r600/evergreen_state.c b/src/mesa/drivers/dri/r600/evergreen_state.c
index 309c93fe088..81bf1172dc3 100644
--- a/src/mesa/drivers/dri/r600/evergreen_state.c
+++ b/src/mesa/drivers/dri/r600/evergreen_state.c
@@ -1470,6 +1470,22 @@ static void evergreenInitSQConfig(struct gl_context * ctx)
uMaxThreads = 192;
uMaxStackEntries = 256;
break;
+ case CHIP_FAMILY_SUMO:
+ uSqNumCfInsts = 2;
+ bVC_ENABLE = GL_FALSE;
+ uMaxGPRs = 256;
+ uPSThreadCount = 96;
+ uMaxThreads = 248;
+ uMaxStackEntries = 256;
+ break;
+ case CHIP_FAMILY_SUMO2:
+ uSqNumCfInsts = 2;
+ bVC_ENABLE = GL_FALSE;
+ uMaxGPRs = 256;
+ uPSThreadCount = 96;
+ uMaxThreads = 248;
+ uMaxStackEntries = 512;
+ break;
case CHIP_FAMILY_BARTS:
uSqNumCfInsts = 2;
bVC_ENABLE = GL_TRUE;
diff --git a/src/mesa/drivers/dri/radeon/Makefile b/src/mesa/drivers/dri/radeon/Makefile
index 93219e40afd..6b5d3335452 100644
--- a/src/mesa/drivers/dri/radeon/Makefile
+++ b/src/mesa/drivers/dri/radeon/Makefile
@@ -4,10 +4,10 @@
TOP = ../../../../..
include $(TOP)/configs/current
-CFLAGS += $(RADEON_CFLAGS)
-
LIBNAME = radeon_dri.so
+include ../Makefile.defines
+
ifeq ($(RADEON_LDFLAGS),)
CS_SOURCES = radeon_cs_space_drm.c radeon_bo.c radeon_cs.c
endif
@@ -53,9 +53,10 @@ C_SOURCES = \
DRIVER_DEFINES = -DRADEON_R100
+INCLUDES += $(RADEON_CFLAGS)
DRI_LIB_DEPS += $(RADEON_LDFLAGS)
X86_SOURCES =
-include ../Makefile.template
+include ../Makefile.targets
diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h
index 9145023826e..bd236625122 100644
--- a/src/mesa/drivers/dri/radeon/radeon_chipset.h
+++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h
@@ -450,6 +450,18 @@
#define PCI_CHIP_PALM_9806 0x9806
#define PCI_CHIP_PALM_9807 0x9807
+#define PCI_CHIP_SUMO_9640 0x9640
+#define PCI_CHIP_SUMO_9641 0x9641
+#define PCI_CHIP_SUMO2_9642 0x9642
+#define PCI_CHIP_SUMO2_9643 0x9643
+#define PCI_CHIP_SUMO2_9644 0x9644
+#define PCI_CHIP_SUMO2_9645 0x9645
+#define PCI_CHIP_SUMO_9647 0x9647
+#define PCI_CHIP_SUMO_9648 0x9648
+#define PCI_CHIP_SUMO_964A 0x964a
+#define PCI_CHIP_SUMO_964E 0x964e
+#define PCI_CHIP_SUMO_964F 0x964f
+
#define PCI_CHIP_BARTS_6720 0x6720
#define PCI_CHIP_BARTS_6721 0x6721
#define PCI_CHIP_BARTS_6722 0x6722
@@ -534,6 +546,8 @@ enum {
CHIP_FAMILY_CYPRESS,
CHIP_FAMILY_HEMLOCK,
CHIP_FAMILY_PALM,
+ CHIP_FAMILY_SUMO,
+ CHIP_FAMILY_SUMO2,
CHIP_FAMILY_BARTS,
CHIP_FAMILY_TURKS,
CHIP_FAMILY_CAICOS,
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index 5d7b3973d57..bf8925f61d0 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -98,6 +98,8 @@ static const char* get_chip_family_name(int chip_family)
case CHIP_FAMILY_CYPRESS: return "CYPRESS";
case CHIP_FAMILY_HEMLOCK: return "HEMLOCK";
case CHIP_FAMILY_PALM: return "PALM";
+ case CHIP_FAMILY_SUMO: return "SUMO";
+ case CHIP_FAMILY_SUMO2: return "SUMO2";
case CHIP_FAMILY_BARTS: return "BARTS";
case CHIP_FAMILY_TURKS: return "TURKS";
case CHIP_FAMILY_CAICOS: return "CAICOS";
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
index 6449229e088..6cf843406f9 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -1168,6 +1168,25 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
screen->chip_flags = RADEON_CHIPSET_TCL;
break;
+ case PCI_CHIP_SUMO_9640:
+ case PCI_CHIP_SUMO_9641:
+ case PCI_CHIP_SUMO_9647:
+ case PCI_CHIP_SUMO_9648:
+ case PCI_CHIP_SUMO_964A:
+ case PCI_CHIP_SUMO_964E:
+ case PCI_CHIP_SUMO_964F:
+ screen->chip_family = CHIP_FAMILY_SUMO;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
+ case PCI_CHIP_SUMO2_9642:
+ case PCI_CHIP_SUMO2_9643:
+ case PCI_CHIP_SUMO2_9644:
+ case PCI_CHIP_SUMO2_9645:
+ screen->chip_family = CHIP_FAMILY_SUMO2;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
case PCI_CHIP_BARTS_6720:
case PCI_CHIP_BARTS_6721:
case PCI_CHIP_BARTS_6722:
diff --git a/src/mesa/drivers/dri/savage/Makefile b/src/mesa/drivers/dri/savage/Makefile
index 53511552c6d..03be3468da9 100644
--- a/src/mesa/drivers/dri/savage/Makefile
+++ b/src/mesa/drivers/dri/savage/Makefile
@@ -5,6 +5,8 @@ include $(TOP)/configs/current
LIBNAME = savage_dri.so
+include ../Makefile.defines
+
DRIVER_SOURCES = \
savage_xmesa.c \
savagedd.c \
@@ -22,5 +24,5 @@ C_SOURCES = \
ASM_SOURCES =
-include ../Makefile.template
+include ../Makefile.targets
diff --git a/src/mesa/drivers/dri/sis/Makefile b/src/mesa/drivers/dri/sis/Makefile
index 6b4f938bab3..0e0bf0d4eac 100644
--- a/src/mesa/drivers/dri/sis/Makefile
+++ b/src/mesa/drivers/dri/sis/Makefile
@@ -5,6 +5,8 @@ include $(TOP)/configs/current
LIBNAME = sis_dri.so
+include ../Makefile.defines
+
DRIVER_SOURCES = \
sis6326_state.c \
sis6326_clear.c \
@@ -28,5 +30,5 @@ C_SOURCES = \
ASM_SOURCES =
-include ../Makefile.template
+include ../Makefile.targets
diff --git a/src/mesa/drivers/dri/swrast/Makefile b/src/mesa/drivers/dri/swrast/Makefile
index d2cf6dbc55b..4cb99fd0eb7 100644
--- a/src/mesa/drivers/dri/swrast/Makefile
+++ b/src/mesa/drivers/dri/swrast/Makefile
@@ -5,6 +5,8 @@ include $(TOP)/configs/current
LIBNAME = swrast_dri.so
+include ../Makefile.defines
+
DRIVER_DEFINES = -D__NOT_HAVE_DRM_H
DRIVER_SOURCES = \
@@ -22,5 +24,5 @@ SWRAST_COMMON_SOURCES = \
../common/utils.c \
../common/drisw_util.c
-include ../Makefile.template
+include ../Makefile.targets
diff --git a/src/mesa/drivers/dri/tdfx/Makefile b/src/mesa/drivers/dri/tdfx/Makefile
index 96bd8f8202f..ed84df20925 100644
--- a/src/mesa/drivers/dri/tdfx/Makefile
+++ b/src/mesa/drivers/dri/tdfx/Makefile
@@ -5,6 +5,8 @@ include $(TOP)/configs/current
LIBNAME = tdfx_dri.so
+include ../Makefile.defines
+
DRIVER_SOURCES = \
tdfx_context.c \
tdfx_dd.c \
@@ -27,6 +29,6 @@ C_SOURCES = \
ASM_SOURCES =
-include ../Makefile.template
+include ../Makefile.targets
diff --git a/src/mesa/drivers/dri/unichrome/Makefile b/src/mesa/drivers/dri/unichrome/Makefile
index 14cf9f30386..373da6016e4 100644
--- a/src/mesa/drivers/dri/unichrome/Makefile
+++ b/src/mesa/drivers/dri/unichrome/Makefile
@@ -5,6 +5,8 @@ include $(TOP)/configs/current
LIBNAME = unichrome_dri.so
+include ../Makefile.defines
+
DRIVER_SOURCES = \
via_context.c \
via_fb.c \
@@ -25,5 +27,5 @@ C_SOURCES = \
ASM_SOURCES =
-include ../Makefile.template
+include ../Makefile.targets