summaryrefslogtreecommitdiffstats
path: root/src/mesa
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/Android.gen.mk131
-rw-r--r--src/mesa/Android.mk115
-rw-r--r--src/mesa/Makefile25
-rw-r--r--src/mesa/SConscript26
-rw-r--r--src/mesa/drivers/common/driverfuncs.c15
-rw-r--r--src/mesa/drivers/common/meta.c520
-rw-r--r--src/mesa/drivers/common/meta.h46
-rw-r--r--src/mesa/drivers/dri/common/xmlconfig.c2
-rw-r--r--src/mesa/drivers/dri/common/xmlpool.h2
-rw-r--r--src/mesa/drivers/dri/common/xmlpool/options.h60
-rw-r--r--src/mesa/drivers/dri/common/xmlpool/t_options.h30
-rw-r--r--src/mesa/drivers/dri/i915/i830_vtbl.c7
-rw-r--r--src/mesa/drivers/dri/i915/i915_fragprog.c21
-rw-r--r--src/mesa/drivers/dri/i915/i915_program.c14
-rw-r--r--src/mesa/drivers/dri/i965/Makefile6
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h72
-rw-r--r--src/mesa/drivers/dri/i965/brw_curbe.c27
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h141
-rw-r--r--src/mesa/drivers/dri/i965/brw_disasm.c49
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw_upload.c12
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h30
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c55
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp379
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h109
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_emit.cpp84
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp239
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp55
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp16
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_visitor.cpp151
-rw-r--r--src/mesa/drivers/dri/i965/brw_misc_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.c28
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.cpp59
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.h6
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_dump.c20
-rw-r--r--src/mesa/drivers/dri/i965/brw_tex_layout.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp161
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h489
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_emit.cpp854
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp234
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp2156
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c66
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.h5
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_constval.c12
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c63
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_state.c10
-rw-r--r--src/mesa/drivers/dri/i965/brw_vtbl.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.c36
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c15
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_fp.c14
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass0.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_sampler_state.c7
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/gen6_vs_state.c57
-rw-r--r--src/mesa/drivers/dri/i965/gen6_wm_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/gen7_sampler_state.c7
-rw-r--r--src/mesa/drivers/dri/i965/gen7_vs_state.c10
-rw-r--r--src/mesa/drivers/dri/i965/gen7_wm_state.c10
-rw-r--r--src/mesa/drivers/dri/intel/intel_batchbuffer.c34
-rw-r--r--src/mesa/drivers/dri/intel/intel_blit.c4
-rw-r--r--src/mesa/drivers/dri/intel/intel_buffer_objects.c104
-rw-r--r--src/mesa/drivers/dri/intel/intel_clear.c15
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.c16
-rw-r--r--src/mesa/drivers/dri/intel/intel_fbo.c45
-rw-r--r--src/mesa/drivers/dri/intel/intel_fbo.h1
-rw-r--r--src/mesa/drivers/dri/intel/intel_mipmap_tree.c2
-rw-r--r--src/mesa/drivers/dri/intel/intel_pixel_bitmap.c9
-rw-r--r--src/mesa/drivers/dri/intel/intel_reg.h1
-rw-r--r--src/mesa/drivers/dri/intel/intel_screen.h9
-rw-r--r--src/mesa/drivers/dri/intel/intel_span.c88
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex.c5
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_copy.c101
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_image.c35
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_obj.h5
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_subimage.c6
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_validate.c12
-rw-r--r--src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c24
-rw-r--r--src/mesa/drivers/dri/r200/r200_ioctl.c3
-rw-r--r--src/mesa/drivers/dri/r200/r200_tex.c1
-rw-r--r--src/mesa/drivers/dri/r200/r200_texstate.c6
-rw-r--r--src/mesa/drivers/dri/r200/r200_vertprog.c8
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_optimize.c35
-rw-r--r--src/mesa/drivers/dri/r300/r300_draw.c22
-rw-r--r--src/mesa/drivers/dri/r300/r300_tex.c1
-rw-r--r--src/mesa/drivers/dri/r300/r300_texstate.c5
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_fragprog.c8
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_render.c20
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_tex.c8
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_vertprog.c16
-rw-r--r--src/mesa/drivers/dri/r600/r600_cmdbuf.c2
-rw-r--r--src/mesa/drivers/dri/r600/r600_tex.c1
-rw-r--r--src/mesa/drivers/dri/r600/r600_texstate.c5
-rw-r--r--src/mesa/drivers/dri/r600/r700_fragprog.c8
-rw-r--r--src/mesa/drivers/dri/r600/r700_render.c20
-rw-r--r--src/mesa/drivers/dri/r600/r700_vertprog.c16
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h6
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_buffer_objects.c27
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common.c8
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common_context.c2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_cs_legacy.c2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_ioctl.c3
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_lock.c10
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_tex.c1
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_tex_copy.c55
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_texstate.c9
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_texture.c24
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_texture.h5
-rw-r--r--src/mesa/drivers/x11/xm_dd.c22
-rw-r--r--src/mesa/main/.gitignore4
-rw-r--r--src/mesa/main/api_arrayelt.c12
-rw-r--r--src/mesa/main/api_validate.c6
-rw-r--r--src/mesa/main/bufferobj.c84
-rw-r--r--src/mesa/main/compiler.h69
-rw-r--r--src/mesa/main/dd.h84
-rw-r--r--src/mesa/main/debug.c11
-rw-r--r--src/mesa/main/dlist.c7
-rw-r--r--src/mesa/main/drawtex.c4
-rw-r--r--src/mesa/main/enable.c11
-rw-r--r--src/mesa/main/es_generator.py4
-rw-r--r--src/mesa/main/extensions.c1
-rw-r--r--src/mesa/main/fbobject.c105
-rw-r--r--src/mesa/main/ff_fragment_shader.cpp6
-rw-r--r--src/mesa/main/ffvertex_prog.c10
-rw-r--r--src/mesa/main/framebuffer.c1
-rw-r--r--src/mesa/main/get.c4
-rw-r--r--src/mesa/main/imports.c3
-rw-r--r--src/mesa/main/imports.h8
-rw-r--r--src/mesa/main/mtypes.h11
-rw-r--r--src/mesa/main/nvprogram.c20
-rw-r--r--src/mesa/main/pbo.c33
-rw-r--r--src/mesa/main/querymatrix.c2
-rw-r--r--src/mesa/main/shaderapi.c2
-rw-r--r--src/mesa/main/shaderobj.c11
-rw-r--r--src/mesa/main/shared.c2
-rw-r--r--src/mesa/main/texcompress.c205
-rw-r--r--src/mesa/main/texcompress.h5
-rw-r--r--src/mesa/main/texcompress_rgtc_tmp.h2
-rw-r--r--src/mesa/main/texgetimage.c14
-rw-r--r--src/mesa/main/teximage.c60
-rw-r--r--src/mesa/main/texobj.c6
-rw-r--r--src/mesa/main/texparam.c326
-rw-r--r--src/mesa/main/texstore.c9
-rw-r--r--src/mesa/main/uniforms.c46
-rw-r--r--src/mesa/program/ir_to_mesa.cpp317
-rw-r--r--src/mesa/program/nvfragparse.c23
-rw-r--r--src/mesa/program/prog_execute.c12
-rw-r--r--src/mesa/program/prog_opt_constant_fold.c451
-rw-r--r--src/mesa/program/prog_optimize.c19
-rw-r--r--src/mesa/program/prog_optimize.h3
-rw-r--r--src/mesa/program/prog_parameter.c78
-rw-r--r--src/mesa/program/prog_parameter.h31
-rw-r--r--src/mesa/program/prog_parameter_layout.c2
-rw-r--r--src/mesa/program/prog_print.c2
-rw-r--r--src/mesa/program/prog_statevars.c2
-rw-r--r--src/mesa/program/program.c8
-rw-r--r--src/mesa/program/program_parse.y56
-rw-r--r--src/mesa/program/program_parser.h3
-rw-r--r--src/mesa/program/register_allocate.c21
-rw-r--r--src/mesa/program/register_allocate.h2
-rw-r--r--src/mesa/program/sampler.cpp2
-rw-r--r--src/mesa/sources.mak4
-rw-r--r--src/mesa/state_tracker/st_atom_pixeltransfer.c22
-rw-r--r--src/mesa/state_tracker/st_atom_texture.c6
-rw-r--r--src/mesa/state_tracker/st_cb_bitmap.c43
-rw-r--r--src/mesa/state_tracker/st_cb_blit.c119
-rw-r--r--src/mesa/state_tracker/st_cb_bufferobjects.c51
-rw-r--r--src/mesa/state_tracker/st_cb_drawpixels.c44
-rw-r--r--src/mesa/state_tracker/st_cb_program.c14
-rw-r--r--src/mesa/state_tracker/st_cb_texture.c113
-rw-r--r--src/mesa/state_tracker/st_extensions.c22
-rw-r--r--src/mesa/state_tracker/st_gen_mipmap.c1
-rw-r--r--src/mesa/state_tracker/st_glsl_to_tgsi.cpp5142
-rw-r--r--src/mesa/state_tracker/st_glsl_to_tgsi.h72
-rw-r--r--src/mesa/state_tracker/st_manager.c2
-rw-r--r--src/mesa/state_tracker/st_mesa_to_tgsi.c6
-rw-r--r--src/mesa/state_tracker/st_mesa_to_tgsi.h6
-rw-r--r--src/mesa/state_tracker/st_program.c124
-rw-r--r--src/mesa/state_tracker/st_program.h12
-rw-r--r--src/mesa/state_tracker/st_texture.c24
-rw-r--r--src/mesa/state_tracker/st_texture.h9
-rw-r--r--src/mesa/swrast/s_aatritemp.h72
-rw-r--r--src/mesa/swrast/s_context.c105
-rw-r--r--src/mesa/swrast/s_span.c2
-rw-r--r--src/mesa/swrast/s_stencil.c3
-rw-r--r--src/mesa/swrast/s_texcombine.c4
-rw-r--r--src/mesa/tnl/t_draw.c39
-rw-r--r--src/mesa/tnl/t_pipeline.c12
-rw-r--r--src/mesa/vbo/vbo_exec_api.c38
-rw-r--r--src/mesa/vbo/vbo_exec_array.c64
-rw-r--r--src/mesa/vbo/vbo_exec_draw.c29
-rw-r--r--src/mesa/vbo/vbo_rebase.c10
-rw-r--r--src/mesa/vbo/vbo_save_api.c11
-rw-r--r--src/mesa/vbo/vbo_save_draw.c14
-rw-r--r--src/mesa/vbo/vbo_split_copy.c10
-rw-r--r--src/mesa/x86-64/xform4.S2
194 files changed, 13461 insertions, 2658 deletions
diff --git a/src/mesa/Android.gen.mk b/src/mesa/Android.gen.mk
new file mode 100644
index 00000000000..2a08184aee6
--- /dev/null
+++ b/src/mesa/Android.gen.mk
@@ -0,0 +1,131 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <[email protected]>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# included by core mesa Android.mk for source generation
+
+ifeq ($(LOCAL_MODULE_CLASS),)
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+endif
+
+intermediates := $(call local-intermediates-dir)
+
+sources := \
+ main/api_exec_es1.c \
+ main/api_exec_es1_dispatch.h \
+ main/api_exec_es1_remap_helper.h \
+ main/api_exec_es2.c \
+ main/api_exec_es2_dispatch.h \
+ main/api_exec_es2_remap_helper.h \
+ program/lex.yy.c \
+ program/program_parse.tab.c
+
+LOCAL_SRC_FILES := $(filter-out $(sources), $(LOCAL_SRC_FILES))
+
+LOCAL_C_INCLUDES += $(intermediates)/main
+
+ifeq ($(strip $(MESA_ENABLE_ASM)),true)
+ifeq ($(TARGET_ARCH),x86)
+sources += x86/matypes.h
+LOCAL_C_INCLUDES += $(intermediates)/x86
+endif
+endif
+
+sources += main/git_sha1.h
+
+sources := $(addprefix $(intermediates)/, $(sources))
+LOCAL_GENERATED_SOURCES += $(sources)
+
+glapi := $(MESA_TOP)/src/mapi/glapi/gen
+
+es_src_deps := \
+ $(LOCAL_PATH)/main/APIspec.xml \
+ $(LOCAL_PATH)/main/es_generator.py \
+ $(LOCAL_PATH)/main/APIspecutil.py \
+ $(LOCAL_PATH)/main/APIspec.py
+
+es_hdr_deps := \
+ $(wildcard $(glapi)/*.py) \
+ $(wildcard $(glapi)/*.xml)
+
+define es-gen
+ @mkdir -p $(dir $@)
+ @echo "Gen ES: $(PRIVATE_MODULE) <= $(notdir $(@))"
+ $(hide) $(PRIVATE_SCRIPT) $(1) $(PRIVATE_XML) > $@
+endef
+
+define local-l-to-c
+ @mkdir -p $(dir $@)
+ @echo "Mesa Lex: $(PRIVATE_MODULE) <= $<"
+ $(hide) $(LEX) -o$@ $<
+endef
+
+define local-y-to-c-and-h
+ @mkdir -p $(dir $@)
+ @echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<"
+ $(hide) $(YACC) -o $@ $<
+endef
+
+$(intermediates)/main/api_exec_%.c: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/main/es_generator.py
+$(intermediates)/main/api_exec_%.c: PRIVATE_XML := -S $(LOCAL_PATH)/main/APIspec.xml
+$(intermediates)/main/api_exec_%_dispatch.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(glapi)/gl_table.py
+$(intermediates)/main/api_exec_%_dispatch.h: PRIVATE_XML := -f $(glapi)/gl_and_es_API.xml
+$(intermediates)/main/api_exec_%_remap_helper.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(glapi)/remap_helper.py
+$(intermediates)/main/api_exec_%_remap_helper.h: PRIVATE_XML := -f $(glapi)/gl_and_es_API.xml
+
+$(intermediates)/main/api_exec_es1.c: $(es_src_deps)
+ $(call es-gen,-V GLES1.1)
+
+$(intermediates)/main/api_exec_es2.c: $(es_src_deps)
+ $(call es-gen,-V GLES2.0)
+
+$(intermediates)/main/api_exec_%_dispatch.h: $(es_hdr_deps)
+ $(call es-gen, -c $* -m remap_table)
+
+$(intermediates)/main/api_exec_%_remap_helper.h: $(es_hdr_deps)
+ $(call es-gen, -c $*)
+
+$(intermediates)/program/program_parse.tab.c: $(LOCAL_PATH)/program/program_parse.y
+ $(local-y-to-c-and-h)
+
+$(intermediates)/program/lex.yy.c: $(LOCAL_PATH)/program/program_lexer.l
+ $(local-l-to-c)
+
+$(intermediates)/main/git_sha1.h:
+ @mkdir -p $(dir $@)
+ @echo "GIT-SHA1: $(PRIVATE_MODULE) <= git"
+ $(hide) touch $@
+ $(hide) if which git > /dev/null; then \
+ git --git-dir $(PRIVATE_PATH)/../../.git log -n 1 --oneline | \
+ sed 's/^\([^ ]*\) .*/#define MESA_GIT_SHA1 "git-\1"/' \
+ > $@; \
+ fi
+
+matypes_deps := \
+ $(BUILD_OUT_EXECUTABLES)/mesa_gen_matypes$(BUILD_EXECUTABLE_SUFFIX) \
+ $(LOCAL_PATH)/main/mtypes.h \
+ $(LOCAL_PATH)/tnl/t_context.h
+
+$(intermediates)/x86/matypes.h: $(matypes_deps)
+ @mkdir -p $(dir $@)
+ @echo "MATYPES: $(PRIVATE_MODULE) <= $(notdir $@)"
+ $(hide) $< > $@
diff --git a/src/mesa/Android.mk b/src/mesa/Android.mk
new file mode 100644
index 00000000000..67808d491ac
--- /dev/null
+++ b/src/mesa/Android.mk
@@ -0,0 +1,115 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <[email protected]>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# Android.mk for core mesa
+
+LOCAL_PATH := $(call my-dir)
+
+include $(LOCAL_PATH)/sources.mak
+
+common_CFLAGS := \
+ -DFEATURE_ES1=1 \
+ -DFEATURE_ES2=1
+
+common_C_INCLUDES := \
+ $(MESA_TOP)/src/mapi \
+ $(MESA_TOP)/src/glsl
+
+common_ASM :=
+
+# ---------------------------------------
+# Build mesa_gen_matypes for host
+# ---------------------------------------
+
+ifeq ($(strip $(MESA_ENABLE_ASM)),true)
+ifeq ($(TARGET_ARCH),x86)
+common_ASM += $(X86_SOURCES)
+
+include $(CLEAR_VARS)
+LOCAL_SRC_FILES := x86/gen_matypes.c
+LOCAL_CFLAGS := $(common_CFLAGS)
+LOCAL_C_INCLUDES := $(common_C_INCLUDES)
+LOCAL_MODULE := mesa_gen_matypes
+include $(MESA_COMMON_MK)
+include $(BUILD_HOST_EXECUTABLE)
+
+endif # x86
+endif # MESA_ENABLE_ASM
+
+# ---------------------------------------
+# Build libmesa_st_mesa
+# ---------------------------------------
+
+ifeq ($(strip $(MESA_BUILD_GALLIUM)),true)
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+ $(MESA_GALLIUM_SOURCES) \
+ $(MESA_GALLIUM_CXX_SOURCES) \
+ $(common_ASM)
+
+LOCAL_CFLAGS := $(common_CFLAGS)
+
+LOCAL_C_INCLUDES := \
+ $(common_C_INCLUDES) \
+ $(MESA_TOP)/src/gallium/include \
+ $(MESA_TOP)/src/gallium/auxiliary
+
+LOCAL_MODULE := libmesa_st_mesa
+
+include $(LOCAL_PATH)/Android.gen.mk
+include $(MESA_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
+endif # MESA_BUILD_GALLIUM
+
+# ---------------------------------------
+# Build libmesa_glsl_utils
+#
+# It is used to avoid circular dependency between core mesa and glsl.
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+ program/hash_table.c \
+ program/symbol_table.c
+
+LOCAL_MODULE := libmesa_glsl_utils
+
+include $(MESA_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
+
+# ---------------------------------------
+# Build libmesa_glsl_utils for host
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+ program/hash_table.c \
+ program/symbol_table.c
+
+LOCAL_MODULE := libmesa_glsl_utils
+
+include $(MESA_COMMON_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
diff --git a/src/mesa/Makefile b/src/mesa/Makefile
index a903a260ac9..0e15d61bd8d 100644
--- a/src/mesa/Makefile
+++ b/src/mesa/Makefile
@@ -12,11 +12,10 @@ DRICORE_OBJ_DIR := objs-dricore
include sources.mak
# adjust object dirs
+DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS))
MESA_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_OBJECTS))
MESA_GALLIUM_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_GALLIUM_OBJECTS))
-DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS))
-
# define preprocessor flags
MESA_CPPFLAGS := $(API_DEFINES) $(DEFINES)
@@ -68,6 +67,26 @@ $(DRICORE_OBJ_DIR)/%.o: %.S
# then convenience libs (.a) and finally the device drivers:
default: $(DEPENDS) asm_subdirs $(MESA_LIBS) $(DRICORE_LIBS) driver_subdirs
+# include glapi_gen.mk for generating glapi headers for GLES
+GLAPI := $(TOP)/src/mapi/glapi/gen
+include $(GLAPI)/glapi_gen.mk
+
+main/api_exec_es1_dispatch.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_dispatch_deps)
+ $(call glapi_gen_dispatch,$<,es1)
+
+main/api_exec_es1_remap_helper.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_remap_deps)
+ $(call glapi_gen_remap,$<,es1)
+
+main/api_exec_es1.o: main/api_exec_es1_dispatch.h main/api_exec_es1_remap_helper.h
+
+main/api_exec_es2_dispatch.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_dispatch_deps)
+ $(call glapi_gen_dispatch,$<,es2)
+
+main/api_exec_es2_remap_helper.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_remap_deps)
+ $(call glapi_gen_remap,$<,es2)
+
+main/api_exec_es2.o: main/api_exec_es2_dispatch.h main/api_exec_es2_remap_helper.h
+
main/api_exec_es1.c: main/APIspec.xml main/es_generator.py main/APIspecutil.py main/APIspec.py
$(PYTHON2) $(PYTHON_FLAGS) main/es_generator.py -S main/APIspec.xml -V GLES1.1 > $@
@@ -124,6 +143,8 @@ depend: $(ALL_SOURCES)
@ touch depend
@$(MKDEP) $(MKDEP_OPTIONS) -p$(MESA_OBJ_DIR)/ $(MESA_CPPFLAGS) \
$(ALL_SOURCES) > /dev/null 2>/dev/null
+ @$(MKDEP) $(MKDEP_OPTIONS) -a -p$(DRICORE_OBJ_DIR)/ $(MESA_CPPFLAGS) \
+ $(ALL_SOURCES) > /dev/null 2>/dev/null
######################################################################
# Installation rules
diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index 24e2155c387..b0c3334fa48 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -264,6 +264,7 @@ statetracker_sources = [
'state_tracker/st_draw_feedback.c',
'state_tracker/st_extensions.c',
'state_tracker/st_format.c',
+ 'state_tracker/st_glsl_to_tgsi.cpp',
'state_tracker/st_gen_mipmap.c',
'state_tracker/st_manager.c',
'state_tracker/st_mesa_to_tgsi.c',
@@ -292,6 +293,7 @@ program_sources = [
'program/prog_instruction.c',
'program/prog_noise.c',
'program/prog_optimize.c',
+ 'program/prog_opt_constant_fold.c',
'program/prog_parameter.c',
'program/prog_parameter_layout.c',
'program/prog_print.c',
@@ -346,28 +348,28 @@ if env['gles']:
GLAPI = '#src/mapi/glapi/'
gles_headers = []
gles_headers += env.CodeGenerate(
- target = 'es1api/main/dispatch.h',
+ target = 'main/api_exec_es1_dispatch.h',
script = GLAPI + 'gen/gl_table.py',
- source = GLAPI + 'gen-es/es1_API.xml',
- command = python_cmd + ' $SCRIPT -c -m remap_table -f $SOURCE > $TARGET',
+ source = GLAPI + 'gen/gl_and_es_API.xml',
+ command = python_cmd + ' $SCRIPT -c es1 -m remap_table -f $SOURCE > $TARGET',
)
gles_headers += env.CodeGenerate(
- target = 'es1api/main/remap_helper.h',
+ target = 'main/api_exec_es1_remap_helper.h',
script = GLAPI + 'gen/remap_helper.py',
- source = GLAPI + 'gen-es/es1_API.xml',
- command = python_cmd + ' $SCRIPT -f $SOURCE > $TARGET',
+ source = GLAPI + 'gen/gl_and_es_API.xml',
+ command = python_cmd + ' $SCRIPT -c es1 -f $SOURCE > $TARGET',
)
gles_headers += env.CodeGenerate(
- target = 'es2api/main/dispatch.h',
+ target = 'main/api_exec_es2_dispatch.h',
script = GLAPI + 'gen/gl_table.py',
- source = GLAPI + 'gen-es/es2_API.xml',
- command = python_cmd + ' $SCRIPT -c -m remap_table -f $SOURCE > $TARGET',
+ source = GLAPI + 'gen/gl_and_es_API.xml',
+ command = python_cmd + ' $SCRIPT -c es2 -m remap_table -f $SOURCE > $TARGET',
)
gles_headers += env.CodeGenerate(
- target = 'es2api/main/remap_helper.h',
+ target = 'main/api_exec_es2_remap_helper.h',
script = GLAPI + 'gen/remap_helper.py',
- source = GLAPI + 'gen-es/es2_API.xml',
- command = python_cmd + ' $SCRIPT -f $SOURCE > $TARGET',
+ source = GLAPI + 'gen/gl_and_es_API.xml',
+ command = python_cmd + ' $SCRIPT -c es2 -f $SOURCE > $TARGET',
)
env.Depends(gles_sources, gles_headers)
diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c
index 8ab129dd73d..a6174ee2f56 100644
--- a/src/mesa/drivers/common/driverfuncs.c
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -95,8 +95,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
driver->TexSubImage2D = _mesa_store_texsubimage2d;
driver->TexSubImage3D = _mesa_store_texsubimage3d;
driver->GetTexImage = _mesa_get_teximage;
- driver->CopyTexImage1D = _mesa_meta_CopyTexImage1D;
- driver->CopyTexImage2D = _mesa_meta_CopyTexImage2D;
driver->CopyTexSubImage1D = _mesa_meta_CopyTexSubImage1D;
driver->CopyTexSubImage2D = _mesa_meta_CopyTexSubImage2D;
driver->CopyTexSubImage3D = _mesa_meta_CopyTexSubImage3D;
@@ -250,10 +248,10 @@ _mesa_init_driver_state(struct gl_context *ctx)
GLuint i;
for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) {
ctx->Driver.ColorMaskIndexed(ctx, i,
- ctx->Color.ColorMask[0][RCOMP],
- ctx->Color.ColorMask[0][GCOMP],
- ctx->Color.ColorMask[0][BCOMP],
- ctx->Color.ColorMask[0][ACOMP]);
+ ctx->Color.ColorMask[i][RCOMP],
+ ctx->Color.ColorMask[i][GCOMP],
+ ctx->Color.ColorMask[i][BCOMP],
+ ctx->Color.ColorMask[i][ACOMP]);
}
}
else {
@@ -288,7 +286,10 @@ _mesa_init_driver_state(struct gl_context *ctx)
ctx->Driver.Enable(ctx, GL_TEXTURE_CUBE_MAP, GL_FALSE);
ctx->Driver.Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color);
- ctx->Driver.Fogfv(ctx, GL_FOG_MODE, 0);
+ {
+ GLfloat mode = (GLfloat) ctx->Fog.Mode;
+ ctx->Driver.Fogfv(ctx, GL_FOG_MODE, &mode);
+ }
ctx->Driver.Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density);
ctx->Driver.Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start);
ctx->Driver.Fogfv(ctx, GL_FOG_END, &ctx->Fog.End);
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 0e58aeca3f5..291d912121b 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -62,6 +62,7 @@
#include "main/teximage.h"
#include "main/texparam.h"
#include "main/texstate.h"
+#include "main/uniforms.h"
#include "main/varray.h"
#include "main/viewport.h"
#include "program/program.h"
@@ -72,63 +73,36 @@
/** Return offset in bytes of the field within a vertex struct */
#define OFFSET(FIELD) ((void *) offsetof(struct vertex, FIELD))
-
-/**
- * Flags passed to _mesa_meta_begin().
- */
-/*@{*/
-#define META_ALL ~0x0
-#define META_ALPHA_TEST 0x1
-#define META_BLEND 0x2 /**< includes logicop */
-#define META_COLOR_MASK 0x4
-#define META_DEPTH_TEST 0x8
-#define META_FOG 0x10
-#define META_PIXEL_STORE 0x20
-#define META_PIXEL_TRANSFER 0x40
-#define META_RASTERIZATION 0x80
-#define META_SCISSOR 0x100
-#define META_SHADER 0x200
-#define META_STENCIL_TEST 0x400
-#define META_TRANSFORM 0x800 /**< modelview, projection, clip planes */
-#define META_TEXTURE 0x1000
-#define META_VERTEX 0x2000
-#define META_VIEWPORT 0x4000
-#define META_CLAMP_FRAGMENT_COLOR 0x8000
-#define META_CLAMP_VERTEX_COLOR 0x10000
-#define META_CONDITIONAL_RENDER 0x20000
-/*@}*/
-
-
/**
* State which we may save/restore across meta ops.
* XXX this may be incomplete...
*/
struct save_state
{
- GLbitfield SavedState; /**< bitmask of META_* flags */
+ GLbitfield SavedState; /**< bitmask of MESA_META_* flags */
- /** META_ALPHA_TEST */
+ /** MESA_META_ALPHA_TEST */
GLboolean AlphaEnabled;
GLenum AlphaFunc;
GLclampf AlphaRef;
- /** META_BLEND */
+ /** MESA_META_BLEND */
GLbitfield BlendEnabled;
GLboolean ColorLogicOpEnabled;
- /** META_COLOR_MASK */
+ /** MESA_META_COLOR_MASK */
GLubyte ColorMask[MAX_DRAW_BUFFERS][4];
- /** META_DEPTH_TEST */
+ /** MESA_META_DEPTH_TEST */
struct gl_depthbuffer_attrib Depth;
- /** META_FOG */
+ /** MESA_META_FOG */
GLboolean Fog;
- /** META_PIXEL_STORE */
+ /** MESA_META_PIXEL_STORE */
struct gl_pixelstore_attrib Pack, Unpack;
- /** META_PIXEL_TRANSFER */
+ /** MESA_META_PIXEL_TRANSFER */
GLfloat RedBias, RedScale;
GLfloat GreenBias, GreenScale;
GLfloat BlueBias, BlueScale;
@@ -136,17 +110,17 @@ struct save_state
GLfloat DepthBias, DepthScale;
GLboolean MapColorFlag;
- /** META_RASTERIZATION */
+ /** MESA_META_RASTERIZATION */
GLenum FrontPolygonMode, BackPolygonMode;
GLboolean PolygonOffset;
GLboolean PolygonSmooth;
GLboolean PolygonStipple;
GLboolean PolygonCull;
- /** META_SCISSOR */
+ /** MESA_META_SCISSOR */
struct gl_scissor_attrib Scissor;
- /** META_SHADER */
+ /** MESA_META_SHADER */
GLboolean VertexProgramEnabled;
struct gl_vertex_program *VertexProgram;
GLboolean FragmentProgramEnabled;
@@ -156,17 +130,19 @@ struct save_state
struct gl_shader_program *FragmentShader;
struct gl_shader_program *ActiveShader;
- /** META_STENCIL_TEST */
+ /** MESA_META_STENCIL_TEST */
struct gl_stencil_attrib Stencil;
- /** META_TRANSFORM */
+ /** MESA_META_TRANSFORM */
GLenum MatrixMode;
GLfloat ModelviewMatrix[16];
GLfloat ProjectionMatrix[16];
GLfloat TextureMatrix[16];
+
+ /** MESA_META_CLIP */
GLbitfield ClipPlanesEnabled;
- /** META_TEXTURE */
+ /** MESA_META_TEXTURE */
GLuint ActiveUnit;
GLuint ClientActiveUnit;
/** for unit[0] only */
@@ -176,21 +152,21 @@ struct save_state
GLbitfield TexGenEnabled[MAX_TEXTURE_UNITS];
GLuint EnvMode; /* unit[0] only */
- /** META_VERTEX */
+ /** MESA_META_VERTEX */
struct gl_array_object *ArrayObj;
struct gl_buffer_object *ArrayBufferObj;
- /** META_VIEWPORT */
+ /** MESA_META_VIEWPORT */
GLint ViewportX, ViewportY, ViewportW, ViewportH;
GLclampd DepthNear, DepthFar;
- /** META_CLAMP_FRAGMENT_COLOR */
+ /** MESA_META_CLAMP_FRAGMENT_COLOR */
GLenum ClampFragmentColor;
- /** META_CLAMP_VERTEX_COLOR */
+ /** MESA_META_CLAMP_VERTEX_COLOR */
GLenum ClampVertexColor;
- /** META_CONDITIONAL_RENDER */
+ /** MESA_META_CONDITIONAL_RENDER */
struct gl_query_object *CondRenderQuery;
GLenum CondRenderMode;
@@ -235,6 +211,8 @@ struct clear_state
{
GLuint ArrayObj;
GLuint VBO;
+ GLuint ShaderProg;
+ GLint ColorLocation;
};
@@ -336,10 +314,10 @@ _mesa_meta_free(struct gl_context *ctx)
* Enter meta state. This is like a light-weight version of glPushAttrib
* but it also resets most GL state back to default values.
*
- * \param state bitmask of META_* flags indicating which attribute groups
+ * \param state bitmask of MESA_META_* flags indicating which attribute groups
* to save and reset to their defaults
*/
-static void
+void
_mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
{
struct save_state *save;
@@ -351,7 +329,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
memset(save, 0, sizeof(*save));
save->SavedState = state;
- if (state & META_ALPHA_TEST) {
+ if (state & MESA_META_ALPHA_TEST) {
save->AlphaEnabled = ctx->Color.AlphaEnabled;
save->AlphaFunc = ctx->Color.AlphaFunc;
save->AlphaRef = ctx->Color.AlphaRef;
@@ -359,7 +337,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
_mesa_set_enable(ctx, GL_ALPHA_TEST, GL_FALSE);
}
- if (state & META_BLEND) {
+ if (state & MESA_META_BLEND) {
save->BlendEnabled = ctx->Color.BlendEnabled;
if (ctx->Color.BlendEnabled) {
if (ctx->Extensions.EXT_draw_buffers2) {
@@ -377,7 +355,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
_mesa_set_enable(ctx, GL_COLOR_LOGIC_OP, GL_FALSE);
}
- if (state & META_COLOR_MASK) {
+ if (state & MESA_META_COLOR_MASK) {
memcpy(save->ColorMask, ctx->Color.ColorMask,
sizeof(ctx->Color.ColorMask));
if (!ctx->Color.ColorMask[0][0] ||
@@ -387,26 +365,26 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
_mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
}
- if (state & META_DEPTH_TEST) {
+ if (state & MESA_META_DEPTH_TEST) {
save->Depth = ctx->Depth; /* struct copy */
if (ctx->Depth.Test)
_mesa_set_enable(ctx, GL_DEPTH_TEST, GL_FALSE);
}
- if (state & META_FOG) {
+ if (state & MESA_META_FOG) {
save->Fog = ctx->Fog.Enabled;
if (ctx->Fog.Enabled)
_mesa_set_enable(ctx, GL_FOG, GL_FALSE);
}
- if (state & META_PIXEL_STORE) {
+ if (state & MESA_META_PIXEL_STORE) {
save->Pack = ctx->Pack;
save->Unpack = ctx->Unpack;
ctx->Pack = ctx->DefaultPacking;
ctx->Unpack = ctx->DefaultPacking;
}
- if (state & META_PIXEL_TRANSFER) {
+ if (state & MESA_META_PIXEL_TRANSFER) {
save->RedScale = ctx->Pixel.RedScale;
save->RedBias = ctx->Pixel.RedBias;
save->GreenScale = ctx->Pixel.GreenScale;
@@ -429,7 +407,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
ctx->NewState |=_NEW_PIXEL;
}
- if (state & META_RASTERIZATION) {
+ if (state & MESA_META_RASTERIZATION) {
save->FrontPolygonMode = ctx->Polygon.FrontMode;
save->BackPolygonMode = ctx->Polygon.BackMode;
save->PolygonOffset = ctx->Polygon.OffsetFill;
@@ -443,12 +421,12 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
_mesa_set_enable(ctx, GL_CULL_FACE, GL_FALSE);
}
- if (state & META_SCISSOR) {
+ if (state & MESA_META_SCISSOR) {
save->Scissor = ctx->Scissor; /* struct copy */
_mesa_set_enable(ctx, GL_SCISSOR_TEST, GL_FALSE);
}
- if (state & META_SHADER) {
+ if (state & MESA_META_SHADER) {
if (ctx->Extensions.ARB_vertex_program) {
save->VertexProgramEnabled = ctx->VertexProgram.Enabled;
_mesa_reference_vertprog(ctx, &save->VertexProgram,
@@ -477,14 +455,14 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
}
}
- if (state & META_STENCIL_TEST) {
+ if (state & MESA_META_STENCIL_TEST) {
save->Stencil = ctx->Stencil; /* struct copy */
if (ctx->Stencil.Enabled)
_mesa_set_enable(ctx, GL_STENCIL_TEST, GL_FALSE);
/* NOTE: other stencil state not reset */
}
- if (state & META_TEXTURE) {
+ if (state & MESA_META_TEXTURE) {
GLuint u, tgt;
save->ActiveUnit = ctx->Texture.CurrentUnit;
@@ -523,7 +501,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
_mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
}
- if (state & META_TRANSFORM) {
+ if (state & MESA_META_TRANSFORM) {
GLuint activeTexture = ctx->Texture.CurrentUnit;
memcpy(save->ModelviewMatrix, ctx->ModelviewMatrixStack.Top->m,
16 * sizeof(GLfloat));
@@ -544,6 +522,9 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
_mesa_Ortho(0.0, ctx->DrawBuffer->Width,
0.0, ctx->DrawBuffer->Height,
-1.0, 1.0);
+ }
+
+ if (state & MESA_META_CLIP) {
save->ClipPlanesEnabled = ctx->Transform.ClipPlanesEnabled;
if (ctx->Transform.ClipPlanesEnabled) {
GLuint i;
@@ -553,7 +534,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
}
}
- if (state & META_VERTEX) {
+ if (state & MESA_META_VERTEX) {
/* save vertex array object state */
_mesa_reference_array_object(ctx, &save->ArrayObj,
ctx->Array.ArrayObj);
@@ -562,7 +543,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
/* set some default state? */
}
- if (state & META_VIEWPORT) {
+ if (state & MESA_META_VIEWPORT) {
/* save viewport state */
save->ViewportX = ctx->Viewport.X;
save->ViewportY = ctx->Viewport.Y;
@@ -583,7 +564,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
_mesa_DepthRange(0.0, 1.0);
}
- if (state & META_CLAMP_FRAGMENT_COLOR) {
+ if (state & MESA_META_CLAMP_FRAGMENT_COLOR) {
save->ClampFragmentColor = ctx->Color.ClampFragmentColor;
/* Generally in here we want to do clamping according to whether
@@ -594,7 +575,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
_mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE);
}
- if (state & META_CLAMP_VERTEX_COLOR) {
+ if (state & MESA_META_CLAMP_VERTEX_COLOR) {
save->ClampVertexColor = ctx->Light.ClampVertexColor;
/* Generally in here we never want vertex color clamping --
@@ -603,7 +584,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
_mesa_ClampColorARB(GL_CLAMP_VERTEX_COLOR, GL_FALSE);
}
- if (state & META_CONDITIONAL_RENDER) {
+ if (state & MESA_META_CONDITIONAL_RENDER) {
save->CondRenderQuery = ctx->Query.CondRenderQuery;
save->CondRenderMode = ctx->Query.CondRenderMode;
@@ -623,19 +604,19 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
/**
* Leave meta state. This is like a light-weight version of glPopAttrib().
*/
-static void
+void
_mesa_meta_end(struct gl_context *ctx)
{
struct save_state *save = &ctx->Meta->Save[--ctx->Meta->SaveStackDepth];
const GLbitfield state = save->SavedState;
- if (state & META_ALPHA_TEST) {
+ if (state & MESA_META_ALPHA_TEST) {
if (ctx->Color.AlphaEnabled != save->AlphaEnabled)
_mesa_set_enable(ctx, GL_ALPHA_TEST, save->AlphaEnabled);
_mesa_AlphaFunc(save->AlphaFunc, save->AlphaRef);
}
- if (state & META_BLEND) {
+ if (state & MESA_META_BLEND) {
if (ctx->Color.BlendEnabled != save->BlendEnabled) {
if (ctx->Extensions.EXT_draw_buffers2) {
GLuint i;
@@ -651,7 +632,7 @@ _mesa_meta_end(struct gl_context *ctx)
_mesa_set_enable(ctx, GL_COLOR_LOGIC_OP, save->ColorLogicOpEnabled);
}
- if (state & META_COLOR_MASK) {
+ if (state & MESA_META_COLOR_MASK) {
GLuint i;
for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) {
if (!TEST_EQ_4V(ctx->Color.ColorMask[i], save->ColorMask[i])) {
@@ -670,23 +651,23 @@ _mesa_meta_end(struct gl_context *ctx)
}
}
- if (state & META_DEPTH_TEST) {
+ if (state & MESA_META_DEPTH_TEST) {
if (ctx->Depth.Test != save->Depth.Test)
_mesa_set_enable(ctx, GL_DEPTH_TEST, save->Depth.Test);
_mesa_DepthFunc(save->Depth.Func);
_mesa_DepthMask(save->Depth.Mask);
}
- if (state & META_FOG) {
+ if (state & MESA_META_FOG) {
_mesa_set_enable(ctx, GL_FOG, save->Fog);
}
- if (state & META_PIXEL_STORE) {
+ if (state & MESA_META_PIXEL_STORE) {
ctx->Pack = save->Pack;
ctx->Unpack = save->Unpack;
}
- if (state & META_PIXEL_TRANSFER) {
+ if (state & MESA_META_PIXEL_TRANSFER) {
ctx->Pixel.RedScale = save->RedScale;
ctx->Pixel.RedBias = save->RedBias;
ctx->Pixel.GreenScale = save->GreenScale;
@@ -700,7 +681,7 @@ _mesa_meta_end(struct gl_context *ctx)
ctx->NewState |=_NEW_PIXEL;
}
- if (state & META_RASTERIZATION) {
+ if (state & MESA_META_RASTERIZATION) {
_mesa_PolygonMode(GL_FRONT, save->FrontPolygonMode);
_mesa_PolygonMode(GL_BACK, save->BackPolygonMode);
_mesa_set_enable(ctx, GL_POLYGON_STIPPLE, save->PolygonStipple);
@@ -709,13 +690,13 @@ _mesa_meta_end(struct gl_context *ctx)
_mesa_set_enable(ctx, GL_CULL_FACE, save->PolygonCull);
}
- if (state & META_SCISSOR) {
+ if (state & MESA_META_SCISSOR) {
_mesa_set_enable(ctx, GL_SCISSOR_TEST, save->Scissor.Enabled);
_mesa_Scissor(save->Scissor.X, save->Scissor.Y,
save->Scissor.Width, save->Scissor.Height);
}
- if (state & META_SHADER) {
+ if (state & MESA_META_SHADER) {
if (ctx->Extensions.ARB_vertex_program) {
_mesa_set_enable(ctx, GL_VERTEX_PROGRAM_ARB,
save->VertexProgramEnabled);
@@ -747,7 +728,7 @@ _mesa_meta_end(struct gl_context *ctx)
save->ActiveShader);
}
- if (state & META_STENCIL_TEST) {
+ if (state & MESA_META_STENCIL_TEST) {
const struct gl_stencil_attrib *stencil = &save->Stencil;
_mesa_set_enable(ctx, GL_STENCIL_TEST, stencil->Enabled);
@@ -778,7 +759,7 @@ _mesa_meta_end(struct gl_context *ctx)
stencil->ZPassFunc[1]);
}
- if (state & META_TEXTURE) {
+ if (state & MESA_META_TEXTURE) {
GLuint u, tgt;
ASSERT(ctx->Texture.CurrentUnit == 0);
@@ -829,7 +810,7 @@ _mesa_meta_end(struct gl_context *ctx)
_mesa_ClientActiveTextureARB(GL_TEXTURE0 + save->ClientActiveUnit);
}
- if (state & META_TRANSFORM) {
+ if (state & MESA_META_TRANSFORM) {
GLuint activeTexture = ctx->Texture.CurrentUnit;
_mesa_ActiveTextureARB(GL_TEXTURE0);
_mesa_MatrixMode(GL_TEXTURE);
@@ -843,7 +824,9 @@ _mesa_meta_end(struct gl_context *ctx)
_mesa_LoadMatrixf(save->ProjectionMatrix);
_mesa_MatrixMode(save->MatrixMode);
+ }
+ if (state & MESA_META_CLIP) {
if (save->ClipPlanesEnabled) {
GLuint i;
for (i = 0; i < ctx->Const.MaxClipPlanes; i++) {
@@ -854,7 +837,7 @@ _mesa_meta_end(struct gl_context *ctx)
}
}
- if (state & META_VERTEX) {
+ if (state & MESA_META_VERTEX) {
/* restore vertex buffer object */
_mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, save->ArrayBufferObj->Name);
_mesa_reference_buffer_object(ctx, &save->ArrayBufferObj, NULL);
@@ -864,7 +847,7 @@ _mesa_meta_end(struct gl_context *ctx)
_mesa_reference_array_object(ctx, &save->ArrayObj, NULL);
}
- if (state & META_VIEWPORT) {
+ if (state & MESA_META_VIEWPORT) {
if (save->ViewportX != ctx->Viewport.X ||
save->ViewportY != ctx->Viewport.Y ||
save->ViewportW != ctx->Viewport.Width ||
@@ -875,15 +858,15 @@ _mesa_meta_end(struct gl_context *ctx)
_mesa_DepthRange(save->DepthNear, save->DepthFar);
}
- if (state & META_CLAMP_FRAGMENT_COLOR) {
+ if (state & MESA_META_CLAMP_FRAGMENT_COLOR) {
_mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, save->ClampFragmentColor);
}
- if (state & META_CLAMP_VERTEX_COLOR) {
+ if (state & MESA_META_CLAMP_VERTEX_COLOR) {
_mesa_ClampColorARB(GL_CLAMP_VERTEX_COLOR, save->ClampVertexColor);
}
- if (state & META_CONDITIONAL_RENDER) {
+ if (state & MESA_META_CONDITIONAL_RENDER) {
if (save->CondRenderQuery)
_mesa_BeginConditionalRender(save->CondRenderQuery->Id,
save->CondRenderMode);
@@ -1349,7 +1332,7 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
}
/* only scissor effects blit so save/clear all other relevant state */
- _mesa_meta_begin(ctx, ~META_SCISSOR);
+ _mesa_meta_begin(ctx, ~MESA_META_SCISSOR);
if (blit->ArrayObj == 0) {
/* one-time setup */
@@ -1478,15 +1461,15 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers)
};
struct vertex verts[4];
/* save all state but scissor, pixel pack/unpack */
- GLbitfield metaSave = (META_ALL -
- META_SCISSOR -
- META_PIXEL_STORE -
- META_CONDITIONAL_RENDER);
+ GLbitfield metaSave = (MESA_META_ALL -
+ MESA_META_SCISSOR -
+ MESA_META_PIXEL_STORE -
+ MESA_META_CONDITIONAL_RENDER);
const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1;
if (buffers & BUFFER_BITS_COLOR) {
/* if clearing color buffers, don't save/restore colormask */
- metaSave -= META_COLOR_MASK;
+ metaSave -= MESA_META_COLOR_MASK;
}
_mesa_meta_begin(ctx, metaSave);
@@ -1521,7 +1504,7 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers)
_mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE);
}
else {
- ASSERT(metaSave & META_COLOR_MASK);
+ ASSERT(metaSave & MESA_META_COLOR_MASK);
_mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
}
@@ -1589,10 +1572,166 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers)
_mesa_meta_end(ctx);
}
+/**
+ * One-time setup of the objects used by _mesa_meta_glsl_Clear().
+ *
+ * Creates a vertex array object, a vertex buffer object and a GLSL program
+ * and stores their names in \p clear (ArrayObj, VBO, ShaderProg), together
+ * with the location of the program's "color" uniform (ColorLocation).
+ * Returns immediately when clear->ArrayObj is already non-zero, so it is
+ * safe to call on every clear.
+ *
+ * \param ctx    GL context (not referenced in the body)
+ * \param clear  clear state that receives the created object names
+ */
+static void
+meta_glsl_clear_init(struct gl_context *ctx, struct clear_state *clear)
+{
+ /* Vertex shader: passes the "position" attribute through unchanged. */
+ const char *vs_source =
+ "attribute vec4 position;\n"
+ "void main()\n"
+ "{\n"
+ " gl_Position = position;\n"
+ "}\n";
+ /* Fragment shader: writes the "color" uniform to every fragment. */
+ const char *fs_source =
+ "uniform vec4 color;\n"
+ "void main()\n"
+ "{\n"
+ " gl_FragColor = color;\n"
+ "}\n";
+ GLuint vs, fs;
+
+ /* A non-zero ArrayObj means a previous call already did the setup. */
+ if (clear->ArrayObj != 0)
+ return;
+
+ /* create vertex array object */
+ _mesa_GenVertexArrays(1, &clear->ArrayObj);
+ _mesa_BindVertexArray(clear->ArrayObj);
+
+ /* create vertex array buffer */
+ _mesa_GenBuffersARB(1, &clear->VBO);
+ _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO);
+
+ /* setup vertex arrays: attribute 0 reads three floats per vertex from
+ * offset 0 of the buffer bound above, with stride 0.
+ */
+ _mesa_VertexAttribPointerARB(0, 3, GL_FLOAT, GL_FALSE, 0, (void *)0);
+ _mesa_EnableVertexAttribArrayARB(0);
+
+ /* NOTE(review): compile and link status are not checked anywhere below. */
+ vs = _mesa_CreateShaderObjectARB(GL_VERTEX_SHADER);
+ _mesa_ShaderSourceARB(vs, 1, &vs_source, NULL);
+ _mesa_CompileShaderARB(vs);
+
+ fs = _mesa_CreateShaderObjectARB(GL_FRAGMENT_SHADER);
+ _mesa_ShaderSourceARB(fs, 1, &fs_source, NULL);
+ _mesa_CompileShaderARB(fs);
+
+ /* NOTE(review): vs and fs are attached but never deleted in this
+ * function -- confirm whether they should be released after linking.
+ */
+ clear->ShaderProg = _mesa_CreateProgramObjectARB();
+ _mesa_AttachShader(clear->ShaderProg, fs);
+ _mesa_AttachShader(clear->ShaderProg, vs);
+ /* Tie "position" to attribute 0, the index configured above; the
+ * binding is requested before the program is linked.
+ */
+ _mesa_BindAttribLocationARB(clear->ShaderProg, 0, "position");
+ _mesa_LinkProgramARB(clear->ShaderProg);
+
+ /* Cache the uniform location for later _mesa_Uniform4fvARB() calls. */
+ clear->ColorLocation = _mesa_GetUniformLocationARB(clear->ShaderProg,
+ "color");
+}
+
+/**
+ * Meta implementation of ctx->Driver.Clear() in terms of polygon rendering.
+ *
+ * Draws a single quad with the GLSL program created by
+ * meta_glsl_clear_init(); the program outputs ctx->Color.ClearColorUnclamped
+ * for every fragment.  Depth and stencil clears are done by enabling the
+ * corresponding test with GL_ALWAYS while the quad is drawn.  All state
+ * listed in metaSave is saved by _mesa_meta_begin() and restored by
+ * _mesa_meta_end().
+ *
+ * \param ctx      GL context whose current draw buffer is cleared
+ * \param buffers  bitmask tested against BUFFER_BITS_COLOR,
+ *                 BUFFER_BIT_DEPTH and BUFFER_BIT_STENCIL
+ */
+void
+_mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers)
+{
+ struct clear_state *clear = &ctx->Meta->Clear;
+ GLbitfield metaSave;
+ /* Largest value representable in the draw buffer's stencil bits. */
+ const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1;
+ struct gl_framebuffer *fb = ctx->DrawBuffer;
+ /* Map the fb->_Xmin.._Xmax / _Ymin.._Ymax rectangle from
+ * [0, Width] x [0, Height] to [-1, 1] x [-1, 1]; the vertex shader
+ * writes these positions to gl_Position unchanged.
+ */
+ const float x0 = ((float)fb->_Xmin / fb->Width) * 2.0f - 1.0f;
+ const float y0 = ((float)fb->_Ymin / fb->Height) * 2.0f - 1.0f;
+ const float x1 = ((float)fb->_Xmax / fb->Width) * 2.0f - 1.0f;
+ const float y1 = ((float)fb->_Ymax / fb->Height) * 2.0f - 1.0f;
+ /* Z shared by all four vertices, derived from the depth clear value.
+ * NOTE(review): invert_z() is defined outside this hunk -- confirm its
+ * mapping.
+ */
+ const float z = -invert_z(ctx->Depth.Clear);
+ struct vertex {
+ GLfloat x, y, z;
+ } verts[4];
+
+ /* State groups this function overrides below. */
+ metaSave = (MESA_META_ALPHA_TEST |
+ MESA_META_BLEND |
+ MESA_META_DEPTH_TEST |
+ MESA_META_RASTERIZATION |
+ MESA_META_SHADER |
+ MESA_META_STENCIL_TEST |
+ MESA_META_VERTEX |
+ MESA_META_VIEWPORT |
+ MESA_META_CLIP |
+ MESA_META_CLAMP_FRAGMENT_COLOR);
+
+ if (!(buffers & BUFFER_BITS_COLOR)) {
+ /* We'll use colormask to disable color writes. Otherwise,
+ * respect color mask
+ */
+ metaSave |= MESA_META_COLOR_MASK;
+ }
+
+ _mesa_meta_begin(ctx, metaSave);
+
+ /* No-op after the first call (see the ArrayObj check inside). */
+ meta_glsl_clear_init(ctx, clear);
+
+ _mesa_UseProgramObjectARB(clear->ShaderProg);
+ _mesa_Uniform4fvARB(clear->ColorLocation, 1,
+ ctx->Color.ClearColorUnclamped);
+
+ _mesa_BindVertexArray(clear->ArrayObj);
+ _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO);
+
+ /* GL_COLOR_BUFFER_BIT */
+ if (buffers & BUFFER_BITS_COLOR) {
+ /* leave colormask, glDrawBuffer state as-is */
+
+ /* Clears never have the color clamped. */
+ _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE);
+ }
+ else {
+ /* Color mask was added to metaSave above, so it will be restored. */
+ ASSERT(metaSave & MESA_META_COLOR_MASK);
+ _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
+ }
+
+ /* GL_DEPTH_BUFFER_BIT */
+ if (buffers & BUFFER_BIT_DEPTH) {
+ /* GL_ALWAYS with depth writes on: every fragment stores the quad's Z. */
+ _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_TRUE);
+ _mesa_DepthFunc(GL_ALWAYS);
+ _mesa_DepthMask(GL_TRUE);
+ }
+ else {
+ /* Expects _mesa_meta_begin() to have left depth testing disabled. */
+ assert(!ctx->Depth.Test);
+ }
+
+ /* GL_STENCIL_BUFFER_BIT */
+ if (buffers & BUFFER_BIT_STENCIL) {
+ /* GL_ALWAYS + GL_REPLACE: the reference value (the stencil clear
+ * value limited to stencilMax) replaces the stored stencil value.
+ */
+ _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_TRUE);
+ _mesa_StencilOpSeparate(GL_FRONT_AND_BACK,
+ GL_REPLACE, GL_REPLACE, GL_REPLACE);
+ _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS,
+ ctx->Stencil.Clear & stencilMax,
+ ctx->Stencil.WriteMask[0]);
+ }
+ else {
+ /* _mesa_meta_begin() disables the stencil test for
+ * MESA_META_STENCIL_TEST.
+ */
+ assert(!ctx->Stencil.Enabled);
+ }
+
+ /* vertex positions: (x0,y0), (x1,y0), (x1,y1), (x0,y1), all at depth z */
+ verts[0].x = x0;
+ verts[0].y = y0;
+ verts[0].z = z;
+ verts[1].x = x1;
+ verts[1].y = y0;
+ verts[1].z = z;
+ verts[2].x = x1;
+ verts[2].y = y1;
+ verts[2].z = z;
+ verts[3].x = x0;
+ verts[3].y = y1;
+ verts[3].z = z;
+
+ /* upload new vertex data */
+ _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts), verts,
+ GL_DYNAMIC_DRAW_ARB);
+
+ /* draw quad */
+ _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+ _mesa_meta_end(ctx);
+}
/**
* Meta implementation of ctx->Driver.CopyPixels() in terms
- * of texture mapping and polygon rendering.
+ * of texture mapping and polygon rendering and GLSL shaders.
*/
void
_mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcX, GLint srcY,
@@ -1621,12 +1760,13 @@ _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcX, GLint srcY,
/* Most GL state applies to glCopyPixels, but a there's a few things
* we need to override:
*/
- _mesa_meta_begin(ctx, (META_RASTERIZATION |
- META_SHADER |
- META_TEXTURE |
- META_TRANSFORM |
- META_VERTEX |
- META_VIEWPORT));
+ _mesa_meta_begin(ctx, (MESA_META_RASTERIZATION |
+ MESA_META_SHADER |
+ MESA_META_TEXTURE |
+ MESA_META_TRANSFORM |
+ MESA_META_CLIP |
+ MESA_META_VERTEX |
+ MESA_META_VIEWPORT));
if (copypix->ArrayObj == 0) {
/* one-time setup */
@@ -1901,10 +2041,10 @@ _mesa_meta_DrawPixels(struct gl_context *ctx,
* in [0,1].
*/
texIntFormat = GL_ALPHA;
- metaExtraSave = (META_COLOR_MASK |
- META_DEPTH_TEST |
- META_SHADER |
- META_STENCIL_TEST);
+ metaExtraSave = (MESA_META_COLOR_MASK |
+ MESA_META_DEPTH_TEST |
+ MESA_META_SHADER |
+ MESA_META_STENCIL_TEST);
}
else {
fallback = GL_TRUE;
@@ -1914,7 +2054,7 @@ _mesa_meta_DrawPixels(struct gl_context *ctx,
if (ctx->Extensions.ARB_depth_texture &&
ctx->Extensions.ARB_fragment_program) {
texIntFormat = GL_DEPTH_COMPONENT;
- metaExtraSave = (META_SHADER);
+ metaExtraSave = (MESA_META_SHADER);
}
else {
fallback = GL_TRUE;
@@ -1942,13 +2082,14 @@ _mesa_meta_DrawPixels(struct gl_context *ctx,
/* Most GL state applies to glDrawPixels (like blending, stencil, etc),
* but a there's a few things we need to override:
*/
- _mesa_meta_begin(ctx, (META_RASTERIZATION |
- META_SHADER |
- META_TEXTURE |
- META_TRANSFORM |
- META_VERTEX |
- META_VIEWPORT |
- META_CLAMP_FRAGMENT_COLOR |
+ _mesa_meta_begin(ctx, (MESA_META_RASTERIZATION |
+ MESA_META_SHADER |
+ MESA_META_TEXTURE |
+ MESA_META_TRANSFORM |
+ MESA_META_CLIP |
+ MESA_META_VERTEX |
+ MESA_META_VIEWPORT |
+ MESA_META_CLAMP_FRAGMENT_COLOR |
metaExtraSave));
newTex = alloc_texture(tex, width, height, texIntFormat);
@@ -2149,14 +2290,15 @@ _mesa_meta_Bitmap(struct gl_context *ctx,
/* Most GL state applies to glBitmap (like blending, stencil, etc),
* but a there's a few things we need to override:
*/
- _mesa_meta_begin(ctx, (META_ALPHA_TEST |
- META_PIXEL_STORE |
- META_RASTERIZATION |
- META_SHADER |
- META_TEXTURE |
- META_TRANSFORM |
- META_VERTEX |
- META_VIEWPORT));
+ _mesa_meta_begin(ctx, (MESA_META_ALPHA_TEST |
+ MESA_META_PIXEL_STORE |
+ MESA_META_RASTERIZATION |
+ MESA_META_SHADER |
+ MESA_META_TEXTURE |
+ MESA_META_TRANSFORM |
+ MESA_META_CLIP |
+ MESA_META_VERTEX |
+ MESA_META_VIEWPORT));
if (bitmap->ArrayObj == 0) {
/* one-time setup */
@@ -2282,7 +2424,9 @@ _mesa_meta_check_generate_mipmap_fallback(struct gl_context *ctx, GLenum target,
/* check for fallbacks */
if (!ctx->Extensions.EXT_framebuffer_object ||
- target == GL_TEXTURE_3D) {
+ target == GL_TEXTURE_3D ||
+ target == GL_TEXTURE_1D_ARRAY ||
+ target == GL_TEXTURE_2D_ARRAY) {
return GL_TRUE;
}
@@ -2334,7 +2478,8 @@ _mesa_meta_check_generate_mipmap_fallback(struct gl_context *ctx, GLenum target,
/**
* Called via ctx->Driver.GenerateMipmap()
- * Note: texture borders and 3D texture support not yet complete.
+ * Note: We don't yet support 3D textures, 1D/2D array textures or texture
+ * borders.
*/
void
_mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
@@ -2374,7 +2519,7 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
faceTarget = target;
}
- _mesa_meta_begin(ctx, META_ALL);
+ _mesa_meta_begin(ctx, MESA_META_ALL);
if (original_active_unit != 0)
_mesa_BindTexture(target, texObj->Name);
@@ -2678,119 +2823,6 @@ get_temp_image_type(struct gl_context *ctx, GLenum baseFormat)
/**
- * Helper for _mesa_meta_CopyTexImage1/2D() functions.
- * Have to be careful with locking and meta state for pixel transfer.
- */
-static void
-copy_tex_image(struct gl_context *ctx, GLuint dims, GLenum target, GLint level,
- GLenum internalFormat, GLint x, GLint y,
- GLsizei width, GLsizei height, GLint border)
-{
- struct gl_texture_object *texObj;
- struct gl_texture_image *texImage;
- GLenum format, type;
- GLint bpp;
- void *buf;
- struct gl_renderbuffer *read_rb = ctx->ReadBuffer->_ColorReadBuffer;
-
- texObj = _mesa_get_current_tex_object(ctx, target);
- texImage = _mesa_get_tex_image(ctx, texObj, target, level);
-
- /* Choose format/type for temporary image buffer */
- format = _mesa_base_tex_format(ctx, internalFormat);
-
- if (format == GL_LUMINANCE &&
- _mesa_get_format_base_format(read_rb->Format) != GL_LUMINANCE) {
- /* The glReadPixels() path will convert RGB to luminance by
- * summing R+G+B. glCopyTexImage() is supposed to behave as
- * glCopyPixels, which doesn't do that change, and instead
- * leaves it up to glTexImage which converts RGB to luminance by
- * just taking the R channel. To avoid glReadPixels() trashing
- * our data, use RGBA for our temporary image.
- */
- format = GL_RGBA;
- }
-
- type = get_temp_image_type(ctx, format);
- bpp = _mesa_bytes_per_pixel(format, type);
- if (bpp <= 0) {
- _mesa_problem(ctx, "Bad bpp in meta copy_tex_image()");
- return;
- }
-
- /*
- * Alloc image buffer (XXX could use a PBO)
- */
- buf = malloc(width * height * bpp);
- if (!buf) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims);
- return;
- }
-
- _mesa_unlock_texture(ctx, texObj); /* need to unlock first */
-
- /*
- * Read image from framebuffer (disable pixel transfer ops)
- */
- _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
- ctx->Driver.ReadPixels(ctx, x, y, width, height,
- format, type, &ctx->Pack, buf);
- _mesa_meta_end(ctx);
-
- if (texImage->Data) {
- ctx->Driver.FreeTexImageData(ctx, texImage);
- }
-
- /* The texture's format was already chosen in _mesa_CopyTexImage() */
- ASSERT(texImage->TexFormat != MESA_FORMAT_NONE);
-
- /*
- * Store texture data (with pixel transfer ops)
- */
- _mesa_meta_begin(ctx, META_PIXEL_STORE);
-
- _mesa_update_state(ctx); /* to update pixel transfer state */
-
- if (target == GL_TEXTURE_1D) {
- ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
- width, border, format, type,
- buf, &ctx->Unpack, texObj, texImage);
- }
- else {
- ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
- width, height, border, format, type,
- buf, &ctx->Unpack, texObj, texImage);
- }
- _mesa_meta_end(ctx);
-
- _mesa_lock_texture(ctx, texObj); /* re-lock */
-
- free(buf);
-}
-
-
-void
-_mesa_meta_CopyTexImage1D(struct gl_context *ctx, GLenum target, GLint level,
- GLenum internalFormat, GLint x, GLint y,
- GLsizei width, GLint border)
-{
- copy_tex_image(ctx, 1, target, level, internalFormat, x, y,
- width, 1, border);
-}
-
-
-void
-_mesa_meta_CopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level,
- GLenum internalFormat, GLint x, GLint y,
- GLsizei width, GLsizei height, GLint border)
-{
- copy_tex_image(ctx, 2, target, level, internalFormat, x, y,
- width, height, border);
-}
-
-
-
-/**
* Helper for _mesa_meta_CopyTexSubImage1/2/3D() functions.
* Have to be careful with locking and meta state for pixel transfer.
*/
@@ -2812,6 +2844,16 @@ copy_tex_sub_image(struct gl_context *ctx,
/* Choose format/type for temporary image buffer */
format = _mesa_get_format_base_format(texImage->TexFormat);
+ if (format == GL_LUMINANCE ||
+ format == GL_LUMINANCE_ALPHA ||
+ format == GL_INTENSITY) {
+ /* We don't want to use GL_LUMINANCE, GL_INTENSITY, etc. for the
+ * temp image buffer because glReadPixels will do L=R+G+B which is
+ * not what we want (should be L=R).
+ */
+ format = GL_RGBA;
+ }
+
type = get_temp_image_type(ctx, format);
bpp = _mesa_bytes_per_pixel(format, type);
if (bpp <= 0) {
@@ -2833,7 +2875,7 @@ copy_tex_sub_image(struct gl_context *ctx,
/*
* Read image from framebuffer (disable pixel transfer ops)
*/
- _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+ _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE | MESA_META_PIXEL_TRANSFER);
ctx->Driver.ReadPixels(ctx, x, y, width, height,
format, type, &ctx->Pack, buf);
_mesa_meta_end(ctx);
@@ -2843,7 +2885,7 @@ copy_tex_sub_image(struct gl_context *ctx,
/*
* Store texture data (with pixel transfer ops)
*/
- _mesa_meta_begin(ctx, META_PIXEL_STORE);
+ _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE);
if (target == GL_TEXTURE_1D) {
ctx->Driver.TexSubImage1D(ctx, target, level, xoffset,
width, format, type, buf,
@@ -2915,7 +2957,7 @@ _mesa_meta_CopyColorTable(struct gl_context *ctx,
/*
* Read image from framebuffer (disable pixel transfer ops)
*/
- _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+ _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE | MESA_META_PIXEL_TRANSFER);
ctx->Driver.ReadPixels(ctx, x, y, width, 1,
GL_RGBA, GL_FLOAT, &ctx->Pack, buf);
@@ -2942,7 +2984,7 @@ _mesa_meta_CopyColorSubTable(struct gl_context *ctx,GLenum target, GLsizei start
/*
* Read image from framebuffer (disable pixel transfer ops)
*/
- _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+ _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE | MESA_META_PIXEL_TRANSFER);
ctx->Driver.ReadPixels(ctx, x, y, width, 1,
GL_RGBA, GL_FLOAT, &ctx->Pack, buf);
diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
index b0797d3d91a..ac20e370eb8 100644
--- a/src/mesa/drivers/common/meta.h
+++ b/src/mesa/drivers/common/meta.h
@@ -26,6 +26,33 @@
#ifndef META_H
#define META_H
+/**
+ * \name Flags for meta operations
+ * \{
+ *
+ * These flags are passed to _mesa_meta_begin().  Each flag names a group
+ * of GL state that _mesa_meta_begin() saves and _mesa_meta_end() restores.
+ * The individual values are distinct bits, so callers OR them together
+ * or, as _mesa_meta_Clear() does, subtract single flags from
+ * MESA_META_ALL.
+ */
+#define MESA_META_ALL ~0x0 /**< every bit set */
+#define MESA_META_ALPHA_TEST 0x1
+#define MESA_META_BLEND 0x2 /**< includes logicop */
+#define MESA_META_COLOR_MASK 0x4
+#define MESA_META_DEPTH_TEST 0x8
+#define MESA_META_FOG 0x10
+#define MESA_META_PIXEL_STORE 0x20 /**< ctx->Pack and ctx->Unpack */
+#define MESA_META_PIXEL_TRANSFER 0x40 /**< ctx->Pixel scale/bias values */
+#define MESA_META_RASTERIZATION 0x80
+#define MESA_META_SCISSOR 0x100
+#define MESA_META_SHADER 0x200
+#define MESA_META_STENCIL_TEST 0x400
+#define MESA_META_TRANSFORM 0x800 /**< modelview/projection matrix state */
+#define MESA_META_TEXTURE 0x1000
+#define MESA_META_VERTEX 0x2000
+#define MESA_META_VIEWPORT 0x4000 /**< viewport and depth range */
+#define MESA_META_CLAMP_FRAGMENT_COLOR 0x8000
+#define MESA_META_CLAMP_VERTEX_COLOR 0x10000
+#define MESA_META_CONDITIONAL_RENDER 0x20000 /**< ctx->Query.CondRender* */
+#define MESA_META_CLIP 0x40000 /**< ctx->Transform.ClipPlanesEnabled */
+/**\}*/
extern void
_mesa_meta_init(struct gl_context *ctx);
@@ -34,6 +61,12 @@ extern void
_mesa_meta_free(struct gl_context *ctx);
extern void
+_mesa_meta_begin(struct gl_context *ctx, GLbitfield state);
+
+extern void
+_mesa_meta_end(struct gl_context *ctx);
+
+extern void
_mesa_meta_BlitFramebuffer(struct gl_context *ctx,
GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
@@ -43,6 +76,9 @@ extern void
_mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers);
extern void
+_mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers);
+
+extern void
_mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
GLsizei width, GLsizei height,
GLint dstx, GLint dsty, GLenum type);
@@ -69,16 +105,6 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
struct gl_texture_object *texObj);
extern void
-_mesa_meta_CopyTexImage1D(struct gl_context *ctx, GLenum target, GLint level,
- GLenum internalFormat, GLint x, GLint y,
- GLsizei width, GLint border);
-
-extern void
-_mesa_meta_CopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level,
- GLenum internalFormat, GLint x, GLint y,
- GLsizei width, GLsizei height, GLint border);
-
-extern void
_mesa_meta_CopyTexSubImage1D(struct gl_context *ctx, GLenum target, GLint level,
GLint xoffset,
GLint x, GLint y, GLsizei width);
diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c
index 77967ac2a43..12dd31bb162 100644
--- a/src/mesa/drivers/dri/common/xmlconfig.c
+++ b/src/mesa/drivers/dri/common/xmlconfig.c
@@ -567,7 +567,7 @@ static void parseOptInfoAttr (struct OptInfoData *data, const XML_Char **attr) {
} else
defaultVal = attrVal[OA_DEFAULT];
if (!parseValue (&cache->values[opt], cache->info[opt].type, defaultVal))
- XML_FATAL ("illegal default value: %s.", defaultVal);
+ XML_FATAL ("illegal default value for %s: %s.", cache->info[opt].name, defaultVal);
if (attrVal[OA_VALID]) {
if (cache->info[opt].type == DRI_BOOL)
diff --git a/src/mesa/drivers/dri/common/xmlpool.h b/src/mesa/drivers/dri/common/xmlpool.h
index 587517ea10a..ffea430024d 100644
--- a/src/mesa/drivers/dri/common/xmlpool.h
+++ b/src/mesa/drivers/dri/common/xmlpool.h
@@ -60,7 +60,7 @@
#define DRI_CONF_OPT_BEGIN(name,type,def) \
"<option name=\""#name"\" type=\""#type"\" default=\""#def"\">\n"
-/** \brief Begin an option definition with qouted default value */
+/** \brief Begin an option definition with quoted default value */
#define DRI_CONF_OPT_BEGIN_Q(name,type,def) \
"<option name=\""#name"\" type=\""#type"\" default="#def">\n"
diff --git a/src/mesa/drivers/dri/common/xmlpool/options.h b/src/mesa/drivers/dri/common/xmlpool/options.h
index d76595578c7..1e584ba086a 100644
--- a/src/mesa/drivers/dri/common/xmlpool/options.h
+++ b/src/mesa/drivers/dri/common/xmlpool/options.h
@@ -425,6 +425,66 @@ DRI_CONF_OPT_BEGIN(hyperz,bool,def) \
DRI_CONF_DESC(sv,"Använd HyperZ för att maximera prestandan") \
DRI_CONF_OPT_END
+/*
+ * Post-processing filter options (pp_*).
+ *
+ * The first four are declared as enum options with the value string "0:1".
+ * The two MLAA options are int options whose value string is built from
+ * the min and max macro arguments, stringized and joined with ":".
+ * Every language entry below currently carries the same English text.
+ */
+#define DRI_CONF_PP_CELSHADE(def) \
+DRI_CONF_OPT_BEGIN_V(pp_celshade,enum,def,"0:1") \
+ DRI_CONF_DESC(en,"A post-processing filter to cel-shade the output") \
+ DRI_CONF_DESC(de,"A post-processing filter to cel-shade the output") \
+ DRI_CONF_DESC(es,"A post-processing filter to cel-shade the output") \
+ DRI_CONF_DESC(nl,"A post-processing filter to cel-shade the output") \
+ DRI_CONF_DESC(fr,"A post-processing filter to cel-shade the output") \
+ DRI_CONF_DESC(sv,"A post-processing filter to cel-shade the output") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NORED(def) \
+DRI_CONF_OPT_BEGIN_V(pp_nored,enum,def,"0:1") \
+ DRI_CONF_DESC(en,"A post-processing filter to remove the red channel") \
+ DRI_CONF_DESC(de,"A post-processing filter to remove the red channel") \
+ DRI_CONF_DESC(es,"A post-processing filter to remove the red channel") \
+ DRI_CONF_DESC(nl,"A post-processing filter to remove the red channel") \
+ DRI_CONF_DESC(fr,"A post-processing filter to remove the red channel") \
+ DRI_CONF_DESC(sv,"A post-processing filter to remove the red channel") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NOGREEN(def) \
+DRI_CONF_OPT_BEGIN_V(pp_nogreen,enum,def,"0:1") \
+ DRI_CONF_DESC(en,"A post-processing filter to remove the green channel") \
+ DRI_CONF_DESC(de,"A post-processing filter to remove the green channel") \
+ DRI_CONF_DESC(es,"A post-processing filter to remove the green channel") \
+ DRI_CONF_DESC(nl,"A post-processing filter to remove the green channel") \
+ DRI_CONF_DESC(fr,"A post-processing filter to remove the green channel") \
+ DRI_CONF_DESC(sv,"A post-processing filter to remove the green channel") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NOBLUE(def) \
+DRI_CONF_OPT_BEGIN_V(pp_noblue,enum,def,"0:1") \
+ DRI_CONF_DESC(en,"A post-processing filter to remove the blue channel") \
+ DRI_CONF_DESC(de,"A post-processing filter to remove the blue channel") \
+ DRI_CONF_DESC(es,"A post-processing filter to remove the blue channel") \
+ DRI_CONF_DESC(nl,"A post-processing filter to remove the blue channel") \
+ DRI_CONF_DESC(fr,"A post-processing filter to remove the blue channel") \
+ DRI_CONF_DESC(sv,"A post-processing filter to remove the blue channel") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_JIMENEZMLAA(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa,int,def, # min ":" # max ) \
+ DRI_CONF_DESC(en,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+ DRI_CONF_DESC(de,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+ DRI_CONF_DESC(es,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+ DRI_CONF_DESC(nl,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+ DRI_CONF_DESC(fr,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+ DRI_CONF_DESC(sv,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_JIMENEZMLAA_COLOR(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa_color,int,def, # min ":" # max ) \
+ DRI_CONF_DESC(en,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+ DRI_CONF_DESC(de,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+ DRI_CONF_DESC(es,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+ DRI_CONF_DESC(nl,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+ DRI_CONF_DESC(fr,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+ DRI_CONF_DESC(sv,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+DRI_CONF_OPT_END
+
#define DRI_CONF_MAX_TEXTURE_UNITS(def,min,max) \
DRI_CONF_OPT_BEGIN_V(texture_units,int,def, # min ":" # max ) \
DRI_CONF_DESC(en,"Number of texture units used") \
diff --git a/src/mesa/drivers/dri/common/xmlpool/t_options.h b/src/mesa/drivers/dri/common/xmlpool/t_options.h
index 5fd6ec65bf8..2427aa77f5b 100644
--- a/src/mesa/drivers/dri/common/xmlpool/t_options.h
+++ b/src/mesa/drivers/dri/common/xmlpool/t_options.h
@@ -191,6 +191,36 @@ DRI_CONF_OPT_BEGIN(hyperz,bool,def) \
DRI_CONF_DESC(en,gettext("Use HyperZ to boost performance")) \
DRI_CONF_OPT_END
+/*
+ * Post-processing filter options (pp_*).
+ *
+ * Only the English description is given here, wrapped in gettext().
+ * The first four are declared as enum options with the value string "0:1";
+ * the two MLAA options are int options whose value string is built from
+ * the min and max macro arguments, stringized and joined with ":".
+ */
+#define DRI_CONF_PP_CELSHADE(def) \
+DRI_CONF_OPT_BEGIN_V(pp_celshade,enum,def,"0:1") \
+ DRI_CONF_DESC(en,gettext("A post-processing filter to cel-shade the output")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NORED(def) \
+DRI_CONF_OPT_BEGIN_V(pp_nored,enum,def,"0:1") \
+ DRI_CONF_DESC(en,gettext("A post-processing filter to remove the red channel")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NOGREEN(def) \
+DRI_CONF_OPT_BEGIN_V(pp_nogreen,enum,def,"0:1") \
+ DRI_CONF_DESC(en,gettext("A post-processing filter to remove the green channel")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NOBLUE(def) \
+DRI_CONF_OPT_BEGIN_V(pp_noblue,enum,def,"0:1") \
+ DRI_CONF_DESC(en,gettext("A post-processing filter to remove the blue channel")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_JIMENEZMLAA(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa,int,def, # min ":" # max ) \
+ DRI_CONF_DESC(en,gettext("Morphological anti-aliasing based on Jimenez\\\' MLAA. 0 to disable, 8 for default quality")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_JIMENEZMLAA_COLOR(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa_color,int,def, # min ":" # max ) \
+ DRI_CONF_DESC(en,gettext("Morphological anti-aliasing based on Jimenez\\\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps")) \
+DRI_CONF_OPT_END
+
#define DRI_CONF_MAX_TEXTURE_UNITS(def,min,max) \
DRI_CONF_OPT_BEGIN_V(texture_units,int,def, # min ":" # max ) \
DRI_CONF_DESC(en,gettext("Number of texture units used")) \
diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c
index 6d43726beb1..ed5286fd7d9 100644
--- a/src/mesa/drivers/dri/i915/i830_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i830_vtbl.c
@@ -881,6 +881,12 @@ i830_invalidate_state(struct intel_context *intel, GLuint new_state)
i830_update_provoking_vertex(&intel->ctx);
}
+/**
+ * Implementation of the intel.vtbl.is_hiz_depth_format hook installed by
+ * i830InitVtbl().
+ *
+ * Both arguments are ignored; the function always returns false.
+ */
+static bool
+i830_is_hiz_depth_format(struct intel_context *intel, gl_format format)
+{
+ return false;
+}
+
void
i830InitVtbl(struct i830_context *i830)
{
@@ -898,4 +904,5 @@ i830InitVtbl(struct i830_context *i830)
i830->intel.vtbl.finish_batch = intel_finish_vb;
i830->intel.vtbl.invalidate_state = i830_invalidate_state;
i830->intel.vtbl.render_target_supported = i830_render_target_supported;
+ i830->intel.vtbl.is_hiz_depth_format = i830_is_hiz_depth_format;
}
diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c
index 6e1d7092237..d155b85ffca 100644
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -175,10 +175,8 @@ src_vector(struct i915_fragment_program *p,
case PROGRAM_STATE_VAR:
case PROGRAM_NAMED_PARAM:
case PROGRAM_UNIFORM:
- src =
- i915_emit_param4fv(p,
- program->Base.Parameters->ParameterValues[source->
- Index]);
+ src = i915_emit_param4fv(p,
+ &program->Base.Parameters->ParameterValues[source->Index][0].f);
break;
default:
@@ -303,7 +301,7 @@ do { \
/*
* TODO: consider moving this into core
*/
-static void calc_live_regs( struct i915_fragment_program *p )
+static bool calc_live_regs( struct i915_fragment_program *p )
{
const struct gl_fragment_program *program = &p->FragProg;
GLuint regsUsed = 0xffff0000;
@@ -317,6 +315,9 @@ static void calc_live_regs( struct i915_fragment_program *p )
/* Register is written to: unmark as live for this and preceeding ops */
if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ if (inst->DstReg.Index >= 16) /* regsUsed tracks temporaries in bits 0..15 only (see 0xffff0000), so 16 is already out of range */
+ return false; /* caller reports the program as not allocatable */
+
live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask;
if (live_components[inst->DstReg.Index] == 0)
regsUsed &= ~(1 << inst->DstReg.Index);
@@ -327,6 +328,9 @@ static void calc_live_regs( struct i915_fragment_program *p )
if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) {
unsigned c;
+ if (inst->SrcReg[a].Index >= 16) /* same 0..15 limit as for destinations; NOTE(review): presumably live_components[] has 16 entries - confirm */
+ return false;
+
regsUsed |= 1 << inst->SrcReg[a].Index;
for (c = 0; c < 4; c++) {
@@ -340,6 +344,8 @@ static void calc_live_regs( struct i915_fragment_program *p )
p->usedRegs[i] = regsUsed;
}
+
+ return true; /* every temporary index seen was within the tracked range */
}
static GLuint get_live_regs( struct i915_fragment_program *p,
@@ -394,7 +400,10 @@ upload_program(struct i915_fragment_program *p)
/* Not always needed:
*/
- calc_live_regs(p);
+ if (!calc_live_regs(p)) {
+ i915_program_error(p, "Could not allocate registers");
+ return;
+ }
while (1) {
GLuint src0, src1, src2, flags;
diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c
index ca1949b223e..0a600d30bef 100644
--- a/src/mesa/drivers/dri/i915/i915_program.c
+++ b/src/mesa/drivers/dri/i915/i915_program.c
@@ -442,14 +442,16 @@ i915_emit_param4fv(struct i915_fragment_program * p, const GLfloat * values)
void
i915_program_error(struct i915_fragment_program *p, const char *fmt, ...)
{
- va_list args;
+ if (unlikely((INTEL_DEBUG & (DEBUG_WM | DEBUG_FALLBACKS)) != 0)) {
+ va_list args;
- fprintf(stderr, "i915_program_error: ");
- va_start(args, fmt);
- vfprintf(stderr, fmt, args);
- va_end(args);
+ fprintf(stderr, "i915_program_error: ");
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
- fprintf(stderr, "\n");
+ fprintf(stderr, "\n");
+ }
p->error = 1;
}
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 44f28cd9d15..d9c885da65b 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -124,7 +124,11 @@ CXX_SOURCES = \
brw_fs_reg_allocate.cpp \
brw_fs_schedule_instructions.cpp \
brw_fs_vector_splitting.cpp \
- brw_shader.cpp
+ brw_shader.cpp \
+ brw_vec4.cpp \
+ brw_vec4_emit.cpp \
+ brw_vec4_reg_allocate.cpp \
+ brw_vec4_visitor.cpp
ASM_SOURCES =
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 471015cf9d0..df63fe1d52c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -212,6 +212,7 @@ enum state_struct_type {
AUB_TRACE_BINDING_TABLE = 0x101,
AUB_TRACE_SURFACE_STATE = 0x102,
AUB_TRACE_VS_CONSTANTS = 0x103,
+ AUB_TRACE_WM_CONSTANTS = 0x104,
};
/** Subclass of Mesa vertex program */
@@ -247,6 +248,7 @@ enum param_conversion {
PARAM_CONVERT_F2I,
PARAM_CONVERT_F2U,
PARAM_CONVERT_F2B,
+ PARAM_CONVERT_ZERO,
};
/* Data about a particular attempt to compile a program. Note that
@@ -310,12 +312,20 @@ struct brw_vs_prog_data {
GLuint total_grf;
GLbitfield64 outputs_written;
GLuint nr_params; /**< number of float params/constants */
+ GLuint total_scratch;
GLuint inputs_read;
/* Used for calculating urb partitions:
*/
GLuint urb_entry_size;
+
+ const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */
+ enum param_conversion param_convert[MAX_UNIFORMS * 4];
+ const float *pull_param[MAX_UNIFORMS * 4];
+ enum param_conversion pull_param_convert[MAX_UNIFORMS * 4];
+
+ bool uses_new_param_layout;
};
@@ -528,7 +538,7 @@ struct brw_context
* the CURBE, the depth buffer, and a query BO.
*/
drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16];
- int validated_bo_count;
+ unsigned int validated_bo_count;
} state;
struct brw_cache cache;
@@ -662,6 +672,7 @@ struct brw_context
struct brw_vs_prog_data *prog_data;
int8_t *constant_map; /* variable array following prog_data */
+ drm_intel_bo *scratch_bo;
drm_intel_bo *const_bo;
/** Offset in the program cache to the VS program */
uint32_t prog_offset;
@@ -674,6 +685,23 @@ struct brw_context
uint32_t push_const_offset; /* Offset in the batchbuffer */
int push_const_size; /* in 256-bit register increments */
+
+ /** @{ register allocator */
+
+ struct ra_regs *regs;
+
+ /**
+ * Array of the ra classes for the unaligned contiguous register
+ * block sizes used.
+ */
+ int *classes;
+
+ /**
+ * Mapping for register-allocated objects in *regs to the first
+ * GRF for that object.
+ */
+ uint8_t *ra_reg_to_grf;
+ /** @} */
} vs;
struct {
@@ -726,7 +754,6 @@ struct brw_context
GLuint render_surf;
GLuint nr_surfaces;
- GLuint max_threads;
drm_intel_bo *scratch_bo;
GLuint sampler_count;
@@ -747,6 +774,29 @@ struct brw_context
* Pre-gen6, push constants live in the CURBE.
*/
uint32_t push_const_offset;
+
+ /** @{ register allocator */
+
+ struct ra_regs *regs;
+
+ /** Array of the ra classes for the unaligned contiguous
+ * register block sizes used.
+ */
+ int *classes;
+
+ /**
+ * Mapping for register-allocated objects in *regs to the first
+ * GRF for that object.
+ */
+ uint8_t *ra_reg_to_grf;
+
+ /**
+ * ra class for the aligned pairs we use for PLN, which doesn't
+ * appear in *classes.
+ */
+ int aligned_pairs_class;
+
+ /** @} */
} wm;
@@ -827,6 +877,10 @@ void brw_validate_textures( struct brw_context *brw );
*/
void brwInitFragProgFuncs( struct dd_function_table *functions );
+int brw_get_scratch_size(int size);
+void brw_get_scratch_bo(struct intel_context *intel,
+ drm_intel_bo **scratch_bo, int size);
+
/* brw_urb.c
*/
@@ -874,7 +928,7 @@ brw_fragment_program_const(const struct gl_fragment_program *p)
}
static inline
-float convert_param(enum param_conversion conversion, float param)
+float convert_param(enum param_conversion conversion, const float *param)
{
union {
float f;
@@ -884,21 +938,23 @@ float convert_param(enum param_conversion conversion, float param)
switch (conversion) {
case PARAM_NO_CONVERT:
- return param;
+ return *param;
case PARAM_CONVERT_F2I:
- fi.i = param;
+ fi.i = *param;
return fi.f;
case PARAM_CONVERT_F2U:
- fi.u = param;
+ fi.u = *param;
return fi.f;
case PARAM_CONVERT_F2B:
- if (param != 0.0)
+ if (*param != 0.0)
fi.i = 1;
else
fi.i = 0;
return fi.f;
+ case PARAM_CONVERT_ZERO:
+ return 0.0;
default:
- return param;
+ return *param;
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index ae11c487a2c..960be10006e 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -203,7 +203,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
/* copy float constants */
for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
buf[offset + i] = convert_param(brw->wm.prog_data->param_convert[i],
- *brw->wm.prog_data->param[i]);
+ brw->wm.prog_data->param[i]);
}
}
@@ -244,15 +244,22 @@ static void prepare_constant_buffer(struct brw_context *brw)
GLuint offset = brw->curbe.vs_start * 16;
GLuint nr = brw->vs.prog_data->nr_params / 4;
- /* Load the subset of push constants that will get used when
- * we also have a pull constant buffer.
- */
- for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
- if (brw->vs.constant_map[i] != -1) {
- assert(brw->vs.constant_map[i] <= nr);
- memcpy(buf + offset + brw->vs.constant_map[i] * 4,
- vp->program.Base.Parameters->ParameterValues[i],
- 4 * sizeof(float));
+ if (brw->vs.prog_data->uses_new_param_layout) {
+ for (i = 0; i < brw->vs.prog_data->nr_params; i++) {
+ buf[offset + i] = convert_param(brw->vs.prog_data->param_convert[i],
+ brw->vs.prog_data->param[i]);
+ }
+ } else {
+ /* Load the subset of push constants that will get used when
+ * we also have a pull constant buffer.
+ */
+ for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+ if (brw->vs.constant_map[i] != -1) {
+ assert(brw->vs.constant_map[i] <= nr);
+ memcpy(buf + offset + brw->vs.constant_map[i] * 4,
+ vp->program.Base.Parameters->ParameterValues[i],
+ 4 * sizeof(float));
+ }
}
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 0a3027d04ad..d1799c0ab4f 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -557,58 +557,93 @@
#define BRW_WE_ALL 1
/** @} */
-#define BRW_OPCODE_MOV 1
-#define BRW_OPCODE_SEL 2
-#define BRW_OPCODE_NOT 4
-#define BRW_OPCODE_AND 5
-#define BRW_OPCODE_OR 6
-#define BRW_OPCODE_XOR 7
-#define BRW_OPCODE_SHR 8
-#define BRW_OPCODE_SHL 9
-#define BRW_OPCODE_RSR 10
-#define BRW_OPCODE_RSL 11
-#define BRW_OPCODE_ASR 12
-#define BRW_OPCODE_CMP 16
-#define BRW_OPCODE_CMPN 17
-#define BRW_OPCODE_JMPI 32
-#define BRW_OPCODE_IF 34
-#define BRW_OPCODE_IFF 35
-#define BRW_OPCODE_ELSE 36
-#define BRW_OPCODE_ENDIF 37
-#define BRW_OPCODE_DO 38
-#define BRW_OPCODE_WHILE 39
-#define BRW_OPCODE_BREAK 40
-#define BRW_OPCODE_CONTINUE 41
-#define BRW_OPCODE_HALT 42
-#define BRW_OPCODE_MSAVE 44
-#define BRW_OPCODE_MRESTORE 45
-#define BRW_OPCODE_PUSH 46
-#define BRW_OPCODE_POP 47
-#define BRW_OPCODE_WAIT 48
-#define BRW_OPCODE_SEND 49
-#define BRW_OPCODE_SENDC 50
-#define BRW_OPCODE_MATH 56
-#define BRW_OPCODE_ADD 64
-#define BRW_OPCODE_MUL 65
-#define BRW_OPCODE_AVG 66
-#define BRW_OPCODE_FRC 67
-#define BRW_OPCODE_RNDU 68
-#define BRW_OPCODE_RNDD 69
-#define BRW_OPCODE_RNDE 70
-#define BRW_OPCODE_RNDZ 71
-#define BRW_OPCODE_MAC 72
-#define BRW_OPCODE_MACH 73
-#define BRW_OPCODE_LZD 74
-#define BRW_OPCODE_SAD2 80
-#define BRW_OPCODE_SADA2 81
-#define BRW_OPCODE_DP4 84
-#define BRW_OPCODE_DPH 85
-#define BRW_OPCODE_DP3 86
-#define BRW_OPCODE_DP2 87
-#define BRW_OPCODE_DPA2 88
-#define BRW_OPCODE_LINE 89
-#define BRW_OPCODE_PLN 90
-#define BRW_OPCODE_NOP 126
+enum opcode {
+ /* These are the actual hardware opcodes. */
+ BRW_OPCODE_MOV = 1,
+ BRW_OPCODE_SEL = 2,
+ BRW_OPCODE_NOT = 4,
+ BRW_OPCODE_AND = 5,
+ BRW_OPCODE_OR = 6,
+ BRW_OPCODE_XOR = 7,
+ BRW_OPCODE_SHR = 8,
+ BRW_OPCODE_SHL = 9,
+ BRW_OPCODE_RSR = 10,
+ BRW_OPCODE_RSL = 11,
+ BRW_OPCODE_ASR = 12,
+ BRW_OPCODE_CMP = 16,
+ BRW_OPCODE_CMPN = 17,
+ BRW_OPCODE_JMPI = 32,
+ BRW_OPCODE_IF = 34,
+ BRW_OPCODE_IFF = 35,
+ BRW_OPCODE_ELSE = 36,
+ BRW_OPCODE_ENDIF = 37,
+ BRW_OPCODE_DO = 38,
+ BRW_OPCODE_WHILE = 39,
+ BRW_OPCODE_BREAK = 40,
+ BRW_OPCODE_CONTINUE = 41,
+ BRW_OPCODE_HALT = 42,
+ BRW_OPCODE_MSAVE = 44,
+ BRW_OPCODE_MRESTORE = 45,
+ BRW_OPCODE_PUSH = 46,
+ BRW_OPCODE_POP = 47,
+ BRW_OPCODE_WAIT = 48,
+ BRW_OPCODE_SEND = 49,
+ BRW_OPCODE_SENDC = 50,
+ BRW_OPCODE_MATH = 56,
+ BRW_OPCODE_ADD = 64,
+ BRW_OPCODE_MUL = 65,
+ BRW_OPCODE_AVG = 66,
+ BRW_OPCODE_FRC = 67,
+ BRW_OPCODE_RNDU = 68,
+ BRW_OPCODE_RNDD = 69,
+ BRW_OPCODE_RNDE = 70,
+ BRW_OPCODE_RNDZ = 71,
+ BRW_OPCODE_MAC = 72,
+ BRW_OPCODE_MACH = 73,
+ BRW_OPCODE_LZD = 74,
+ BRW_OPCODE_SAD2 = 80,
+ BRW_OPCODE_SADA2 = 81,
+ BRW_OPCODE_DP4 = 84,
+ BRW_OPCODE_DPH = 85,
+ BRW_OPCODE_DP3 = 86,
+ BRW_OPCODE_DP2 = 87,
+ BRW_OPCODE_DPA2 = 88,
+ BRW_OPCODE_LINE = 89,
+ BRW_OPCODE_PLN = 90,
+ BRW_OPCODE_NOP = 126,
+
+ /* These are compiler backend opcodes that get translated into other
+ * instructions.
+ */
+ FS_OPCODE_FB_WRITE = 128,
+ SHADER_OPCODE_RCP,
+ SHADER_OPCODE_RSQ,
+ SHADER_OPCODE_SQRT,
+ SHADER_OPCODE_EXP2,
+ SHADER_OPCODE_LOG2,
+ SHADER_OPCODE_POW,
+ SHADER_OPCODE_SIN,
+ SHADER_OPCODE_COS,
+ FS_OPCODE_DDX,
+ FS_OPCODE_DDY,
+ FS_OPCODE_PIXEL_X,
+ FS_OPCODE_PIXEL_Y,
+ FS_OPCODE_CINTERP,
+ FS_OPCODE_LINTERP,
+ FS_OPCODE_TEX,
+ FS_OPCODE_TXB,
+ FS_OPCODE_TXD,
+ FS_OPCODE_TXL,
+ FS_OPCODE_TXS,
+ FS_OPCODE_DISCARD,
+ FS_OPCODE_SPILL,
+ FS_OPCODE_UNSPILL,
+ FS_OPCODE_PULL_CONSTANT_LOAD,
+
+ VS_OPCODE_URB_WRITE,
+ VS_OPCODE_SCRATCH_READ,
+ VS_OPCODE_SCRATCH_WRITE,
+};
#define BRW_PREDICATE_NONE 0
#define BRW_PREDICATE_NORMAL 1
@@ -734,7 +769,6 @@
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
-#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
@@ -747,6 +781,7 @@
#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10
/* for GEN5 only */
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index af41c848308..927b0b4acc9 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -309,6 +309,35 @@ char *target_function[16] = {
[BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner"
};
+char *target_function_gen6[16] = {
+ [BRW_MESSAGE_TARGET_NULL] = "null",
+ [BRW_MESSAGE_TARGET_MATH] = "math",
+ [BRW_MESSAGE_TARGET_SAMPLER] = "sampler",
+ [BRW_MESSAGE_TARGET_GATEWAY] = "gateway",
+ [GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE] = "sampler",
+ [GEN6_MESSAGE_TARGET_DP_RENDER_CACHE] = "render",
+ [GEN6_MESSAGE_TARGET_DP_CONST_CACHE] = "const",
+ [BRW_MESSAGE_TARGET_URB] = "urb",
+ [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner"
+};
+
+char *dp_rc_msg_type_gen6[16] = {
+ [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read",
+ [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read",
+ [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read",
+ [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read",
+ [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read",
+ [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read",
+ [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write",
+};
+
char *math_function[16] = {
[BRW_MATH_FUNCTION_INV] = "inv",
[BRW_MATH_FUNCTION_LOG] = "log",
@@ -927,8 +956,14 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
newline (file);
pad (file, 16);
space = 0;
- err |= control (file, "target function", target_function,
- target, &space);
+
+ if (gen >= 6) {
+ err |= control (file, "target function", target_function_gen6,
+ target, &space);
+ } else {
+ err |= control (file, "target function", target_function,
+ target, &space);
+ }
switch (target) {
case BRW_MESSAGE_TARGET_MATH:
@@ -985,9 +1020,16 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
inst->bits3.dp_read.msg_type);
}
break;
+
case BRW_MESSAGE_TARGET_DATAPORT_WRITE:
if (gen >= 6) {
- format (file, " (%d, %d, %d, %d, %d, %d)",
+ format (file, " (");
+
+ err |= control (file, "DP rc message type",
+ dp_rc_msg_type_gen6,
+ inst->bits3.gen6_dp.msg_type, &space);
+
+ format (file, ", %d, %d, %d, %d, %d, %d)",
inst->bits3.gen6_dp.binding_table_index,
inst->bits3.gen6_dp.msg_control,
inst->bits3.gen6_dp.msg_type,
@@ -1003,6 +1045,7 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
inst->bits3.dp_write.send_commit_msg);
}
break;
+
case BRW_MESSAGE_TARGET_URB:
if (gen >= 5) {
format (file, " %d", inst->bits3.urb_gen5.offset);
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 56a46ced6e3..7bc69c612e3 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -689,17 +689,17 @@ static void brw_prepare_indices(struct brw_context *brw)
* rebase it into a temporary.
*/
if ((get_size(index_buffer->type) - 1) & offset) {
- GLubyte *map = ctx->Driver.MapBuffer(ctx,
- GL_ELEMENT_ARRAY_BUFFER_ARB,
- GL_DYNAMIC_DRAW_ARB,
- bufferobj);
- map += offset;
+ GLubyte *map = ctx->Driver.MapBufferRange(ctx,
+ offset,
+ ib_size,
+ GL_MAP_WRITE_BIT,
+ bufferobj);
intel_upload_data(&brw->intel, map, ib_size, ib_type_size,
&bo, &offset);
brw->ib.start_vertex_offset = offset / ib_type_size;
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj);
+ ctx->Driver.UnmapBuffer(ctx, bufferobj);
} else {
/* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
* the index buffer state when we're just moving the start index
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 72d50eadbce..af50305fc2b 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -44,6 +44,9 @@
#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
+#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1)
+#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2)
+#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
@@ -798,6 +801,12 @@ void brw_init_compile(struct brw_context *, struct brw_compile *p,
void *mem_ctx);
const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz );
+struct brw_instruction *brw_next_insn(struct brw_compile *p, GLuint opcode);
+void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
+ struct brw_reg dest);
+void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
+ struct brw_reg reg);
+
/* Helpers for regular instructions:
*/
@@ -852,6 +861,27 @@ ROUND(RNDE)
/* Helpers for SEND instruction:
*/
+void brw_set_dp_read_message(struct brw_compile *p,
+ struct brw_instruction *insn,
+ GLuint binding_table_index,
+ GLuint msg_control,
+ GLuint msg_type,
+ GLuint target_cache,
+ GLuint msg_length,
+ GLuint response_length);
+
+void brw_set_dp_write_message(struct brw_compile *p,
+ struct brw_instruction *insn,
+ GLuint binding_table_index,
+ GLuint msg_control,
+ GLuint msg_type,
+ GLuint msg_length,
+ GLboolean header_present,
+ GLuint pixel_scoreboard_clear,
+ GLuint response_length,
+ GLuint end_of_thread,
+ GLuint send_commit_msg);
+
void brw_urb_WRITE(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index e7370f36064..c5013de7ec1 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -89,9 +89,9 @@ gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
}
-static void brw_set_dest(struct brw_compile *p,
- struct brw_instruction *insn,
- struct brw_reg dest)
+void
+brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
+ struct brw_reg dest)
{
if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
dest.file != BRW_MESSAGE_REGISTER_FILE)
@@ -221,9 +221,9 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg)
/* 10. Check destination issues. */
}
-static void brw_set_src0(struct brw_compile *p,
- struct brw_instruction *insn,
- struct brw_reg reg)
+void
+brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
+ struct brw_reg reg)
{
if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
assert(reg.nr < 128);
@@ -504,17 +504,18 @@ static void brw_set_urb_message( struct brw_compile *p,
}
}
-static void brw_set_dp_write_message( struct brw_compile *p,
- struct brw_instruction *insn,
- GLuint binding_table_index,
- GLuint msg_control,
- GLuint msg_type,
- GLuint msg_length,
- GLboolean header_present,
- GLuint pixel_scoreboard_clear,
- GLuint response_length,
- GLuint end_of_thread,
- GLuint send_commit_msg)
+void
+brw_set_dp_write_message(struct brw_compile *p,
+ struct brw_instruction *insn,
+ GLuint binding_table_index,
+ GLuint msg_control,
+ GLuint msg_type,
+ GLuint msg_length,
+ GLboolean header_present,
+ GLuint pixel_scoreboard_clear,
+ GLuint response_length,
+ GLuint end_of_thread,
+ GLuint send_commit_msg)
{
struct brw_context *brw = p->brw;
struct intel_context *intel = &brw->intel;
@@ -570,7 +571,7 @@ static void brw_set_dp_write_message( struct brw_compile *p,
}
}
-static void
+void
brw_set_dp_read_message(struct brw_compile *p,
struct brw_instruction *insn,
GLuint binding_table_index,
@@ -709,9 +710,9 @@ static void brw_set_sampler_message(struct brw_compile *p,
}
-
-static struct brw_instruction *next_insn( struct brw_compile *p,
- GLuint opcode )
+#define next_insn brw_next_insn
+struct brw_instruction *
+brw_next_insn(struct brw_compile *p, GLuint opcode)
{
struct brw_instruction *insn;
@@ -732,7 +733,6 @@ static struct brw_instruction *next_insn( struct brw_compile *p,
return insn;
}
-
static struct brw_instruction *brw_alu1( struct brw_compile *p,
GLuint opcode,
struct brw_reg dest,
@@ -1341,8 +1341,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
brw_set_src1(p, insn, brw_imm_ud(0));
insn->bits3.break_cont.jip = br * (do_insn - insn);
- insn->header.execution_size = do_insn->header.execution_size;
- assert(insn->header.execution_size == BRW_EXECUTE_8);
+ insn->header.execution_size = BRW_EXECUTE_8;
} else if (intel->gen == 6) {
insn = next_insn(p, BRW_OPCODE_WHILE);
@@ -1351,8 +1350,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- insn->header.execution_size = do_insn->header.execution_size;
- assert(insn->header.execution_size == BRW_EXECUTE_8);
+ insn->header.execution_size = BRW_EXECUTE_8;
} else {
if (p->single_program_flow) {
insn = next_insn(p, BRW_OPCODE_ADD);
@@ -2246,10 +2244,13 @@ void brw_urb_WRITE(struct brw_compile *p,
if (intel->gen == 7) {
/* Enable Channel Masks in the URB_WRITE_HWORD message header */
+ brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
BRW_REGISTER_TYPE_UD),
retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
brw_imm_ud(0xff00));
+ brw_pop_insn_state(p);
}
insn = next_insn(p, BRW_OPCODE_SEND);
@@ -2311,7 +2312,7 @@ brw_find_loop_end(struct brw_compile *p, int start)
if (insn->header.opcode == BRW_OPCODE_WHILE) {
int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
: insn->bits3.break_cont.jip;
- if (ip + jip / br < start)
+ if (ip + jip / br <= start)
return ip;
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b5ea943387d..0b0445ea142 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -143,20 +143,21 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
return 0;
switch (inst->opcode) {
- case FS_OPCODE_RCP:
- case FS_OPCODE_RSQ:
- case FS_OPCODE_SQRT:
- case FS_OPCODE_EXP2:
- case FS_OPCODE_LOG2:
- case FS_OPCODE_SIN:
- case FS_OPCODE_COS:
+ case SHADER_OPCODE_RCP:
+ case SHADER_OPCODE_RSQ:
+ case SHADER_OPCODE_SQRT:
+ case SHADER_OPCODE_EXP2:
+ case SHADER_OPCODE_LOG2:
+ case SHADER_OPCODE_SIN:
+ case SHADER_OPCODE_COS:
return 1 * c->dispatch_width / 8;
- case FS_OPCODE_POW:
+ case SHADER_OPCODE_POW:
return 2 * c->dispatch_width / 8;
case FS_OPCODE_TEX:
case FS_OPCODE_TXB:
case FS_OPCODE_TXD:
case FS_OPCODE_TXL:
+ case FS_OPCODE_TXS:
return 1;
case FS_OPCODE_FB_WRITE:
return 2;
@@ -181,29 +182,26 @@ fs_visitor::virtual_grf_alloc(int size)
virtual_grf_array_size *= 2;
virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
virtual_grf_array_size);
-
- /* This slot is always unused. */
- virtual_grf_sizes[0] = 0;
}
virtual_grf_sizes[virtual_grf_next] = size;
return virtual_grf_next++;
}
/** Fixed HW reg constructor. */
-fs_reg::fs_reg(enum register_file file, int hw_reg)
+fs_reg::fs_reg(enum register_file file, int reg)
{
init();
this->file = file;
- this->hw_reg = hw_reg;
+ this->reg = reg;
this->type = BRW_REGISTER_TYPE_F;
}
/** Fixed HW reg constructor. */
-fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type)
+fs_reg::fs_reg(enum register_file file, int reg, uint32_t type)
{
init();
this->file = file;
- this->hw_reg = hw_reg;
+ this->reg = reg;
this->type = type;
}
@@ -242,11 +240,12 @@ import_uniforms_callback(const void *key,
* This brings in those uniform definitions
*/
void
-fs_visitor::import_uniforms(struct hash_table *src_variable_ht)
+fs_visitor::import_uniforms(fs_visitor *v)
{
- hash_table_call_foreach(src_variable_ht,
+ hash_table_call_foreach(v->variable_ht,
import_uniforms_callback,
variable_ht);
+ this->params_remap = v->params_remap;
}
/* Our support for uniforms is piggy-backed on the struct
@@ -281,23 +280,27 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
assert(param < ARRAY_SIZE(c->prog_data.param));
- switch (type->base_type) {
- case GLSL_TYPE_FLOAT:
+ if (ctx->Const.NativeIntegers) {
c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
- break;
- case GLSL_TYPE_UINT:
- c->prog_data.param_convert[param] = PARAM_CONVERT_F2U;
- break;
- case GLSL_TYPE_INT:
- c->prog_data.param_convert[param] = PARAM_CONVERT_F2I;
- break;
- case GLSL_TYPE_BOOL:
- c->prog_data.param_convert[param] = PARAM_CONVERT_F2B;
- break;
- default:
- assert(!"not reached");
- c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
- break;
+ } else {
+ switch (type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
+ break;
+ case GLSL_TYPE_UINT:
+ c->prog_data.param_convert[param] = PARAM_CONVERT_F2U;
+ break;
+ case GLSL_TYPE_INT:
+ c->prog_data.param_convert[param] = PARAM_CONVERT_F2I;
+ break;
+ case GLSL_TYPE_BOOL:
+ c->prog_data.param_convert[param] = PARAM_CONVERT_F2B;
+ break;
+ default:
+ assert(!"not reached");
+ c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
+ break;
+ }
}
this->param_index[param] = loc;
this->param_offset[param] = i;
@@ -463,9 +466,21 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
} else {
/* Perspective interpolation case. */
for (unsigned int k = 0; k < type->vector_elements; k++) {
- struct brw_reg interp = interp_reg(location, k);
- emit(FS_OPCODE_LINTERP, attr,
- this->delta_x, this->delta_y, fs_reg(interp));
+ /* FINISHME: At some point we probably want to push
+ * this farther by giving similar treatment to the
+ * other potentially constant components of the
+ * attribute, as well as making brw_vs_constval.c
+ * handle varyings other than gl_TexCoord.
+ */
+ if (location >= FRAG_ATTRIB_TEX0 &&
+ location <= FRAG_ATTRIB_TEX7 &&
+ k == 3 && !(c->key.proj_attrib_mask & (1 << location))) {
+ emit(BRW_OPCODE_MOV, attr, fs_reg(1.0f));
+ } else {
+ struct brw_reg interp = interp_reg(location, k);
+ emit(FS_OPCODE_LINTERP, attr,
+ this->delta_x, this->delta_y, fs_reg(interp));
+ }
attr.reg_offset++;
}
@@ -512,16 +527,16 @@ fs_visitor::emit_frontfacing_interpolation(ir_variable *ir)
}
fs_inst *
-fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
+fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src)
{
switch (opcode) {
- case FS_OPCODE_RCP:
- case FS_OPCODE_RSQ:
- case FS_OPCODE_SQRT:
- case FS_OPCODE_EXP2:
- case FS_OPCODE_LOG2:
- case FS_OPCODE_SIN:
- case FS_OPCODE_COS:
+ case SHADER_OPCODE_RCP:
+ case SHADER_OPCODE_RSQ:
+ case SHADER_OPCODE_SQRT:
+ case SHADER_OPCODE_EXP2:
+ case SHADER_OPCODE_LOG2:
+ case SHADER_OPCODE_SIN:
+ case SHADER_OPCODE_COS:
break;
default:
assert(!"not reached: bad math opcode");
@@ -555,12 +570,12 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
}
fs_inst *
-fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1)
+fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
{
int base_mrf = 2;
fs_inst *inst;
- assert(opcode == FS_OPCODE_POW);
+ assert(opcode == SHADER_OPCODE_POW);
if (intel->gen >= 6) {
/* Can't do hstride == 0 args to gen6 math, so expand it out.
@@ -605,7 +620,7 @@ fs_visitor::setup_paramvalues_refs()
/* Set up the pointers to ParamValues now that that array is finalized. */
for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
c->prog_data.param[i] =
- fp->Base.Parameters->ParameterValues[this->param_index[i]] +
+ (const float *)fp->Base.Parameters->ParameterValues[this->param_index[i]] +
this->param_offset[i];
}
}
@@ -621,12 +636,12 @@ fs_visitor::assign_curb_setup()
}
/* Map the offsets in the UNIFORM file to fixed HW regs. */
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == UNIFORM) {
- int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+ int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs +
constant_nr / 8,
constant_nr % 8);
@@ -684,8 +699,8 @@ fs_visitor::assign_urb_setup()
/* Offset all the urb_setup[] index by the actual position of the
* setup regs, now that the location of the constants has been chosen.
*/
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
if (inst->opcode == FS_OPCODE_LINTERP) {
assert(inst->src[2].file == FIXED_HW_REG);
@@ -739,8 +754,8 @@ fs_visitor::split_virtual_grfs()
split_grf[this->delta_x.reg] = false;
}
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
/* Texturing produces 4 contiguous registers, so no splitting. */
if (inst->is_tex()) {
@@ -763,8 +778,8 @@ fs_visitor::split_virtual_grfs()
}
}
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
if (inst->dst.file == GRF &&
split_grf[inst->dst.reg] &&
@@ -786,6 +801,86 @@ fs_visitor::split_virtual_grfs()
this->live_intervals_valid = false;
}
+/**
+ * Drops uniform params that no instruction reads and renumbers the
+ * remaining ones so that they are densely packed.
+ *
+ * When c->dispatch_width is 8 this builds this->params_remap (old param
+ * index -> packed index, or -1 for an unused param), compacts
+ * c->prog_data.param[] and param_convert[] in place and shrinks
+ * c->prog_data.nr_params. For any other dispatch width it only asserts
+ * that params_remap is already set. In both cases every UNIFORM source
+ * operand is then rewritten to its packed index.
+ *
+ * Always returns true.
+ */
+bool
+fs_visitor::remove_dead_constants()
+{
+ if (c->dispatch_width == 8) {
+ this->params_remap = ralloc_array(mem_ctx, int, c->prog_data.nr_params);
+
+ /* Start with every param marked as unused (-1). */
+ for (unsigned int i = 0; i < c->prog_data.nr_params; i++)
+ this->params_remap[i] = -1;
+
+ /* Find which params are still in use. */
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
+
+ for (int i = 0; i < 3; i++) {
+ /* Computed unconditionally, but only used for UNIFORM sources
+ * (non-UNIFORM sources are skipped just below).
+ */
+ int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
+
+ if (inst->src[i].file != UNIFORM)
+ continue;
+
+ assert(constant_nr < (int)c->prog_data.nr_params);
+
+ /* For now, set this to non-negative. We'll give it the
+ * actual new number in a moment, in order to keep the
+ * register numbers nicely ordered.
+ */
+ this->params_remap[constant_nr] = 0;
+ }
+ }
+
+ /* Figure out what the new numbers for the params will be. At some
+ * point when we're doing uniform array access, we're going to want
+ * to keep the distinction between .reg and .reg_offset, but for
+ * now we don't care.
+ */
+ unsigned int new_nr_params = 0;
+ for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
+ if (this->params_remap[i] != -1) {
+ this->params_remap[i] = new_nr_params++;
+ }
+ }
+
+ /* Update the list of params to be uploaded to match our new numbering. */
+ for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
+ int remapped = this->params_remap[i];
+
+ if (remapped == -1)
+ continue;
+
+ /* We've already done setup_paramvalues_refs() so no need to worry
+ * about param_index and param_offset.
+ *
+ * The copy is done in place: the packed index handed out above is
+ * never larger than i, so walking i upwards never overwrites an
+ * entry that still has to be moved.
+ */
+ c->prog_data.param[remapped] = c->prog_data.param[i];
+ c->prog_data.param_convert[remapped] = c->prog_data.param_convert[i];
+ }
+
+ c->prog_data.nr_params = new_nr_params;
+ } else {
+ /* This should have been generated in the 8-wide pass already. */
+ assert(this->params_remap);
+ }
+
+ /* Now do the renumbering of the shader to remove unused params. */
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
+
+ for (int i = 0; i < 3; i++) {
+ int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
+
+ if (inst->src[i].file != UNIFORM)
+ continue;
+
+ /* params_remap is indexed by reg + reg_offset, so the packed
+ * index goes into .reg and .reg_offset is reset to 0.
+ */
+ assert(this->params_remap[constant_nr] != -1);
+ inst->src[i].reg = this->params_remap[constant_nr];
+ inst->src[i].reg_offset = 0;
+ }
+ }
+
+ return true;
+}
+
/**
* Choose accesses from the UNIFORM file to demote to using the pull
* constant buffer.
@@ -815,14 +910,14 @@ fs_visitor::setup_pull_constants()
int pull_uniform_base = max_uniform_components;
int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base;
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
for (int i = 0; i < 3; i++) {
if (inst->src[i].file != UNIFORM)
continue;
- int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+ int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset;
if (uniform_nr < pull_uniform_base)
continue;
@@ -871,8 +966,8 @@ fs_visitor::calculate_live_intervals()
}
int ip = 0;
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
if (inst->opcode == BRW_OPCODE_DO) {
if (loop_depth++ == 0)
@@ -892,7 +987,7 @@ fs_visitor::calculate_live_intervals()
}
} else {
for (unsigned int i = 0; i < 3; i++) {
- if (inst->src[i].file == GRF && inst->src[i].reg != 0) {
+ if (inst->src[i].file == GRF) {
int reg = inst->src[i].reg;
if (!loop_depth) {
@@ -908,7 +1003,7 @@ fs_visitor::calculate_live_intervals()
}
}
}
- if (inst->dst.file == GRF && inst->dst.reg != 0) {
+ if (inst->dst.file == GRF) {
int reg = inst->dst.reg;
if (!loop_depth) {
@@ -945,8 +1040,8 @@ fs_visitor::propagate_constants()
calculate_live_intervals();
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
if (inst->opcode != BRW_OPCODE_MOV ||
inst->predicated ||
@@ -965,11 +1060,9 @@ fs_visitor::propagate_constants()
/* Found a move of a constant to a GRF. Find anything else using the GRF
* before it's written, and replace it with the constant if we can.
*/
- exec_list_iterator scan_iter = iter;
- scan_iter.next();
- for (; scan_iter.has_next(); scan_iter.next()) {
- fs_inst *scan_inst = (fs_inst *)scan_iter.get();
-
+ for (fs_inst *scan_inst = (fs_inst *)inst->next;
+ !scan_inst->is_tail_sentinel();
+ scan_inst = (fs_inst *)scan_inst->next) {
if (scan_inst->opcode == BRW_OPCODE_DO ||
scan_inst->opcode == BRW_OPCODE_WHILE ||
scan_inst->opcode == BRW_OPCODE_ELSE ||
@@ -1046,6 +1139,24 @@ fs_visitor::propagate_constants()
progress = true;
}
break;
+
+ case SHADER_OPCODE_RCP:
+ /* The hardware doesn't do math on immediate values
+ * (because why are you doing that, seriously?), but
+ * the correct answer is to just constant fold it
+ * anyway.
+ */
+ assert(i == 0);
+ if (inst->src[0].imm.f != 0.0f) {
+ scan_inst->opcode = BRW_OPCODE_MOV;
+ scan_inst->src[0] = inst->src[0];
+ scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f;
+ progress = true;
+ }
+ break;
+
+ default:
+ break;
}
}
@@ -1063,6 +1174,49 @@ fs_visitor::propagate_constants()
return progress;
}
+
+
+/**
+ * Applies simple algebraic simplifications to the instruction list.
+ *
+ * The only rewrite done here is turning a MUL whose second source is
+ * the float immediate 1.0 into a MOV of its first source.
+ *
+ * Returns true if any instruction was changed.
+ */
+
+bool
+fs_visitor::opt_algebraic()
+{
+ bool progress = false;
+
+ /* NOTE(review): nothing below in this function reads the live
+ * interval data; confirm whether this call is needed here.
+ */
+ calculate_live_intervals();
+
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
+
+ switch (inst->opcode) {
+ case BRW_OPCODE_MUL:
+ /* Only a MUL by an immediate is a candidate; otherwise move on
+ * to the next instruction.
+ */
+ if (inst->src[1].file != IMM)
+ continue;
+
+ /* a * 1.0 = a */
+ if (inst->src[1].type == BRW_REGISTER_TYPE_F &&
+ inst->src[1].imm.f == 1.0) {
+ inst->opcode = BRW_OPCODE_MOV;
+ inst->src[1] = reg_undef;
+ progress = true;
+ break;
+ }
+
+ break;
+ default:
+ break;
+ }
+ }
+
+ return progress;
+}
+
/**
* Must be called after calculate_live_intervals() to remove unused
* writes to registers -- register allocation will fail otherwise
@@ -1077,8 +1231,8 @@ fs_visitor::dead_code_eliminate()
calculate_live_intervals();
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list_safe(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) {
inst->remove();
@@ -1101,8 +1255,8 @@ fs_visitor::register_coalesce()
int if_depth = 0;
int loop_depth = 0;
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list_safe(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
/* Make sure that we dominate the instructions we're going to
* scan for interfering with our coalescing, or we won't have
@@ -1123,6 +1277,8 @@ fs_visitor::register_coalesce()
case BRW_OPCODE_ENDIF:
if_depth--;
break;
+ default:
+ break;
}
if (loop_depth || if_depth)
continue;
@@ -1130,7 +1286,8 @@ fs_visitor::register_coalesce()
if (inst->opcode != BRW_OPCODE_MOV ||
inst->predicated ||
inst->saturate ||
- inst->dst.file != GRF || inst->src[0].file != GRF ||
+ inst->dst.file != GRF || (inst->src[0].file != GRF &&
+ inst->src[0].file != UNIFORM)||
inst->dst.type != inst->src[0].type)
continue;
@@ -1141,11 +1298,10 @@ fs_visitor::register_coalesce()
* program.
*/
bool interfered = false;
- exec_list_iterator scan_iter = iter;
- scan_iter.next();
- for (; scan_iter.has_next(); scan_iter.next()) {
- fs_inst *scan_inst = (fs_inst *)scan_iter.get();
+ for (fs_inst *scan_inst = (fs_inst *)inst->next;
+ !scan_inst->is_tail_sentinel();
+ scan_inst = (fs_inst *)scan_inst->next) {
if (scan_inst->dst.file == GRF) {
if (scan_inst->dst.reg == inst->dst.reg &&
(scan_inst->dst.reg_offset == inst->dst.reg_offset ||
@@ -1153,7 +1309,8 @@ fs_visitor::register_coalesce()
interfered = true;
break;
}
- if (scan_inst->dst.reg == inst->src[0].reg &&
+ if (inst->src[0].file == GRF &&
+ scan_inst->dst.reg == inst->src[0].reg &&
(scan_inst->dst.reg_offset == inst->src[0].reg_offset ||
scan_inst->is_tex())) {
interfered = true;
@@ -1161,10 +1318,13 @@ fs_visitor::register_coalesce()
}
}
- /* The gen6 MATH instruction can't handle source modifiers, so avoid
- * coalescing those for now. We should do something more specific.
+ /* The gen6 MATH instruction can't handle source modifiers or
+ * unusual register regions, so avoid coalescing those for
+ * now. We should do something more specific.
*/
- if (intel->gen >= 6 && scan_inst->is_math() && has_source_modifiers) {
+ if (intel->gen >= 6 &&
+ scan_inst->is_math() &&
+ (has_source_modifiers || inst->src[0].file == UNIFORM)) {
interfered = true;
break;
}
@@ -1176,19 +1336,17 @@ fs_visitor::register_coalesce()
/* Rewrite the later usage to point at the source of the move to
* be removed.
*/
- for (exec_list_iterator scan_iter = iter; scan_iter.has_next();
- scan_iter.next()) {
- fs_inst *scan_inst = (fs_inst *)scan_iter.get();
-
+ for (fs_inst *scan_inst = inst;
+ !scan_inst->is_tail_sentinel();
+ scan_inst = (fs_inst *)scan_inst->next) {
for (int i = 0; i < 3; i++) {
if (scan_inst->src[i].file == GRF &&
scan_inst->src[i].reg == inst->dst.reg &&
scan_inst->src[i].reg_offset == inst->dst.reg_offset) {
- scan_inst->src[i].reg = inst->src[0].reg;
- scan_inst->src[i].reg_offset = inst->src[0].reg_offset;
- scan_inst->src[i].abs |= inst->src[0].abs;
- scan_inst->src[i].negate ^= inst->src[0].negate;
- scan_inst->src[i].smear = inst->src[0].smear;
+ fs_reg new_src = inst->src[0];
+ new_src.negate ^= scan_inst->src[i].negate;
+ new_src.abs |= scan_inst->src[i].abs;
+ scan_inst->src[i] = new_src;
}
}
}
@@ -1212,8 +1370,8 @@ fs_visitor::compute_to_mrf()
calculate_live_intervals();
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list_safe(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
int ip = next_ip;
next_ip++;
@@ -1228,9 +1386,9 @@ fs_visitor::compute_to_mrf()
/* Work out which hardware MRF registers are written by this
* instruction.
*/
- int mrf_low = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+ int mrf_low = inst->dst.reg & ~BRW_MRF_COMPR4;
int mrf_high;
- if (inst->dst.hw_reg & BRW_MRF_COMPR4) {
+ if (inst->dst.reg & BRW_MRF_COMPR4) {
mrf_high = mrf_low + 4;
} else if (c->dispatch_width == 16 &&
(!inst->force_uncompressed && !inst->force_sechalf)) {
@@ -1297,7 +1455,7 @@ fs_visitor::compute_to_mrf()
if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
/* Found the creator of our MRF's source value. */
scan_inst->dst.file = MRF;
- scan_inst->dst.hw_reg = inst->dst.hw_reg;
+ scan_inst->dst.reg = inst->dst.reg;
scan_inst->saturate |= inst->saturate;
inst->remove();
progress = true;
@@ -1334,10 +1492,10 @@ fs_visitor::compute_to_mrf()
/* If somebody else writes our MRF here, we can't
* compute-to-MRF before that.
*/
- int scan_mrf_low = scan_inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+ int scan_mrf_low = scan_inst->dst.reg & ~BRW_MRF_COMPR4;
int scan_mrf_high;
- if (scan_inst->dst.hw_reg & BRW_MRF_COMPR4) {
+ if (scan_inst->dst.reg & BRW_MRF_COMPR4) {
scan_mrf_high = scan_mrf_low + 4;
} else if (c->dispatch_width == 16 &&
(!scan_inst->force_uncompressed &&
@@ -1392,8 +1550,8 @@ fs_visitor::remove_duplicate_mrf_writes()
memset(last_mrf_move, 0, sizeof(last_mrf_move));
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list_safe(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
switch (inst->opcode) {
case BRW_OPCODE_DO:
@@ -1409,7 +1567,7 @@ fs_visitor::remove_duplicate_mrf_writes()
if (inst->opcode == BRW_OPCODE_MOV &&
inst->dst.file == MRF) {
- fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg];
+ fs_inst *prev_inst = last_mrf_move[inst->dst.reg];
if (prev_inst && inst->equals(prev_inst)) {
inst->remove();
progress = true;
@@ -1419,7 +1577,7 @@ fs_visitor::remove_duplicate_mrf_writes()
/* Clear out the last-write records for MRFs that were overwritten. */
if (inst->dst.file == MRF) {
- last_mrf_move[inst->dst.hw_reg] = NULL;
+ last_mrf_move[inst->dst.reg] = NULL;
}
if (inst->mlen > 0) {
@@ -1445,7 +1603,7 @@ fs_visitor::remove_duplicate_mrf_writes()
inst->dst.file == MRF &&
inst->src[0].file == GRF &&
!inst->predicated) {
- last_mrf_move[inst->dst.hw_reg] = inst;
+ last_mrf_move[inst->dst.reg] = inst;
}
}
@@ -1527,8 +1685,8 @@ fs_visitor::run()
/* Generate FS IR for main(). (the visitor only descends into
* functions called "main").
*/
- foreach_iter(exec_list_iterator, iter, *shader->ir) {
- ir_instruction *ir = (ir_instruction *)iter.get();
+ foreach_list(node, &*shader->ir) {
+ ir_instruction *ir = (ir_instruction *)node;
base_ir = ir;
this->result = reg_undef;
ir->accept(this);
@@ -1550,11 +1708,14 @@ fs_visitor::run()
progress = remove_duplicate_mrf_writes() || progress;
progress = propagate_constants() || progress;
+ progress = opt_algebraic() || progress;
progress = register_coalesce() || progress;
progress = compute_to_mrf() || progress;
progress = dead_code_eliminate() || progress;
} while (progress);
+ remove_dead_constants();
+
schedule_instructions();
assign_curb_setup();
@@ -1563,7 +1724,7 @@ fs_visitor::run()
if (0) {
/* Debug of register spilling: Go spill everything. */
int virtual_grf_count = virtual_grf_next;
- for (int i = 1; i < virtual_grf_count; i++) {
+ for (int i = 0; i < virtual_grf_count; i++) {
spill_reg(i);
}
}
@@ -1625,7 +1786,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
fs_visitor v(c, prog, shader);
if (!v.run()) {
prog->LinkStatus = GL_FALSE;
- prog->InfoLog = ralloc_strdup(prog, v.fail_msg);
+ ralloc_strcat(&prog->InfoLog, v.fail_msg);
return false;
}
@@ -1633,7 +1794,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) {
c->dispatch_width = 16;
fs_visitor v2(c, prog, shader);
- v2.import_uniforms(v.variable_ht);
+ v2.import_uniforms(&v);
v2.run();
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 2bf850e5dea..10f45f30fe9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -25,6 +25,8 @@
*
*/
+#include "brw_shader.h"
+
extern "C" {
#include <sys/types.h>
@@ -51,37 +53,10 @@ enum register_file {
MRF = BRW_MESSAGE_REGISTER_FILE,
IMM = BRW_IMMEDIATE_VALUE,
FIXED_HW_REG, /* a struct brw_reg */
- UNIFORM, /* prog_data->params[hw_reg] */
+ UNIFORM, /* prog_data->params[reg] */
BAD_FILE
};
-enum fs_opcodes {
- FS_OPCODE_FB_WRITE = 256,
- FS_OPCODE_RCP,
- FS_OPCODE_RSQ,
- FS_OPCODE_SQRT,
- FS_OPCODE_EXP2,
- FS_OPCODE_LOG2,
- FS_OPCODE_POW,
- FS_OPCODE_SIN,
- FS_OPCODE_COS,
- FS_OPCODE_DDX,
- FS_OPCODE_DDY,
- FS_OPCODE_PIXEL_X,
- FS_OPCODE_PIXEL_Y,
- FS_OPCODE_CINTERP,
- FS_OPCODE_LINTERP,
- FS_OPCODE_TEX,
- FS_OPCODE_TXB,
- FS_OPCODE_TXD,
- FS_OPCODE_TXL,
- FS_OPCODE_DISCARD,
- FS_OPCODE_SPILL,
- FS_OPCODE_UNSPILL,
- FS_OPCODE_PULL_CONSTANT_LOAD,
-};
-
-
class fs_reg {
public:
/* Callers of this ralloc-based new need not call delete. It's
@@ -99,7 +74,6 @@ public:
void init()
{
memset(this, 0, sizeof(*this));
- this->hw_reg = -1;
this->smear = -1;
}
@@ -146,8 +120,8 @@ public:
this->type = fixed_hw_reg.type;
}
- fs_reg(enum register_file file, int hw_reg);
- fs_reg(enum register_file file, int hw_reg, uint32_t type);
+ fs_reg(enum register_file file, int reg);
+ fs_reg(enum register_file file, int reg, uint32_t type);
fs_reg(class fs_visitor *v, const struct glsl_type *type);
bool equals(fs_reg *r)
@@ -155,7 +129,6 @@ public:
return (file == r->file &&
reg == r->reg &&
reg_offset == r->reg_offset &&
- hw_reg == r->hw_reg &&
type == r->type &&
negate == r->negate &&
abs == r->abs &&
@@ -167,12 +140,17 @@ public:
/** Register file: ARF, GRF, MRF, IMM. */
enum register_file file;
- /** virtual register number. 0 = fixed hw reg */
+ /**
+ * Register number. For ARF/MRF, it's the hardware register. For
+ * GRF, it's a virtual register number until register allocation.
+ */
int reg;
- /** Offset within the virtual register. */
+ /**
+ * For virtual registers, this is a hardware register offset from
+ * the start of the register block (for example, a constant index
+ * in an array access).
+ */
int reg_offset;
- /** HW register number. Generally unset until register allocation. */
- int hw_reg;
/** Register type. BRW_REGISTER_TYPE_* */
int type;
bool negate;
@@ -224,13 +202,13 @@ public:
init();
}
- fs_inst(int opcode)
+ fs_inst(enum opcode opcode)
{
init();
this->opcode = opcode;
}
- fs_inst(int opcode, fs_reg dst)
+ fs_inst(enum opcode opcode, fs_reg dst)
{
init();
this->opcode = opcode;
@@ -240,7 +218,7 @@ public:
assert(dst.reg_offset >= 0);
}
- fs_inst(int opcode, fs_reg dst, fs_reg src0)
+ fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0)
{
init();
this->opcode = opcode;
@@ -253,7 +231,7 @@ public:
assert(src[0].reg_offset >= 0);
}
- fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
+ fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
{
init();
this->opcode = opcode;
@@ -269,7 +247,7 @@ public:
assert(src[1].reg_offset >= 0);
}
- fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
+ fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
{
init();
this->opcode = opcode;
@@ -313,22 +291,23 @@ public:
return (opcode == FS_OPCODE_TEX ||
opcode == FS_OPCODE_TXB ||
opcode == FS_OPCODE_TXD ||
- opcode == FS_OPCODE_TXL);
+ opcode == FS_OPCODE_TXL ||
+ opcode == FS_OPCODE_TXS);
}
bool is_math()
{
- return (opcode == FS_OPCODE_RCP ||
- opcode == FS_OPCODE_RSQ ||
- opcode == FS_OPCODE_SQRT ||
- opcode == FS_OPCODE_EXP2 ||
- opcode == FS_OPCODE_LOG2 ||
- opcode == FS_OPCODE_SIN ||
- opcode == FS_OPCODE_COS ||
- opcode == FS_OPCODE_POW);
+ return (opcode == SHADER_OPCODE_RCP ||
+ opcode == SHADER_OPCODE_RSQ ||
+ opcode == SHADER_OPCODE_SQRT ||
+ opcode == SHADER_OPCODE_EXP2 ||
+ opcode == SHADER_OPCODE_LOG2 ||
+ opcode == SHADER_OPCODE_SIN ||
+ opcode == SHADER_OPCODE_COS ||
+ opcode == SHADER_OPCODE_POW);
}
- int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
+ enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
fs_reg dst;
fs_reg src[3];
bool saturate;
@@ -402,7 +381,7 @@ public:
this->base_ir = NULL;
this->virtual_grf_sizes = NULL;
- this->virtual_grf_next = 1;
+ this->virtual_grf_next = 0;
this->virtual_grf_array_size = 0;
this->virtual_grf_def = NULL;
this->virtual_grf_use = NULL;
@@ -421,7 +400,7 @@ public:
fs_reg *variable_storage(ir_variable *var);
int virtual_grf_alloc(int size);
- void import_uniforms(struct hash_table *src_variable_ht);
+ void import_uniforms(fs_visitor *v);
void visit(ir_variable *ir);
void visit(ir_assignment *ir);
@@ -445,27 +424,28 @@ public:
fs_inst *emit(fs_inst inst);
- fs_inst *emit(int opcode)
+ fs_inst *emit(enum opcode opcode)
{
return emit(fs_inst(opcode));
}
- fs_inst *emit(int opcode, fs_reg dst)
+ fs_inst *emit(enum opcode opcode, fs_reg dst)
{
return emit(fs_inst(opcode, dst));
}
- fs_inst *emit(int opcode, fs_reg dst, fs_reg src0)
+ fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0)
{
return emit(fs_inst(opcode, dst, src0));
}
- fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
+ fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
{
return emit(fs_inst(opcode, dst, src0, src1));
}
- fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
+ fs_inst *emit(enum opcode opcode, fs_reg dst,
+ fs_reg src0, fs_reg src1, fs_reg src2)
{
return emit(fs_inst(opcode, dst, src0, src1, src2));
}
@@ -485,9 +465,11 @@ public:
void setup_pull_constants();
void calculate_live_intervals();
bool propagate_constants();
+ bool opt_algebraic();
bool register_coalesce();
bool compute_to_mrf();
bool dead_code_eliminate();
+ bool remove_dead_constants();
bool remove_duplicate_mrf_writes();
bool virtual_grf_interferes(int a, int b);
void schedule_instructions();
@@ -524,8 +506,8 @@ public:
int sampler);
fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
int sampler);
- fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0);
- fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1);
+ fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
+ fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
bool try_emit_saturate(ir_expression *ir);
void emit_bool_to_cond_code(ir_rvalue *condition);
void emit_if_gen6(ir_if *ir);
@@ -565,6 +547,13 @@ public:
int *virtual_grf_use;
bool live_intervals_valid;
+ /* This is the map from UNIFORM reg + reg_offset as generated by
+ * the visitor to the packed uniform number after
+ * remove_dead_constants() that represents the actual uploaded
+ * uniform index.
+ */
+ int *params_remap;
+
struct hash_table *variable_ht;
ir_variable *frag_color, *frag_data, *frag_depth;
int first_non_payload_grf;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 1d89b8f1d11..28efbd3605f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -59,7 +59,8 @@ fs_visitor::generate_fb_write(fs_inst *inst)
if (inst->target > 0) {
/* Set the render target index for choosing BLEND_STATE. */
- brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2),
+ brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+ inst->base_mrf, 2),
BRW_REGISTER_TYPE_UD),
brw_imm_ud(inst->target));
}
@@ -145,43 +146,12 @@ void
fs_visitor::generate_math(fs_inst *inst,
struct brw_reg dst, struct brw_reg *src)
{
- int op;
-
- switch (inst->opcode) {
- case FS_OPCODE_RCP:
- op = BRW_MATH_FUNCTION_INV;
- break;
- case FS_OPCODE_RSQ:
- op = BRW_MATH_FUNCTION_RSQ;
- break;
- case FS_OPCODE_SQRT:
- op = BRW_MATH_FUNCTION_SQRT;
- break;
- case FS_OPCODE_EXP2:
- op = BRW_MATH_FUNCTION_EXP;
- break;
- case FS_OPCODE_LOG2:
- op = BRW_MATH_FUNCTION_LOG;
- break;
- case FS_OPCODE_POW:
- op = BRW_MATH_FUNCTION_POW;
- break;
- case FS_OPCODE_SIN:
- op = BRW_MATH_FUNCTION_SIN;
- break;
- case FS_OPCODE_COS:
- op = BRW_MATH_FUNCTION_COS;
- break;
- default:
- assert(!"not reached: unknown math function");
- op = 0;
- break;
- }
+ int op = brw_math_function(inst->opcode);
if (intel->gen >= 6) {
assert(inst->mlen == 0);
- if (inst->opcode == FS_OPCODE_POW) {
+ if (inst->opcode == SHADER_OPCODE_POW) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math2(p, dst, op, src[0], src[1]);
@@ -272,10 +242,16 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
}
break;
+ case FS_OPCODE_TXS:
+ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
+ break;
case FS_OPCODE_TXD:
/* There is no sample_d_c message; comparisons are done manually */
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
break;
+ default:
+ assert(!"not reached");
+ break;
}
} else {
switch (inst->opcode) {
@@ -316,6 +292,14 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
assert(inst->mlen == 7 || inst->mlen == 10);
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
break;
+ case FS_OPCODE_TXS:
+ assert(inst->mlen == 3);
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO;
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+ break;
+ default:
+ assert(!"not reached");
+ break;
}
}
assert(msg_type != -1);
@@ -537,11 +521,9 @@ brw_reg_from_fs_reg(fs_reg *reg)
case ARF:
case MRF:
if (reg->smear == -1) {
- brw_reg = brw_vec8_reg(reg->file,
- reg->hw_reg, 0);
+ brw_reg = brw_vec8_reg(reg->file, reg->reg, 0);
} else {
- brw_reg = brw_vec1_reg(reg->file,
- reg->hw_reg, reg->smear);
+ brw_reg = brw_vec1_reg(reg->file, reg->reg, reg->smear);
}
brw_reg = retype(brw_reg, reg->type);
if (reg->sechalf)
@@ -608,8 +590,8 @@ fs_visitor::generate_code()
prog->Name, c->dispatch_width);
}
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
struct brw_reg src[3], dst;
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
@@ -656,6 +638,11 @@ fs_visitor::generate_code()
case BRW_OPCODE_MUL:
brw_MUL(p, dst, src[0], src[1]);
break;
+ case BRW_OPCODE_MACH:
+ brw_set_acc_write_control(p, 1);
+ brw_MACH(p, dst, src[0], src[1]);
+ brw_set_acc_write_control(p, 0);
+ break;
case BRW_OPCODE_FRC:
brw_FRC(p, dst, src[0]);
@@ -770,14 +757,14 @@ fs_visitor::generate_code()
}
break;
- case FS_OPCODE_RCP:
- case FS_OPCODE_RSQ:
- case FS_OPCODE_SQRT:
- case FS_OPCODE_EXP2:
- case FS_OPCODE_LOG2:
- case FS_OPCODE_POW:
- case FS_OPCODE_SIN:
- case FS_OPCODE_COS:
+ case SHADER_OPCODE_RCP:
+ case SHADER_OPCODE_RSQ:
+ case SHADER_OPCODE_SQRT:
+ case SHADER_OPCODE_EXP2:
+ case SHADER_OPCODE_LOG2:
+ case SHADER_OPCODE_POW:
+ case SHADER_OPCODE_SIN:
+ case SHADER_OPCODE_COS:
generate_math(inst, dst, src);
break;
case FS_OPCODE_PIXEL_X:
@@ -796,6 +783,7 @@ fs_visitor::generate_code()
case FS_OPCODE_TXB:
case FS_OPCODE_TXD:
case FS_OPCODE_TXL:
+ case FS_OPCODE_TXS:
generate_tex(inst, dst, src[0]);
break;
case FS_OPCODE_DISCARD:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index b4689d2c293..7c5414ac26c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -25,23 +25,6 @@
*
*/
-extern "C" {
-
-#include <sys/types.h>
-
-#include "main/macros.h"
-#include "main/shaderobj.h"
-#include "main/uniforms.h"
-#include "program/prog_parameter.h"
-#include "program/prog_print.h"
-#include "program/prog_optimize.h"
-#include "program/register_allocate.h"
-#include "program/sampler.h"
-#include "program/hash_table.h"
-#include "brw_context.h"
-#include "brw_eu.h"
-#include "brw_wm.h"
-}
#include "brw_fs.h"
#include "../glsl/glsl_types.h"
#include "../glsl/ir_optimization.h"
@@ -50,45 +33,115 @@ extern "C" {
static void
assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width)
{
- if (reg->file == GRF && reg->reg != 0) {
+ if (reg->file == GRF) {
assert(reg->reg_offset >= 0);
- reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width;
- reg->reg = 0;
+ reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width;
+ reg->reg_offset = 0;
}
}
void
fs_visitor::assign_regs_trivial()
{
- int last_grf = 0;
- int hw_reg_mapping[this->virtual_grf_next];
+ int hw_reg_mapping[this->virtual_grf_next + 1];
int i;
int reg_width = c->dispatch_width / 8;
- hw_reg_mapping[0] = 0;
/* Note that compressed instructions require alignment to 2 registers. */
- hw_reg_mapping[1] = ALIGN(this->first_non_payload_grf, reg_width);
- for (i = 2; i < this->virtual_grf_next; i++) {
+ hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width);
+ for (i = 1; i <= this->virtual_grf_next; i++) {
hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
this->virtual_grf_sizes[i - 1] * reg_width);
}
- last_grf = hw_reg_mapping[i - 1] + (this->virtual_grf_sizes[i - 1] *
- reg_width);
+ this->grf_used = hw_reg_mapping[this->virtual_grf_next];
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
assign_reg(hw_reg_mapping, &inst->dst, reg_width);
assign_reg(hw_reg_mapping, &inst->src[0], reg_width);
assign_reg(hw_reg_mapping, &inst->src[1], reg_width);
}
- if (last_grf >= BRW_MAX_GRF) {
+ if (this->grf_used >= BRW_MAX_GRF) {
fail("Ran out of regs on trivial allocator (%d/%d)\n",
- last_grf, BRW_MAX_GRF);
+ this->grf_used, BRW_MAX_GRF);
+ }
+
+}
+
+/**
+ * Rebuilds the register set and register classes kept in brw->wm.
+ *
+ * Any previous brw->wm.ra_reg_to_grf, brw->wm.regs and brw->wm.classes
+ * are freed and replaced. One class is created per entry of
+ * class_sizes[]; a class of size N gets base_reg_count - (N - 1)
+ * registers, one per starting index j, and brw->wm.ra_reg_to_grf[]
+ * records that starting index for every register in the set.
+ *
+ * If brw->has_pln is set, reg_width is 1 and intel->gen < 6, an extra
+ * class (brw->wm.aligned_pairs_class) is created from the registers of
+ * the size-2 class whose recorded starting index is even; otherwise
+ * aligned_pairs_class is left at -1.
+ */
+static void
+brw_alloc_reg_set_for_classes(struct brw_context *brw,
+ int *class_sizes,
+ int class_count,
+ int reg_width,
+ int base_reg_count)
+{
+ struct intel_context *intel = &brw->intel;
+
+ /* Compute the total number of registers across all classes. */
+ int ra_reg_count = 0;
+ for (int i = 0; i < class_count; i++) {
+ ra_reg_count += base_reg_count - (class_sizes[i] - 1);
+ }
+
+ ralloc_free(brw->wm.ra_reg_to_grf);
+ brw->wm.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
+ ralloc_free(brw->wm.regs);
+ brw->wm.regs = ra_alloc_reg_set(ra_reg_count);
+ ralloc_free(brw->wm.classes);
+ brw->wm.classes = ralloc_array(brw, int, class_count + 1);
+
+ brw->wm.aligned_pairs_class = -1;
+
+ /* Now, add the registers to their classes, and add the conflicts
+ * between them and the base GRF registers (and also each other).
+ *
+ * NOTE(review): the conflicts are added against register numbers
+ * j .. j + size - 1 of the set, which presumably are the registers of
+ * a leading size-1 class -- confirm that the caller always puts a
+ * size-1 class first.
+ */
+ int reg = 0;
+ int pairs_base_reg = 0;
+ int pairs_reg_count = 0;
+ for (int i = 0; i < class_count; i++) {
+ int class_reg_count = base_reg_count - (class_sizes[i] - 1);
+ brw->wm.classes[i] = ra_alloc_reg_class(brw->wm.regs);
+
+ /* Save this off for the aligned pair class at the end. */
+ if (class_sizes[i] == 2) {
+ pairs_base_reg = reg;
+ pairs_reg_count = class_reg_count;
+ }
+
+ for (int j = 0; j < class_reg_count; j++) {
+ ra_class_add_reg(brw->wm.regs, brw->wm.classes[i], reg);
+
+ brw->wm.ra_reg_to_grf[reg] = j;
+
+ for (int base_reg = j;
+ base_reg < j + class_sizes[i];
+ base_reg++) {
+ ra_add_transitive_reg_conflict(brw->wm.regs, base_reg, reg);
+ }
+
+ reg++;
+ }
+ }
+ assert(reg == ra_reg_count);
+
+ /* Add a special class for aligned pairs, which we'll put delta_x/y
+ * in on gen5 so that we can do PLN.
+ *
+ * If no class of size 2 was seen above, pairs_reg_count is still 0
+ * and the new class stays empty.
+ *
+ * NOTE(review): evenness is tested on the class-relative index stored
+ * in ra_reg_to_grf; whether that corresponds to an even hardware GRF
+ * depends on the offset the caller applies -- confirm.
+ */
+ if (brw->has_pln && reg_width == 1 && intel->gen < 6) {
+ brw->wm.aligned_pairs_class = ra_alloc_reg_class(brw->wm.regs);
+
+ for (int i = 0; i < pairs_reg_count; i++) {
+ if ((brw->wm.ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
+ ra_class_add_reg(brw->wm.regs, brw->wm.aligned_pairs_class,
+ pairs_base_reg + i);
+ }
+ }
+ /* NOTE(review): class_count is a by-value parameter and is not read
+ * again in this function, so this increment has no visible effect.
+ */
+ class_count++;
+ }
- this->grf_used = last_grf + reg_width;
+ ra_set_finalize(brw->wm.regs);
}
bool
@@ -101,12 +154,11 @@ fs_visitor::assign_regs()
* for reg_width == 2.
*/
int reg_width = c->dispatch_width / 8;
- int hw_reg_mapping[this->virtual_grf_next + 1];
+ int hw_reg_mapping[this->virtual_grf_next];
int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width);
int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
int class_sizes[base_reg_count];
int class_count = 0;
- int aligned_pair_class = -1;
calculate_live_intervals();
@@ -125,7 +177,7 @@ fs_visitor::assign_regs()
*/
class_sizes[class_count++] = 2;
}
- for (int r = 1; r < this->virtual_grf_next; r++) {
+ for (int r = 0; r < this->virtual_grf_next; r++) {
int i;
for (i = 0; i < class_count; i++) {
@@ -141,94 +193,26 @@ fs_visitor::assign_regs()
}
}
- int ra_reg_count = 0;
- int class_base_reg[class_count];
- int class_reg_count[class_count];
- int classes[class_count + 1];
-
- for (int i = 0; i < class_count; i++) {
- class_base_reg[i] = ra_reg_count;
- class_reg_count[i] = base_reg_count - (class_sizes[i] - 1);
- ra_reg_count += class_reg_count[i];
- }
-
- struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
- for (int i = 0; i < class_count; i++) {
- classes[i] = ra_alloc_reg_class(regs);
-
- for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
- ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r);
- }
+ brw_alloc_reg_set_for_classes(brw, class_sizes, class_count,
+ reg_width, base_reg_count);
- /* Add conflicts between our contiguous registers aliasing
- * base regs and other register classes' contiguous registers
- * that alias base regs, or the base regs themselves for classes[0].
- */
- for (int c = 0; c <= i; c++) {
- for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
- for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1));
- c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]);
- c_r++) {
-
- if (0) {
- printf("%d/%d conflicts %d/%d\n",
- class_sizes[i], first_assigned_grf + i_r,
- class_sizes[c], first_assigned_grf + c_r);
- }
-
- ra_add_reg_conflict(regs,
- class_base_reg[i] + i_r,
- class_base_reg[c] + c_r);
- }
- }
- }
- }
-
- /* Add a special class for aligned pairs, which we'll put delta_x/y
- * in on gen5 so that we can do PLN.
- */
- if (brw->has_pln && reg_width == 1 && intel->gen < 6) {
- int reg_count = (base_reg_count - 1) / 2;
- int unaligned_pair_class = 1;
- assert(class_sizes[unaligned_pair_class] == 2);
-
- aligned_pair_class = class_count;
- classes[aligned_pair_class] = ra_alloc_reg_class(regs);
- class_sizes[aligned_pair_class] = 2;
- class_base_reg[aligned_pair_class] = 0;
- class_reg_count[aligned_pair_class] = 0;
- int start = (first_assigned_grf & 1) ? 1 : 0;
-
- for (int i = 0; i < reg_count; i++) {
- ra_class_add_reg(regs, classes[aligned_pair_class],
- class_base_reg[unaligned_pair_class] + i * 2 + start);
- }
- class_count++;
- }
-
- ra_set_finalize(regs);
-
- struct ra_graph *g = ra_alloc_interference_graph(regs,
+ struct ra_graph *g = ra_alloc_interference_graph(brw->wm.regs,
this->virtual_grf_next);
- /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1
- * with nodes.
- */
- ra_set_node_class(g, 0, classes[0]);
- for (int i = 1; i < this->virtual_grf_next; i++) {
+ for (int i = 0; i < this->virtual_grf_next; i++) {
for (int c = 0; c < class_count; c++) {
if (class_sizes[c] == this->virtual_grf_sizes[i]) {
- if (aligned_pair_class >= 0 &&
+ if (brw->wm.aligned_pairs_class >= 0 &&
this->delta_x.reg == i) {
- ra_set_node_class(g, i, classes[aligned_pair_class]);
+ ra_set_node_class(g, i, brw->wm.aligned_pairs_class);
} else {
- ra_set_node_class(g, i, classes[c]);
+ ra_set_node_class(g, i, brw->wm.classes[c]);
}
break;
}
}
- for (int j = 1; j < i; j++) {
+ for (int j = 0; j < i; j++) {
if (virtual_grf_interferes(i, j)) {
ra_add_node_interference(g, i, j);
}
@@ -253,7 +237,6 @@ fs_visitor::assign_regs()
ralloc_free(g);
- ralloc_free(regs);
return false;
}
@@ -263,28 +246,18 @@ fs_visitor::assign_regs()
* numbers.
*/
this->grf_used = first_assigned_grf;
- hw_reg_mapping[0] = 0; /* unused */
- for (int i = 1; i < this->virtual_grf_next; i++) {
+ for (int i = 0; i < this->virtual_grf_next; i++) {
int reg = ra_get_node_reg(g, i);
- int hw_reg = -1;
-
- for (int c = 0; c < class_count; c++) {
- if (reg >= class_base_reg[c] &&
- reg < class_base_reg[c] + class_reg_count[c]) {
- hw_reg = reg - class_base_reg[c];
- break;
- }
- }
- assert(hw_reg >= 0);
- hw_reg_mapping[i] = first_assigned_grf + hw_reg * reg_width;
+ hw_reg_mapping[i] = (first_assigned_grf +
+ brw->wm.ra_reg_to_grf[reg] * reg_width);
this->grf_used = MAX2(this->grf_used,
hw_reg_mapping[i] + this->virtual_grf_sizes[i] *
reg_width);
}
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
assign_reg(hw_reg_mapping, &inst->dst, reg_width);
assign_reg(hw_reg_mapping, &inst->src[0], reg_width);
@@ -292,7 +265,6 @@ fs_visitor::assign_regs()
}
ralloc_free(g);
- ralloc_free(regs);
return true;
}
@@ -336,8 +308,8 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
* spill/unspill we'll have to do, and guess that the insides of
* loops run 10 times.
*/
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
@@ -370,6 +342,9 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
if (inst->dst.file == GRF)
no_spill[inst->dst.reg] = true;
break;
+
+ default:
+ break;
}
}
@@ -394,8 +369,8 @@ fs_visitor::spill_reg(int spill_reg)
* virtual grf of the same size. For most instructions, though, we
* could just spill/unspill the GRF being accessed.
*/
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF &&
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index d8218c26edb..0ea4e5c36f0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -25,21 +25,6 @@
*
*/
-extern "C" {
-
-#include <sys/types.h>
-
-#include "main/macros.h"
-#include "main/shaderobj.h"
-#include "main/uniforms.h"
-#include "program/prog_optimize.h"
-#include "program/register_allocate.h"
-#include "program/sampler.h"
-#include "program/hash_table.h"
-#include "brw_context.h"
-#include "brw_eu.h"
-#include "brw_wm.h"
-}
#include "brw_fs.h"
#include "../glsl/glsl_types.h"
#include "../glsl/ir_optimization.h"
@@ -84,26 +69,26 @@ public:
int math_latency = 22;
switch (inst->opcode) {
- case FS_OPCODE_RCP:
+ case SHADER_OPCODE_RCP:
this->latency = 1 * chans * math_latency;
break;
- case FS_OPCODE_RSQ:
+ case SHADER_OPCODE_RSQ:
this->latency = 2 * chans * math_latency;
break;
- case FS_OPCODE_SQRT:
- case FS_OPCODE_LOG2:
+ case SHADER_OPCODE_SQRT:
+ case SHADER_OPCODE_LOG2:
/* full precision log. partial is 2. */
this->latency = 3 * chans * math_latency;
break;
- case FS_OPCODE_EXP2:
+ case SHADER_OPCODE_EXP2:
/* full precision. partial is 3, same throughput. */
this->latency = 4 * chans * math_latency;
break;
- case FS_OPCODE_POW:
+ case SHADER_OPCODE_POW:
this->latency = 8 * chans * math_latency;
break;
- case FS_OPCODE_SIN:
- case FS_OPCODE_COS:
+ case SHADER_OPCODE_SIN:
+ case SHADER_OPCODE_COS:
/* minimum latency, max is 12 rounds. */
this->latency = 5 * chans * math_latency;
break;
@@ -283,8 +268,8 @@ instruction_scheduler::calculate_deps()
memset(last_mrf_write, 0, sizeof(last_mrf_write));
/* top-to-bottom dependencies: RAW and WAW. */
- foreach_iter(exec_list_iterator, iter, instructions) {
- schedule_node *n = (schedule_node *)iter.get();
+ foreach_list(node, &instructions) {
+ schedule_node *n = (schedule_node *)node;
fs_inst *inst = n->inst;
/* read-after-write deps. */
@@ -321,12 +306,12 @@ instruction_scheduler::calculate_deps()
add_dep(last_grf_write[inst->dst.reg], n);
last_grf_write[inst->dst.reg] = n;
} else if (inst->dst.file == MRF) {
- int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+ int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
add_dep(last_mrf_write[reg], n);
last_mrf_write[reg] = n;
if (is_compressed(inst)) {
- if (inst->dst.hw_reg & BRW_MRF_COMPR4)
+ if (inst->dst.reg & BRW_MRF_COMPR4)
reg += 4;
else
reg++;
@@ -401,12 +386,12 @@ instruction_scheduler::calculate_deps()
if (inst->dst.file == GRF) {
last_grf_write[inst->dst.reg] = n;
} else if (inst->dst.file == MRF) {
- int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+ int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
last_mrf_write[reg] = n;
if (is_compressed(inst)) {
- if (inst->dst.hw_reg & BRW_MRF_COMPR4)
+ if (inst->dst.reg & BRW_MRF_COMPR4)
reg += 4;
else
reg++;
@@ -437,8 +422,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
int time = 0;
/* Remove non-DAG heads from the list. */
- foreach_iter(exec_list_iterator, iter, instructions) {
- schedule_node *n = (schedule_node *)iter.get();
+ foreach_list_safe(node, &instructions) {
+ schedule_node *n = (schedule_node *)node;
if (n->parent_count != 0)
n->remove();
}
@@ -447,8 +432,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
schedule_node *chosen = NULL;
int chosen_time = 0;
- foreach_iter(exec_list_iterator, iter, instructions) {
- schedule_node *n = (schedule_node *)iter.get();
+ foreach_list(node, &instructions) {
+ schedule_node *n = (schedule_node *)node;
if (!chosen || n->unblocked_time < chosen_time) {
chosen = n;
@@ -490,8 +475,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
* progress until the first is done.
*/
if (chosen->inst->is_math()) {
- foreach_iter(exec_list_iterator, iter, instructions) {
- schedule_node *n = (schedule_node *)iter.get();
+ foreach_list(node, &instructions) {
+ schedule_node *n = (schedule_node *)node;
if (n->inst->is_math())
n->unblocked_time = MAX2(n->unblocked_time,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
index 530ffa26580..a9a60c2fd8a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
@@ -122,8 +122,8 @@ ir_vector_reference_visitor::get_variable_entry(ir_variable *var)
break;
}
- foreach_iter(exec_list_iterator, iter, this->variable_list) {
- variable_entry *entry = (variable_entry *)iter.get();
+ foreach_list(node, &this->variable_list) {
+ variable_entry *entry = (variable_entry *)node;
if (entry->var == var)
return entry;
}
@@ -222,8 +222,8 @@ ir_vector_splitting_visitor::get_splitting_entry(ir_variable *var)
if (!var->type->is_vector())
return NULL;
- foreach_iter(exec_list_iterator, iter, *this->variable_list) {
- variable_entry *entry = (variable_entry *)iter.get();
+ foreach_list(node, &*this->variable_list) {
+ variable_entry *entry = (variable_entry *)node;
if (entry->var == var) {
return entry;
}
@@ -341,8 +341,8 @@ brw_do_vector_splitting(exec_list *instructions)
visit_list_elements(&refs, instructions);
/* Trim out variables we can't split. */
- foreach_iter(exec_list_iterator, iter, refs.variable_list) {
- variable_entry *entry = (variable_entry *)iter.get();
+ foreach_list_safe(node, &refs.variable_list) {
+ variable_entry *entry = (variable_entry *)node;
if (debug) {
printf("vector %s@%p: decl %d, whole_access %d\n",
@@ -363,8 +363,8 @@ brw_do_vector_splitting(exec_list *instructions)
/* Replace the decls of the vectors to be split with their split
* components.
*/
- foreach_iter(exec_list_iterator, iter, refs.variable_list) {
- variable_entry *entry = (variable_entry *)iter.get();
+ foreach_list(node, &refs.variable_list) {
+ variable_entry *entry = (variable_entry *)node;
const struct glsl_type *type;
type = glsl_type::get_instance(entry->var->type->base_type, 1, 1);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index cbe5cf428c5..cdaf543c88b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -142,9 +142,7 @@ fs_visitor::visit(ir_dereference_array *ir)
this->result.type = brw_type_for_base_type(ir->type);
if (index) {
- assert(this->result.file == UNIFORM ||
- (this->result.file == GRF &&
- this->result.reg != 0));
+ assert(this->result.file == UNIFORM || this->result.file == GRF);
this->result.reg_offset += index->value.i[0] * element_size;
} else {
assert(!"FINISHME: non-constant array element");
@@ -252,14 +250,14 @@ fs_visitor::visit(ir_expression *ir)
break;
case ir_unop_rcp:
- emit_math(FS_OPCODE_RCP, this->result, op[0]);
+ emit_math(SHADER_OPCODE_RCP, this->result, op[0]);
break;
case ir_unop_exp2:
- emit_math(FS_OPCODE_EXP2, this->result, op[0]);
+ emit_math(SHADER_OPCODE_EXP2, this->result, op[0]);
break;
case ir_unop_log2:
- emit_math(FS_OPCODE_LOG2, this->result, op[0]);
+ emit_math(SHADER_OPCODE_LOG2, this->result, op[0]);
break;
case ir_unop_exp:
case ir_unop_log:
@@ -267,11 +265,11 @@ fs_visitor::visit(ir_expression *ir)
break;
case ir_unop_sin:
case ir_unop_sin_reduced:
- emit_math(FS_OPCODE_SIN, this->result, op[0]);
+ emit_math(SHADER_OPCODE_SIN, this->result, op[0]);
break;
case ir_unop_cos:
case ir_unop_cos_reduced:
- emit_math(FS_OPCODE_COS, this->result, op[0]);
+ emit_math(SHADER_OPCODE_COS, this->result, op[0]);
break;
case ir_unop_dFdx:
@@ -289,7 +287,23 @@ fs_visitor::visit(ir_expression *ir)
break;
case ir_binop_mul:
- emit(BRW_OPCODE_MUL, this->result, op[0], op[1]);
+ if (ir->type->is_integer()) {
+ /* For integer multiplication, the MUL uses the low 16 bits
+ * of one of the operands (src0 on gen6, src1 on gen7). The
+ * MACH accumulates in the contribution of the upper 16 bits
+ * of that operand.
+ *
+ * FINISHME: Emit just the MUL if we know an operand is small
+ * enough.
+ */
+ struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
+
+ emit(BRW_OPCODE_MUL, acc, op[0], op[1]);
+ emit(BRW_OPCODE_MACH, reg_null_d, op[0], op[1]);
+ emit(BRW_OPCODE_MOV, this->result, fs_reg(acc));
+ } else {
+ emit(BRW_OPCODE_MUL, this->result, op[0], op[1]);
+ }
break;
case ir_binop_div:
assert(!"not reached: should be handled by ir_div_to_mul_rcp");
@@ -342,11 +356,11 @@ fs_visitor::visit(ir_expression *ir)
break;
case ir_unop_sqrt:
- emit_math(FS_OPCODE_SQRT, this->result, op[0]);
+ emit_math(SHADER_OPCODE_SQRT, this->result, op[0]);
break;
case ir_unop_rsq:
- emit_math(FS_OPCODE_RSQ, this->result, op[0]);
+ emit_math(SHADER_OPCODE_RSQ, this->result, op[0]);
break;
case ir_unop_i2u:
@@ -425,7 +439,7 @@ fs_visitor::visit(ir_expression *ir)
break;
case ir_binop_pow:
- emit_math(FS_OPCODE_POW, this->result, op[0], op[1]);
+ emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
break;
case ir_unop_bit_not:
@@ -496,7 +510,7 @@ fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
void
fs_visitor::visit(ir_assignment *ir)
{
- struct fs_reg l, r;
+ fs_reg l, r;
fs_inst *inst;
/* FINISHME: arrays on the lhs */
@@ -603,9 +617,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
/* gen4's SIMD8 sampler always has the slots for u,v,r present. */
mlen += 3;
} else if (ir->op == ir_txd) {
+ this->result = reg_undef;
ir->lod_info.grad.dPdx->accept(this);
fs_reg dPdx = this->result;
+ this->result = reg_undef;
ir->lod_info.grad.dPdy->accept(this);
fs_reg dPdy = this->result;
@@ -620,6 +636,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
* dPdx = dudx, dvdx, drdx
* dPdy = dudy, dvdy, drdy
*
+ * 1-arg: Does not exist.
+ *
* 2-arg: dudx dvdx dudy dvdy
* dPdx.x dPdx.y dPdy.x dPdy.y
* m4 m5 m6 m7
@@ -631,18 +649,26 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx);
dPdx.reg_offset++;
- mlen++;
}
+ mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2);
for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) {
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy);
dPdy.reg_offset++;
- mlen++;
}
+ mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2);
+ } else if (ir->op == ir_txs) {
+ /* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. */
+ simd16 = true;
+ this->result = reg_undef;
+ ir->lod_info.lod->accept(this);
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result);
+ mlen += 2;
} else {
/* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod
* instructions. We'll need to do SIMD16 here.
*/
+ simd16 = true;
assert(ir->op == ir_txb || ir->op == ir_txl);
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
@@ -671,16 +697,19 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
/* The unused upper half. */
mlen++;
+ }
+ if (simd16) {
/* Now, since we're doing simd16, the return is 2 interleaved
* vec4s where the odd-indexed ones are junk. We'll need to move
* this weirdness around to the expected layout.
*/
- simd16 = true;
orig_dst = dst;
- dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type,
- 2));
- dst.type = BRW_REGISTER_TYPE_F;
+ const glsl_type *vec_type =
+ glsl_type::get_instance(ir->type->base_type, 4, 1);
+ dst = fs_reg(this, glsl_type::get_array_instance(vec_type, 2));
+ dst.type = intel->is_g4x ? brw_type_for_base_type(ir->type)
+ : BRW_REGISTER_TYPE_F;
}
fs_inst *inst = NULL;
@@ -697,6 +726,9 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
case ir_txd:
inst = emit(FS_OPCODE_TXD, dst);
break;
+ case ir_txs:
+ inst = emit(FS_OPCODE_TXS, dst);
+ break;
case ir_txf:
assert(!"GLSL 1.30 features unsupported");
break;
@@ -732,6 +764,8 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
int base_mrf = 2;
int reg_width = c->dispatch_width / 8;
bool header_present = false;
+ const int vector_elements =
+ ir->coordinate ? ir->coordinate->type->vector_elements : 0;
if (ir->offset) {
/* The offsets set up by the ir_texture visitor are in the
@@ -742,7 +776,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
base_mrf--;
}
- for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
+ for (int i = 0; i < vector_elements; i++) {
fs_inst *inst = emit(BRW_OPCODE_MOV,
fs_reg(MRF, base_mrf + mlen + i * reg_width),
coordinate);
@@ -750,7 +784,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
inst->saturate = true;
coordinate.reg_offset++;
}
- mlen += ir->coordinate->type->vector_elements * reg_width;
+ mlen += vector_elements * reg_width;
if (ir->shadow_comparitor && ir->op != ir_txd) {
mlen = MAX2(mlen, header_present + 4 * reg_width);
@@ -786,9 +820,11 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
inst = emit(FS_OPCODE_TXL, dst);
break;
case ir_txd: {
+ this->result = reg_undef;
ir->lod_info.grad.dPdx->accept(this);
fs_reg dPdx = this->result;
+ this->result = reg_undef;
ir->lod_info.grad.dPdy->accept(this);
fs_reg dPdy = this->result;
@@ -816,6 +852,13 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
inst = emit(FS_OPCODE_TXD, dst);
break;
}
+ case ir_txs:
+ this->result = reg_undef;
+ ir->lod_info.lod->accept(this);
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result);
+ mlen += reg_width;
+ inst = emit(FS_OPCODE_TXS, dst);
+ break;
case ir_txf:
assert(!"GLSL 1.30 features unsupported");
break;
@@ -850,6 +893,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
}
if (ir->shadow_comparitor && ir->op != ir_txd) {
+ this->result = reg_undef;
ir->shadow_comparitor->accept(this);
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
mlen += reg_width;
@@ -860,11 +904,13 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
case ir_tex:
break;
case ir_txb:
+ this->result = reg_undef;
ir->lod_info.bias->accept(this);
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
mlen += reg_width;
break;
case ir_txl:
+ this->result = reg_undef;
ir->lod_info.lod->accept(this);
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
mlen += reg_width;
@@ -873,9 +919,11 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
if (c->dispatch_width == 16)
fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
+ this->result = reg_undef;
ir->lod_info.grad.dPdx->accept(this);
fs_reg dPdx = this->result;
+ this->result = reg_undef;
ir->lod_info.grad.dPdy->accept(this);
fs_reg dPdy = this->result;
@@ -900,13 +948,19 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
}
break;
}
+ case ir_txs:
+ this->result = reg_undef;
+ ir->lod_info.lod->accept(this);
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result);
+ mlen += reg_width;
+ break;
case ir_txf:
assert(!"GLSL 1.30 features unsupported");
break;
}
/* Set up the coordinate (except for TXD where it was done earlier) */
- if (ir->op != ir_txd) {
+ if (ir->op != ir_txd && ir->op != ir_txs) {
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
coordinate);
@@ -924,7 +978,8 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
case ir_txl: inst = emit(FS_OPCODE_TXL, dst); break;
case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break;
- case ir_txf: assert(!"TXF unsupported.");
+ case ir_txf: assert(!"TXF unsupported."); break;
+ case ir_txs: inst = emit(FS_OPCODE_TXS, dst); break;
}
inst->base_mrf = base_mrf;
inst->mlen = mlen;
@@ -959,7 +1014,8 @@ fs_visitor::visit(ir_texture *ir)
}
this->result = reg_undef;
- ir->coordinate->accept(this);
+ if (ir->coordinate)
+ ir->coordinate->accept(this);
fs_reg coordinate = this->result;
if (ir->offset != NULL) {
@@ -1000,7 +1056,8 @@ fs_visitor::visit(ir_texture *ir)
* texture coordinates. We use the program parameter state
* tracking to get the scaling factor.
*/
- if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
+ if (intel->gen < 6 &&
+ ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
int tokens[STATE_LENGTH] = {
STATE_INTERNAL,
@@ -1046,7 +1103,7 @@ fs_visitor::visit(ir_texture *ir)
/* Writemasking doesn't eliminate channels on SIMD8 texture
* samples, so don't worry about them.
*/
- fs_reg dst = fs_reg(this, glsl_type::vec4_type);
+ fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1));
if (intel->gen >= 7) {
inst = emit_texture_gen7(ir, dst, coordinate, sampler);
@@ -1070,6 +1127,7 @@ fs_visitor::visit(ir_texture *ir)
if (hw_compare_supported) {
inst->shadow_compare = true;
} else {
+ this->result = reg_undef;
ir->shadow_comparitor->accept(this);
fs_reg ref = this->result;
@@ -1465,8 +1523,8 @@ fs_visitor::visit(ir_if *ir)
inst->predicated = true;
}
- foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
- ir_instruction *ir = (ir_instruction *)iter.get();
+ foreach_list(node, &ir->then_instructions) {
+ ir_instruction *ir = (ir_instruction *)node;
this->base_ir = ir;
this->result = reg_undef;
ir->accept(this);
@@ -1475,8 +1533,8 @@ fs_visitor::visit(ir_if *ir)
if (!ir->else_instructions.is_empty()) {
emit(BRW_OPCODE_ELSE);
- foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
- ir_instruction *ir = (ir_instruction *)iter.get();
+ foreach_list(node, &ir->else_instructions) {
+ ir_instruction *ir = (ir_instruction *)node;
this->base_ir = ir;
this->result = reg_undef;
ir->accept(this);
@@ -1526,8 +1584,8 @@ fs_visitor::visit(ir_loop *ir)
inst->predicated = true;
}
- foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
- ir_instruction *ir = (ir_instruction *)iter.get();
+ foreach_list(node, &ir->body_instructions) {
+ ir_instruction *ir = (ir_instruction *)node;
this->base_ir = ir;
this->result = reg_undef;
@@ -1583,8 +1641,8 @@ fs_visitor::visit(ir_function *ir)
assert(sig);
- foreach_iter(exec_list_iterator, iter, sig->body) {
- ir_instruction *ir = (ir_instruction *)iter.get();
+ foreach_list(node, &sig->body) {
+ ir_instruction *ir = (ir_instruction *)node;
this->base_ir = ir;
this->result = reg_undef;
ir->accept(this);
@@ -1684,7 +1742,7 @@ fs_visitor::emit_interpolation_setup_gen4()
interp_reg(FRAG_ATTRIB_WPOS, 3));
/* Compute the pixel 1/W value from wpos.w. */
this->pixel_w = fs_reg(this, glsl_type::float_type);
- emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
+ emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
this->current_annotation = NULL;
}
@@ -1721,7 +1779,7 @@ fs_visitor::emit_interpolation_setup_gen6()
this->current_annotation = "compute pos.w";
this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
this->wpos_w = fs_reg(this, glsl_type::float_type);
- emit_math(FS_OPCODE_RCP, this->wpos_w, this->pixel_w);
+ emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
this->delta_x = fs_reg(brw_vec8_grf(2, 0));
this->delta_y = fs_reg(brw_vec8_grf(3, 0));
@@ -1733,6 +1791,7 @@ void
fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
{
int reg_width = c->dispatch_width / 8;
+ fs_inst *inst;
if (c->dispatch_width == 8 || intel->gen == 6) {
/* SIMD8 write looks like:
@@ -1751,8 +1810,10 @@ fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
* m + 6: a0
* m + 7: a1
*/
- emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index * reg_width),
- color);
+ inst = emit(BRW_OPCODE_MOV,
+ fs_reg(MRF, first_color_mrf + index * reg_width),
+ color);
+ inst->saturate = c->key.clamp_fragment_color;
} else {
/* pre-gen6 SIMD16 single source DP write looks like:
* m + 0: r0
@@ -1770,16 +1831,22 @@ fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
* usual destination + 1 for the second half we get
* destination + 4.
*/
- emit(BRW_OPCODE_MOV,
- fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), color);
+ inst = emit(BRW_OPCODE_MOV,
+ fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index),
+ color);
+ inst->saturate = c->key.clamp_fragment_color;
} else {
push_force_uncompressed();
- emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), color);
+ inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index),
+ color);
+ inst->saturate = c->key.clamp_fragment_color;
pop_force_uncompressed();
push_force_sechalf();
color.sechalf = true;
- emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), color);
+ inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4),
+ color);
+ inst->saturate = c->key.clamp_fragment_color;
pop_force_sechalf();
color.sechalf = false;
}
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 03cebbb824b..f7e6e7c81d1 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -46,7 +46,7 @@ static void upload_drawing_rect(struct brw_context *brw)
struct gl_context *ctx = &intel->ctx;
BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE);
+ OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
OUT_BATCH(0); /* xmin, ymin */
OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
((ctx->DrawBuffer->Height - 1) << 16));
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 6674f1640c8..09b5be4c96e 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -226,6 +226,34 @@ static GLboolean brwProgramStringNotify( struct gl_context *ctx,
return GL_TRUE;
}
+/* Per-thread scratch space is a power-of-two multiple of 1KB; returns the smallest such size that is >= size (1024 when size <= 1024). */
+int
+brw_get_scratch_size(int size)
+{
+ int i;
+
+ for (i = 1024; i < size; i *= 2) /* start at 1KB and double until i >= size */
+ ; /* empty body: the loop header does all the work */
+
+ return i; /* NOTE(review): i *= 2 would overflow a 32-bit int for size > 2^30 -- confirm callers stay below that */
+}
+/* Make *scratch_bo usable for `size` bytes: a BO that is already big enough is kept, a smaller one is unreferenced and replaced, and a NULL one is allocated. */
+void
+brw_get_scratch_bo(struct intel_context *intel,
+ drm_intel_bo **scratch_bo, int size)
+{
+ drm_intel_bo *old_bo = *scratch_bo;
+
+ if (old_bo && old_bo->size < size) { /* existing BO is too small for this request */
+ drm_intel_bo_unreference(old_bo);
+ old_bo = NULL; /* forces the allocation below */
+ }
+
+ if (!old_bo) { /* nothing reusable: either there never was a BO or it was just dropped */
+ *scratch_bo = drm_intel_bo_alloc(intel->bufmgr, "scratch bo", size, 4096); /* NOTE(review): result is not checked here; 4096 is presumably the alignment -- confirm against libdrm */
+ }
+}
+
void brwInitFragProgFuncs( struct dd_function_table *functions )
{
assert(functions->ProgramStringNotify == _tnl_program_string);
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 9471883fb2b..3ff6bbaed47 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -24,6 +24,7 @@
extern "C" {
#include "main/macros.h"
#include "brw_context.h"
+#include "brw_vs.h"
}
#include "brw_fs.h"
#include "../glsl/ir_optimization.h"
@@ -67,6 +68,9 @@ brw_shader_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
if (!brw_fs_precompile(ctx, prog))
return false;
+ if (!brw_vs_precompile(ctx, prog))
+ return false;
+
return true;
}
@@ -75,10 +79,15 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
{
struct brw_context *brw = brw_context(ctx);
struct intel_context *intel = &brw->intel;
+ unsigned int stage;
+
+ for (stage = 0; stage < ARRAY_SIZE(prog->_LinkedShaders); stage++) {
+ struct brw_shader *shader =
+ (struct brw_shader *)prog->_LinkedShaders[stage];
+
+ if (!shader)
+ continue;
- struct brw_shader *shader =
- (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
- if (shader != NULL) {
void *mem_ctx = ralloc_context(NULL);
bool progress;
@@ -106,18 +115,22 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
brw_do_cubemap_normalize(shader->ir);
lower_noise(shader->ir);
lower_quadop_vector(shader->ir, false);
+
+ bool input = true;
+ bool output = stage == MESA_SHADER_FRAGMENT;
+ bool temp = stage == MESA_SHADER_FRAGMENT;
+ bool uniform = true;
+
lower_variable_index_to_cond_assign(shader->ir,
- GL_TRUE, /* input */
- GL_TRUE, /* output */
- GL_TRUE, /* temp */
- GL_TRUE /* uniform */
- );
+ input, output, temp, uniform);
do {
progress = false;
- brw_do_channel_expressions(shader->ir);
- brw_do_vector_splitting(shader->ir);
+ if (stage == MESA_SHADER_FRAGMENT) {
+ brw_do_channel_expressions(shader->ir);
+ brw_do_vector_splitting(shader->ir);
+ }
progress = do_lower_jumps(shader->ir, true, true,
true, /* main return */
@@ -192,3 +205,29 @@ brw_conditional_for_comparison(unsigned int op)
return BRW_CONDITIONAL_NZ;
}
}
+/* Translate a SHADER_OPCODE_* math opcode into the matching BRW_MATH_FUNCTION_* value; any other opcode hits the assert in the default case. */
+uint32_t
+brw_math_function(enum opcode op)
+{
+ switch (op) {
+ case SHADER_OPCODE_RCP: /* names differ here: RCP maps to INV */
+ return BRW_MATH_FUNCTION_INV;
+ case SHADER_OPCODE_RSQ:
+ return BRW_MATH_FUNCTION_RSQ;
+ case SHADER_OPCODE_SQRT:
+ return BRW_MATH_FUNCTION_SQRT;
+ case SHADER_OPCODE_EXP2: /* names differ here: EXP2 maps to EXP */
+ return BRW_MATH_FUNCTION_EXP;
+ case SHADER_OPCODE_LOG2: /* names differ here: LOG2 maps to LOG */
+ return BRW_MATH_FUNCTION_LOG;
+ case SHADER_OPCODE_POW:
+ return BRW_MATH_FUNCTION_POW;
+ case SHADER_OPCODE_SIN:
+ return BRW_MATH_FUNCTION_SIN;
+ case SHADER_OPCODE_COS:
+ return BRW_MATH_FUNCTION_COS;
+ default: /* not one of the eight math opcodes handled above */
+ assert(!"not reached: unknown math function");
+ return 0; /* only reached when the assert is compiled out (NDEBUG) */
+ }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 4c568a26caa..1054d7a589e 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -21,5 +21,11 @@
* IN THE SOFTWARE.
*/
+#include <stdint.h>
+#include "brw_defines.h"
+
+#pragma once
+
int brw_type_for_base_type(const struct glsl_type *type);
uint32_t brw_conditional_for_comparison(unsigned int op);
+uint32_t brw_math_function(enum opcode op);
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
index b9e5cc1a534..cb7a3ef73d3 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -455,6 +455,23 @@ dump_vs_constants(struct brw_context *brw, uint32_t offset, uint32_t size)
}
}
+static void
+dump_wm_constants(struct brw_context *brw, uint32_t offset, uint32_t size)
+{ /* Print the data at `offset` in the batch BO under the "WM_CONST" label, four values per row, each row shown both as floats and as raw hex words. */
+ const char *name = "WM_CONST";
+ struct intel_context *intel = &brw->intel;
+ uint32_t *as_uint = intel->batch.bo->virtual + offset; /* NOTE(review): if virtual is void *, this is byte-granular void-pointer arithmetic (GNU extension) -- confirm */
+ float *as_float = intel->batch.bo->virtual + offset; /* same address as as_uint, read back as floats */
+ int i;
+
+ for (i = 0; i < size / 4; i += 4) { /* presumably size is in bytes, so size / 4 is the 32-bit word count -- TODO confirm; each pass reads words i..i+3 */
+ batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n",
+ i / 4, /* row number, one row per four words */
+ as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3],
+ as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]); /* NOTE(review): reads past `size` if the word count is not a multiple of 4 -- confirm callers guarantee that */
+ }
+}
+
static void dump_binding_table(struct brw_context *brw, uint32_t offset,
uint32_t size)
{
@@ -602,6 +619,9 @@ dump_state_batch(struct brw_context *brw)
case AUB_TRACE_VS_CONSTANTS:
dump_vs_constants(brw, offset, size);
break;
+ case AUB_TRACE_WM_CONSTANTS:
+ dump_wm_constants(brw, offset, size);
+ break;
default:
break;
}
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index f462f32b19a..46a417a08ed 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -60,7 +60,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel,
* given in Volume 1 of the BSpec.
*/
h0 = ALIGN(mt->height0, align_h);
- h1 = ALIGN(minify(h0), align_h);
+ h1 = ALIGN(minify(mt->height0), align_h);
qpitch = (h0 + h1 + (intel->gen >= 7 ? 12 : 11) * align_h);
if (mt->compressed)
qpitch /= 4;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
new file mode 100644
index 00000000000..760bc1f7acd
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -0,0 +1,161 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+extern "C" {
+#include "main/macros.h"
+#include "program/prog_parameter.h"
+}
+
+#define MAX_INSTRUCTION (1 << 30)
+
+namespace brw {
+
+/**
+ * Computes, for every virtual GRF, the first instruction index at which
+ * it is written (def) and the last at which it is read (use), and stores
+ * the results in virtual_grf_def / virtual_grf_use.
+ *
+ * Registers touched inside a loop are conservatively treated as live
+ * across the whole outermost loop.  A register that is never written
+ * keeps def == MAX_INSTRUCTION; one that is never read keeps use == -1.
+ *
+ * The result is cached: the function returns immediately while
+ * live_intervals_valid is set.
+ */
+void
+vec4_visitor::calculate_live_intervals()
+{
+   /* Test the cache flag before allocating anything, so that a call which
+    * has nothing to do doesn't leave two unused arrays hanging off
+    * mem_ctx.
+    */
+   if (this->live_intervals_valid)
+      return;
+
+   int *def = ralloc_array(mem_ctx, int, virtual_grf_count);
+   int *use = ralloc_array(mem_ctx, int, virtual_grf_count);
+   int loop_depth = 0;
+   int loop_start = 0;
+
+   for (int i = 0; i < virtual_grf_count; i++) {
+      def[i] = MAX_INSTRUCTION;
+      use[i] = -1;
+   }
+
+   int ip = 0;
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      if (inst->opcode == BRW_OPCODE_DO) {
+         /* Only the outermost loop's start is remembered. */
+         if (loop_depth++ == 0)
+            loop_start = ip;
+      } else if (inst->opcode == BRW_OPCODE_WHILE) {
+         loop_depth--;
+
+         if (loop_depth == 0) {
+            /* Patches up the use of vars marked for being live across
+             * the whole loop.
+             */
+            for (int i = 0; i < virtual_grf_count; i++) {
+               if (use[i] == loop_start) {
+                  use[i] = ip;
+               }
+            }
+         }
+      } else {
+         for (unsigned int i = 0; i < 3; i++) {
+            if (inst->src[i].file == GRF) {
+               int reg = inst->src[i].reg;
+
+               if (!loop_depth) {
+                  use[reg] = ip;
+               } else {
+                  /* Mark the use with loop_start; the WHILE handling
+                   * above rewrites it to the end of the loop.
+                   */
+                  def[reg] = MIN2(loop_start, def[reg]);
+                  use[reg] = loop_start;
+
+                  /* Nobody else is going to go smash our start to
+                   * later in the loop now, because def[reg] now
+                   * points before the bb header.
+                   */
+               }
+            }
+         }
+         if (inst->dst.file == GRF) {
+            int reg = inst->dst.reg;
+
+            if (!loop_depth) {
+               def[reg] = MIN2(def[reg], ip);
+            } else {
+               def[reg] = MIN2(def[reg], loop_start);
+            }
+         }
+      }
+
+      ip++;
+   }
+
+   /* Swap in the freshly computed arrays, releasing the previous ones. */
+   ralloc_free(this->virtual_grf_def);
+   ralloc_free(this->virtual_grf_use);
+   this->virtual_grf_def = def;
+   this->virtual_grf_use = use;
+
+   this->live_intervals_valid = true;
+}
+
+/**
+ * Returns true when the [def, use] ranges recorded for virtual GRFs
+ * \p a and \p b overlap, i.e. the later of the two defs comes strictly
+ * before the earlier of the two last uses.
+ */
+bool
+vec4_visitor::virtual_grf_interferes(int a, int b)
+{
+   const int *def = this->virtual_grf_def;
+   const int *use = this->virtual_grf_use;
+
+   /* We can't handle dead register writes here, without iterating
+    * over the whole instruction stream to find every single dead
+    * write to that register to compare to the live interval of the
+    * other register. Just assert that dead_code_eliminate() has been
+    * called.
+    */
+   assert((use[a] != -1 || def[a] == MAX_INSTRUCTION) &&
+          (use[b] != -1 || def[b] == MAX_INSTRUCTION));
+
+   int latest_def = MAX2(def[a], def[b]);
+   int earliest_last_use = MIN2(use[a], use[b]);
+
+   return latest_def < earliest_last_use;
+}
+
+/**
+ * Must be called after calculate_live_intervals() to remove unused
+ * writes to registers -- register allocation will fail otherwise
+ * because something deffed but not used won't be considered to
+ * interfere with other regs.
+ */
+bool
+vec4_visitor::dead_code_eliminate()
+{
+   bool removed_any = false;
+   int ip = 0;
+
+   /* Make sure virtual_grf_use[] is current before consulting it. */
+   calculate_live_intervals();
+
+   foreach_list_safe(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+      const bool writes_grf = (inst->dst.file == GRF);
+
+      /* A GRF write whose register has no read after this instruction
+       * is dropped.
+       */
+      if (writes_grf && this->virtual_grf_use[inst->dst.reg] <= ip) {
+         inst->remove();
+         removed_any = true;
+      }
+
+      ip++;
+   }
+
+   /* Removing instructions shifts every index, so the cached
+    * intervals can no longer be trusted.
+    */
+   if (removed_any)
+      live_intervals_valid = false;
+
+   return removed_any;
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
new file mode 100644
index 00000000000..1db910e2b99
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -0,0 +1,489 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_VEC4_H
+#define BRW_VEC4_H
+
+#include <stdint.h>
+#include "brw_shader.h"
+#include "main/compiler.h"
+#include "program/hash_table.h"
+
+extern "C" {
+#include "brw_vs.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+};
+
+#include "../glsl/ir.h"
+
+namespace brw {
+
+class dst_reg;
+
+/**
+ * Common helper for constructing swizzles. When only a subset of
+ * channels of a vec4 are used, we don't want to reference the other
+ * channels, as that will tell optimization passes that those other
+ * channels are used.
+ */
+static int
+swizzle_for_size(int size)
+{
+   /* Entry [n - 1] is the swizzle for an n-component value: the last
+    * valid channel is repeated into the remaining slots.
+    */
+   static const int swizzle_by_size[4] = {
+      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
+      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
+   };
+   const int index = size - 1;
+
+   assert((size >= 1) && (size <= 4));
+
+   return swizzle_by_size[index];
+}
+
+/**
+ * Register file of a reg.  The first four values alias the hardware
+ * BRW_* file encodings; the remaining ones exist only in this IR.
+ */
+enum register_file {
+ ARF = BRW_ARCHITECTURE_REGISTER_FILE,
+ GRF = BRW_GENERAL_REGISTER_FILE,
+ MRF = BRW_MESSAGE_REGISTER_FILE,
+ IMM = BRW_IMMEDIATE_VALUE,
+ HW_REG, /* a struct brw_reg */
+ ATTR, /* vertex attribute; rewritten to HW_REG by setup_attributes() */
+ UNIFORM, /* prog_data->params[hw_reg] */
+ BAD_FILE /* unset register; what init() assigns */
+};
+
+/**
+ * Fields shared by source and destination registers of the vec4 IR.
+ */
+class reg
+{
+public:
+ /** Register file: ARF, GRF, MRF, IMM. */
+ enum register_file file;
+ /** virtual register number. 0 = fixed hw reg */
+ int reg;
+ /** Offset within the virtual register. */
+ int reg_offset;
+ /** Register type. BRW_REGISTER_TYPE_* */
+ int type;
+ bool sechalf;
+ /** The hardware register used when file == HW_REG. */
+ struct brw_reg fixed_hw_reg;
+ int smear; /* -1, or a channel of the reg to smear to all channels. */
+
+ /** Value for file == IMM; the member in use is selected by type. */
+ union {
+ int32_t i;
+ uint32_t u;
+ float f;
+ } imm;
+};
+
+/**
+ * Source operand of a vec4_instruction: a reg plus swizzle, negate/abs
+ * modifiers and an optional relative-address register.
+ */
+class src_reg : public reg
+{
+public:
+   /* Callers of this ralloc-based new need not call delete. It's
+    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = ralloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   /** Zeroes every field and marks the register as unset (BAD_FILE). */
+   void init()
+   {
+      memset(this, 0, sizeof(*this));
+
+      this->file = BAD_FILE;
+   }
+
+   /**
+    * Register \p reg in \p file.  For scalar, vector and matrix types
+    * the swizzle only references the channels the type has; otherwise
+    * (including a NULL type) it is SWIZZLE_XYZW.
+    */
+   src_reg(register_file file, int reg, const glsl_type *type)
+   {
+      init();
+
+      this->file = file;
+      this->reg = reg;
+      if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
+         this->swizzle = swizzle_for_size(type->vector_elements);
+      else
+         this->swizzle = SWIZZLE_XYZW;
+   }
+
+   /** Generic unset register constructor. */
+   src_reg()
+   {
+      init();
+   }
+
+   /** Float immediate. */
+   src_reg(float f)
+   {
+      init();
+
+      this->file = IMM;
+      this->type = BRW_REGISTER_TYPE_F;
+      this->imm.f = f;
+   }
+
+   /** Unsigned integer immediate. */
+   src_reg(uint32_t u)
+   {
+      init();
+
+      this->file = IMM;
+      this->type = BRW_REGISTER_TYPE_UD;
+      /* Store through the unsigned member.  Assigning to imm.f would
+       * convert the integer to floating point, so reading the value
+       * back through imm.u (as is done for UD immediates) would yield
+       * the float's bit pattern rather than the integer.
+       */
+      this->imm.u = u;
+   }
+
+   /** Signed integer immediate. */
+   src_reg(int32_t i)
+   {
+      init();
+
+      this->file = IMM;
+      this->type = BRW_REGISTER_TYPE_D;
+      this->imm.i = i;
+   }
+
+   src_reg(class vec4_visitor *v, const struct glsl_type *type);
+
+   explicit src_reg(dst_reg reg);
+
+   GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */
+   bool negate;
+   bool abs;
+
+   src_reg *reladdr;
+};
+
+/**
+ * Destination operand of a vec4_instruction: a reg plus a channel
+ * writemask and an optional relative-address register.
+ */
+class dst_reg : public reg
+{
+public:
+   /* Allocation comes out of the ralloc context 'ctx', so there is no
+    * matching delete: freeing 'ctx' (or any of its ancestors) releases
+    * the object. */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *mem = ralloc_size(ctx, size);
+
+      assert(mem != NULL);
+      return mem;
+   }
+
+   /** Resets to an unset register that would write all four channels. */
+   void init()
+   {
+      memset(this, 0, sizeof(*this));
+      this->writemask = WRITEMASK_XYZW;
+      this->file = BAD_FILE;
+   }
+
+   /** Unset destination. */
+   dst_reg()
+   {
+      init();
+   }
+
+   /** Register number \p reg within \p file. */
+   dst_reg(register_file file, int reg)
+   {
+      init();
+
+      this->reg = reg;
+      this->file = file;
+   }
+
+   /** Wraps an already-fixed hardware register. */
+   dst_reg(struct brw_reg reg)
+   {
+      init();
+
+      this->fixed_hw_reg = reg;
+      this->file = HW_REG;
+   }
+
+   dst_reg(class vec4_visitor *v, const struct glsl_type *type);
+
+   explicit dst_reg(src_reg reg);
+
+   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
+
+   src_reg *reladdr;
+};
+
+/**
+ * One instruction of the vec4 IR: an opcode, one destination, up to
+ * three sources, and the per-instruction state read during code
+ * generation.
+ */
+class vec4_instruction : public exec_node {
+public:
+ /* Callers of this ralloc-based new need not call delete. It's
+ * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+ static void* operator new(size_t size, void *ctx)
+ {
+ void *node;
+
+ /* rzalloc: every field starts out zeroed. */
+ node = rzalloc_size(ctx, size);
+ assert(node != NULL);
+
+ return node;
+ }
+
+ /** Converts dst into a struct brw_reg. */
+ struct brw_reg get_dst(void);
+ /** Converts src[i] into a struct brw_reg. */
+ struct brw_reg get_src(int i);
+
+ enum opcode opcode; /* BRW_OPCODE_*, SHADER_OPCODE_* or VS_OPCODE_* */
+ dst_reg dst;
+ src_reg src[3];
+
+ bool saturate;
+ bool predicate_inverse;
+ uint32_t predicate;
+
+ int conditional_mod; /**< BRW_CONDITIONAL_* */
+
+ int sampler;
+ int target; /**< MRT target. */
+ bool shadow_compare;
+
+ bool eot;
+ bool header_present;
+ int mlen; /**< SEND message length */
+ int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
+
+ uint32_t offset; /* spill/unspill offset; also the URB destination offset for URB writes */
+ /** @{
+ * Annotation for the generated IR. One of the two can be set.
+ */
+ ir_instruction *ir;
+ const char *annotation;
+ /** @} */
+};
+
+/**
+ * GLSL IR visitor that builds a list of vec4_instruction for a vertex
+ * shader, runs dead code elimination and register allocation over it,
+ * and then generates native code (see run()).
+ */
+class vec4_visitor : public ir_visitor
+{
+public:
+ vec4_visitor(struct brw_vs_compile *c,
+ struct gl_shader_program *prog, struct brw_shader *shader);
+ ~vec4_visitor();
+
+ /** Null destination register. */
+ dst_reg dst_null_f()
+ {
+ return dst_reg(brw_null_reg());
+ }
+
+ /** Null destination register retyped to BRW_REGISTER_TYPE_D. */
+ dst_reg dst_null_d()
+ {
+ return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ }
+
+ /** Null destination for comparisons: D-typed on gen5+, plain null otherwise. */
+ dst_reg dst_null_cmp()
+ {
+ if (intel->gen > 4)
+ return dst_null_d();
+ else
+ return dst_null_f();
+ }
+
+ struct brw_context *brw;
+ const struct gl_vertex_program *vp;
+ struct intel_context *intel;
+ struct gl_context *ctx;
+ struct brw_vs_compile *c;
+ struct brw_vs_prog_data *prog_data;
+ struct brw_compile *p;
+ struct brw_shader *shader;
+ struct gl_shader_program *prog;
+ /** ralloc context used for the visitor's allocations. */
+ void *mem_ctx;
+ /** The vec4_instruction list being built and later code-generated. */
+ exec_list instructions;
+
+ char *fail_msg;
+ bool failed;
+
+ /**
+ * GLSL IR currently being processed, which is associated with our
+ * driver IR instructions for debugging purposes.
+ */
+ ir_instruction *base_ir;
+ const char *current_annotation;
+
+ int *virtual_grf_sizes;
+ int virtual_grf_count;
+ int virtual_grf_array_size;
+ /** First GRF after the payload; set by setup_payload(). */
+ int first_non_payload_grf;
+ /** Per-virtual-GRF def/use instruction indices from calculate_live_intervals(). */
+ int *virtual_grf_def;
+ int *virtual_grf_use;
+ /** True while virtual_grf_def/virtual_grf_use match the instruction list. */
+ bool live_intervals_valid;
+
+ dst_reg *variable_storage(ir_variable *var);
+
+ void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
+
+ src_reg src_reg_for_float(float val);
+
+ /**
+ * \name Visit methods
+ *
+ * As typical for the visitor pattern, there must be one \c visit method for
+ * each concrete subclass of \c ir_instruction. Virtual base classes within
+ * the hierarchy should not have \c visit methods.
+ */
+ /*@{*/
+ virtual void visit(ir_variable *);
+ virtual void visit(ir_loop *);
+ virtual void visit(ir_loop_jump *);
+ virtual void visit(ir_function_signature *);
+ virtual void visit(ir_function *);
+ virtual void visit(ir_expression *);
+ virtual void visit(ir_swizzle *);
+ virtual void visit(ir_dereference_variable *);
+ virtual void visit(ir_dereference_array *);
+ virtual void visit(ir_dereference_record *);
+ virtual void visit(ir_assignment *);
+ virtual void visit(ir_constant *);
+ virtual void visit(ir_call *);
+ virtual void visit(ir_return *);
+ virtual void visit(ir_discard *);
+ virtual void visit(ir_texture *);
+ virtual void visit(ir_if *);
+ /*@}*/
+
+ src_reg result;
+
+ /* Regs for vertex results. Generated at ir_variable visiting time
+ * for the ir->location's used.
+ */
+ dst_reg output_reg[VERT_RESULT_MAX];
+ int uniform_size[MAX_UNIFORMS];
+ int uniforms;
+
+ struct hash_table *variable_ht;
+
+ /** Runs the whole compile; returns false if it failed. */
+ bool run(void);
+ void fail(const char *msg, ...);
+
+ int virtual_grf_alloc(int size);
+ int setup_uniform_values(int loc, const glsl_type *type);
+ void setup_builtin_uniform_values(ir_variable *ir);
+ int setup_attributes(int payload_reg);
+ int setup_uniforms(int payload_reg);
+ void setup_payload();
+ void reg_allocate_trivial();
+ void reg_allocate();
+ void move_grf_array_access_to_scratch();
+ void calculate_live_intervals();
+ bool dead_code_eliminate();
+ bool virtual_grf_interferes(int a, int b);
+
+ vec4_instruction *emit(enum opcode opcode);
+
+ vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0);
+
+ vec4_instruction *emit(enum opcode opcode, dst_reg dst,
+ src_reg src0, src_reg src1);
+
+ vec4_instruction *emit(enum opcode opcode, dst_reg dst,
+ src_reg src0, src_reg src1, src_reg src2);
+
+ bool try_rewrite_rhs_to_dst(ir_assignment *ir,
+ dst_reg dst,
+ src_reg src,
+ vec4_instruction *pre_rhs_inst,
+ vec4_instruction *last_rhs_inst);
+
+ /** Walks an exec_list of ir_instruction and sends it through this visitor. */
+ void visit_instructions(const exec_list *list);
+
+ void emit_bool_to_cond_code(ir_rvalue *ir);
+ void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1);
+ void emit_if_gen6(ir_if *ir);
+
+ void emit_block_move(dst_reg *dst, src_reg *src,
+ const struct glsl_type *type, bool predicated);
+
+ void emit_constant_values(dst_reg *dst, ir_constant *value);
+
+ /**
+ * Emit the correct dot-product instruction for the type of arguments
+ */
+ void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
+
+ void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+ dst_reg dst, src_reg src0);
+
+ void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+ dst_reg dst, src_reg src0, src_reg src1);
+
+ void emit_scs(ir_instruction *ir, enum prog_opcode op,
+ dst_reg dst, const src_reg &src);
+
+ void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src);
+ void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src);
+ void emit_math(enum opcode opcode, dst_reg dst, src_reg src);
+ void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
+ void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
+ void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
+
+ int emit_vue_header_gen6(int header_mrf);
+ int emit_vue_header_gen4(int header_mrf);
+ void emit_urb_writes(void);
+
+ src_reg get_scratch_offset(vec4_instruction *inst,
+ src_reg *reladdr, int reg_offset);
+ void emit_scratch_read(vec4_instruction *inst,
+ dst_reg dst,
+ src_reg orig_src,
+ int base_offset);
+ void emit_scratch_write(vec4_instruction *inst,
+ src_reg temp,
+ dst_reg orig_dst,
+ int base_offset);
+
+ GLboolean try_emit_sat(ir_expression *ir);
+
+ bool process_move_condition(ir_rvalue *ir);
+
+ /* Native code generation: generate_code() walks the instruction
+ * list; the other generate_*() helpers handle individual opcodes.
+ */
+ void generate_code();
+ void generate_vs_instruction(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg *src);
+
+ void generate_math1_gen4(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src);
+ void generate_math1_gen6(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src);
+ void generate_math2_gen4(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1);
+ void generate_math2_gen6(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1);
+
+ void generate_urb_write(vec4_instruction *inst);
+ void generate_oword_dual_block_offsets(struct brw_reg m1,
+ struct brw_reg index);
+ void generate_scratch_write(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg index);
+ void generate_scratch_read(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg index);
+};
+
+} /* namespace brw */
+
+#endif /* BRW_VEC4_H */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
new file mode 100644
index 00000000000..65ac7d9dc09
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -0,0 +1,854 @@
+/* Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+#include "../glsl/ir_print_visitor.h"
+
+extern "C" {
+#include "brw_eu.h"
+};
+
+using namespace brw;
+
+namespace brw {
+
+/**
+ * Assigns payload GRFs to the vertex attributes flagged in
+ * prog_data->inputs_read, starting at \p payload_reg, and rewrites every
+ * ATTR source in the instruction list into the matching HW_REG.
+ *
+ * Also sets prog_data->urb_read_length.
+ *
+ * \return the first GRF after the attribute payload.
+ */
+int
+vec4_visitor::setup_attributes(int payload_reg)
+{
+   int nr_attributes;
+   /* Zero-filled so that a lookup for an attribute that is not flagged
+    * in inputs_read yields a defined value rather than whatever happened
+    * to be on the stack.
+    */
+   int attribute_map[VERT_ATTRIB_MAX] = { 0 };
+
+   nr_attributes = 0;
+   for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
+      if (prog_data->inputs_read & BITFIELD64_BIT(i)) {
+         attribute_map[i] = payload_reg + nr_attributes;
+         nr_attributes++;
+
+         /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED
+          * attributes come in as floating point conversions of the
+          * integer values.
+          */
+         if (c->key.gl_fixed_input_size[i] != 0) {
+            struct brw_reg reg = brw_vec8_grf(attribute_map[i], 0);
+
+            brw_MUL(p,
+                    brw_writemask(reg, (1 << c->key.gl_fixed_input_size[i]) - 1),
+                    reg, brw_imm_f(1.0 / 65536.0));
+         }
+      }
+   }
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      for (int i = 0; i < 3; i++) {
+         if (inst->src[i].file != ATTR)
+            continue;
+
+         int grf = attribute_map[inst->src[i].reg + inst->src[i].reg_offset];
+
+         /* Carry the swizzle and source modifiers over to the hw reg. */
+         struct brw_reg reg = brw_vec8_grf(grf, 0);
+         reg.dw1.bits.swizzle = inst->src[i].swizzle;
+         if (inst->src[i].abs)
+            reg = brw_abs(reg);
+         if (inst->src[i].negate)
+            reg = negate(reg);
+
+         inst->src[i].file = HW_REG;
+         inst->src[i].fixed_hw_reg = reg;
+      }
+   }
+
+   /* The BSpec says we always have to read at least one thing from
+    * the VF, and it appears that the hardware wedges otherwise.
+    */
+   if (nr_attributes == 0)
+      nr_attributes = 1;
+
+   prog_data->urb_read_length = (nr_attributes + 1) / 2;
+
+   return payload_reg + nr_attributes;
+}
+
+/**
+ * Lays out the push constants starting at GRF \p reg: user clip planes
+ * first (when enabled), then the shader's uniforms.  Fills in
+ * c->userplane[] and the nr_params / curb_read_length fields of
+ * c->prog_data.
+ *
+ * \return the first GRF after the push constants.
+ */
+int
+vec4_visitor::setup_uniforms(int reg)
+{
+   /* User clip planes from curbe:
+    */
+   if (c->key.nr_userclip) {
+      const bool is_gen6_plus = intel->gen >= 6;
+      /* Before gen6 the user planes sit six vec4 slots into the curbe. */
+      const int first_plane_slot = is_gen6_plus ? 0 : 6;
+
+      for (int plane = 0; plane < c->key.nr_userclip; plane++) {
+         c->userplane[plane] =
+            stride(brw_vec4_grf(reg + (first_plane_slot + plane) / 2,
+                                (plane % 2) * 4), 0, 4, 1);
+      }
+
+      if (is_gen6_plus)
+         reg += ALIGN(c->key.nr_userclip, 2) / 2;
+      else
+         reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2;
+   }
+
+   /* The pre-gen6 VS requires that some push constants get loaded no
+    * matter what, or the GPU would hang.
+    */
+   const bool need_dummy_uniform = intel->gen < 6 && this->uniforms == 0;
+
+   if (!need_dummy_uniform) {
+      reg += ALIGN(uniforms, 2) / 2;
+   } else {
+      this->uniform_size[this->uniforms] = 1;
+
+      /* Fill the dummy vec4 with four zero-converted params. */
+      for (unsigned int chan = 0; chan < 4; chan++) {
+         unsigned int slot = this->uniforms * 4 + chan;
+
+         c->prog_data.param[slot] = NULL;
+         c->prog_data.param_convert[slot] = PARAM_CONVERT_ZERO;
+      }
+
+      this->uniforms++;
+      reg++;
+   }
+
+   /* for now, we are not doing any elimination of unused slots, nor
+    * are we packing our uniforms.
+    */
+   c->prog_data.nr_params = this->uniforms * 4;
+
+   c->prog_data.curb_read_length = reg - 1;
+   c->prog_data.uses_new_param_layout = true;
+
+   return reg;
+}
+
+/**
+ * Lays out the thread payload (push constants, then vertex attributes)
+ * and records the first GRF that follows it.
+ */
+void
+vec4_visitor::setup_payload(void)
+{
+   /* The payload always contains important data in g0, which contains
+    * the URB handles that are passed on to the URB write at the end
+    * of the thread. So, we always start push constants at g1.
+    */
+   const int first_push_constant_grf = 1;
+
+   int after_uniforms = setup_uniforms(first_push_constant_grf);
+   int after_attributes = setup_attributes(after_uniforms);
+
+   this->first_non_payload_grf = after_attributes;
+}
+
+/**
+ * Converts this instruction's destination into a struct brw_reg.
+ *
+ * GRF destinations get their type and writemask applied, HW_REG
+ * destinations are passed through, and BAD_FILE maps to the null
+ * register.  Any other file asserts.
+ */
+struct brw_reg
+vec4_instruction::get_dst(void)
+{
+   if (dst.file == GRF) {
+      struct brw_reg hw = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
+
+      hw = retype(hw, dst.type);
+      hw.dw1.bits.writemask = dst.writemask;
+      return hw;
+   }
+
+   if (dst.file == HW_REG)
+      return dst.fixed_hw_reg;
+
+   /* Only an unset destination is expected to get this far. */
+   if (dst.file != BAD_FILE)
+      assert(!"not reached");
+
+   return brw_null_reg();
+}
+
+/**
+ * Converts source operand \p i of this instruction into a struct
+ * brw_reg.
+ *
+ * GRF and UNIFORM sources get type, swizzle and abs/negate modifiers
+ * applied; immediates are built from the union member matching their
+ * type; HW_REG is passed through; BAD_FILE maps to the null register.
+ * ATTR and any other file assert.
+ */
+struct brw_reg
+vec4_instruction::get_src(int i)
+{
+   const src_reg &s = src[i];
+   struct brw_reg hw;
+   /* Set for the files whose register still needs retyping, swizzling
+    * and source modifiers applied after the switch.
+    */
+   bool apply_modifiers = false;
+
+   switch (s.file) {
+   case GRF:
+      hw = brw_vec8_grf(s.reg + s.reg_offset, 0);
+      apply_modifiers = true;
+      break;
+
+   case UNIFORM: {
+      /* Two vec4 uniforms per GRF, starting at g1. */
+      const int slot = s.reg + s.reg_offset;
+
+      hw = stride(brw_vec4_grf(1 + slot / 2, (slot % 2) * 4), 0, 4, 1);
+      apply_modifiers = true;
+      break;
+   }
+
+   case IMM:
+      if (s.type == BRW_REGISTER_TYPE_F) {
+         hw = brw_imm_f(s.imm.f);
+      } else if (s.type == BRW_REGISTER_TYPE_D) {
+         hw = brw_imm_d(s.imm.i);
+      } else if (s.type == BRW_REGISTER_TYPE_UD) {
+         hw = brw_imm_ud(s.imm.u);
+      } else {
+         assert(!"not reached");
+         hw = brw_null_reg();
+      }
+      break;
+
+   case HW_REG:
+      hw = s.fixed_hw_reg;
+      break;
+
+   case BAD_FILE:
+      /* Probably unused. */
+      hw = brw_null_reg();
+      break;
+
+   case ATTR:
+   default:
+      assert(!"not reached");
+      hw = brw_null_reg();
+      break;
+   }
+
+   if (apply_modifiers) {
+      hw = retype(hw, s.type);
+      hw.dw1.bits.swizzle = s.swizzle;
+      if (s.abs)
+         hw = brw_abs(hw);
+      if (s.negate)
+         hw = negate(hw);
+   }
+
+   return hw;
+}
+
+/**
+ * Emits a one-operand math instruction through the pre-gen6 path,
+ * passing inst->base_mrf as the message register.
+ */
+void
+vec4_visitor::generate_math1_gen4(vec4_instruction *inst,
+                                  struct brw_reg dst,
+                                  struct brw_reg src)
+{
+   const uint32_t function = brw_math_function(inst->opcode);
+
+   brw_math(p, dst, function,
+            BRW_MATH_SATURATE_NONE,
+            inst->base_mrf,
+            src,
+            BRW_MATH_DATA_SCALAR,
+            BRW_MATH_PRECISION_FULL);
+}
+
+/**
+ * Asserts that \p src is usable as a gen6 math source: no abs or negate
+ * modifier, and the identity (XYZW) swizzle.
+ */
+static void
+check_gen6_math_src_arg(struct brw_reg src)
+{
+   /* Source swizzles are ignored. */
+   assert(!src.abs);
+   assert(!src.negate);
+   /* Must be a comparison: an assignment here would always evaluate to
+    * the non-zero BRW_SWIZZLE_XYZW and the check could never fail.
+    */
+   assert(src.dw1.bits.swizzle == BRW_SWIZZLE_XYZW);
+}
+
+/**
+ * Emits a one-operand math instruction on gen6+.  The instruction is
+ * emitted in align1 mode, after which align16 is restored.
+ */
+void
+vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
+                                  struct brw_reg dst,
+                                  struct brw_reg src)
+{
+   /* Can't do writemask because math can't be align16. */
+   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
+   check_gen6_math_src_arg(src);
+
+   const uint32_t function = brw_math_function(inst->opcode);
+
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_math(p, dst, function,
+            BRW_MATH_SATURATE_NONE,
+            inst->base_mrf,
+            src,
+            BRW_MATH_DATA_SCALAR,
+            BRW_MATH_PRECISION_FULL);
+   brw_set_access_mode(p, BRW_ALIGN_16);
+}
+
+/**
+ * Emits a two-operand math instruction on gen6+.  The instruction is
+ * emitted in align1 mode, after which align16 is restored.
+ */
+void
+vec4_visitor::generate_math2_gen6(vec4_instruction *inst,
+                                  struct brw_reg dst,
+                                  struct brw_reg src0,
+                                  struct brw_reg src1)
+{
+   /* Can't do writemask because math can't be align16. */
+   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
+   /* Source swizzles are ignored. */
+   check_gen6_math_src_arg(src0);
+   check_gen6_math_src_arg(src1);
+
+   const uint32_t function = brw_math_function(inst->opcode);
+
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_math2(p, dst, function, src0, src1);
+   brw_set_access_mode(p, BRW_ALIGN_16);
+}
+
+/**
+ * Emits a two-operand math instruction through the pre-gen6 path.  The
+ * second operand is first copied into the message register that follows
+ * inst->base_mrf.
+ */
+void
+vec4_visitor::generate_math2_gen4(vec4_instruction *inst,
+                                  struct brw_reg dst,
+                                  struct brw_reg src0,
+                                  struct brw_reg src1)
+{
+   /* Can't do writemask because math can't be align16. */
+   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
+
+   struct brw_reg second_operand_mrf = brw_message_reg(inst->base_mrf + 1);
+   brw_MOV(p, second_operand_mrf, src1);
+
+   const uint32_t function = brw_math_function(inst->opcode);
+
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_math(p, dst, function,
+            BRW_MATH_SATURATE_NONE,
+            inst->base_mrf,
+            src0,
+            BRW_MATH_DATA_VECTOR,
+            BRW_MATH_PRECISION_FULL);
+   brw_set_access_mode(p, BRW_ALIGN_16);
+}
+
+/**
+ * Emits the URB write described by \p inst, taking the message
+ * registers, length, EOT flag and destination offset from it.
+ */
+void
+vec4_visitor::generate_urb_write(vec4_instruction *inst)
+{
+   const bool allocate = false;
+   const bool used = true;
+   const int response_len = 0;
+   /* The same flag drives both "eot" and "writes complete". */
+   const bool eot = inst->eot;
+
+   brw_urb_WRITE(p,
+                 brw_null_reg(),      /* dest */
+                 inst->base_mrf,      /* starting mrf reg nr */
+                 brw_vec8_grf(0, 0),  /* src */
+                 allocate,
+                 used,
+                 inst->mlen,
+                 response_len,
+                 eot,                 /* eot */
+                 eot,                 /* writes complete */
+                 inst->offset,        /* urb destination offset */
+                 BRW_URB_SWIZZLE_INTERLEAVE);
+}
+
+/**
+ * Fills in the two block offsets of an OWord dual block message in
+ * \p m1: M1.0 receives \p index and M1.4 receives the offset for the
+ * second vertex, which is index plus 1 on gen6+ and index plus 16 on
+ * earlier generations.
+ *
+ * \p index may be either an immediate or a register.
+ */
+void
+vec4_visitor::generate_oword_dual_block_offsets(struct brw_reg m1,
+                                                struct brw_reg index)
+{
+   int second_vertex_offset;
+
+   if (intel->gen >= 6)
+      second_vertex_offset = 1;
+   else
+      second_vertex_offset = 16;
+
+   m1 = retype(m1, BRW_REGISTER_TYPE_D);
+
+   /* Set up M1 (message payload). Only the block offsets in M1.0 and
+    * M1.4 are used, and the rest are ignored.
+    */
+   struct brw_reg m1_0 = suboffset(vec1(m1), 0);
+   struct brw_reg m1_4 = suboffset(vec1(m1), 4);
+   struct brw_reg index_0 = suboffset(vec1(index), 0);
+   struct brw_reg index_4 = suboffset(vec1(index), 4);
+
+   brw_push_insn_state(p);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_set_access_mode(p, BRW_ALIGN_1);
+
+   brw_MOV(p, m1_0, index_0);
+
+   /* NOTE(review): the reason for inverting the predicate here is not
+    * visible from this function -- confirm against the callers.
+    */
+   brw_set_predicate_inverse(p, true);
+   if (index.file == BRW_IMMEDIATE_VALUE) {
+      /* Apply the same per-generation offset as the register path
+       * below; a plain increment is only right when the offset is 1.
+       */
+      index_4.dw1.ud += second_vertex_offset;
+      brw_MOV(p, m1_4, index_4);
+   } else {
+      brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset));
+   }
+
+   brw_pop_insn_state(p);
+}
+
+/**
+ * Emits an OWord dual block read into \p dst, with the block offsets
+ * taken from \p index.  The message is built in the MRFs starting at
+ * inst->base_mrf.
+ */
+void
+vec4_visitor::generate_scratch_read(vec4_instruction *inst,
+                                    struct brw_reg dst,
+                                    struct brw_reg index)
+{
+   const struct brw_reg header_mrf = brw_message_reg(inst->base_mrf);
+   const struct brw_reg offsets_mrf = brw_message_reg(inst->base_mrf + 1);
+
+   /* On gen6+ g0 is copied into the first message register. */
+   if (intel->gen >= 6) {
+      brw_push_insn_state(p);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+      brw_MOV(p,
+              retype(header_mrf, BRW_REGISTER_TYPE_D),
+              retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D));
+      brw_pop_insn_state(p);
+   }
+
+   generate_oword_dual_block_offsets(offsets_mrf, index);
+
+   /* The message type encoding depends on the hardware generation. */
+   uint32_t msg_type;
+
+   if (intel->gen >= 6) {
+      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+   } else if (intel->gen == 5 || intel->is_g4x) {
+      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+   } else {
+      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+   }
+
+   /* Each of the 8 channel enables is considered for whether each
+    * dword is written.
+    */
+   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
+   brw_set_dest(p, send, dst);
+   brw_set_src0(p, send, header_mrf);
+   brw_set_dp_read_message(p, send,
+                           255, /* binding table index: stateless access */
+                           BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
+                           msg_type,
+                           BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
+                           2, /* mlen */
+                           1 /* rlen */);
+}
+
+/**
+ * Emits an OWord dual block write of \p src, with the block offsets
+ * taken from \p index.  The message is built in the three MRFs starting
+ * at inst->base_mrf; \p dst is the destination of the SEND itself.
+ */
+void
+vec4_visitor::generate_scratch_write(vec4_instruction *inst,
+                                     struct brw_reg dst,
+                                     struct brw_reg src,
+                                     struct brw_reg index)
+{
+   const struct brw_reg header_mrf = brw_message_reg(inst->base_mrf);
+   const struct brw_reg offsets_mrf = brw_message_reg(inst->base_mrf + 1);
+   const struct brw_reg data_mrf = brw_message_reg(inst->base_mrf + 2);
+   const uint32_t msg_type = (intel->gen >= 6)
+      ? GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE
+      : BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
+
+   /* If the instruction is predicated, we'll predicate the send, not
+    * the header setup.
+    */
+   brw_set_predicate_control(p, false);
+
+   /* On gen6+ g0 is copied into the first message register. */
+   if (intel->gen >= 6) {
+      brw_push_insn_state(p);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+      brw_MOV(p,
+              retype(header_mrf, BRW_REGISTER_TYPE_D),
+              retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D));
+      brw_pop_insn_state(p);
+   }
+
+   generate_oword_dual_block_offsets(offsets_mrf, index);
+
+   /* The data to store goes in the third message register. */
+   brw_MOV(p,
+           retype(data_mrf, BRW_REGISTER_TYPE_D),
+           retype(src, BRW_REGISTER_TYPE_D));
+
+   /* Payload is set up; restore the predicate for the send itself. */
+   brw_set_predicate_control(p, inst->predicate);
+
+   /* Each of the 8 channel enables is considered for whether each
+    * dword is written.
+    */
+   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
+   brw_set_dest(p, send, dst);
+   brw_set_src0(p, send, header_mrf);
+   brw_set_dp_write_message(p, send,
+                            255, /* binding table index: stateless access */
+                            BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
+                            msg_type,
+                            3, /* mlen */
+                            true, /* header present */
+                            false, /* pixel scoreboard */
+                            0, /* rlen */
+                            false, /* eot */
+                            false /* commit */);
+}
+
+/**
+ * Dispatches the opcodes handled here (math, URB write, scratch
+ * read/write) to their generator.  Any other opcode is reported through
+ * fail().
+ */
+void
+vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
+                                      struct brw_reg dst,
+                                      struct brw_reg *src)
+{
+   vec4_instruction *inst = instruction;
+   const bool is_gen6_plus = intel->gen >= 6;
+
+   switch (inst->opcode) {
+   /* One-operand math. */
+   case SHADER_OPCODE_RCP:
+   case SHADER_OPCODE_RSQ:
+   case SHADER_OPCODE_SQRT:
+   case SHADER_OPCODE_EXP2:
+   case SHADER_OPCODE_LOG2:
+   case SHADER_OPCODE_SIN:
+   case SHADER_OPCODE_COS:
+      if (is_gen6_plus)
+         generate_math1_gen6(inst, dst, src[0]);
+      else
+         generate_math1_gen4(inst, dst, src[0]);
+      break;
+
+   /* Two-operand math. */
+   case SHADER_OPCODE_POW:
+      if (is_gen6_plus)
+         generate_math2_gen6(inst, dst, src[0], src[1]);
+      else
+         generate_math2_gen4(inst, dst, src[0], src[1]);
+      break;
+
+   case VS_OPCODE_URB_WRITE:
+      generate_urb_write(inst);
+      break;
+
+   case VS_OPCODE_SCRATCH_READ:
+      generate_scratch_read(inst, dst, src[0]);
+      break;
+
+   case VS_OPCODE_SCRATCH_WRITE:
+      generate_scratch_write(inst, dst, src[0], src[1]);
+      break;
+
+   default:
+      /* Name the opcode when it has an entry in brw_opcodes[];
+       * otherwise report its number.
+       */
+      if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
+         fail("unsupported opcode in `%s' in VS\n",
+              brw_opcodes[inst->opcode].name);
+      } else {
+         fail("Unsupported opcode %d in VS", inst->opcode);
+      }
+      break;
+   }
+}
+
+/**
+ * Runs the whole vertex shader compile: IR generation, URB write
+ * emission, scratch lowering, dead code elimination, payload setup,
+ * register allocation and native code generation.
+ *
+ * \return true on success, false once the visitor has been marked failed.
+ */
+bool
+vec4_visitor::run()
+{
+   /* Generate VS IR for main(). (the visitor only descends into
+    * functions called "main").
+    */
+   visit_instructions(shader->ir);
+
+   emit_urb_writes();
+
+   /* Before any optimization, push array accesses out to scratch
+    * space where we need them to be. This pass may allocate new
+    * virtual GRFs, so we want to do it early. It also makes sure
+    * that we have reladdr computations available for CSE, since we'll
+    * often do repeated subexpressions for those.
+    */
+   move_grf_array_access_to_scratch();
+
+   /* Repeat the optimization passes until none of them changes anything. */
+   bool made_progress;
+   do {
+      made_progress = dead_code_eliminate();
+   } while (made_progress);
+
+   if (failed)
+      return false;
+
+   setup_payload();
+   reg_allocate();
+
+   if (failed)
+      return false;
+
+   brw_set_access_mode(p, BRW_ALIGN_16);
+
+   generate_code();
+
+   if (failed)
+      return false;
+
+   return true;
+}
+
+/**
+ * Walk this->instructions and emit native code for each one through
+ * the brw_* emitters.
+ *
+ * Opcodes not handled by the switch below are passed to
+ * generate_vs_instruction().  When INTEL_DEBUG has DEBUG_VS set, the
+ * IR/annotation of each instruction and the disassembly of the native
+ * instructions it produced are printed to stdout.
+ */
+void
+vec4_visitor::generate_code()
+{
+ int last_native_inst = p->nr_insn;
+ const char *last_annotation_string = NULL;
+ ir_instruction *last_annotation_ir = NULL;
+
+ /* loop_stack[] holds the DO instruction of each open loop, and
+ * if_depth_in_loop[] counts the IFs currently open at each loop depth.
+ * Both start with room for 16 entries and are grown in the DO case.
+ */
+ int loop_stack_array_size = 16;
+ int loop_stack_depth = 0;
+ brw_instruction **loop_stack =
+ rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
+ int *if_depth_in_loop =
+ rzalloc_array(this->mem_ctx, int, loop_stack_array_size);
+
+
+ if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+ printf("Native code for vertex shader %d:\n", prog->Name);
+ }
+
+ foreach_list(node, &this->instructions) {
+ vec4_instruction *inst = (vec4_instruction *)node;
+ struct brw_reg src[3], dst;
+
+ /* Print the IR and annotation only when they change from the
+ * previous instruction.
+ */
+ if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+ if (last_annotation_ir != inst->ir) {
+ last_annotation_ir = inst->ir;
+ if (last_annotation_ir) {
+ printf(" ");
+ last_annotation_ir->print();
+ printf("\n");
+ }
+ }
+ if (last_annotation_string != inst->annotation) {
+ last_annotation_string = inst->annotation;
+ if (last_annotation_string)
+ printf(" %s\n", last_annotation_string);
+ }
+ }
+
+ for (unsigned int i = 0; i < 3; i++) {
+ src[i] = inst->get_src(i);
+ }
+ dst = inst->get_dst();
+
+ /* Set the default instruction state from this IR instruction before
+ * emitting anything for it.
+ */
+ brw_set_conditionalmod(p, inst->conditional_mod);
+ brw_set_predicate_control(p, inst->predicate);
+ brw_set_predicate_inverse(p, inst->predicate_inverse);
+ brw_set_saturate(p, inst->saturate);
+
+ switch (inst->opcode) {
+ case BRW_OPCODE_MOV:
+ brw_MOV(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_ADD:
+ brw_ADD(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_MUL:
+ brw_MUL(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_MACH:
+ /* Accumulator write control is enabled only around the MACH. */
+ brw_set_acc_write_control(p, 1);
+ brw_MACH(p, dst, src[0], src[1]);
+ brw_set_acc_write_control(p, 0);
+ break;
+
+ case BRW_OPCODE_FRC:
+ brw_FRC(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_RNDD:
+ brw_RNDD(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_RNDE:
+ brw_RNDE(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_RNDZ:
+ brw_RNDZ(p, dst, src[0]);
+ break;
+
+ case BRW_OPCODE_AND:
+ brw_AND(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_OR:
+ brw_OR(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_XOR:
+ brw_XOR(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_NOT:
+ brw_NOT(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_ASR:
+ brw_ASR(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_SHR:
+ brw_SHR(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_SHL:
+ brw_SHL(p, dst, src[0], src[1]);
+ break;
+
+ case BRW_OPCODE_CMP:
+ brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
+ break;
+ case BRW_OPCODE_SEL:
+ brw_SEL(p, dst, src[0], src[1]);
+ break;
+
+ case BRW_OPCODE_DP4:
+ brw_DP4(p, dst, src[0], src[1]);
+ break;
+
+ case BRW_OPCODE_DP3:
+ brw_DP3(p, dst, src[0], src[1]);
+ break;
+
+ case BRW_OPCODE_DP2:
+ brw_DP2(p, dst, src[0], src[1]);
+ break;
+
+ case BRW_OPCODE_IF:
+ if (inst->src[0].file != BAD_FILE) {
+ /* The instruction has an embedded compare (only allowed on gen6) */
+ assert(intel->gen == 6);
+ gen6_IF(p, inst->conditional_mod, src[0], src[1]);
+ } else {
+ struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8);
+ brw_inst->header.predicate_control = inst->predicate;
+ }
+ if_depth_in_loop[loop_stack_depth]++;
+ break;
+
+ case BRW_OPCODE_ELSE:
+ brw_ELSE(p);
+ break;
+ case BRW_OPCODE_ENDIF:
+ brw_ENDIF(p);
+ if_depth_in_loop[loop_stack_depth]--;
+ break;
+
+ case BRW_OPCODE_DO:
+ loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
+ /* Grow both arrays before index loop_stack_depth is written below. */
+ if (loop_stack_array_size <= loop_stack_depth) {
+ loop_stack_array_size *= 2;
+ loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
+ loop_stack_array_size);
+ if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
+ loop_stack_array_size);
+ }
+ if_depth_in_loop[loop_stack_depth] = 0;
+ break;
+
+ case BRW_OPCODE_BREAK:
+ brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case BRW_OPCODE_CONTINUE:
+ /* FINISHME: We need to write the loop instruction support still. */
+ if (intel->gen >= 6)
+ gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
+ else
+ brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+
+ case BRW_OPCODE_WHILE: {
+ struct brw_instruction *inst0, *inst1;
+ /* Multiplier applied to the jump counts patched below: 2 on gen5+,
+ * 1 otherwise.
+ */
+ GLuint br = 1;
+
+ if (intel->gen >= 5)
+ br = 2;
+
+ assert(loop_stack_depth > 0);
+ loop_stack_depth--;
+ inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
+ if (intel->gen < 6) {
+ /* patch all the BREAK/CONT instructions from last BGNLOOP */
+ while (inst0 > loop_stack[loop_stack_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+ }
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ }
+ }
+ }
+ }
+ break;
+
+ default:
+ generate_vs_instruction(inst, dst, src);
+ break;
+ }
+
+ /* Disassemble the native instructions emitted for this IR
+ * instruction; the raw hex dump is compiled out with if (0).
+ */
+ if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+ for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
+ if (0) {
+ printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+ ((uint32_t *)&p->store[i])[3],
+ ((uint32_t *)&p->store[i])[2],
+ ((uint32_t *)&p->store[i])[1],
+ ((uint32_t *)&p->store[i])[0]);
+ }
+ brw_disasm(stdout, &p->store[i], intel->gen);
+ }
+ }
+
+ last_native_inst = p->nr_insn;
+ }
+
+ if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+ printf("\n");
+ }
+
+ ralloc_free(loop_stack);
+ ralloc_free(if_depth_in_loop);
+
+ brw_set_uip_jip(p);
+
+ /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
+ * emit issues, it doesn't get the jump distances into the output,
+ * which is often something we want to debug. So this is here in
+ * case you're doing that.
+ */
+ if (0) {
+ if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+ for (unsigned int i = 0; i < p->nr_insn; i++) {
+ printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+ ((uint32_t *)&p->store[i])[3],
+ ((uint32_t *)&p->store[i])[2],
+ ((uint32_t *)&p->store[i])[1],
+ ((uint32_t *)&p->store[i])[0]);
+ brw_disasm(stdout, &p->store[i], intel->gen);
+ }
+ }
+ }
+}
+
+extern "C" {
+
+/**
+ * C entry point: compile the linked vertex shader of \p prog with the
+ * vec4 backend.
+ *
+ * \return true on success.  Returns false if there is no program or no
+ * linked vertex shader, or if the visitor fails; in the last case the
+ * program's LinkStatus is cleared and the failure message is appended
+ * to its InfoLog.
+ */
+bool
+brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c)
+{
+   struct brw_shader *vs = NULL;
+
+   if (prog)
+      vs = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
+   if (!vs)
+      return false;
+
+   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+      printf("GLSL IR for native vertex shader %d:\n", prog->Name);
+      _mesa_print_ir(vs->ir, NULL);
+      printf("\n\n");
+   }
+
+   vec4_visitor visitor(c, prog, vs);
+   if (visitor.run())
+      return true;
+
+   /* Compilation failed: surface the reason through the link log. */
+   prog->LinkStatus = GL_FALSE;
+   ralloc_strcat(&prog->InfoLog, visitor.fail_msg);
+   return false;
+}
+
+} /* extern "C" */
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
new file mode 100644
index 00000000000..3f052ff64cf
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -0,0 +1,234 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+extern "C" {
+#include "main/macros.h"
+#include "program/register_allocate.h"
+} /* extern "C" */
+
+#include "brw_vec4.h"
+#include "../glsl/ir_print_visitor.h"
+
+using namespace brw;
+
+namespace brw {
+
+/**
+ * Rewrite a virtual GRF number to its hardware register using the
+ * given mapping table.  Registers in any other file are left alone.
+ */
+static void
+assign(int *reg_hw_locations, reg *reg)
+{
+   if (reg->file != GRF)
+      return;
+
+   reg->reg = reg_hw_locations[reg->reg];
+}
+
+/**
+ * Trivial register allocator: give every virtual GRF that is still
+ * referenced its own run of hardware registers, packed one after
+ * another starting at first_non_payload_grf.
+ *
+ * Calls fail() if the total exceeds BRW_MAX_GRF.
+ */
+void
+vec4_visitor::reg_allocate_trivial()
+{
+   int hw_reg_mapping[this->virtual_grf_count];
+   bool virtual_grf_used[this->virtual_grf_count];
+
+   /* Find the virtual GRFs that survived the optimization passes. */
+   for (int r = 0; r < this->virtual_grf_count; r++)
+      virtual_grf_used[r] = false;
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)iter.get();
+
+      if (inst->dst.file == GRF)
+         virtual_grf_used[inst->dst.reg] = true;
+
+      for (int s = 0; s < 3; s++) {
+         if (inst->src[s].file == GRF)
+            virtual_grf_used[inst->src[s].reg] = true;
+      }
+   }
+
+   /* Virtual GRF 0 always gets a slot; the rest only if referenced. */
+   hw_reg_mapping[0] = this->first_non_payload_grf;
+   int next_hw_reg = hw_reg_mapping[0] + this->virtual_grf_sizes[0];
+   for (int r = 1; r < this->virtual_grf_count; r++) {
+      if (!virtual_grf_used[r])
+         continue;
+
+      hw_reg_mapping[r] = next_hw_reg;
+      next_hw_reg += this->virtual_grf_sizes[r];
+   }
+   prog_data->total_grf = next_hw_reg;
+
+   /* Rewrite every operand from virtual to hardware register numbers. */
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)iter.get();
+
+      assign(hw_reg_mapping, &inst->dst);
+      for (int s = 0; s < 3; s++)
+         assign(hw_reg_mapping, &inst->src[s]);
+   }
+
+   if (prog_data->total_grf > BRW_MAX_GRF) {
+      fail("Ran out of regs on trivial allocator (%d/%d)\n",
+           prog_data->total_grf, BRW_MAX_GRF);
+   }
+}
+
+/**
+ * (Re)build the register set, register classes and RA-register-to-GRF
+ * table stored in brw->vs.
+ *
+ * \param class_sizes     size of each class
+ * \param class_count     number of entries in \p class_sizes
+ * \param base_reg_count  number of base registers available
+ *
+ * A class of size N gets base_reg_count - (N - 1) registers.  Any
+ * previously stored set, classes and table are freed first.
+ */
+static void
+brw_alloc_reg_set_for_classes(struct brw_context *brw,
+ int *class_sizes,
+ int class_count,
+ int base_reg_count)
+{
+ /* Compute the total number of registers across all classes. */
+ int ra_reg_count = 0;
+ for (int i = 0; i < class_count; i++) {
+ ra_reg_count += base_reg_count - (class_sizes[i] - 1);
+ }
+
+ ralloc_free(brw->vs.ra_reg_to_grf);
+ brw->vs.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
+ ralloc_free(brw->vs.regs);
+ brw->vs.regs = ra_alloc_reg_set(ra_reg_count);
+ ralloc_free(brw->vs.classes);
+ brw->vs.classes = ralloc_array(brw, int, class_count + 1);
+
+ /* Now, add the registers to their classes, and add the conflicts
+ * between them and the base GRF registers (and also each other).
+ */
+ int reg = 0;
+ for (int i = 0; i < class_count; i++) {
+ int class_reg_count = base_reg_count - (class_sizes[i] - 1);
+ brw->vs.classes[i] = ra_alloc_reg_class(brw->vs.regs);
+
+ for (int j = 0; j < class_reg_count; j++) {
+ ra_class_add_reg(brw->vs.regs, brw->vs.classes[i], reg);
+
+ /* Register 'reg' of this class starts at base register j. */
+ brw->vs.ra_reg_to_grf[reg] = j;
+
+ /* It conflicts with each base register in [j, j + class size). */
+ for (int base_reg = j;
+ base_reg < j + class_sizes[i];
+ base_reg++) {
+ ra_add_transitive_reg_conflict(brw->vs.regs, base_reg, reg);
+ }
+
+ reg++;
+ }
+ }
+ assert(reg == ra_reg_count);
+
+ ra_set_finalize(brw->vs.regs);
+}
+
+/**
+ * Assign hardware GRFs to the virtual GRFs with the graph-coloring
+ * register allocator, then rewrite every instruction operand to the
+ * chosen hardware register.
+ *
+ * One register class is created per distinct virtual GRF size.  On any
+ * failure (object too large, or allocation would need spilling) fail()
+ * is called and the function returns without touching the
+ * instructions; callers are expected to check the failure state.
+ */
+void
+vec4_visitor::reg_allocate()
+{
+   int hw_reg_mapping[virtual_grf_count];
+   int first_assigned_grf = this->first_non_payload_grf;
+   int base_reg_count = BRW_MAX_GRF - first_assigned_grf;
+   int class_sizes[base_reg_count];
+   int class_count = 0;
+
+   /* Using the trivial allocator can be useful in debugging undefined
+    * register access as a result of broken optimization passes.
+    */
+   if (0) {
+      reg_allocate_trivial();
+      return;
+   }
+
+   calculate_live_intervals();
+
+   /* Set up the register classes.
+    *
+    * The base registers store a vec4.  However, we'll need larger
+    * storage for arrays, structures, and matrices, which will be sets
+    * of contiguous registers.
+    */
+   class_sizes[class_count++] = 1;
+
+   for (int r = 0; r < virtual_grf_count; r++) {
+      int i;
+
+      for (i = 0; i < class_count; i++) {
+         if (class_sizes[i] == this->virtual_grf_sizes[r])
+            break;
+      }
+      if (i == class_count) {
+         if (this->virtual_grf_sizes[r] >= base_reg_count) {
+            /* Stop here: an oversized class would give the register set
+             * builder a non-positive register count, and enough distinct
+             * oversized sizes could overrun class_sizes[].
+             */
+            fail("Object too large to register allocate.\n");
+            return;
+         }
+
+         class_sizes[class_count++] = this->virtual_grf_sizes[r];
+      }
+   }
+
+   brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, base_reg_count);
+
+   struct ra_graph *g = ra_alloc_interference_graph(brw->vs.regs,
+                                                    virtual_grf_count);
+
+   for (int i = 0; i < virtual_grf_count; i++) {
+      for (int c = 0; c < class_count; c++) {
+         if (class_sizes[c] == this->virtual_grf_sizes[i]) {
+            ra_set_node_class(g, i, brw->vs.classes[c]);
+            break;
+         }
+      }
+
+      for (int j = 0; j < i; j++) {
+         if (virtual_grf_interferes(i, j)) {
+            ra_add_node_interference(g, i, j);
+         }
+      }
+   }
+
+   if (!ra_allocate_no_spills(g)) {
+      /* The graph is freed here, so it must not be read or freed again
+       * below: bail out immediately.
+       */
+      ralloc_free(g);
+      fail("No register spilling support yet\n");
+      return;
+   }
+
+   /* Get the chosen virtual registers for each node, and map virtual
+    * regs in the register classes back down to real hardware reg
+    * numbers.
+    */
+   prog_data->total_grf = first_assigned_grf;
+   for (int i = 0; i < virtual_grf_count; i++) {
+      int reg = ra_get_node_reg(g, i);
+
+      hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg];
+      /* A virtual GRF occupies virtual_grf_sizes[i] contiguous hardware
+       * registers starting at hw_reg_mapping[i], so the register count
+       * must cover the whole range, not just its first register.
+       */
+      prog_data->total_grf = MAX2(prog_data->total_grf,
+                                  hw_reg_mapping[i] +
+                                  this->virtual_grf_sizes[i]);
+   }
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      assign(hw_reg_mapping, &inst->dst);
+      assign(hw_reg_mapping, &inst->src[0]);
+      assign(hw_reg_mapping, &inst->src[1]);
+      assign(hw_reg_mapping, &inst->src[2]);
+   }
+
+   ralloc_free(g);
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
new file mode 100644
index 00000000000..b3a07bd0539
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -0,0 +1,2156 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+extern "C" {
+#include "main/macros.h"
+#include "program/prog_parameter.h"
+}
+
+namespace brw {
+
+/**
+ * Build a source register that reads back what \p reg writes.
+ *
+ * The swizzle lists the channels enabled in the destination's
+ * writemask in order, and repeats the last enabled channel to fill the
+ * remaining slots (channel X if the writemask is empty).
+ */
+src_reg::src_reg(dst_reg reg)
+{
+   init();
+
+   this->file = reg.file;
+   this->reg = reg.reg;
+   this->reg_offset = reg.reg_offset;
+   this->type = reg.type;
+   this->reladdr = reg.reladdr;
+   this->fixed_hw_reg = reg.fixed_hw_reg;
+
+   int chans[4];
+   int count = 0;
+   int final_chan = 0;
+
+   /* Collect the written channels in ascending order. */
+   for (int bit = 0; bit < 4; bit++) {
+      if (reg.writemask & (1 << bit)) {
+         chans[count] = bit;
+         final_chan = bit;
+         count++;
+      }
+   }
+
+   /* Pad with the last written channel. */
+   while (count < 4)
+      chans[count++] = final_chan;
+
+   this->swizzle = BRW_SWIZZLE4(chans[0], chans[1], chans[2], chans[3]);
+}
+
+/**
+ * Build a destination register that addresses the same storage as
+ * \p reg.  The source's swizzle is not carried over: all four channels
+ * are enabled in the writemask.
+ */
+dst_reg::dst_reg(src_reg reg)
+{
+   init();
+
+   /* Location of the register. */
+   this->file = reg.file;
+   this->reg = reg.reg;
+   this->reg_offset = reg.reg_offset;
+   this->reladdr = reg.reladdr;
+   this->fixed_hw_reg = reg.fixed_hw_reg;
+
+   /* How it is accessed. */
+   this->type = reg.type;
+   this->writemask = WRITEMASK_XYZW;
+}
+
+/**
+ * Create a vec4 instruction with three sources, tag it with the
+ * current base IR and annotation, and append it to the instruction
+ * list.
+ *
+ * \return the new instruction, so callers can set further fields.
+ */
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode, dst_reg dst,
+                   src_reg src0, src_reg src1, src_reg src2)
+{
+   vec4_instruction *new_inst = new(mem_ctx) vec4_instruction();
+
+   /* Debug information. */
+   new_inst->ir = this->base_ir;
+   new_inst->annotation = this->current_annotation;
+
+   /* Operation and operands. */
+   new_inst->opcode = opcode;
+   new_inst->dst = dst;
+   new_inst->src[0] = src0;
+   new_inst->src[1] = src1;
+   new_inst->src[2] = src2;
+
+   this->instructions.push_tail(new_inst);
+   return new_inst;
+}
+
+
+/** Two-source convenience overload; the third source is a default src_reg. */
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
+{
+ return emit(opcode, dst, src0, src1, src_reg());
+}
+
+/**
+ * One-source convenience overload; the remaining sources are default
+ * src_regs.  Asserts that the destination writes at least one channel.
+ */
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
+{
+ assert(dst.writemask != 0);
+ return emit(opcode, dst, src0, src_reg(), src_reg());
+}
+
+/** Emit an instruction with a default destination and default sources. */
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode)
+{
+ return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg());
+}
+
+/**
+ * Emit a dot product of \p src0 and \p src1 into \p dst.
+ *
+ * \param elements number of components in the dot product; must be
+ *                 2, 3 or 4 (selects DP2, DP3 or DP4).
+ */
+void
+vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
+{
+   static const enum opcode dot_opcodes[] = {
+      BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
+   };
+
+   /* elements is unsigned, so anything below 2 would wrap around to a
+    * huge index; catch bad sizes before reading the table.
+    */
+   assert(elements >= 2 && elements <= 4);
+
+   emit(dot_opcodes[elements - 2], dst, src0, src1);
+}
+
+/**
+ * Emit a one-operand math instruction for gen6.
+ *
+ * The operand is first copied to a temporary, and a partial writemask
+ * on \p dst is handled by computing into a temporary and copying the
+ * result out.
+ */
+void
+vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
+{
+   /* The gen6 math instruction ignores the source modifiers --
+    * swizzle, abs, negate, and at least some parts of the register
+    * region description -- so resolve them with a MOV first.
+    */
+   src_reg resolved_src = src_reg(this, glsl_type::vec4_type);
+   emit(BRW_OPCODE_MOV, dst_reg(resolved_src), src);
+
+   if (dst.writemask == WRITEMASK_XYZW) {
+      emit(opcode, dst, resolved_src);
+      return;
+   }
+
+   /* The gen6 math instruction must be align1, so it can't do
+    * writemasks: write a full temporary and MOV it into place.
+    */
+   dst_reg full_dst = dst_reg(this, glsl_type::vec4_type);
+
+   emit(opcode, full_dst, resolved_src);
+
+   emit(BRW_OPCODE_MOV, dst, src_reg(full_dst));
+}
+
+/**
+ * Emit a one-operand math instruction for pre-gen6 hardware, recording
+ * a one-register message starting at MRF 1 on the instruction.
+ */
+void
+vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
+{
+ vec4_instruction *inst = emit(opcode, dst, src);
+ inst->base_mrf = 1;
+ inst->mlen = 1;
+}
+
+/**
+ * Emit a one-operand math operation (RCP, RSQ, SQRT, EXP2, LOG2, SIN
+ * or COS), dispatching on the hardware generation.
+ *
+ * Any other opcode trips an assertion and emits nothing.
+ */
+void
+vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
+{
+   switch (opcode) {
+   case SHADER_OPCODE_RCP:
+   case SHADER_OPCODE_RSQ:
+   case SHADER_OPCODE_SQRT:
+   case SHADER_OPCODE_EXP2:
+   case SHADER_OPCODE_LOG2:
+   case SHADER_OPCODE_SIN:
+   case SHADER_OPCODE_COS:
+      /* Valid one-operand math opcode. */
+      break;
+   default:
+      assert(!"not reached: bad math opcode");
+      return;
+   }
+
+   if (intel->gen < 6)
+      emit_math1_gen4(opcode, dst, src);
+   else
+      emit_math1_gen6(opcode, dst, src);
+}
+
+/**
+ * Emit a two-operand math instruction for gen6.
+ *
+ * Both operands are first copied to temporaries, and a partial
+ * writemask on \p dst is handled by computing into a temporary and
+ * copying the result out.
+ */
+void
+vec4_visitor::emit_math2_gen6(enum opcode opcode,
+                              dst_reg dst, src_reg src0, src_reg src1)
+{
+   /* The gen6 math instruction ignores the source modifiers --
+    * swizzle, abs, negate, and at least some parts of the register
+    * region description.  Move the sources to temporaries to make it
+    * generally work.
+    */
+   src_reg arg0 = src_reg(this, glsl_type::vec4_type);
+   emit(BRW_OPCODE_MOV, dst_reg(arg0), src0);
+
+   src_reg arg1 = src_reg(this, glsl_type::vec4_type);
+   emit(BRW_OPCODE_MOV, dst_reg(arg1), src1);
+
+   if (dst.writemask == WRITEMASK_XYZW) {
+      emit(opcode, dst, arg0, arg1);
+      return;
+   }
+
+   /* The gen6 math instruction must be align1, so it can't do
+    * writemasks: write a full temporary and MOV it into place.
+    */
+   dst_reg full_dst = dst_reg(this, glsl_type::vec4_type);
+
+   emit(opcode, full_dst, arg0, arg1);
+
+   emit(BRW_OPCODE_MOV, dst, src_reg(full_dst));
+}
+
+/**
+ * Emit a two-operand math instruction for pre-gen6 hardware, recording
+ * a two-register message starting at MRF 1 on the instruction.
+ */
+void
+vec4_visitor::emit_math2_gen4(enum opcode opcode,
+ dst_reg dst, src_reg src0, src_reg src1)
+{
+ vec4_instruction *inst = emit(opcode, dst, src0, src1);
+ inst->base_mrf = 1;
+ inst->mlen = 2;
+}
+
+/**
+ * Emit a two-operand math operation, dispatching on the hardware
+ * generation.  SHADER_OPCODE_POW is the only opcode accepted.
+ */
+void
+vec4_visitor::emit_math(enum opcode opcode,
+                        dst_reg dst, src_reg src0, src_reg src1)
+{
+   assert(opcode == SHADER_OPCODE_POW);
+
+   if (intel->gen < 6)
+      emit_math2_gen4(opcode, dst, src0, src1);
+   else
+      emit_math2_gen6(opcode, dst, src0, src1);
+}
+
+/**
+ * Visit every IR instruction in \p list, in order.
+ *
+ * base_ir is set to each instruction before it is visited; emit()
+ * records it on the instructions generated for that IR.
+ */
+void
+vec4_visitor::visit_instructions(const exec_list *list)
+{
+ foreach_list(node, list) {
+ ir_instruction *ir = (ir_instruction *)node;
+
+ base_ir = ir;
+ ir->accept(this);
+ }
+}
+
+
+/**
+ * Return the number of vec4 slots a value of the given GLSL type
+ * occupies.
+ *
+ * Scalars and vectors take one slot, matrices one slot per column,
+ * arrays and structures the sum of their elements, and samplers one
+ * slot.  Unknown base types assert and return 0.
+ */
+static int
+type_size(const struct glsl_type *type)
+{
+   switch (type->base_type) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL:
+      if (type->is_matrix())
+         return type->matrix_columns;
+
+      /* Regardless of size of vector, it gets a vec4.  This is bad
+       * packing for things like floats, but otherwise arrays become a
+       * mess.  Hopefully a later pass over the code can pack scalars
+       * down if appropriate.
+       */
+      return 1;
+
+   case GLSL_TYPE_ARRAY:
+      assert(type->length > 0);
+      return type_size(type->fields.array) * type->length;
+
+   case GLSL_TYPE_STRUCT: {
+      int total = 0;
+      for (unsigned int field = 0; field < type->length; field++)
+         total += type_size(type->fields.structure[field].type);
+      return total;
+   }
+
+   case GLSL_TYPE_SAMPLER:
+      /* Samplers take up one slot in UNIFORMS[], but they're baked in
+       * at link time.
+       */
+      return 1;
+
+   default:
+      assert(0);
+      return 0;
+   }
+}
+
+/**
+ * Allocate a new virtual GRF of \p size registers.
+ *
+ * The size table starts at 16 entries and doubles whenever it is full.
+ *
+ * \return the index of the new virtual GRF.
+ */
+int
+vec4_visitor::virtual_grf_alloc(int size)
+{
+   if (virtual_grf_count >= virtual_grf_array_size) {
+      virtual_grf_array_size = (virtual_grf_array_size == 0)
+         ? 16 : virtual_grf_array_size * 2;
+      virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
+                                   virtual_grf_array_size);
+   }
+
+   const int new_index = virtual_grf_count;
+   virtual_grf_sizes[new_index] = size;
+   virtual_grf_count = new_index + 1;
+   return new_index;
+}
+
+/**
+ * Allocate a new virtual GRF large enough for \p type and build a
+ * source register for it.
+ *
+ * Arrays and structures get the no-op swizzle; other types get the
+ * swizzle returned by swizzle_for_size() for their component count.
+ */
+src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
+{
+ init();
+
+ this->file = GRF;
+ this->reg = v->virtual_grf_alloc(type_size(type));
+
+ if (type->is_array() || type->is_record()) {
+ this->swizzle = BRW_SWIZZLE_NOOP;
+ } else {
+ this->swizzle = swizzle_for_size(type->vector_elements);
+ }
+
+ this->type = brw_type_for_base_type(type);
+}
+
+/**
+ * Allocate a new virtual GRF large enough for \p type and build a
+ * destination register for it.
+ *
+ * Arrays and structures enable all four channels; other types enable
+ * the low vector_elements channels.
+ */
+dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
+{
+ init();
+
+ this->file = GRF;
+ this->reg = v->virtual_grf_alloc(type_size(type));
+
+ if (type->is_array() || type->is_record()) {
+ this->writemask = WRITEMASK_XYZW;
+ } else {
+ /* One bit per component: e.g. 3 components -> 0b0111. */
+ this->writemask = (1 << type->vector_elements) - 1;
+ }
+
+ this->type = brw_type_for_base_type(type);
+}
+
+/* Our support for uniforms is piggy-backed on the vertex program's
+ * parameter list (this->vp->Base.Parameters), because that's where the
+ * values actually get stored, rather than in some global
+ * gl_shader_program uniform store.
+ */
+/**
+ * Record where the values of a non-builtin uniform live.
+ *
+ * \param loc   index of the uniform's first entry in the vertex
+ *              program's ParameterValues
+ * \param type  GLSL type of the uniform (or of the piece being handled
+ *              when recursing)
+ *
+ * For each scalar/vector piece, one uniform vec4 is appended: the four
+ * c->prog_data.param / param_convert entries of that vec4 are filled in
+ * and this->uniforms is incremented.  Matrices, structures and arrays
+ * recurse over their columns, fields and elements.
+ *
+ * \return the number of ParameterValues entries consumed.
+ */
+int
+vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
+{
+ unsigned int offset = 0;
+ float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
+
+ if (type->is_matrix()) {
+ /* Handle a matrix as matrix_columns float vectors. */
+ const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+ type->vector_elements,
+ 1);
+
+ for (unsigned int i = 0; i < type->matrix_columns; i++) {
+ offset += setup_uniform_values(loc + offset, column);
+ }
+
+ return offset;
+ }
+
+ switch (type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_BOOL:
+ /* Used components: point at the stored value and pick the
+ * conversion matching the base type.
+ */
+ for (unsigned int i = 0; i < type->vector_elements; i++) {
+ int slot = this->uniforms * 4 + i;
+ switch (type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
+ break;
+ case GLSL_TYPE_UINT:
+ c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U;
+ break;
+ case GLSL_TYPE_INT:
+ c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I;
+ break;
+ case GLSL_TYPE_BOOL:
+ c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B;
+ break;
+ default:
+ assert(!"not reached");
+ c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
+ break;
+ }
+ c->prog_data.param[slot] = &values[i];
+ }
+
+ /* Unused trailing components of the vec4 have no backing value. */
+ for (unsigned int i = type->vector_elements; i < 4; i++) {
+ c->prog_data.param_convert[this->uniforms * 4 + i] =
+ PARAM_CONVERT_ZERO;
+ c->prog_data.param[this->uniforms * 4 + i] = NULL;
+ }
+
+ this->uniform_size[this->uniforms] = type->vector_elements;
+ this->uniforms++;
+
+ return 1;
+
+ case GLSL_TYPE_STRUCT:
+ for (unsigned int i = 0; i < type->length; i++) {
+ offset += setup_uniform_values(loc + offset,
+ type->fields.structure[i].type);
+ }
+ return offset;
+
+ case GLSL_TYPE_ARRAY:
+ for (unsigned int i = 0; i < type->length; i++) {
+ offset += setup_uniform_values(loc + offset, type->fields.array);
+ }
+ return offset;
+
+ case GLSL_TYPE_SAMPLER:
+ /* The sampler takes up a slot, but we don't use any values from it. */
+ return 1;
+
+ default:
+ assert(!"not reached");
+ return 0;
+ }
+}
+
+/* Our support for builtin uniforms is even scarier than non-builtin.
+ * It sits on top of the PROG_STATE_VAR parameters that are
+ * automatically updated from GL context state.
+ */
+/**
+ * Record where the values of a builtin uniform live.
+ *
+ * One uniform vec4 is appended per state slot of \p ir: each of its
+ * four c->prog_data.param entries points at the state parameter
+ * component selected by the slot's swizzle, with no conversion.
+ */
+void
+vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
+{
+ const ir_state_slot *const slots = ir->state_slots;
+ assert(ir->state_slots != NULL);
+
+ for (unsigned int i = 0; i < ir->num_state_slots; i++) {
+ /* This state reference has already been setup by ir_to_mesa,
+ * but we'll get the same index back here. We can reference
+ * ParameterValues directly, since unlike brw_fs.cpp, we never
+ * add new state references during compile.
+ */
+ int index = _mesa_add_state_reference(this->vp->Base.Parameters,
+ (gl_state_index *)slots[i].tokens);
+ float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
+
+ this->uniform_size[this->uniforms] = 0;
+ /* Add each of the unique swizzled channels of the element.
+ * This will end up matching the size of the glsl_type of this field.
+ */
+ int last_swiz = -1;
+ for (unsigned int j = 0; j < 4; j++) {
+ int swiz = GET_SWZ(slots[i].swizzle, j);
+ last_swiz = swiz;
+
+ c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
+ c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT;
+ /* NOTE(review): last_swiz was assigned swiz just above, so this
+ * test is always true and uniform_size ends up 4 for every slot,
+ * not the count of unique channels the comment above describes.
+ * Confirm what the users of uniform_size expect before changing it.
+ */
+ if (swiz <= last_swiz)
+ this->uniform_size[this->uniforms]++;
+ }
+ this->uniforms++;
+ }
+}
+
+/**
+ * Look up the register recorded for \p var in variable_ht.
+ *
+ * \return whatever hash_table_find() yields for \p var; callers in
+ * this file treat a NULL result as "no storage assigned yet".
+ */
+dst_reg *
+vec4_visitor::variable_storage(ir_variable *var)
+{
+ return (dst_reg *)hash_table_find(this->variable_ht, var);
+}
+
+/**
+ * Evaluate the boolean rvalue \p ir and emit an instruction, writing
+ * to a null destination, whose conditional modifier reflects it.
+ *
+ * When \p ir is an expression, its operands are visited and the
+ * comparison or logic operation is emitted directly with the matching
+ * conditional modifier.  Otherwise the rvalue is evaluated and tested
+ * for non-zero.
+ */
+void
+vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
+{
+ ir_expression *expr = ir->as_expression();
+
+ if (expr) {
+ src_reg op[2];
+ vec4_instruction *inst;
+
+ assert(expr->get_num_operands() <= 2);
+ for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
+ assert(expr->operands[i]->type->is_scalar());
+
+ expr->operands[i]->accept(this);
+ op[i] = this->result;
+ }
+
+ switch (expr->operation) {
+ case ir_unop_logic_not:
+ /* Condition holds when the low bit of the operand is clear. */
+ inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
+ inst->conditional_mod = BRW_CONDITIONAL_Z;
+ break;
+
+ case ir_binop_logic_xor:
+ inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ break;
+
+ case ir_binop_logic_or:
+ inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ break;
+
+ case ir_binop_logic_and:
+ inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ break;
+
+ case ir_unop_f2b:
+ /* gen6+ compares against 0.0f; earlier gens use a MOV. */
+ if (intel->gen >= 6) {
+ inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
+ } else {
+ inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
+ }
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ break;
+
+ case ir_unop_i2b:
+ /* gen6+ compares against 0; earlier gens use a MOV. */
+ if (intel->gen >= 6) {
+ inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
+ } else {
+ inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
+ }
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ break;
+
+ case ir_binop_greater:
+ case ir_binop_gequal:
+ case ir_binop_less:
+ case ir_binop_lequal:
+ case ir_binop_equal:
+ case ir_binop_all_equal:
+ case ir_binop_nequal:
+ case ir_binop_any_nequal:
+ inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
+ inst->conditional_mod =
+ brw_conditional_for_comparison(expr->operation);
+ break;
+
+ default:
+ assert(!"not reached");
+ break;
+ }
+ return;
+ }
+
+ /* Not an expression: evaluate the value and test it for non-zero. */
+ ir->accept(this);
+
+ if (intel->gen >= 6) {
+ vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
+ this->result, src_reg(1));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ } else {
+ vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ }
+}
+
+/**
+ * Emit a gen6 IF statement with the comparison folded into the IF
+ * instruction.
+ */
+void
+vec4_visitor::emit_if_gen6(ir_if *ir)
+{
+ ir_expression *expr = ir->condition->as_expression();
+
+ if (expr) {
+ src_reg op[2];
+ vec4_instruction *inst;
+ dst_reg temp;
+
+ assert(expr->get_num_operands() <= 2);
+ for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
+ expr->operands[i]->accept(this);
+ op[i] = this->result;
+ }
+
+ /* Every case below emits the IF itself and returns. */
+ switch (expr->operation) {
+ case ir_unop_logic_not:
+ inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_Z;
+ return;
+
+ case ir_binop_logic_xor:
+ inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+
+ case ir_binop_logic_or:
+ /* Compute the OR into a temporary, then compare that with 0. */
+ temp = dst_reg(this, glsl_type::bool_type);
+ emit(BRW_OPCODE_OR, temp, op[0], op[1]);
+ inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+
+ case ir_binop_logic_and:
+ /* Compute the AND into a temporary, then compare that with 0. */
+ temp = dst_reg(this, glsl_type::bool_type);
+ emit(BRW_OPCODE_AND, temp, op[0], op[1]);
+ inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+
+ case ir_unop_f2b:
+ inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+
+ case ir_unop_i2b:
+ inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+
+ case ir_binop_greater:
+ case ir_binop_gequal:
+ case ir_binop_less:
+ case ir_binop_lequal:
+ case ir_binop_equal:
+ case ir_binop_nequal:
+ inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
+ inst->conditional_mod =
+ brw_conditional_for_comparison(expr->operation);
+ return;
+
+ case ir_binop_all_equal:
+ /* Vector comparison: a separate CMP, then an IF predicated with
+ * ALL4H.
+ */
+ inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_Z;
+
+ inst = emit(BRW_OPCODE_IF);
+ inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
+ return;
+
+ case ir_binop_any_nequal:
+ /* Vector comparison: a separate CMP, then an IF predicated with
+ * ANY4H.
+ */
+ inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+ inst = emit(BRW_OPCODE_IF);
+ inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+ return;
+
+ case ir_unop_any:
+ inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+ inst = emit(BRW_OPCODE_IF);
+ inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+ return;
+
+ default:
+ /* Unexpected operation: assert, and without assertions fall back
+ * to testing the first operand for non-zero.
+ */
+ assert(!"not reached");
+ inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+ }
+ return;
+ }
+
+ /* Not an expression: evaluate the condition and compare it with 0. */
+ ir->condition->accept(this);
+
+ vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
+ this->result, src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+}
+
+/**
+ * Assign storage to a variable declaration and record it in
+ * variable_ht.
+ *
+ * Inputs map to ATTR registers, outputs and temporaries get new
+ * virtual GRFs (outputs are also recorded per slot in output_reg[]),
+ * and uniforms map to UNIFORM registers after their values are set up.
+ * A variable that already has storage is left untouched.  An
+ * unhandled variable mode is reported through fail().
+ */
+void
+vec4_visitor::visit(ir_variable *ir)
+{
+   dst_reg *reg = NULL;
+
+   if (variable_storage(ir))
+      return;
+
+   switch (ir->mode) {
+   case ir_var_in:
+      reg = new(mem_ctx) dst_reg(ATTR, ir->location);
+      break;
+
+   case ir_var_out:
+      reg = new(mem_ctx) dst_reg(this, ir->type);
+
+      /* Record one output register per vec4 slot of the variable. */
+      for (int i = 0; i < type_size(ir->type); i++) {
+         output_reg[ir->location + i] = *reg;
+         output_reg[ir->location + i].reg_offset = i;
+         output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
+      }
+      break;
+
+   case ir_var_auto:
+   case ir_var_temporary:
+      reg = new(mem_ctx) dst_reg(this, ir->type);
+      break;
+
+   case ir_var_uniform:
+      reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
+
+      /* Builtin uniforms are identified by their "gl_" name prefix. */
+      if (!strncmp(ir->name, "gl_", 3)) {
+         setup_builtin_uniform_values(ir);
+      } else {
+         setup_uniform_values(ir->location, ir->type);
+      }
+      break;
+
+   default:
+      assert(!"not reached");
+      /* With assertions compiled out, reg is still NULL here and must
+       * not be dereferenced below: report the failure and bail.
+       */
+      fail("Unhandled variable mode %d in VS\n", (int)ir->mode);
+      return;
+   }
+
+   reg->type = brw_type_for_base_type(ir->type);
+   hash_table_insert(this->variable_ht, reg, ir);
+}
+
+/**
+ * Emits a DO ... WHILE loop.
+ *
+ * When the loop carries a counter, the counter is initialized from
+ * ir->from before the DO, compared against ir->to at the top of each
+ * iteration (followed by a predicated BREAK), and advanced by
+ * ir->increment after the body.
+ */
+void
+vec4_visitor::visit(ir_loop *ir)
+{
+   dst_reg loop_counter;
+
+   /* Clear the annotation source so debugging output does not print the
+    * whole loop body as the annotation.
+    */
+   this->base_ir = NULL;
+
+   if (ir->counter) {
+      this->base_ir = ir->counter;
+      ir->counter->accept(this);
+      loop_counter = *(variable_storage(ir->counter));
+
+      if (ir->from) {
+         this->base_ir = ir->from;
+         ir->from->accept(this);
+
+         emit(BRW_OPCODE_MOV, loop_counter, this->result);
+      }
+   }
+
+   emit(BRW_OPCODE_DO);
+
+   if (ir->to != NULL) {
+      this->base_ir = ir->to;
+      ir->to->accept(this);
+
+      /* Compare the counter against the bound, then break on the flag. */
+      vec4_instruction *cmp = emit(BRW_OPCODE_CMP, dst_null_d(),
+                                   src_reg(loop_counter), this->result);
+      cmp->conditional_mod = brw_conditional_for_comparison(ir->cmp);
+
+      vec4_instruction *brk = emit(BRW_OPCODE_BREAK);
+      brk->predicate = BRW_PREDICATE_NORMAL;
+   }
+
+   visit_instructions(&ir->body_instructions);
+
+   if (ir->increment != NULL) {
+      this->base_ir = ir->increment;
+      ir->increment->accept(this);
+      emit(BRW_OPCODE_ADD, loop_counter, src_reg(loop_counter), this->result);
+   }
+
+   emit(BRW_OPCODE_WHILE);
+}
+
+/* Emits the BREAK or CONTINUE instruction matching the jump's mode. */
+void
+vec4_visitor::visit(ir_loop_jump *ir)
+{
+   if (ir->mode == ir_loop_jump::jump_break) {
+      emit(BRW_OPCODE_BREAK);
+   } else if (ir->mode == ir_loop_jump::jump_continue) {
+      emit(BRW_OPCODE_CONTINUE);
+   }
+}
+
+
+/* Signatures are never visited directly by this backend; reaching this
+ * handler trips the assertion.
+ */
+void
+vec4_visitor::visit(ir_function_signature *ir)
+{
+   (void)ir;
+   assert(0);
+}
+
+/**
+ * Visits the body of main(); every other function is skipped.
+ */
+void
+vec4_visitor::visit(ir_function *ir)
+{
+   /* Only main() matters here -- calls to any other function should have
+    * been inlined already, so their bodies are ignored.
+    */
+   if (strcmp(ir->name, "main") != 0)
+      return;
+
+   /* main() takes no parameters, so match against an empty list. */
+   exec_list no_params;
+   const ir_function_signature *main_sig = ir->matching_signature(&no_params);
+
+   assert(main_sig);
+
+   visit_instructions(&main_sig->body);
+}
+
+/**
+ * If the expression is recognized by as_rvalue_to_saturate(), emits the
+ * inner value followed by a saturating MOV into a new temporary and
+ * leaves that temporary in this->result.
+ *
+ * Returns true when the saturate was emitted, false when the expression
+ * was not recognized (in which case nothing is emitted).
+ */
+GLboolean
+vec4_visitor::try_emit_sat(ir_expression *ir)
+{
+   ir_rvalue *saturated = ir->as_rvalue_to_saturate();
+   if (saturated == NULL)
+      return false;
+
+   saturated->accept(this);
+   src_reg value = this->result;
+
+   this->result = src_reg(this, ir->type);
+   vec4_instruction *mov = emit(BRW_OPCODE_MOV, dst_reg(this->result), value);
+   mov->saturate = true;
+
+   return true;
+}
+
+/**
+ * Emits a CMP of src0 against src1 using the condition for @op, then
+ * masks the destination down to bit 0 with an AND.
+ */
+void
+vec4_visitor::emit_bool_comparison(unsigned int op,
+                                   dst_reg dst, src_reg src0, src_reg src1)
+{
+   /* Original gen4 converts to the destination type before comparing, so
+    * make the destination type match the source there.
+    */
+   if (intel->gen < 5)
+      dst.type = src0.type;
+
+   vec4_instruction *cmp = emit(BRW_OPCODE_CMP, dst, src0, src1);
+   cmp->conditional_mod = brw_conditional_for_comparison(op);
+
+   /* The AND always operates on the destination as a signed dword. */
+   dst.type = BRW_REGISTER_TYPE_D;
+   emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
+}
+
+/**
+ * Generates code for an expression.
+ *
+ * The saturate peephole (try_emit_sat) is tried first.  Otherwise every
+ * operand is evaluated into op[], a temporary of the expression's type is
+ * set up as the default result, and the instruction(s) for ir->operation
+ * are emitted.  A few operations (neg, abs) produce no instruction and
+ * return a modified operand in this->result instead.
+ *
+ * Operations that an earlier lowering pass is expected to have removed
+ * only assert.
+ */
+void
+vec4_visitor::visit(ir_expression *ir)
+{
+   unsigned int operand;
+   src_reg op[Elements(ir->operands)];
+   src_reg result_src;
+   dst_reg result_dst;
+   vec4_instruction *inst;
+
+   if (try_emit_sat(ir))
+      return;
+
+   for (operand = 0; operand < ir->get_num_operands(); operand++) {
+      /* Mark the result invalid so a visitor that fails to produce a
+       * value can be detected below.
+       */
+      this->result.file = BAD_FILE;
+      ir->operands[operand]->accept(this);
+      if (this->result.file == BAD_FILE) {
+         printf("Failed to get tree for expression operand:\n");
+         ir->operands[operand]->print();
+         exit(1);
+      }
+      op[operand] = this->result;
+
+      /* Matrix expression operands should have been broken down to vector
+       * operations already.
+       */
+      assert(!ir->operands[operand]->type->is_matrix());
+   }
+
+   this->result.file = BAD_FILE;
+
+   /* Storage for our result.  Ideally for an assignment we'd be using
+    * the actual storage for the result here, instead.
+    */
+   result_src = src_reg(this, ir->type);
+   /* convenience for the emit functions below. */
+   result_dst = dst_reg(result_src);
+   /* If nothing special happens, this is the result. */
+   this->result = result_src;
+   /* Limit writes to the channels that will be used by result_src later.
+    * This does limit this temp's use as a temporary for multi-instruction
+    * sequences.
+    */
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+
+   switch (ir->operation) {
+   case ir_unop_logic_not:
+      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
+       * ones complement of the whole register, not just bit 0.
+       */
+      emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1));
+      break;
+   case ir_unop_neg:
+      /* No instruction needed: flip the source modifier on the operand. */
+      op[0].negate = !op[0].negate;
+      this->result = op[0];
+      break;
+   case ir_unop_abs:
+      /* No instruction needed: set the abs source modifier. */
+      op[0].abs = true;
+      op[0].negate = false;
+      this->result = op[0];
+      break;
+
+   case ir_unop_sign:
+      /* Start at 0, then overwrite with +1 / -1 where op[0] compares
+       * greater / less than zero.
+       */
+      emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f));
+
+      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
+      inst->conditional_mod = BRW_CONDITIONAL_G;
+      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f));
+      inst->predicate = BRW_PREDICATE_NORMAL;
+
+      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
+      inst->conditional_mod = BRW_CONDITIONAL_L;
+      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f));
+      inst->predicate = BRW_PREDICATE_NORMAL;
+
+      break;
+
+   case ir_unop_rcp:
+      emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
+      break;
+
+   case ir_unop_exp2:
+      emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
+      break;
+   case ir_unop_log2:
+      emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
+      break;
+   case ir_unop_exp:
+   case ir_unop_log:
+      assert(!"not reached: should be handled by ir_explog_to_explog2");
+      break;
+   case ir_unop_sin:
+   case ir_unop_sin_reduced:
+      emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
+      break;
+   case ir_unop_cos:
+   case ir_unop_cos_reduced:
+      emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
+      break;
+
+   case ir_unop_dFdx:
+   case ir_unop_dFdy:
+      assert(!"derivatives not valid in vertex shader");
+      break;
+
+   case ir_unop_noise:
+      assert(!"not reached: should be handled by lower_noise");
+      break;
+
+   case ir_binop_add:
+      emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_sub:
+      assert(!"not reached: should be handled by ir_sub_to_add_neg");
+      break;
+
+   case ir_binop_mul:
+      if (ir->type->is_integer()) {
+         /* For integer multiplication, the MUL uses the low 16 bits
+          * of one of the operands (src0 on gen6, src1 on gen7).  The
+          * MACH accumulates in the contribution of the upper 16 bits
+          * of that operand.
+          *
+          * FINISHME: Emit just the MUL if we know an operand is small
+          * enough.
+          */
+         struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
+
+         emit(BRW_OPCODE_MUL, acc, op[0], op[1]);
+         emit(BRW_OPCODE_MACH, dst_null_d(), op[0], op[1]);
+         emit(BRW_OPCODE_MOV, result_dst, src_reg(acc));
+      } else {
+         emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
+      }
+      break;
+   case ir_binop_div:
+      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
+      break;
+   case ir_binop_mod:
+      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
+      break;
+
+   case ir_binop_less:
+   case ir_binop_greater:
+   case ir_binop_lequal:
+   case ir_binop_gequal:
+   case ir_binop_equal:
+   case ir_binop_nequal: {
+      dst_reg temp = result_dst;
+      /* original gen4 does implicit conversion before comparison. */
+      if (intel->gen < 5)
+         temp.type = op[0].type;
+
+      inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
+      inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
+      /* Mask the comparison result down to bit 0. */
+      emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
+      break;
+   }
+
+   case ir_binop_all_equal:
+      /* "==" operator producing a scalar boolean. */
+      if (ir->operands[0]->type->is_vector() ||
+          ir->operands[1]->type->is_vector()) {
+         inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
+         inst->conditional_mod = BRW_CONDITIONAL_Z;
+
+         emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
+         inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
+         inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
+      } else {
+         dst_reg temp = result_dst;
+         /* original gen4 does implicit conversion before comparison. */
+         if (intel->gen < 5)
+            temp.type = op[0].type;
+
+         inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
+         inst->conditional_mod = BRW_CONDITIONAL_Z;
+         emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
+      }
+      break;
+   case ir_binop_any_nequal:
+      /* "!=" operator producing a scalar boolean. */
+      if (ir->operands[0]->type->is_vector() ||
+          ir->operands[1]->type->is_vector()) {
+         inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
+         inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+         emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
+         inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
+         inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+      } else {
+         dst_reg temp = result_dst;
+         /* original gen4 does implicit conversion before comparison. */
+         if (intel->gen < 5)
+            temp.type = op[0].type;
+
+         inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
+         inst->conditional_mod = BRW_CONDITIONAL_NZ;
+         emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
+      }
+      break;
+
+   case ir_unop_any:
+      inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+      emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
+
+      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
+      inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+      break;
+
+   case ir_binop_logic_xor:
+      emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_logic_or:
+      emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_logic_and:
+      emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_dot:
+      assert(ir->operands[0]->type->is_vector());
+      assert(ir->operands[0]->type == ir->operands[1]->type);
+      emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
+      break;
+
+   case ir_unop_sqrt:
+      emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
+      break;
+   case ir_unop_rsq:
+      emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
+      break;
+   case ir_unop_i2f:
+   case ir_unop_i2u:
+   case ir_unop_u2i:
+   case ir_unop_u2f:
+   case ir_unop_b2f:
+   case ir_unop_b2i:
+   case ir_unop_f2i:
+      /* These conversions are a plain MOV between operand and result. */
+      emit(BRW_OPCODE_MOV, result_dst, op[0]);
+      break;
+   case ir_unop_f2b:
+   case ir_unop_i2b: {
+      dst_reg temp = result_dst;
+      /* original gen4 does implicit conversion before comparison. */
+      if (intel->gen < 5)
+         temp.type = op[0].type;
+
+      /* NOTE(review): i2b also compares against a float 0.0f immediate
+       * here -- confirm that is intended for integer operands.
+       */
+      inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+      inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
+      break;
+   }
+
+   case ir_unop_trunc:
+      emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
+      break;
+   case ir_unop_ceil:
+      /* ceil(x) is computed as -RNDD(-x): negate the operand going in and
+       * the result coming out.
+       */
+      op[0].negate = !op[0].negate;
+      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
+      this->result.negate = true;
+      break;
+   case ir_unop_floor:
+      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
+      break;
+   case ir_unop_fract:
+      inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
+      break;
+   case ir_unop_round_even:
+      emit(BRW_OPCODE_RNDE, result_dst, op[0]);
+      break;
+
+   case ir_binop_min:
+      /* Compare, then select between the operands on the flag. */
+      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
+      inst->conditional_mod = BRW_CONDITIONAL_L;
+
+      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
+      inst->predicate = BRW_PREDICATE_NORMAL;
+      break;
+   case ir_binop_max:
+      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
+      inst->conditional_mod = BRW_CONDITIONAL_G;
+
+      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
+      inst->predicate = BRW_PREDICATE_NORMAL;
+      break;
+
+   case ir_binop_pow:
+      emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
+      break;
+
+   case ir_unop_bit_not:
+      inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
+      break;
+   case ir_binop_bit_and:
+      inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_bit_xor:
+      inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_bit_or:
+      inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_lshift:
+   case ir_binop_rshift:
+      assert(!"GLSL 1.30 features unsupported");
+      break;
+
+   case ir_quadop_vector:
+      assert(!"not reached: should be handled by lower_quadop_vector");
+      break;
+   }
+}
+
+
+/**
+ * Composes an rvalue swizzle with whatever swizzle the swizzled value
+ * already carries.
+ *
+ * This only covers swizzles inside expressions.  Swizzles on the left
+ * hand side of an assignment become write masks; see ir_assignment.
+ */
+void
+vec4_visitor::visit(ir_swizzle *ir)
+{
+   /* Source channel selected for each of the four result channels. */
+   const unsigned mask_chan[4] = {
+      ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w
+   };
+   int swizzle[4];
+   int i = 0;
+
+   ir->val->accept(this);
+   src_reg src = this->result;
+   assert(src.file != BAD_FILE);
+
+   /* Look each selected channel up through the existing swizzle. */
+   for (i = 0; i < ir->type->vector_elements; i++)
+      swizzle[i] = BRW_GET_SWZ(src.swizzle, mask_chan[i]);
+
+   /* Replicate the last channel out to fill the remaining slots. */
+   for (; i < 4; i++)
+      swizzle[i] = swizzle[ir->type->vector_elements - 1];
+
+   src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+
+   this->result = src;
+}
+
+/**
+ * Produces a source register reading the storage of the dereferenced
+ * variable.  If the variable has no storage, the visitor is marked as
+ * failed and the result is the null register.
+ */
+void
+vec4_visitor::visit(ir_dereference_variable *ir)
+{
+   dst_reg *storage = variable_storage(ir->var);
+
+   if (storage == NULL) {
+      fail("Failed to find variable storage for %s\n", ir->var->name);
+      this->result = src_reg(brw_null_reg());
+      return;
+   }
+
+   this->result = src_reg(*storage);
+
+   /* Numeric types get a swizzle sized to their vector width. */
+   const struct glsl_type *var_type = ir->type;
+   const bool is_numeric = var_type->is_scalar() ||
+                           var_type->is_vector() ||
+                           var_type->is_matrix();
+   if (is_numeric)
+      this->result.swizzle = swizzle_for_size(var_type->vector_elements);
+}
+
+/**
+ * Produces a source register for one element of an array (or one column
+ * of a matrix).
+ *
+ * A constant index is folded into reg_offset.  A variable index is
+ * scaled by the element size and attached to the register as reladdr,
+ * added to any reladdr the base register already carries.
+ */
+void
+vec4_visitor::visit(ir_dereference_array *ir)
+{
+   const int elem_size = type_size(ir->type);
+   ir_constant *const_index = ir->array_index->constant_expression_value();
+
+   ir->array->accept(this);
+   src_reg src = this->result;
+
+   if (const_index != NULL) {
+      src.reg_offset += const_index->value.i[0] * elem_size;
+   } else {
+      /* Variable index array dereference.  It eats the "vec4" of the
+       * base of the array and an index that offsets the Mesa register
+       * index.
+       */
+      ir->array_index->accept(this);
+
+      src_reg index_reg;
+
+      if (elem_size != 1) {
+         /* Scale the index by the number of registers per element. */
+         index_reg = src_reg(this, glsl_type::int_type);
+
+         emit(BRW_OPCODE_MUL, dst_reg(index_reg),
+              this->result, src_reg(elem_size));
+      } else {
+         index_reg = this->result;
+      }
+
+      if (src.reladdr) {
+         /* The base is already relatively addressed: sum both offsets. */
+         src_reg sum = src_reg(this, glsl_type::int_type);
+
+         emit(BRW_OPCODE_ADD, dst_reg(sum), *src.reladdr, index_reg);
+
+         index_reg = sum;
+      }
+
+      src.reladdr = ralloc(mem_ctx, src_reg);
+      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
+   }
+
+   /* If the type is smaller than a vec4, replicate the last channel out. */
+   const bool is_vec_or_scalar = ir->type->is_scalar() || ir->type->is_vector();
+   src.swizzle = is_vec_or_scalar
+      ? swizzle_for_size(ir->type->vector_elements)
+      : BRW_SWIZZLE_NOOP;
+   src.type = brw_type_for_base_type(ir->type);
+
+   this->result = src;
+}
+
+/**
+ * Produces a source register for one field of a structure by advancing
+ * the record's register offset past all the fields that precede it.
+ */
+void
+vec4_visitor::visit(ir_dereference_record *ir)
+{
+   const glsl_type *record_type = ir->record->type;
+   int field_offset = 0;
+
+   ir->record->accept(this);
+
+   /* Sum the sizes of the fields in front of the one being accessed. */
+   unsigned int field = 0;
+   while (field < record_type->length &&
+          strcmp(record_type->fields.structure[field].name, ir->field) != 0) {
+      field_offset += type_size(record_type->fields.structure[field].type);
+      field++;
+   }
+
+   /* If the type is smaller than a vec4, replicate the last channel out. */
+   if (ir->type->is_scalar() || ir->type->is_vector()) {
+      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
+   } else {
+      this->result.swizzle = BRW_SWIZZLE_NOOP;
+   }
+   this->result.type = brw_type_for_base_type(ir->type);
+
+   this->result.reg_offset += field_offset;
+}
+
+/**
+ * Returns the destination register for the left hand side of an
+ * assignment.
+ *
+ * Assignment setup has to hit the actual storage of the LHS rather than
+ * a temporary such as the ir_dereference handlers might otherwise use.
+ */
+static dst_reg
+get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
+{
+   /* The LHS must be a dereference.  A variable-indexed array access of
+    * a vector has to be split into a series of conditional moves before
+    * reaching this point (see ir_vec_index_to_cond_assign).
+    */
+   assert(ir->as_dereference());
+   ir_dereference_array *array_deref = ir->as_dereference_array();
+   if (array_deref != NULL)
+      assert(!array_deref->array->type->is_vector());
+
+   /* Reuse the rvalue deref handler for the most part.  Swizzles in its
+    * result are ignored; write swizzles are expressed with the writemask.
+    */
+   ir->accept(v);
+
+   dst_reg lhs(v->result);
+   return lhs;
+}
+
+/**
+ * Copies a value of arbitrary type from *src to *dst one register at a
+ * time.
+ *
+ * Structures, arrays and matrices are walked recursively down to their
+ * scalar/vector leaves; each leaf is copied with a single MOV, after
+ * which both dst->reg_offset and src->reg_offset are advanced by one.
+ * The caller's registers are therefore modified in place.
+ *
+ * @param dst         destination register, advanced as registers are written
+ * @param src         source register, advanced as registers are read
+ * @param type        type of the value being copied
+ * @param predicated  when true, every MOV is predicated on the flag
+ */
+void
+vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
+                              const struct glsl_type *type, bool predicated)
+{
+   if (type->base_type == GLSL_TYPE_STRUCT) {
+      for (unsigned int i = 0; i < type->length; i++) {
+         emit_block_move(dst, src, type->fields.structure[i].type, predicated);
+      }
+      return;
+   }
+
+   if (type->is_array()) {
+      for (unsigned int i = 0; i < type->length; i++) {
+         emit_block_move(dst, src, type->fields.array, predicated);
+      }
+      return;
+   }
+
+   if (type->is_matrix()) {
+      const struct glsl_type *vec_type;
+
+      /* A matrix is moved as matrix_columns float column vectors. */
+      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+                                         type->vector_elements, 1);
+
+      for (int i = 0; i < type->matrix_columns; i++) {
+         emit_block_move(dst, src, vec_type, predicated);
+      }
+      return;
+   }
+
+   assert(type->is_scalar() || type->is_vector());
+
+   dst->type = brw_type_for_base_type(type);
+   src->type = dst->type;
+
+   dst->writemask = (1 << type->vector_elements) - 1;
+
+   /* Any swizzle already on *src is discarded here.  The old
+    * "assert(src->swizzle = BRW_SWIZZLE_NOOP)" assigned instead of
+    * comparing, so it could never fire and only had a side effect in
+    * debug builds.  It is not turned into an equality check because a
+    * previous leaf of the same block move leaves the swizzle set below
+    * in *src, which may no longer be the no-op swizzle.
+    */
+   src->swizzle = swizzle_for_size(type->vector_elements);
+
+   vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src);
+   if (predicated)
+      inst->predicate = BRW_PREDICATE_NORMAL;
+
+   dst->reg_offset++;
+   src->reg_offset++;
+}
+
+
+/* When evaluating the RHS ended with an instruction that writes a
+ * temporary, and that instruction can easily be retargeted, make it
+ * write straight into the LHS instead.  This reliably removes
+ * instructions that would be tricky to remove later without real UD
+ * chain information.
+ *
+ * Returns true if the last RHS instruction was rewritten, in which case
+ * the caller needs no further move.
+ */
+bool
+vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
+                                     dst_reg dst,
+                                     src_reg src,
+                                     vec4_instruction *pre_rhs_inst,
+                                     vec4_instruction *last_rhs_inst)
+{
+   /* This could be supported, but it would take more smarts. */
+   if (ir->condition)
+      return false;
+
+   /* No instructions generated to work with. */
+   if (pre_rhs_inst == last_rhs_inst)
+      return false;
+
+   /* Make sure the last instruction generated our source reg, that the
+    * source is read without modifiers or relative addressing, and that
+    * the instruction is not predicated.
+    */
+   const bool written_by_last =
+      src.file == GRF &&
+      src.file == last_rhs_inst->dst.file &&
+      src.reg == last_rhs_inst->dst.reg &&
+      src.reg_offset == last_rhs_inst->dst.reg_offset;
+   if (!written_by_last)
+      return false;
+
+   if (src.reladdr || src.abs || src.negate)
+      return false;
+
+   if (last_rhs_inst->predicate != BRW_PREDICATE_NONE)
+      return false;
+
+   /* Check that the last instruction fully initialized the channels we
+    * want to use, in the order we want to use them.  We could
+    * potentially reswizzle the operands of many instructions so that we
+    * could handle out of order channels, but don't yet.
+    */
+   for (int chan = 0; chan < 4; chan++) {
+      const int chan_bit = 1 << chan;
+
+      if (!(dst.writemask & chan_bit))
+         continue;
+
+      if (!(last_rhs_inst->dst.writemask & chan_bit))
+         return false;
+
+      if (BRW_GET_SWZ(src.swizzle, chan) != chan)
+         return false;
+   }
+
+   /* Success!  Rewrite the instruction. */
+   last_rhs_inst->dst.file = dst.file;
+   last_rhs_inst->dst.reg = dst.reg;
+   last_rhs_inst->dst.reg_offset = dst.reg_offset;
+   last_rhs_inst->dst.reladdr = dst.reladdr;
+   last_rhs_inst->dst.writemask &= dst.writemask;
+
+   return true;
+}
+
+/**
+ * Generates code for an assignment.
+ *
+ * Aggregate (non scalar/vector) assignments are handled as a block move.
+ * Scalar/vector assignments swizzle the RHS into the channels selected
+ * by the write mask, then either retarget the last RHS instruction at
+ * the LHS or emit an (optionally predicated) MOV.
+ */
+void
+vec4_visitor::visit(ir_assignment *ir)
+{
+   dst_reg dst = get_assignment_lhs(ir->lhs, this);
+
+   const bool lhs_is_vec_or_scalar = ir->lhs->type->is_scalar() ||
+                                     ir->lhs->type->is_vector();
+   if (!lhs_is_vec_or_scalar) {
+      ir->rhs->accept(this);
+      src_reg block_src = this->result;
+
+      if (ir->condition) {
+         emit_bool_to_cond_code(ir->condition);
+      }
+
+      emit_block_move(&dst, &block_src, ir->rhs->type, ir->condition != NULL);
+      return;
+   }
+
+   /* Now we're down to just a scalar/vector with writemasks. */
+
+   /* Remember the instruction list tail on both sides of the RHS so the
+    * instructions it generated can be identified afterwards.
+    */
+   vec4_instruction *pre_rhs_inst =
+      (vec4_instruction *)this->instructions.get_tail();
+
+   ir->rhs->accept(this);
+
+   vec4_instruction *last_rhs_inst =
+      (vec4_instruction *)this->instructions.get_tail();
+
+   src_reg src = this->result;
+
+   assert(ir->lhs->type->is_vector() ||
+          ir->lhs->type->is_scalar());
+   dst.writemask = ir->write_mask;
+
+   /* Source channel used by the first enabled destination channel; it
+    * fills the slots of channels that are not written.
+    */
+   int first_enabled_chan = 0;
+   for (int chan = 0; chan < 4; chan++) {
+      if (dst.writemask & (1 << chan)) {
+         first_enabled_chan = BRW_GET_SWZ(src.swizzle, chan);
+         break;
+      }
+   }
+
+   /* Swizzle a small RHS vector into the channels being written.
+    *
+    * glsl ir treats write_mask as dictating how many channels are
+    * present on the RHS while in our instructions we need to make
+    * those channels appear in the slots of the vec4 they're written to.
+    */
+   int swizzles[4];
+   int next_src_chan = 0;
+   for (int chan = 0; chan < 4; chan++) {
+      if (dst.writemask & (1 << chan)) {
+         swizzles[chan] = BRW_GET_SWZ(src.swizzle, next_src_chan);
+         next_src_chan++;
+      } else {
+         swizzles[chan] = first_enabled_chan;
+      }
+   }
+   src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
+                              swizzles[2], swizzles[3]);
+
+   if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst))
+      return;
+
+   if (ir->condition)
+      emit_bool_to_cond_code(ir->condition);
+
+   for (int reg = 0; reg < type_size(ir->lhs->type); reg++) {
+      vec4_instruction *mov = emit(BRW_OPCODE_MOV, dst, src);
+
+      if (ir->condition)
+         mov->predicate = BRW_PREDICATE_NORMAL;
+
+      dst.reg_offset++;
+      src.reg_offset++;
+   }
+}
+
+/**
+ * Emits MOVs of immediates that load a constant into *dst, one channel
+ * at a time.
+ *
+ * Structures and arrays recurse into their members.  dst->reg_offset is
+ * advanced by one for every register (matrix column or scalar/vector)
+ * written, so the caller's register is modified in place.
+ */
+void
+vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
+{
+   const glsl_type *type = ir->type;
+
+   if (type->base_type == GLSL_TYPE_STRUCT) {
+      foreach_list(node, &ir->components) {
+         emit_constant_values(dst, (ir_constant *)node);
+      }
+      return;
+   }
+
+   if (type->is_array()) {
+      for (unsigned int elem = 0; elem < type->length; elem++)
+         emit_constant_values(dst, ir->array_elements[elem]);
+      return;
+   }
+
+   if (type->is_matrix()) {
+      /* One register per column, one float MOV per row within it. */
+      for (int col = 0; col < type->matrix_columns; col++) {
+         for (int row = 0; row < type->vector_elements; row++) {
+            const int component = col * type->vector_elements + row;
+
+            dst->writemask = 1 << row;
+            dst->type = BRW_REGISTER_TYPE_F;
+
+            emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[component]));
+         }
+         dst->reg_offset++;
+      }
+      return;
+   }
+
+   /* Scalar or vector: one MOV per channel, typed after the base type. */
+   for (int chan = 0; chan < type->vector_elements; chan++) {
+      dst->writemask = 1 << chan;
+      dst->type = brw_type_for_base_type(type);
+
+      switch (type->base_type) {
+      case GLSL_TYPE_FLOAT:
+         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[chan]));
+         break;
+      case GLSL_TYPE_INT:
+         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[chan]));
+         break;
+      case GLSL_TYPE_UINT:
+         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[chan]));
+         break;
+      case GLSL_TYPE_BOOL:
+         emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[chan]));
+         break;
+      default:
+         assert(!"Non-float/uint/int/bool constant");
+         break;
+      }
+   }
+   dst->reg_offset++;
+}
+
+/**
+ * Loads a constant into a new register of the constant's type and
+ * leaves that register in this->result.
+ */
+void
+vec4_visitor::visit(ir_constant *ir)
+{
+   dst_reg value_reg(this, ir->type);
+
+   /* Capture the result before emit_constant_values() advances the
+    * destination's reg_offset.
+    */
+   this->result = src_reg(value_reg);
+
+   emit_constant_values(&value_reg, ir);
+}
+
+/* Calls are not handled by this visitor; reaching one trips the assert. */
+void
+vec4_visitor::visit(ir_call *ir)
+{
+   (void)ir;
+   assert(!"not reached");
+}
+
+/* Texture operations currently generate no code. */
+void
+vec4_visitor::visit(ir_texture *ir)
+{
+   /* FINISHME: Implement vertex texturing.
+    *
+    * With 0 vertex samplers available, the linker will reject
+    * programs that do vertex texturing, but after our visitor has
+    * run.
+    */
+   (void)ir;
+}
+
+/* Returns are not handled by this visitor; reaching one trips the assert. */
+void
+vec4_visitor::visit(ir_return *ir)
+{
+   (void)ir;
+   assert(!"not reached");
+}
+
+/* Discards are not handled by this visitor; reaching one trips the assert. */
+void
+vec4_visitor::visit(ir_discard *ir)
+{
+   (void)ir;
+   assert(!"not reached");
+}
+
+/**
+ * Emits IF / ELSE / ENDIF around the then and else instruction lists.
+ * The ELSE is only emitted when the else list is non-empty.
+ */
+void
+vec4_visitor::visit(ir_if *ir)
+{
+   /* Annotate with the condition rather than the if statement itself,
+    * which would print the then and else blocks along with it.
+    */
+   this->base_ir = ir->condition;
+
+   if (intel->gen != 6) {
+      /* Evaluate the condition into the flag, then predicate the IF. */
+      emit_bool_to_cond_code(ir->condition);
+      vec4_instruction *if_inst = emit(BRW_OPCODE_IF);
+      if_inst->predicate = BRW_PREDICATE_NORMAL;
+   } else {
+      emit_if_gen6(ir);
+   }
+
+   visit_instructions(&ir->then_instructions);
+
+   const bool has_else = !ir->else_instructions.is_empty();
+   if (has_else) {
+      this->base_ir = ir->condition;
+      emit(BRW_OPCODE_ELSE);
+
+      visit_instructions(&ir->else_instructions);
+   }
+
+   this->base_ir = ir->condition;
+   emit(BRW_OPCODE_ENDIF);
+}
+
+/**
+ * Emits the VUE header for gen4/gen5 into consecutive message registers
+ * starting at @header_mrf: the flags dword group (point size, user clip
+ * flags), the NDC position and the 4D position, plus the clip distance
+ * and padding slots that are skipped on gen5.
+ *
+ * @param header_mrf  first message register to fill
+ * @return the message register following the header
+ */
+int
+vec4_visitor::emit_vue_header_gen4(int header_mrf)
+{
+   /* Get the position */
+   src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
+
+   /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
+   dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
+
+   current_annotation = "NDC";
+   dst_reg ndc_w = ndc;
+   ndc_w.writemask = WRITEMASK_W;
+   src_reg pos_w = pos;
+   pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
+   emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
+
+   dst_reg ndc_xyz = ndc;
+   ndc_xyz.writemask = WRITEMASK_XYZ;
+
+   emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));
+
+   if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
+       c->key.nr_userclip || brw->has_negative_rhw_bug) {
+      dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
+      GLuint i;
+
+      emit(BRW_OPCODE_MOV, header1, 0u);
+
+      if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
+         assert(!"finishme: psiz");
+         src_reg psiz;
+
+         header1.writemask = WRITEMASK_W;
+         emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
+         emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
+      }
+
+      for (i = 0; i < c->key.nr_userclip; i++) {
+         vec4_instruction *inst;
+
+         /* Set the flag where the position is on the negative side of
+          * user clip plane i.
+          */
+         inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
+                     pos, src_reg(c->userplane[i]));
+         inst->conditional_mod = BRW_CONDITIONAL_L;
+
+         /* The clip flag must only be set when the test above passed, so
+          * the predicate belongs on the OR.  The result of this emit()
+          * used to be discarded, which put the predicate on the DP4 and
+          * left the OR unconditional.
+          */
+         inst = emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i);
+         inst->predicate = BRW_PREDICATE_NORMAL;
+      }
+
+      /* i965 clipping workaround:
+       * 1) Test for -ve rhw
+       * 2) If set,
+       *      set ndc = (0,0,0,0)
+       *      set ucp[6] = 1
+       *
+       * Later, clipping will detect ucp[6] and ensure the primitive is
+       * clipped against all fixed planes.
+       */
+      if (brw->has_negative_rhw_bug) {
+#if 0
+         /* FINISHME */
+         brw_CMP(p,
+                 vec8(brw_null_reg()),
+                 BRW_CONDITIONAL_L,
+                 brw_swizzle1(ndc, 3),
+                 brw_imm_f(0));
+
+         brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
+         brw_MOV(p, ndc, brw_imm_f(0));
+         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+#endif
+      }
+
+      header1.writemask = WRITEMASK_XYZW;
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1));
+   } else {
+      /* No point size or clip flags: the flags register is all zero. */
+      emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++),
+                                  BRW_REGISTER_TYPE_UD), 0u);
+   }
+
+   if (intel->gen == 5) {
+      /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
+       * dword 0-3 (m1) of the header is indices, point width, clip flags.
+       * dword 4-7 (m2) is the ndc position (set above)
+       * dword 8-11 (m3) of the vertex header is the 4D space position
+       * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
+       * m6 is a pad so that the vertex element data is aligned
+       * m7 is the first vertex data we fill.
+       */
+      current_annotation = "NDC";
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
+
+      current_annotation = "gl_Position";
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
+
+      /* user clip distance. */
+      header_mrf += 2;
+
+      /* Pad so that vertex element data is aligned. */
+      header_mrf++;
+   } else {
+      /* There are 8 dwords in VUE header pre-Ironlake:
+       * dword 0-3 (m1) is indices, point width, clip flags.
+       * dword 4-7 (m2) is ndc position (set above)
+       *
+       * dword 8-11 (m3) is the first vertex data.
+       */
+      current_annotation = "NDC";
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
+
+      current_annotation = "gl_Position";
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
+   }
+
+   return header_mrf;
+}
+
+/**
+ * Emits the VUE header for gen6 into consecutive message registers
+ * starting at @header_mrf: the flags register (with point size in .w
+ * when written), the 4D position and, when user clipping is enabled,
+ * two registers of user clip distances.
+ *
+ * @param header_mrf  first message register to fill
+ * @return the message register following the header
+ */
+int
+vec4_visitor::emit_vue_header_gen6(int header_mrf)
+{
+   struct brw_reg reg;
+
+   /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
+    * dword 0-3 (m2) of the header is indices, point width, clip flags.
+    * dword 4-7 (m3) is the 4D space position
+    * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
+    * enabled.
+    *
+    * m4 or 6 is the first vertex element data we fill.
+    */
+
+   current_annotation = "indices, point width, clip flags";
+   reg = brw_message_reg(header_mrf++);
+   emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0));
+   if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
+      emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W),
+           src_reg(output_reg[VERT_RESULT_PSIZ]));
+   }
+
+   current_annotation = "gl_Position";
+   emit(BRW_OPCODE_MOV,
+        brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS]));
+
+   current_annotation = "user clip distances";
+   if (c->key.nr_userclip) {
+      for (int i = 0; i < c->key.nr_userclip; i++) {
+         /* Planes 0-3 go in the first clip distance register, the rest
+          * in the second; the channel is the plane index modulo 4.
+          */
+         struct brw_reg m;
+         if (i < 4)
+            m = brw_message_reg(header_mrf);
+         else
+            m = brw_message_reg(header_mrf + 1);
+
+         /* The clip distance is the dot product of the position with the
+          * plane.  DP4 takes two sources; the position operand used to
+          * be missing here, unlike in the gen4 header path.
+          */
+         emit(BRW_OPCODE_DP4,
+              dst_reg(brw_writemask(m, 1 << (i & 3))),
+              src_reg(output_reg[VERT_RESULT_HPOS]),
+              src_reg(c->userplane[i]));
+      }
+      header_mrf += 2;
+   }
+
+   current_annotation = NULL;
+
+   return header_mrf;
+}
+
+/**
+ * Returns @mlen, bumped to the next odd value on gen6+ when it is not
+ * already odd.  Earlier generations get @mlen back unchanged.
+ */
+static int
+align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
+{
+   if (brw->intel.gen < 6)
+      return mlen;
+
+   /* URB data written (does not include the message header reg) must
+    * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
+    * section 5.4.3.2.2: URB_INTERLEAVED.
+    *
+    * URB entries are allocated on a multiple of 1024 bits, so an
+    * extra 128 bits written here to make the end align to 256 is
+    * no problem.
+    */
+   const bool is_odd = (mlen % 2) == 1;
+
+   return is_odd ? mlen : mlen + 1;
+}
+
+/**
+ * Generates the VUE payload plus the 1 or 2 URB write instructions to
+ * complete the VS thread, and records the resulting URB entry size in
+ * c->prog_data.urb_entry_size.
+ *
+ * The VUE layout is documented in Volume 2a.
+ */
+void
+vec4_visitor::emit_urb_writes()
+{
+   /* MRF 0 is reserved for the debugger, so the message header goes in
+    * MRF 1.
+    */
+   const int base_mrf = 1;
+   /* While generating the URB write message contents we may need to
+    * unspill a register or load from an array.  Those reads would use
+    * MRFs 14-15, so the payload stops at MRF 13.
+    */
+   const int max_usable_mrf = 13;
+   uint64_t outputs_remaining = c->prog_data.outputs_written;
+   int urb_entry_size;
+
+   /* FINISHME: edgeflag */
+
+   /* The first mrf is the g0-based message header containing URB handles
+    * and such, which is implied in VS_OPCODE_URB_WRITE; skip over it.
+    */
+   int mrf = base_mrf + 1;
+
+   if (intel->gen < 6) {
+      mrf = emit_vue_header_gen4(mrf);
+   } else {
+      mrf = emit_vue_header_gen6(mrf);
+   }
+
+   /* Set up the VUE data for the first URB write */
+   int attr;
+   for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
+      if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
+         continue;
+
+      outputs_remaining &= ~BITFIELD64_BIT(attr);
+
+      /* The position is set up in the VUE header, and the point size is
+       * loaded into the VUE header too, so neither occupies an attribute
+       * slot.
+       */
+      if (attr == VERT_RESULT_HPOS || attr == VERT_RESULT_PSIZ)
+         continue;
+
+      vec4_instruction *mov = emit(BRW_OPCODE_MOV, brw_message_reg(mrf++),
+                                   src_reg(output_reg[attr]));
+
+      /* Front and back colors are saturated when color clamping is on. */
+      const bool is_color = attr == VERT_RESULT_COL0 ||
+                            attr == VERT_RESULT_COL1 ||
+                            attr == VERT_RESULT_BFC0 ||
+                            attr == VERT_RESULT_BFC1;
+      if (is_color && c->key.clamp_vertex_color)
+         mov->saturate = true;
+
+      /* Once the last usable MRF has been filled, nothing more fits into
+       * this URB write; any remaining outputs go in a second one.
+       */
+      if (mrf > max_usable_mrf) {
+         attr++;
+         break;
+      }
+   }
+
+   vec4_instruction *write = emit(VS_OPCODE_URB_WRITE);
+   write->base_mrf = base_mrf;
+   write->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
+   write->eot = !outputs_remaining;
+
+   urb_entry_size = mrf - base_mrf;
+
+   /* Optional second URB write */
+   if (outputs_remaining) {
+      mrf = base_mrf + 1;
+
+      /* Continue with the attribute following the last one written. */
+      for (; attr < VERT_RESULT_MAX; attr++) {
+         if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
+            continue;
+
+         assert(mrf < max_usable_mrf);
+
+         emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
+      }
+
+      write = emit(VS_OPCODE_URB_WRITE);
+      write->base_mrf = base_mrf;
+      write->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
+      write->eot = true;
+      /* URB destination offset.  In the previous write, we got MRFs
+       * 2-13 minus the one header MRF, so 12 regs.  URB offset is in
+       * URB row increments, and each of our MRFs is half of one of
+       * those, since we're doing interleaved writes.
+       */
+      write->offset = (max_usable_mrf - base_mrf) / 2;
+
+      urb_entry_size += mrf - base_mrf;
+   }
+
+   /* The entry size is stored in units of 8 registers on gen6 and of 4
+    * registers otherwise, rounded up.
+    */
+   const int size_unit = (intel->gen == 6) ? 8 : 4;
+   c->prog_data.urb_entry_size = ALIGN(urb_entry_size, size_unit) / size_unit;
+}
+
+/**
+ * Returns a source register holding the scratch offset for @reg_offset,
+ * scaled into the units used by the scratch message header.
+ *
+ * If @reladdr is non-NULL, an ADD (reladdr + reg_offset) and a MUL (by the
+ * header scale) are emitted and moved to just before @inst, and the
+ * temporary holding the result is returned. Otherwise the scaled constant
+ * offset is returned directly as a src_reg.
+ */
+src_reg
+vec4_visitor::get_scratch_offset(vec4_instruction *inst,
+ src_reg *reladdr, int reg_offset)
+{
+ /* Because we store the values to scratch interleaved like our
+ * vertex data, we need to scale the vec4 index by 2.
+ */
+ int message_header_scale = 2;
+
+ /* Pre-gen6, the message header uses byte offsets instead of vec4
+ * (16-byte) offset units.
+ */
+ if (intel->gen < 6)
+ message_header_scale *= 16;
+
+ if (reladdr) {
+ /* Variable index: compute (reladdr + reg_offset) * scale at runtime. */
+ src_reg index = src_reg(this, glsl_type::int_type);
+
+ vec4_instruction *add = emit(BRW_OPCODE_ADD,
+ dst_reg(index),
+ *reladdr,
+ src_reg(reg_offset));
+ /* Move our new instruction from the tail to its correct place. */
+ add->remove();
+ inst->insert_before(add);
+
+ vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index),
+ index, src_reg(message_header_scale));
+ /* Same relocation as the ADD above, keeping ADD ahead of MUL. */
+ mul->remove();
+ inst->insert_before(mul);
+
+ return index;
+ } else {
+ /* Constant index: the scaled offset is known at compile time. */
+ return src_reg(reg_offset * message_header_scale);
+ }
+}
+
+/**
+ * Emits an instruction before @inst to load the value named by @orig_src
+ * from scratch space at @base_offset to @temp.
+ *
+ * The scratch location is @base_offset plus @orig_src's reg_offset, with
+ * @orig_src's reladdr (if any) applied by get_scratch_offset().
+ */
+void
+vec4_visitor::emit_scratch_read(vec4_instruction *inst,
+ dst_reg temp, src_reg orig_src,
+ int base_offset)
+{
+ int reg_offset = base_offset + orig_src.reg_offset;
+ src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
+
+ vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ,
+ temp, index);
+
+ /* The read message is a single register starting at MRF 14. */
+ scratch_read_inst->base_mrf = 14;
+ scratch_read_inst->mlen = 1;
+ /* Move our instruction from the tail to its correct place. */
+ scratch_read_inst->remove();
+ inst->insert_before(scratch_read_inst);
+}
+
+/**
+ * Emits an instruction after @inst to store the value to be written
+ * to @orig_dst to scratch space at @base_offset, from @temp.
+ *
+ * The scratch location is @base_offset plus @orig_dst's reg_offset, with
+ * @orig_dst's reladdr (if any) applied by get_scratch_offset(). The write
+ * carries @orig_dst's writemask and @inst's predicate.
+ */
+void
+vec4_visitor::emit_scratch_write(vec4_instruction *inst,
+ src_reg temp, dst_reg orig_dst,
+ int base_offset)
+{
+ int reg_offset = base_offset + orig_dst.reg_offset;
+ src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);
+
+ /* The destination is g0 with the original writemask applied; presumably
+ * only the writemask matters to the scratch write -- TODO confirm against
+ * the VS_OPCODE_SCRATCH_WRITE generator.
+ */
+ dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
+ orig_dst.writemask));
+ vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE,
+ dst, temp, index);
+ /* The write message is two registers starting at MRF 13. */
+ scratch_write_inst->base_mrf = 13;
+ scratch_write_inst->mlen = 2;
+ /* Keep the store under the same predicate as the instruction it follows. */
+ scratch_write_inst->predicate = inst->predicate;
+ /* Move our instruction from the tail to its correct place. */
+ scratch_write_inst->remove();
+ inst->insert_after(scratch_write_inst);
+}
+
+/**
+ * We can't generally support array access in GRF space, because a
+ * single instruction's destination can only span 2 contiguous
+ * registers. So, we send all GRF arrays that get variable index
+ * access to scratch space.
+ *
+ * This runs in two passes over this->instructions: the first assigns a
+ * scratch location (advancing c->last_scratch) to every virtual GRF that is
+ * accessed with a reladdr; the second rewrites every access to those GRFs,
+ * relative or not, into scratch reads/writes through fresh temporaries.
+ */
+void
+vec4_visitor::move_grf_array_access_to_scratch()
+{
+ /* Scratch offset per virtual GRF, or -1 if the GRF stays in registers.
+ * NOTE(review): this is a variable-length array, which is a compiler
+ * extension in C++ rather than standard C++.
+ */
+ int scratch_loc[this->virtual_grf_count];
+
+ for (int i = 0; i < this->virtual_grf_count; i++) {
+ scratch_loc[i] = -1;
+ }
+
+ /* First, calculate the set of virtual GRFs that need to be punted
+ * to scratch due to having any array access on them, and where in
+ * scratch.
+ */
+ foreach_list(node, &this->instructions) {
+ vec4_instruction *inst = (vec4_instruction *)node;
+
+ /* Each register of the GRF reserves 8 * 4 units of scratch; presumably
+ * bytes for 8 dwords (an interleaved vec4 pair) -- TODO confirm.
+ */
+ if (inst->dst.file == GRF && inst->dst.reladdr &&
+ scratch_loc[inst->dst.reg] == -1) {
+ scratch_loc[inst->dst.reg] = c->last_scratch;
+ c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
+ }
+
+ for (int i = 0 ; i < 3; i++) {
+ src_reg *src = &inst->src[i];
+
+ if (src->file == GRF && src->reladdr &&
+ scratch_loc[src->reg] == -1) {
+ scratch_loc[src->reg] = c->last_scratch;
+ c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
+ }
+ }
+ }
+
+ /* Now, for anything that will be accessed through scratch, rewrite
+ * it to load/store. Note that this is a _safe list walk, because
+ * we may generate a new scratch_write instruction after the one
+ * we're processing.
+ */
+ foreach_list_safe(node, &this->instructions) {
+ vec4_instruction *inst = (vec4_instruction *)node;
+
+ /* Set up the annotation tracking for new generated instructions. */
+ base_ir = inst->ir;
+ current_annotation = inst->annotation;
+
+ if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
+ src_reg temp = src_reg(this, glsl_type::vec4_type);
+
+ /* The scratch write is emitted with the original dst (including its
+ * reladdr) before the instruction is retargeted at the temporary.
+ */
+ emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);
+
+ inst->dst.file = temp.file;
+ inst->dst.reg = temp.reg;
+ inst->dst.reg_offset = temp.reg_offset;
+ inst->dst.reladdr = NULL;
+ }
+
+ for (int i = 0 ; i < 3; i++) {
+ if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
+ continue;
+
+ dst_reg temp = dst_reg(this, glsl_type::vec4_type);
+
+ /* Load the value into a temporary ahead of inst, then point the
+ * source at that temporary with no relative addressing left.
+ */
+ emit_scratch_read(inst, temp, inst->src[i],
+ scratch_loc[inst->src[i].reg]);
+
+ inst->src[i].file = temp.file;
+ inst->src[i].reg = temp.reg;
+ inst->src[i].reg_offset = temp.reg_offset;
+ inst->src[i].reladdr = NULL;
+ }
+ }
+}
+
+
+/**
+ * Sets up a visitor for compiling the vertex shader of @prog.
+ *
+ * @c is the VS compile state; the brw/intel/ctx pointers are derived from
+ * its brw_compile (c->func). @shader is stored for later use. A private
+ * ralloc context (mem_ctx) and the variable hash table are created here and
+ * released by the destructor.
+ */
+vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
+ struct gl_shader_program *prog,
+ struct brw_shader *shader)
+{
+ this->c = c;
+ this->p = &c->func;
+ this->brw = p->brw;
+ this->intel = &brw->intel;
+ this->ctx = &intel->ctx;
+ this->prog = prog;
+ this->shader = shader;
+
+ this->mem_ctx = ralloc_context(NULL);
+ this->failed = false;
+
+ this->base_ir = NULL;
+ this->current_annotation = NULL;
+
+ this->vp = prog->VertexProgram;
+ this->prog_data = &c->prog_data;
+
+ /* Create the variable hash table exactly once: the destructor calls
+ * hash_table_dtor() on a single table, so a second hash_table_ctor()
+ * here would overwrite the pointer and leak the first table.
+ */
+ this->variable_ht = hash_table_ctor(0,
+ hash_table_pointer_hash,
+ hash_table_pointer_compare);
+
+ this->virtual_grf_def = NULL;
+ this->virtual_grf_use = NULL;
+ this->virtual_grf_sizes = NULL;
+ this->virtual_grf_count = 0;
+ this->virtual_grf_array_size = 0;
+ this->live_intervals_valid = false;
+
+ this->uniforms = 0;
+}
+
+/**
+ * Releases what the constructor allocated: the visitor's ralloc context
+ * (mem_ctx) and the variable hash table.
+ */
+vec4_visitor::~vec4_visitor()
+{
+ ralloc_free(this->mem_ctx);
+ hash_table_dtor(this->variable_ht);
+}
+
+
+/**
+ * Marks the compile as failed and records a printf-style formatted reason.
+ *
+ * Only the first failure is recorded; later calls return immediately. The
+ * message is prefixed with "VS compile failed: ", stored in this->fail_msg
+ * (allocated out of mem_ctx), and printed to stderr when DEBUG_VS is set in
+ * INTEL_DEBUG.
+ */
+void
+vec4_visitor::fail(const char *format, ...)
+{
+ va_list va;
+ char *msg;
+
+ /* Keep the first failure message; ignore any subsequent ones. */
+ if (failed)
+ return;
+
+ failed = true;
+
+ va_start(va, format);
+ msg = ralloc_vasprintf(mem_ctx, format, va);
+ va_end(va);
+ msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
+
+ this->fail_msg = msg;
+
+ if (INTEL_DEBUG & DEBUG_VS) {
+ fprintf(stderr, "%s", msg);
+ }
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index a9ad5311fe3..3373e707d98 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -30,6 +30,7 @@
*/
+#include "main/compiler.h"
#include "brw_context.h"
#include "brw_vs.h"
#include "brw_util.h"
@@ -39,17 +40,21 @@
#include "../glsl/ralloc.h"
-static void do_vs_prog( struct brw_context *brw,
- struct brw_vertex_program *vp,
- struct brw_vs_prog_key *key )
+static bool
+do_vs_prog(struct brw_context *brw,
+ struct gl_shader_program *prog,
+ struct brw_vertex_program *vp,
+ struct brw_vs_prog_key *key)
{
struct gl_context *ctx = &brw->intel.ctx;
+ struct intel_context *intel = &brw->intel;
GLuint program_size;
const GLuint *program;
struct brw_vs_compile c;
void *mem_ctx;
int aux_size;
int i;
+ static int new_vs = -1;
memset(&c, 0, sizeof(c));
memcpy(&c.key, key, sizeof(*key));
@@ -85,7 +90,25 @@ static void do_vs_prog( struct brw_context *brw,
/* Emit GEN4 code.
*/
- brw_vs_emit(&c);
+ if (new_vs == -1)
+ new_vs = getenv("INTEL_NEW_VS") != NULL;
+
+ if (new_vs && prog) {
+ if (!brw_vs_emit(prog, &c)) {
+ ralloc_free(mem_ctx);
+ return false;
+ }
+ } else {
+ brw_old_vs_emit(&c);
+ }
+
+ /* Scratch space is used for register spilling */
+ if (c.last_scratch) {
+ c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch);
+
+ brw_get_scratch_bo(intel, &brw->vs.scratch_bo,
+ c.prog_data.total_scratch * brw->vs_max_threads);
+ }
/* get the program
*/
@@ -111,6 +134,8 @@ static void do_vs_prog( struct brw_context *brw,
&c.prog_data, aux_size,
&brw->vs.prog_offset, &brw->vs.prog_data);
ralloc_free(mem_ctx);
+
+ return true;
}
@@ -155,13 +180,15 @@ static void brw_upload_vs_prog(struct brw_context *brw)
if (!brw_search_cache(&brw->cache, BRW_VS_PROG,
&key, sizeof(key),
&brw->vs.prog_offset, &brw->vs.prog_data)) {
- do_vs_prog(brw, vp, &key);
+ bool success = do_vs_prog(brw, ctx->Shader.CurrentVertexProgram,
+ vp, &key);
+
+ assert(success);
}
brw->vs.constant_map = ((int8_t *)brw->vs.prog_data +
sizeof(*brw->vs.prog_data));
}
-
/* See brw_vs.c:
*/
const struct brw_tracked_state brw_vs_prog = {
@@ -174,3 +201,30 @@ const struct brw_tracked_state brw_vs_prog = {
},
.prepare = brw_upload_vs_prog
};
+
+/**
+ * Compiles the vertex program of @prog ahead of draw time.
+ *
+ * Builds a key with only program_string_id and clamp_vertex_color set and
+ * runs do_vs_prog() on it. brw->vs.prog_offset and brw->vs.prog_data are
+ * saved beforehand and restored afterwards, so the currently bound VS state
+ * is left as it was.
+ *
+ * Returns true if @prog has no vertex program, otherwise the result of
+ * do_vs_prog().
+ */
+bool
+brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_vs_prog_key key;
+ struct gl_vertex_program *vp = prog->VertexProgram;
+ /* NOTE(review): this conversion runs before the NULL check on vp below;
+ * presumably brw_vertex_program() is a plain cast -- confirm.
+ */
+ struct brw_vertex_program *bvp = brw_vertex_program(vp);
+ uint32_t old_prog_offset = brw->vs.prog_offset;
+ struct brw_vs_prog_data *old_prog_data = brw->vs.prog_data;
+ bool success;
+
+ /* Nothing to precompile when the program has no vertex stage. */
+ if (!vp)
+ return true;
+
+ memset(&key, 0, sizeof(key));
+
+ key.program_string_id = bvp->id;
+ key.clamp_vertex_color = true;
+
+ success = do_vs_prog(brw, prog, bvp, &key);
+
+ /* Undo the prog_offset/prog_data changes made by the compile above. */
+ brw->vs.prog_offset = old_prog_offset;
+ brw->vs.prog_data = old_prog_data;
+
+ return success;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 432994a8534..beccb381ee2 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -66,6 +66,7 @@ struct brw_vs_compile {
GLuint first_output;
GLuint nr_outputs;
GLuint first_overflow_output; /**< VERT_ATTRIB_x */
+ GLuint last_scratch;
GLuint first_tmp;
GLuint last_tmp;
@@ -92,6 +93,8 @@ struct brw_vs_compile {
GLboolean needs_stack;
};
-void brw_vs_emit( struct brw_vs_compile *c );
+bool brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c);
+void brw_old_vs_emit(struct brw_vs_compile *c);
+bool brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c
index 9fdfebe9f76..47cc0a7da7a 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_constval.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c
@@ -194,19 +194,11 @@ static void calc_wm_input_sizes( struct brw_context *brw )
/* BRW_NEW_VERTEX_PROGRAM */
const struct brw_vertex_program *vp =
brw_vertex_program_const(brw->vertex_program);
- /* BRW_NEW_FRAGMENT_PROGRAM */
- struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram;
/* BRW_NEW_INPUT_DIMENSIONS */
struct tracker t;
GLuint insn;
GLuint i;
- /* If we're going to go through brw_fs.cpp, we don't end up using
- * brw->wm.input_size_masks.
- */
- if (prog && prog->_LinkedShaders[MESA_SHADER_FRAGMENT])
- return;
-
memset(&t, 0, sizeof(t));
/* _NEW_LIGHT */
@@ -246,9 +238,7 @@ static void calc_wm_input_sizes( struct brw_context *brw )
const struct brw_tracked_state brw_wm_input_sizes = {
.dirty = {
.mesa = _NEW_LIGHT,
- .brw = (BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_VERTEX_PROGRAM |
- BRW_NEW_INPUT_DIMENSIONS),
+ .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS,
.cache = 0
},
.prepare = calc_wm_input_sizes
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 9d733344a26..bfee811e13d 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1096,31 +1096,6 @@ static void emit_lrp_noalias(struct brw_vs_compile *c,
brw_MAC(p, dst, arg0, arg1);
}
-/** 3 or 4-component vector normalization */
-static void emit_nrm( struct brw_vs_compile *c,
- struct brw_reg dst,
- struct brw_reg arg0,
- int num_comps)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg tmp = get_tmp(c);
-
- /* tmp = dot(arg0, arg0) */
- if (num_comps == 3)
- brw_DP3(p, tmp, arg0, arg0);
- else
- brw_DP4(p, tmp, arg0, arg0);
-
- /* tmp = 1 / sqrt(tmp) */
- emit_math1(c, BRW_MATH_FUNCTION_RSQ, tmp, tmp, BRW_MATH_PRECISION_FULL);
-
- /* dst = arg0 * tmp */
- brw_MUL(p, dst, arg0, tmp);
-
- release_tmp(c, tmp);
-}
-
-
static struct brw_reg
get_constant(struct brw_vs_compile *c,
const struct prog_instruction *inst,
@@ -1359,7 +1334,7 @@ get_src_reg( struct brw_vs_compile *c,
if (component >= 0) {
params = c->vp->program.Base.Parameters;
- f = params->ParameterValues[src->Index][component];
+ f = params->ParameterValues[src->Index][component].f;
if (src->Abs)
f = fabs(f);
@@ -1821,6 +1796,9 @@ accumulator_contains(struct brw_vs_compile *c, struct brw_reg val)
if (val.address_mode != BRW_ADDRESS_DIRECT)
return GL_FALSE;
+ if (val.negate || val.abs)
+ return GL_FALSE;
+
switch (prev_insn->header.opcode) {
case BRW_OPCODE_MOV:
case BRW_OPCODE_MAC:
@@ -1900,7 +1878,7 @@ brw_vs_rescale_gl_fixed(struct brw_vs_compile *c)
/* Emit the vertex program instructions here.
*/
-void brw_vs_emit(struct brw_vs_compile *c )
+void brw_old_vs_emit(struct brw_vs_compile *c )
{
#define MAX_IF_DEPTH 32
#define MAX_LOOP_DEPTH 32
@@ -1980,9 +1958,22 @@ void brw_vs_emit(struct brw_vs_compile *c )
const struct prog_src_register *src = &inst->SrcReg[i];
index = src->Index;
file = src->File;
- if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
- args[i] = c->output_regs[index].reg;
- else
+ if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) {
+ /* Can't just make get_arg "do the right thing" here because
+ * other callers of get_arg and get_src_reg don't expect any
+ * special behavior for the c->output_regs[index].used_in_src
+ * case.
+ */
+ args[i] = c->output_regs[index].reg;
+ args[i].dw1.bits.swizzle =
+ BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0),
+ GET_SWZ(src->Swizzle, 1),
+ GET_SWZ(src->Swizzle, 2),
+ GET_SWZ(src->Swizzle, 3));
+
+ /* Note this is ok for non-swizzle ARB_vp instructions */
+ args[i].negate = src->Negate ? 1 : 0;
+ } else
args[i] = get_arg(c, inst, i);
}
@@ -1993,7 +1984,11 @@ void brw_vs_emit(struct brw_vs_compile *c )
index = inst->DstReg.Index;
file = inst->DstReg.File;
if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
- dst = c->output_regs[index].reg;
+ /* Can't just make get_dst "do the right thing" here because other
+ * callers of get_dst don't expect any special behavior for the
+ * c->output_regs[index].used_in_src case.
+ */
+ dst = brw_writemask(c->output_regs[index].reg, inst->DstReg.WriteMask);
else
dst = get_dst(c, inst->DstReg);
@@ -2025,12 +2020,6 @@ void brw_vs_emit(struct brw_vs_compile *c )
case OPCODE_DPH:
brw_DPH(p, dst, args[0], args[1]);
break;
- case OPCODE_NRM3:
- emit_nrm(c, dst, args[0], 3);
- break;
- case OPCODE_NRM4:
- emit_nrm(c, dst, args[0], 4);
- break;
case OPCODE_DST:
unalias2(c, dst, args[0], args[1], emit_dst_noalias);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index fc4373ab311..29b3e47ab0c 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -77,6 +77,16 @@ brw_prepare_vs_unit(struct brw_context *brw)
else
vs->thread1.binding_table_entry_count = brw->vs.nr_surfaces;
+ if (brw->vs.prog_data->total_scratch != 0) {
+ vs->thread2.scratch_space_base_pointer =
+ brw->vs.scratch_bo->offset >> 10; /* reloc */
+ vs->thread2.per_thread_scratch_space =
+ ffs(brw->vs.prog_data->total_scratch) - 11;
+ } else {
+ vs->thread2.scratch_space_base_pointer = 0;
+ vs->thread2.per_thread_scratch_space = 0;
+ }
+
vs->thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length;
vs->thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length;
vs->thread3.dispatch_grf_start_reg = 1;
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index 55dbd4fa8b0..40360b23fff 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -213,6 +213,7 @@ static void brw_new_batch( struct intel_context *intel )
brw->state_batch_count = 0;
brw->vb.nr_current_buffers = 0;
+ brw->ib.type = -1;
/* Mark that the current program cache BO has been used by the GPU.
* It will be reallocated if we need to put new programs in for the
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index b0dfdd536aa..e76832515fe 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -206,10 +206,6 @@ bool do_wm_prog(struct brw_context *brw,
*/
return false;
}
- c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN);
- c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN);
- c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG);
- c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF);
} else {
void *instruction = c->instruction;
void *prog_instructions = c->prog_instructions;
@@ -232,6 +228,13 @@ bool do_wm_prog(struct brw_context *brw,
if (!brw_wm_fs_emit(brw, c, prog))
return false;
} else {
+ if (!c->instruction) {
+ c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN);
+ c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN);
+ c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG);
+ c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF);
+ }
+
/* Fallback for fixed function and ARB_fp shaders. */
c->dispatch_width = 16;
brw_wm_payload_setup(brw, c);
@@ -241,29 +244,10 @@ bool do_wm_prog(struct brw_context *brw,
/* Scratch space is used for register spilling */
if (c->last_scratch) {
- uint32_t total_scratch;
-
- /* Per-thread scratch space is power-of-two sized. */
- for (c->prog_data.total_scratch = 1024;
- c->prog_data.total_scratch <= c->last_scratch;
- c->prog_data.total_scratch *= 2) {
- /* empty */
- }
- total_scratch = c->prog_data.total_scratch * brw->wm_max_threads;
+ c->prog_data.total_scratch = brw_get_scratch_size(c->last_scratch);
- if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) {
- drm_intel_bo_unreference(brw->wm.scratch_bo);
- brw->wm.scratch_bo = NULL;
- }
- if (brw->wm.scratch_bo == NULL) {
- brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
- "wm scratch",
- total_scratch,
- 4096);
- }
- }
- else {
- c->prog_data.total_scratch = 0;
+ brw_get_scratch_bo(intel, &brw->wm.scratch_bo,
+ c->prog_data.total_scratch * brw->wm_max_threads);
}
if (unlikely(INTEL_DEBUG & DEBUG_WM))
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index f61757a8cac..6ea4a7d6e50 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -1094,9 +1094,16 @@ void emit_tex(struct brw_wm_compile *c,
if (intel->gen < 5 && c->dispatch_width == 8)
nr_texcoords = 3;
- /* For shadow comparisons, we have to supply u,v,r. */
- if (shadow)
- nr_texcoords = 3;
+ if (shadow) {
+ if (intel->gen < 7) {
+ /* For shadow comparisons, we have to supply u,v,r. */
+ nr_texcoords = 3;
+ } else {
+ /* On Ivybridge, the shadow comparator comes first. Just load it. */
+ brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
+ cur_mrf += mrf_per_channel;
+ }
+ }
/* Emit the texcoords. */
for (i = 0; i < nr_texcoords; i++) {
@@ -1113,7 +1120,7 @@ void emit_tex(struct brw_wm_compile *c,
}
/* Fill in the shadow comparison reference value. */
- if (shadow) {
+ if (shadow && intel->gen < 7) {
if (intel->gen >= 5) {
/* Fill in the cube map array index value. */
brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index 7cd3edad235..bd46bd8de43 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -535,15 +535,15 @@ static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
GLfloat s3)
{
struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
- GLfloat values[4];
+ gl_constant_value values[4];
GLuint idx;
GLuint swizzle;
struct prog_src_register reg;
- values[0] = s0;
- values[1] = s1;
- values[2] = s2;
- values[3] = s3;
+ values[0].f = s0;
+ values[1].f = s1;
+ values[2].f = s2;
+ values[3].f = s3;
idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
reg = src_reg(PROGRAM_STATE_VAR, idx);
@@ -664,6 +664,8 @@ static void precalc_lit( struct brw_wm_compile *c,
static void precalc_tex( struct brw_wm_compile *c,
const struct prog_instruction *inst )
{
+ struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
struct prog_src_register coord;
struct prog_dst_register tmpcoord = { 0 };
const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
@@ -727,7 +729,7 @@ static void precalc_tex( struct brw_wm_compile *c,
release_temp(c, tmp0);
release_temp(c, tmp1);
}
- else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
+ else if (intel->gen < 6 && inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
struct prog_src_register scale =
search_or_add_param5( c,
STATE_INTERNAL,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
index f78bdc31866..ccf9dc2bc18 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
@@ -205,14 +205,14 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
case PROGRAM_CONSTANT:
/* These are invarient:
*/
- ref = get_const_ref(c, &plist->ParameterValues[idx][component]);
+ ref = get_const_ref(c, &plist->ParameterValues[idx][component].f);
break;
case PROGRAM_STATE_VAR:
case PROGRAM_UNIFORM:
/* These may change from run to run:
*/
- ref = get_param_ref(c, &plist->ParameterValues[idx][component] );
+ ref = get_param_ref(c, &plist->ParameterValues[idx][component].f );
break;
default:
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index 98146136703..6834ebad780 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -289,6 +289,13 @@ static void brw_update_sampler_state(struct brw_context *brw,
sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 6);
sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 6);
+ /* On Gen6+, the sampler can handle non-normalized texture
+ * rectangle coordinates natively
+ */
+ if (intel->gen >= 6 && texObj->Target == GL_TEXTURE_RECTANGLE) {
+ sampler->ss3.non_normalized_coord = 1;
+ }
+
upload_default_color(brw, gl_sampler, unit);
if (intel->gen >= 6) {
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index fb4fb146f8d..ad909789d82 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -342,7 +342,7 @@ prepare_wm_pull_constants(struct brw_context *brw)
constants = brw->wm.const_bo->virtual;
for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
constants[i] = convert_param(brw->wm.prog_data->pull_param_convert[i],
- *brw->wm.prog_data->pull_param[i]);
+ brw->wm.prog_data->pull_param[i]);
}
drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index fb4cdbaadf9..b94121e8437 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -81,12 +81,21 @@ gen6_prepare_vs_push_constants(struct brw_context *brw)
params_uploaded++;
}
- for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
- if (brw->vs.constant_map[i] != -1) {
- memcpy(param + brw->vs.constant_map[i] * 4,
- vp->program.Base.Parameters->ParameterValues[i],
- 4 * sizeof(float));
- params_uploaded++;
+ if (brw->vs.prog_data->uses_new_param_layout) {
+ for (i = 0; i < brw->vs.prog_data->nr_params; i++) {
+ *param = convert_param(brw->vs.prog_data->param_convert[i],
+ brw->vs.prog_data->param[i]);
+ param++;
+ }
+ params_uploaded += brw->vs.prog_data->nr_params / 4;
+ } else {
+ for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+ if (brw->vs.constant_map[i] != -1) {
+ memcpy(param + brw->vs.constant_map[i] * 4,
+ vp->program.Base.Parameters->ParameterValues[i],
+ 4 * sizeof(float));
+ params_uploaded++;
+ }
}
}
@@ -151,7 +160,15 @@ upload_vs_state(struct brw_context *brw)
OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
GEN6_VS_FLOATING_POINT_MODE_ALT |
(brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
- OUT_BATCH(0); /* scratch space base offset */
+
+ if (brw->vs.prog_data->total_scratch) {
+ OUT_RELOC(brw->vs.scratch_bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ ffs(brw->vs.prog_data->total_scratch) - 11);
+ } else {
+ OUT_BATCH(0);
+ }
+
OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
(brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
@@ -160,6 +177,32 @@ upload_vs_state(struct brw_context *brw)
GEN6_VS_STATISTICS_ENABLE |
GEN6_VS_ENABLE);
ADVANCE_BATCH();
+
+ /* Based on my reading of the simulator, the VS constants don't get
+ * pulled into the VS FF unit until an appropriate pipeline flush
+ * happens, and instead the 3DSTATE_CONSTANT_VS packet just adds
+ * references to them into a little FIFO. The flushes are common,
+ * but don't reliably happen between this and a 3DPRIMITIVE, causing
+ * the primitive to use the wrong constants. Then the FIFO
+ * containing the constant setup gets added to again on the next
+ * constants change, and eventually when a flush does happen the
+ * unit is overwhelmed by constant changes and dies.
+ *
+ * To avoid this, send a PIPE_CONTROL down the line that will
+ * update the unit immediately loading the constants. The flush
+ * type bits here were those set by the STATE_BASE_ADDRESS whose
+ * move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the
+ * bug reports that led to this workaround, and may be more than
+ * what is strictly required to avoid the issue.
+ */
+ BEGIN_BATCH(4);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+ OUT_BATCH(PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_INSTRUCTION_FLUSH |
+ PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+ OUT_BATCH(0); /* address */
+ OUT_BATCH(0); /* write data */
+ ADVANCE_BATCH();
}
const struct brw_tracked_state gen6_vs_state = {
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 185da9c355f..07e9995f53b 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -54,14 +54,14 @@ gen6_prepare_wm_push_constants(struct brw_context *brw)
float *constants;
unsigned int i;
- constants = brw_state_batch(brw, AUB_TRACE_NO_TYPE,
+ constants = brw_state_batch(brw, AUB_TRACE_WM_CONSTANTS,
brw->wm.prog_data->nr_params *
sizeof(float),
32, &brw->wm.push_const_offset);
for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
- *brw->wm.prog_data->param[i]);
+ brw->wm.prog_data->param[i]);
}
if (0) {
diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
index e787c21f4d1..aee67c87472 100644
--- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
@@ -157,6 +157,13 @@ gen7_update_sampler_state(struct brw_context *brw, int unit,
sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 8);
sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 8);
+ /* The sampler can handle non-normalized texture rectangle coordinates
+ * natively
+ */
+ if (texObj->Target == GL_TEXTURE_RECTANGLE) {
+ sampler->ss3.non_normalized_coord = 1;
+ }
+
upload_default_color(brw, gl_sampler, unit);
sampler->ss2.default_color_pointer = brw->wm.sdc_offset[unit] >> 5;
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index 0fad3d2fb68..f3cd5d15bf0 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -71,7 +71,15 @@ upload_vs_state(struct brw_context *brw)
OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
GEN6_VS_FLOATING_POINT_MODE_ALT |
(brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
- OUT_BATCH(0); /* scratch space base offset */
+
+ if (brw->vs.prog_data->total_scratch) {
+ OUT_RELOC(brw->vs.scratch_bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ ffs(brw->vs.prog_data->total_scratch) - 11);
+ } else {
+ OUT_BATCH(0);
+ }
+
OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
(brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index a102ca772b3..55a603e887a 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -58,7 +58,7 @@ gen7_prepare_wm_constants(struct brw_context *brw)
for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
- *brw->wm.prog_data->param[i]);
+ brw->wm.prog_data->param[i]);
}
if (0) {
@@ -228,7 +228,13 @@ upload_ps_state(struct brw_context *brw)
OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
OUT_BATCH(brw->wm.prog_offset);
OUT_BATCH(dw2);
- OUT_BATCH(0); /* scratch space base offset */
+ if (brw->wm.prog_data->total_scratch) {
+ OUT_RELOC(brw->wm.scratch_bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ ffs(brw->wm.prog_data->total_scratch) - 11);
+ } else {
+ OUT_BATCH(0);
+ }
OUT_BATCH(dw4);
OUT_BATCH(dw5);
OUT_BATCH(0); /* kernel 1 pointer */
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index b61a2ffef19..db4343be10c 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -308,12 +308,29 @@ emit:
* [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
* =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
*
- * XXX: There is also a workaround that would appear to apply to this
- * workaround, but it doesn't appear to be necessary so far:
+ * And the workaround for these two requires this workaround first:
*
- * Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
+ * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
* BEFORE the pipe-control with a post-sync op and no write-cache
* flushes.
+ *
+ * And this last workaround is tricky because of the requirements on
+ * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
+ * volume 2 part 1:
+ *
+ * "1 of the following must also be set:
+ * - Render Target Cache Flush Enable ([12] of DW1)
+ * - Depth Cache Flush Enable ([0] of DW1)
+ * - Stall at Pixel Scoreboard ([1] of DW1)
+ * - Depth Stall ([13] of DW1)
+ * - Post-Sync Operation ([13] of DW1)
+ * - Notify Enable ([8] of DW1)"
+ *
+ * The cache flushes require the workaround flush that triggered this
+ * one, so we can't use it. Depth stall would trigger the same.
+ * Post-sync nonzero is what triggered this second workaround, so we
+ * can't use that one either. Notify enable is IRQs, which aren't
+ * really our business. That leaves only stall at scoreboard.
*/
void
intel_emit_post_sync_nonzero_flush(struct intel_context *intel)
@@ -323,9 +340,17 @@ intel_emit_post_sync_nonzero_flush(struct intel_context *intel)
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+ OUT_BATCH(PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_STALL_AT_SCOREBOARD);
+ OUT_BATCH(0); /* address */
+ OUT_BATCH(0); /* write data */
+ ADVANCE_BATCH();
+
+ BEGIN_BATCH(4);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL);
OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
OUT_RELOC(intel->batch.workaround_bo,
- I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT, 0);
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
OUT_BATCH(0); /* write data */
ADVANCE_BATCH();
@@ -365,6 +390,7 @@ intel_batchbuffer_emit_mi_flush(struct intel_context *intel)
OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
PIPE_CONTROL_WRITE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_TC_FLUSH |
PIPE_CONTROL_NO_WRITE);
OUT_BATCH(0); /* write address */
OUT_BATCH(0); /* write data */
diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c
index 30be1b9382f..b18dd2922d9 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.c
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@@ -541,8 +541,8 @@ intel_set_teximage_alpha_to_one(struct gl_context *ctx,
/* get dest x/y in destination texture */
intel_miptree_get_image_offset(intel_image->mt,
- intel_image->level,
- intel_image->face,
+ intel_image->base.Level,
+ intel_image->base.Face,
0,
&image_x, &image_y);
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 439d6fc8247..d908975fc87 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -41,8 +41,7 @@
#include "intel_regions.h"
static GLboolean
-intel_bufferobj_unmap(struct gl_context * ctx,
- GLenum target, struct gl_buffer_object *obj);
+intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj);
/** Allocates a new drm_intel_bo to store the data for the buffer object. */
static void
@@ -122,7 +121,7 @@ intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj)
* (though it does if you call glDeleteBuffers)
*/
if (obj->Pointer)
- intel_bufferobj_unmap(ctx, 0, obj);
+ intel_bufferobj_unmap(ctx, obj);
free(intel_obj->sys_buffer);
if (intel_obj->region) {
@@ -203,7 +202,6 @@ intel_bufferobj_data(struct gl_context * ctx,
*/
static void
intel_bufferobj_subdata(struct gl_context * ctx,
- GLenum target,
GLintptrARB offset,
GLsizeiptrARB size,
const GLvoid * data, struct gl_buffer_object *obj)
@@ -276,82 +274,28 @@ intel_bufferobj_subdata(struct gl_context * ctx,
*/
static void
intel_bufferobj_get_subdata(struct gl_context * ctx,
- GLenum target,
GLintptrARB offset,
GLsizeiptrARB size,
GLvoid * data, struct gl_buffer_object *obj)
{
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+ struct intel_context *intel = intel_context(ctx);
assert(intel_obj);
if (intel_obj->sys_buffer)
memcpy(data, (char *)intel_obj->sys_buffer + offset, size);
- else
- drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data);
-}
-
-
-
-/**
- * Called via glMapBufferARB().
- */
-static void *
-intel_bufferobj_map(struct gl_context * ctx,
- GLenum target,
- GLenum access, struct gl_buffer_object *obj)
-{
- struct intel_context *intel = intel_context(ctx);
- struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
- GLboolean read_only = (access == GL_READ_ONLY_ARB);
- GLboolean write_only = (access == GL_WRITE_ONLY_ARB);
-
- assert(intel_obj);
-
- if (intel_obj->sys_buffer) {
- if (!read_only && intel_obj->source) {
- release_buffer(intel_obj);
- }
-
- if (!intel_obj->buffer || intel_obj->source) {
- obj->Pointer = intel_obj->sys_buffer;
- obj->Length = obj->Size;
- obj->Offset = 0;
- return obj->Pointer;
+ else {
+ if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) {
+ intel_batchbuffer_flush(intel);
}
-
- free(intel_obj->sys_buffer);
- intel_obj->sys_buffer = NULL;
- }
-
- /* Flush any existing batchbuffer that might reference this data. */
- if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer))
- intel_flush(ctx);
-
- if (intel_obj->region)
- intel_bufferobj_cow(intel, intel_obj);
-
- if (intel_obj->buffer == NULL) {
- obj->Pointer = NULL;
- return NULL;
- }
-
- if (write_only) {
- drm_intel_gem_bo_map_gtt(intel_obj->buffer);
- intel_obj->mapped_gtt = GL_TRUE;
- } else {
- drm_intel_bo_map(intel_obj->buffer, !read_only);
- intel_obj->mapped_gtt = GL_FALSE;
+ drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data);
}
+}
- obj->Pointer = intel_obj->buffer->virtual;
- obj->Length = obj->Size;
- obj->Offset = 0;
- return obj->Pointer;
-}
/**
- * Called via glMapBufferRange().
+ * Called via glMapBufferRange and glMapBuffer
*
* The goal of this extension is to allow apps to accumulate their rendering
* at the same time as they accumulate their buffer object. Without it,
@@ -368,12 +312,11 @@ intel_bufferobj_map(struct gl_context * ctx,
*/
static void *
intel_bufferobj_map_range(struct gl_context * ctx,
- GLenum target, GLintptr offset, GLsizeiptr length,
+ GLintptr offset, GLsizeiptr length,
GLbitfield access, struct gl_buffer_object *obj)
{
struct intel_context *intel = intel_context(ctx);
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
- GLboolean read_only = (access == GL_READ_ONLY_ARB);
assert(intel_obj);
@@ -385,6 +328,9 @@ intel_bufferobj_map_range(struct gl_context * ctx,
obj->AccessFlags = access;
if (intel_obj->sys_buffer) {
+ const bool read_only =
+ (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_READ_BIT;
+
if (!read_only && intel_obj->source)
release_buffer(intel_obj);
@@ -468,7 +414,7 @@ intel_bufferobj_map_range(struct gl_context * ctx,
* would defeat the point.
*/
static void
-intel_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target,
+intel_bufferobj_flush_mapped_range(struct gl_context *ctx,
GLintptr offset, GLsizeiptr length,
struct gl_buffer_object *obj)
{
@@ -502,8 +448,7 @@ intel_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target,
* Called via glUnmapBuffer().
*/
static GLboolean
-intel_bufferobj_unmap(struct gl_context * ctx,
- GLenum target, struct gl_buffer_object *obj)
+intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj)
{
struct intel_context *intel = intel_context(ctx);
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
@@ -758,23 +703,23 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx,
* not overlap.
*/
if (src == dst) {
- char *ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER,
- GL_READ_WRITE, dst);
+ char *ptr = intel_bufferobj_map_range(ctx, 0, dst->Size,
+ GL_MAP_READ_BIT, dst);
memmove(ptr + write_offset, ptr + read_offset, size);
- intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst);
+ intel_bufferobj_unmap(ctx, dst);
} else {
const char *src_ptr;
char *dst_ptr;
- src_ptr = intel_bufferobj_map(ctx, GL_COPY_READ_BUFFER,
- GL_READ_ONLY, src);
- dst_ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER,
- GL_WRITE_ONLY, dst);
+ src_ptr = intel_bufferobj_map_range(ctx, 0, src->Size,
+ GL_MAP_READ_BIT, src);
+ dst_ptr = intel_bufferobj_map_range(ctx, 0, dst->Size,
+ GL_MAP_WRITE_BIT, dst);
memcpy(dst_ptr + write_offset, src_ptr + read_offset, size);
- intel_bufferobj_unmap(ctx, GL_COPY_READ_BUFFER, src);
- intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst);
+ intel_bufferobj_unmap(ctx, src);
+ intel_bufferobj_unmap(ctx, dst);
}
return;
}
@@ -924,7 +869,6 @@ intelInitBufferObjectFuncs(struct dd_function_table *functions)
functions->BufferData = intel_bufferobj_data;
functions->BufferSubData = intel_bufferobj_subdata;
functions->GetBufferSubData = intel_bufferobj_get_subdata;
- functions->MapBuffer = intel_bufferobj_map;
functions->MapBufferRange = intel_bufferobj_map_range;
functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range;
functions->UnmapBuffer = intel_bufferobj_unmap;
diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c
index dfca03c14bf..76d33f9b37e 100644
--- a/src/mesa/drivers/dri/intel/intel_clear.c
+++ b/src/mesa/drivers/dri/intel/intel_clear.c
@@ -116,13 +116,13 @@ intelClear(struct gl_context *ctx, GLbitfield mask)
}
/* HW color buffers (front, back, aux, generic FBO, etc) */
- if (colorMask == ~0) {
+ if (intel->gen < 6 && colorMask == ~0) {
/* clear all R,G,B,A */
blit_mask |= (mask & BUFFER_BITS_COLOR);
}
else {
/* glColorMask in effect */
- tri_mask |= (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT));
+ tri_mask |= (mask & BUFFER_BITS_COLOR);
}
/* Make sure we have up to date buffers before we start looking at
@@ -143,6 +143,12 @@ intelClear(struct gl_context *ctx, GLbitfield mask)
*/
tri_mask |= BUFFER_BIT_STENCIL;
}
+ else if (intel->has_separate_stencil &&
+ stencilRegion->tiling == I915_TILING_NONE) {
+ /* The stencil buffer is actually W tiled, which the hardware
+ * cannot blit to. */
+ tri_mask |= BUFFER_BIT_STENCIL;
+ }
else {
/* clearing all stencil bits, use blitting */
blit_mask |= BUFFER_BIT_STENCIL;
@@ -182,7 +188,10 @@ intelClear(struct gl_context *ctx, GLbitfield mask)
if (tri_mask) {
debug_mask("tri", tri_mask);
- _mesa_meta_Clear(&intel->ctx, tri_mask);
+ if (ctx->Extensions.ARB_fragment_shader)
+ _mesa_meta_glsl_Clear(&intel->ctx, tri_mask);
+ else
+ _mesa_meta_Clear(&intel->ctx, tri_mask);
}
}
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 2ba13632569..14342ef6246 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -1439,7 +1439,12 @@ intel_verify_dri2_has_hiz(struct intel_context *intel,
assert(stencil_rb->Base.Format == MESA_FORMAT_S8);
assert(depth_rb && depth_rb->Base.Format == MESA_FORMAT_X8_Z24);
- if (stencil_rb->region->tiling == I915_TILING_Y) {
+ if (stencil_rb->region->tiling == I915_TILING_NONE) {
+ /*
+ * The stencil buffer is actually W tiled. The region's tiling is
+ * I915_TILING_NONE, however, because the GTT is incapable of W
+ * fencing.
+ */
intel->intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_TRUE;
return;
} else {
@@ -1449,6 +1454,13 @@ intel_verify_dri2_has_hiz(struct intel_context *intel,
* a combined depth/stencil buffer. Discard the hiz buffer too.
*/
intel->intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_FALSE;
+ if (intel->must_use_separate_stencil) {
+ _mesa_problem(&intel->ctx,
+ "intel_context requires separate stencil, but the "
+ "DRIscreen does not support it. You may need to "
+ "upgrade the Intel X driver to 2.16.0");
+ abort();
+ }
/* 1. Discard depth and stencil renderbuffers. */
_mesa_remove_renderbuffer(fb, BUFFER_DEPTH);
@@ -1527,7 +1539,7 @@ intel_verify_dri2_has_hiz(struct intel_context *intel,
* Presently, however, no verification or clean up is necessary, and
* execution should not reach here. If the framebuffer still has a hiz
* region, then we have already set dri2_has_hiz to true after
- * confirming above that the stencil buffer is Y tiled.
+ * confirming above that the stencil buffer is W tiled.
*/
assert(0);
}
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 55bcc757873..754f9f202d1 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -173,6 +173,9 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer
if (irb->Base.Format == MESA_FORMAT_S8) {
/*
+ * The stencil buffer is W tiled. However, we request from the kernel a
+ * non-tiled buffer because the GTT is incapable of W fencing.
+ *
* The stencil buffer has quirky pitch requirements. From Vol 2a,
* 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
* The pitch must be set to 2x the value computed based on width, as
@@ -180,14 +183,13 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer
* To accomplish this, we resort to the nasty hack of doubling the drm
* region's cpp and halving its height.
*
- * If we neglect to double the pitch, then drm_intel_gem_bo_map_gtt()
- * maps the memory incorrectly.
+ * If we neglect to double the pitch, then render corruption occurs.
*/
irb->region = intel_region_alloc(intel->intelScreen,
- I915_TILING_Y,
+ I915_TILING_NONE,
cpp * 2,
- width,
- height / 2,
+ ALIGN(width, 64),
+ ALIGN((height + 1) / 2, 64),
GL_TRUE);
if (!irb->region)
return false;
@@ -594,17 +596,15 @@ intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb,
struct intel_texture_image *intel_image,
int zoffset)
{
- struct intel_mipmap_tree *mt = intel_image->mt;
unsigned int dst_x, dst_y;
/* compute offset of the particular 2D image within the texture region */
intel_miptree_get_image_offset(intel_image->mt,
- intel_image->level,
- intel_image->face,
+ intel_image->base.Level,
+ intel_image->base.Face,
zoffset,
&dst_x, &dst_y);
- irb->draw_offset = (dst_y * mt->region->pitch + dst_x) * mt->cpp;
irb->draw_x = dst_x;
irb->draw_y = dst_y;
}
@@ -645,6 +645,22 @@ intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb,
}
}
+#ifndef I915
+static bool
+need_tile_offset_workaround(struct brw_context *brw,
+ struct intel_renderbuffer *irb)
+{
+ uint32_t tile_x, tile_y;
+
+ if (brw->has_surface_tile_offset)
+ return false;
+
+ intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y);
+
+ return tile_x != 0 || tile_y != 0;
+}
+#endif
+
/**
* Called by glFramebufferTexture[123]DEXT() (and other places) to
* prepare for rendering into texture memory. This might be called
@@ -698,8 +714,7 @@ intel_render_texture(struct gl_context * ctx,
intel_image->used_as_render_target = GL_TRUE;
#ifndef I915
- if (!brw_context(ctx)->has_surface_tile_offset &&
- (irb->draw_offset & 4095) != 0) {
+ if (need_tile_offset_workaround(brw_context(ctx), irb)) {
/* Original gen4 hardware couldn't draw to a non-tile-aligned
* destination in a miptree unless you actually setup your
* renderbuffer as a miptree and used the fragile
@@ -713,8 +728,8 @@ intel_render_texture(struct gl_context * ctx,
new_mt = intel_miptree_create(intel, image->TexObject->Target,
intel_image->base.TexFormat,
- intel_image->level,
- intel_image->level,
+ intel_image->base.Level,
+ intel_image->base.Level,
intel_image->base.Width,
intel_image->base.Height,
intel_image->base.Depth,
@@ -722,8 +737,8 @@ intel_render_texture(struct gl_context * ctx,
intel_miptree_image_copy(intel,
new_mt,
- intel_image->face,
- intel_image->level,
+ intel_image->base.Face,
+ intel_image->base.Level,
old_mt);
intel_miptree_release(intel, &intel_image->mt);
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h
index f7f99a4f00c..2487994fde5 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.h
+++ b/src/mesa/drivers/dri/intel/intel_fbo.h
@@ -58,7 +58,6 @@ struct intel_renderbuffer
/** \} */
- GLuint draw_offset; /**< Offset of drawing address within the region */
GLuint draw_x, draw_y; /**< Offset of drawing within the region */
};
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 4e711de1ce1..f36240d7f1d 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -227,7 +227,7 @@ intel_miptree_match_image(struct intel_mipmap_tree *mt,
struct gl_texture_image *image)
{
struct intel_texture_image *intelImage = intel_texture_image(image);
- GLuint level = intelImage->level;
+ GLuint level = intelImage->base.Level;
/* Images with borders are never pulled into mipmap trees. */
if (image->Border)
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index 86d0ef2d748..d9873a303ee 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -74,9 +74,9 @@ static const GLubyte *map_pbo( struct gl_context *ctx,
return NULL;
}
- buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
- GL_READ_ONLY_ARB,
- unpack->BufferObj);
+ buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size,
+ GL_MAP_READ_BIT,
+ unpack->BufferObj);
if (!buf) {
_mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)");
return NULL;
@@ -292,8 +292,7 @@ out:
if (_mesa_is_bufferobj(unpack->BufferObj)) {
/* done with PBO so unmap it now */
- ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
- unpack->BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
}
intel_check_front_buffer_rendering(intel);
diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h
index 5aa629150cf..a98a669af21 100644
--- a/src/mesa/drivers/dri/intel/intel_reg.h
+++ b/src/mesa/drivers/dri/intel/intel_reg.h
@@ -75,6 +75,7 @@
#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4)
#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3)
#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2)
+#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1)
#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
#define PIPE_CONTROL_PPGTT_WRITE (0 << 2)
#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2)
diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h
index b2013af1a29..9dd6a525566 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.h
+++ b/src/mesa/drivers/dri/intel/intel_screen.h
@@ -63,9 +63,12 @@
* x8_z24 and s8).
*
* Eventually, intel_update_renderbuffers() makes a DRI2 request for
- * DRI2BufferStencil and DRI2BufferHiz. If the returned buffers are Y tiled,
- * then we joyfully set intel_screen.dri2_has_hiz to true and continue as if
- * nothing happend.
+ * DRI2BufferStencil and DRI2BufferHiz. If the stencil buffer's tiling is
+ * I915_TILING_NONE [1], then we joyfully set intel_screen.dri2_has_hiz to
+ * true and continue as if nothing happened.
+ *
+ * [1] The stencil buffer is actually W tiled. However, we request from the
+ * kernel a non-tiled buffer because the GTT is incapable of W fencing.
*
* If the buffers are X tiled, however, the handshake has failed and we must
* clean up.
diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c
index 153803fba09..2e1c80c4766 100644
--- a/src/mesa/drivers/dri/intel/intel_span.c
+++ b/src/mesa/drivers/dri/intel/intel_span.c
@@ -131,38 +131,84 @@ intel_set_span_functions(struct intel_context *intel,
int miny = 0; \
int maxx = rb->Width; \
int maxy = rb->Height; \
- int stride = rb->RowStride; \
- uint8_t *buf = rb->Data; \
+ \
+ /* \
+ * Here we ignore rb->Data and rb->RowStride as set by \
+ * intelSpanRenderStart. Since intel_offset_S8 decodes the W tile \
+ * manually, the region's *real* base address and stride are \
+ * required. \
+ */ \
+ struct intel_renderbuffer *irb = intel_renderbuffer(rb); \
+ uint8_t *buf = irb->region->buffer->virtual; \
+ unsigned stride = irb->region->pitch; \
+ unsigned height = 2 * irb->region->height; \
+ bool flip = rb->Name == 0; \
+ int y_scale = flip ? -1 : 1; \
+ int y_bias = flip ? (height - 1) : 0; \
-/* Don't flip y. */
#undef Y_FLIP
-#define Y_FLIP(y) y
+#define Y_FLIP(y) (y_scale * (y) + y_bias)
/**
* \brief Get pointer offset into stencil buffer.
*
- * The stencil buffer interleaves two rows into one. Yay for crazy hardware.
- * The table below demonstrates how the pointer arithmetic behaves for a buffer
- * with positive stride (s=stride).
- *
- * x | y | byte offset
- * --------------------------
- * 0 | 0 | 0
- * 0 | 1 | 1
- * 1 | 0 | 2
- * 1 | 1 | 3
- * ... | ... | ...
- * 0 | 2 | s
- * 0 | 3 | s + 1
- * 1 | 2 | s + 2
- * 1 | 3 | s + 3
+ * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
+ * must decode the tile's layout in software.
*
+ * See
+ * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
+ * Format.
+ * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
*
+ * Even though the returned offset is always non-negative, the return type is
+ * signed due to
+ * commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
+ * mesa: Fix return type of _mesa_get_format_bytes() (#37351)
*/
static inline intptr_t
-intel_offset_S8(int stride, GLint x, GLint y)
+intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y)
{
- return 2 * ((y / 2) * stride + x) + y % 2;
+ uint32_t tile_size = 4096;
+ uint32_t tile_width = 64;
+ uint32_t tile_height = 64;
+ uint32_t row_size = 64 * stride;
+
+ uint32_t tile_x = x / tile_width;
+ uint32_t tile_y = y / tile_height;
+
+ /* The byte's address relative to the tile's base address. */
+ uint32_t byte_x = x % tile_width;
+ uint32_t byte_y = y % tile_height;
+
+ uintptr_t u = tile_y * row_size
+ + tile_x * tile_size
+ + 512 * (byte_x / 8)
+ + 64 * (byte_y / 8)
+ + 32 * ((byte_y / 4) % 2)
+ + 16 * ((byte_x / 4) % 2)
+ + 8 * ((byte_y / 2) % 2)
+ + 4 * ((byte_x / 2) % 2)
+ + 2 * (byte_y % 2)
+ + 1 * (byte_x % 2);
+
+ /*
+ * Errata for Gen5:
+ *
+ * An additional offset is needed which is not documented in the PRM.
+ *
+ * if ((byte_x / 8) % 2 == 1) {
+ * if ((byte_y / 8) % 2 == 0) {
+ * u += 64;
+ * } else {
+ * u -= 64;
+ * }
+ * }
+ *
+ * The offset is expressed more tersely as
+ * u += ((int) x & 0x8) * (8 - (((int) y & 0x8) << 1));
+ */
+
+ return u;
}
#define WRITE_STENCIL(x, y, src) buf[intel_offset_S8(stride, x, y)] = src;
diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c
index 21c4a1dddba..ee0cd252375 100644
--- a/src/mesa/drivers/dri/intel/intel_tex.c
+++ b/src/mesa/drivers/dri/intel/intel_tex.c
@@ -95,17 +95,12 @@ intelGenerateMipmap(struct gl_context *ctx, GLenum target,
if (!_mesa_is_format_compressed(first_image->TexFormat)) {
GLuint nr_faces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
GLuint face, i;
- /* Update the level information in our private data in the new images,
- * since it didn't get set as part of a normal TexImage path.
- */
for (face = 0; face < nr_faces; face++) {
for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) {
struct intel_texture_image *intelImage =
intel_texture_image(texObj->Image[face][i]);
if (!intelImage)
break;
- intelImage->level = i;
- intelImage->face = face;
/* Unreference the miptree to signal that the new Data is a
* bare pointer from mesa.
*/
diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
index 1a3643da593..600bd1251e0 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -118,8 +118,8 @@ intel_copy_texsubimage(struct intel_context *intel,
/* get dest x/y in destination texture */
intel_miptree_get_image_offset(intelImage->mt,
- intelImage->level,
- intelImage->face,
+ intelImage->base.Level,
+ intelImage->base.Face,
0,
&image_x, &image_y);
@@ -164,101 +164,6 @@ intel_copy_texsubimage(struct intel_context *intel,
static void
-intelCopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level,
- GLenum internalFormat,
- GLint x, GLint y, GLsizei width, GLint border)
-{
- struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
- struct gl_texture_object *texObj =
- _mesa_select_tex_object(ctx, texUnit, target);
- struct gl_texture_image *texImage =
- _mesa_select_tex_image(ctx, texObj, target, level);
- int srcx, srcy, dstx, dsty, height;
-
- if (border)
- goto fail;
-
- /* Setup or redefine the texture object, mipmap tree and texture
- * image. Don't populate yet.
- */
- ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
- width, border,
- GL_RGBA, CHAN_TYPE, NULL,
- &ctx->DefaultPacking, texObj, texImage);
- srcx = x;
- srcy = y;
- dstx = 0;
- dsty = 0;
- height = 1;
- if (!_mesa_clip_copytexsubimage(ctx,
- &dstx, &dsty,
- &srcx, &srcy,
- &width, &height))
- return;
-
- if (!intel_copy_texsubimage(intel_context(ctx), target,
- intel_texture_image(texImage),
- internalFormat, 0, 0, x, y, width, height))
- goto fail;
-
- return;
-
- fail:
- fallback_debug("%s - fallback to swrast\n", __FUNCTION__);
- _mesa_meta_CopyTexImage1D(ctx, target, level, internalFormat, x, y,
- width, border);
-}
-
-
-static void
-intelCopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level,
- GLenum internalFormat,
- GLint x, GLint y, GLsizei width, GLsizei height,
- GLint border)
-{
- struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
- struct gl_texture_object *texObj =
- _mesa_select_tex_object(ctx, texUnit, target);
- struct gl_texture_image *texImage =
- _mesa_select_tex_image(ctx, texObj, target, level);
- int srcx, srcy, dstx, dsty;
-
- if (border)
- goto fail;
-
- /* Setup or redefine the texture object, mipmap tree and texture
- * image. Don't populate yet.
- */
- ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
- width, height, border,
- GL_RGBA, GL_UNSIGNED_BYTE, NULL,
- &ctx->DefaultPacking, texObj, texImage);
-
- srcx = x;
- srcy = y;
- dstx = 0;
- dsty = 0;
- if (!_mesa_clip_copytexsubimage(ctx,
- &dstx, &dsty,
- &srcx, &srcy,
- &width, &height))
- return;
-
- if (!intel_copy_texsubimage(intel_context(ctx), target,
- intel_texture_image(texImage),
- internalFormat, 0, 0, x, y, width, height))
- goto fail;
-
- return;
-
- fail:
- fallback_debug("%s - fallback to swrast\n", __FUNCTION__);
- _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y,
- width, height, border);
-}
-
-
-static void
intelCopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level,
GLint xoffset, GLint x, GLint y, GLsizei width)
{
@@ -312,8 +217,6 @@ intelCopyTexSubImage2D(struct gl_context * ctx, GLenum target, GLint level,
void
intelInitTextureCopyImageFuncs(struct dd_function_table *functions)
{
- functions->CopyTexImage1D = intelCopyTexImage1D;
- functions->CopyTexImage2D = intelCopyTexImage2D;
functions->CopyTexSubImage1D = intelCopyTexSubImage1D;
functions->CopyTexSubImage2D = intelCopyTexSubImage2D;
}
diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c
index 1f8b885bbec..4ee66847255 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_image.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_image.c
@@ -63,7 +63,7 @@ intel_miptree_create_for_teximage(struct intel_context *intel,
if (intelImage->base.Border)
return NULL;
- if (intelImage->level > intelObj->base.BaseLevel &&
+ if (intelImage->base.Level > intelObj->base.BaseLevel &&
(intelImage->base.Width == 1 ||
(intelObj->base.Target != GL_TEXTURE_1D &&
intelImage->base.Height == 1) ||
@@ -74,19 +74,19 @@ intel_miptree_create_for_teximage(struct intel_context *intel,
* likely base level width/height/depth for a full mipmap stack
* from this info, so just allocate this one level.
*/
- firstLevel = intelImage->level;
- lastLevel = intelImage->level;
+ firstLevel = intelImage->base.Level;
+ lastLevel = intelImage->base.Level;
} else {
/* If this image disrespects BaseLevel, allocate from level zero.
* Usually BaseLevel == 0, so it's unlikely to happen.
*/
- if (intelImage->level < intelObj->base.BaseLevel)
+ if (intelImage->base.Level < intelObj->base.BaseLevel)
firstLevel = 0;
else
firstLevel = intelObj->base.BaseLevel;
/* Figure out image dimensions at start level. */
- for (i = intelImage->level; i > firstLevel; i--) {
+ for (i = intelImage->base.Level; i > firstLevel; i--) {
width <<= 1;
if (height != 1)
height <<= 1;
@@ -101,7 +101,7 @@ intel_miptree_create_for_teximage(struct intel_context *intel,
*/
if ((intelObj->base.Sampler.MinFilter == GL_NEAREST ||
intelObj->base.Sampler.MinFilter == GL_LINEAR) &&
- intelImage->level == firstLevel &&
+ intelImage->base.Level == firstLevel &&
(intel->gen < 4 || firstLevel == 0)) {
lastLevel = firstLevel;
} else {
@@ -186,8 +186,8 @@ try_pbo_upload(struct intel_context *intel,
else
src_stride = width;
- intel_miptree_get_image_offset(intelImage->mt, intelImage->level,
- intelImage->face, 0,
+ intel_miptree_get_image_offset(intelImage->mt, intelImage->base.Level,
+ intelImage->base.Face, 0,
&dst_x, &dst_y);
dst_stride = intelImage->mt->region->pitch;
@@ -243,8 +243,8 @@ try_pbo_zcopy(struct intel_context *intel,
else
src_stride = width;
- intel_miptree_get_image_offset(intelImage->mt, intelImage->level,
- intelImage->face, 0,
+ intel_miptree_get_image_offset(intelImage->mt, intelImage->base.Level,
+ intelImage->base.Face, 0,
&dst_x, &dst_y);
dst_stride = intelImage->mt->region->pitch;
@@ -407,9 +407,6 @@ intelTexImage(struct gl_context * ctx,
DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(target), level, width, height, depth, border);
- intelImage->face = _mesa_tex_target_to_face(target);
- intelImage->level = level;
-
if (_mesa_is_format_compressed(texImage->TexFormat)) {
texelBytes = 0;
}
@@ -514,8 +511,8 @@ intelTexImage(struct gl_context * ctx,
}
texImage->Data = intel_miptree_image_map(intel,
intelImage->mt,
- intelImage->face,
- intelImage->level,
+ intelImage->base.Face,
+ intelImage->base.Level,
&dstRowStride,
intelImage->base.ImageOffsets);
}
@@ -684,8 +681,8 @@ intel_get_tex_image(struct gl_context * ctx, GLenum target, GLint level,
intelImage->base.Data =
intel_miptree_image_map(intel,
intelImage->mt,
- intelImage->face,
- intelImage->level,
+ intelImage->base.Face,
+ intelImage->base.Level,
&intelImage->base.RowStride,
intelImage->base.ImageOffsets);
intelImage->base.RowStride /= intelImage->mt->cpp;
@@ -816,8 +813,6 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
rb->region->width, rb->region->height, 1,
0, internalFormat, texFormat);
- intelImage->face = _mesa_tex_target_to_face(target);
- intelImage->level = level;
texImage->RowStride = rb->region->pitch;
intel_miptree_reference(&intelImage->mt, intelObj->mt);
@@ -874,8 +869,6 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target,
image->region->width, image->region->height, 1,
0, image->internal_format, image->format);
- intelImage->face = _mesa_tex_target_to_face(target);
- intelImage->level = 0;
texImage->RowStride = image->region->pitch;
intel_miptree_reference(&intelImage->mt, intelObj->mt);
diff --git a/src/mesa/drivers/dri/intel/intel_tex_obj.h b/src/mesa/drivers/dri/intel/intel_tex_obj.h
index a9ae2ec5429..e7a4318b8d8 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_obj.h
+++ b/src/mesa/drivers/dri/intel/intel_tex_obj.h
@@ -52,11 +52,6 @@ struct intel_texture_image
{
struct gl_texture_image base;
- /* These aren't stored in gl_texture_image
- */
- GLuint level;
- GLuint face;
-
/* If intelImage->mt != NULL, image data is stored here.
* Else if intelImage->base.Data != NULL, image is stored there.
* Else there is no image data.
diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
index 8b43c406cf9..5fd2cc36234 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
@@ -113,7 +113,7 @@ intelTexSubimage(struct gl_context * ctx,
dstRowStride = pitch;
intel_miptree_get_image_offset(intelImage->mt, level,
- intelImage->face, 0,
+ intelImage->base.Face, 0,
&blit_x, &blit_y);
blit_x += xoffset;
blit_y += yoffset;
@@ -122,8 +122,8 @@ intelTexSubimage(struct gl_context * ctx,
} else {
texImage->Data = intel_miptree_image_map(intel,
intelImage->mt,
- intelImage->face,
- intelImage->level,
+ intelImage->base.Face,
+ intelImage->base.Level,
&dstRowStride,
texImage->ImageOffsets);
}
diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c
index 7135a6276fe..31ac689ad77 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c
@@ -42,8 +42,8 @@ copy_image_data_to_tree(struct intel_context *intel,
*/
intel_miptree_image_copy(intel,
intelObj->mt,
- intelImage->face,
- intelImage->level, intelImage->mt);
+ intelImage->base.Face,
+ intelImage->base.Level, intelImage->mt);
intel_miptree_release(intel, &intelImage->mt);
}
@@ -54,8 +54,8 @@ copy_image_data_to_tree(struct intel_context *intel,
*/
intel_miptree_image_data(intel,
intelObj->mt,
- intelImage->face,
- intelImage->level,
+ intelImage->base.Face,
+ intelImage->base.Level,
intelImage->base.Data,
intelImage->base.RowStride,
intelImage->base.RowStride *
@@ -177,8 +177,8 @@ intel_tex_map_level_images(struct intel_context *intel,
intelImage->base.Data =
intel_miptree_image_map(intel,
intelImage->mt,
- intelImage->face,
- intelImage->level,
+ intelImage->base.Face,
+ intelImage->base.Level,
&intelImage->base.RowStride,
intelImage->base.ImageOffsets);
/* convert stride to texels, not bytes */
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
index e60b91f64be..433590c4181 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
@@ -107,7 +107,7 @@ nouveau_bufferobj_data(struct gl_context *ctx, GLenum target, GLsizeiptrARB size
}
static void
-nouveau_bufferobj_subdata(struct gl_context *ctx, GLenum target, GLintptrARB offset,
+nouveau_bufferobj_subdata(struct gl_context *ctx, GLintptrARB offset,
GLsizeiptrARB size, const GLvoid *data,
struct gl_buffer_object *obj)
{
@@ -115,7 +115,7 @@ nouveau_bufferobj_subdata(struct gl_context *ctx, GLenum target, GLintptrARB off
}
static void
-nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLenum target, GLintptrARB offset,
+nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLintptrARB offset,
GLsizeiptrARB size, GLvoid *data,
struct gl_buffer_object *obj)
{
@@ -123,23 +123,6 @@ nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLenum target, GLintptrARB
}
static void *
-nouveau_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access,
- struct gl_buffer_object *obj)
-{
- unsigned flags = 0;
-
- if (access == GL_READ_ONLY_ARB ||
- access == GL_READ_WRITE_ARB)
- flags |= GL_MAP_READ_BIT;
- if (access == GL_WRITE_ONLY_ARB ||
- access == GL_READ_WRITE_ARB)
- flags |= GL_MAP_WRITE_BIT;
-
- return ctx->Driver.MapBufferRange(ctx, target, 0, obj->Size, flags,
- obj);
-}
-
-static void *
nouveau_bufferobj_map_range(struct gl_context *ctx, GLenum target, GLintptr offset,
GLsizeiptr length, GLbitfield access,
struct gl_buffer_object *obj)
@@ -169,7 +152,7 @@ nouveau_bufferobj_map_range(struct gl_context *ctx, GLenum target, GLintptr offs
}
static GLboolean
-nouveau_bufferobj_unmap(struct gl_context *ctx, GLenum target, struct gl_buffer_object *obj)
+nouveau_bufferobj_unmap(struct gl_context *ctx, struct gl_buffer_object *obj)
{
assert(obj->Pointer);
@@ -189,7 +172,6 @@ nouveau_bufferobj_functions_init(struct dd_function_table *functions)
functions->BufferData = nouveau_bufferobj_data;
functions->BufferSubData = nouveau_bufferobj_subdata;
functions->GetBufferSubData = nouveau_bufferobj_get_subdata;
- functions->MapBuffer = nouveau_bufferobj_map;
functions->MapBufferRange = nouveau_bufferobj_map_range;
functions->UnmapBuffer = nouveau_bufferobj_unmap;
}
diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c
index 02201cb53d6..44a794da396 100644
--- a/src/mesa/drivers/dri/r200/r200_ioctl.c
+++ b/src/mesa/drivers/dri/r200/r200_ioctl.c
@@ -185,7 +185,6 @@ static void r200Clear( struct gl_context *ctx, GLbitfield mask )
r200ContextPtr rmesa = R200_CONTEXT(ctx);
__DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
GLuint flags = 0;
- GLuint color_mask = 0;
GLuint orig_mask = mask;
if ( R200_DEBUG & RADEON_IOCTL ) {
@@ -206,13 +205,11 @@ static void r200Clear( struct gl_context *ctx, GLbitfield mask )
if ( mask & BUFFER_BIT_FRONT_LEFT ) {
flags |= RADEON_FRONT;
- color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
mask &= ~BUFFER_BIT_FRONT_LEFT;
}
if ( mask & BUFFER_BIT_BACK_LEFT ) {
flags |= RADEON_BACK;
- color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
mask &= ~BUFFER_BIT_BACK_LEFT;
}
diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c
index d42e8f12041..91e77f9f7da 100644
--- a/src/mesa/drivers/dri/r200/r200_tex.c
+++ b/src/mesa/drivers/dri/r200/r200_tex.c
@@ -527,7 +527,6 @@ void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *fu
functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
if (radeon->radeonScreen->kernel_mm) {
- functions->CopyTexImage2D = radeonCopyTexImage2D;
functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
}
diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c
index 7adf9ad73ed..8c9bd6d00b2 100644
--- a/src/mesa/drivers/dri/r200/r200_texstate.c
+++ b/src/mesa/drivers/dri/r200/r200_texstate.c
@@ -773,18 +773,12 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format
struct radeon_renderbuffer *rb;
radeon_texture_image *rImage;
radeonContextPtr radeon;
- r200ContextPtr rmesa;
struct radeon_framebuffer *rfb;
radeonTexObjPtr t;
uint32_t pitch_val;
- uint32_t internalFormat, format;
gl_format texFormat;
- format = GL_UNSIGNED_BYTE;
- internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
-
radeon = pDRICtx->driverPrivate;
- rmesa = pDRICtx->driverPrivate;
rfb = dPriv->driverPrivate;
texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c
index 63e03b0e0c7..cf44d7f459c 100644
--- a/src/mesa/drivers/dri/r200/r200_vertprog.c
+++ b/src/mesa/drivers/dri/r200/r200_vertprog.c
@@ -126,10 +126,10 @@ static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_
case PROGRAM_NAMED_PARAM:
//fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
case PROGRAM_CONSTANT:
- *fcmd++ = paramList->ParameterValues[pi][0];
- *fcmd++ = paramList->ParameterValues[pi][1];
- *fcmd++ = paramList->ParameterValues[pi][2];
- *fcmd++ = paramList->ParameterValues[pi][3];
+ *fcmd++ = paramList->ParameterValues[pi][0].f;
+ *fcmd++ = paramList->ParameterValues[pi][1].f;
+ *fcmd++ = paramList->ParameterValues[pi][2].f;
+ *fcmd++ = paramList->ParameterValues[pi][3].f;
break;
default:
_mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index b24274259f4..39dcb21d4f4 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -561,28 +561,29 @@ static int peephole_add_presub_add(
struct rc_instruction * inst_add)
{
unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
- struct rc_src_register * src1 = NULL;
- unsigned int i;
-
- if (!is_presub_candidate(c, inst_add))
- return 0;
+ unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
+ unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
return 0;
- /* XXX This isn't fully implemented, is it? */
- /* src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */
- for (i = 0; i < 2; i++) {
- if (inst_add->U.I.SrcReg[i].Abs)
- return 0;
+ /* src0 and src1 can't have absolute values */
+ if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
+ return 0;
- /* XXX This looks weird, but it's basically what was here before this commit (see git blame): */
- if ((inst_add->U.I.SrcReg[i].Negate & dstmask) != dstmask && !src1) {
- src1 = &inst_add->U.I.SrcReg[i];
- }
- }
+ /* presub_replace_add() assumes only one is negative */
+ if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
+ return 0;
+
+ /* if src0 is negative, at least all bits of dstmask have to be set */
+ if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
+ return 0;
- if (!src1)
+ /* if src1 is negative, at least all bits of dstmask have to be set */
+ if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
+ return 0;
+
+ if (!is_presub_candidate(c, inst_add))
return 0;
if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
@@ -615,7 +616,7 @@ static void presub_replace_inv(
* of the add instruction must have the constatnt 1 swizzle. This function
* does not check const registers to see if their value is 1.0, so it should
* be called after the constant_folding optimization.
- * @return
+ * @return
* 0 if the ADD instruction is still part of the program.
* 1 if the ADD instruction is no longer part of the program.
*/
diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c
index 0c4d8537c61..5587c16dd44 100644
--- a/src/mesa/drivers/dri/r300/r300_draw.c
+++ b/src/mesa/drivers/dri/r300/r300_draw.c
@@ -84,7 +84,8 @@ static void r300FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
GLboolean mapped_named_bo = GL_FALSE;
if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
- ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+ GL_MAP_READ_BIT, mesa_ind_buf->obj);
mapped_named_bo = GL_TRUE;
assert(mesa_ind_buf->obj->Pointer != NULL);
}
@@ -138,7 +139,7 @@ static void r300FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
r300->ind_buf.count = mesa_ind_buf->count;
if (mapped_named_bo) {
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
}
}
@@ -163,7 +164,10 @@ static void r300SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
GLboolean mapped_named_bo = GL_FALSE;
if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
- ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ ctx->Driver.MapBufferRange(ctx, 0,
+ mesa_ind_buf->obj->Size,
+ GL_MAP_READ_BIT,
+ mesa_ind_buf->obj);
assert(mesa_ind_buf->obj->Pointer != NULL);
mapped_named_bo = GL_TRUE;
}
@@ -184,7 +188,7 @@ static void r300SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
r300->ind_buf.count = mesa_ind_buf->count;
if (mapped_named_bo) {
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
}
} else {
r300FixupIndexBuffer(ctx, mesa_ind_buf);
@@ -235,7 +239,8 @@ static void r300ConvertAttrib(struct gl_context *ctx, int count, const struct gl
if (input->BufferObj->Name) {
if (!input->BufferObj->Pointer) {
- ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+ GL_MAP_READ_BIT, input->BufferObj);
mapped_named_bo = GL_TRUE;
}
@@ -286,7 +291,7 @@ static void r300ConvertAttrib(struct gl_context *ctx, int count, const struct gl
radeon_bo_unmap(attr->bo);
if (mapped_named_bo) {
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
}
}
@@ -302,7 +307,8 @@ static void r300AlignDataToDword(struct gl_context *ctx, const struct gl_client_
radeon_bo_map(attr->bo, 1);
if (!input->BufferObj->Pointer) {
- ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+ GL_MAP_READ_BIT, input->BufferObj);
mapped_named_bo = GL_TRUE;
}
@@ -321,7 +327,7 @@ static void r300AlignDataToDword(struct gl_context *ctx, const struct gl_client_
}
if (mapped_named_bo) {
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
}
radeon_bo_unmap(attr->bo);
diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
index 590d9afe14a..93d8fe185ef 100644
--- a/src/mesa/drivers/dri/r300/r300_tex.c
+++ b/src/mesa/drivers/dri/r300/r300_tex.c
@@ -379,7 +379,6 @@ void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun
functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
if (radeon->radeonScreen->kernel_mm) {
- functions->CopyTexImage2D = radeonCopyTexImage2D;
functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
}
diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
index e24ad6f088d..e4388a021ed 100644
--- a/src/mesa/drivers/dri/r300/r300_texstate.c
+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
@@ -427,13 +427,8 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format
struct radeon_framebuffer *rfb;
radeonTexObjPtr t;
uint32_t pitch_val;
- uint32_t internalFormat, type, format;
gl_format texFormat;
- type = GL_BGRA;
- format = GL_UNSIGNED_BYTE;
- internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
-
radeon = pDRICtx->driverPrivate;
rmesa = pDRICtx->driverPrivate;
diff --git a/src/mesa/drivers/dri/r600/evergreen_fragprog.c b/src/mesa/drivers/dri/r600/evergreen_fragprog.c
index e527c379b62..cc584ca2b35 100644
--- a/src/mesa/drivers/dri/r600/evergreen_fragprog.c
+++ b/src/mesa/drivers/dri/r600/evergreen_fragprog.c
@@ -752,10 +752,10 @@ GLboolean evergreenSetupFPconstants(struct gl_context * ctx)
unNumParamData = paramList->NumParameters;
for(ui=0; ui<unNumParamData; ui++) {
- evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
- evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
- evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
- evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+ evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
+ evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
+ evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
+ evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
}
/* alloc multiple of 16 constants */
diff --git a/src/mesa/drivers/dri/r600/evergreen_render.c b/src/mesa/drivers/dri/r600/evergreen_render.c
index 4507be29d86..74563caf47c 100644
--- a/src/mesa/drivers/dri/r600/evergreen_render.c
+++ b/src/mesa/drivers/dri/r600/evergreen_render.c
@@ -403,7 +403,8 @@ static void evergreenConvertAttrib(struct gl_context *ctx, int count,
{
if (!input->BufferObj->Pointer)
{
- ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+ GL_MAP_READ_BIT, input->BufferObj);
mapped_named_bo = GL_TRUE;
}
@@ -456,7 +457,7 @@ static void evergreenConvertAttrib(struct gl_context *ctx, int count,
if (mapped_named_bo)
{
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
}
}
@@ -470,7 +471,8 @@ static void evergreenFixupIndexBuffer(struct gl_context *ctx, const struct _mesa
if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
{
- ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+ GL_MAP_READ_BIT, mesa_ind_buf->obj);
mapped_named_bo = GL_TRUE;
assert(mesa_ind_buf->obj->Pointer != NULL);
}
@@ -531,7 +533,7 @@ static void evergreenFixupIndexBuffer(struct gl_context *ctx, const struct _mesa
if (mapped_named_bo)
{
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
}
}
@@ -606,7 +608,8 @@ static void evergreenSetupIndexBuffer(struct gl_context *ctx, const struct _mesa
if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
{
- ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+ GL_MAP_READ_BIT, mesa_ind_buf->obj);
assert(mesa_ind_buf->obj->Pointer != NULL);
mapped_named_bo = GL_TRUE;
}
@@ -629,7 +632,7 @@ static void evergreenSetupIndexBuffer(struct gl_context *ctx, const struct _mesa
if (mapped_named_bo)
{
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
}
}
else
@@ -655,7 +658,8 @@ static void evergreenAlignDataToDword(struct gl_context *ctx,
if (!input->BufferObj->Pointer)
{
- ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+ GL_MAP_READ_BIT, input->BufferObj);
mapped_named_bo = GL_TRUE;
}
@@ -675,7 +679,7 @@ static void evergreenAlignDataToDword(struct gl_context *ctx,
radeon_bo_unmap(attr->bo);
if (mapped_named_bo)
{
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
}
attr->stride = dst_stride;
diff --git a/src/mesa/drivers/dri/r600/evergreen_tex.c b/src/mesa/drivers/dri/r600/evergreen_tex.c
index 33a5f277683..d240a216817 100644
--- a/src/mesa/drivers/dri/r600/evergreen_tex.c
+++ b/src/mesa/drivers/dri/r600/evergreen_tex.c
@@ -1288,19 +1288,12 @@ void evergreenSetTexBuffer(__DRIcontext *pDRICtx, GLint target, GLint glx_textur
struct radeon_renderbuffer *rb;
radeon_texture_image *rImage;
radeonContextPtr radeon;
- context_t *rmesa;
struct radeon_framebuffer *rfb;
radeonTexObjPtr t;
uint32_t pitch_val;
- uint32_t internalFormat, type, format;
gl_format texFormat;
- type = GL_BGRA;
- format = GL_UNSIGNED_BYTE;
- internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
-
radeon = pDRICtx->driverPrivate;
- rmesa = pDRICtx->driverPrivate;
rfb = dPriv->driverPrivate;
texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
@@ -1688,7 +1681,6 @@ void evergreenInitTextureFuncs(radeonContextPtr radeon, struct dd_function_table
functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
if (radeon->radeonScreen->kernel_mm) {
- functions->CopyTexImage2D = radeonCopyTexImage2D;
functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
}
diff --git a/src/mesa/drivers/dri/r600/evergreen_vertprog.c b/src/mesa/drivers/dri/r600/evergreen_vertprog.c
index 018869b9996..117916ac78f 100644
--- a/src/mesa/drivers/dri/r600/evergreen_vertprog.c
+++ b/src/mesa/drivers/dri/r600/evergreen_vertprog.c
@@ -684,17 +684,17 @@ GLboolean evergreenSetupVPconstants(struct gl_context * ctx)
for(ui=0; ui<unNumParamData; ui++) {
if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM)
{
- evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0];
- evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1];
- evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2];
- evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3];
+ evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f;
+ evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f;
+ evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f;
+ evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f;
}
else
{
- evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
- evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
- evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
- evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+ evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
+ evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
+ evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
+ evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
}
}
diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
index ce2f7779563..74f048b1062 100644
--- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c
+++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
@@ -259,13 +259,11 @@ static int r600_cs_process_relocs(struct radeon_cs_int *csi,
uint32_t * reloc_chunk,
uint32_t * length_dw_reloc_chunk)
{
- struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm;
struct r600_cs_reloc_legacy *relocs;
int i, j, r;
uint32_t offset_dw = 0;
- csm = (struct r600_cs_manager_legacy*)csi->csm;
relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
restart:
for (i = 0; i < csi->crelocs; i++) {
diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c
index eb7ed30c7a3..3efa1d197fa 100644
--- a/src/mesa/drivers/dri/r600/r600_tex.c
+++ b/src/mesa/drivers/dri/r600/r600_tex.c
@@ -470,7 +470,6 @@ void r600InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun
functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
if (radeon->radeonScreen->kernel_mm) {
- functions->CopyTexImage2D = radeonCopyTexImage2D;
functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
}
diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c
index 949db29c189..65fae7195fd 100644
--- a/src/mesa/drivers/dri/r600/r600_texstate.c
+++ b/src/mesa/drivers/dri/r600/r600_texstate.c
@@ -1141,13 +1141,8 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo
struct radeon_framebuffer *rfb;
radeonTexObjPtr t;
uint32_t pitch_val;
- uint32_t internalFormat, type, format;
gl_format texFormat;
- type = GL_BGRA;
- format = GL_UNSIGNED_BYTE;
- internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
-
radeon = pDRICtx->driverPrivate;
rmesa = pDRICtx->driverPrivate;
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
index 40494cd6af0..6f9834e68fe 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.c
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -778,10 +778,10 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx)
unNumParamData = paramList->NumParameters;
for(ui=0; ui<unNumParamData; ui++) {
- r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
- r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
- r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
- r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+ r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
+ r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
+ r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
+ r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
}
/* Load fp constants to gpu */
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index 0f7a7a46b71..a565c9f2087 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -490,7 +490,8 @@ static void r700ConvertAttrib(struct gl_context *ctx, int count,
{
if (!input->BufferObj->Pointer)
{
- ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+ GL_MAP_READ_BIT, input->BufferObj);
mapped_named_bo = GL_TRUE;
}
@@ -543,7 +544,7 @@ static void r700ConvertAttrib(struct gl_context *ctx, int count,
if (mapped_named_bo)
{
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
}
}
@@ -564,7 +565,8 @@ static void r700AlignDataToDword(struct gl_context *ctx,
if (!input->BufferObj->Pointer)
{
- ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+ GL_MAP_READ_BIT, input->BufferObj);
mapped_named_bo = GL_TRUE;
}
@@ -584,7 +586,7 @@ static void r700AlignDataToDword(struct gl_context *ctx,
radeon_bo_unmap(attr->bo);
if (mapped_named_bo)
{
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
}
attr->stride = dst_stride;
@@ -727,7 +729,8 @@ static void r700FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
{
- ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+ GL_MAP_READ_BIT, mesa_ind_buf->obj);
mapped_named_bo = GL_TRUE;
assert(mesa_ind_buf->obj->Pointer != NULL);
}
@@ -788,7 +791,7 @@ static void r700FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
if (mapped_named_bo)
{
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
}
}
@@ -813,7 +816,8 @@ static void r700SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
{
- ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+ GL_MAP_READ_BIT, mesa_ind_buf->obj);
assert(mesa_ind_buf->obj->Pointer != NULL);
mapped_named_bo = GL_TRUE;
}
@@ -836,7 +840,7 @@ static void r700SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
if (mapped_named_bo)
{
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
}
}
else
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
index 7d4be9180a0..b1e2742b27d 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.c
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -720,17 +720,17 @@ GLboolean r700SetupVertexProgram(struct gl_context * ctx)
for(ui=0; ui<unNumParamData; ui++) {
if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM)
{
- r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0];
- r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1];
- r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2];
- r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3];
+ r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f;
+ r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f;
+ r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f;
+ r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f;
}
else
{
- r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
- r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
- r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
- r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+ r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
+ r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
+ r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
+ r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
}
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
index 607b7470d4b..a74c6c7a575 100644
--- a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
+++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
@@ -78,6 +78,9 @@ static inline uint32_t radeon_gem_name_bo(struct radeon_bo *dummy)
static inline void *radeon_bo_manager_gem_ctor(int fd)
{
+ fprintf(stderr, "[%s:%u] Mesa built without Radeon libdrm support.\n",
+ __func__, __LINE__);
+
return NULL;
}
@@ -87,6 +90,9 @@ static inline void radeon_bo_manager_gem_dtor(void *dummy)
static inline void *radeon_cs_manager_gem_ctor(int fd)
{
+ fprintf(stderr, "[%s:%u] Mesa built without Radeon libdrm support.\n",
+ __func__, __LINE__);
+
return NULL;
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
index 0d1af726c07..7b59c0377f8 100644
--- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
+++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
@@ -130,7 +130,6 @@ radeonBufferData(struct gl_context * ctx,
*/
static void
radeonBufferSubData(struct gl_context * ctx,
- GLenum target,
GLintptrARB offset,
GLsizeiptrARB size,
const GLvoid * data,
@@ -155,7 +154,6 @@ radeonBufferSubData(struct gl_context * ctx,
*/
static void
radeonGetBufferSubData(struct gl_context * ctx,
- GLenum target,
GLintptrARB offset,
GLsizeiptrARB size,
GLvoid * data,
@@ -171,17 +169,18 @@ radeonGetBufferSubData(struct gl_context * ctx,
}
/**
- * Called via glMapBufferARB()
+ * Called via glMapBuffer() and glMapBufferRange()
*/
static void *
-radeonMapBuffer(struct gl_context * ctx,
- GLenum target,
- GLenum access,
- struct gl_buffer_object *obj)
+radeonMapBufferRange(struct gl_context * ctx,
+ GLintptr offset, GLsizeiptr length,
+ GLbitfield access, struct gl_buffer_object *obj)
{
struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
+ const GLboolean write_only =
+ (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_WRITE_BIT;
- if (access == GL_WRITE_ONLY_ARB) {
+ if (write_only) {
ctx->Driver.Flush(ctx);
}
@@ -190,12 +189,13 @@ radeonMapBuffer(struct gl_context * ctx,
return NULL;
}
- radeon_bo_map(radeon_obj->bo, access == GL_WRITE_ONLY_ARB);
+ obj->Offset = offset;
+ obj->Length = length;
+ obj->AccessFlags = access;
- obj->Pointer = radeon_obj->bo->ptr;
- obj->Length = obj->Size;
- obj->Offset = 0;
+ radeon_bo_map(radeon_obj->bo, write_only);
+ obj->Pointer = radeon_obj->bo->ptr + offset;
return obj->Pointer;
}
@@ -205,7 +205,6 @@ radeonMapBuffer(struct gl_context * ctx,
*/
static GLboolean
radeonUnmapBuffer(struct gl_context * ctx,
- GLenum target,
struct gl_buffer_object *obj)
{
struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
@@ -229,6 +228,6 @@ radeonInitBufferObjectFuncs(struct dd_function_table *functions)
functions->BufferData = radeonBufferData;
functions->BufferSubData = radeonBufferSubData;
functions->GetBufferSubData = radeonGetBufferSubData;
- functions->MapBuffer = radeonMapBuffer;
+ functions->MapBufferRange = radeonMapBufferRange;
functions->UnmapBuffer = radeonUnmapBuffer;
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c
index bfc307ca987..e7a6623cf84 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common.c
@@ -436,7 +436,6 @@ void radeonCopyBuffer( __DRIdrawable *dPriv,
const drm_clip_rect_t *rect)
{
radeonContextPtr rmesa;
- struct radeon_framebuffer *rfb;
GLint nbox, i, ret;
assert(dPriv);
@@ -447,8 +446,6 @@ void radeonCopyBuffer( __DRIdrawable *dPriv,
LOCK_HARDWARE(rmesa);
- rfb = dPriv->driverPrivate;
-
if ( RADEON_DEBUG & RADEON_IOCTL ) {
fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx );
}
@@ -527,8 +524,6 @@ static GLboolean radeonPageFlip( __DRIdrawable *dPriv )
{
radeonContextPtr radeon;
GLint ret;
- __DRIscreen *psp;
- struct radeon_renderbuffer *rrb;
struct radeon_framebuffer *rfb;
assert(dPriv);
@@ -537,9 +532,6 @@ static GLboolean radeonPageFlip( __DRIdrawable *dPriv )
radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
rfb = dPriv->driverPrivate;
- rrb = (void *)rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-
- psp = dPriv->driScreenPriv;
LOCK_HARDWARE(radeon);
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index bf8925f61d0..c08b79484af 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -515,7 +515,6 @@ void radeon_prepare_render(radeonContextPtr radeon)
__DRIcontext *driContext = radeon->dri.context;
__DRIdrawable *drawable;
__DRIscreen *screen;
- struct radeon_framebuffer *draw;
screen = driContext->driScreenPriv;
if (!screen->dri2.loader)
@@ -527,7 +526,6 @@ void radeon_prepare_render(radeonContextPtr radeon)
radeon_update_renderbuffers(driContext, drawable, GL_FALSE);
/* Intel driver does the equivalent of this, no clue if it is needed:*/
- draw = drawable->driverPrivate;
radeon_draw_buffer(radeon->glCtx, radeon->glCtx->DrawBuffer);
driContext->dri2.draw_stamp = drawable->dri2.stamp;
diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
index c2722a4e195..5595b705b15 100644
--- a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
+++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
@@ -218,11 +218,9 @@ static int cs_end(struct radeon_cs_int *cs,
static int cs_process_relocs(struct radeon_cs_int *cs)
{
- struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
struct cs_reloc_legacy *relocs;
int i, j, r;
- csm = (struct cs_manager_legacy*)cs->csm;
relocs = (struct cs_reloc_legacy *)cs->relocs;
restart:
for (i = 0; i < cs->crelocs; i++)
diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
index a91d8727792..c23e9c2d2a2 100644
--- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
@@ -560,7 +560,6 @@ static void radeonClear( struct gl_context *ctx, GLbitfield mask )
r100ContextPtr rmesa = R100_CONTEXT(ctx);
__DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
GLuint flags = 0;
- GLuint color_mask = 0;
GLuint orig_mask = mask;
if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
@@ -582,13 +581,11 @@ static void radeonClear( struct gl_context *ctx, GLbitfield mask )
if ( mask & BUFFER_BIT_FRONT_LEFT ) {
flags |= RADEON_FRONT;
- color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
mask &= ~BUFFER_BIT_FRONT_LEFT;
}
if ( mask & BUFFER_BIT_BACK_LEFT ) {
flags |= RADEON_BACK;
- color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
mask &= ~BUFFER_BIT_BACK_LEFT;
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c
index 7b6bd36dcf7..ae8a212f806 100644
--- a/src/mesa/drivers/dri/radeon/radeon_lock.c
+++ b/src/mesa/drivers/dri/radeon/radeon_lock.c
@@ -114,16 +114,6 @@ void radeon_lock_hardware(radeonContextPtr radeon
)
{
char ret = 0;
- struct radeon_framebuffer *rfb = NULL;
- struct radeon_renderbuffer *rrb = NULL;
-
- if (radeon_get_drawable(radeon)) {
- rfb = radeon_get_drawable(radeon)->driverPrivate;
-
- if (rfb)
- rrb = radeon_get_renderbuffer(&rfb->base,
- rfb->base._ColorDrawBufferIndexes[0]);
- }
if (!radeon->radeonScreen->driScreen->dri2.enabled) {
if (ATOMIC_INC_AND_FETCH(radeon->dri.hwLockCount) > 1)
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c
index 25a8ddf7b6a..a0b5506ae76 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tex.c
@@ -455,7 +455,6 @@ void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *
functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
if (radeon->radeonScreen->kernel_mm) {
- functions->CopyTexImage2D = radeonCopyTexImage2D;
functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
index f14dfa25d40..94ff3c4a727 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
@@ -141,61 +141,6 @@ do_copy_texsubimage(struct gl_context *ctx,
}
void
-radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level,
- GLenum internalFormat,
- GLint x, GLint y, GLsizei width, GLsizei height,
- GLint border)
-{
- struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
- struct gl_texture_object *texObj =
- _mesa_select_tex_object(ctx, texUnit, target);
- struct gl_texture_image *texImage =
- _mesa_select_tex_image(ctx, texObj, target, level);
- int srcx, srcy, dstx, dsty;
-
- radeonContextPtr radeon = RADEON_CONTEXT(ctx);
- radeon_prepare_render(radeon);
-
- if (border)
- goto fail;
-
- /* Setup or redefine the texture object, mipmap tree and texture
- * image. Don't populate yet.
- */
- ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
- width, height, border,
- GL_RGBA, GL_UNSIGNED_BYTE, NULL,
- &ctx->DefaultPacking, texObj, texImage);
-
- srcx = x;
- srcy = y;
- dstx = 0;
- dsty = 0;
- if (!_mesa_clip_copytexsubimage(ctx,
- &dstx, &dsty,
- &srcx, &srcy,
- &width, &height)) {
- return;
- }
-
- if (!do_copy_texsubimage(ctx, target, level,
- radeon_tex_obj(texObj), (radeon_texture_image *)texImage,
- 0, 0, x, y, width, height)) {
- goto fail;
- }
-
- return;
-
-fail:
- radeon_print(RADEON_FALLBACKS, RADEON_NORMAL,
- "Falling back to sw for glCopyTexImage2D (internalFormat %s, border %d)\n",
- _mesa_lookup_enum_by_nr(internalFormat), border);
-
- _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y,
- width, height, border);
-}
-
-void
radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level,
GLint xoffset, GLint yoffset,
GLint x, GLint y,
diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c
index 9ba98e303a7..430309392a0 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
@@ -648,18 +648,12 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_form
struct radeon_renderbuffer *rb;
radeon_texture_image *rImage;
radeonContextPtr radeon;
- r100ContextPtr rmesa;
struct radeon_framebuffer *rfb;
radeonTexObjPtr t;
uint32_t pitch_val;
- uint32_t internalFormat, format;
gl_format texFormat;
- format = GL_UNSIGNED_BYTE;
- internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? GL_RGB : GL_RGBA);
-
radeon = pDRICtx->driverPrivate;
- rmesa = pDRICtx->driverPrivate;
rfb = dPriv->driverPrivate;
texUnit = _mesa_get_current_tex_unit(radeon->glCtx);
@@ -1018,7 +1012,7 @@ static GLboolean radeon_validate_texgen( struct gl_context *ctx, GLuint unit )
static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int unit)
{
const struct gl_texture_image *firstImage;
- GLint log2Width, log2Height, log2Depth, texelBytes;
+ GLint log2Width, log2Height, texelBytes;
if ( t->bo ) {
return GL_TRUE;
@@ -1033,7 +1027,6 @@ static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int
log2Width = firstImage->WidthLog2;
log2Height = firstImage->HeightLog2;
- log2Depth = firstImage->DepthLog2;
texelBytes = _mesa_get_format_bytes(firstImage->TexFormat);
if (!t->image_override) {
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c
index ce0df32bfe4..ad7e4c146a4 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.c
@@ -787,18 +787,6 @@ static void radeon_teximage(
radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
"%s %dd: texObj %p, texImage %p, face %d, level %d\n",
__func__, dims, texObj, texImage, face, level);
- {
- struct radeon_bo *bo;
- bo = !image->mt ? image->bo : image->mt->bo;
- if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) {
- radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
- "%s Calling teximage for texture that is "
- "queued for GPU processing.\n",
- __func__);
- radeon_firevertices(rmesa);
- }
- }
-
t->validated = GL_FALSE;
@@ -820,6 +808,18 @@ static void radeon_teximage(
}
}
+ {
+ struct radeon_bo *bo;
+ bo = !image->mt ? image->bo : image->mt->bo;
+ if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) {
+ radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
+ "%s Calling teximage for texture that is "
+ "queued for GPU processing.\n",
+ __func__);
+ radeon_firevertices(rmesa);
+ }
+ }
+
/* Upload texture image; note that the spec allows pixels to be NULL */
if (compressed) {
pixels = _mesa_validate_pbo_compressed_teximage(
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h
index 538a07fbba8..6fc06d967dd 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.h
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.h
@@ -126,11 +126,6 @@ void radeonGetCompressedTexImage(struct gl_context *ctx, GLenum target, GLint le
struct gl_texture_object *texObj,
struct gl_texture_image *texImage);
-void radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level,
- GLenum internalFormat,
- GLint x, GLint y, GLsizei width, GLsizei height,
- GLint border);
-
void radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level,
GLint xoffset, GLint yoffset,
GLint x, GLint y,
diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c
index 81f48f9d95a..81d000b3952 100644
--- a/src/mesa/drivers/x11/xm_dd.c
+++ b/src/mesa/drivers/x11/xm_dd.c
@@ -454,10 +454,10 @@ xmesa_DrawPixels_8R8G8B( struct gl_context *ctx,
"glDrawPixels(invalid PBO access)");
return;
}
- buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
- GL_PIXEL_UNPACK_BUFFER_EXT,
- GL_READ_ONLY_ARB,
- unpack->BufferObj);
+ buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0,
+ unpack->BufferObj->Size,
+ GL_MAP_READ_BIT,
+ unpack->BufferObj);
if (!buf) {
/* buffer is already mapped - that's an error */
_mesa_error(ctx, GL_INVALID_OPERATION,
@@ -508,8 +508,7 @@ xmesa_DrawPixels_8R8G8B( struct gl_context *ctx,
}
if (_mesa_is_bufferobj(unpack->BufferObj)) {
- ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
- unpack->BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
}
}
else {
@@ -589,10 +588,10 @@ xmesa_DrawPixels_5R6G5B( struct gl_context *ctx,
"glDrawPixels(invalid PBO access)");
return;
}
- buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
- GL_PIXEL_UNPACK_BUFFER_EXT,
- GL_READ_ONLY_ARB,
- unpack->BufferObj);
+ buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0,
+ unpack->BufferObj->Size,
+ GL_MAP_READ_BIT,
+ unpack->BufferObj);
if (!buf) {
/* buffer is already mapped - that's an error */
_mesa_error(ctx, GL_INVALID_OPERATION,
@@ -642,8 +641,7 @@ xmesa_DrawPixels_5R6G5B( struct gl_context *ctx,
}
if (unpack->BufferObj->Name) {
- ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
- unpack->BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
}
}
else {
diff --git a/src/mesa/main/.gitignore b/src/mesa/main/.gitignore
index 2575f44df4a..d0744e3f0d7 100644
--- a/src/mesa/main/.gitignore
+++ b/src/mesa/main/.gitignore
@@ -4,3 +4,7 @@ get_es1.c
get_es2.c
git_sha1.h
git_sha1.h.tmp
+api_exec_es1_dispatch.h
+api_exec_es1_remap_helper.h
+api_exec_es2_dispatch.h
+api_exec_es2_remap_helper.h
diff --git a/src/mesa/main/api_arrayelt.c b/src/mesa/main/api_arrayelt.c
index f88da845853..b93a057e68b 100644
--- a/src/mesa/main/api_arrayelt.c
+++ b/src/mesa/main/api_arrayelt.c
@@ -1602,10 +1602,10 @@ void _ae_map_vbos( struct gl_context *ctx )
_ae_update_state(ctx);
for (i = 0; i < actx->nr_vbos; i++)
- ctx->Driver.MapBuffer(ctx,
- GL_ARRAY_BUFFER_ARB,
- GL_DYNAMIC_DRAW_ARB,
- actx->vbo[i]);
+ ctx->Driver.MapBufferRange(ctx, 0,
+ actx->vbo[i]->Size,
+ GL_MAP_READ_BIT,
+ actx->vbo[i]);
if (actx->nr_vbos)
actx->mapped_vbos = GL_TRUE;
@@ -1622,9 +1622,7 @@ void _ae_unmap_vbos( struct gl_context *ctx )
assert (!actx->NewState);
for (i = 0; i < actx->nr_vbos; i++)
- ctx->Driver.UnmapBuffer(ctx,
- GL_ARRAY_BUFFER_ARB,
- actx->vbo[i]);
+ ctx->Driver.UnmapBuffer(ctx, actx->vbo[i]);
actx->mapped_vbos = GL_FALSE;
}
diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index 2981d42297a..699b414f502 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -65,8 +65,8 @@ _mesa_max_buffer_index(struct gl_context *ctx, GLuint count, GLenum type,
if (_mesa_is_bufferobj(elementBuf)) {
/* elements are in a user-defined buffer object. need to map it */
- map = ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER,
- GL_READ_ONLY, elementBuf);
+ map = ctx->Driver.MapBufferRange(ctx, 0, elementBuf->Size,
+ GL_MAP_READ_BIT, elementBuf);
/* Actual address is the sum of pointers */
indices = (const GLvoid *) ADD_POINTERS(map, (const GLubyte *) indices);
}
@@ -89,7 +89,7 @@ _mesa_max_buffer_index(struct gl_context *ctx, GLuint count, GLenum type,
}
if (map) {
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, elementBuf);
+ ctx->Driver.UnmapBuffer(ctx, elementBuf);
}
return max;
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index c52358ecb04..c453f9c8554 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -386,11 +386,11 @@ _mesa_buffer_data( struct gl_context *ctx, GLenum target, GLsizeiptrARB size,
* \sa glBufferSubDataARB, dd_function_table::BufferSubData.
*/
static void
-_mesa_buffer_subdata( struct gl_context *ctx, GLenum target, GLintptrARB offset,
+_mesa_buffer_subdata( struct gl_context *ctx, GLintptrARB offset,
GLsizeiptrARB size, const GLvoid * data,
struct gl_buffer_object * bufObj )
{
- (void) ctx; (void) target;
+ (void) ctx;
/* this should have been caught in _mesa_BufferSubData() */
ASSERT(size + offset <= bufObj->Size);
@@ -419,12 +419,11 @@ _mesa_buffer_subdata( struct gl_context *ctx, GLenum target, GLintptrARB offset,
* \sa glBufferGetSubDataARB, dd_function_table::GetBufferSubData.
*/
static void
-_mesa_buffer_get_subdata( struct gl_context *ctx,
- GLenum target, GLintptrARB offset,
+_mesa_buffer_get_subdata( struct gl_context *ctx, GLintptrARB offset,
GLsizeiptrARB size, GLvoid * data,
struct gl_buffer_object * bufObj )
{
- (void) ctx; (void) target;
+ (void) ctx;
if (bufObj->Data && ((GLsizeiptrARB) (size + offset) <= bufObj->Size)) {
memcpy( data, (GLubyte *) bufObj->Data + offset, size );
@@ -433,49 +432,15 @@ _mesa_buffer_get_subdata( struct gl_context *ctx,
/**
- * Default callback for \c dd_function_tabel::MapBuffer().
- *
- * The function parameters will have been already tested for errors.
- *
- * \param ctx GL context.
- * \param target Buffer object target on which to operate.
- * \param access Information about how the buffer will be accessed.
- * \param bufObj Object to be mapped.
- * \return A pointer to the object's internal data store that can be accessed
- * by the processor
- *
- * \sa glMapBufferARB, dd_function_table::MapBuffer
- */
-static void *
-_mesa_buffer_map( struct gl_context *ctx, GLenum target, GLenum access,
- struct gl_buffer_object *bufObj )
-{
- (void) ctx;
- (void) target;
- (void) access;
- /* Just return a direct pointer to the data */
- if (_mesa_bufferobj_mapped(bufObj)) {
- /* already mapped! */
- return NULL;
- }
- bufObj->Pointer = bufObj->Data;
- bufObj->Length = bufObj->Size;
- bufObj->Offset = 0;
- return bufObj->Pointer;
-}
-
-
-/**
* Default fallback for \c dd_function_table::MapBufferRange().
* Called via glMapBufferRange().
*/
static void *
-_mesa_buffer_map_range( struct gl_context *ctx, GLenum target, GLintptr offset,
+_mesa_buffer_map_range( struct gl_context *ctx, GLintptr offset,
GLsizeiptr length, GLbitfield access,
struct gl_buffer_object *bufObj )
{
(void) ctx;
- (void) target;
assert(!_mesa_bufferobj_mapped(bufObj));
/* Just return a direct pointer to the data */
bufObj->Pointer = bufObj->Data + offset;
@@ -491,12 +456,11 @@ _mesa_buffer_map_range( struct gl_context *ctx, GLenum target, GLintptr offset,
* Called via glFlushMappedBufferRange().
*/
static void
-_mesa_buffer_flush_mapped_range( struct gl_context *ctx, GLenum target,
+_mesa_buffer_flush_mapped_range( struct gl_context *ctx,
GLintptr offset, GLsizeiptr length,
struct gl_buffer_object *obj )
{
(void) ctx;
- (void) target;
(void) offset;
(void) length;
(void) obj;
@@ -512,11 +476,9 @@ _mesa_buffer_flush_mapped_range( struct gl_context *ctx, GLenum target,
* \sa glUnmapBufferARB, dd_function_table::UnmapBuffer
*/
static GLboolean
-_mesa_buffer_unmap( struct gl_context *ctx, GLenum target,
- struct gl_buffer_object *bufObj )
+_mesa_buffer_unmap( struct gl_context *ctx, struct gl_buffer_object *bufObj )
{
(void) ctx;
- (void) target;
/* XXX we might assert here that bufObj->Pointer is non-null */
bufObj->Pointer = NULL;
bufObj->Length = 0;
@@ -543,16 +505,16 @@ _mesa_copy_buffer_subdata(struct gl_context *ctx,
assert(!_mesa_bufferobj_mapped(src));
assert(!_mesa_bufferobj_mapped(dst));
- srcPtr = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_COPY_READ_BUFFER,
- GL_READ_ONLY, src);
- dstPtr = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_COPY_WRITE_BUFFER,
- GL_WRITE_ONLY, dst);
+ srcPtr = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, src->Size,
+ GL_MAP_READ_BIT, src);
+ dstPtr = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, dst->Size,
+ GL_MAP_WRITE_BIT, dst);
if (srcPtr && dstPtr)
memcpy(dstPtr + writeOffset, srcPtr + readOffset, size);
- ctx->Driver.UnmapBuffer(ctx, GL_COPY_READ_BUFFER, src);
- ctx->Driver.UnmapBuffer(ctx, GL_COPY_WRITE_BUFFER, dst);
+ ctx->Driver.UnmapBuffer(ctx, src);
+ ctx->Driver.UnmapBuffer(ctx, dst);
}
@@ -712,7 +674,6 @@ _mesa_init_buffer_object_functions(struct dd_function_table *driver)
driver->BufferData = _mesa_buffer_data;
driver->BufferSubData = _mesa_buffer_subdata;
driver->GetBufferSubData = _mesa_buffer_get_subdata;
- driver->MapBuffer = _mesa_buffer_map;
driver->UnmapBuffer = _mesa_buffer_unmap;
/* GL_ARB_map_buffer_range */
@@ -774,7 +735,7 @@ _mesa_DeleteBuffersARB(GLsizei n, const GLuint *ids)
if (_mesa_bufferobj_mapped(bufObj)) {
/* if mapped, unmap it now */
- ctx->Driver.UnmapBuffer(ctx, 0, bufObj);
+ ctx->Driver.UnmapBuffer(ctx, bufObj);
bufObj->AccessFlags = DEFAULT_ACCESS;
bufObj->Pointer = NULL;
}
@@ -934,7 +895,7 @@ _mesa_BufferDataARB(GLenum target, GLsizeiptrARB size,
if (_mesa_bufferobj_mapped(bufObj)) {
/* Unmap the existing buffer. We'll replace it now. Not an error. */
- ctx->Driver.UnmapBuffer(ctx, target, bufObj);
+ ctx->Driver.UnmapBuffer(ctx, bufObj);
bufObj->AccessFlags = DEFAULT_ACCESS;
ASSERT(bufObj->Pointer == NULL);
}
@@ -980,7 +941,7 @@ _mesa_BufferSubDataARB(GLenum target, GLintptrARB offset,
bufObj->Written = GL_TRUE;
ASSERT(ctx->Driver.BufferSubData);
- ctx->Driver.BufferSubData( ctx, target, offset, size, data, bufObj );
+ ctx->Driver.BufferSubData( ctx, offset, size, data, bufObj );
}
@@ -1000,7 +961,7 @@ _mesa_GetBufferSubDataARB(GLenum target, GLintptrARB offset,
}
ASSERT(ctx->Driver.GetBufferSubData);
- ctx->Driver.GetBufferSubData( ctx, target, offset, size, data, bufObj );
+ ctx->Driver.GetBufferSubData( ctx, offset, size, data, bufObj );
}
@@ -1043,8 +1004,8 @@ _mesa_MapBufferARB(GLenum target, GLenum access)
return NULL;
}
- ASSERT(ctx->Driver.MapBuffer);
- map = ctx->Driver.MapBuffer( ctx, target, access, bufObj );
+ ASSERT(ctx->Driver.MapBufferRange);
+ map = ctx->Driver.MapBufferRange(ctx, 0, bufObj->Size, accessFlags, bufObj);
if (!map) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glMapBufferARB(map failed)");
return NULL;
@@ -1147,7 +1108,7 @@ _mesa_UnmapBufferARB(GLenum target)
}
#endif
- status = ctx->Driver.UnmapBuffer( ctx, target, bufObj );
+ status = ctx->Driver.UnmapBuffer( ctx, bufObj );
bufObj->AccessFlags = DEFAULT_ACCESS;
ASSERT(bufObj->Pointer == NULL);
ASSERT(bufObj->Offset == 0);
@@ -1451,8 +1412,7 @@ _mesa_MapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length,
}
ASSERT(ctx->Driver.MapBufferRange);
- map = ctx->Driver.MapBufferRange(ctx, target, offset, length,
- access, bufObj);
+ map = ctx->Driver.MapBufferRange(ctx, offset, length, access, bufObj);
if (!map) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glMapBufferARB(map failed)");
}
@@ -1535,7 +1495,7 @@ _mesa_FlushMappedBufferRange(GLenum target, GLintptr offset, GLsizeiptr length)
ASSERT(bufObj->AccessFlags & GL_MAP_WRITE_BIT);
if (ctx->Driver.FlushMappedBufferRange)
- ctx->Driver.FlushMappedBufferRange(ctx, target, offset, length, bufObj);
+ ctx->Driver.FlushMappedBufferRange(ctx, offset, length, bufObj);
}
diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h
index 743841be4ef..8ed1c6fa61f 100644
--- a/src/mesa/main/compiler.h
+++ b/src/mesa/main/compiler.h
@@ -45,9 +45,6 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
-#if defined(__linux__) && defined(__i386__)
-#include <fpu_control.h>
-#endif
#include <float.h>
#include <stdarg.h>
@@ -60,29 +57,7 @@ extern "C" {
/**
* Get standard integer types
*/
-#if defined(_MSC_VER)
- typedef __int8 int8_t;
- typedef unsigned __int8 uint8_t;
- typedef __int16 int16_t;
- typedef unsigned __int16 uint16_t;
- typedef __int32 int32_t;
- typedef unsigned __int32 uint32_t;
- typedef __int64 int64_t;
- typedef unsigned __int64 uint64_t;
-
-# if defined(_WIN64)
- typedef __int64 intptr_t;
- typedef unsigned __int64 uintptr_t;
-# else
- typedef __int32 intptr_t;
- typedef unsigned __int32 uintptr_t;
-# endif
-
-# define INT64_C(__val) __val##i64
-# define UINT64_C(__val) __val##ui64
-#else
-# include <stdint.h>
-#endif
+#include <stdint.h>
/**
@@ -139,26 +114,28 @@ extern "C" {
/**
* Function inlining
*/
-#if defined(__GNUC__)
-# define INLINE __inline__
-#elif defined(__MSC__)
-# define INLINE __inline
-#elif defined(_MSC_VER)
-# define INLINE __inline
-#elif defined(__ICL)
-# define INLINE __inline
-#elif defined(__INTEL_COMPILER)
-# define INLINE inline
-#elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
-# define INLINE __inline
-#elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
-# define INLINE inline
-# define __inline inline
-# define __inline__ inline
-#elif (__STDC_VERSION__ >= 199901L) /* C99 */
-# define INLINE inline
-#else
-# define INLINE
+#ifndef INLINE
+# if defined(__GNUC__)
+# define INLINE __inline__
+# elif defined(__MSC__)
+# define INLINE __inline
+# elif defined(_MSC_VER)
+# define INLINE __inline
+# elif defined(__ICL)
+# define INLINE __inline
+# elif defined(__INTEL_COMPILER)
+# define INLINE inline
+# elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
+# define INLINE __inline
+# elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
+# define INLINE inline
+# define __inline inline
+# define __inline__ inline
+# elif (__STDC_VERSION__ >= 199901L) /* C99 */
+# define INLINE inline
+# else
+# define INLINE
+# endif
#endif
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 9fe6d527f92..fcf40ecf102 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -189,31 +189,22 @@ struct dd_function_table {
/*@{*/
/**
- * Choose texture format.
- *
- * This is called by the \c _mesa_store_tex[sub]image[123]d() fallback
- * functions. The driver should examine \p internalFormat and return a
- * gl_format value.
+ * Choose actual hardware texture format given the user-provided source
+ * image format and type and the desired internal format. In some
+ * cases, srcFormat and srcType can be GL_NONE.
+ * Called by glTexImage(), etc.
*/
- GLuint (*ChooseTextureFormat)( struct gl_context *ctx, GLint internalFormat,
+ gl_format (*ChooseTextureFormat)( struct gl_context *ctx, GLint internalFormat,
GLenum srcFormat, GLenum srcType );
/**
- * Called by glTexImage1D().
- *
- * \param target user specified.
- * \param format user specified.
- * \param type user specified.
- * \param pixels user specified.
- * \param packing indicates the image packing of pixels.
+ * Called by glTexImage1D(). Simply copy the source texture data into the
+ * destination texture memory. The gl_texture_image fields, etc. will be
+ * fully initialized.
+ * The parameters are the same as glTexImage1D(), plus:
+ * \param packing describes how to unpack the source data.
* \param texObj is the target texture object.
- * \param texImage is the target texture image. It will have the texture \p
- * width, \p height, \p depth, \p border and \p internalFormat information.
- *
- * \p retainInternalCopy is returned by this function and indicates whether
- * core Mesa should keep an internal copy of the texture image.
- *
- * Drivers should call a fallback routine from texstore.c if needed.
+ * \param texImage is the target texture image.
*/
void (*TexImage1D)( struct gl_context *ctx, GLenum target, GLint level,
GLint internalFormat,
@@ -250,25 +241,9 @@ struct dd_function_table {
struct gl_texture_image *texImage );
/**
- * Called by glTexSubImage1D().
- *
- * \param target user specified.
- * \param level user specified.
- * \param xoffset user specified.
- * \param yoffset user specified.
- * \param zoffset user specified.
- * \param width user specified.
- * \param height user specified.
- * \param depth user specified.
- * \param format user specified.
- * \param type user specified.
- * \param pixels user specified.
- * \param packing indicates the image packing of pixels.
- * \param texObj is the target texture object.
- * \param texImage is the target texture image. It will have the texture \p
- * width, \p height, \p border and \p internalFormat information.
- *
- * The driver should use a fallback routine from texstore.c if needed.
+ * Called by glTexSubImage1D(). Replace a subset of the target texture
+ * with new texel data.
+ * \sa dd_function_table::TexImage1D.
*/
void (*TexSubImage1D)( struct gl_context *ctx, GLenum target, GLint level,
GLint xoffset, GLsizei width,
@@ -315,24 +290,6 @@ struct dd_function_table {
struct gl_texture_image *texImage );
/**
- * Called by glCopyTexImage1D().
- *
- * Drivers should use a fallback routine from texstore.c if needed.
- */
- void (*CopyTexImage1D)( struct gl_context *ctx, GLenum target, GLint level,
- GLenum internalFormat, GLint x, GLint y,
- GLsizei width, GLint border );
-
- /**
- * Called by glCopyTexImage2D().
- *
- * Drivers should use a fallback routine from texstore.c if needed.
- */
- void (*CopyTexImage2D)( struct gl_context *ctx, GLenum target, GLint level,
- GLenum internalFormat, GLint x, GLint y,
- GLsizei width, GLsizei height, GLint border );
-
- /**
* Called by glCopyTexSubImage1D().
*
* Drivers should use a fallback routine from texstore.c if needed.
@@ -741,17 +698,14 @@ struct dd_function_table {
const GLvoid *data, GLenum usage,
struct gl_buffer_object *obj );
- void (*BufferSubData)( struct gl_context *ctx, GLenum target, GLintptrARB offset,
+ void (*BufferSubData)( struct gl_context *ctx, GLintptrARB offset,
GLsizeiptrARB size, const GLvoid *data,
struct gl_buffer_object *obj );
- void (*GetBufferSubData)( struct gl_context *ctx, GLenum target,
+ void (*GetBufferSubData)( struct gl_context *ctx,
GLintptrARB offset, GLsizeiptrARB size,
GLvoid *data, struct gl_buffer_object *obj );
- void * (*MapBuffer)( struct gl_context *ctx, GLenum target, GLenum access,
- struct gl_buffer_object *obj );
-
void (*CopyBufferSubData)( struct gl_context *ctx,
struct gl_buffer_object *src,
struct gl_buffer_object *dst,
@@ -760,15 +714,15 @@ struct dd_function_table {
/* May return NULL if MESA_MAP_NOWAIT_BIT is set in access:
*/
- void * (*MapBufferRange)( struct gl_context *ctx, GLenum target, GLintptr offset,
+ void * (*MapBufferRange)( struct gl_context *ctx, GLintptr offset,
GLsizeiptr length, GLbitfield access,
struct gl_buffer_object *obj);
- void (*FlushMappedBufferRange)(struct gl_context *ctx, GLenum target,
+ void (*FlushMappedBufferRange)(struct gl_context *ctx,
GLintptr offset, GLsizeiptr length,
struct gl_buffer_object *obj);
- GLboolean (*UnmapBuffer)( struct gl_context *ctx, GLenum target,
+ GLboolean (*UnmapBuffer)( struct gl_context *ctx,
struct gl_buffer_object *obj );
/*@}*/
diff --git a/src/mesa/main/debug.c b/src/mesa/main/debug.c
index e7f6be99481..b1fc096f296 100644
--- a/src/mesa/main/debug.c
+++ b/src/mesa/main/debug.c
@@ -192,17 +192,6 @@ static void add_debug_flags( const char *debug )
if (strstr(debug, "flush"))
MESA_DEBUG_FLAGS |= DEBUG_ALWAYS_FLUSH;
-#if defined(_FPU_GETCW) && defined(_FPU_SETCW)
- if (strstr(debug, "fpexceptions")) {
- /* raise FP exceptions */
- fpu_control_t mask;
- _FPU_GETCW(mask);
- mask &= ~(_FPU_MASK_IM | _FPU_MASK_DM | _FPU_MASK_ZM
- | _FPU_MASK_OM | _FPU_MASK_UM);
- _FPU_SETCW(mask);
- }
-#endif
-
#else
(void) debug;
#endif
diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index f9282398c21..6e075b4e54b 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -894,8 +894,8 @@ unpack_image(struct gl_context *ctx, GLuint dimensions,
GLvoid *image;
map = (GLubyte *)
- ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
- GL_READ_ONLY_ARB, unpack->BufferObj);
+ ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size,
+ GL_MAP_READ_BIT, unpack->BufferObj);
if (!map) {
/* unable to map src buffer! */
_mesa_error(ctx, GL_INVALID_OPERATION, "unable to map PBO");
@@ -906,8 +906,7 @@ unpack_image(struct gl_context *ctx, GLuint dimensions,
image = _mesa_unpack_image(dimensions, width, height, depth,
format, type, src, unpack);
- ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
- unpack->BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
if (!image) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "display list construction");
diff --git a/src/mesa/main/drawtex.c b/src/mesa/main/drawtex.c
index 2089cdfcef9..83485a928d8 100644
--- a/src/mesa/main/drawtex.c
+++ b/src/mesa/main/drawtex.c
@@ -45,11 +45,15 @@ draw_texture(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
return;
}
+ _mesa_set_vp_override(ctx, GL_TRUE);
+
if (ctx->NewState)
_mesa_update_state(ctx);
ASSERT(ctx->Driver.DrawTex);
ctx->Driver.DrawTex(ctx, x, y, z, width, height);
+
+ _mesa_set_vp_override(ctx, GL_FALSE);
}
diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
index aac8b9c5eaf..3ba4df6342f 100644
--- a/src/mesa/main/enable.c
+++ b/src/mesa/main/enable.c
@@ -5,7 +5,6 @@
/*
* Mesa 3-D graphics library
- * Version: 7.0.3
*
* Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
*
@@ -560,7 +559,6 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state)
ctx->Polygon.OffsetLine = state;
break;
case GL_POLYGON_OFFSET_FILL:
- /*case GL_POLYGON_OFFSET_EXT:*/
if (ctx->Polygon.OffsetFill == state)
return;
FLUSH_VERTICES(ctx, _NEW_POLYGON);
@@ -643,9 +641,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state)
break;
#endif
- /*
- * CLIENT STATE!!!
- */
+ /* client-side state */
case GL_VERTEX_ARRAY:
case GL_NORMAL_ARRAY:
case GL_COLOR_ARRAY:
@@ -1174,7 +1170,6 @@ _mesa_IsEnabled( GLenum cap )
case GL_POLYGON_OFFSET_LINE:
return ctx->Polygon.OffsetLine;
case GL_POLYGON_OFFSET_FILL:
- /*case GL_POLYGON_OFFSET_EXT:*/
return ctx->Polygon.OffsetFill;
case GL_RESCALE_NORMAL_EXT:
return ctx->Transform.RescaleNormals;
@@ -1213,9 +1208,7 @@ _mesa_IsEnabled( GLenum cap )
}
#endif
- /*
- * CLIENT STATE!!!
- */
+ /* client-side state */
case GL_VERTEX_ARRAY:
return (ctx->Array.ArrayObj->Vertex.Enabled != 0);
case GL_NORMAL_ARRAY:
diff --git a/src/mesa/main/es_generator.py b/src/mesa/main/es_generator.py
index c0b0a445806..cad3deaef94 100644
--- a/src/mesa/main/es_generator.py
+++ b/src/mesa/main/es_generator.py
@@ -681,10 +681,10 @@ print """
#if FEATURE_remap_table
/* define esLocalRemapTable */
-#include "%sapi/main/dispatch.h"
+#include "main/api_exec_%s_dispatch.h"
#define need_MESA_remap_table
-#include "%sapi/main/remap_helper.h"
+#include "main/api_exec_%s_remap_helper.h"
static void
init_remap_table(void)
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index bc61c50a90f..14b0cf9acbd 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -81,6 +81,7 @@ static const struct extension extension_table[] = {
{ "GL_ARB_blend_func_extended", o(ARB_blend_func_extended), GL, 2009 },
{ "GL_ARB_color_buffer_float", o(ARB_color_buffer_float), GL, 2004 },
{ "GL_ARB_copy_buffer", o(ARB_copy_buffer), GL, 2008 },
+ { "GL_ARB_conservative_depth", o(AMD_conservative_depth), GL, 2011 },
{ "GL_ARB_depth_buffer_float", o(ARB_depth_buffer_float), GL, 2008 },
{ "GL_ARB_depth_clamp", o(ARB_depth_clamp), GL, 2003 },
{ "GL_ARB_depth_texture", o(ARB_depth_texture), GL, 2001 },
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 84969360d92..0b48fc7eab0 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -1984,10 +1984,26 @@ _mesa_FramebufferTexture1DEXT(GLenum target, GLenum attachment,
{
GET_CURRENT_CONTEXT(ctx);
- if ((texture != 0) && (textarget != GL_TEXTURE_1D)) {
- _mesa_error(ctx, GL_INVALID_ENUM,
- "glFramebufferTexture1DEXT(textarget)");
- return;
+ if (texture != 0) {
+ GLboolean error;
+
+ switch (textarget) {
+ case GL_TEXTURE_1D:
+ error = GL_FALSE;
+ break;
+ case GL_TEXTURE_1D_ARRAY:
+ error = !ctx->Extensions.EXT_texture_array;
+ break;
+ default:
+ error = GL_TRUE;
+ }
+
+ if (error) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "glFramebufferTexture1DEXT(textarget=%s)",
+ _mesa_lookup_enum_by_nr(textarget));
+ return;
+ }
}
framebuffer_texture(ctx, "1D", target, attachment, textarget, texture,
@@ -2001,13 +2017,37 @@ _mesa_FramebufferTexture2DEXT(GLenum target, GLenum attachment,
{
GET_CURRENT_CONTEXT(ctx);
- if ((texture != 0) &&
- (textarget != GL_TEXTURE_2D) &&
- (textarget != GL_TEXTURE_RECTANGLE_ARB) &&
- (!is_cube_face(textarget))) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glFramebufferTexture2DEXT(textarget=0x%x)", textarget);
- return;
+ if (texture != 0) {
+ GLboolean error;
+
+ switch (textarget) {
+ case GL_TEXTURE_2D:
+ error = GL_FALSE;
+ break;
+ case GL_TEXTURE_RECTANGLE:
+ error = !ctx->Extensions.NV_texture_rectangle;
+ break;
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+ error = !ctx->Extensions.ARB_texture_cube_map;
+ break;
+ case GL_TEXTURE_2D_ARRAY:
+ error = !ctx->Extensions.EXT_texture_array;
+ break;
+ default:
+ error = GL_FALSE;
+ }
+
+ if (error) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "glFramebufferTexture2DEXT(textarget=%s)",
+ _mesa_lookup_enum_by_nr(textarget));
+ return;
+ }
}
framebuffer_texture(ctx, "2D", target, attachment, textarget, texture,
@@ -2023,7 +2063,7 @@ _mesa_FramebufferTexture3DEXT(GLenum target, GLenum attachment,
GET_CURRENT_CONTEXT(ctx);
if ((texture != 0) && (textarget != GL_TEXTURE_3D)) {
- _mesa_error(ctx, GL_INVALID_ENUM,
+ _mesa_error(ctx, GL_INVALID_OPERATION,
"glFramebufferTexture3DEXT(textarget)");
return;
}
@@ -2134,10 +2174,14 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
{
const struct gl_renderbuffer_attachment *att;
struct gl_framebuffer *buffer;
+ GLenum err;
GET_CURRENT_CONTEXT(ctx);
ASSERT_OUTSIDE_BEGIN_END(ctx);
+ /* The error differs in GL and GLES. */
+ err = ctx->API == API_OPENGL ? GL_INVALID_OPERATION : GL_INVALID_ENUM;
+
buffer = get_framebuffer_target(ctx, target);
if (!buffer) {
_mesa_error(ctx, GL_INVALID_ENUM,
@@ -2188,7 +2232,12 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
}
else {
assert(att->Type == GL_NONE);
- *params = 0;
+ if (ctx->API == API_OPENGL) {
+ *params = 0;
+ } else {
+ _mesa_error(ctx, GL_INVALID_ENUM,
+ "glGetFramebufferAttachmentParameterivEXT(pname)");
+ }
}
return;
case GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL_EXT:
@@ -2196,7 +2245,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
*params = att->TextureLevel;
}
else if (att->Type == GL_NONE) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
+ _mesa_error(ctx, err,
"glGetFramebufferAttachmentParameterivEXT(pname)");
}
else {
@@ -2214,7 +2263,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
}
}
else if (att->Type == GL_NONE) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
+ _mesa_error(ctx, err,
"glGetFramebufferAttachmentParameterivEXT(pname)");
}
else {
@@ -2232,7 +2281,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
}
}
else if (att->Type == GL_NONE) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
+ _mesa_error(ctx, err,
"glGetFramebufferAttachmentParameterivEXT(pname)");
}
else {
@@ -2246,7 +2295,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
"glGetFramebufferAttachmentParameterivEXT(pname)");
}
else if (att->Type == GL_NONE) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
+ _mesa_error(ctx, err,
"glGetFramebufferAttachmentParameterivEXT(pname)");
}
else {
@@ -2267,7 +2316,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
return;
}
else if (att->Type == GL_NONE) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
+ _mesa_error(ctx, err,
"glGetFramebufferAttachmentParameterivEXT(pname)");
}
else {
@@ -2301,7 +2350,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
"glGetFramebufferAttachmentParameterivEXT(pname)");
}
else if (att->Type == GL_NONE) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
+ _mesa_error(ctx, err,
"glGetFramebufferAttachmentParameterivEXT(pname)");
}
else if (att->Texture) {
@@ -2337,6 +2386,8 @@ void GLAPIENTRY
_mesa_GenerateMipmapEXT(GLenum target)
{
struct gl_texture_object *texObj;
+ GLboolean error;
+
GET_CURRENT_CONTEXT(ctx);
ASSERT_OUTSIDE_BEGIN_END(ctx);
@@ -2346,12 +2397,22 @@ _mesa_GenerateMipmapEXT(GLenum target)
case GL_TEXTURE_1D:
case GL_TEXTURE_2D:
case GL_TEXTURE_3D:
+ error = GL_FALSE;
+ break;
case GL_TEXTURE_CUBE_MAP:
- /* OK, legal value */
+ error = !ctx->Extensions.ARB_texture_cube_map;
+ break;
+ case GL_TEXTURE_1D_ARRAY:
+ case GL_TEXTURE_2D_ARRAY:
+ error = !ctx->Extensions.EXT_texture_array;
break;
default:
- /* XXX need to implement GL_TEXTURE_1D_ARRAY and GL_TEXTURE_2D_ARRAY */
- _mesa_error(ctx, GL_INVALID_ENUM, "glGenerateMipmapEXT(target)");
+ error = GL_TRUE;
+ }
+
+ if (error) {
+ _mesa_error(ctx, GL_INVALID_ENUM, "glGenerateMipmapEXT(target=%s)",
+ _mesa_lookup_enum_by_nr(target));
return;
}
diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp
index 0b53c28f7ae..7cc17216884 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -330,8 +330,7 @@ static GLbitfield get_fp_input_mask( struct gl_context *ctx )
/* _NEW_RENDERMODE */
fp_inputs = (FRAG_BIT_COL0 | FRAG_BIT_TEX0);
}
- else if (!(vertexProgram || vertexShader) ||
- !ctx->VertexProgram._Current) {
+ else if (!(vertexProgram || vertexShader)) {
/* Fixed function vertex logic */
/* _NEW_ARRAY */
GLbitfield varying_inputs = ctx->varying_vp_inputs;
@@ -875,7 +874,8 @@ static struct ureg register_const4f( struct texenv_fragment_program *p,
values[1] = s1;
values[2] = s2;
values[3] = s3;
- idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
+ idx = _mesa_add_unnamed_constant( p->program->Base.Parameters,
+ (gl_constant_value *) values, 4,
&swizzle );
r = make_ureg(PROGRAM_CONSTANT, idx);
r.swz = swizzle;
diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c
index b8e49a3757f..2d2485c9e06 100644
--- a/src/mesa/main/ffvertex_prog.c
+++ b/src/mesa/main/ffvertex_prog.c
@@ -455,13 +455,13 @@ static struct ureg register_const4f( struct tnl_program *p,
GLfloat s2,
GLfloat s3)
{
- GLfloat values[4];
+ gl_constant_value values[4];
GLint idx;
GLuint swizzle;
- values[0] = s0;
- values[1] = s1;
- values[2] = s2;
- values[3] = s3;
+ values[0].f = s0;
+ values[1].f = s1;
+ values[2].f = s2;
+ values[3].f = s3;
idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
&swizzle );
ASSERT(swizzle == SWIZZLE_NOOP);
diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c
index e27569a6fac..23fa1b2c11e 100644
--- a/src/mesa/main/framebuffer.c
+++ b/src/mesa/main/framebuffer.c
@@ -548,6 +548,7 @@ _mesa_update_framebuffer_visual(struct gl_context *ctx,
fb->Visual.rgbBits = fb->Visual.redBits
+ fb->Visual.greenBits + fb->Visual.blueBits;
fb->Visual.samples = rb->NumSamples;
+ fb->Visual.sampleBuffers = rb->NumSamples > 0 ? 1 : 0;
if (_mesa_get_format_color_encoding(fmt) == GL_SRGB)
fb->Visual.sRGBCapable = ctx->Const.sRGBCapable;
break;
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 0492e1585c3..d32c68a53a4 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -1569,11 +1569,11 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
break;
case GL_NUM_COMPRESSED_TEXTURE_FORMATS_ARB:
- v->value_int = _mesa_get_compressed_formats(ctx, NULL, GL_FALSE);
+ v->value_int = _mesa_get_compressed_formats(ctx, NULL);
break;
case GL_COMPRESSED_TEXTURE_FORMATS_ARB:
v->value_int_n.n =
- _mesa_get_compressed_formats(ctx, v->value_int_n.ints, GL_FALSE);
+ _mesa_get_compressed_formats(ctx, v->value_int_n.ints);
ASSERT(v->value_int_n.n <= 100);
break;
diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c
index 0a572ec225d..8f097195922 100644
--- a/src/mesa/main/imports.c
+++ b/src/mesa/main/imports.c
@@ -753,7 +753,8 @@ _mesa_strdup( const char *s )
float
_mesa_strtof( const char *s, char **end )
{
-#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__)
+#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__) && \
+ !defined(ANDROID)
static locale_t loc = NULL;
if (!loc) {
loc = newlocale(LC_CTYPE_MASK, "C", NULL);
diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h
index 3fa1db02aee..70defdc4327 100644
--- a/src/mesa/main/imports.h
+++ b/src/mesa/main/imports.h
@@ -134,7 +134,13 @@ typedef union { GLfloat f; GLint i; } fi_type;
#define exp2f(f) ((float) exp2(f))
#define floorf(f) ((float) floor(f))
#define logf(f) ((float) log(f))
+
+#ifdef ANDROID
+#define log2f(f) (logf(f) * (float) (1.0 / M_LN2))
+#else
#define log2f(f) ((float) log2(f))
+#endif
+
#define powf(x,y) ((float) pow(x,y))
#define sinf(f) ((float) sin(f))
#define sinhf(f) ((float) sinh(f))
@@ -562,7 +568,7 @@ _mesa_init_sqrt_table(void);
#ifdef __GNUC__
-#ifdef __MINGW32__
+#if defined(__MINGW32__) || defined(ANDROID)
#define ffs __builtin_ffs
#define ffsll __builtin_ffsll
#endif
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index b88118366b2..f2eb889feb4 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1279,6 +1279,9 @@ struct gl_texture_image
GLboolean _IsPowerOfTwo; /**< Are all dimensions powers of two? */
struct gl_texture_object *TexObject; /**< Pointer back to parent object */
+ GLuint Level; /**< Which mipmap level am I? */
+ /** Cube map face: index into gl_texture_object::Image[] array */
+ GLuint Face;
FetchTexelFuncC FetchTexelc; /**< GLchan texel fetch function pointer */
FetchTexelFuncF FetchTexelf; /**< Float texel fetch function pointer */
@@ -2252,8 +2255,6 @@ struct gl_shader_state
*/
struct gl_shader_program *ActiveProgram;
- void *MemPool;
-
GLbitfield Flags; /**< Mask of GLSL_x flags */
};
@@ -2719,6 +2720,12 @@ struct gl_constants
GLuint GLSLVersion; /**< GLSL version supported (ex: 120 = 1.20) */
+ /**
+ * Does the driver support real 32-bit integers? (Otherwise, integers are
+ * simulated via floats.)
+ */
+ GLboolean NativeIntegers;
+
/** Which texture units support GL_ATI_envmap_bumpmap as targets */
GLbitfield SupportedBumpUnits;
diff --git a/src/mesa/main/nvprogram.c b/src/mesa/main/nvprogram.c
index dd198b8141a..7ff7645b7b7 100644
--- a/src/mesa/main/nvprogram.c
+++ b/src/mesa/main/nvprogram.c
@@ -812,7 +812,7 @@ _mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name,
{
struct gl_program *prog;
struct gl_fragment_program *fragProg;
- GLfloat *v;
+ gl_constant_value *v;
GET_CURRENT_CONTEXT(ctx);
ASSERT_OUTSIDE_BEGIN_END(ctx);
@@ -834,10 +834,10 @@ _mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name,
v = _mesa_lookup_parameter_value(fragProg->Base.Parameters, len,
(char *) name);
if (v) {
- v[0] = x;
- v[1] = y;
- v[2] = z;
- v[3] = w;
+ v[0].f = x;
+ v[1].f = y;
+ v[2].f = z;
+ v[3].f = w;
return;
}
@@ -878,7 +878,7 @@ _mesa_GetProgramNamedParameterfvNV(GLuint id, GLsizei len, const GLubyte *name,
{
struct gl_program *prog;
struct gl_fragment_program *fragProg;
- const GLfloat *v;
+ const gl_constant_value *v;
GET_CURRENT_CONTEXT(ctx);
@@ -899,10 +899,10 @@ _mesa_GetProgramNamedParameterfvNV(GLuint id, GLsizei len, const GLubyte *name,
v = _mesa_lookup_parameter_value(fragProg->Base.Parameters,
len, (char *) name);
if (v) {
- params[0] = v[0];
- params[1] = v[1];
- params[2] = v[2];
- params[3] = v[3];
+ params[0] = v[0].f;
+ params[1] = v[1].f;
+ params[2] = v[2].f;
+ params[3] = v[3].f;
return;
}
diff --git a/src/mesa/main/pbo.c b/src/mesa/main/pbo.c
index 15e0480e9f1..4e7e6f925cc 100644
--- a/src/mesa/main/pbo.c
+++ b/src/mesa/main/pbo.c
@@ -128,9 +128,10 @@ _mesa_map_pbo_source(struct gl_context *ctx,
if (_mesa_is_bufferobj(unpack->BufferObj)) {
/* unpack from PBO */
- buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
- GL_READ_ONLY_ARB,
- unpack->BufferObj);
+ buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0,
+ unpack->BufferObj->Size,
+ GL_MAP_READ_BIT,
+ unpack->BufferObj);
if (!buf)
return NULL;
@@ -201,8 +202,7 @@ _mesa_unmap_pbo_source(struct gl_context *ctx,
{
ASSERT(unpack != &ctx->Pack); /* catch pack/unpack mismatch */
if (_mesa_is_bufferobj(unpack->BufferObj)) {
- ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
- unpack->BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
}
}
@@ -224,9 +224,10 @@ _mesa_map_pbo_dest(struct gl_context *ctx,
if (_mesa_is_bufferobj(pack->BufferObj)) {
/* pack into PBO */
- buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
- GL_WRITE_ONLY_ARB,
- pack->BufferObj);
+ buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0,
+ pack->BufferObj->Size,
+ GL_MAP_WRITE_BIT,
+ pack->BufferObj);
if (!buf)
return NULL;
@@ -297,7 +298,7 @@ _mesa_unmap_pbo_dest(struct gl_context *ctx,
{
ASSERT(pack != &ctx->Unpack); /* catch pack/unpack mismatch */
if (_mesa_is_bufferobj(pack->BufferObj)) {
- ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT, pack->BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, pack->BufferObj);
}
}
@@ -327,8 +328,9 @@ _mesa_validate_pbo_teximage(struct gl_context *ctx, GLuint dimensions,
return NULL;
}
- buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
- GL_READ_ONLY_ARB, unpack->BufferObj);
+ buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size,
+ GL_MAP_READ_BIT,
+ unpack->BufferObj);
if (!buf) {
_mesa_error(ctx, GL_INVALID_OPERATION, funcName, "(PBO is mapped)");
return NULL;
@@ -364,8 +366,10 @@ _mesa_validate_pbo_compressed_teximage(struct gl_context *ctx,
return NULL;
}
- buf = (GLubyte*) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
- GL_READ_ONLY_ARB, packing->BufferObj);
+ buf = (GLubyte*) ctx->Driver.MapBufferRange(ctx, 0,
+ packing->BufferObj->Size,
+ GL_MAP_READ_BIT,
+ packing->BufferObj);
if (!buf) {
_mesa_error(ctx, GL_INVALID_OPERATION, funcName, "(PBO is mapped");
return NULL;
@@ -384,8 +388,7 @@ _mesa_unmap_teximage_pbo(struct gl_context *ctx,
const struct gl_pixelstore_attrib *unpack)
{
if (_mesa_is_bufferobj(unpack->BufferObj)) {
- ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
- unpack->BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
}
}
diff --git a/src/mesa/main/querymatrix.c b/src/mesa/main/querymatrix.c
index 944ad435f7a..eaedf7cd238 100644
--- a/src/mesa/main/querymatrix.c
+++ b/src/mesa/main/querymatrix.c
@@ -73,7 +73,7 @@ fpclassify(double x)
#elif defined(__APPLE__) || defined(__CYGWIN__) || defined(__FreeBSD__) || \
defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \
(defined(__sun) && defined(__C99FEATURES__)) || defined(__MINGW32__) || \
- (defined(__sun) && defined(__GNUC__))
+ (defined(__sun) && defined(__GNUC__)) || defined(ANDROID)
/* fpclassify is available. */
diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 8df25c3f988..74997eaaa77 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -1125,7 +1125,7 @@ static void
validate_program(struct gl_context *ctx, GLuint program)
{
struct gl_shader_program *shProg;
- char errMsg[100];
+ char errMsg[100] = "";
shProg = _mesa_lookup_shader_program_err(ctx, program, "glValidateProgram");
if (!shProg) {
diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c
index 33d91ad594d..f128648f477 100644
--- a/src/mesa/main/shaderobj.c
+++ b/src/mesa/main/shaderobj.c
@@ -244,6 +244,8 @@ _mesa_init_shader_program(struct gl_context *ctx, struct gl_shader_program *prog
prog->Geom.InputType = GL_TRIANGLES;
prog->Geom.OutputType = GL_TRIANGLE_STRIP;
#endif
+
+ prog->InfoLog = ralloc_strdup(prog, "");
}
/**
@@ -283,6 +285,10 @@ _mesa_clear_shader_program_data(struct gl_context *ctx,
_mesa_free_parameter_list(shProg->Varying);
shProg->Varying = NULL;
}
+
+ assert(shProg->InfoLog != NULL);
+ ralloc_free(shProg->InfoLog);
+ shProg->InfoLog = ralloc_strdup(shProg, "");
}
@@ -317,11 +323,6 @@ _mesa_free_shader_program_data(struct gl_context *ctx,
shProg->Shaders = NULL;
}
- if (shProg->InfoLog) {
- ralloc_free(shProg->InfoLog);
- shProg->InfoLog = NULL;
- }
-
/* Transform feedback varying vars */
for (i = 0; i < shProg->TransformFeedback.NumVarying; i++) {
free(shProg->TransformFeedback.VaryingNames[i]);
diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c
index d84f59690c5..8b7159db09c 100644
--- a/src/mesa/main/shared.c
+++ b/src/mesa/main/shared.c
@@ -200,7 +200,7 @@ delete_bufferobj_cb(GLuint id, void *data, void *userData)
struct gl_buffer_object *bufObj = (struct gl_buffer_object *) data;
struct gl_context *ctx = (struct gl_context *) userData;
if (_mesa_bufferobj_mapped(bufObj)) {
- ctx->Driver.UnmapBuffer(ctx, 0, bufObj);
+ ctx->Driver.UnmapBuffer(ctx, bufObj);
bufObj->Pointer = NULL;
}
_mesa_reference_buffer_object(ctx, &bufObj, NULL);
diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index d820ae92747..42bd1eee5ca 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -40,19 +40,192 @@
/**
+ * Get the GL base format of a specified GL compressed texture format
+ *
+ * From page 232 of the OpenGL 3.3 (Compatibility Profile) spec:
+ *
+ * "Compressed Internal Format Base Internal Format Type
+ * --------------------------- -------------------- ---------
+ * COMPRESSED_ALPHA ALPHA Generic
+ * COMPRESSED_LUMINANCE LUMINANCE Generic
+ * COMPRESSED_LUMINANCE_ALPHA LUMINANCE_ALPHA Generic
+ * COMPRESSED_INTENSITY INTENSITY Generic
+ * COMPRESSED_RED RED Generic
+ * COMPRESSED_RG RG Generic
+ * COMPRESSED_RGB RGB Generic
+ * COMPRESSED_RGBA RGBA Generic
+ * COMPRESSED_SRGB RGB Generic
+ * COMPRESSED_SRGB_ALPHA RGBA Generic
+ * COMPRESSED_SLUMINANCE LUMINANCE Generic
+ * COMPRESSED_SLUMINANCE_ALPHA LUMINANCE_ALPHA Generic
+ * COMPRESSED_RED_RGTC1 RED Specific
+ * COMPRESSED_SIGNED_RED_RGTC1 RED Specific
+ * COMPRESSED_RG_RGTC2 RG Specific
+ * COMPRESSED_SIGNED_RG_RGTC2 RG Specific"
+ *
+ * \return
+ * The base format of \c format if \c format is a compressed format (either
+ * generic or specific). Otherwise 0 is returned.
+ */
+GLenum
+_mesa_gl_compressed_format_base_format(GLenum format)
+{
+ switch (format) {
+ case GL_COMPRESSED_RED:
+ case GL_COMPRESSED_RED_RGTC1:
+ case GL_COMPRESSED_SIGNED_RED_RGTC1:
+ return GL_RED;
+
+ case GL_COMPRESSED_RG:
+ case GL_COMPRESSED_RG_RGTC2:
+ case GL_COMPRESSED_SIGNED_RG_RGTC2:
+ return GL_RG;
+
+ case GL_COMPRESSED_RGB:
+ case GL_COMPRESSED_SRGB:
+ case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+ case GL_COMPRESSED_RGB_FXT1_3DFX:
+ case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
+ return GL_RGB;
+
+ case GL_COMPRESSED_RGBA:
+ case GL_COMPRESSED_SRGB_ALPHA:
+ case GL_COMPRESSED_RGBA_BPTC_UNORM_ARB:
+ case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB:
+ case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB:
+ case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB:
+ case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+ case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+ case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+ case GL_COMPRESSED_RGBA_FXT1_3DFX:
+ case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
+ case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
+ case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
+ return GL_RGBA;
+
+ case GL_COMPRESSED_ALPHA:
+ return GL_ALPHA;
+
+ case GL_COMPRESSED_LUMINANCE:
+ case GL_COMPRESSED_SLUMINANCE:
+ case GL_COMPRESSED_LUMINANCE_LATC1_EXT:
+ case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT:
+ return GL_LUMINANCE;
+
+ case GL_COMPRESSED_LUMINANCE_ALPHA:
+ case GL_COMPRESSED_SLUMINANCE_ALPHA:
+ case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT:
+ case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT:
+ case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI:
+ return GL_LUMINANCE_ALPHA;
+
+ case GL_COMPRESSED_INTENSITY:
+ return GL_INTENSITY;
+
+ default:
+ return 0;
+ }
+}
+
+/**
* Return list of (and count of) all specific texture compression
* formats that are supported.
*
+ * Some formats are \b not returned by this function. The
+ * \c GL_COMPRESSED_TEXTURE_FORMATS query only returns formats that are
+ * "suitable for general-purpose usage." All texture compression extensions
+ * have taken this to mean either linear RGB or linear RGBA.
+ *
+ * The GL_ARB_texture_compress_rgtc spec says:
+ *
+ * "19) Should the GL_NUM_COMPRESSED_TEXTURE_FORMATS and
+ * GL_COMPRESSED_TEXTURE_FORMATS queries return the RGTC formats?
+ *
+ * RESOLVED: No.
+ *
+ * The OpenGL 2.1 specification says "The only values returned
+ * by this query [GL_COMPRESSED_TEXTURE_FORMATS"] are those
+ * corresponding to formats suitable for general-purpose usage.
+ * The renderer will not enumerate formats with restrictions that
+ * need to be specifically understood prior to use."
+ *
+ * Compressed textures with just red or red-green components are
+ * not general-purpose so should not be returned by these queries
+ * because they have restrictions.
+ *
+ * Applications that seek to use the RGTC formats should do so
+ * by looking for this extension's name in the string returned by
+ * glGetString(GL_EXTENSIONS) rather than
+ * what GL_NUM_COMPRESSED_TEXTURE_FORMATS and
+ * GL_COMPRESSED_TEXTURE_FORMATS return."
+ *
+ * There is nearly identical wording in the GL_EXT_texture_compression_rgtc
+ * spec.
+ *
+ * The GL_EXT_texture_sRGB spec says:
+ *
+ * "22) Should the new COMPRESSED_SRGB_* formats be listed in an
+ * implementation's GL_COMPRESSED_TEXTURE_FORMATS list?
+ *
+ * RESOLVED: No. Section 3.8.1 says formats listed by
+ * GL_COMPRESSED_TEXTURE_FORMATS are "suitable for general-purpose
+ * usage." The non-linear distribution of red, green, and
+ * blue for these sRGB compressed formats makes them not really
+ * general-purpose."
+ *
+ * The GL_EXT_texture_compression_latc spec says:
+ *
+ * "16) Should the GL_NUM_COMPRESSED_TEXTURE_FORMATS and
+ * GL_COMPRESSED_TEXTURE_FORMATS queries return the LATC formats?
+ *
+ * RESOLVED: No.
+ *
+ * The OpenGL 2.1 specification says "The only values returned
+ * by this query [GL_COMPRESSED_TEXTURE_FORMATS"] are those
+ * corresponding to formats suitable for general-purpose usage.
+ * The renderer will not enumerate formats with restrictions that
+ * need to be specifically understood prior to use."
+ *
+ * Historically, OpenGL implementation have advertised the RGB and
+ * RGBA versions of the S3TC extensions compressed format tokens
+ * through this mechanism.
+ *
+ * The specification is not sufficiently clear about what "suitable
+ * for general-purpose usage" means. Historically that seems to mean
+ * unsigned RGB or unsigned RGBA. The DXT1 format supporting alpha
+ * (GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) is not exposed in the list (at
+ * least for NVIDIA drivers) because the alpha is always 1.0 expect
+ * when it is 0.0 when RGB is required to be black. NVIDIA's even
+ * limits itself to true linear RGB or RGBA formats, specifically
+ * not including EXT_texture_sRGB's sRGB S3TC compressed formats.
+ *
+ * Adding luminance and luminance-alpha texture formats (and
+ * certainly signed versions of luminance and luminance-alpha
+ * formats!) invites potential comptaibility problems with old
+ * applications using this mechanism since old applications are
+ * unlikely to expect non-RGB or non-RGBA formats to be advertised
+ * through this mechanism. However no specific misinteractions
+ * with old applications is known.
+ *
+ * Applications that seek to use the LATC formats should do so
+ * by looking for this extension's name in the string returned by
+ * glGetString(GL_EXTENSIONS) rather than
+ * what GL_NUM_COMPRESSED_TEXTURE_FORMATS and
+ * GL_COMPRESSED_TEXTURE_FORMATS return."
+ *
+ * There is no formal spec for GL_ATI_texture_compression_3dc. Since the
+ * formats added by this extension are luminance-alpha formats, it is
+ * reasonable to expect them to follow the same rules as
+ * GL_EXT_texture_compression_latc. At the very least, Catalyst 11.6 does not
+ * expose the 3dc formats through this mechanism.
+ *
* \param ctx the GL context
* \param formats the resulting format list (may be NULL).
- * \param all if true return all formats, even those with some kind
- * of restrictions/limitations (See GL_ARB_texture_compression
- * spec for more info).
*
* \return number of formats.
*/
GLuint
-_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all)
+_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats)
{
GLuint n = 0;
if (ctx->Extensions.TDFX_texture_compression_FXT1) {
@@ -64,24 +237,15 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean a
n += 2;
}
}
- /* don't return RGTC - ARB_texture_compression_rgtc query 19 */
+
if (ctx->Extensions.EXT_texture_compression_s3tc) {
if (formats) {
formats[n++] = GL_COMPRESSED_RGB_S3TC_DXT1_EXT;
- /* This format has some restrictions/limitations and so should
- * not be returned via the GL_COMPRESSED_TEXTURE_FORMATS query.
- * Specifically, all transparent pixels become black. NVIDIA
- * omits this format too.
- */
- if (all)
- formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
}
else {
n += 3;
- if (all)
- n += 1;
}
}
if (ctx->Extensions.S3_s3tc) {
@@ -95,19 +259,6 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean a
n += 4;
}
}
-#if FEATURE_EXT_texture_sRGB
- if (ctx->Extensions.EXT_texture_sRGB) {
- if (formats) {
- formats[n++] = GL_COMPRESSED_SRGB_S3TC_DXT1_EXT;
- formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT;
- formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT;
- formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT;
- }
- else {
- n += 4;
- }
- }
-#endif /* FEATURE_EXT_texture_sRGB */
return n;
#if FEATURE_ES1 || FEATURE_ES2
diff --git a/src/mesa/main/texcompress.h b/src/mesa/main/texcompress.h
index 19b08bbadf6..375cf90c8a2 100644
--- a/src/mesa/main/texcompress.h
+++ b/src/mesa/main/texcompress.h
@@ -33,8 +33,11 @@ struct gl_context;
#if _HAVE_FULL_GL
+extern GLenum
+_mesa_gl_compressed_format_base_format(GLenum format);
+
extern GLuint
-_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all);
+_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats);
extern gl_format
_mesa_glenum_to_compressed_format(GLenum format);
diff --git a/src/mesa/main/texcompress_rgtc_tmp.h b/src/mesa/main/texcompress_rgtc_tmp.h
index c8bf082a158..48bbd374e08 100644
--- a/src/mesa/main/texcompress_rgtc_tmp.h
+++ b/src/mesa/main/texcompress_rgtc_tmp.h
@@ -181,7 +181,7 @@ static void TAG(encode_rgtc_chan)(TYPE *blkaddr, TYPE srccolors[4][4],
fprintf(stderr, "%d ", alphaenc1[i]);
}
fprintf(stderr, "cutVals ");
- for (i = 0; i < 8; i++) {
+ for (i = 0; i < 7; i++) {
fprintf(stderr, "%d ", acutValues[i]);
}
fprintf(stderr, "srcVals ");
diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index 26c2ff98ba1..b2ebb0de475 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -441,8 +441,8 @@ _mesa_get_teximage(struct gl_context *ctx, GLenum target, GLint level,
* texture data to the PBO if the PBO is in VRAM along with the texture.
*/
GLubyte *buf = (GLubyte *)
- ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
- GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj);
+ ctx->Driver.MapBufferRange(ctx, 0, ctx->Pack.BufferObj->Size,
+ GL_MAP_WRITE_BIT, ctx->Pack.BufferObj);
if (!buf) {
/* out of memory or other unexpected error */
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage(map PBO failed)");
@@ -474,8 +474,7 @@ _mesa_get_teximage(struct gl_context *ctx, GLenum target, GLint level,
}
if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
- ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
- ctx->Pack.BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, ctx->Pack.BufferObj);
}
}
@@ -500,8 +499,8 @@ _mesa_get_compressed_teximage(struct gl_context *ctx, GLenum target, GLint level
if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
/* pack texture image into a PBO */
GLubyte *buf = (GLubyte *)
- ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
- GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj);
+ ctx->Driver.MapBufferRange(ctx, 0, ctx->Pack.BufferObj->Size,
+ GL_MAP_WRITE_BIT, ctx->Pack.BufferObj);
if (!buf) {
/* out of memory or other unexpected error */
_mesa_error(ctx, GL_OUT_OF_MEMORY,
@@ -531,8 +530,7 @@ _mesa_get_compressed_teximage(struct gl_context *ctx, GLenum target, GLint level
}
if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
- ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
- ctx->Pack.BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, ctx->Pack.BufferObj);
}
}
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 6f53686e7ff..a005d2935fa 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -1,6 +1,5 @@
/*
- * mesa 3-D graphics library
- * Version: 7.6
+ * Mesa 3-D graphics library
*
* Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
* Copyright (C) 2009 VMware, Inc. All Rights Reserved.
@@ -556,8 +555,6 @@ _mesa_tex_target_to_face(GLenum target)
* \param target texture target.
* \param level image level.
* \param texImage texture image.
- *
- * This was basically prompted by the introduction of cube maps.
*/
void
_mesa_set_tex_image(struct gl_texture_object *tObj,
@@ -574,6 +571,8 @@ _mesa_set_tex_image(struct gl_texture_object *tObj,
/* Set the 'back' pointer */
texImage->TexObject = tObj;
+ texImage->Level = level;
+ texImage->Face = face;
}
@@ -709,15 +708,13 @@ get_proxy_target(GLenum target)
/**
* Get the texture object that corresponds to the target of the given
- * texture unit.
+ * texture unit. The target should have already been checked for validity.
*
* \param ctx GL context.
* \param texUnit texture unit.
* \param target texture target.
*
* \return pointer to the texture object on success, or NULL on failure.
- *
- * \sa gl_texture_unit.
*/
struct gl_texture_object *
_mesa_select_tex_object(struct gl_context *ctx,
@@ -2797,29 +2794,43 @@ copyteximage(struct gl_context *ctx, GLuint dims,
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims);
}
else {
- gl_format texFormat;
-
- if (texImage->Data) {
- ctx->Driver.FreeTexImageData( ctx, texImage );
- }
+ /* choose actual hw format */
+ gl_format texFormat = _mesa_choose_texture_format(ctx, texObj,
+ target, level,
+ internalFormat,
+ GL_NONE, GL_NONE);
- ASSERT(texImage->Data == NULL);
+ if (legal_texture_size(ctx, texFormat, width, height, 1)) {
+ GLint srcX = x, srcY = y, dstX = 0, dstY = 0;
- texFormat = _mesa_choose_texture_format(ctx, texObj, target, level,
- internalFormat, GL_NONE,
- GL_NONE);
+ /* Free old texture image */
+ ctx->Driver.FreeTexImageData(ctx, texImage);
- if (legal_texture_size(ctx, texFormat, width, height, 1)) {
_mesa_init_teximage_fields(ctx, target, texImage, width, height, 1,
border, internalFormat, texFormat);
- ASSERT(ctx->Driver.CopyTexImage2D);
- if (dims == 1)
- ctx->Driver.CopyTexImage1D(ctx, target, level, internalFormat,
- x, y, width, border);
- else
- ctx->Driver.CopyTexImage2D(ctx, target, level, internalFormat,
- x, y, width, height, border);
+ /* Allocate texture memory (no pixel data yet) */
+ if (dims == 1) {
+ ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
+ width, border, GL_NONE, GL_NONE, NULL,
+ &ctx->Unpack, texObj, texImage);
+ }
+ else {
+ ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
+ width, height, border, GL_NONE, GL_NONE,
+ NULL, &ctx->Unpack, texObj, texImage);
+ }
+
+ if (_mesa_clip_copytexsubimage(ctx, &dstX, &dstY, &srcX, &srcY,
+ &width, &height)) {
+ if (dims == 1)
+ ctx->Driver.CopyTexSubImage1D(ctx, target, level, dstX,
+ srcX, srcY, width);
+
+ else
+ ctx->Driver.CopyTexSubImage2D(ctx, target, level, dstX, dstY,
+ srcX, srcY, width, height);
+ }
check_gen_mipmap(ctx, target, texObj, level);
@@ -2830,6 +2841,7 @@ copyteximage(struct gl_context *ctx, GLuint dims,
ctx->NewState |= _NEW_TEXTURE;
}
else {
+ /* probably too large of image */
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims);
}
}
diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c
index 3021716a0b6..078a43ab153 100644
--- a/src/mesa/main/texobj.c
+++ b/src/mesa/main/texobj.c
@@ -842,7 +842,7 @@ _mesa_GenTextures( GLsizei n, GLuint *textures )
struct gl_texture_object *texObj;
GLuint name = first + i;
GLenum target = 0;
- texObj = (*ctx->Driver.NewTextureObject)( ctx, name, target);
+ texObj = ctx->Driver.NewTextureObject(ctx, name, target);
if (!texObj) {
_glthread_UNLOCK_MUTEX(ctx->Shared->Mutex);
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glGenTextures");
@@ -1066,7 +1066,7 @@ _mesa_BindTexture( GLenum target, GLuint texName )
}
else {
/* if this is a new texture id, allocate a texture object now */
- newTexObj = (*ctx->Driver.NewTextureObject)(ctx, texName, target);
+ newTexObj = ctx->Driver.NewTextureObject(ctx, texName, target);
if (!newTexObj) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glBindTexture");
return;
@@ -1108,7 +1108,7 @@ _mesa_BindTexture( GLenum target, GLuint texName )
/* Pass BindTexture call to device driver */
if (ctx->Driver.BindTexture)
- (*ctx->Driver.BindTexture)( ctx, target, newTexObj );
+ ctx->Driver.BindTexture(ctx, target, newTexObj);
}
diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index 4b9dcb5d3b5..bbbb306b2d9 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -888,7 +888,7 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level,
texObj = _mesa_select_tex_object(ctx, texUnit, target);
img = _mesa_select_tex_image(ctx, texObj, target, level);
- if (!img || !img->TexFormat) {
+ if (!img || img->TexFormat == MESA_FORMAT_NONE) {
/* undefined texture image */
if (pname == GL_TEXTURE_COMPONENTS)
*params = 1;
@@ -915,9 +915,23 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level,
*params = _mesa_compressed_format_to_glenum(ctx, texFormat);
}
else {
- /* return the user's requested internal format */
- *params = img->InternalFormat;
- }
+ /* If the true internal format is not compressed but the user
+ * requested a generic compressed format, we have to return the
+ * generic base format that matches.
+ *
+ * From page 119 (page 129 of the PDF) of the OpenGL 1.3 spec:
+ *
+ * "If no specific compressed format is available,
+ * internalformat is instead replaced by the corresponding base
+ * internal format."
+ *
+ * Otherwise just return the user's requested internal format
+ */
+ const GLenum f =
+ _mesa_gl_compressed_format_base_format(img->InternalFormat);
+
+ *params = (f != 0) ? f : img->InternalFormat;
+ }
break;
case GL_TEXTURE_BORDER:
*params = img->Border;
@@ -980,28 +994,21 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level,
*params = 0;
break;
case GL_TEXTURE_DEPTH_SIZE_ARB:
- if (ctx->Extensions.ARB_depth_texture)
- *params = _mesa_get_format_bits(texFormat, pname);
- else
+ if (!ctx->Extensions.ARB_depth_texture)
goto invalid_pname;
+ *params = _mesa_get_format_bits(texFormat, pname);
break;
case GL_TEXTURE_STENCIL_SIZE_EXT:
- if (ctx->Extensions.EXT_packed_depth_stencil ||
- ctx->Extensions.ARB_framebuffer_object) {
- *params = _mesa_get_format_bits(texFormat, pname);
- }
- else {
+ if (!ctx->Extensions.EXT_packed_depth_stencil &&
+ !ctx->Extensions.ARB_framebuffer_object)
goto invalid_pname;
- }
+ *params = _mesa_get_format_bits(texFormat, pname);
break;
case GL_TEXTURE_SHARED_SIZE:
- if (ctx->VersionMajor >= 3 ||
- ctx->Extensions.EXT_texture_shared_exponent) {
- *params = texFormat == MESA_FORMAT_RGB9_E5_FLOAT ? 5 : 0;
- }
- else {
+ if (ctx->VersionMajor < 3 &&
+ !ctx->Extensions.EXT_texture_shared_exponent)
goto invalid_pname;
- }
+ *params = texFormat == MESA_FORMAT_RGB9_E5_FLOAT ? 5 : 0;
break;
/* GL_ARB_texture_compression */
@@ -1022,67 +1029,46 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level,
/* GL_ARB_texture_float */
case GL_TEXTURE_RED_TYPE_ARB:
- if (ctx->Extensions.ARB_texture_float) {
- *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_RED_SIZE) ?
- _mesa_get_format_datatype(texFormat) : GL_NONE;
- }
- else {
+ if (!ctx->Extensions.ARB_texture_float)
goto invalid_pname;
- }
+ *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_RED_SIZE) ?
+ _mesa_get_format_datatype(texFormat) : GL_NONE;
break;
case GL_TEXTURE_GREEN_TYPE_ARB:
- if (ctx->Extensions.ARB_texture_float) {
- *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_GREEN_SIZE) ?
- _mesa_get_format_datatype(texFormat) : GL_NONE;
- }
- else {
+ if (!ctx->Extensions.ARB_texture_float)
goto invalid_pname;
- }
+ *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_GREEN_SIZE) ?
+ _mesa_get_format_datatype(texFormat) : GL_NONE;
break;
case GL_TEXTURE_BLUE_TYPE_ARB:
- if (ctx->Extensions.ARB_texture_float) {
- *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_BLUE_SIZE) ?
- _mesa_get_format_datatype(texFormat) : GL_NONE;
- }
- else {
+ if (!ctx->Extensions.ARB_texture_float)
goto invalid_pname;
- }
+ *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_BLUE_SIZE) ?
+ _mesa_get_format_datatype(texFormat) : GL_NONE;
break;
case GL_TEXTURE_ALPHA_TYPE_ARB:
- if (ctx->Extensions.ARB_texture_float) {
- *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_ALPHA_SIZE) ?
- _mesa_get_format_datatype(texFormat) : GL_NONE;
- }
- else {
+ if (!ctx->Extensions.ARB_texture_float)
goto invalid_pname;
- }
+ *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_ALPHA_SIZE) ?
+ _mesa_get_format_datatype(texFormat) : GL_NONE;
break;
case GL_TEXTURE_LUMINANCE_TYPE_ARB:
- if (ctx->Extensions.ARB_texture_float) {
- *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_LUMINANCE_SIZE) ?
- _mesa_get_format_datatype(texFormat) : GL_NONE;
- }
- else {
+ if (!ctx->Extensions.ARB_texture_float)
goto invalid_pname;
- }
+ *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_LUMINANCE_SIZE) ?
+ _mesa_get_format_datatype(texFormat) : GL_NONE;
break;
case GL_TEXTURE_INTENSITY_TYPE_ARB:
- if (ctx->Extensions.ARB_texture_float) {
- *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_INTENSITY_SIZE) ?
- _mesa_get_format_datatype(texFormat) : GL_NONE;
- }
- else {
+ if (!ctx->Extensions.ARB_texture_float)
goto invalid_pname;
- }
+ *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_INTENSITY_SIZE) ?
+ _mesa_get_format_datatype(texFormat) : GL_NONE;
break;
case GL_TEXTURE_DEPTH_TYPE_ARB:
- if (ctx->Extensions.ARB_texture_float) {
- *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_DEPTH_SIZE) ?
- _mesa_get_format_datatype(texFormat) : GL_NONE;
- }
- else {
+ if (!ctx->Extensions.ARB_texture_float)
goto invalid_pname;
- }
+ *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_DEPTH_SIZE) ?
+ _mesa_get_format_datatype(texFormat) : GL_NONE;
break;
default:
@@ -1104,7 +1090,6 @@ void GLAPIENTRY
_mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
{
struct gl_texture_object *obj;
- GLboolean error = GL_FALSE;
GET_CURRENT_CONTEXT(ctx);
ASSERT_OUTSIDE_BEGIN_END(ctx);
@@ -1130,17 +1115,15 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
*params = ENUM_TO_FLOAT(obj->Sampler.WrapR);
break;
case GL_TEXTURE_BORDER_COLOR:
- if(ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP))
+ if (ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP))
_mesa_update_state_locked(ctx);
- if(ctx->Color._ClampFragmentColor)
- {
+ if (ctx->Color._ClampFragmentColor) {
params[0] = CLAMP(obj->Sampler.BorderColor.f[0], 0.0F, 1.0F);
params[1] = CLAMP(obj->Sampler.BorderColor.f[1], 0.0F, 1.0F);
params[2] = CLAMP(obj->Sampler.BorderColor.f[2], 0.0F, 1.0F);
params[3] = CLAMP(obj->Sampler.BorderColor.f[3], 0.0F, 1.0F);
}
- else
- {
+ else {
params[0] = obj->Sampler.BorderColor.f[0];
params[1] = obj->Sampler.BorderColor.f[1];
params[2] = obj->Sampler.BorderColor.f[2];
@@ -1148,14 +1131,8 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
}
break;
case GL_TEXTURE_RESIDENT:
- {
- GLboolean resident;
- if (ctx->Driver.IsTextureResident)
- resident = ctx->Driver.IsTextureResident(ctx, obj);
- else
- resident = GL_TRUE;
- *params = ENUM_TO_FLOAT(resident);
- }
+ *params = ctx->Driver.IsTextureResident ?
+ ctx->Driver.IsTextureResident(ctx, obj) : 1.0F;
break;
case GL_TEXTURE_PRIORITY:
*params = obj->Priority;
@@ -1173,49 +1150,37 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
*params = (GLfloat) obj->MaxLevel;
break;
case GL_TEXTURE_MAX_ANISOTROPY_EXT:
- if (ctx->Extensions.EXT_texture_filter_anisotropic) {
- *params = obj->Sampler.MaxAnisotropy;
- }
- else
- error = GL_TRUE;
+ if (!ctx->Extensions.EXT_texture_filter_anisotropic)
+ goto invalid_pname;
+ *params = obj->Sampler.MaxAnisotropy;
break;
case GL_TEXTURE_COMPARE_FAIL_VALUE_ARB:
- if (ctx->Extensions.ARB_shadow_ambient) {
- *params = obj->Sampler.CompareFailValue;
- }
- else
- error = GL_TRUE;
+ if (!ctx->Extensions.ARB_shadow_ambient)
+ goto invalid_pname;
+ *params = obj->Sampler.CompareFailValue;
break;
case GL_GENERATE_MIPMAP_SGIS:
*params = (GLfloat) obj->GenerateMipmap;
break;
case GL_TEXTURE_COMPARE_MODE_ARB:
- if (ctx->Extensions.ARB_shadow) {
- *params = (GLfloat) obj->Sampler.CompareMode;
- }
- else
- error = GL_TRUE;
+ if (!ctx->Extensions.ARB_shadow)
+ goto invalid_pname;
+ *params = (GLfloat) obj->Sampler.CompareMode;
break;
case GL_TEXTURE_COMPARE_FUNC_ARB:
- if (ctx->Extensions.ARB_shadow) {
- *params = (GLfloat) obj->Sampler.CompareFunc;
- }
- else
- error = GL_TRUE;
+ if (!ctx->Extensions.ARB_shadow)
+ goto invalid_pname;
+ *params = (GLfloat) obj->Sampler.CompareFunc;
break;
case GL_DEPTH_TEXTURE_MODE_ARB:
- if (ctx->Extensions.ARB_depth_texture) {
- *params = (GLfloat) obj->Sampler.DepthMode;
- }
- else
- error = GL_TRUE;
+ if (!ctx->Extensions.ARB_depth_texture)
+ goto invalid_pname;
+ *params = (GLfloat) obj->Sampler.DepthMode;
break;
case GL_TEXTURE_LOD_BIAS:
- if (ctx->Extensions.EXT_texture_lod_bias) {
- *params = obj->Sampler.LodBias;
- }
- else
- error = GL_TRUE;
+ if (!ctx->Extensions.EXT_texture_lod_bias)
+ goto invalid_pname;
+ *params = obj->Sampler.LodBias;
break;
#if FEATURE_OES_draw_texture
case GL_TEXTURE_CROP_RECT_OES:
@@ -1230,45 +1195,40 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
case GL_TEXTURE_SWIZZLE_G_EXT:
case GL_TEXTURE_SWIZZLE_B_EXT:
case GL_TEXTURE_SWIZZLE_A_EXT:
- if (ctx->Extensions.EXT_texture_swizzle) {
- GLuint comp = pname - GL_TEXTURE_SWIZZLE_R_EXT;
- *params = (GLfloat) obj->Swizzle[comp];
- }
- else {
- error = GL_TRUE;
- }
+ if (!ctx->Extensions.EXT_texture_swizzle)
+ goto invalid_pname;
+ *params = (GLfloat) obj->Swizzle[pname - GL_TEXTURE_SWIZZLE_R_EXT];
break;
case GL_TEXTURE_SWIZZLE_RGBA_EXT:
- if (ctx->Extensions.EXT_texture_swizzle) {
+ if (!ctx->Extensions.EXT_texture_swizzle) {
+ goto invalid_pname;
+ }
+ else {
GLuint comp;
for (comp = 0; comp < 4; comp++) {
params[comp] = (GLfloat) obj->Swizzle[comp];
}
}
- else {
- error = GL_TRUE;
- }
break;
case GL_TEXTURE_CUBE_MAP_SEAMLESS:
- if (ctx->Extensions.AMD_seamless_cubemap_per_texture) {
+ if (!ctx->Extensions.AMD_seamless_cubemap_per_texture)
+ goto invalid_pname;
*params = (GLfloat) obj->Sampler.CubeMapSeamless;
- }
- else {
- error = GL_TRUE;
- }
+ break;
default:
- error = GL_TRUE;
- break;
+ goto invalid_pname;
}
- if (error)
- _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameterfv(pname=0x%x)",
- pname);
+ /* no error if we get here */
+ _mesa_unlock_texture(ctx, obj);
+ return;
+invalid_pname:
_mesa_unlock_texture(ctx, obj);
+ _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameterfv(pname=0x%x)", pname);
}
@@ -1276,13 +1236,12 @@ void GLAPIENTRY
_mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
{
struct gl_texture_object *obj;
- GLboolean error = GL_FALSE;
GET_CURRENT_CONTEXT(ctx);
ASSERT_OUTSIDE_BEGIN_END(ctx);
- obj = get_texobj(ctx, target, GL_TRUE);
- if (!obj)
- return;
+ obj = get_texobj(ctx, target, GL_TRUE);
+ if (!obj)
+ return;
_mesa_lock_texture(ctx, obj);
switch (pname) {
@@ -1315,14 +1274,8 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
}
break;;
case GL_TEXTURE_RESIDENT:
- {
- GLboolean resident;
- if (ctx->Driver.IsTextureResident)
- resident = ctx->Driver.IsTextureResident(ctx, obj);
- else
- resident = GL_TRUE;
- *params = (GLint) resident;
- }
+ *params = ctx->Driver.IsTextureResident ?
+ ctx->Driver.IsTextureResident(ctx, obj) : 1;
break;;
case GL_TEXTURE_PRIORITY:
*params = FLOAT_TO_INT(obj->Priority);
@@ -1340,55 +1293,37 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
*params = obj->MaxLevel;
break;;
case GL_TEXTURE_MAX_ANISOTROPY_EXT:
- if (ctx->Extensions.EXT_texture_filter_anisotropic) {
- *params = (GLint) obj->Sampler.MaxAnisotropy;
- }
- else {
- error = GL_TRUE;
- }
+ if (!ctx->Extensions.EXT_texture_filter_anisotropic)
+ goto invalid_pname;
+ *params = (GLint) obj->Sampler.MaxAnisotropy;
break;
case GL_TEXTURE_COMPARE_FAIL_VALUE_ARB:
- if (ctx->Extensions.ARB_shadow_ambient) {
- *params = (GLint) FLOAT_TO_INT(obj->Sampler.CompareFailValue);
- }
- else {
- error = GL_TRUE;
- }
+ if (!ctx->Extensions.ARB_shadow_ambient)
+ goto invalid_pname;
+ *params = (GLint) FLOAT_TO_INT(obj->Sampler.CompareFailValue);
break;
case GL_GENERATE_MIPMAP_SGIS:
*params = (GLint) obj->GenerateMipmap;
break;
case GL_TEXTURE_COMPARE_MODE_ARB:
- if (ctx->Extensions.ARB_shadow) {
- *params = (GLint) obj->Sampler.CompareMode;
- }
- else {
- error = GL_TRUE;
- }
+ if (!ctx->Extensions.ARB_shadow)
+ goto invalid_pname;
+ *params = (GLint) obj->Sampler.CompareMode;
break;
case GL_TEXTURE_COMPARE_FUNC_ARB:
- if (ctx->Extensions.ARB_shadow) {
- *params = (GLint) obj->Sampler.CompareFunc;
- }
- else {
- error = GL_TRUE;
- }
+ if (!ctx->Extensions.ARB_shadow)
+ goto invalid_pname;
+ *params = (GLint) obj->Sampler.CompareFunc;
break;
case GL_DEPTH_TEXTURE_MODE_ARB:
- if (ctx->Extensions.ARB_depth_texture) {
- *params = (GLint) obj->Sampler.DepthMode;
- }
- else {
- error = GL_TRUE;
- }
+ if (!ctx->Extensions.ARB_depth_texture)
+ goto invalid_pname;
+ *params = (GLint) obj->Sampler.DepthMode;
break;
case GL_TEXTURE_LOD_BIAS:
- if (ctx->Extensions.EXT_texture_lod_bias) {
- *params = (GLint) obj->Sampler.LodBias;
- }
- else {
- error = GL_TRUE;
- }
+ if (!ctx->Extensions.EXT_texture_lod_bias)
+ goto invalid_pname;
+ *params = (GLint) obj->Sampler.LodBias;
break;
#if FEATURE_OES_draw_texture
case GL_TEXTURE_CROP_RECT_OES:
@@ -1402,41 +1337,34 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
case GL_TEXTURE_SWIZZLE_G_EXT:
case GL_TEXTURE_SWIZZLE_B_EXT:
case GL_TEXTURE_SWIZZLE_A_EXT:
- if (ctx->Extensions.EXT_texture_swizzle) {
- GLuint comp = pname - GL_TEXTURE_SWIZZLE_R_EXT;
- *params = obj->Swizzle[comp];
- }
- else {
- error = GL_TRUE;
- }
+ if (!ctx->Extensions.EXT_texture_swizzle)
+ goto invalid_pname;
+ *params = obj->Swizzle[pname - GL_TEXTURE_SWIZZLE_R_EXT];
break;
case GL_TEXTURE_SWIZZLE_RGBA_EXT:
- if (ctx->Extensions.EXT_texture_swizzle) {
- COPY_4V(params, obj->Swizzle);
- }
- else {
- error = GL_TRUE;
- }
+ if (!ctx->Extensions.EXT_texture_swizzle)
+ goto invalid_pname;
+ COPY_4V(params, obj->Swizzle);
break;
case GL_TEXTURE_CUBE_MAP_SEAMLESS:
- if (ctx->Extensions.AMD_seamless_cubemap_per_texture) {
- *params = (GLint) obj->Sampler.CubeMapSeamless;
- }
- else {
- error = GL_TRUE;
- }
+ if (!ctx->Extensions.AMD_seamless_cubemap_per_texture)
+ goto invalid_pname;
+ *params = (GLint) obj->Sampler.CubeMapSeamless;
+ break;
default:
- ; /* silence warnings */
+ goto invalid_pname;
}
- if (error)
- _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameteriv(pname=0x%x)",
- pname);
+ /* no error if we get here */
+ _mesa_unlock_texture(ctx, obj);
+ return;
+invalid_pname:
_mesa_unlock_texture(ctx, obj);
+ _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameteriv(pname=0x%x)", pname);
}
@@ -1449,6 +1377,8 @@ _mesa_GetTexParameterIiv(GLenum target, GLenum pname, GLint *params)
ASSERT_OUTSIDE_BEGIN_END(ctx);
texObj = get_texobj(ctx, target, GL_TRUE);
+ if (!texObj)
+ return;
switch (pname) {
case GL_TEXTURE_BORDER_COLOR:
@@ -1469,6 +1399,8 @@ _mesa_GetTexParameterIuiv(GLenum target, GLenum pname, GLuint *params)
ASSERT_OUTSIDE_BEGIN_END(ctx);
texObj = get_texobj(ctx, target, GL_TRUE);
+ if (!texObj)
+ return;
switch (pname) {
case GL_TEXTURE_BORDER_COLOR:
diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index 6e1e63bdfb0..c4aeaa8f16d 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -4577,8 +4577,7 @@ texture_row_stride(const struct gl_texture_image *texImage)
/**
- * This is the software fallback for Driver.TexImage1D()
- * and Driver.CopyTexImage1D().
+ * This is the software fallback for Driver.TexImage1D().
* \sa _mesa_store_teximage2d()
*/
void
@@ -4629,8 +4628,7 @@ _mesa_store_teximage1d(struct gl_context *ctx, GLenum target, GLint level,
/**
- * This is the software fallback for Driver.TexImage2D()
- * and Driver.CopyTexImage2D().
+ * This is the software fallback for Driver.TexImage2D().
*
* This function is oriented toward storing images in main memory, rather
* than VRAM. Device driver's can easily plug in their own replacement.
@@ -4684,8 +4682,7 @@ _mesa_store_teximage2d(struct gl_context *ctx, GLenum target, GLint level,
/**
- * This is the software fallback for Driver.TexImage3D()
- * and Driver.CopyTexImage3D().
+ * This is the software fallback for Driver.TexImage3D().
* \sa _mesa_store_teximage2d()
*/
void
diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index dd069a3a4d1..cda840fe2d2 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -429,7 +429,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
for (i = 0; i < rows; i++) {
const int base = paramPos + offset + i;
for (j = 0; j < cols; j++ ) {
- params[k++] = prog->Parameters->ParameterValues[base][j];
+ params[k++] = prog->Parameters->ParameterValues[base][j].f;
}
}
}
@@ -442,7 +442,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
const int base = paramPos + offset + i;
for (j = 0; j < cols; j++ ) {
params[k++] = (GLdouble)
- prog->Parameters->ParameterValues[base][j];
+ prog->Parameters->ParameterValues[base][j].f;
}
}
}
@@ -454,8 +454,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
for (i = 0; i < rows; i++) {
const int base = paramPos + offset + i;
for (j = 0; j < cols; j++ ) {
- params[k++] = (GLint)
- prog->Parameters->ParameterValues[base][j];
+ params[k++] = ctx->Const.NativeIntegers ?
+ prog->Parameters->ParameterValues[base][j].i :
+ (GLint) prog->Parameters->ParameterValues[base][j].f;
}
}
}
@@ -467,8 +468,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
for (i = 0; i < rows; i++) {
const int base = paramPos + offset + i;
for (j = 0; j < cols; j++ ) {
- params[k++] = (GLuint)
- prog->Parameters->ParameterValues[base][j];
+ params[k++] = ctx->Const.NativeIntegers ?
+ prog->Parameters->ParameterValues[base][j].u :
+ (GLuint) prog->Parameters->ParameterValues[base][j].f;
}
}
}
@@ -670,7 +672,7 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program,
/* loop over number of samplers to change */
for (i = 0; i < count; i++) {
GLuint sampler = (GLuint)
- program->Parameters->ParameterValues[index + offset + i][0];
+ program->Parameters->ParameterValues[index+offset + i][0].f;
GLuint texUnit = ((GLuint *) values)[i];
/* check that the sampler (tex unit index) is legal */
@@ -735,42 +737,52 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program,
/* loop over number of array elements */
for (k = 0; k < count; k++) {
- GLfloat *uniformVal;
+ gl_constant_value *uniformVal;
if (offset + k >= slots) {
/* Extra array data is ignored */
break;
}
- /* uniformVal (the destination) is always float[4] */
+ /* uniformVal (the destination) is always gl_constant_value[4] */
uniformVal = program->Parameters->ParameterValues[index + offset + k];
if (basicType == GL_INT) {
- /* convert user's ints to floats */
const GLint *iValues = ((const GLint *) values) + k * elems;
for (i = 0; i < elems; i++) {
- uniformVal[i] = (GLfloat) iValues[i];
+ if (!ctx->Const.NativeIntegers)
+ uniformVal[i].f = (GLfloat) iValues[i];
+ else
+ uniformVal[i].i = iValues[i];
}
}
else if (basicType == GL_UNSIGNED_INT) {
- /* convert user's uints to floats */
const GLuint *iValues = ((const GLuint *) values) + k * elems;
for (i = 0; i < elems; i++) {
- uniformVal[i] = (GLfloat) iValues[i];
+ if (!ctx->Const.NativeIntegers)
+ uniformVal[i].f = (GLfloat)(GLuint) iValues[i];
+ else
+ uniformVal[i].u = iValues[i];
}
}
else {
const GLfloat *fValues = ((const GLfloat *) values) + k * elems;
assert(basicType == GL_FLOAT);
for (i = 0; i < elems; i++) {
- uniformVal[i] = fValues[i];
+ uniformVal[i].f = fValues[i];
}
}
- /* if the uniform is bool-valued, convert to 1.0 or 0.0 */
+ /* if the uniform is bool-valued, convert to 1 or 0 */
if (isUniformBool) {
for (i = 0; i < elems; i++) {
- uniformVal[i] = uniformVal[i] ? 1.0f : 0.0f;
+ if (basicType == GL_FLOAT)
+ uniformVal[i].b = uniformVal[i].f != 0.0f ? 1 : 0;
+ else
+ uniformVal[i].b = uniformVal[i].u ? 1 : 0;
+
+ if (!ctx->Const.NativeIntegers)
+ uniformVal[i].f = uniformVal[i].b ? 1.0f : 0.0f;
}
}
}
@@ -936,7 +948,7 @@ set_program_uniform_matrix(struct gl_context *ctx, struct gl_program *program,
/* Ignore writes beyond the end of (the used part of) an array */
return;
}
- v = program->Parameters->ParameterValues[index + offset];
+ v = (GLfloat *) program->Parameters->ParameterValues[index + offset];
for (row = 0; row < rows; row++) {
if (transpose) {
v[row] = values[src + row * cols + col];
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index d8e5a3a9772..6820e4c6ba7 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -134,7 +134,7 @@ src_reg::src_reg(dst_reg reg)
this->index = reg.index;
this->swizzle = SWIZZLE_XYZW;
this->negate = 0;
- this->reladdr = NULL;
+ this->reladdr = reg.reladdr;
}
dst_reg::dst_reg(src_reg reg)
@@ -297,11 +297,11 @@ public:
/**
* Emit the correct dot-product instruction for the type of arguments
*/
- void emit_dp(ir_instruction *ir,
- dst_reg dst,
- src_reg src0,
- src_reg src1,
- unsigned elements);
+ ir_to_mesa_instruction * emit_dp(ir_instruction *ir,
+ dst_reg dst,
+ src_reg src0,
+ src_reg src1,
+ unsigned elements);
void emit_scalar(ir_instruction *ir, enum prog_opcode op,
dst_reg dst, src_reg src0);
@@ -312,9 +312,11 @@ public:
void emit_scs(ir_instruction *ir, enum prog_opcode op,
dst_reg dst, const src_reg &src);
- GLboolean try_emit_mad(ir_expression *ir,
+ bool try_emit_mad(ir_expression *ir,
int mul_operand);
- GLboolean try_emit_sat(ir_expression *ir);
+ bool try_emit_mad_for_and_not(ir_expression *ir,
+ int mul_operand);
+ bool try_emit_sat(ir_expression *ir);
void emit_swz(ir_expression *ir);
@@ -331,20 +333,6 @@ dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP);
dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
-static void
-fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
-
-static void
-fail_link(struct gl_shader_program *prog, const char *fmt, ...)
-{
- va_list args;
- va_start(args, fmt);
- ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
- va_end(args);
-
- prog->LinkStatus = GL_FALSE;
-}
-
static int
swizzle_for_size(int size)
{
@@ -422,7 +410,7 @@ ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op)
return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
}
-void
+ir_to_mesa_instruction *
ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
dst_reg dst, src_reg src0, src_reg src1,
unsigned elements)
@@ -431,7 +419,7 @@ ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
};
- emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
+ return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
}
/**
@@ -593,13 +581,13 @@ ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
}
}
-struct src_reg
+src_reg
ir_to_mesa_visitor::src_reg_for_float(float val)
{
src_reg src(PROGRAM_CONSTANT, -1, NULL);
src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
- &val, 1, &src.swizzle);
+ (const gl_constant_value *)&val, 1, &src.swizzle);
return src;
}
@@ -655,8 +643,6 @@ src_reg
ir_to_mesa_visitor::get_temp(const glsl_type *type)
{
src_reg src;
- int swizzle[4];
- int i;
src.file = PROGRAM_TEMPORARY;
src.index = next_temp;
@@ -666,12 +652,7 @@ ir_to_mesa_visitor::get_temp(const glsl_type *type)
if (type->is_array() || type->is_record()) {
src.swizzle = SWIZZLE_NOOP;
} else {
- for (i = 0; i < type->vector_elements; i++)
- swizzle[i] = i;
- for (; i < 4; i++)
- swizzle[i] = type->vector_elements - 1;
- src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
- swizzle[2], swizzle[3]);
+ src.swizzle = swizzle_for_size(type->vector_elements);
}
src.negate = 0;
@@ -744,7 +725,7 @@ ir_to_mesa_visitor::visit(ir_variable *ir)
}
}
- struct variable_storage *storage;
+ variable_storage *storage;
dst_reg dst;
if (i == ir->num_state_slots) {
/* We'll set the index later. */
@@ -789,10 +770,11 @@ ir_to_mesa_visitor::visit(ir_variable *ir)
if (storage->file == PROGRAM_TEMPORARY &&
dst.index != storage->index + (int) ir->num_state_slots) {
- fail_link(this->shader_program,
- "failed to load builtin uniform `%s' (%d/%d regs loaded)\n",
- ir->name, dst.index - storage->index,
- type_size(ir->type));
+ linker_error(this->shader_program,
+ "failed to load builtin uniform `%s' "
+ "(%d/%d regs loaded)\n",
+ ir->name, dst.index - storage->index,
+ type_size(ir->type));
}
}
}
@@ -889,7 +871,7 @@ ir_to_mesa_visitor::visit(ir_function *ir)
}
}
-GLboolean
+bool
ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
{
int nonmul_operand = 1 - mul_operand;
@@ -912,7 +894,47 @@ ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
return true;
}
-GLboolean
+/**
+ * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
+ *
+ * The logic values are 1.0 for true and 0.0 for false. Logical-and is
+ * implemented using multiplication, and logical-or is implemented using
+ * addition. Logical-not can be implemented as (true - x), or (1.0 - x).
+ * As a result, the logical expression (a & !b) can be rewritten as:
+ *
+ * - a * !b
+ * - a * (1 - b)
+ * - (a * 1) - (a * b)
+ * - a + -(a * b)
+ * - a + (a * -b)
+ *
+ * This final expression can be implemented as a single MAD(a, -b, a)
+ * instruction.
+ */
+bool
+ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
+{
+ const int other_operand = 1 - try_operand;
+ src_reg a, b;
+
+ ir_expression *expr = ir->operands[try_operand]->as_expression();
+ if (!expr || expr->operation != ir_unop_logic_not)
+ return false;
+
+ ir->operands[other_operand]->accept(this);
+ a = this->result;
+ expr->operands[0]->accept(this);
+ b = this->result;
+
+ b.negate = ~b.negate;
+
+ this->result = get_temp(ir->type);
+ emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a);
+
+ return true;
+}
+
+bool
ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
{
/* Saturates were only introduced to vertex programs in
@@ -928,10 +950,30 @@ ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
sat_src->accept(this);
src_reg src = this->result;
- this->result = get_temp(ir->type);
- ir_to_mesa_instruction *inst;
- inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src);
- inst->saturate = true;
+ /* If we generated an expression instruction into a temporary in
+ * processing the saturate's operand, apply the saturate to that
+ * instruction. Otherwise, generate a MOV to do the saturate.
+ *
+ * Note that we have to be careful to only do this optimization if
+ * the instruction in question was what generated src->result. For
+ * example, ir_dereference_array might generate a MUL instruction
+ * to create the reladdr, and return us a src reg using that
+ * reladdr. That MUL result is not the value we're trying to
+ * saturate.
+ */
+ ir_expression *sat_src_expr = sat_src->as_expression();
+ ir_to_mesa_instruction *new_inst;
+ new_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
+ if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
+ sat_src_expr->operation == ir_binop_add ||
+ sat_src_expr->operation == ir_binop_dot)) {
+ new_inst->saturate = true;
+ } else {
+ this->result = get_temp(ir->type);
+ ir_to_mesa_instruction *inst;
+ inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src);
+ inst->saturate = true;
+ }
return true;
}
@@ -1088,6 +1130,16 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
if (try_emit_mad(ir, 0))
return;
}
+
+ /* Quick peephole: Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
+ */
+ if (ir->operation == ir_binop_logic_and) {
+ if (try_emit_mad_for_and_not(ir, 1))
+ return;
+ if (try_emit_mad_for_and_not(ir, 0))
+ return;
+ }
+
if (try_emit_sat(ir))
return;
@@ -1135,7 +1187,13 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
switch (ir->operation) {
case ir_unop_logic_not:
- emit(ir, OPCODE_SEQ, result_dst, op[0], src_reg_for_float(0.0));
+ /* Previously 'SEQ dst, src, 0.0' was used for this. However, many
+ * older GPUs implement SEQ using multiple instructions (i915 uses two
+ * SGE instructions and a MUL instruction). Since our logic values are
+ * 0.0 and 1.0, 1-x also implements !x.
+ */
+ op[0].negate = ~op[0].negate;
+ emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0));
break;
case ir_unop_neg:
op[0].negate = ~op[0].negate;
@@ -1231,8 +1289,19 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
ir->operands[1]->type->is_vector()) {
src_reg temp = get_temp(glsl_type::vec4_type);
emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
+
+ /* After the dot-product, the value will be an integer on the
+ * range [0,4]. Zero becomes 1.0, and positive values become zero.
+ */
emit_dp(ir, result_dst, temp, temp, vector_elements);
- emit(ir, OPCODE_SEQ, result_dst, result_src, src_reg_for_float(0.0));
+
+ /* Negating the result of the dot-product gives values on the range
+ * [-4, 0]. Zero becomes 1.0, and negative values become zero. This
+ * is achieved using SGE.
+ */
+ src_reg sge_src = result_src;
+ sge_src.negate = ~sge_src.negate;
+ emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0));
} else {
emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
}
@@ -1243,29 +1312,83 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
ir->operands[1]->type->is_vector()) {
src_reg temp = get_temp(glsl_type::vec4_type);
emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
- emit_dp(ir, result_dst, temp, temp, vector_elements);
- emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+
+ /* After the dot-product, the value will be an integer on the
+ * range [0,4]. Zero stays zero, and positive values become 1.0.
+ */
+ ir_to_mesa_instruction *const dp =
+ emit_dp(ir, result_dst, temp, temp, vector_elements);
+ if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+ /* The clamping to [0,1] can be done for free in the fragment
+ * shader with a saturate.
+ */
+ dp->saturate = true;
+ } else {
+ /* Negating the result of the dot-product gives values on the range
+ * [-4, 0]. Zero stays zero, and negative values become 1.0. This
+ * is achieved using SLT.
+ */
+ src_reg slt_src = result_src;
+ slt_src.negate = ~slt_src.negate;
+ emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+ }
} else {
emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
}
break;
- case ir_unop_any:
+ case ir_unop_any: {
assert(ir->operands[0]->type->is_vector());
- emit_dp(ir, result_dst, op[0], op[0],
- ir->operands[0]->type->vector_elements);
- emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+
+ /* After the dot-product, the value will be an integer on the
+ * range [0,4]. Zero stays zero, and positive values become 1.0.
+ */
+ ir_to_mesa_instruction *const dp =
+ emit_dp(ir, result_dst, op[0], op[0],
+ ir->operands[0]->type->vector_elements);
+ if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+ /* The clamping to [0,1] can be done for free in the fragment
+ * shader with a saturate.
+ */
+ dp->saturate = true;
+ } else {
+ /* Negating the result of the dot-product gives values on the range
+ * [-4, 0]. Zero stays zero, and negative values become 1.0. This
+ * is achieved using SLT.
+ */
+ src_reg slt_src = result_src;
+ slt_src.negate = ~slt_src.negate;
+ emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+ }
break;
+ }
case ir_binop_logic_xor:
emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
break;
- case ir_binop_logic_or:
- /* This could be a saturated add and skip the SNE. */
- emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
- emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+ case ir_binop_logic_or: {
+ /* After the addition, the value will be an integer on the
+ * range [0,2]. Zero stays zero, and positive values become 1.0.
+ */
+ ir_to_mesa_instruction *add =
+ emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
+ if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+ /* The clamping to [0,1] can be done for free in the fragment
+ * shader with a saturate.
+ */
+ add->saturate = true;
+ } else {
+ /* Negating the result of the addition gives values on the range
+ * [-2, 0]. Zero stays zero, and negative values become 1.0. This
+ * is achieved using SLT.
+ */
+ src_reg slt_src = result_src;
+ slt_src.negate = ~slt_src.negate;
+ emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+ }
break;
+ }
case ir_binop_logic_and:
/* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
@@ -1496,6 +1619,18 @@ ir_to_mesa_visitor::visit(ir_dereference_array *ir)
this->result, src_reg_for_float(element_size));
}
+ /* If there was already a relative address register involved, add the
+ * new and the old together to get the new offset.
+ */
+ if (src.reladdr != NULL) {
+ src_reg accum_reg = get_temp(glsl_type::float_type);
+
+ emit(ir, OPCODE_ADD, dst_reg(accum_reg),
+ index_reg, *src.reladdr);
+
+ index_reg = accum_reg;
+ }
+
src.reladdr = ralloc(mem_ctx, src_reg);
memcpy(src.reladdr, &index_reg, sizeof(index_reg));
}
@@ -1796,7 +1931,7 @@ ir_to_mesa_visitor::visit(ir_constant *ir)
src = src_reg(PROGRAM_CONSTANT, -1, NULL);
src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
- values,
+ (gl_constant_value *) values,
ir->type->vector_elements,
&src.swizzle);
emit(ir, OPCODE_MOV, mat_column, src);
@@ -1834,7 +1969,7 @@ ir_to_mesa_visitor::visit(ir_constant *ir)
this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type);
this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
- values,
+ (gl_constant_value *) values,
ir->type->vector_elements,
&this->result.swizzle);
}
@@ -1969,7 +2104,10 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
ir_to_mesa_instruction *inst = NULL;
prog_opcode opcode = OPCODE_NOP;
- ir->coordinate->accept(this);
+ if (ir->op == ir_txs)
+ this->result = src_reg_for_float(0.0);
+ else
+ ir->coordinate->accept(this);
/* Put our coords in a temp. We'll need to modify them for shadow,
* projection, or LOD, so the only case we'd use it as is is if
@@ -1993,6 +2131,7 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
switch (ir->op) {
case ir_tex:
+ case ir_txs:
opcode = OPCODE_TEX;
break;
case ir_txb:
@@ -2401,29 +2540,32 @@ check_resources(const struct gl_context *ctx,
case GL_VERTEX_PROGRAM_ARB:
if (_mesa_bitcount(prog->SamplersUsed) >
ctx->Const.MaxVertexTextureImageUnits) {
- fail_link(shader_program, "Too many vertex shader texture samplers");
+ linker_error(shader_program,
+ "Too many vertex shader texture samplers");
}
if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
- fail_link(shader_program, "Too many vertex shader constants");
+ linker_error(shader_program, "Too many vertex shader constants");
}
break;
case MESA_GEOMETRY_PROGRAM:
if (_mesa_bitcount(prog->SamplersUsed) >
ctx->Const.MaxGeometryTextureImageUnits) {
- fail_link(shader_program, "Too many geometry shader texture samplers");
+ linker_error(shader_program,
+ "Too many geometry shader texture samplers");
}
if (prog->Parameters->NumParameters >
MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
- fail_link(shader_program, "Too many geometry shader constants");
+ linker_error(shader_program, "Too many geometry shader constants");
}
break;
case GL_FRAGMENT_PROGRAM_ARB:
if (_mesa_bitcount(prog->SamplersUsed) >
ctx->Const.MaxTextureImageUnits) {
- fail_link(shader_program, "Too many fragment shader texture samplers");
+ linker_error(shader_program,
+ "Too many fragment shader texture samplers");
}
if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
- fail_link(shader_program, "Too many fragment shader constants");
+ linker_error(shader_program, "Too many fragment shader constants");
}
break;
default:
@@ -2531,16 +2673,17 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
*/
if (file == PROGRAM_SAMPLER) {
for (unsigned int j = 0; j < size / 4; j++)
- prog->Parameters->ParameterValues[index + j][0] = next_sampler++;
+ prog->Parameters->ParameterValues[index + j][0].f = next_sampler++;
}
/* The location chosen in the Parameters list here (returned
* from _mesa_add_uniform) has to match what the linker chose.
*/
if (index != parameter_index) {
- fail_link(shader_program, "Allocation of uniform `%s' to target "
- "failed (%d vs %d)\n",
- uniform->Name, index, parameter_index);
+ linker_error(shader_program,
+ "Allocation of uniform `%s' to target failed "
+ "(%d vs %d)\n",
+ uniform->Name, index, parameter_index);
}
}
}
@@ -2573,8 +2716,8 @@ set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
int loc = _mesa_get_uniform_location(ctx, shader_program, name);
if (loc == -1) {
- fail_link(shader_program,
- "Couldn't find uniform for initializer %s\n", name);
+ linker_error(shader_program,
+ "Couldn't find uniform for initializer %s\n", name);
return;
}
@@ -2974,11 +3117,31 @@ get_mesa_program(struct gl_context *ctx,
if (mesa_inst->SrcReg[src].RelAddr)
prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;
- if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) {
- fail_link(shader_program, "Couldn't flatten if statement\n");
- }
-
switch (mesa_inst->Opcode) {
+ case OPCODE_IF:
+ if (options->EmitNoIfs) {
+ linker_warning(shader_program,
+ "Couldn't flatten if-statement. "
+ "This will likely result in software "
+ "rasterization.\n");
+ }
+ break;
+ case OPCODE_BGNLOOP:
+ if (options->EmitNoLoops) {
+ linker_warning(shader_program,
+ "Couldn't unroll loop. "
+ "This will likely result in software "
+ "rasterization.\n");
+ }
+ break;
+ case OPCODE_CONT:
+ if (options->EmitNoCont) {
+ linker_warning(shader_program,
+ "Couldn't lower continue-statement. "
+ "This will likely result in software "
+ "rasterization.\n");
+ }
+ break;
case OPCODE_BGNSUB:
inst->function->inst = i;
mesa_inst->Comment = strdup(inst->function->sig->function_name());
@@ -3246,7 +3409,7 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
for (i = 0; i < prog->NumShaders; i++) {
if (!prog->Shaders[i]->CompileStatus) {
- fail_link(prog, "linking with uncompiled shader");
+ linker_error(prog, "linking with uncompiled shader");
prog->LinkStatus = GL_FALSE;
}
}
diff --git a/src/mesa/program/nvfragparse.c b/src/mesa/program/nvfragparse.c
index 8516b5fc1ff..ce72c610d89 100644
--- a/src/mesa/program/nvfragparse.c
+++ b/src/mesa/program/nvfragparse.c
@@ -472,8 +472,9 @@ Parse_ScalarConstant(struct parse_state *parseState, GLfloat *number)
const GLfloat *constant;
if (!Parse_Identifier(parseState, ident))
RETURN_ERROR1("Expected an identifier");
- constant = _mesa_lookup_parameter_value(parseState->parameters,
- -1, (const char *) ident);
+ constant = (GLfloat *)_mesa_lookup_parameter_value(parseState->parameters,
+ -1,
+ (const char *) ident);
/* XXX Check that it's a constant and not a parameter */
if (!constant) {
RETURN_ERROR1("Undefined symbol");
@@ -1039,7 +1040,8 @@ Parse_VectorSrc(struct parse_state *parseState,
if (!Parse_ScalarConstant(parseState, values))
RETURN_ERROR;
paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
- values, 4, NULL);
+ (gl_constant_value *) values,
+ 4, NULL);
srcReg->File = PROGRAM_NAMED_PARAM;
srcReg->Index = paramIndex;
}
@@ -1051,7 +1053,8 @@ Parse_VectorSrc(struct parse_state *parseState,
if (!Parse_VectorConstant(parseState, values))
RETURN_ERROR;
paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
- values, 4, NULL);
+ (gl_constant_value *) values,
+ 4, NULL);
srcReg->File = PROGRAM_NAMED_PARAM;
srcReg->Index = paramIndex;
}
@@ -1145,7 +1148,8 @@ Parse_ScalarSrcReg(struct parse_state *parseState,
if (!Parse_VectorConstant(parseState, values))
RETURN_ERROR;
paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
- values, 4, NULL);
+ (gl_constant_value *) values,
+ 4, NULL);
srcReg->File = PROGRAM_NAMED_PARAM;
srcReg->Index = paramIndex;
}
@@ -1170,7 +1174,8 @@ Parse_ScalarSrcReg(struct parse_state *parseState,
if (!Parse_ScalarConstant(parseState, values))
RETURN_ERROR;
paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
- values, 4, NULL);
+ (gl_constant_value *) values,
+ 4, NULL);
srcReg->Index = paramIndex;
srcReg->File = PROGRAM_NAMED_PARAM;
needSuffix = GL_FALSE;
@@ -1296,7 +1301,8 @@ Parse_InstructionSequence(struct parse_state *parseState,
RETURN_ERROR2(id, "already defined");
}
_mesa_add_named_parameter(parseState->parameters,
- (const char *) id, value);
+ (const char *) id,
+ (gl_constant_value *) value);
}
else if (Parse_String(parseState, "DECLARE")) {
GLubyte id[100];
@@ -1315,7 +1321,8 @@ Parse_InstructionSequence(struct parse_state *parseState,
RETURN_ERROR2(id, "already declared");
}
_mesa_add_named_parameter(parseState->parameters,
- (const char *) id, value);
+ (const char *) id,
+ (gl_constant_value *) value);
}
else if (Parse_String(parseState, "END")) {
inst->Opcode = OPCODE_END;
diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c
index e7553c69dbe..77f842a1630 100644
--- a/src/mesa/program/prog_execute.c
+++ b/src/mesa/program/prog_execute.c
@@ -157,7 +157,7 @@ get_src_register_pointer(const struct prog_src_register *source,
case PROGRAM_NAMED_PARAM:
if (reg >= (GLint) prog->Parameters->NumParameters)
return ZeroVec;
- return prog->Parameters->ParameterValues[reg];
+ return (GLfloat *) prog->Parameters->ParameterValues[reg];
case PROGRAM_SYSTEM_VALUE:
assert(reg < Elements(machine->SystemValues));
@@ -639,7 +639,7 @@ _mesa_execute_program(struct gl_context * ctx,
struct gl_program_machine *machine)
{
const GLuint numInst = program->NumInstructions;
- const GLuint maxExec = 10000;
+ const GLuint maxExec = 65536;
GLuint pc, numExec = 0;
machine->CurProgram = program;
@@ -1651,6 +1651,14 @@ _mesa_execute_program(struct gl_context * ctx,
GLfloat texcoord[4], color[4];
fetch_vector4(&inst->SrcReg[0], machine, texcoord);
+ /* For TEX, texcoord.Q should not be used and its value should not
+ * matter (at most, we pass coord.xyz to texture3D() in GLSL).
+ * Set Q=1 so that FetchTexelDeriv() doesn't get a garbage value,
+ * which is effectively what happens when the texcoord swizzle
+ * is .xyzz.
+ */
+ texcoord[3] = 1.0f;
+
fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
if (DEBUG_PROG) {
diff --git a/src/mesa/program/prog_opt_constant_fold.c b/src/mesa/program/prog_opt_constant_fold.c
new file mode 100644
index 00000000000..e2418b55451
--- /dev/null
+++ b/src/mesa/program/prog_opt_constant_fold.c
@@ -0,0 +1,451 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "program.h"
+#include "prog_instruction.h"
+#include "prog_optimize.h"
+#include "prog_parameter.h"
+#include <stdbool.h>
+
+static bool
+src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs)
+{
+ unsigned i;
+
+ for (i = 0; i < num_srcs; i++) {
+ if (inst->SrcReg[i].File != PROGRAM_CONSTANT)
+ return false;
+ }
+
+ return true;
+}
+
+static struct prog_src_register
+src_reg_for_float(struct gl_program *prog, float val)
+{
+ struct prog_src_register src;
+ unsigned swiz;
+
+ memset(&src, 0, sizeof(src));
+
+ src.File = PROGRAM_CONSTANT;
+ src.Index = _mesa_add_unnamed_constant(prog->Parameters,
+ (gl_constant_value *) &val, 1, &swiz);
+ src.Swizzle = swiz;
+ return src;
+}
+
+static struct prog_src_register
+src_reg_for_vec4(struct gl_program *prog, const float *val)
+{
+ struct prog_src_register src;
+ unsigned swiz;
+
+ memset(&src, 0, sizeof(src));
+
+ src.File = PROGRAM_CONSTANT;
+ src.Index = _mesa_add_unnamed_constant(prog->Parameters,
+ (gl_constant_value *) val, 4, &swiz);
+ src.Swizzle = swiz;
+ return src;
+}
+
+static bool
+src_regs_are_same(const struct prog_src_register *a,
+ const struct prog_src_register *b)
+{
+ return (a->File == b->File)
+ && (a->Index == b->Index)
+ && (a->Swizzle == b->Swizzle)
+ && (a->Abs == b->Abs)
+ && (a->Negate == b->Negate)
+ && (a->RelAddr == 0)
+ && (b->RelAddr == 0);
+}
+
+static void
+get_value(struct gl_program *prog, struct prog_src_register *r, float *data)
+{
+ const gl_constant_value *const value =
+ prog->Parameters->ParameterValues[r->Index];
+
+ data[0] = value[GET_SWZ(r->Swizzle, 0)].f;
+ data[1] = value[GET_SWZ(r->Swizzle, 1)].f;
+ data[2] = value[GET_SWZ(r->Swizzle, 2)].f;
+ data[3] = value[GET_SWZ(r->Swizzle, 3)].f;
+
+ if (r->Abs) {
+ data[0] = fabsf(data[0]);
+ data[1] = fabsf(data[1]);
+ data[2] = fabsf(data[2]);
+ data[3] = fabsf(data[3]);
+ }
+
+ if (r->Negate & 0x01) {
+ data[0] = -data[0];
+ }
+
+ if (r->Negate & 0x02) {
+ data[1] = -data[1];
+ }
+
+ if (r->Negate & 0x04) {
+ data[2] = -data[2];
+ }
+
+ if (r->Negate & 0x08) {
+ data[3] = -data[3];
+ }
+}
+
+/**
+ * Try to replace instructions that produce a constant result with simple moves.
+ *
+ * The hope is that a following copy propagation pass will eliminate the
+ * unnecessary move instructions.  Returns true if any instruction was rewritten.
+ */
+GLboolean
+_mesa_constant_fold(struct gl_program *prog)
+{
+ bool progress = false;
+ unsigned i;
+
+ for (i = 0; i < prog->NumInstructions; i++) {
+ struct prog_instruction *const inst = &prog->Instructions[i];
+
+ switch (inst->Opcode) {
+ case OPCODE_ADD:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = a[0] + b[0];
+ result[1] = a[1] + b[1];
+ result[2] = a[2] + b[2];
+ result[3] = a[3] + b[3];
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_CMP:
+ /* FINISHME: We could also optimize CMP instructions where the first
+ * FINISHME: source is a constant that is either all < 0.0 or all
+ * FINISHME: >= 0.0.
+ */
+ if (src_regs_are_constant(inst, 3)) {
+ float a[4];
+ float b[4];
+ float c[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+ get_value(prog, &inst->SrcReg[2], c);
+
+ result[0] = a[0] < 0.0f ? b[0] : c[0];
+ result[1] = a[1] < 0.0f ? b[1] : c[1];
+ result[2] = a[2] < 0.0f ? b[2] : c[2];
+ result[3] = a[3] < 0.0f ? b[3] : c[3];
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+ inst->SrcReg[2].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_DP2:
+ case OPCODE_DP3:
+ case OPCODE_DP4:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result;
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ /* It seems like a loop could be used here, but we cleverly put
+ * DP2A between DP2 and DP3. Subtracting DP2 (or similar) from
+ * the opcode results in various failures of the loop control.
+ */
+ result = (a[0] * b[0]) + (a[1] * b[1]);
+
+ if (inst->Opcode >= OPCODE_DP3)
+ result += a[2] * b[2];
+
+ if (inst->Opcode == OPCODE_DP4)
+ result += a[3] * b[3];
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_float(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_MUL:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = a[0] * b[0];
+ result[1] = a[1] * b[1];
+ result[2] = a[2] * b[2];
+ result[3] = a[3] * b[3];
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_SEQ:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = (a[0] == b[0]) ? 1.0f : 0.0f;
+ result[1] = (a[1] == b[1]) ? 1.0f : 0.0f;
+ result[2] = (a[2] == b[2]) ? 1.0f : 0.0f;
+ result[3] = (a[3] == b[3]) ? 1.0f : 0.0f;
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_SGE:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f;
+ result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f;
+ result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f;
+ result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f;
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_SGT:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = (a[0] > b[0]) ? 1.0f : 0.0f;
+ result[1] = (a[1] > b[1]) ? 1.0f : 0.0f;
+ result[2] = (a[2] > b[2]) ? 1.0f : 0.0f;
+ result[3] = (a[3] > b[3]) ? 1.0f : 0.0f;
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_SLE:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = (a[0] <= b[0]) ? 1.0f : 0.0f;
+ result[1] = (a[1] <= b[1]) ? 1.0f : 0.0f;
+ result[2] = (a[2] <= b[2]) ? 1.0f : 0.0f;
+ result[3] = (a[3] <= b[3]) ? 1.0f : 0.0f;
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_SLT:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = (a[0] < b[0]) ? 1.0f : 0.0f;
+ result[1] = (a[1] < b[1]) ? 1.0f : 0.0f;
+ result[2] = (a[2] < b[2]) ? 1.0f : 0.0f;
+ result[3] = (a[3] < b[3]) ? 1.0f : 0.0f;
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_SNE:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = (a[0] != b[0]) ? 1.0f : 0.0f;
+ result[1] = (a[1] != b[1]) ? 1.0f : 0.0f;
+ result[2] = (a[2] != b[2]) ? 1.0f : 0.0f;
+ result[3] = (a[3] != b[3]) ? 1.0f : 0.0f;
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return progress;
+}
diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c
index 8a40fa69eca..25d9684b137 100644
--- a/src/mesa/program/prog_optimize.c
+++ b/src/mesa/program/prog_optimize.c
@@ -472,8 +472,7 @@ can_downward_mov_be_modifed(const struct prog_instruction *mov)
mov->SrcReg[0].HasIndex2 == 0 &&
mov->SrcReg[0].RelAddr2 == 0 &&
mov->DstReg.RelAddr == 0 &&
- mov->DstReg.CondMask == COND_TR &&
- mov->SaturateMode == SATURATE_OFF;
+ mov->DstReg.CondMask == COND_TR;
}
@@ -482,7 +481,8 @@ can_upward_mov_be_modifed(const struct prog_instruction *mov)
{
return
can_downward_mov_be_modifed(mov) &&
- mov->DstReg.File == PROGRAM_TEMPORARY;
+ mov->DstReg.File == PROGRAM_TEMPORARY &&
+ mov->SaturateMode == SATURATE_OFF;
}
@@ -657,6 +657,8 @@ _mesa_merge_mov_into_inst(struct prog_instruction *inst,
if (mask != (inst->DstReg.WriteMask & mask))
return GL_FALSE;
+ inst->SaturateMode |= mov->SaturateMode;
+
/* Depending on the instruction, we may need to recompute the swizzles.
* Also, some other instructions (like TEX) are not linear. We will only
* consider completely active sources and destinations
@@ -1319,6 +1321,15 @@ _mesa_simplify_cmp(struct gl_program * program)
inst->Opcode = OPCODE_MOV;
inst->SrcReg[0] = inst->SrcReg[1];
+
+ /* Unused operands are expected to have the file set to
+ * PROGRAM_UNDEFINED. This is how _mesa_init_instructions initializes
+ * all of the sources.
+ */
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+ inst->SrcReg[2].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
}
}
if (dbg) {
@@ -1347,6 +1358,8 @@ _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program)
any_change = GL_TRUE;
if (_mesa_remove_dead_code_local(program))
any_change = GL_TRUE;
+
+ any_change = _mesa_constant_fold(program) || any_change;
_mesa_reallocate_registers(program);
} while (any_change);
}
diff --git a/src/mesa/program/prog_optimize.h b/src/mesa/program/prog_optimize.h
index 463f5fc51c4..9854fb7a491 100644
--- a/src/mesa/program/prog_optimize.h
+++ b/src/mesa/program/prog_optimize.h
@@ -44,4 +44,7 @@ _mesa_find_temp_intervals(const struct prog_instruction *instructions,
extern void
_mesa_optimize_program(struct gl_context *ctx, struct gl_program *program);
+extern GLboolean
+_mesa_constant_fold(struct gl_program *prog);
+
#endif
diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c
index 3570cab118b..49b3ffbdd5c 100644
--- a/src/mesa/program/prog_parameter.c
+++ b/src/mesa/program/prog_parameter.c
@@ -56,8 +56,8 @@ _mesa_new_parameter_list_sized(unsigned size)
p->Parameters = (struct gl_program_parameter *)
calloc(1, size * sizeof(struct gl_program_parameter));
- p->ParameterValues = (GLfloat (*)[4])
- _mesa_align_malloc(size * 4 *sizeof(GLfloat), 16);
+ p->ParameterValues = (gl_constant_value (*)[4])
+ _mesa_align_malloc(size * 4 *sizeof(gl_constant_value), 16);
if ((p->Parameters == NULL) || (p->ParameterValues == NULL)) {
@@ -101,14 +101,15 @@ _mesa_free_parameter_list(struct gl_program_parameter_list *paramList)
* \param name the parameter name, will be duplicated/copied!
* \param size number of elements in 'values' vector (1..4, or more)
* \param datatype GL_FLOAT, GL_FLOAT_VECx, GL_INT, GL_INT_VECx or GL_NONE.
- * \param values initial parameter value, up to 4 GLfloats, or NULL
+ * \param values initial parameter value, up to 4 gl_constant_values, or NULL
* \param state state indexes, or NULL
* \return index of new parameter in the list, or -1 if error (out of mem)
*/
GLint
_mesa_add_parameter(struct gl_program_parameter_list *paramList,
gl_register_file type, const char *name,
- GLuint size, GLenum datatype, const GLfloat *values,
+ GLuint size, GLenum datatype,
+ const gl_constant_value *values,
const gl_state_index state[STATE_LENGTH],
GLbitfield flags)
{
@@ -127,10 +128,10 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
oldNum * sizeof(struct gl_program_parameter),
paramList->Size * sizeof(struct gl_program_parameter));
- paramList->ParameterValues = (GLfloat (*)[4])
+ paramList->ParameterValues = (gl_constant_value (*)[4])
_mesa_align_realloc(paramList->ParameterValues, /* old buf */
- oldNum * 4 * sizeof(GLfloat), /* old size */
- paramList->Size * 4 *sizeof(GLfloat), /* new sz */
+ oldNum * 4 * sizeof(gl_constant_value),/* old sz */
+ paramList->Size*4*sizeof(gl_constant_value),/*new*/
16);
}
@@ -142,7 +143,7 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
return -1;
}
else {
- GLuint i;
+ GLuint i, j;
paramList->NumParameters = oldNum + sz4;
@@ -163,7 +164,8 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
}
else {
/* silence valgrind */
- ASSIGN_4V(paramList->ParameterValues[oldNum + i], 0, 0, 0, 0);
+ for (j = 0; j < 4; j++)
+ paramList->ParameterValues[oldNum + i][j].f = 0;
}
size -= 4;
}
@@ -184,7 +186,7 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
*/
GLint
_mesa_add_named_parameter(struct gl_program_parameter_list *paramList,
- const char *name, const GLfloat values[4])
+ const char *name, const gl_constant_value values[4])
{
return _mesa_add_parameter(paramList, PROGRAM_NAMED_PARAM, name,
4, GL_NONE, values, NULL, 0x0);
@@ -204,17 +206,17 @@ _mesa_add_named_parameter(struct gl_program_parameter_list *paramList,
*/
GLint
_mesa_add_named_constant(struct gl_program_parameter_list *paramList,
- const char *name, const GLfloat values[4],
+ const char *name, const gl_constant_value values[4],
GLuint size)
{
/* first check if this is a duplicate constant */
GLint pos;
for (pos = 0; pos < (GLint)paramList->NumParameters; pos++) {
- const GLfloat *pvals = paramList->ParameterValues[pos];
- if (pvals[0] == values[0] &&
- pvals[1] == values[1] &&
- pvals[2] == values[2] &&
- pvals[3] == values[3] &&
+ const gl_constant_value *pvals = paramList->ParameterValues[pos];
+ if (pvals[0].u == values[0].u &&
+ pvals[1].u == values[1].u &&
+ pvals[2].u == values[2].u &&
+ pvals[3].u == values[3].u &&
strcmp(paramList->Parameters[pos].Name, name) == 0) {
/* Same name and value is already in the param list - reuse it */
return pos;
@@ -239,9 +241,9 @@ _mesa_add_named_constant(struct gl_program_parameter_list *paramList,
* \return index/position of the new parameter in the parameter list.
*/
GLint
-_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
- const GLfloat values[4], GLuint size,
- GLuint *swizzleOut)
+_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList,
+ const gl_constant_value values[4], GLuint size,
+ GLenum datatype, GLuint *swizzleOut)
{
GLint pos;
ASSERT(size >= 1);
@@ -262,7 +264,7 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
struct gl_program_parameter *p = paramList->Parameters + pos;
if (p->Type == PROGRAM_CONSTANT && p->Size + size <= 4) {
/* ok, found room */
- GLfloat *pVal = paramList->ParameterValues[pos];
+ gl_constant_value *pVal = paramList->ParameterValues[pos];
GLuint swz = p->Size; /* 1, 2 or 3 for Y, Z, W */
pVal[p->Size] = values[0];
p->Size++;
@@ -274,7 +276,7 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
/* add a new parameter to store this constant */
pos = _mesa_add_parameter(paramList, PROGRAM_CONSTANT, NULL,
- size, GL_NONE, values, NULL, 0x0);
+ size, datatype, values, NULL, 0x0);
if (pos >= 0 && swizzleOut) {
if (size == 1)
*swizzleOut = SWIZZLE_XXXX;
@@ -285,6 +287,28 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
}
/**
+ * Add a new unnamed constant to the parameter list. This will be used
+ * when a fragment/vertex program contains something like this:
+ * MOV r, { 0, 1, 2, 3 };
+ * If swizzleOut is non-null we'll search the parameter list for an
+ * existing instance of the constant which matches with a swizzle.
+ *
+ * \param paramList the parameter list
+ * \param values up to four constant values; \p size gives how many
+ * \param swizzleOut returns swizzle mask for accessing the constant
+ * \return index/position of the new parameter in the parameter list.
+ * \sa _mesa_add_typed_unnamed_constant
+ */
+GLint
+_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
+ const gl_constant_value values[4], GLuint size,
+ GLuint *swizzleOut)
+{
+ return _mesa_add_typed_unnamed_constant(paramList, values, size, GL_NONE,
+ swizzleOut);
+}
+
+/**
* Add parameter representing a varying variable.
*/
GLint
@@ -401,7 +425,7 @@ _mesa_add_state_reference(struct gl_program_parameter_list *paramList,
* Lookup a parameter value by name in the given parameter list.
* \return pointer to the float[4] values.
*/
-GLfloat *
+gl_constant_value *
_mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList,
GLsizei nameLen, const char *name)
{
@@ -465,7 +489,7 @@ _mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList,
*/
GLboolean
_mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
- const GLfloat v[], GLuint vSize,
+ const gl_constant_value v[], GLuint vSize,
GLint *posOut, GLuint *swizzleOut)
{
GLuint i;
@@ -484,7 +508,7 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
/* swizzle not allowed */
GLuint j, match = 0;
for (j = 0; j < vSize; j++) {
- if (v[j] == list->ParameterValues[i][j])
+ if (v[j].u == list->ParameterValues[i][j].u)
match++;
}
if (match == vSize) {
@@ -498,7 +522,7 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
/* look for v[0] anywhere within float[4] value */
GLuint j;
for (j = 0; j < list->Parameters[i].Size; j++) {
- if (list->ParameterValues[i][j] == v[0]) {
+ if (list->ParameterValues[i][j].u == v[0].u) {
/* found it */
*posOut = i;
*swizzleOut = MAKE_SWIZZLE4(j, j, j, j);
@@ -511,13 +535,13 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
GLuint swz[4];
GLuint match = 0, j, k;
for (j = 0; j < vSize; j++) {
- if (v[j] == list->ParameterValues[i][j]) {
+ if (v[j].u == list->ParameterValues[i][j].u) {
swz[j] = j;
match++;
}
else {
for (k = 0; k < list->Parameters[i].Size; k++) {
- if (v[j] == list->ParameterValues[i][k]) {
+ if (v[j].u == list->ParameterValues[i][k].u) {
swz[j] = k;
match++;
break;
diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h
index 10cbbe57a6c..1a5ed343937 100644
--- a/src/mesa/program/prog_parameter.h
+++ b/src/mesa/program/prog_parameter.h
@@ -47,6 +47,17 @@
/*@}*/
+/**
+ * Actual data for constant values of parameters.
+ */
+typedef union gl_constant_value
+{
+ GLfloat f;
+ GLboolean b;
+ GLint i;
+ GLuint u;
+} gl_constant_value;
+
/**
* Program parameter.
@@ -81,7 +92,7 @@ struct gl_program_parameter_list
GLuint Size; /**< allocated size of Parameters, ParameterValues */
GLuint NumParameters; /**< number of parameters in arrays */
struct gl_program_parameter *Parameters; /**< Array [Size] */
- GLfloat (*ParameterValues)[4]; /**< Array [Size] of GLfloat[4] */
+ gl_constant_value (*ParameterValues)[4]; /**< Array [Size] of constant[4] */
GLbitfield StateFlags; /**< _NEW_* flags indicating which state changes
might invalidate ParameterValues[] */
};
@@ -112,22 +123,28 @@ _mesa_num_parameters(const struct gl_program_parameter_list *list)
extern GLint
_mesa_add_parameter(struct gl_program_parameter_list *paramList,
gl_register_file type, const char *name,
- GLuint size, GLenum datatype, const GLfloat *values,
+ GLuint size, GLenum datatype,
+ const gl_constant_value *values,
const gl_state_index state[STATE_LENGTH],
GLbitfield flags);
extern GLint
_mesa_add_named_parameter(struct gl_program_parameter_list *paramList,
- const char *name, const GLfloat values[4]);
+ const char *name, const gl_constant_value values[4]);
extern GLint
_mesa_add_named_constant(struct gl_program_parameter_list *paramList,
- const char *name, const GLfloat values[4],
+ const char *name, const gl_constant_value values[4],
GLuint size);
extern GLint
+_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList,
+ const gl_constant_value values[4], GLuint size,
+ GLenum datatype, GLuint *swizzleOut);
+
+extern GLint
_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
- const GLfloat values[4], GLuint size,
+ const gl_constant_value values[4], GLuint size,
GLuint *swizzleOut);
extern GLint
@@ -143,7 +160,7 @@ extern GLint
_mesa_add_state_reference(struct gl_program_parameter_list *paramList,
const gl_state_index stateTokens[STATE_LENGTH]);
-extern GLfloat *
+extern gl_constant_value *
_mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList,
GLsizei nameLen, const char *name);
@@ -153,7 +170,7 @@ _mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList,
extern GLboolean
_mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
- const GLfloat v[], GLuint vSize,
+ const gl_constant_value v[], GLuint vSize,
GLint *posOut, GLuint *swizzleOut);
extern GLuint
diff --git a/src/mesa/program/prog_parameter_layout.c b/src/mesa/program/prog_parameter_layout.c
index 90a9771080c..28fca3b92d9 100644
--- a/src/mesa/program/prog_parameter_layout.c
+++ b/src/mesa/program/prog_parameter_layout.c
@@ -182,7 +182,7 @@ _mesa_layout_parameters(struct asm_parser_state *state)
switch (p->Type) {
case PROGRAM_CONSTANT: {
- const float *const v =
+ const gl_constant_value *const v =
state->prog->Parameters->ParameterValues[idx];
inst->Base.SrcReg[i].Index =
diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c
index 7c3b4909e73..70412b1fa6a 100644
--- a/src/mesa/program/prog_print.c
+++ b/src/mesa/program/prog_print.c
@@ -985,7 +985,7 @@ _mesa_fprint_parameter_list(FILE *f,
fprintf(f, "dirty state flags: 0x%x\n", list->StateFlags);
for (i = 0; i < list->NumParameters; i++){
struct gl_program_parameter *param = list->Parameters + i;
- const GLfloat *v = list->ParameterValues[i];
+ const GLfloat *v = (GLfloat *) list->ParameterValues[i];
fprintf(f, "param[%d] sz=%d %s %s = {%.3g, %.3g, %.3g, %.3g}",
i, param->Size,
_mesa_register_file_name(list->Parameters[i].Type),
diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c
index 16f9690e865..6aa2409e85e 100644
--- a/src/mesa/program/prog_statevars.c
+++ b/src/mesa/program/prog_statevars.c
@@ -1111,7 +1111,7 @@ _mesa_load_state_parameters(struct gl_context *ctx,
if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) {
_mesa_fetch_state(ctx,
paramList->Parameters[i].StateIndexes,
- paramList->ParameterValues[i]);
+ &paramList->ParameterValues[i][0].f);
}
}
}
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index adca094ee89..ecff2344a44 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -388,8 +388,9 @@ _mesa_delete_program(struct gl_context *ctx, struct gl_program *prog)
if (prog->String)
free(prog->String);
- _mesa_free_instructions(prog->Instructions, prog->NumInstructions);
-
+ if (prog->Instructions) {
+ _mesa_free_instructions(prog->Instructions, prog->NumInstructions);
+ }
if (prog->Parameters) {
_mesa_free_parameter_list(prog->Parameters);
}
@@ -1031,7 +1032,8 @@ _mesa_postprocess_program(struct gl_context *ctx, struct gl_program *prog)
GLuint i;
GLuint whiteSwizzle;
GLint whiteIndex = _mesa_add_unnamed_constant(prog->Parameters,
- white, 4, &whiteSwizzle);
+ (gl_constant_value *) white,
+ 4, &whiteSwizzle);
(void) whiteIndex;
diff --git a/src/mesa/program/program_parse.y b/src/mesa/program/program_parse.y
index dbf5abaa617..dec35038be5 100644
--- a/src/mesa/program/program_parse.y
+++ b/src/mesa/program/program_parse.y
@@ -1854,64 +1854,64 @@ paramConstUse: paramConstScalarUse | paramConstVector;
paramConstScalarDecl: signedFloatConstant
{
$$.count = 4;
- $$.data[0] = $1;
- $$.data[1] = $1;
- $$.data[2] = $1;
- $$.data[3] = $1;
+ $$.data[0].f = $1;
+ $$.data[1].f = $1;
+ $$.data[2].f = $1;
+ $$.data[3].f = $1;
}
;
paramConstScalarUse: REAL
{
$$.count = 1;
- $$.data[0] = $1;
- $$.data[1] = $1;
- $$.data[2] = $1;
- $$.data[3] = $1;
+ $$.data[0].f = $1;
+ $$.data[1].f = $1;
+ $$.data[2].f = $1;
+ $$.data[3].f = $1;
}
| INTEGER
{
$$.count = 1;
- $$.data[0] = (float) $1;
- $$.data[1] = (float) $1;
- $$.data[2] = (float) $1;
- $$.data[3] = (float) $1;
+ $$.data[0].f = (float) $1;
+ $$.data[1].f = (float) $1;
+ $$.data[2].f = (float) $1;
+ $$.data[3].f = (float) $1;
}
;
paramConstVector: '{' signedFloatConstant '}'
{
$$.count = 4;
- $$.data[0] = $2;
- $$.data[1] = 0.0f;
- $$.data[2] = 0.0f;
- $$.data[3] = 1.0f;
+ $$.data[0].f = $2;
+ $$.data[1].f = 0.0f;
+ $$.data[2].f = 0.0f;
+ $$.data[3].f = 1.0f;
}
| '{' signedFloatConstant ',' signedFloatConstant '}'
{
$$.count = 4;
- $$.data[0] = $2;
- $$.data[1] = $4;
- $$.data[2] = 0.0f;
- $$.data[3] = 1.0f;
+ $$.data[0].f = $2;
+ $$.data[1].f = $4;
+ $$.data[2].f = 0.0f;
+ $$.data[3].f = 1.0f;
}
| '{' signedFloatConstant ',' signedFloatConstant ','
signedFloatConstant '}'
{
$$.count = 4;
- $$.data[0] = $2;
- $$.data[1] = $4;
- $$.data[2] = $6;
- $$.data[3] = 1.0f;
+ $$.data[0].f = $2;
+ $$.data[1].f = $4;
+ $$.data[2].f = $6;
+ $$.data[3].f = 1.0f;
}
| '{' signedFloatConstant ',' signedFloatConstant ','
signedFloatConstant ',' signedFloatConstant '}'
{
$$.count = 4;
- $$.data[0] = $2;
- $$.data[1] = $4;
- $$.data[2] = $6;
- $$.data[3] = $8;
+ $$.data[0].f = $2;
+ $$.data[1].f = $4;
+ $$.data[2].f = $6;
+ $$.data[3].f = $8;
}
;
diff --git a/src/mesa/program/program_parser.h b/src/mesa/program/program_parser.h
index 8e5aaee95e5..5637598f3b3 100644
--- a/src/mesa/program/program_parser.h
+++ b/src/mesa/program/program_parser.h
@@ -23,6 +23,7 @@
#pragma once
#include "main/config.h"
+#include "program/prog_parameter.h"
struct gl_context;
@@ -96,7 +97,7 @@ struct asm_symbol {
struct asm_vector {
unsigned count;
- float data[4];
+ gl_constant_value data[4];
};
diff --git a/src/mesa/program/register_allocate.c b/src/mesa/program/register_allocate.c
index de96eb42c9b..f5b5174fc18 100644
--- a/src/mesa/program/register_allocate.c
+++ b/src/mesa/program/register_allocate.c
@@ -200,6 +200,27 @@ ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2)
}
}
+/**
+ * Makes reg conflict with base_reg and with every register found in
+ * base_reg's conflict list.
+ *
+ * Useful when setting up register classes that are aggregates of some
+ * base hardware registers: one call here stands in for a series of
+ * explicit ra_add_reg_conflict() calls.
+ */
+void
+ra_add_transitive_reg_conflict(struct ra_regs *regs,
+                               unsigned int base_reg, unsigned int reg)
+{
+   int n = 0;
+
+   ra_add_reg_conflict(regs, reg, base_reg);
+
+   while (n < regs->regs[base_reg].num_conflicts) {
+      ra_add_reg_conflict(regs, reg, regs->regs[base_reg].conflict_list[n++]);
+   }
+}
+
unsigned int
ra_alloc_reg_class(struct ra_regs *regs)
{
diff --git a/src/mesa/program/register_allocate.h b/src/mesa/program/register_allocate.h
index 5b95833f394..ee2e58a4756 100644
--- a/src/mesa/program/register_allocate.h
+++ b/src/mesa/program/register_allocate.h
@@ -40,6 +40,8 @@ struct ra_regs *ra_alloc_reg_set(unsigned int count);
unsigned int ra_alloc_reg_class(struct ra_regs *regs);
void ra_add_reg_conflict(struct ra_regs *regs,
unsigned int r1, unsigned int r2);
+void ra_add_transitive_reg_conflict(struct ra_regs *regs,
+ unsigned int base_reg, unsigned int reg);
void ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int reg);
void ra_set_finalize(struct ra_regs *regs);
/** @} */
diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp
index 1457d1199fa..e8d34c670a9 100644
--- a/src/mesa/program/sampler.cpp
+++ b/src/mesa/program/sampler.cpp
@@ -132,6 +132,6 @@ _mesa_get_sampler_uniform_value(class ir_dereference *sampler,
index += getname.offset;
- return prog->Parameters->ParameterValues[index][0];
+ return prog->Parameters->ParameterValues[index][0].f;
}
}
diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak
index 4b2ec08bbb0..5e77e0f5919 100644
--- a/src/mesa/sources.mak
+++ b/src/mesa/sources.mak
@@ -251,6 +251,7 @@ PROGRAM_SOURCES = \
program/prog_instruction.c \
program/prog_noise.c \
program/prog_optimize.c \
+ program/prog_opt_constant_fold.c \
program/prog_parameter.c \
program/prog_parameter_layout.c \
program/prog_print.c \
@@ -336,7 +337,8 @@ MESA_GALLIUM_SOURCES = \
MESA_GALLIUM_CXX_SOURCES = \
$(MAIN_CXX_SOURCES) \
- $(SHADER_CXX_SOURCES)
+ $(SHADER_CXX_SOURCES) \
+ state_tracker/st_glsl_to_tgsi.cpp
# All the core C sources, for dependency checking
ALL_SOURCES = \
diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c
index 1f833d28212..12b5bc5ba79 100644
--- a/src/mesa/state_tracker/st_atom_pixeltransfer.c
+++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c
@@ -84,26 +84,6 @@ make_state_key(struct gl_context *ctx, struct state_key *key)
}
-static struct pipe_resource *
-create_color_map_texture(struct gl_context *ctx)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- struct pipe_resource *pt;
- enum pipe_format format;
- const uint texSize = 256; /* simple, and usually perfect */
-
- /* find an RGBA texture format */
- format = st_choose_format(pipe->screen, GL_RGBA, GL_NONE, GL_NONE,
- PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW);
-
- /* create texture for color map/table */
- pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0,
- texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW);
- return pt;
-}
-
-
/**
* Update the pixelmap texture with the contents of the R/G/B/A pixel maps.
*/
@@ -219,7 +199,7 @@ get_pixel_transfer_program(struct gl_context *ctx, const struct state_key *key)
/* create the colormap/texture now if not already done */
if (!st->pixel_xfer.pixelmap_texture) {
- st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx);
+ st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx);
st->pixel_xfer.pixelmap_sampler_view =
st_create_texture_sampler_view(st->pipe,
st->pixel_xfer.pixelmap_texture);
diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c
index 800a9f1f0e0..3115a2511ce 100644
--- a/src/mesa/state_tracker/st_atom_texture.c
+++ b/src/mesa/state_tracker/st_atom_texture.c
@@ -221,9 +221,9 @@ update_single_texture(struct st_context *st, struct pipe_sampler_view **sampler_
if ((samp->sRGBDecode == GL_SKIP_DECODE_EXT) &&
(_mesa_get_format_color_encoding(texFormat) == GL_SRGB)) {
- /* don't do sRGB->RGB conversion. Interpret the texture
- * texture data as linear values.
- */
+ /* Don't do sRGB->RGB conversion. Interpret the texture data as
+ * linear values.
+ */
const gl_format linearFormat =
_mesa_get_srgb_format_linear(texFormat);
firstImageFormat = st_mesa_format_to_pipe_format(linearFormat);
diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index 49b196032b9..beb5e7cab31 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -172,6 +172,23 @@ make_bitmap_fragment_program(struct gl_context *ctx, GLuint samplerIndex)
}
+static struct gl_program *
+make_bitmap_fragment_program_glsl(struct st_context *st,
+                                  struct st_fragment_program *orig,
+                                  GLuint samplerIndex)
+{
+   /* Ask the driver hook for a fresh fragment program object. */
+   struct st_fragment_program *newFp = (struct st_fragment_program *)
+      st->ctx->Driver.NewProgram(st->ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
+
+   if (newFp == NULL)
+      return NULL;
+   /* Hand orig's glsl_to_tgsi visitor and the sampler to the bitmap visitor. */
+   get_bitmap_visitor(newFp, orig->glsl_to_tgsi, samplerIndex);
+   return &newFp->Base.Base;
+}
+
+
static int
find_free_bit(uint bitfield)
{
@@ -199,6 +216,7 @@ st_make_bitmap_fragment_program(struct st_context *st,
GLuint *bitmap_sampler)
{
struct st_fragment_program *bitmap_prog;
+ struct st_fragment_program *stfpIn = (struct st_fragment_program *) fpIn;
struct gl_program *newProg;
uint sampler;
@@ -207,13 +225,18 @@ st_make_bitmap_fragment_program(struct st_context *st,
* with the bitmap sampler/kill instructions.
*/
sampler = find_free_bit(fpIn->Base.SamplersUsed);
- bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler);
+
+ if (stfpIn->glsl_to_tgsi)
+ newProg = make_bitmap_fragment_program_glsl(st, stfpIn, sampler);
+ else {
+ bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler);
- newProg = _mesa_combine_programs(st->ctx,
- &bitmap_prog->Base.Base,
- &fpIn->Base);
- /* done with this after combining */
- st_reference_fragprog(st, &bitmap_prog, NULL);
+ newProg = _mesa_combine_programs(st->ctx,
+ &bitmap_prog->Base.Base,
+ &fpIn->Base);
+ /* done with this after combining */
+ st_reference_fragprog(st, &bitmap_prog, NULL);
+ }
#if 0
{
@@ -328,8 +351,8 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized,
if(!normalized)
{
- sRight = width;
- tBot = height;
+ sRight = (GLfloat) width;
+ tBot = (GLfloat) height;
}
/* XXX: Need to improve buffer_write to allow NO_WAIT (as well as
@@ -381,7 +404,7 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized,
/* same for all verts: */
for (i = 0; i < 4; i++) {
st->bitmap.vertices[i][0][2] = z;
- st->bitmap.vertices[i][0][3] = 1.0;
+ st->bitmap.vertices[i][0][3] = 1.0f;
st->bitmap.vertices[i][1][0] = color[0];
st->bitmap.vertices[i][1][1] = color[1];
st->bitmap.vertices[i][1][2] = color[2];
@@ -513,7 +536,7 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
cso_set_vertex_elements(cso, 3, st->velems_util_draw);
/* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
- z = z * 2.0 - 1.0;
+ z = z * 2.0f - 1.0f;
/* draw textured quad */
offset = setup_bitmap_vertex_data(st,
diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c
index 416be194d11..750f541b5dd 100644
--- a/src/mesa/state_tracker/st_cb_blit.c
+++ b/src/mesa/state_tracker/st_cb_blit.c
@@ -62,6 +62,84 @@ st_destroy_blit(struct st_context *st)
#if FEATURE_EXT_framebuffer_blit
static void
+st_BlitFramebuffer_resolve(struct gl_context *ctx,
+ GLbitfield mask,
+ struct pipe_resolve_info *info)
+{
+ const GLbitfield depthStencil = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
+ /* Issues one pipe resource_resolve() per buffer selected in mask (color, depth[+stencil], separate stencil). */
+ struct st_context *st = st_context(ctx);
+ /* Only info->mask and the src/dst res/level/layer fields are written here; the x0/y0/x1/y1 rectangle is used as supplied by the caller. */
+ struct st_renderbuffer *srcRb, *dstRb;
+
+ if (mask & GL_COLOR_BUFFER_BIT) {
+ srcRb = st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer);
+ dstRb = st_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]); /* only the first color draw buffer is used as destination */
+
+ info->mask = PIPE_MASK_RGBA;
+
+ info->src.res = srcRb->texture;
+ info->src.layer = srcRb->surface->u.tex.first_layer;
+ info->dst.res = dstRb->texture;
+ info->dst.level = dstRb->surface->u.tex.level;
+ info->dst.layer = dstRb->surface->u.tex.first_layer;
+
+ st->pipe->resource_resolve(st->pipe, info);
+ }
+
+ if (mask & depthStencil) {
+ struct gl_renderbuffer_attachment *srcDepth, *srcStencil;
+ struct gl_renderbuffer_attachment *dstDepth, *dstStencil;
+ boolean combined;
+
+ srcDepth = &ctx->ReadBuffer->Attachment[BUFFER_DEPTH];
+ dstDepth = &ctx->DrawBuffer->Attachment[BUFFER_DEPTH];
+ srcStencil = &ctx->ReadBuffer->Attachment[BUFFER_STENCIL];
+ dstStencil = &ctx->DrawBuffer->Attachment[BUFFER_STENCIL];
+ /* combined is true only if BOTH the read and the draw framebuffer pass st_is_depth_stencil_combined(). */
+ combined =
+ st_is_depth_stencil_combined(srcDepth, srcStencil) &&
+ st_is_depth_stencil_combined(dstDepth, dstStencil);
+
+ if ((mask & GL_DEPTH_BUFFER_BIT) || combined) {
+ /* resolve depth and, if combined and requested, stencil as well */
+ srcRb = st_renderbuffer(srcDepth->Renderbuffer);
+ dstRb = st_renderbuffer(dstDepth->Renderbuffer);
+
+ info->mask = (mask & GL_DEPTH_BUFFER_BIT) ? PIPE_MASK_Z : 0; /* 0 here means stencil-only on a combined buffer; S is OR'd in below */
+ if (combined && (mask & GL_STENCIL_BUFFER_BIT)) {
+ mask &= ~GL_STENCIL_BUFFER_BIT; /* stencil is covered by this resolve, so the separate-stencil pass below is skipped */
+ info->mask |= PIPE_MASK_S;
+ }
+
+ info->src.res = srcRb->texture;
+ info->src.layer = srcRb->surface->u.tex.first_layer;
+ info->dst.res = dstRb->texture;
+ info->dst.level = dstRb->surface->u.tex.level;
+ info->dst.layer = dstRb->surface->u.tex.first_layer;
+
+ st->pipe->resource_resolve(st->pipe, info);
+ }
+
+ if (mask & GL_STENCIL_BUFFER_BIT) {
+ /* resolve separate stencil buffer */
+ srcRb = st_renderbuffer(srcStencil->Renderbuffer);
+ dstRb = st_renderbuffer(dstStencil->Renderbuffer);
+
+ info->mask = PIPE_MASK_S;
+
+ info->src.res = srcRb->texture;
+ info->src.layer = srcRb->surface->u.tex.first_layer;
+ info->dst.res = dstRb->texture;
+ info->dst.level = dstRb->surface->u.tex.level;
+ info->dst.layer = dstRb->surface->u.tex.first_layer;
+
+ st->pipe->resource_resolve(st->pipe, info);
+ }
+ }
+}
+
+static void
st_BlitFramebuffer(struct gl_context *ctx,
GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
@@ -95,6 +173,42 @@ st_BlitFramebuffer(struct gl_context *ctx,
srcY1 = readFB->Height - srcY1;
}
+ /* Disable conditional rendering. */
+ if (st->render_condition) {
+ st->pipe->render_condition(st->pipe, NULL, 0);
+ }
+
+ if (readFB->Visual.sampleBuffers > drawFB->Visual.sampleBuffers) {
+ struct pipe_resolve_info info;
+
+ if (dstX0 < dstX1) {
+ info.dst.x0 = dstX0;
+ info.dst.x1 = dstX1;
+ info.src.x0 = srcX0;
+ info.src.x1 = srcX1;
+ } else {
+ info.dst.x0 = dstX1;
+ info.dst.x1 = dstX0;
+ info.src.x0 = srcX1;
+ info.src.x1 = srcX0;
+ }
+ if (dstY0 < dstY1) {
+ info.dst.y0 = dstY0;
+ info.dst.y1 = dstY1;
+ info.src.y0 = srcY0;
+ info.src.y1 = srcY1;
+ } else {
+ info.dst.y0 = dstY1;
+ info.dst.y1 = dstY0;
+ info.src.y0 = srcY1;
+ info.src.y1 = srcY0;
+ }
+
+ st_BlitFramebuffer_resolve(ctx, mask, &info); /* filter doesn't apply */
+
+ goto done;
+ }
+
if (srcY0 > srcY1 && dstY0 > dstY1) {
/* Both src and dst are upside down. Swap Y to make it
* right-side up to increase odds of using a fast path.
@@ -109,11 +223,6 @@ st_BlitFramebuffer(struct gl_context *ctx,
dstY1 = tmp;
}
- /* Disable conditional rendering. */
- if (st->render_condition) {
- st->pipe->render_condition(st->pipe, NULL, 0);
- }
-
if (mask & GL_COLOR_BUFFER_BIT) {
struct gl_renderbuffer_attachment *srcAtt =
&readFB->Attachment[readFB->_ColorReadBufferIndex];
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
index 7374bb0acc5..a451b44049e 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -93,7 +93,6 @@ st_bufferobj_free(struct gl_context *ctx, struct gl_buffer_object *obj)
*/
static void
st_bufferobj_subdata(struct gl_context *ctx,
- GLenum target,
GLintptrARB offset,
GLsizeiptrARB size,
const GLvoid * data, struct gl_buffer_object *obj)
@@ -133,7 +132,6 @@ st_bufferobj_subdata(struct gl_context *ctx,
*/
static void
st_bufferobj_get_subdata(struct gl_context *ctx,
- GLenum target,
GLintptrARB offset,
GLsizeiptrARB size,
GLvoid * data, struct gl_buffer_object *obj)
@@ -238,52 +236,10 @@ static long st_bufferobj_zero_length = 0;
/**
- * Called via glMapBufferARB().
- */
-static void *
-st_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access,
- struct gl_buffer_object *obj)
-{
- struct st_buffer_object *st_obj = st_buffer_object(obj);
- uint flags;
-
- switch (access) {
- case GL_WRITE_ONLY:
- flags = PIPE_TRANSFER_WRITE;
- break;
- case GL_READ_ONLY:
- flags = PIPE_TRANSFER_READ;
- break;
- case GL_READ_WRITE:
- default:
- flags = PIPE_TRANSFER_READ_WRITE;
- break;
- }
-
- /* Handle zero-size buffers here rather than in drivers */
- if (obj->Size == 0) {
- obj->Pointer = &st_bufferobj_zero_length;
- }
- else {
- obj->Pointer = pipe_buffer_map(st_context(ctx)->pipe,
- st_obj->buffer,
- flags,
- &st_obj->transfer);
- }
-
- if (obj->Pointer) {
- obj->Offset = 0;
- obj->Length = obj->Size;
- }
- return obj->Pointer;
-}
-
-
-/**
* Called via glMapBufferRange().
*/
static void *
-st_bufferobj_map_range(struct gl_context *ctx, GLenum target,
+st_bufferobj_map_range(struct gl_context *ctx,
GLintptr offset, GLsizeiptr length, GLbitfield access,
struct gl_buffer_object *obj)
{
@@ -353,7 +309,7 @@ st_bufferobj_map_range(struct gl_context *ctx, GLenum target,
static void
-st_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target,
+st_bufferobj_flush_mapped_range(struct gl_context *ctx,
GLintptr offset, GLsizeiptr length,
struct gl_buffer_object *obj)
{
@@ -378,7 +334,7 @@ st_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target,
* Called via glUnmapBufferARB().
*/
static GLboolean
-st_bufferobj_unmap(struct gl_context *ctx, GLenum target, struct gl_buffer_object *obj)
+st_bufferobj_unmap(struct gl_context *ctx, struct gl_buffer_object *obj)
{
struct pipe_context *pipe = st_context(ctx)->pipe;
struct st_buffer_object *st_obj = st_buffer_object(obj);
@@ -444,7 +400,6 @@ st_init_bufferobject_functions(struct dd_function_table *functions)
functions->BufferData = st_bufferobj_data;
functions->BufferSubData = st_bufferobj_subdata;
functions->GetBufferSubData = st_bufferobj_get_subdata;
- functions->MapBuffer = st_bufferobj_map;
functions->MapBufferRange = st_bufferobj_map_range;
functions->FlushMappedBufferRange = st_bufferobj_flush_mapped_range;
functions->UnmapBuffer = st_bufferobj_unmap;
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 1d908c0317a..390c518699f 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -94,6 +94,46 @@ is_passthrough_program(const struct gl_fragment_program *prog)
}
+/**
+ * Returns a newly allocated fragment program implementing the current
+ * pixel transfer ops, produced by get_pixel_transfer_visitor() from
+ * orig's glsl_to_tgsi visitor.  Returns NULL if NewProgram fails.
+ */
+static struct gl_fragment_program *
+get_glsl_pixel_transfer_program(struct st_context *st,
+                                struct st_fragment_program *orig)
+{
+   struct gl_context *ctx = st->ctx;
+   struct st_fragment_program *newFp = (struct st_fragment_program *)
+      ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
+   int wantScaleBias, wantPixelMaps;
+
+   if (newFp == NULL)
+      return NULL;
+
+   /* Any channel with a non-identity scale or bias enables scale-and-bias. */
+   wantScaleBias =
+      ctx->Pixel.RedScale != 1.0 || ctx->Pixel.RedBias != 0.0 ||
+      ctx->Pixel.GreenScale != 1.0 || ctx->Pixel.GreenBias != 0.0 ||
+      ctx->Pixel.BlueScale != 1.0 || ctx->Pixel.BlueBias != 0.0 ||
+      ctx->Pixel.AlphaScale != 1.0 || ctx->Pixel.AlphaBias != 0.0;
+
+   wantPixelMaps = ctx->Pixel.MapColorFlag;
+
+   /* The colormap texture and its sampler view are created on first use. */
+   if (wantPixelMaps && !st->pixel_xfer.pixelmap_texture) {
+      st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx);
+      st->pixel_xfer.pixelmap_sampler_view =
+         st_create_texture_sampler_view(st->pipe,
+                                        st->pixel_xfer.pixelmap_texture);
+   }
+
+   get_pixel_transfer_visitor(newFp, orig->glsl_to_tgsi,
+                              wantScaleBias, wantPixelMaps);
+
+   return &newFp->Base;
+}
+
/**
* Make fragment shader for glDraw/CopyPixels. This shader is made
@@ -107,11 +147,15 @@ st_make_drawpix_fragment_program(struct st_context *st,
struct gl_fragment_program **fpOut)
{
struct gl_program *newProg;
+ struct st_fragment_program *stfp = (struct st_fragment_program *) fpIn;
if (is_passthrough_program(fpIn)) {
newProg = (struct gl_program *) _mesa_clone_fragment_program(st->ctx,
&st->pixel_xfer.program->Base);
}
+ else if (stfp->glsl_to_tgsi != NULL) {
+ newProg = (struct gl_program *) get_glsl_pixel_transfer_program(st, stfp);
+ }
else {
#if 0
/* debug */
diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c
index 32694975d17..2abb4d8f082 100644
--- a/src/mesa/state_tracker/st_cb_program.c
+++ b/src/mesa/state_tracker/st_cb_program.c
@@ -44,6 +44,7 @@
#include "st_program.h"
#include "st_mesa_to_tgsi.h"
#include "st_cb_program.h"
+#include "st_glsl_to_tgsi.h"
@@ -129,6 +130,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog)
{
struct st_vertex_program *stvp = (struct st_vertex_program *) prog;
st_release_vp_variants( st, stvp );
+
+ if (stvp->glsl_to_tgsi)
+ free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi);
}
break;
case MESA_GEOMETRY_PROGRAM:
@@ -137,6 +141,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog)
(struct st_geometry_program *) prog;
st_release_gp_variants(st, stgp);
+
+ if (stgp->glsl_to_tgsi)
+ free_glsl_to_tgsi_visitor(stgp->glsl_to_tgsi);
if (stgp->tgsi.tokens) {
st_free_tokens((void *) stgp->tgsi.tokens);
@@ -151,6 +158,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog)
st_release_fp_variants(st, stfp);
+ if (stfp->glsl_to_tgsi)
+ free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
+
if (stfp->tgsi.tokens) {
st_free_tokens(stfp->tgsi.tokens);
stfp->tgsi.tokens = NULL;
@@ -242,4 +252,8 @@ st_init_program_functions(struct dd_function_table *functions)
functions->DeleteProgram = st_delete_program;
functions->IsProgramNative = st_is_program_native;
functions->ProgramStringNotify = st_program_string_notify;
+
+ functions->NewShader = st_new_shader;
+ functions->NewShaderProgram = st_new_shader_program;
+ functions->LinkShader = st_link_shader;
}
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 6907cfc03cf..a3b2ba9e78d 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -344,7 +344,7 @@ guess_and_alloc_texture(struct st_context *st,
stImage->base.Width2,
stImage->base.Height2,
stImage->base.Depth2,
- stImage->level,
+ stImage->base.Level,
&width, &height, &depth)) {
/* we can't determine the image size at level=0 */
stObj->width0 = stObj->height0 = stObj->depth0 = 0;
@@ -367,7 +367,7 @@ guess_and_alloc_texture(struct st_context *st,
stImage->base._BaseFormat == GL_DEPTH_COMPONENT ||
stImage->base._BaseFormat == GL_DEPTH_STENCIL_EXT) &&
!stObj->base.GenerateMipmap &&
- stImage->level == 0) {
+ stImage->base.Level == 0) {
/* only alloc space for a single mipmap level */
lastLevel = 0;
}
@@ -506,8 +506,8 @@ st_TexImage(struct gl_context * ctx,
assert(texImage->Depth == depth);
}
- stImage->face = _mesa_tex_target_to_face(target);
- stImage->level = level;
+ stImage->base.Face = _mesa_tex_target_to_face(target);
+ stImage->base.Level = level;
_mesa_set_fetch_functions(texImage, dims);
@@ -529,7 +529,7 @@ st_TexImage(struct gl_context * ctx,
if (stObj->pt) {
if (level > (GLint) stObj->pt->last_level ||
!st_texture_match_image(stObj->pt, &stImage->base,
- stImage->face, stImage->level)) {
+ stImage->base.Face, stImage->base.Level)) {
DBG("release it\n");
pipe_resource_reference(&stObj->pt, NULL);
assert(!stObj->pt);
@@ -563,7 +563,7 @@ st_TexImage(struct gl_context * ctx,
*/
if (stObj->pt &&
st_texture_match_image(stObj->pt, &stImage->base,
- stImage->face, stImage->level)) {
+ stImage->base.Face, stImage->base.Level)) {
pipe_resource_reference(&stImage->pt, stObj->pt);
assert(stImage->pt);
@@ -1466,34 +1466,6 @@ st_copy_texsubimage(struct gl_context *ctx,
depth/stencil samples per pixel? Need some transfer clarifications. */
assert(sample_count < 2);
- if (srcX < 0) {
- width -= -srcX;
- destX += -srcX;
- srcX = 0;
- }
-
- if (srcY < 0) {
- height -= -srcY;
- destY += -srcY;
- srcY = 0;
- }
-
- if (destX < 0) {
- width -= -destX;
- srcX += -destX;
- destX = 0;
- }
-
- if (destY < 0) {
- height -= -destY;
- srcY += -destY;
- destY = 0;
- }
-
- if (width < 0 || height < 0)
- return;
-
-
assert(strb);
assert(strb->surface);
assert(stImage->pt);
@@ -1529,8 +1501,8 @@ st_copy_texsubimage(struct gl_context *ctx,
pipe->resource_copy_region(pipe,
/* dest */
stImage->pt,
- stImage->level,
- destX, destY, destZ + stImage->face,
+ stImage->base.Level,
+ destX, destY, destZ + stImage->base.Face,
/* src */
strb->texture,
strb->surface->u.tex.level,
@@ -1552,9 +1524,9 @@ st_copy_texsubimage(struct gl_context *ctx,
memset(&surf_tmpl, 0, sizeof(surf_tmpl));
surf_tmpl.format = util_format_linear(stImage->pt->format);
surf_tmpl.usage = PIPE_BIND_RENDER_TARGET;
- surf_tmpl.u.tex.level = stImage->level;
- surf_tmpl.u.tex.first_layer = stImage->face + destZ;
- surf_tmpl.u.tex.last_layer = stImage->face + destZ;
+ surf_tmpl.u.tex.level = stImage->base.Level;
+ surf_tmpl.u.tex.first_layer = stImage->base.Face + destZ;
+ surf_tmpl.u.tex.last_layer = stImage->base.Face + destZ;
dest_surface = pipe->create_surface(pipe, stImage->pt,
&surf_tmpl);
@@ -1610,59 +1582,6 @@ st_copy_texsubimage(struct gl_context *ctx,
static void
-st_CopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level,
- GLenum internalFormat,
- GLint x, GLint y, GLsizei width, GLint border)
-{
- struct gl_texture_unit *texUnit =
- &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
- struct gl_texture_object *texObj =
- _mesa_select_tex_object(ctx, texUnit, target);
- struct gl_texture_image *texImage =
- _mesa_select_tex_image(ctx, texObj, target, level);
-
- /* Setup or redefine the texture object, texture and texture
- * image. Don't populate yet.
- */
- ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
- width, border,
- GL_RGBA, CHAN_TYPE, NULL,
- &ctx->DefaultPacking, texObj, texImage);
-
- st_copy_texsubimage(ctx, target, level,
- 0, 0, 0, /* destX,Y,Z */
- x, y, width, 1); /* src X, Y, size */
-}
-
-
-static void
-st_CopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level,
- GLenum internalFormat,
- GLint x, GLint y, GLsizei width, GLsizei height,
- GLint border)
-{
- struct gl_texture_unit *texUnit =
- &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
- struct gl_texture_object *texObj =
- _mesa_select_tex_object(ctx, texUnit, target);
- struct gl_texture_image *texImage =
- _mesa_select_tex_image(ctx, texObj, target, level);
-
- /* Setup or redefine the texture object, texture and texture
- * image. Don't populate yet.
- */
- ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
- width, height, border,
- GL_RGBA, CHAN_TYPE, NULL,
- &ctx->DefaultPacking, texObj, texImage);
-
- st_copy_texsubimage(ctx, target, level,
- 0, 0, 0, /* destX,Y,Z */
- x, y, width, height); /* src X, Y, size */
-}
-
-
-static void
st_CopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level,
GLint xoffset, GLint x, GLint y, GLsizei width)
{
@@ -1710,7 +1629,7 @@ copy_image_data_to_texture(struct st_context *st,
/* debug checks */
{
const struct gl_texture_image *dstImage =
- stObj->base.Image[stImage->face][dstLevel];
+ stObj->base.Image[stImage->base.Face][dstLevel];
assert(dstImage);
assert(dstImage->Width == stImage->base.Width);
assert(dstImage->Height == stImage->base.Height);
@@ -1722,15 +1641,15 @@ copy_image_data_to_texture(struct st_context *st,
*/
st_texture_image_copy(st->pipe,
stObj->pt, dstLevel, /* dest texture, level */
- stImage->pt, stImage->level, /* src texture, level */
- stImage->face);
+ stImage->pt, stImage->base.Level, /* src texture, level */
+ stImage->base.Face);
pipe_resource_reference(&stImage->pt, NULL);
}
else if (stImage->base.Data) {
st_texture_image_data(st,
stObj->pt,
- stImage->face,
+ stImage->base.Face,
dstLevel,
stImage->base.Data,
stImage->base.RowStride *
@@ -1947,8 +1866,6 @@ st_init_texture_functions(struct dd_function_table *functions)
functions->CompressedTexSubImage1D = st_CompressedTexSubImage1D;
functions->CompressedTexSubImage2D = st_CompressedTexSubImage2D;
functions->CompressedTexSubImage3D = st_CompressedTexSubImage3D;
- functions->CopyTexImage1D = st_CopyTexImage1D;
- functions->CopyTexImage2D = st_CopyTexImage2D;
functions->CopyTexSubImage1D = st_CopyTexSubImage1D;
functions->CopyTexSubImage2D = st_CopyTexSubImage2D;
functions->CopyTexSubImage3D = st_CopyTexSubImage3D;
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 99b231d9706..8e900934054 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -208,6 +208,15 @@ void st_init_limits(struct st_context *st)
}
+/**
+ * Report whether the environment asks for S3TC to be force-enabled.
+ *
+ * \return GL_TRUE only when the "force_s3tc_enable" environment variable is
+ *         set to exactly "true"; GL_FALSE otherwise.
+ */
+static GLboolean st_get_s3tc_override(void)
+{
+   const char *value = _mesa_getenv("force_s3tc_enable");
+
+   if (value == NULL)
+      return GL_FALSE;
+
+   return strcmp(value, "true") == 0 ? GL_TRUE : GL_FALSE;
+}
+
+
/**
* Use pipe_screen::get_param() to query PIPE_CAP_ values to determine
* which GL extensions are supported.
@@ -219,6 +228,7 @@ void st_init_extensions(struct st_context *st)
{
struct pipe_screen *screen = st->pipe->screen;
struct gl_context *ctx = st->ctx;
+ int i;
/*
* Extensions that are supported by all Gallium drivers:
@@ -426,7 +436,7 @@ void st_init_extensions(struct st_context *st)
if (screen->is_format_supported(screen, PIPE_FORMAT_DXT5_RGBA,
PIPE_TEXTURE_2D, 0,
PIPE_BIND_SAMPLER_VIEW) &&
- ctx->Mesa_DXTn) {
+ (ctx->Mesa_DXTn || st_get_s3tc_override())) {
ctx->Extensions.EXT_texture_compression_s3tc = GL_TRUE;
ctx->Extensions.S3_s3tc = GL_TRUE;
}
@@ -596,6 +606,16 @@ void st_init_extensions(struct st_context *st)
ctx->Extensions.EXT_packed_float = GL_TRUE;
}
+ /* Maximum sample count. */
+ for (i = 16; i > 0; --i) {
+ if (screen->is_format_supported(screen, PIPE_FORMAT_B8G8R8A8_UNORM,
+ PIPE_TEXTURE_2D, i,
+ PIPE_BIND_RENDER_TARGET)) {
+ ctx->Const.MaxSamples = i;
+ break;
+ }
+ }
+
if (screen->get_param(screen, PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE)) {
ctx->Extensions.ARB_seamless_cube_map = GL_TRUE;
ctx->Extensions.AMD_seamless_cubemap_per_texture = GL_TRUE;
diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c
index b0911294a7c..82ca4af7fe4 100644
--- a/src/mesa/state_tracker/st_gen_mipmap.c
+++ b/src/mesa/state_tracker/st_gen_mipmap.c
@@ -453,7 +453,6 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target,
srcImage->TexFormat);
stImage = st_texture_image(dstImage);
- stImage->level = dstLevel;
pipe_resource_reference(&stImage->pt, pt);
}
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
new file mode 100644
index 00000000000..9cac30995af
--- /dev/null
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -0,0 +1,5142 @@
+/*
+ * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
+ * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
+ * Copyright © 2010 Intel Corporation
+ * Copyright © 2011 Bryan Cain
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file st_glsl_to_tgsi.cpp
+ *
+ * Translate GLSL IR to TGSI.
+ */
+
+#include <stdio.h>
+#include "main/compiler.h"
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_print_visitor.h"
+#include "ir_expression_flattening.h"
+#include "glsl_types.h"
+#include "glsl_parser_extras.h"
+#include "../glsl/program.h"
+#include "ir_optimization.h"
+#include "ast.h"
+
+extern "C" {
+#include "main/mtypes.h"
+#include "main/shaderapi.h"
+#include "main/shaderobj.h"
+#include "main/uniforms.h"
+#include "program/hash_table.h"
+#include "program/prog_instruction.h"
+#include "program/prog_optimize.h"
+#include "program/prog_print.h"
+#include "program/program.h"
+#include "program/prog_uniform.h"
+#include "program/prog_parameter.h"
+#include "program/sampler.h"
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_context.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_state.h"
+#include "util/u_math.h"
+#include "tgsi/tgsi_ureg.h"
+#include "tgsi/tgsi_info.h"
+#include "st_context.h"
+#include "st_program.h"
+#include "st_glsl_to_tgsi.h"
+#include "st_mesa_to_tgsi.h"
+}
+
+/* Pseudo register file for immediates. It reuses the PROGRAM_FILE_MAX value;
+ * add_constant() accepts it as the alternative to PROGRAM_CONSTANT.
+ */
+#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
+/* Bitmask with one bit set (1 << file) for each register file named below. */
+#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \
+ (1 << PROGRAM_ENV_PARAM) | \
+ (1 << PROGRAM_STATE_VAR) | \
+ (1 << PROGRAM_NAMED_PARAM) | \
+ (1 << PROGRAM_CONSTANT) | \
+ (1 << PROGRAM_UNIFORM))
+
+/* NOTE(review): presumably the upper bound on temporaries handled by the
+ * later passes; it is not referenced in this part of the file -- confirm.
+ */
+#define MAX_TEMPS 4096
+
+/* Forward declarations: each register class has a converting constructor that
+ * takes the other, and st_src_reg's constructor calls swizzle_for_size().
+ */
+class st_src_reg;
+class st_dst_reg;
+
+static int swizzle_for_size(int size);
+
+/**
+ * Source operand of a glsl_to_tgsi_instruction; the counterpart of the TGSI
+ * ureg_src structure.
+ */
+class st_src_reg {
+public:
+   /**
+    * Build a source from a GLSL type.  Scalars, vectors and matrices get a
+    * swizzle sized to the type's vector width; any other type (or a NULL
+    * type) gets the identity XYZW swizzle.
+    */
+   st_src_reg(gl_register_file file, int index, const glsl_type *type)
+      : file(file),
+        index(index),
+        swizzle(SWIZZLE_XYZW),
+        negate(0),
+        type(type ? type->base_type : GLSL_TYPE_ERROR),
+        reladdr(NULL)
+   {
+      if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
+         this->swizzle = swizzle_for_size(type->vector_elements);
+   }
+
+   /** Build a source with an explicit GLSL_TYPE_* and an XYZW swizzle. */
+   st_src_reg(gl_register_file file, int index, int type)
+      : file(file),
+        index(index),
+        swizzle(SWIZZLE_XYZW),
+        negate(0),
+        type(type),
+        reladdr(NULL)
+   {
+   }
+
+   /** Build an undefined source (PROGRAM_UNDEFINED, GLSL_TYPE_ERROR). */
+   st_src_reg()
+      : file(PROGRAM_UNDEFINED),
+        index(0),
+        swizzle(0),
+        negate(0),
+        type(GLSL_TYPE_ERROR),
+        reladdr(NULL)
+   {
+   }
+
+   explicit st_src_reg(st_dst_reg reg);
+
+   gl_register_file file; /**< PROGRAM_* from Mesa */
+   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
+   GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
+   int negate; /**< NEGATE_XYZW mask from mesa */
+   int type; /**< GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
+   /** Register index should be offset by the integer in this reg. */
+   st_src_reg *reladdr;
+};
+
+/**
+ * Destination operand of a glsl_to_tgsi_instruction.
+ */
+class st_dst_reg {
+public:
+   /** Build a destination at index 0 of \c file with the given writemask. */
+   st_dst_reg(gl_register_file file, int writemask, int type)
+      : file(file),
+        index(0),
+        writemask(writemask),
+        cond_mask(COND_TR),
+        type(type),
+        reladdr(NULL)
+   {
+   }
+
+   /** Build an undefined destination with an empty writemask. */
+   st_dst_reg()
+      : file(PROGRAM_UNDEFINED),
+        index(0),
+        writemask(0),
+        cond_mask(COND_TR),
+        type(GLSL_TYPE_ERROR),
+        reladdr(NULL)
+   {
+   }
+
+   explicit st_dst_reg(st_src_reg reg);
+
+   gl_register_file file; /**< PROGRAM_* from Mesa */
+   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
+   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
+   GLuint cond_mask:4;
+   int type; /**< GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
+   /** Register index should be offset by the integer in this reg. */
+   st_src_reg *reladdr;
+};
+
+/**
+ * Convert a destination into a source that reads the same register: file,
+ * index, type and relative address are copied, the swizzle is XYZW and
+ * nothing is negated.
+ */
+st_src_reg::st_src_reg(st_dst_reg reg)
+   : file(reg.file),
+     index(reg.index),
+     swizzle(SWIZZLE_XYZW),
+     negate(0),
+     type(reg.type),
+     reladdr(reg.reladdr)
+{
+}
+
+/**
+ * Convert a source into a destination that writes the same register: file,
+ * index, type and relative address are copied, all four channels are enabled
+ * and the condition mask is COND_TR.
+ */
+st_dst_reg::st_dst_reg(st_src_reg reg)
+   : file(reg.file),
+     index(reg.index),
+     writemask(WRITEMASK_XYZW),
+     cond_mask(COND_TR),
+     type(reg.type),
+     reladdr(reg.reladdr)
+{
+}
+
+/**
+ * One instruction in the visitor's intermediate instruction list: an opcode,
+ * one destination, up to three sources and some per-instruction flags.
+ */
+class glsl_to_tgsi_instruction : public exec_node {
+public:
+   /* Instances are carved out of a ralloc context and zero-filled.  There is
+    * no need to delete them: ralloc_free on 'ctx' (or on any of its
+    * ancestors) releases the storage. */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *const storage = rzalloc_size(ctx, size);
+
+      assert(storage != NULL);
+      return storage;
+   }
+
+   unsigned op;
+   st_dst_reg dst;
+   st_src_reg src[3];
+   /** Pointer to the ir source this tree came from for debugging */
+   ir_instruction *ir;
+   GLboolean cond_update;
+   bool saturate;
+   int sampler; /**< sampler index */
+   int tex_target; /**< One of TEXTURE_*_INDEX */
+   GLboolean tex_shadow;
+   int dead_mask; /**< Used in dead code elimination */
+
+   class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
+};
+
+/**
+ * List node recording which register (file + index) backs an ir_variable.
+ */
+class variable_storage : public exec_node {
+public:
+   variable_storage(ir_variable *var, gl_register_file file, int index)
+   {
+      this->file = file;
+      this->index = index;
+      this->var = var;
+   }
+
+   gl_register_file file;
+   int index;
+   ir_variable *var; /* variable that maps to this, if any */
+};
+
+/**
+ * List node holding one immediate value of up to four components.
+ */
+class immediate_storage : public exec_node {
+public:
+   /**
+    * Copy \c size components from \c values.
+    *
+    * \param values  Components to copy; at least \c size entries are read.
+    * \param size    Number of components, 1 to 4.
+    * \param type    Data type tag stored alongside the values.
+    */
+   immediate_storage(gl_constant_value *values, int size, int type)
+   {
+      /* The backing array has exactly four slots; refuse anything that
+       * would make the memcpy below run past it.
+       */
+      assert(size >= 1 && size <= 4);
+
+      memcpy(this->values, values, size * sizeof(gl_constant_value));
+      this->size = size;
+      this->type = type;
+   }
+
+   gl_constant_value values[4];
+   int size; /**< Number of components (1-4) */
+   int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
+};
+
+/**
+ * List node describing one GLSL function signature known to the visitor
+ * (kept on glsl_to_tgsi_visitor::function_signatures).
+ */
+class function_entry : public exec_node {
+public:
+ /** IR signature this entry describes. */
+ ir_function_signature *sig;
+
+ /**
+ * identifier of this function signature used by the program.
+ *
+ * At the point that TGSI instructions for function calls are
+ * generated, we don't know the address of the first instruction of
+ * the function body. So we make the BranchTarget that is called a
+ * small integer and rewrite them during set_branchtargets().
+ *
+ * NOTE(review): set_branchtargets() is not visible in this part of the
+ * file -- confirm the name is still accurate.
+ */
+ int sig_id;
+
+ /**
+ * Pointer to first instruction of the function body.
+ *
+ * Set during function body emits after main() is processed.
+ */
+ glsl_to_tgsi_instruction *bgn_inst;
+
+ /**
+ * Index of the first instruction of the function body in actual TGSI.
+ *
+ * Set after conversion from glsl_to_tgsi_instruction to TGSI.
+ */
+ int inst;
+
+ /** Storage for the return value. */
+ st_src_reg return_reg;
+};
+
+/**
+ * IR visitor that walks GLSL IR and appends glsl_to_tgsi_instruction nodes to
+ * \c instructions through the emit() family of helpers.
+ */
+class glsl_to_tgsi_visitor : public ir_visitor {
+public:
+ glsl_to_tgsi_visitor();
+ ~glsl_to_tgsi_visitor();
+
+ function_entry *current_function;
+
+ struct gl_context *ctx;
+ struct gl_program *prog;
+ struct gl_shader_program *shader_program;
+ struct gl_shader_compiler_options *options;
+
+ /** Index of the next unallocated temporary; advanced by get_temp(). */
+ int next_temp;
+
+ /** Set to 1 by emit() once an ARL instruction has been emitted. */
+ int num_address_regs;
+ int samplers_used;
+ /** Set by emit() when a PROGRAM_TEMPORARY operand uses relative addressing. */
+ bool indirect_addr_temps;
+ /** Set by emit() when a constant-file operand uses relative addressing. */
+ bool indirect_addr_consts;
+
+ int glsl_version;
+ /** When true, get_opcode() may select integer opcodes and get_temp()
+ * keeps the GLSL base type instead of forcing float. */
+ bool native_integers;
+
+ variable_storage *find_variable_storage(ir_variable *var);
+
+ int add_constant(gl_register_file file, gl_constant_value values[4],
+ int size, int datatype, GLuint *swizzle_out);
+
+ function_entry *get_function_signature(ir_function_signature *sig);
+
+ st_src_reg get_temp(const glsl_type *type);
+ void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
+
+ st_src_reg st_src_reg_for_float(float val);
+ st_src_reg st_src_reg_for_int(int val);
+ st_src_reg st_src_reg_for_type(int type, int val);
+
+ /**
+ * \name Visit methods
+ *
+ * As typical for the visitor pattern, there must be one \c visit method for
+ * each concrete subclass of \c ir_instruction. Virtual base classes within
+ * the hierarchy should not have \c visit methods.
+ */
+ /*@{*/
+ virtual void visit(ir_variable *);
+ virtual void visit(ir_loop *);
+ virtual void visit(ir_loop_jump *);
+ virtual void visit(ir_function_signature *);
+ virtual void visit(ir_function *);
+ virtual void visit(ir_expression *);
+ virtual void visit(ir_swizzle *);
+ virtual void visit(ir_dereference_variable *);
+ virtual void visit(ir_dereference_array *);
+ virtual void visit(ir_dereference_record *);
+ virtual void visit(ir_assignment *);
+ virtual void visit(ir_constant *);
+ virtual void visit(ir_call *);
+ virtual void visit(ir_return *);
+ virtual void visit(ir_discard *);
+ virtual void visit(ir_texture *);
+ virtual void visit(ir_if *);
+ /*@}*/
+
+ st_src_reg result;
+
+ /** List of variable_storage */
+ exec_list variables;
+
+ /** List of immediate_storage */
+ exec_list immediates;
+ /** Incremented by add_constant() each time a new immediate is appended. */
+ int num_immediates;
+
+ /** List of function_entry */
+ exec_list function_signatures;
+ int next_signature_id;
+
+ /** List of glsl_to_tgsi_instruction */
+ exec_list instructions;
+
+ /* The emit() overloads with fewer operands forward to the three-source
+ * version, filling the missing operands with undefined registers.
+ */
+ glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op);
+
+ glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
+ st_dst_reg dst, st_src_reg src0);
+
+ glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
+ st_dst_reg dst, st_src_reg src0, st_src_reg src1);
+
+ glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
+ st_dst_reg dst,
+ st_src_reg src0, st_src_reg src1, st_src_reg src2);
+
+ /** Pick the float/int/uint variant of \c op from the operand types. */
+ unsigned get_opcode(ir_instruction *ir, unsigned op,
+ st_dst_reg dst,
+ st_src_reg src0, st_src_reg src1);
+
+ /**
+ * Emit the correct dot-product instruction for the type of arguments
+ */
+ glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
+ st_dst_reg dst,
+ st_src_reg src0,
+ st_src_reg src1,
+ unsigned elements);
+
+ void emit_scalar(ir_instruction *ir, unsigned op,
+ st_dst_reg dst, st_src_reg src0);
+
+ void emit_scalar(ir_instruction *ir, unsigned op,
+ st_dst_reg dst, st_src_reg src0, st_src_reg src1);
+
+ void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
+
+ void emit_scs(ir_instruction *ir, unsigned op,
+ st_dst_reg dst, const st_src_reg &src);
+
+ bool try_emit_mad(ir_expression *ir,
+ int mul_operand);
+ bool try_emit_mad_for_and_not(ir_expression *ir,
+ int mul_operand);
+ bool try_emit_sat(ir_expression *ir);
+
+ void emit_swz(ir_expression *ir);
+
+ bool process_move_condition(ir_rvalue *ir);
+
+ void remove_output_reads(gl_register_file type);
+ void simplify_cmp(void);
+
+ void rename_temp_register(int index, int new_index);
+ int get_first_temp_read(int index);
+ int get_first_temp_write(int index);
+ int get_last_temp_read(int index);
+ int get_last_temp_write(int index);
+
+ void copy_propagate(void);
+ void eliminate_dead_code(void);
+ int eliminate_dead_code_advanced(void);
+ void merge_registers(void);
+ void renumber_registers(void);
+
+ /** ralloc context passed to placement new for instructions and list nodes. */
+ void *mem_ctx;
+};
+
+/* Placeholder source used to fill unused source slots in emit(). */
+static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
+
+/* Placeholder destination for instructions emitted without one.
+ * NOTE(review): the second constructor argument is a writemask, yet a swizzle
+ * constant (SWIZZLE_NOOP) is passed here -- confirm this is intentional.
+ */
+static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
+
+/* Destination used by emit() when loading the address register with ARL. */
+static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT);
+
+static void
+fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
+
+/**
+ * Append a printf-style message to the program's info log and flag the
+ * program as having failed to link.
+ */
+static void
+fail_link(struct gl_shader_program *prog, const char *fmt, ...)
+{
+   va_list ap;
+
+   va_start(ap, fmt);
+   ralloc_vasprintf_append(&prog->InfoLog, fmt, ap);
+   va_end(ap);
+
+   prog->LinkStatus = GL_FALSE;
+}
+
+/**
+ * Return the swizzle that reads the first \c size components of a register
+ * and repeats the last of them in the remaining channels.
+ *
+ * \param size  Number of components, 1 to 4 (asserted).
+ */
+static int
+swizzle_for_size(int size)
+{
+   static const int swizzle_by_width[4] = {
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
+   };
+
+   assert((size >= 1) && (size <= 4));
+
+   const int slot = size - 1;
+   return swizzle_by_width[slot];
+}
+
+/** Return the is_tex flag from the TGSI opcode info table for \c opcode. */
+static bool
+is_tex_instruction(unsigned opcode)
+{
+   return tgsi_get_opcode_info(opcode)->is_tex;
+}
+
+/** Return the destination count from the TGSI opcode info table. */
+static unsigned
+num_inst_dst_regs(unsigned opcode)
+{
+   return tgsi_get_opcode_info(opcode)->num_dst;
+}
+
+/**
+ * Return the source count from the TGSI opcode info table, reduced by one
+ * for texture opcodes.
+ */
+static unsigned
+num_inst_src_regs(unsigned opcode)
+{
+   const tgsi_opcode_info *const desc = tgsi_get_opcode_info(opcode);
+   unsigned count = desc->num_src;
+
+   if (desc->is_tex)
+      count--;
+
+   return count;
+}
+
+/**
+ * Raise the matching "indirect addressing" flag for a register file that is
+ * accessed through a relative address.
+ */
+static void
+note_indirect_access(gl_register_file file,
+                     bool *indirect_temps, bool *indirect_consts)
+{
+   switch (file) {
+   case PROGRAM_TEMPORARY:
+      *indirect_temps = true;
+      break;
+   case PROGRAM_LOCAL_PARAM:
+   case PROGRAM_ENV_PARAM:
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_NAMED_PARAM:
+   case PROGRAM_CONSTANT:
+   case PROGRAM_UNIFORM:
+      *indirect_consts = true;
+      break;
+   case PROGRAM_IMMEDIATE:
+      assert(!"immediates should not have indirect addressing");
+      break;
+   default:
+      break;
+   }
+}
+
+/**
+ * Append a three-source instruction to the instruction list.
+ *
+ * The opcode is first passed through get_opcode().  Sources that use
+ * relative addressing are handed to reladdr_to_temp(); a relatively
+ * addressed destination has its address loaded with emit_arl().
+ *
+ * \return the newly appended instruction.
+ */
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
+                           st_dst_reg dst,
+                           st_src_reg src0, st_src_reg src1, st_src_reg src2)
+{
+   glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
+
+   op = get_opcode(ir, op, dst, src0, src1);
+
+   /* Only one operand can use the ARL register directly; the remaining
+    * relatively addressed sources are preloaded into temporaries.
+    */
+   int num_reladdr = (dst.reladdr != NULL) + (src0.reladdr != NULL) +
+                     (src1.reladdr != NULL) + (src2.reladdr != NULL);
+
+   reladdr_to_temp(ir, &src2, &num_reladdr);
+   reladdr_to_temp(ir, &src1, &num_reladdr);
+   reladdr_to_temp(ir, &src0, &num_reladdr);
+
+   if (dst.reladdr) {
+      emit_arl(ir, address_reg, *dst.reladdr);
+      num_reladdr--;
+   }
+   assert(num_reladdr == 0);
+
+   inst->op = op;
+   inst->dst = dst;
+   inst->src[0] = src0;
+   inst->src[1] = src1;
+   inst->src[2] = src2;
+   inst->ir = ir;
+   inst->dead_mask = 0;
+   inst->function = NULL;
+
+   if (op == TGSI_OPCODE_ARL)
+      this->num_address_regs = 1;
+
+   /* Update indirect addressing status used by TGSI.  The sources are only
+    * inspected when the destination is not relatively addressed.
+    */
+   if (dst.reladdr) {
+      note_indirect_access(dst.file,
+                           &this->indirect_addr_temps,
+                           &this->indirect_addr_consts);
+   } else {
+      for (int s = 0; s < 3; s++) {
+         if (!inst->src[s].reladdr)
+            continue;
+
+         note_indirect_access(inst->src[s].file,
+                              &this->indirect_addr_temps,
+                              &this->indirect_addr_consts);
+      }
+   }
+
+   this->instructions.push_tail(inst);
+
+   return inst;
+}
+
+
+/** Two-source emit: the third source slot is filled with undef_src. */
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
+                           st_dst_reg dst, st_src_reg src0, st_src_reg src1)
+{
+   glsl_to_tgsi_instruction *const inst =
+      emit(ir, op, dst, src0, src1, undef_src);
+
+   return inst;
+}
+
+/**
+ * One-source emit: the second and third source slots are filled with
+ * undef_src.  The destination must have a non-empty writemask.
+ */
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
+                           st_dst_reg dst, st_src_reg src0)
+{
+   assert(dst.writemask != 0);
+
+   glsl_to_tgsi_instruction *const inst =
+      emit(ir, op, dst, src0, undef_src, undef_src);
+
+   return inst;
+}
+
+/** Operand-less emit: destination and all sources are the undefined registers. */
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
+{
+   glsl_to_tgsi_instruction *const inst =
+      emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
+
+   return inst;
+}
+
+/**
+ * Determines whether to use an integer, unsigned integer, or float opcode
+ * based on the operands and input opcode, and returns the selected opcode.
+ * Nothing is emitted here; emit() calls this before storing the opcode.
+ *
+ * The operand type is float when either source is float or when
+ * native_integers is off; otherwise it is the type of src0.  The \c ir and
+ * \c dst parameters are not consulted.
+ *
+ * TODO: type checking for remaining TGSI opcodes
+ */
+unsigned
+glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
+ st_dst_reg dst,
+ st_src_reg src0, st_src_reg src1)
+{
+ int type = GLSL_TYPE_FLOAT;
+
+ if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
+ type = GLSL_TYPE_FLOAT;
+ else if (native_integers)
+ type = src0.type;
+
+ /* case4(c, f, i, u): when op is TGSI_OPCODE_c, replace it with the float
+ * (f), signed (i) or unsigned (u) opcode according to 'type'.  Any type
+ * other than INT or UINT selects the float column.
+ */
+#define case4(c, f, i, u) \
+ case TGSI_OPCODE_##c: \
+ if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \
+ else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \
+ else op = TGSI_OPCODE_##f; \
+ break;
+ /* Shorthands: float/int/uint; float plus one shared integer opcode; and
+ * integer-only, whose float column is TGSI_OPCODE_LAST so that the assert
+ * at the end fires if a float operand reaches such an opcode.
+ */
+#define case3(f, i, u) case4(f, f, i, u)
+#define case2fi(f, i) case4(f, f, i, i)
+#define case2iu(i, u) case4(i, LAST, i, u)
+
+ switch(op) {
+ case2fi(ADD, UADD);
+ case2fi(MUL, UMUL);
+ case2fi(MAD, UMAD);
+ case3(DIV, IDIV, UDIV);
+ case3(MAX, IMAX, UMAX);
+ case3(MIN, IMIN, UMIN);
+ case2iu(MOD, UMOD);
+
+ case2fi(SEQ, USEQ);
+ case2fi(SNE, USNE);
+ case3(SGE, ISGE, USGE);
+ case3(SLT, ISLT, USLT);
+
+ case2iu(SHL, SHL);
+ case2iu(ISHR, USHR);
+ case2iu(NOT, NOT);
+ case2iu(AND, AND);
+ case2iu(OR, OR);
+ case2iu(XOR, XOR);
+
+ /* Every other opcode is returned unchanged. */
+ default: break;
+ }
+
+ assert(op != TGSI_OPCODE_LAST);
+ return op;
+}
+
+/**
+ * Emit the dot-product instruction matching the operand width.
+ *
+ * \param ir        IR instruction being processed
+ * \param dst       Destination register
+ * \param src0      First operand
+ * \param src1      Second operand
+ * \param elements  Number of components to multiply; must be 2, 3 or 4,
+ *                  selecting DP2, DP3 or DP4 respectively.
+ * \return the emitted instruction.
+ */
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
+                              st_dst_reg dst, st_src_reg src0, st_src_reg src1,
+                              unsigned elements)
+{
+   static const unsigned dot_opcodes[] = {
+      TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
+   };
+
+   /* 'elements' is unsigned, so a value below 2 would wrap around to a huge
+    * index; catch both ends of the range before touching the table.
+    */
+   assert(elements >= 2 && elements <= 4);
+
+   return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
+}
+
+/**
+ * Emits TGSI scalar opcodes to produce unique answers across channels.
+ *
+ * Some TGSI opcodes are scalar-only, like ARB_fp/vp: the X channel of each
+ * source determines the result written to every destination channel.  To
+ * get a per-channel result, one instruction is emitted for each distinct
+ * pair of source components feeding the enabled destination channels.
+ */
+void
+glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
+                                  st_dst_reg dst,
+                                  st_src_reg orig_src0, st_src_reg orig_src1)
+{
+   /* Channels that are masked off count as already handled. */
+   int handled = ~dst.writemask;
+
+   for (int chan = 0; chan < 4; chan++) {
+      GLuint group_mask = (1 << chan);
+
+      if (handled & group_mask)
+         continue;
+
+      const GLuint swz0 = GET_SWZ(orig_src0.swizzle, chan);
+      const GLuint swz1 = GET_SWZ(orig_src1.swizzle, chan);
+
+      /* Fold in every later pending channel that reads the very same source
+       * components, so a single instruction covers all of them.
+       */
+      for (int other = chan + 1; other < 4; other++) {
+         const bool pending = !(handled & (1 << other));
+
+         if (pending &&
+             GET_SWZ(orig_src0.swizzle, other) == swz0 &&
+             GET_SWZ(orig_src1.swizzle, other) == swz1) {
+            group_mask |= (1 << other);
+         }
+      }
+
+      /* Splat the selected component so it lands in X. */
+      st_src_reg splat0 = orig_src0;
+      st_src_reg splat1 = orig_src1;
+      splat0.swizzle = MAKE_SWIZZLE4(swz0, swz0, swz0, swz0);
+      splat1.swizzle = MAKE_SWIZZLE4(swz1, swz1, swz1, swz1);
+
+      glsl_to_tgsi_instruction *inst = emit(ir, op, dst, splat0, splat1);
+      inst->dst.writemask = group_mask;
+
+      handled |= group_mask;
+   }
+}
+
+/**
+ * Single-source form of emit_scalar(): the second operand is an undefined
+ * register whose swizzle is XXXX.
+ */
+void
+glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
+                                  st_dst_reg dst, st_src_reg src0)
+{
+   st_src_reg filler = undef_src;
+   filler.swizzle = SWIZZLE_XXXX;
+
+   emit_scalar(ir, op, dst, src0, filler);
+}
+
+/**
+ * Load an address register with ARL.
+ *
+ * Signed and unsigned integer sources are first converted to float (I2F /
+ * U2F) into a fresh temporary; any other source is fed to ARL directly.
+ * The temporary is only allocated when a conversion is actually needed, so
+ * float sources no longer consume a temporary index.
+ *
+ * \param ir    Unused; the emitted instructions carry a NULL ir pointer.
+ * \param dst   Address register to load
+ * \param src0  Value to load
+ */
+void
+glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
+                               st_dst_reg dst, st_src_reg src0)
+{
+   st_src_reg addr = src0;
+
+   if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT) {
+      const unsigned convert = (src0.type == GLSL_TYPE_INT)
+         ? TGSI_OPCODE_I2F : TGSI_OPCODE_U2F;
+
+      addr = get_temp(glsl_type::float_type);
+      emit(NULL, convert, st_dst_reg(addr), src0);
+   }
+
+   emit(NULL, TGSI_OPCODE_ARL, dst, addr);
+}
+
+/**
+ * Emit a TGSI_OPCODE_SCS instruction
+ *
+ * The \c SCS opcode functions a bit differently than the other TGSI opcodes.
+ * Instead of splatting its result across all four components of the
+ * destination, it writes the cosine of the source's \c x component to the
+ * \c x component and the sine to the \c y component.
+ *
+ * Vertex programs cannot use SCS; for them the plain scalar \c op is emitted
+ * through emit_scalar() instead.
+ *
+ * \param ir   IR instruction being processed
+ * \param op   Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending
+ *             on which value is desired.
+ * \param dst  Destination register
+ * \param src  Source register
+ */
+void
+glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
+                               st_dst_reg dst,
+                               const st_src_reg &src)
+{
+   /* Vertex programs cannot use the SCS opcode.
+    */
+   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
+      emit_scalar(ir, op, dst, src);
+      return;
+   }
+
+   assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS);
+
+   /* SCS places the cosine in .x (component 0) and the sine in .y
+    * (component 1).
+    */
+   const unsigned component = (op == TGSI_OPCODE_COS) ? 0 : 1;
+   const unsigned scs_mask = (1U << component);
+   int done_mask = ~dst.writemask;
+   st_src_reg tmp;
+
+   /* If there are components in the destination that differ from the
+    * component that will be written by the SCS instruction, we'll need a
+    * temporary.
+    */
+   if (scs_mask != unsigned(dst.writemask)) {
+      tmp = get_temp(glsl_type::vec4_type);
+   }
+
+   for (unsigned i = 0; i < 4; i++) {
+      unsigned this_mask = (1U << i);
+      st_src_reg src0 = src;
+
+      if ((done_mask & this_mask) != 0)
+         continue;
+
+      /* The source swizzle specifies which component of the source generates
+       * sine / cosine for the current component in the destination.  The SCS
+       * instruction requires that this value be swizzled to the X component.
+       * Replace the current swizzle with a swizzle that puts the source in
+       * the X component.
+       */
+      unsigned src0_swiz = GET_SWZ(src.swizzle, i);
+
+      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
+                                   src0_swiz, src0_swiz);
+      for (unsigned j = i + 1; j < 4; j++) {
+         /* If there is another enabled component in the destination that is
+          * derived from the same inputs, generate its value on this pass as
+          * well.  The comparison must look at the caller's swizzle: src0 has
+          * just been splatted, so every one of its channels would match.
+          */
+         if (!(done_mask & (1 << j)) &&
+             GET_SWZ(src.swizzle, j) == src0_swiz) {
+            this_mask |= (1 << j);
+         }
+      }
+
+      if (this_mask != scs_mask) {
+         glsl_to_tgsi_instruction *inst;
+         st_dst_reg tmp_dst = st_dst_reg(tmp);
+
+         /* Emit the SCS instruction.
+          */
+         inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0);
+         inst->dst.writemask = scs_mask;
+
+         /* Move the result of the SCS instruction to the desired location in
+          * the destination.  This has to be a plain MOV; a second SCS would
+          * take the sine / cosine of the value just computed.
+          */
+         tmp.swizzle = MAKE_SWIZZLE4(component, component,
+                                     component, component);
+         inst = emit(ir, TGSI_OPCODE_MOV, dst, tmp);
+         inst->dst.writemask = this_mask;
+      } else {
+         /* Emit the SCS instruction to write directly to the destination.
+          */
+         glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0);
+         inst->dst.writemask = scs_mask;
+      }
+
+      done_mask |= this_mask;
+   }
+}
+
+/**
+ * Register a constant and return its index.
+ *
+ * For PROGRAM_CONSTANT the value is added to the program's parameter list
+ * and \c swizzle_out is passed through to that helper.  For
+ * PROGRAM_IMMEDIATE the value is looked up in (or appended to) the
+ * visitor's immediate list; \c swizzle_out is not written on that path.
+ */
+int
+glsl_to_tgsi_visitor::add_constant(gl_register_file file,
+                                   gl_constant_value values[4], int size, int datatype,
+                                   GLuint *swizzle_out)
+{
+   if (file == PROGRAM_CONSTANT) {
+      return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
+                                              size, datatype, swizzle_out);
+   }
+
+   assert(file == PROGRAM_IMMEDIATE);
+
+   /* Reuse an identical immediate if one was added before; its position in
+    * the list is its index.
+    */
+   int position = 0;
+   foreach_iter(exec_list_iterator, iter, this->immediates) {
+      immediate_storage *existing = (immediate_storage *)iter.get();
+      const bool same_shape = existing->size == size &&
+                              existing->type == datatype;
+
+      if (same_shape &&
+          !memcmp(existing->values, values, size * sizeof(gl_constant_value))) {
+         return position;
+      }
+      position++;
+   }
+
+   /* Not found: append it; 'position' now equals the old list length. */
+   immediate_storage *added =
+      new(mem_ctx) immediate_storage(values, size, datatype);
+   this->immediates.push_tail(added);
+   this->num_immediates++;
+
+   return position;
+}
+
+/** Return an immediate source register holding the float \c val. */
+st_src_reg
+glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
+{
+   st_src_reg imm(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
+   union gl_constant_value constant;
+
+   constant.f = val;
+   imm.index = add_constant(imm.file, &constant, 1, GL_FLOAT, &imm.swizzle);
+
+   return imm;
+}
+
+/**
+ * Return an immediate source register holding the integer \c val.
+ * Only valid when native_integers is set (asserted).
+ */
+st_src_reg
+glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
+{
+   st_src_reg imm(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
+   union gl_constant_value constant;
+
+   assert(native_integers);
+
+   constant.i = val;
+   imm.index = add_constant(imm.file, &constant, 1, GL_INT, &imm.swizzle);
+
+   return imm;
+}
+
+/**
+ * Return an immediate for \c val: an integer immediate when native integers
+ * are in use and \c type is not float, a float immediate otherwise.
+ */
+st_src_reg
+glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
+{
+   if (!native_integers || type == GLSL_TYPE_FLOAT)
+      return st_src_reg_for_float(val);
+
+   return st_src_reg_for_int(val);
+}
+
+/**
+ * Number of vec4 register slots a GLSL type occupies.
+ */
+static int
+type_size(const struct glsl_type *type)
+{
+   switch (type->base_type) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL:
+      /* A matrix takes one slot per column.  Everything else gets a whole
+       * vec4 regardless of its width: wasteful for scalars, but it keeps
+       * array indexing simple.  A later pass may pack scalars down.
+       */
+      if (!type->is_matrix())
+         return 1;
+      return type->matrix_columns;
+
+   case GLSL_TYPE_ARRAY:
+      assert(type->length > 0);
+      return type_size(type->fields.array) * type->length;
+
+   case GLSL_TYPE_STRUCT: {
+      int total = 0;
+
+      for (unsigned int field = 0; field < type->length; field++)
+         total += type_size(type->fields.structure[field].type);
+
+      return total;
+   }
+
+   case GLSL_TYPE_SAMPLER:
+      /* Samplers take up one slot in UNIFORMS[], but they're baked in
+       * at link time.
+       */
+      return 1;
+
+   default:
+      assert(0);
+      return 0;
+   }
+}
+
+/**
+ * Allocate temporary storage for a value of the given type.
+ *
+ * Temporaries are numbered sequentially during the initial codegen pass.
+ * This is not SSA: variable assignments reuse their storage.
+ */
+st_src_reg
+glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
+{
+   st_src_reg temp;
+
+   temp.file = PROGRAM_TEMPORARY;
+   temp.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
+   temp.reladdr = NULL;
+   temp.negate = 0;
+
+   /* Claim as many consecutive slots as the type needs. */
+   temp.index = next_temp;
+   next_temp += type_size(type);
+
+   const bool aggregate = type->is_array() || type->is_record();
+   if (aggregate)
+      temp.swizzle = SWIZZLE_NOOP;
+   else
+      temp.swizzle = swizzle_for_size(type->vector_elements);
+
+   return temp;
+}
+
+/**
+ * Look up the storage entry recorded for \c var.
+ *
+ * \return the matching entry, or NULL when the variable has none yet.
+ */
+variable_storage *
+glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
+{
+   foreach_iter(exec_list_iterator, iter, this->variables) {
+      variable_storage *candidate = (variable_storage *)iter.get();
+
+      if (candidate->var == var)
+         return candidate;
+   }
+
+   return NULL;
+}
+
+/**
+ * Handle a variable declaration.
+ *
+ * gl_FragCoord and gl_FragDepth copy their layout qualifiers into the
+ * fragment program.  Built-in uniforms (uniform mode, name starting with
+ * "gl_") get storage set up here: they are referenced directly in the state
+ * file when every state slot uses the identity swizzle, otherwise their
+ * slots are copied into temporaries with MOV instructions.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_variable *ir)
+{
+ if (strcmp(ir->name, "gl_FragCoord") == 0) {
+ struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
+
+ fp->OriginUpperLeft = ir->origin_upper_left;
+ fp->PixelCenterInteger = ir->pixel_center_integer;
+
+ } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
+ struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
+ /* Translate the IR depth layout into the matching FRAG_DEPTH_LAYOUT_*. */
+ switch (ir->depth_layout) {
+ case ir_depth_layout_none:
+ fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE;
+ break;
+ case ir_depth_layout_any:
+ fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY;
+ break;
+ case ir_depth_layout_greater:
+ fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER;
+ break;
+ case ir_depth_layout_less:
+ fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS;
+ break;
+ case ir_depth_layout_unchanged:
+ fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+ if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
+ unsigned int i;
+ const ir_state_slot *const slots = ir->state_slots;
+ assert(ir->state_slots != NULL);
+
+ /* Check if this statevar's setup in the STATE file exactly
+ * matches how we'll want to reference it as a
+ * struct/array/whatever. If not, then we need to move it into
+ * temporary storage and hope that it'll get copy-propagated
+ * out.
+ */
+ for (i = 0; i < ir->num_state_slots; i++) {
+ if (slots[i].swizzle != SWIZZLE_XYZW) {
+ break;
+ }
+ }
+
+ variable_storage *storage;
+ st_dst_reg dst;
+ /* i reached the end only if no slot had a non-identity swizzle. */
+ if (i == ir->num_state_slots) {
+ /* We'll set the index later. */
+ storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
+ this->variables.push_tail(storage);
+
+ dst = undef_dst;
+ } else {
+ /* The temporaries reserved below are sized from the type (next_temp is
+ * advanced by type_size()). However, this had better match the number
+ * of state elements that we're going to copy into the new temporary.
+ */
+ assert((int) ir->num_state_slots == type_size(ir->type));
+
+ storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
+ this->next_temp);
+ this->variables.push_tail(storage);
+ this->next_temp += type_size(ir->type);
+
+ dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
+ native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
+ }
+
+
+ /* Add a state reference for every slot; this loop's i shadows the outer
+ * one declared above.
+ */
+ for (unsigned int i = 0; i < ir->num_state_slots; i++) {
+ int index = _mesa_add_state_reference(this->prog->Parameters,
+ (gl_state_index *)slots[i].tokens);
+
+ if (storage->file == PROGRAM_STATE_VAR) {
+ /* Direct reference: the first slot fixes the base index and the
+ * remaining slots are expected to follow it consecutively.
+ */
+ if (storage->index == -1) {
+ storage->index = index;
+ } else {
+ assert(index == storage->index + (int)i);
+ }
+ } else {
+ st_src_reg src(PROGRAM_STATE_VAR, index,
+ native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT);
+ src.swizzle = slots[i].swizzle;
+ emit(ir, TGSI_OPCODE_MOV, dst, src);
+ /* even a float takes up a whole vec4 reg in a struct/array. */
+ dst.index++;
+ }
+ }
+
+ /* On the temporary path dst.index must have advanced once per slot;
+ * anything else is reported as a link failure.
+ */
+ if (storage->file == PROGRAM_TEMPORARY &&
+ dst.index != storage->index + (int) ir->num_state_slots) {
+ fail_link(this->shader_program,
+ "failed to load builtin uniform `%s' (%d/%d regs loaded)\n",
+ ir->name, dst.index - storage->index,
+ type_size(ir->type));
+ }
+ }
+}
+
+/**
+ * Lower an ir_loop into a BGNLOOP/ENDLOOP pair.
+ *
+ * The optional counter initialisation (ir->from), exit test (ir->to
+ * compared with ir->cmp) and increment (ir->increment) are expanded into
+ * short-lived IR nodes that are visited to emit their instructions and
+ * then deleted again.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_loop *ir)
+{
+   ir_dereference_variable *counter =
+      ir->counter ? new(ir) ir_dereference_variable(ir->counter) : NULL;
+
+   /* "counter = from" is emitted once, ahead of BGNLOOP. */
+   if (ir->from) {
+      assert(ir->counter != NULL);
+
+      ir_assignment *init = new(ir) ir_assignment(counter, ir->from, NULL);
+
+      init->accept(this);
+      delete init;
+   }
+
+   emit(NULL, TGSI_OPCODE_BGNLOOP);
+
+   /* "if (counter <cmp> to) break;" is emitted before the loop body. */
+   if (ir->to) {
+      ir_expression *exit_cond =
+         new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
+                               counter, ir->to);
+      ir_if *exit_if = new(ir) ir_if(exit_cond);
+      ir_loop_jump *exit_jump =
+         new(ir) ir_loop_jump(ir_loop_jump::jump_break);
+
+      exit_if->then_instructions.push_tail(exit_jump);
+      exit_if->accept(this);
+
+      delete exit_if;
+      delete exit_cond;
+      delete exit_jump;
+   }
+
+   visit_exec_list(&ir->body_instructions, this);
+
+   /* "counter = counter + increment" is emitted after the loop body. */
+   if (ir->increment) {
+      ir_expression *next_value =
+         new(ir) ir_expression(ir_binop_add, counter->type,
+                               counter, ir->increment);
+      ir_assignment *step = new(ir) ir_assignment(counter, next_value, NULL);
+
+      step->accept(this);
+      delete step;
+      delete next_value;
+   }
+
+   emit(NULL, TGSI_OPCODE_ENDLOOP);
+}
+
+/**
+ * Emit BRK for a break jump and CONT for a continue jump.  Any other
+ * jump mode emits nothing.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
+{
+   if (ir->mode == ir_loop_jump::jump_break) {
+      emit(NULL, TGSI_OPCODE_BRK);
+   } else if (ir->mode == ir_loop_jump::jump_continue) {
+      emit(NULL, TGSI_OPCODE_CONT);
+   }
+}
+
+
+/**
+ * Reaching this visitor is treated as a bug: it asserts unconditionally
+ * and emits nothing.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
+{
+   /* Silence the unused-parameter warning. */
+   (void)ir;
+   assert(0);
+}
+
+/**
+ * Visit the body of main(); every other function is skipped.
+ *
+ * Calls to other functions are expected to have been inlined before
+ * glsl_to_tgsi runs, so their bodies never need to be translated.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_function *ir)
+{
+   if (strcmp(ir->name, "main") != 0)
+      return;
+
+   /* main() is looked up with an empty parameter list. */
+   exec_list no_params;
+   const ir_function_signature *sig = ir->matching_signature(&no_params);
+
+   assert(sig);
+
+   foreach_iter(exec_list_iterator, iter, sig->body) {
+      ir_instruction *inst = (ir_instruction *)iter.get();
+
+      inst->accept(this);
+   }
+}
+
+/**
+ * Try to emit a single MAD for an addition whose operand \c mul_operand
+ * is a multiplication.
+ *
+ * \param ir           the addition being translated
+ * \param mul_operand  index (0 or 1) of the operand to test for a multiply
+ * \return true when a MAD was emitted and this->result holds its value;
+ *         false when the chosen operand is not an ir_binop_mul expression,
+ *         in which case nothing has been emitted.
+ */
+bool
+glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
+{
+   ir_expression *mul = ir->operands[mul_operand]->as_expression();
+   if (mul == NULL || mul->operation != ir_binop_mul)
+      return false;
+
+   const int addend_operand = 1 - mul_operand;
+
+   /* Evaluate both factors, then the addend. */
+   mul->operands[0]->accept(this);
+   st_src_reg factor0 = this->result;
+   mul->operands[1]->accept(this);
+   st_src_reg factor1 = this->result;
+   ir->operands[addend_operand]->accept(this);
+   st_src_reg addend = this->result;
+
+   this->result = get_temp(ir->type);
+
+   /* Only write the channels that exist in the result type. */
+   st_dst_reg mad_dst = st_dst_reg(this->result);
+   mad_dst.writemask = (1 << ir->type->vector_elements) - 1;
+   emit(ir, TGSI_OPCODE_MAD, mad_dst, factor0, factor1, addend);
+
+   return true;
+}
+
+/**
+ * Emit MAD(a, -b, a) in place of AND(a, NOT(b)).
+ *
+ * Logic values are 1.0 for true and 0.0 for false, logical-and is a
+ * multiplication, and logical-not is (1.0 - x).  The expression
+ * (a & !b) therefore reduces step by step:
+ *
+ *  - a * !b
+ *  - a * (1 - b)
+ *  - (a * 1) - (a * b)
+ *  - a + -(a * b)
+ *  - a + (a * -b)
+ *
+ * and the last form is exactly one MAD(a, -b, a) instruction.
+ *
+ * \param ir           the logical-and being translated
+ * \param try_operand  index (0 or 1) of the operand to test for a
+ *                     logical-not
+ * \return true when the MAD was emitted and this->result holds its value;
+ *         false when the chosen operand is not an ir_unop_logic_not
+ *         expression, in which case nothing has been emitted.
+ */
+bool
+glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
+{
+   ir_expression *not_expr = ir->operands[try_operand]->as_expression();
+   if (not_expr == NULL || not_expr->operation != ir_unop_logic_not)
+      return false;
+
+   const int other_operand = 1 - try_operand;
+
+   ir->operands[other_operand]->accept(this);
+   st_src_reg kept = this->result;
+   not_expr->operands[0]->accept(this);
+   st_src_reg inverted = this->result;
+
+   /* Flip the sign of the negated operand so it becomes -b. */
+   inverted.negate = ~inverted.negate;
+
+   this->result = get_temp(ir->type);
+   emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), kept, inverted, kept);
+
+   return true;
+}
+
+/**
+ * Try to translate \c ir as a saturate of some inner rvalue.
+ *
+ * \return true when the saturate was emitted (this->result holds the
+ *         value); false for vertex programs and for expressions that
+ *         as_rvalue_to_saturate() does not recognise.
+ */
+bool
+glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
+{
+   /* Saturates were only introduced to vertex programs in
+    * NV_vertex_program3, so don't give them to drivers in the VP.
+    */
+   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
+      return false;
+
+   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
+   if (!sat_src)
+      return false;
+
+   sat_src->accept(this);
+   st_src_reg src = this->result;
+
+   /* When the operand was a mul/add/dot expression, the saturate flag is
+    * set on the instruction at the tail of the list instead of emitting
+    * a separate MOV.
+    *
+    * This is restricted to those operations on purpose: the tail
+    * instruction must be the one that produced the operand's value.  An
+    * ir_dereference_array, for example, might emit a MUL to compute its
+    * reladdr and hand back a source register using that reladdr; that
+    * MUL is not the value being saturated.
+    */
+   bool fold_into_producer = false;
+   ir_expression *sat_src_expr = sat_src->as_expression();
+   if (sat_src_expr) {
+      switch (sat_src_expr->operation) {
+      case ir_binop_mul:
+      case ir_binop_add:
+      case ir_binop_dot:
+         fold_into_producer = true;
+         break;
+      default:
+         break;
+      }
+   }
+
+   if (fold_into_producer) {
+      glsl_to_tgsi_instruction *producer =
+         (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+      producer->saturate = true;
+      return true;
+   }
+
+   /* Otherwise copy the operand through a saturating MOV. */
+   this->result = get_temp(ir->type);
+   st_dst_reg sat_dst = st_dst_reg(this->result);
+   sat_dst.writemask = (1 << ir->type->vector_elements) - 1;
+
+   glsl_to_tgsi_instruction *mov = emit(ir, TGSI_OPCODE_MOV, sat_dst, src);
+   mov->saturate = true;
+
+   return true;
+}
+
+/**
+ * Load the address register for a relatively-addressed source operand.
+ *
+ * Operands without a reladdr are left untouched.  Otherwise an ARL is
+ * emitted for the operand's reladdr, and unless \c *num_reladdr is
+ * exactly 1 the operand is copied into a fresh vec4 temporary which then
+ * replaces \c *reg.  \c *num_reladdr is decremented in either case.
+ */
+void
+glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
+                                      st_src_reg *reg, int *num_reladdr)
+{
+   if (reg->reladdr == NULL)
+      return;
+
+   emit_arl(ir, address_reg, *reg->reladdr);
+
+   const bool is_last_reladdr = (*num_reladdr == 1);
+   if (!is_last_reladdr) {
+      st_src_reg copy = get_temp(glsl_type::vec4_type);
+
+      emit(ir, TGSI_OPCODE_MOV, st_dst_reg(copy), *reg);
+      *reg = copy;
+   }
+
+   *num_reladdr -= 1;
+}
+
+/**
+ * Translate an ir_expression into TGSI instructions.
+ *
+ * A few peephole patterns are tried first (MAD for add-of-multiply, MAD
+ * for "a && !b", and saturate).  If none applies, every operand is
+ * visited, a temporary sized for ir->type is allocated for the result,
+ * and the opcode(s) matching ir->operation are emitted.  On return
+ * this->result refers to the value of the expression, which is usually
+ * that temporary but may be an operand with adjusted modifiers for
+ * operations that need no instruction.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_expression *ir)
+{
+   unsigned int operand;
+   st_src_reg op[Elements(ir->operands)];
+   st_src_reg result_src;
+   st_dst_reg result_dst;
+
+   /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
+    */
+   if (ir->operation == ir_binop_add) {
+      if (try_emit_mad(ir, 1))
+         return;
+      if (try_emit_mad(ir, 0))
+         return;
+   }
+
+   /* Quick peephole: Emit MAD(a, -b, a) instead of AND(a, NOT(b))
+    */
+   if (ir->operation == ir_binop_logic_and) {
+      if (try_emit_mad_for_and_not(ir, 1))
+         return;
+      if (try_emit_mad_for_and_not(ir, 0))
+         return;
+   }
+
+   if (try_emit_sat(ir))
+      return;
+
+   if (ir->operation == ir_quadop_vector)
+      assert(!"ir_quadop_vector should have been lowered");
+
+   for (operand = 0; operand < ir->get_num_operands(); operand++) {
+      /* Mark the result undefined first so that a visitor which fails to
+       * produce a value can be detected below.
+       */
+      this->result.file = PROGRAM_UNDEFINED;
+      ir->operands[operand]->accept(this);
+      if (this->result.file == PROGRAM_UNDEFINED) {
+         ir_print_visitor v;
+         printf("Failed to get tree for expression operand:\n");
+         ir->operands[operand]->accept(&v);
+         exit(1);
+      }
+      op[operand] = this->result;
+
+      /* Matrix expression operands should have been broken down to vector
+       * operations already.
+       */
+      assert(!ir->operands[operand]->type->is_matrix());
+   }
+
+   /* Widest operand; used as the component count for the dot products
+    * emitted by the all_equal / any_nequal cases.
+    */
+   int vector_elements = ir->operands[0]->type->vector_elements;
+   if (ir->operands[1]) {
+      vector_elements = MAX2(vector_elements,
+                             ir->operands[1]->type->vector_elements);
+   }
+
+   this->result.file = PROGRAM_UNDEFINED;
+
+   /* Storage for our result.  Ideally for an assignment we'd be using
+    * the actual storage for the result here, instead.
+    */
+   result_src = get_temp(ir->type);
+   /* convenience for the emit functions below. */
+   result_dst = st_dst_reg(result_src);
+   /* Limit writes to the channels that will be used by result_src later.
+    * This does limit this temp's use as a temporary for multi-instruction
+    * sequences.
+    */
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+
+   switch (ir->operation) {
+   case ir_unop_logic_not:
+      if (result_dst.type != GLSL_TYPE_FLOAT)
+         emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
+      else {
+         /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
+          * older GPUs implement SEQ using multiple instructions (i915 uses two
+          * SGE instructions and a MUL instruction).  Since our logic values are
+          * 0.0 and 1.0, 1-x also implements !x.
+          */
+         op[0].negate = ~op[0].negate;
+         emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
+      }
+      break;
+   case ir_unop_neg:
+      assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT);
+      if (result_dst.type == GLSL_TYPE_INT)
+         emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
+      else {
+         /* Float negation needs no instruction: flip the source modifier. */
+         op[0].negate = ~op[0].negate;
+         result_src = op[0];
+      }
+      break;
+   case ir_unop_abs:
+      assert(result_dst.type == GLSL_TYPE_FLOAT);
+      emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
+      break;
+   case ir_unop_sign:
+      emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
+      break;
+   case ir_unop_rcp:
+      emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
+      break;
+
+   case ir_unop_exp2:
+      emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
+      break;
+   case ir_unop_exp:
+   case ir_unop_log:
+      assert(!"not reached: should be handled by ir_explog_to_explog2");
+      break;
+   case ir_unop_log2:
+      emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
+      break;
+   case ir_unop_sin:
+      emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
+      break;
+   case ir_unop_cos:
+      emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
+      break;
+   case ir_unop_sin_reduced:
+      emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
+      break;
+   case ir_unop_cos_reduced:
+      emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
+      break;
+
+   case ir_unop_dFdx:
+      emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
+      break;
+   case ir_unop_dFdy:
+      op[0].negate = ~op[0].negate;
+      emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]);
+      break;
+
+   case ir_unop_noise: {
+      /* At some point, a motivated person could add a better
+       * implementation of noise.  Currently not even the nvidia
+       * binary drivers do anything more than this.  In any case, the
+       * place to do this is in the GL state tracker, not the poor
+       * driver.
+       */
+      emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
+      break;
+   }
+
+   case ir_binop_add:
+      emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_sub:
+      emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_mul:
+      emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_div:
+      if (result_dst.type == GLSL_TYPE_FLOAT)
+         assert(!"not reached: should be handled by ir_div_to_mul_rcp");
+      else
+         emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_mod:
+      if (result_dst.type == GLSL_TYPE_FLOAT)
+         assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
+      else
+         emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_less:
+      emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_greater:
+      emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_lequal:
+      emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_gequal:
+      emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_equal:
+      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_nequal:
+      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_all_equal:
+      /* "==" operator producing a scalar boolean. */
+      if (ir->operands[0]->type->is_vector() ||
+          ir->operands[1]->type->is_vector()) {
+         st_src_reg temp = get_temp(native_integers ?
+               glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
+               glsl_type::vec4_type);
+         assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+         emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+
+         /* After the dot-product, the value will be an integer on the
+          * range [0,4].  Zero becomes 1.0, and positive values become zero.
+          */
+         emit_dp(ir, result_dst, temp, temp, vector_elements);
+
+         if (result_dst.type == GLSL_TYPE_FLOAT) {
+            /* Negating the result of the dot-product gives values on the range
+             * [-4, 0].  Zero becomes 1.0, and negative values become zero.
+             * This is achieved using SGE.
+             */
+            st_src_reg sge_src = result_src;
+            sge_src.negate = ~sge_src.negate;
+            emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
+         } else {
+            /* The TGSI negate flag doesn't work for integers, so use SEQ 0
+             * instead.
+             */
+            emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_int(0));
+         }
+      } else {
+         emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
+      }
+      break;
+   case ir_binop_any_nequal:
+      /* "!=" operator producing a scalar boolean. */
+      if (ir->operands[0]->type->is_vector() ||
+          ir->operands[1]->type->is_vector()) {
+         st_src_reg temp = get_temp(native_integers ?
+               glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
+               glsl_type::vec4_type);
+         assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+         emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+
+         /* After the dot-product, the value will be an integer on the
+          * range [0,4].  Zero stays zero, and positive values become 1.0.
+          */
+         glsl_to_tgsi_instruction *const dp =
+            emit_dp(ir, result_dst, temp, temp, vector_elements);
+         if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+             result_dst.type == GLSL_TYPE_FLOAT) {
+            /* The clamping to [0,1] can be done for free in the fragment
+             * shader with a saturate.
+             */
+            dp->saturate = true;
+         } else if (result_dst.type == GLSL_TYPE_FLOAT) {
+            /* Negating the result of the dot-product gives values on the range
+             * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
+             * is achieved using SLT.
+             */
+            st_src_reg slt_src = result_src;
+            slt_src.negate = ~slt_src.negate;
+            emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+         } else {
+            emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+         }
+      } else {
+         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
+      }
+      break;
+
+   case ir_unop_any: {
+      assert(ir->operands[0]->type->is_vector());
+
+      /* After the dot-product, the value will be an integer on the
+       * range [0,4].  Zero stays zero, and positive values become 1.0.
+       */
+      glsl_to_tgsi_instruction *const dp =
+         emit_dp(ir, result_dst, op[0], op[0],
+                 ir->operands[0]->type->vector_elements);
+      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+          result_dst.type == GLSL_TYPE_FLOAT) {
+         /* The clamping to [0,1] can be done for free in the fragment
+          * shader with a saturate.
+          */
+         dp->saturate = true;
+      } else if (result_dst.type == GLSL_TYPE_FLOAT) {
+         /* Negating the result of the dot-product gives values on the range
+          * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
+          * is achieved using SLT.
+          */
+         st_src_reg slt_src = result_src;
+         slt_src.negate = ~slt_src.negate;
+         emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+      }
+      else {
+         /* Use SNE 0 if integers are being used as boolean values. */
+         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+      }
+      break;
+   }
+
+   case ir_binop_logic_xor:
+      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_logic_or: {
+      /* After the addition, the value will be an integer on the
+       * range [0,2].  Zero stays zero, and positive values become 1.0.
+       */
+      glsl_to_tgsi_instruction *add =
+         emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
+      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+          result_dst.type == GLSL_TYPE_FLOAT) {
+         /* The clamping to [0,1] can be done for free in the fragment
+          * shader with a saturate if floats are being used as boolean values.
+          */
+         add->saturate = true;
+      } else if (result_dst.type == GLSL_TYPE_FLOAT) {
+         /* Negating the result of the addition gives values on the range
+          * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
+          * is achieved using SLT.
+          */
+         st_src_reg slt_src = result_src;
+         slt_src.negate = ~slt_src.negate;
+         emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+      } else {
+         /* Use an SNE on the result of the addition.  Zero stays zero,
+          * 1 stays 1, and 2 becomes 1.
+          */
+         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+      }
+      break;
+   }
+
+   case ir_binop_logic_and:
+      /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
+      emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_dot:
+      assert(ir->operands[0]->type->is_vector());
+      assert(ir->operands[0]->type == ir->operands[1]->type);
+      emit_dp(ir, result_dst, op[0], op[1],
+              ir->operands[0]->type->vector_elements);
+      break;
+
+   case ir_unop_sqrt:
+      /* sqrt(x) = x * rsq(x). */
+      emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
+      /* For incoming channels <= 0, set the result to 0. */
+      op[0].negate = ~op[0].negate;
+      emit(ir, TGSI_OPCODE_CMP, result_dst,
+           op[0], result_src, st_src_reg_for_float(0.0));
+      break;
+   case ir_unop_rsq:
+      emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
+      break;
+   case ir_unop_i2f:
+   case ir_unop_b2f:
+      if (native_integers) {
+         emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
+         break;
+      }
+      /* fallthrough: without native integers the value is already a float,
+       * so the conversion is a no-op.
+       */
+   case ir_unop_i2u:
+   case ir_unop_u2i:
+      /* Converting between signed and unsigned integers is a no-op. */
+   case ir_unop_b2i:
+      /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */
+      result_src = op[0];
+      break;
+   case ir_unop_f2i:
+      if (native_integers)
+         emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
+      else
+         emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
+      break;
+   case ir_unop_f2b:
+   case ir_unop_i2b:
+      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0],
+           st_src_reg_for_type(result_dst.type, 0));
+      break;
+   case ir_unop_trunc:
+      emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
+      break;
+   case ir_unop_ceil:
+      /* ceil(x) = -floor(-x) */
+      op[0].negate = ~op[0].negate;
+      emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
+      result_src.negate = ~result_src.negate;
+      break;
+   case ir_unop_floor:
+      emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
+      break;
+   case ir_unop_fract:
+      emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
+      break;
+
+   case ir_binop_min:
+      emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_max:
+      emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_pow:
+      emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
+      break;
+
+   /* GLSL 1.30 integer operations.  Each case checks its own guard and
+    * reports "unsupported" by itself when the guard fails.  The cases
+    * must not fall through into one another: the guards differ
+    * (glsl_version vs. native_integers), so a failed guard could
+    * otherwise select a neighbouring case's opcode.
+    */
+   case ir_unop_bit_not:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
+      } else {
+         assert(!"GLSL 1.30 features unsupported");
+      }
+      break;
+   case ir_unop_u2f:
+      if (native_integers) {
+         emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
+      } else {
+         assert(!"GLSL 1.30 features unsupported");
+      }
+      break;
+   case ir_binop_lshift:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
+      } else {
+         assert(!"GLSL 1.30 features unsupported");
+      }
+      break;
+   case ir_binop_rshift:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
+      } else {
+         assert(!"GLSL 1.30 features unsupported");
+      }
+      break;
+   case ir_binop_bit_and:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
+      } else {
+         assert(!"GLSL 1.30 features unsupported");
+      }
+      break;
+   case ir_binop_bit_xor:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
+      } else {
+         assert(!"GLSL 1.30 features unsupported");
+      }
+      break;
+   case ir_binop_bit_or:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
+      } else {
+         assert(!"GLSL 1.30 features unsupported");
+      }
+      break;
+   case ir_unop_round_even:
+      assert(!"GLSL 1.30 features unsupported");
+      break;
+
+   case ir_quadop_vector:
+      /* This operation should have already been handled.
+       */
+      assert(!"Should not get here.");
+      break;
+   }
+
+   this->result = result_src;
+}
+
+
+/**
+ * Apply an rvalue swizzle by composing it with the swizzle already
+ * present on the source register of the swizzled value.
+ *
+ * This only covers swizzles in expressions.  Swizzles on the left hand
+ * side of an assignment become write masks; see ir_assignment for that.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
+{
+   ir->val->accept(this);
+   st_src_reg src = this->result;
+   assert(src.file != PROGRAM_UNDEFINED);
+
+   /* Source channel selected for each of x, y, z and w. */
+   const unsigned mask_chan[4] = {
+      ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w
+   };
+
+   int swizzle[4];
+   for (int chan = 0; chan < 4; chan++) {
+      if (chan < ir->type->vector_elements) {
+         swizzle[chan] = GET_SWZ(src.swizzle, mask_chan[chan]);
+      } else {
+         /* If the type is smaller than a vec4, replicate the last
+          * channel out.
+          */
+         swizzle[chan] = swizzle[ir->type->vector_elements - 1];
+      }
+   }
+
+   src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+
+   this->result = src;
+}
+
+/**
+ * Produce a source register for a variable dereference.
+ *
+ * When no storage has been recorded for the variable yet, storage is
+ * created according to the variable's mode.  Only uniform and
+ * auto/temporary storage is appended to this->variables; the other modes
+ * build their entry from var->location each time.  A mode with no
+ * matching case prints a message and exits.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
+{
+   ir_variable *var = ir->var;
+   variable_storage *entry = find_variable_storage(var);
+
+   if (entry == NULL) {
+      switch (var->mode) {
+      case ir_var_uniform:
+         entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
+                                               var->location);
+         this->variables.push_tail(entry);
+         break;
+
+      case ir_var_in:
+      case ir_var_inout:
+         /* The linker assigns locations for varyings and attributes,
+          * including deprecated builtins (like gl_Color), user-assigned
+          * generic attributes (glBindVertexLocation), and
+          * user-defined varyings.
+          *
+          * FINISHME: We would hit this path for function arguments.  Fix!
+          */
+         assert(var->location != -1);
+         entry = new(mem_ctx) variable_storage(var, PROGRAM_INPUT,
+                                               var->location);
+
+         /* Generic vertex attributes are also recorded in the program's
+          * attribute list, relative to VERT_ATTRIB_GENERIC0.
+          */
+         if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
+             var->location >= VERT_ATTRIB_GENERIC0) {
+            _mesa_add_attribute(this->prog->Attributes,
+                                var->name,
+                                _mesa_sizeof_glsl_type(var->type->gl_type),
+                                var->type->gl_type,
+                                var->location - VERT_ATTRIB_GENERIC0);
+         }
+         break;
+
+      case ir_var_out:
+         assert(var->location != -1);
+         entry = new(mem_ctx) variable_storage(var, PROGRAM_OUTPUT,
+                                               var->location);
+         break;
+
+      case ir_var_system_value:
+         entry = new(mem_ctx) variable_storage(var, PROGRAM_SYSTEM_VALUE,
+                                               var->location);
+         break;
+
+      case ir_var_auto:
+      case ir_var_temporary:
+         /* Locals take the next free run of temporaries. */
+         entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
+                                               this->next_temp);
+         this->variables.push_tail(entry);
+
+         next_temp += type_size(var->type);
+         break;
+      }
+
+      if (entry == NULL) {
+         printf("Failed to make storage for %s\n", var->name);
+         exit(1);
+      }
+   }
+
+   this->result = st_src_reg(entry->file, entry->index, var->type);
+
+   /* Without native integer support every register is treated as float. */
+   if (!native_integers)
+      this->result.type = GLSL_TYPE_FLOAT;
+}
+
+/**
+ * Produce a source register for an array element dereference.
+ *
+ * A constant index is folded directly into the register index.  A
+ * variable index becomes a relative-address register attached to the
+ * source, scaled by the element size and added to any relative address
+ * the array base already carried.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
+{
+   const int stride = type_size(ir->type);
+   ir_constant *const_index = ir->array_index->constant_expression_value();
+
+   ir->array->accept(this);
+   st_src_reg base = this->result;
+
+   if (const_index != NULL) {
+      base.index += const_index->value.i[0] * stride;
+   } else {
+      /* Variable index: evaluate it and turn it into a register offset. */
+      ir->array_index->accept(this);
+
+      st_src_reg offset;
+
+      if (stride == 1) {
+         offset = this->result;
+      } else {
+         /* Elements span several registers, so scale the index. */
+         offset = get_temp(glsl_type::float_type);
+
+         emit(ir, TGSI_OPCODE_MUL, st_dst_reg(offset),
+              this->result, st_src_reg_for_float(stride));
+      }
+
+      /* If there was already a relative address register involved, add the
+       * new and the old together to get the new offset.
+       */
+      if (base.reladdr != NULL) {
+         st_src_reg combined = get_temp(glsl_type::float_type);
+
+         emit(ir, TGSI_OPCODE_ADD, st_dst_reg(combined),
+              offset, *base.reladdr);
+
+         offset = combined;
+      }
+
+      base.reladdr = ralloc(mem_ctx, st_src_reg);
+      memcpy(base.reladdr, &offset, sizeof(offset));
+   }
+
+   /* If the type is smaller than a vec4, replicate the last channel out. */
+   const bool is_small = ir->type->is_scalar() || ir->type->is_vector();
+   if (is_small)
+      base.swizzle = swizzle_for_size(ir->type->vector_elements);
+   else
+      base.swizzle = SWIZZLE_NOOP;
+
+   this->result = base;
+}
+
+/**
+ * Produce a source register for a structure field dereference.
+ *
+ * The register of the record is advanced by the summed type_size() of
+ * every field that precedes the one named by ir->field.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
+{
+   const glsl_type *const record_type = ir->record->type;
+
+   ir->record->accept(this);
+
+   /* Sum the sizes of the fields ahead of the requested one. */
+   int field_offset = 0;
+   for (unsigned int f = 0; f < record_type->length; f++) {
+      if (strcmp(record_type->fields.structure[f].name, ir->field) == 0)
+         break;
+      field_offset += type_size(record_type->fields.structure[f].type);
+   }
+
+   /* If the type is smaller than a vec4, replicate the last channel out. */
+   const bool is_small = ir->type->is_scalar() || ir->type->is_vector();
+   if (is_small)
+      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
+   else
+      this->result.swizzle = SWIZZLE_NOOP;
+
+   this->result.index += field_offset;
+}
+
+/**
+ * Build the destination register for the left-hand side of an assignment.
+ *
+ * Assignment setup has to reach the actual storage of the variable
+ * rather than a temporary, so the LHS is run through the ordinary
+ * dereference visitors and the resulting source register is converted to
+ * a destination.  Any swizzle on it is ignored; the caller expresses
+ * channel selection through the write mask instead.
+ */
+static st_dst_reg
+get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v)
+{
+   /* The LHS must be a dereference.  A variable-indexed array access of a
+    * vector must already have been split into a series of conditional
+    * moves before this point (see ir_vec_index_to_cond_assign).
+    */
+   assert(ir->as_dereference());
+   if (ir_dereference_array *deref_array = ir->as_dereference_array()) {
+      assert(!deref_array->array->type->is_vector());
+   }
+
+   ir->accept(v);
+
+   return st_dst_reg(v->result);
+}
+
+/**
+ * Process the condition of a conditional assignment
+ *
+ * Visits the condition so that this->result can serve as the first
+ * operand of a \c CMP instruction.  When the condition is a relational
+ * comparison against zero (e.g., \c ir_binop_less), only the non-zero
+ * side is visited and used directly.  Otherwise the whole condition is
+ * evaluated to a boolean and that boolean is used.
+ *
+ * \return true when the caller must swap the two value operands of the
+ *         \c CMP instruction, false otherwise.
+ */
+bool
+glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
+{
+   ir_rvalue *cond_value = ir;
+   bool flip_sign = true;
+   bool swap_operands = false;
+
+   ir_expression *const expr = ir->as_expression();
+   if (expr != NULL && expr->get_num_operands() == 2) {
+      /* Find out whether either side of the comparison is zero. */
+      bool zero_is_lhs = false;
+
+      if (expr->operands[0]->is_zero()) {
+         cond_value = expr->operands[1];
+         zero_is_lhs = true;
+      } else if (expr->operands[1]->is_zero()) {
+         cond_value = expr->operands[0];
+      }
+
+      /*      a is -  0  +            -  0  +
+       * (a <  0)  T  F  F  ( a < 0)  T  F  F
+       * (0 <  a)  F  F  T  (-a < 0)  F  F  T
+       * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
+       * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
+       * (a >  0)  F  F  T  (-a < 0)  F  F  T
+       * (0 >  a)  T  F  F  ( a < 0)  T  F  F
+       * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
+       * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
+       *
+       * Exchanging the positions of 0 and 'a' in the comparison simply
+       * means that 'a' has to be negated.  The inclusive comparisons
+       * (<=, >=) additionally need the other CMP operands swapped.
+       */
+      if (cond_value != ir) {
+         switch (expr->operation) {
+         case ir_binop_less:
+         case ir_binop_gequal:
+            flip_sign = zero_is_lhs;
+            swap_operands = (expr->operation == ir_binop_gequal);
+            break;
+
+         case ir_binop_greater:
+         case ir_binop_lequal:
+            flip_sign = !zero_is_lhs;
+            swap_operands = (expr->operation == ir_binop_lequal);
+            break;
+
+         default:
+            /* Not a usable comparison after all, so make sure the whole
+             * condition is visited.
+             */
+            cond_value = ir;
+            break;
+         }
+      }
+   }
+
+   cond_value->accept(this);
+
+   /* TGSI_OPCODE_CMP computes (a < 0 ? b : c), and a condition evaluated
+    * to a boolean is 0.0 or 1.0.  Flipping the sign selects which value
+    * CMP produces without an extra instruction computing the condition.
+    */
+   if (flip_sign)
+      this->result.negate = ~this->result.negate;
+
+   return swap_operands;
+}
+
+/**
+ * Translate an assignment.
+ *
+ * The right-hand side is evaluated first, then the destination register
+ * and write mask are derived from the left-hand side.  Conditional
+ * assignments become CMP instructions; an unconditional assignment of an
+ * expression whose final instruction is still at the tail of the list
+ * re-emits that instruction into the destination; everything else is
+ * copied register by register with MOV.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_assignment *ir)
+{
+   ir->rhs->accept(this);
+   st_src_reg r = this->result;
+
+   st_dst_reg l = get_assignment_lhs(ir->lhs, this);
+
+   /* FINISHME: This should really set to the correct maximal writemask for each
+    * FINISHME: component written (in the loops below).  This case can only
+    * FINISHME: occur for matrices, arrays, and structures.
+    */
+   if (ir->write_mask == 0) {
+      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
+      l.writemask = WRITEMASK_XYZW;
+   } else if (ir->lhs->type->is_scalar() &&
+              ir->lhs->variable_referenced()->mode == ir_var_out) {
+      /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
+       * FINISHME: W component of fragment shader output zero, work correctly.
+       */
+      l.writemask = WRITEMASK_XYZW;
+   } else {
+      l.writemask = ir->write_mask;
+
+      /* Channels that are not written still need some valid swizzle; they
+       * reuse the RHS swizzle found at the first enabled channel.
+       */
+      int filler_chan = 0;
+      for (int chan = 0; chan < 4; chan++) {
+         if (l.writemask & (1 << chan)) {
+            filler_chan = GET_SWZ(r.swizzle, chan);
+            break;
+         }
+      }
+
+      /* Swizzle a small RHS vector into the channels being written.
+       *
+       * glsl ir treats write_mask as dictating how many channels are
+       * present on the RHS while TGSI treats write_mask as just
+       * showing which channels of the vec4 RHS get written.
+       */
+      int swz[4];
+      int next_rhs_chan = 0;
+      for (int chan = 0; chan < 4; chan++) {
+         if (l.writemask & (1 << chan)) {
+            swz[chan] = GET_SWZ(r.swizzle, next_rhs_chan);
+            next_rhs_chan++;
+         } else {
+            swz[chan] = filler_chan;
+         }
+      }
+      r.swizzle = MAKE_SWIZZLE4(swz[0], swz[1], swz[2], swz[3]);
+   }
+
+   assert(l.file != PROGRAM_UNDEFINED);
+   assert(r.file != PROGRAM_UNDEFINED);
+
+   if (ir->condition) {
+      const bool switch_order = this->process_move_condition(ir->condition);
+      st_src_reg condition = this->result;
+
+      for (int reg = 0; reg < type_size(ir->lhs->type); reg++) {
+         /* The current destination value is the "unchanged" CMP operand. */
+         st_src_reg l_src = st_src_reg(l);
+         l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements);
+
+         if (switch_order) {
+            emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r);
+         } else {
+            emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src);
+         }
+
+         l.index++;
+         r.index++;
+      }
+      return;
+   }
+
+   glsl_to_tgsi_instruction *last =
+      (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+   const bool can_retarget = ir->rhs->as_expression() &&
+                             last &&
+                             ir->rhs == last->ir &&
+                             type_size(ir->lhs->type) == 1 &&
+                             l.writemask == last->dst.writemask;
+
+   if (can_retarget) {
+      /* To avoid emitting an extra MOV when assigning an expression to a
+       * variable, emit the last instruction of the expression again, but
+       * replace the destination register with the target of the assignment.
+       * Dead code elimination will remove the original instruction.
+       */
+      glsl_to_tgsi_instruction *copy =
+         emit(ir, last->op, l, last->src[0], last->src[1], last->src[2]);
+      copy->saturate = last->saturate;
+   } else {
+      for (int reg = 0; reg < type_size(ir->lhs->type); reg++) {
+         emit(ir, TGSI_OPCODE_MOV, l, r);
+         l.index++;
+         r.index++;
+      }
+   }
+}
+
+/**
+ * Translate an ir_constant into a source register, left in this->result.
+ *
+ * Struct and array constants are copied into a freshly allocated temporary
+ * with one MOV per type_size() unit, matrices are written into a temporary
+ * one column at a time, and plain scalar/vector constants are handed straight
+ * to add_constant().
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_constant *ir)
+{
+ st_src_reg src;
+ GLfloat stack_vals[4] = { 0 };
+ gl_constant_value *values = (gl_constant_value *) stack_vals;
+ GLenum gl_type = GL_NONE;
+ unsigned int i;
+ static int in_array = 0; /* nesting depth of array constants being visited.
+ * NOTE(review): function-static, so it is shared by every visitor instance;
+ * confirm this is acceptable if shaders can be compiled concurrently. */
+ gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; /* array elements use PROGRAM_CONSTANT, everything else PROGRAM_IMMEDIATE */
+
+ /* Unfortunately, 4 floats is all we can get into
+ * _mesa_add_typed_unnamed_constant. So, make a temp to store an
+ * aggregate constant and move each constant value into it. If we
+ * get lucky, copy propagation will eliminate the extra moves.
+ */
+ if (ir->type->base_type == GLSL_TYPE_STRUCT) {
+ st_src_reg temp_base = get_temp(ir->type);
+ st_dst_reg temp = st_dst_reg(temp_base);
+
+ foreach_iter(exec_list_iterator, iter, ir->components) {
+ ir_constant *field_value = (ir_constant *)iter.get();
+ int size = type_size(field_value->type);
+
+ assert(size > 0);
+
+ field_value->accept(this); /* recursive visit; its register comes back in this->result */
+ src = this->result;
+
+ for (i = 0; i < (unsigned int)size; i++) {
+ emit(ir, TGSI_OPCODE_MOV, temp, src);
+
+ src.index++;
+ temp.index++;
+ }
+ }
+ this->result = temp_base;
+ return;
+ }
+
+ if (ir->type->is_array()) {
+ st_src_reg temp_base = get_temp(ir->type);
+ st_dst_reg temp = st_dst_reg(temp_base);
+ int size = type_size(ir->type->fields.array);
+
+ assert(size > 0);
+ in_array++; /* element visits below now select PROGRAM_CONSTANT for `file` */
+
+ for (i = 0; i < ir->type->length; i++) {
+ ir->array_elements[i]->accept(this);
+ src = this->result;
+ for (int j = 0; j < size; j++) {
+ emit(ir, TGSI_OPCODE_MOV, temp, src);
+
+ src.index++;
+ temp.index++;
+ }
+ }
+ this->result = temp_base;
+ in_array--; /* must balance the increment above on this (only) exit path */
+ return;
+ }
+
+ if (ir->type->is_matrix()) {
+ st_src_reg mat = get_temp(ir->type);
+ st_dst_reg mat_column = st_dst_reg(mat);
+
+ for (i = 0; i < ir->type->matrix_columns; i++) {
+ assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+ values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; /* start of column i in the flat float array */
+
+ src = st_src_reg(file, -1, ir->type->base_type);
+ src.index = add_constant(file,
+ values,
+ ir->type->vector_elements,
+ GL_FLOAT,
+ &src.swizzle);
+ emit(ir, TGSI_OPCODE_MOV, mat_column, src);
+
+ mat_column.index++;
+ }
+
+ this->result = mat;
+ return;
+ }
+
+ switch (ir->type->base_type) { /* scalar/vector: copy into `values`, as floats unless native_integers is set */
+ case GLSL_TYPE_FLOAT:
+ gl_type = GL_FLOAT;
+ for (i = 0; i < ir->type->vector_elements; i++) {
+ values[i].f = ir->value.f[i];
+ }
+ break;
+ case GLSL_TYPE_UINT:
+ gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
+ for (i = 0; i < ir->type->vector_elements; i++) {
+ if (native_integers)
+ values[i].u = ir->value.u[i];
+ else
+ values[i].f = ir->value.u[i];
+ }
+ break;
+ case GLSL_TYPE_INT:
+ gl_type = native_integers ? GL_INT : GL_FLOAT;
+ for (i = 0; i < ir->type->vector_elements; i++) {
+ if (native_integers)
+ values[i].i = ir->value.i[i];
+ else
+ values[i].f = ir->value.i[i];
+ }
+ break;
+ case GLSL_TYPE_BOOL:
+ gl_type = native_integers ? GL_BOOL : GL_FLOAT;
+ for (i = 0; i < ir->type->vector_elements; i++) {
+ if (native_integers)
+ values[i].b = ir->value.b[i];
+ else
+ values[i].f = ir->value.b[i];
+ }
+ break;
+ default:
+ assert(!"Non-float/uint/int/bool constant"); /* NOTE(review): with asserts compiled out, execution continues with gl_type == GL_NONE and zeroed values */
+ }
+
+ this->result = st_src_reg(file, -1, ir->type);
+ this->result.index = add_constant(file,
+ values,
+ ir->type->vector_elements,
+ gl_type,
+ &this->result.swizzle);
+}
+
+/**
+ * Return the function_entry recorded for \p sig.
+ *
+ * The first time a signature is seen a new entry is allocated from mem_ctx,
+ * given the next signature id, temporary storage is reserved for each of its
+ * parameters and (for non-void functions) its return value, and the entry is
+ * appended to this->function_signatures.
+ */
+function_entry *
+glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
+{
+   /* Reuse an existing entry when this signature was registered before. */
+   foreach_iter(exec_list_iterator, known_iter, this->function_signatures) {
+      function_entry *known = (function_entry *)known_iter.get();
+
+      if (known->sig == sig)
+         return known;
+   }
+
+   function_entry *fresh = ralloc(mem_ctx, function_entry);
+   fresh->sig = sig;
+   fresh->sig_id = this->next_signature_id++;
+   fresh->bgn_inst = NULL;
+
+   /* Reserve temporary storage for every formal parameter. */
+   foreach_iter(exec_list_iterator, formal_iter, sig->parameters) {
+      ir_variable *formal = (ir_variable *)formal_iter.get();
+      variable_storage *slot = find_variable_storage(formal);
+
+      /* A parameter must not have been given storage already. */
+      assert(!slot);
+
+      slot = new(mem_ctx) variable_storage(formal, PROGRAM_TEMPORARY,
+                                           this->next_temp);
+      this->variables.push_tail(slot);
+
+      this->next_temp += type_size(formal->type);
+   }
+
+   /* Void functions have no return register. */
+   if (sig->return_type->is_void()) {
+      fresh->return_reg = undef_src;
+   } else {
+      fresh->return_reg = get_temp(sig->return_type);
+   }
+
+   this->function_signatures.push_tail(fresh);
+   return fresh;
+}
+
+/**
+ * Emit a subroutine call.
+ *
+ * Actual arguments of "in"/"inout" parameters are MOVed into the callee's
+ * parameter storage, a CAL instruction tagged with the callee's
+ * function_entry is emitted, and the storage of "out"/"inout" parameters is
+ * MOVed back into the actual arguments.  this->result is set to the callee's
+ * return register.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_call *ir)
+{
+   ir_function_signature *callee = ir->get_callee();
+   function_entry *callee_entry = get_function_signature(callee);
+
+   /* Copy-in: walk the actual and formal parameter lists in lock step. */
+   exec_list_iterator formal_iter = callee->parameters.iterator();
+   foreach_iter(exec_list_iterator, actual_iter, *ir) {
+      ir_rvalue *actual = (ir_rvalue *)actual_iter.get();
+      ir_variable *formal = (ir_variable *)formal_iter.get();
+      const bool copy_in = formal->mode == ir_var_in ||
+                           formal->mode == ir_var_inout;
+
+      if (copy_in) {
+         variable_storage *slot = find_variable_storage(formal);
+         assert(slot);
+
+         actual->accept(this);
+         st_src_reg from = this->result;
+
+         st_dst_reg to;
+         to.file = slot->file;
+         to.index = slot->index;
+         to.reladdr = NULL;
+         to.writemask = WRITEMASK_XYZW;
+         to.cond_mask = COND_TR;
+
+         for (int n = 0; n < type_size(formal->type); n++) {
+            emit(ir, TGSI_OPCODE_MOV, to, from);
+            to.index++;
+            from.index++;
+         }
+      }
+
+      formal_iter.next();
+   }
+   assert(!formal_iter.has_next());
+
+   /* The call itself. */
+   glsl_to_tgsi_instruction *cal = emit(ir, TGSI_OPCODE_CAL);
+   cal->function = callee_entry;
+
+   /* Copy-out: same lock-step walk, in the opposite direction. */
+   formal_iter = callee->parameters.iterator();
+   foreach_iter(exec_list_iterator, actual_iter, *ir) {
+      ir_rvalue *actual = (ir_rvalue *)actual_iter.get();
+      ir_variable *formal = (ir_variable *)formal_iter.get();
+      const bool copy_out = formal->mode == ir_var_out ||
+                            formal->mode == ir_var_inout;
+
+      if (copy_out) {
+         variable_storage *slot = find_variable_storage(formal);
+         assert(slot);
+
+         st_src_reg from;
+         from.file = slot->file;
+         from.index = slot->index;
+         from.reladdr = NULL;
+         from.swizzle = SWIZZLE_NOOP;
+         from.negate = 0;
+
+         /* Evaluate the actual argument to find out where to store to. */
+         actual->accept(this);
+         st_dst_reg to = st_dst_reg(this->result);
+
+         for (int n = 0; n < type_size(formal->type); n++) {
+            emit(ir, TGSI_OPCODE_MOV, to, from);
+            to.index++;
+            from.index++;
+         }
+      }
+
+      formal_iter.next();
+   }
+   assert(!formal_iter.has_next());
+
+   /* The value of the call expression is the callee's return register. */
+   this->result = callee_entry->return_reg;
+}
+/**
+ * Translate an ir_texture into a TGSI texture instruction.
+ *
+ * The coordinate is copied into a temporary so that the projector, shadow
+ * comparator and LOD/bias can be slotted into its channels; the opcode is
+ * chosen from ir->op; the sampler index, shadow flag and texture target are
+ * recorded on the emitted instruction.  The temporary holding the result is
+ * left in this->result.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_texture *ir)
+{
+ st_src_reg result_src, coord, lod_info, projector, dx, dy;
+ st_dst_reg result_dst, coord_dst;
+ glsl_to_tgsi_instruction *inst = NULL;
+ unsigned opcode = TGSI_OPCODE_NOP;
+
+ if (ir->coordinate) {
+ ir->coordinate->accept(this);
+
+ /* Put our coords in a temp. We'll need to modify them for shadow,
+ * projection, or LOD, so the only case we'd use it as-is is if
+ * we're doing plain old texturing. The optimization passes on
+ * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
+ */
+ coord = get_temp(glsl_type::vec4_type);
+ coord_dst = st_dst_reg(coord);
+ emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
+ }
+
+ if (ir->projector) {
+ ir->projector->accept(this);
+ projector = this->result;
+ }
+
+ /* Storage for our result. Ideally for an assignment we'd be using
+ * the actual storage for the result here, instead.
+ */
+ result_src = get_temp(glsl_type::vec4_type);
+ result_dst = st_dst_reg(result_src);
+
+ switch (ir->op) { /* pick the opcode and evaluate its extra operand(s) */
+ case ir_tex:
+ opcode = TGSI_OPCODE_TEX;
+ break;
+ case ir_txb:
+ opcode = TGSI_OPCODE_TXB;
+ ir->lod_info.bias->accept(this);
+ lod_info = this->result;
+ break;
+ case ir_txl:
+ opcode = TGSI_OPCODE_TXL;
+ ir->lod_info.lod->accept(this);
+ lod_info = this->result;
+ break;
+ case ir_txd:
+ opcode = TGSI_OPCODE_TXD;
+ ir->lod_info.grad.dPdx->accept(this);
+ dx = this->result;
+ ir->lod_info.grad.dPdy->accept(this);
+ dy = this->result;
+ break;
+ case ir_txs:
+ opcode = TGSI_OPCODE_TXQ; /* emitted below with lod_info as its only source */
+ ir->lod_info.lod->accept(this);
+ lod_info = this->result;
+ break;
+ case ir_txf:
+ opcode = TGSI_OPCODE_TXF;
+ ir->lod_info.lod->accept(this);
+ lod_info = this->result;
+ break;
+ }
+
+ if (ir->projector) {
+ if (opcode == TGSI_OPCODE_TEX) {
+ /* Slot the projector in as the last component of the coord. */
+ coord_dst.writemask = WRITEMASK_W;
+ emit(ir, TGSI_OPCODE_MOV, coord_dst, projector);
+ coord_dst.writemask = WRITEMASK_XYZW;
+ opcode = TGSI_OPCODE_TXP;
+ } else {
+ st_src_reg coord_w = coord;
+ coord_w.swizzle = SWIZZLE_WWWW;
+
+ /* For the other TEX opcodes there's no projective version
+ * since the last slot is taken up by LOD info. Do the
+ * projective divide now.
+ */
+ coord_dst.writemask = WRITEMASK_W;
+ emit(ir, TGSI_OPCODE_RCP, coord_dst, projector); /* coord.w = 1 / projector */
+
+ /* In the case where we have to project the coordinates "by hand,"
+ * the shadow comparator value must also be projected.
+ */
+ st_src_reg tmp_src = coord;
+ if (ir->shadow_comparitor) {
+ /* Slot the shadow value in as the second to last component of the
+ * coord.
+ */
+ ir->shadow_comparitor->accept(this);
+
+ tmp_src = get_temp(glsl_type::vec4_type);
+ st_dst_reg tmp_dst = st_dst_reg(tmp_src);
+
+ tmp_dst.writemask = WRITEMASK_Z;
+ emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
+
+ tmp_dst.writemask = WRITEMASK_XY;
+ emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
+ }
+
+ coord_dst.writemask = WRITEMASK_XYZ;
+ emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); /* coord.xyz = tmp_src.xyz * coord.w */
+
+ coord_dst.writemask = WRITEMASK_XYZW;
+ coord.swizzle = SWIZZLE_XYZW;
+ }
+ }
+
+ /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
+ * comparator was put in the correct place (and projected) by the code,
+ * above, that handles by-hand projection.
+ */
+ if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
+ /* Slot the shadow value in as the second to last component of the
+ * coord.
+ */
+ ir->shadow_comparitor->accept(this);
+ coord_dst.writemask = WRITEMASK_Z;
+ emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
+ coord_dst.writemask = WRITEMASK_XYZW;
+ }
+
+ if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
+ opcode == TGSI_OPCODE_TXF) {
+ /* TGSI stores LOD or LOD bias in the last channel of the coords. */
+ coord_dst.writemask = WRITEMASK_W;
+ emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
+ coord_dst.writemask = WRITEMASK_XYZW;
+ }
+
+ if (opcode == TGSI_OPCODE_TXD)
+ inst = emit(ir, opcode, result_dst, coord, dx, dy);
+ else if (opcode == TGSI_OPCODE_TXQ)
+ inst = emit(ir, opcode, result_dst, lod_info);
+ else
+ inst = emit(ir, opcode, result_dst, coord);
+
+ if (ir->shadow_comparitor)
+ inst->tex_shadow = GL_TRUE;
+
+ inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
+ this->shader_program,
+ this->prog);
+
+ const glsl_type *sampler_type = ir->sampler->type;
+
+ switch (sampler_type->sampler_dimensionality) { /* map the GLSL sampler dimensionality to a texture target index */
+ case GLSL_SAMPLER_DIM_1D:
+ inst->tex_target = (sampler_type->sampler_array)
+ ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_2D:
+ inst->tex_target = (sampler_type->sampler_array)
+ ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_3D:
+ inst->tex_target = TEXTURE_3D_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_CUBE:
+ inst->tex_target = TEXTURE_CUBE_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_RECT:
+ inst->tex_target = TEXTURE_RECT_INDEX;
+ break;
+ case GLSL_SAMPLER_DIM_BUF:
+ assert(!"FINISHME: Implement ARB_texture_buffer_object"); /* tex_target is left unset on this path */
+ break;
+ default:
+ assert(!"Should not get here.");
+ }
+
+ this->result = result_src;
+}
+
+/**
+ * Emit a RET.  When the return statement carries a value, it is first MOVed,
+ * one type_size() unit at a time, into the current function's return
+ * register.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_return *ir)
+{
+   if (ir->get_value()) {
+      assert(current_function);
+
+      ir->get_value()->accept(this);
+
+      st_src_reg from = this->result;
+      st_dst_reg to;
+      to = st_dst_reg(current_function->return_reg);
+
+      int n = 0;
+      while (n < type_size(current_function->sig->return_type)) {
+         emit(ir, TGSI_OPCODE_MOV, to, from);
+         to.index++;
+         from.index++;
+         n++;
+      }
+   }
+
+   emit(ir, TGSI_OPCODE_RET);
+}
+
+/**
+ * Emit a fragment kill: KILP for an unconditional discard, otherwise KIL on
+ * the condition with its negate bits inverted.  Also flags the fragment
+ * program as using kill.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_discard *ir)
+{
+   struct gl_fragment_program *frag_prog =
+      (struct gl_fragment_program *)this->prog;
+
+   if (!ir->condition) {
+      emit(ir, TGSI_OPCODE_KILP);
+   } else {
+      ir->condition->accept(this);
+      this->result.negate = ~this->result.negate;
+      emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result);
+   }
+
+   frag_prog->UsesKill = GL_TRUE;
+}
+
+/**
+ * Emit IF / [ELSE] / ENDIF around the translated branches of an ir_if.
+ *
+ * With EmitCondCodes set, the instruction that produced the condition is
+ * marked to update the condition code (a MOV to a temp is emitted when the
+ * condition produced no instruction of its own) and the IF tests COND_NE.
+ * Otherwise the IF takes the condition register as its source operand.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_if *ir)
+{
+   glsl_to_tgsi_instruction *cond_inst, *if_inst;
+   glsl_to_tgsi_instruction *prev_inst;
+
+   /* Remember the current tail so we can tell below whether evaluating the
+    * condition appended any instruction.
+    */
+   prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+
+   ir->condition->accept(this);
+   assert(this->result.file != PROGRAM_UNDEFINED);
+
+   if (this->options->EmitCondCodes) {
+      cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+
+      /* See if we actually generated any instruction for generating
+       * the condition.  If not, then cook up a move to a temp so we
+       * have something to set cond_update on.
+       */
+      if (cond_inst == prev_inst) {
+         st_src_reg temp = get_temp(glsl_type::bool_type);
+         cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result);
+      }
+      cond_inst->cond_update = GL_TRUE;
+
+      if_inst = emit(ir->condition, TGSI_OPCODE_IF);
+      if_inst->dst.cond_mask = COND_NE;
+   } else {
+      emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result);
+   }
+
+   /* emit() has already appended the IF to this->instructions (the
+    * get_tail() comparison above depends on that), so it is not pushed onto
+    * the list a second time here: re-inserting a node that is already the
+    * tail would leave its links pointing at itself.
+    */
+
+   visit_exec_list(&ir->then_instructions, this);
+
+   if (!ir->else_instructions.is_empty()) {
+      emit(ir->condition, TGSI_OPCODE_ELSE);
+      visit_exec_list(&ir->else_instructions, this);
+   }
+
+   emit(ir->condition, TGSI_OPCODE_ENDIF);
+}
+
+/**
+ * Set up an empty visitor: no result yet, counters at their starting values,
+ * no indirect addressing seen, and a private ralloc context for everything
+ * the visitor allocates.
+ */
+glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
+{
+   /* Allocation context owned by this visitor. */
+   mem_ctx = ralloc_context(NULL);
+
+   /* Nothing has been visited yet. */
+   result.file = PROGRAM_UNDEFINED;
+   current_function = NULL;
+
+   /* Counters. */
+   next_temp = 1;
+   next_signature_id = 1;
+   num_immediates = 0;
+   num_address_regs = 0;
+
+   /* Indirect addressing flags. */
+   indirect_addr_temps = false;
+   indirect_addr_consts = false;
+}
+/** Frees the ralloc context (mem_ctx) that the constructor created. */
+glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
+{
+ ralloc_free(mem_ctx);
+}
+/** C-linkage entry point that destroys a visitor with operator delete. */
+extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
+{
+ delete v;
+}
+
+
+/**
+ * Walk the visitor's instruction list and record which texture samplers the
+ * program uses: the sampler bitmask (stored in both the visitor and the
+ * program), each used sampler's texture target, and the shadow-sampler
+ * bitmask.  Finishes by refreshing the program's used-textures state.
+ */
+static void
+count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
+{
+   v->samplers_used = 0;
+
+   foreach_iter(exec_list_iterator, inst_iter, v->instructions) {
+      glsl_to_tgsi_instruction *tex =
+         (glsl_to_tgsi_instruction *)inst_iter.get();
+
+      /* Only texture instructions reference a sampler. */
+      if (!is_tex_instruction(tex->op))
+         continue;
+
+      v->samplers_used |= 1 << tex->sampler;
+      prog->SamplerTargets[tex->sampler] = (gl_texture_index)tex->tex_target;
+
+      if (tex->tex_shadow)
+         prog->ShadowSamplers |= 1 << tex->sampler;
+   }
+
+   prog->SamplersUsed = v->samplers_used;
+   _mesa_update_shader_textures_used(prog);
+}
+
+
+/**
+ * Verify that the program stays within the context's resource limits for
+ * its target (sampler count and constant count) and record a link error on
+ * \p shader_program for every limit that is exceeded.
+ *
+ * XXX more checks are needed...
+ */
+static void
+check_resources(const struct gl_context *ctx,
+                struct gl_shader_program *shader_program,
+                glsl_to_tgsi_visitor *prog,
+                struct gl_program *proginfo)
+{
+   if (proginfo->Target == GL_VERTEX_PROGRAM_ARB) {
+      if (_mesa_bitcount(prog->samplers_used) >
+          ctx->Const.MaxVertexTextureImageUnits)
+         fail_link(shader_program, "Too many vertex shader texture samplers");
+
+      if (proginfo->Parameters->NumParameters > MAX_UNIFORMS)
+         fail_link(shader_program, "Too many vertex shader constants");
+   } else if (proginfo->Target == MESA_GEOMETRY_PROGRAM) {
+      if (_mesa_bitcount(prog->samplers_used) >
+          ctx->Const.MaxGeometryTextureImageUnits)
+         fail_link(shader_program, "Too many geometry shader texture samplers");
+
+      if (proginfo->Parameters->NumParameters >
+          MAX_GEOMETRY_UNIFORM_COMPONENTS / 4)
+         fail_link(shader_program, "Too many geometry shader constants");
+   } else if (proginfo->Target == GL_FRAGMENT_PROGRAM_ARB) {
+      if (_mesa_bitcount(prog->samplers_used) >
+          ctx->Const.MaxTextureImageUnits)
+         fail_link(shader_program, "Too many fragment shader texture samplers");
+
+      if (proginfo->Parameters->NumParameters > MAX_UNIFORMS)
+         fail_link(shader_program, "Too many fragment shader constants");
+   } else {
+      _mesa_problem(ctx, "unexpected program type in check_resources()");
+   }
+}
+
+
+/** Sort record pairing a uniform with the position it is ordered by. */
+struct uniform_sort {
+ struct gl_uniform *u; /* the uniform itself */
+ int pos; /* sort key: the uniform's parameter position for the current shader target */
+};
+
+/* qsort() comparison callback ordering uniform_sort records by ascending
+ * position.
+ *
+ * shader_program->Uniforms is nearly, but not exactly, ordered by
+ * uniform->{Frag,Vert}Pos when uniforms are shared between targets, while
+ * parameters have to be added in increasing order for each target.
+ */
+static int
+sort_uniforms(const void *a, const void *b)
+{
+   const struct uniform_sort *lhs = (const struct uniform_sort *)a;
+   const struct uniform_sort *rhs = (const struct uniform_sort *)b;
+   const int delta = lhs->pos - rhs->pos;
+
+   return delta;
+}
+
+/* Add the uniforms to the parameters. The linker chose locations
+ * in our parameters lists (which weren't created yet), which the
+ * uniforms code will use to poke values into our parameters list
+ * when uniforms are updated.
+ *
+ * Uniforms used by `shader`'s stage are collected, sorted by their position
+ * for that stage, and added to prog->Parameters in that order. A link
+ * error is recorded on `shader_program` when a newly added parameter does
+ * not land at the position the linker chose.
+ */
+static void
+add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
+ struct gl_shader *shader,
+ struct gl_program *prog)
+{
+ unsigned int i;
+ unsigned int next_sampler = 0, num_uniforms = 0;
+ struct uniform_sort *sorted_uniforms;
+
+ sorted_uniforms = ralloc_array(NULL, struct uniform_sort,
+ shader_program->Uniforms->NumUniforms); /* scratch array, freed at the end of this function */
+
+ for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) {
+ struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i;
+ int parameter_index = -1;
+
+ switch (shader->Type) { /* pick the position recorded for this shader's stage */
+ case GL_VERTEX_SHADER:
+ parameter_index = uniform->VertPos;
+ break;
+ case GL_FRAGMENT_SHADER:
+ parameter_index = uniform->FragPos;
+ break;
+ case GL_GEOMETRY_SHADER:
+ parameter_index = uniform->GeomPos;
+ break;
+ }
+
+ /* Only add uniforms used in our target. */
+ if (parameter_index != -1) {
+ sorted_uniforms[num_uniforms].pos = parameter_index;
+ sorted_uniforms[num_uniforms].u = uniform;
+ num_uniforms++;
+ }
+ }
+
+ qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort),
+ sort_uniforms);
+
+ for (i = 0; i < num_uniforms; i++) {
+ struct gl_uniform *uniform = sorted_uniforms[i].u;
+ int parameter_index = sorted_uniforms[i].pos;
+ const glsl_type *type = uniform->Type;
+ unsigned int size;
+
+ if (type->is_vector() ||
+ type->is_scalar()) {
+ size = type->vector_elements;
+ } else {
+ size = type_size(type) * 4; /* presumably type_size() counts vec4 slots, hence the * 4 -- confirm */
+ }
+
+ gl_register_file file;
+ if (type->is_sampler() ||
+ (type->is_array() && type->fields.array->is_sampler())) {
+ file = PROGRAM_SAMPLER;
+ } else {
+ file = PROGRAM_UNIFORM;
+ }
+
+ GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1,
+ uniform->Name);
+
+ if (index < 0) { /* not in the list yet: add it now */
+ index = _mesa_add_parameter(prog->Parameters, file,
+ uniform->Name, size, type->gl_type,
+ NULL, NULL, 0x0);
+
+ /* Sampler uniform values are stored in prog->SamplerUnits,
+ * and the entry in that array is selected by this index we
+ * store in ParameterValues[].
+ */
+ if (file == PROGRAM_SAMPLER) {
+ for (unsigned int j = 0; j < size / 4; j++)
+ prog->Parameters->ParameterValues[index + j][0].f = next_sampler++;
+ }
+
+ /* The location chosen in the Parameters list here (returned
+ * from _mesa_add_parameter) has to match what the linker chose.
+ */
+ if (index != parameter_index) {
+ fail_link(shader_program, "Allocation of uniform `%s' to target "
+ "failed (%d vs %d)\n",
+ uniform->Name, index, parameter_index);
+ }
+ }
+ }
+
+ ralloc_free(sorted_uniforms);
+}
+/**
+ * Upload the constant initializer `val` of the uniform called `name`.
+ *
+ * Records recurse field by field using the name "name.field". Otherwise
+ * the uniform's location is looked up (a link error is recorded if it cannot
+ * be found) and the value -- or each element of an array -- is passed to
+ * _mesa_uniform() or _mesa_uniform_matrix(). Scratch memory is allocated
+ * from `mem_ctx`.
+ */
+static void
+set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
+ struct gl_shader_program *shader_program,
+ const char *name, const glsl_type *type,
+ ir_constant *val)
+{
+ if (type->is_record()) {
+ ir_constant *field_constant;
+
+ field_constant = (ir_constant *)val->components.get_head();
+
+ for (unsigned int i = 0; i < type->length; i++) {
+ const glsl_type *field_type = type->fields.structure[i].type;
+ const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
+ type->fields.structure[i].name);
+ set_uniform_initializer(ctx, mem_ctx, shader_program, field_name,
+ field_type, field_constant);
+ field_constant = (ir_constant *)field_constant->next; /* advance to the next field's constant */
+ }
+ return;
+ }
+
+ int loc = _mesa_get_uniform_location(ctx, shader_program, name);
+
+ if (loc == -1) {
+ fail_link(shader_program,
+ "Couldn't find uniform for initializer %s\n", name);
+ return;
+ }
+
+ for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) { /* a non-array is handled as a single element */
+ ir_constant *element;
+ const glsl_type *element_type;
+ if (type->is_array()) {
+ element = val->array_elements[i];
+ element_type = type->fields.array;
+ } else {
+ element = val;
+ element_type = type;
+ }
+
+ void *values;
+
+ if (element_type->base_type == GLSL_TYPE_BOOL) {
+ int *conv = ralloc_array(mem_ctx, int, element_type->components()); /* bools are widened to ints and uploaded as an int type */
+ for (unsigned int j = 0; j < element_type->components(); j++) {
+ conv[j] = element->value.b[j];
+ }
+ values = (void *)conv;
+ element_type = glsl_type::get_instance(GLSL_TYPE_INT,
+ element_type->vector_elements,
+ 1);
+ } else {
+ values = &element->value;
+ }
+
+ if (element_type->is_matrix()) {
+ _mesa_uniform_matrix(ctx, shader_program,
+ element_type->matrix_columns,
+ element_type->vector_elements,
+ loc, 1, GL_FALSE, (GLfloat *)values);
+ loc += element_type->matrix_columns; /* step past this matrix's columns */
+ } else {
+ _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
+ values, element_type->gl_type);
+ loc += type_size(element_type);
+ }
+ }
+}
+
+/**
+ * Apply the constant initializers of all uniform variables found in the
+ * linked shaders of \p shader_program.  A scratch ralloc context is created
+ * lazily for the first initializer and released before returning.
+ */
+static void
+set_uniform_initializers(struct gl_context *ctx,
+                         struct gl_shader_program *shader_program)
+{
+   void *scratch = NULL;
+
+   for (unsigned int stage = 0; stage < MESA_SHADER_TYPES; stage++) {
+      struct gl_shader *linked = shader_program->_LinkedShaders[stage];
+
+      if (linked != NULL) {
+         foreach_iter(exec_list_iterator, ir_iter, *linked->ir) {
+            ir_instruction *node = (ir_instruction *)ir_iter.get();
+            ir_variable *uniform = node->as_variable();
+
+            /* Only uniform variables with a constant initializer matter. */
+            if (uniform && uniform->mode == ir_var_uniform &&
+                uniform->constant_value) {
+               if (scratch == NULL)
+                  scratch = ralloc_context(NULL);
+
+               set_uniform_initializer(ctx, scratch, shader_program,
+                                       uniform->name, uniform->type,
+                                       uniform->constant_value);
+            }
+         }
+      }
+   }
+
+   ralloc_free(scratch);
+}
+
+/*
+ * Scan/rewrite program to remove reads of custom (output) registers.
+ * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING
+ * (for vertex shaders).
+ * In GLSL shaders, varying vars can be read and written.
+ * On some hardware, trying to read an output register causes trouble.
+ * So, rewrite the program to use a temporary register in this case.
+ *
+ * Every register of the given file that is read gets a free temporary
+ * assigned; reads and writes of that register are redirected to the
+ * temporary, and a MOV from the temporary to the real register is appended
+ * at the end of the instruction list.
+ *
+ * Based on _mesa_remove_output_reads from programopt.c.
+ */
+void
+glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
+{
+   GLuint i;
+   GLint outputMap[VERT_RESULT_MAX];   /* register index -> temp, or -1 */
+   GLint outputTypes[VERT_RESULT_MAX]; /* only valid where outputMap >= 0 */
+   GLuint numVaryingReads = 0;
+   GLboolean usedTemps[MAX_TEMPS];
+   GLuint firstTemp = 0;
+
+   _mesa_find_used_registers(prog, PROGRAM_TEMPORARY,
+                             usedTemps, MAX_TEMPS);
+
+   assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT);
+   assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING);
+
+   for (i = 0; i < VERT_RESULT_MAX; i++)
+      outputMap[i] = -1;
+
+   /* look for instructions which read from varying vars */
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      const GLuint numSrc = num_inst_src_regs(inst->op);
+      GLuint j;
+      for (j = 0; j < numSrc; j++) {
+         if (inst->src[j].file == type) {
+            /* replace the read with a temp reg */
+            const GLuint var = inst->src[j].index;
+
+            /* The map arrays only have VERT_RESULT_MAX slots; catch an
+             * out-of-range index before it is used to write to them.
+             */
+            assert(var < (GLuint) VERT_RESULT_MAX);
+
+            if (outputMap[var] == -1) {
+               numVaryingReads++;
+               outputMap[var] = _mesa_find_free_register(usedTemps,
+                                                         MAX_TEMPS,
+                                                         firstTemp);
+               outputTypes[var] = inst->src[j].type;
+               firstTemp = outputMap[var] + 1;
+            }
+            inst->src[j].file = PROGRAM_TEMPORARY;
+            inst->src[j].index = outputMap[var];
+         }
+      }
+   }
+
+   if (numVaryingReads == 0)
+      return; /* nothing to be done */
+
+   /* look for instructions which write to the varying vars identified above */
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      if (inst->dst.file != type)
+         continue;
+
+      /* Same bound as for the reads above. */
+      assert((GLuint) inst->dst.index < (GLuint) VERT_RESULT_MAX);
+
+      if (outputMap[inst->dst.index] >= 0) {
+         /* change inst to write to the temp reg, instead of the varying */
+         inst->dst.file = PROGRAM_TEMPORARY;
+         inst->dst.index = outputMap[inst->dst.index];
+      }
+   }
+
+   /* insert new MOV instructions at the end */
+   for (i = 0; i < VERT_RESULT_MAX; i++) {
+      if (outputMap[i] >= 0) {
+         /* MOV VAR[i], TEMP[tmp]; */
+         st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]);
+         st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]);
+         dst.index = i;
+         this->emit(NULL, TGSI_OPCODE_MOV, dst, src);
+      }
+   }
+}
+
+/**
+ * Compute which channels of \p src an instruction reads, as a bitmask of
+ * WRITEMASK_X,Y,Z,W: for every channel enabled in \p dst's writemask, the
+ * channel selected by the corresponding swizzle component of \p src.
+ */
+static int
+get_src_arg_mask(st_dst_reg dst, st_src_reg src)
+{
+   int channels_read = 0;
+
+   for (int chan = 0; chan < 4; ++chan) {
+      const unsigned selected = GET_SWZ(src.swizzle, chan);
+      ASSERT(selected < 4);
+
+      /* Channels that are not written do not read anything. */
+      if (!(dst.writemask & (1 << chan)))
+         continue;
+
+      /* Swizzle selectors beyond W do not name a register channel. */
+      if (selected > SWIZZLE_W)
+         continue;
+
+      channels_read |= 1 << selected;
+   }
+
+   return channels_read;
+}
+
+/**
+ * Turn "CMP T0, T1, T2, T0" into "MOV T0, T2" whenever the CMP is the first
+ * instruction writing the affected channels of T0.
+ *
+ * Lowering passes on the GLSL IR (branch flattening, relative addressing,
+ * ...) generate many conditional assignments, which end up as CMPs of this
+ * shape.
+ *
+ * Why this is safe: the CMP behaves like
+ *    if (T1 < 0.0)
+ *       MOV T0, T2;
+ *    else
+ *       MOV T0, T0;
+ * When the condition holds, the MOV does exactly what the CMP did.  When it
+ * does not, the CMP would have copied T0 onto itself, and since T0 was never
+ * written before, its contents are garbage either way; anything reading T0
+ * afterwards was going to read an undefined value regardless.
+ *
+ * The pass stops at the first instruction with a relatively addressed
+ * destination or any kind of flow control.
+ */
+void
+glsl_to_tgsi_visitor::simplify_cmp(void)
+{
+   /* Channels written so far, per register. */
+   unsigned tempWrites[MAX_TEMPS];
+   unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
+
+   memset(tempWrites, 0, sizeof(tempWrites));
+   memset(outputWrites, 0, sizeof(outputWrites));
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      /* Give up if we encounter relative addressing or flow control. */
+      if (inst->dst.reladdr ||
+          tgsi_get_opcode_info(inst->op)->is_branch ||
+          inst->op == TGSI_OPCODE_BGNSUB ||
+          inst->op == TGSI_OPCODE_CONT ||
+          inst->op == TGSI_OPCODE_END ||
+          inst->op == TGSI_OPCODE_ENDSUB ||
+          inst->op == TGSI_OPCODE_RET) {
+         return;
+      }
+
+      /* Find the write-tracking slot for the destination, if it has one. */
+      unsigned *written = NULL;
+      if (inst->dst.file == PROGRAM_OUTPUT) {
+         assert(inst->dst.index < MAX_PROGRAM_OUTPUTS);
+         written = &outputWrites[inst->dst.index];
+      } else if (inst->dst.file == PROGRAM_TEMPORARY) {
+         assert(inst->dst.index < MAX_TEMPS);
+         written = &tempWrites[inst->dst.index];
+      }
+
+      /* Channels written before this instruction; then account for it. */
+      unsigned writtenBefore = 0;
+      if (written) {
+         writtenBefore = *written;
+         *written |= inst->dst.writemask;
+      }
+
+      if (inst->op != TGSI_OPCODE_CMP)
+         continue;
+
+      /* Some of the channels were already written: not a first write. */
+      if (inst->dst.writemask & writtenBefore)
+         continue;
+
+      /* For a CMP to be considered a conditional write, the destination
+       * register and source register two must be the same. */
+      if (inst->src[2].file == inst->dst.file
+          && inst->src[2].index == inst->dst.index
+          && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) {
+         inst->op = TGSI_OPCODE_MOV;
+         inst->src[0] = inst->src[1];
+      }
+   }
+}
+
+/* Rewrites every use of temporary register `index`, as a source operand or
+ * as a destination, to refer to `new_index` instead.
+ */
+void
+glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
+{
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *cur = (glsl_to_tgsi_instruction *)iter.get();
+      const unsigned num_srcs = num_inst_src_regs(cur->op);
+
+      /* Source operands. */
+      for (unsigned s = 0; s < num_srcs; s++) {
+         if (cur->src[s].file != PROGRAM_TEMPORARY)
+            continue;
+         if (cur->src[s].index == index)
+            cur->src[s].index = new_index;
+      }
+
+      /* Destination. */
+      if (cur->dst.file == PROGRAM_TEMPORARY && cur->dst.index == index)
+         cur->dst.index = new_index;
+   }
+}
+
+/**
+ * Returns the position of the first instruction that reads temporary
+ * \p index, or -1 when it is never read.  A read found inside a loop is
+ * reported at the position of the outermost enclosing BGNLOOP.
+ */
+int
+glsl_to_tgsi_visitor::get_first_temp_read(int index)
+{
+   int nesting = 0;         /* current loop nesting depth */
+   int outer_loop_pos = -1; /* position of the outermost open BGNLOOP */
+   unsigned pos = 0;        /* position of the current instruction */
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *cur = (glsl_to_tgsi_instruction *)iter.get();
+
+      for (unsigned s = 0; s < num_inst_src_regs(cur->op); s++) {
+         const bool reads_temp = cur->src[s].file == PROGRAM_TEMPORARY &&
+                                 cur->src[s].index == index;
+         if (!reads_temp)
+            continue;
+
+         if (nesting == 0)
+            return pos;
+         return outer_loop_pos;
+      }
+
+      switch (cur->op) {
+      case TGSI_OPCODE_BGNLOOP:
+         if (nesting == 0)
+            outer_loop_pos = pos;
+         nesting++;
+         break;
+      case TGSI_OPCODE_ENDLOOP:
+         nesting--;
+         if (nesting == 0)
+            outer_loop_pos = -1;
+         break;
+      default:
+         break;
+      }
+      assert(nesting >= 0);
+
+      pos++;
+   }
+
+   return -1;
+}
+
+/**
+ * Returns the position of the first instruction that writes temporary
+ * \p index, or -1 when it is never written.  A write found inside a loop is
+ * reported at the position of the outermost enclosing BGNLOOP.
+ */
+int
+glsl_to_tgsi_visitor::get_first_temp_write(int index)
+{
+   int nesting = 0;         /* current loop nesting depth */
+   int outer_loop_pos = -1; /* position of the outermost open BGNLOOP */
+   int pos = 0;             /* position of the current instruction */
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *cur = (glsl_to_tgsi_instruction *)iter.get();
+      const bool writes_temp = cur->dst.file == PROGRAM_TEMPORARY &&
+                               cur->dst.index == index;
+
+      if (writes_temp) {
+         if (nesting == 0)
+            return pos;
+         return outer_loop_pos;
+      }
+
+      switch (cur->op) {
+      case TGSI_OPCODE_BGNLOOP:
+         if (nesting == 0)
+            outer_loop_pos = pos;
+         nesting++;
+         break;
+      case TGSI_OPCODE_ENDLOOP:
+         nesting--;
+         if (nesting == 0)
+            outer_loop_pos = -1;
+         break;
+      default:
+         break;
+      }
+      assert(nesting >= 0);
+
+      pos++;
+   }
+
+   return -1;
+}
+
+/**
+ * Returns the position of the last instruction that reads temporary
+ * \p index, or -1 when it is never read.
+ *
+ * A read inside a loop is first recorded with the placeholder -2 and then
+ * replaced by the position of the ENDLOOP that closes the outermost
+ * enclosing loop.
+ */
+int
+glsl_to_tgsi_visitor::get_last_temp_read(int index)
+{
+   int depth = 0; /* loop depth */
+   int last = -1; /* index of last instruction that reads the temporary */
+   int i = 0;     /* position of the current instruction; signed like
+                   * `last`, so the -2 placeholder below is never converted
+                   * through an unsigned type */
+   unsigned j;
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      for (j = 0; j < num_inst_src_regs(inst->op); j++) {
+         if (inst->src[j].file == PROGRAM_TEMPORARY &&
+             inst->src[j].index == index) {
+            last = (depth == 0) ? i : -2;
+         }
+      }
+
+      if (inst->op == TGSI_OPCODE_BGNLOOP) {
+         depth++;
+      } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
+         /* Leaving the outermost loop: resolve the placeholder. */
+         if (--depth == 0 && last == -2)
+            last = i;
+      }
+      assert(depth >= 0);
+
+      i++;
+   }
+
+   assert(last >= -1);
+   return last;
+}
+
+/**
+ * Returns the position of the last instruction that writes temporary
+ * \p index, or -1 when it is never written.  A write inside a loop is
+ * reported at the position of the ENDLOOP closing the outermost enclosing
+ * loop.
+ */
+int
+glsl_to_tgsi_visitor::get_last_temp_write(int index)
+{
+   int nesting = 0;   /* current loop nesting depth */
+   int last_pos = -1; /* result so far; -2 marks "inside a loop, pending" */
+   int pos = 0;       /* position of the current instruction */
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *cur = (glsl_to_tgsi_instruction *)iter.get();
+
+      if (cur->dst.file == PROGRAM_TEMPORARY && cur->dst.index == index) {
+         if (nesting == 0)
+            last_pos = pos;
+         else
+            last_pos = -2;
+      }
+
+      switch (cur->op) {
+      case TGSI_OPCODE_BGNLOOP:
+         nesting++;
+         break;
+      case TGSI_OPCODE_ENDLOOP:
+         nesting--;
+         if (nesting == 0 && last_pos == -2)
+            last_pos = pos;
+         break;
+      default:
+         break;
+      }
+      assert(nesting >= 0);
+
+      pos++;
+   }
+
+   assert(last_pos >= -1);
+   return last_pos;
+}
+
+/*
+ * On a basic block basis, tracks available PROGRAM_TEMPORARY register
+ * channels for copy propagation and updates following instructions to
+ * use the original versions.
+ *
+ * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
+ * will occur. As an example, a TXP production before this pass:
+ *
+ * 0: MOV TEMP[1], INPUT[4].xyyy;
+ * 1: MOV TEMP[1].w, INPUT[4].wwww;
+ * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
+ *
+ * and after:
+ *
+ * 0: MOV TEMP[1], INPUT[4].xyyy;
+ * 1: MOV TEMP[1].w, INPUT[4].wwww;
+ * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
+ *
+ * which allows for dead code elimination on TEMP[1]'s writes.
+ */
+void
+glsl_to_tgsi_visitor::copy_propagate(void)
+{
+   /* "Available copies" table: acp[4 * reg + chan] is the MOV that last wrote
+    * channel chan of TEMP[reg] and can still be propagated, or NULL.
+    * acp_level[] holds the IF nesting depth at which that MOV was recorded.
+    */
+   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
+                                                   glsl_to_tgsi_instruction *,
+                                                   this->next_temp * 4);
+   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
+   int level = 0;
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      assert(inst->dst.file != PROGRAM_TEMPORARY
+             || inst->dst.index < this->next_temp);
+
+      /* First, do any copy propagation possible into the src regs. */
+      for (int r = 0; r < 3; r++) {
+         glsl_to_tgsi_instruction *first = NULL;
+         bool good = true;
+         int acp_base = inst->src[r].index * 4;
+
+         if (inst->src[r].file != PROGRAM_TEMPORARY ||
+             inst->src[r].reladdr)
+            continue;
+
+         /* See if we can find entries in the ACP consisting of MOVs
+          * from the same src register for all the swizzled channels
+          * of this src register reference.
+          */
+         for (int i = 0; i < 4; i++) {
+            int src_chan = GET_SWZ(inst->src[r].swizzle, i);
+            glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
+
+            if (!copy_chan) {
+               good = false;
+               break;
+            }
+
+            assert(acp_level[acp_base + src_chan] <= level);
+
+            if (!first) {
+               first = copy_chan;
+            } else {
+               if (first->src[0].file != copy_chan->src[0].file ||
+                   first->src[0].index != copy_chan->src[0].index) {
+                  good = false;
+                  break;
+               }
+            }
+         }
+
+         if (good) {
+            /* We've now validated that we can copy-propagate to
+             * replace this src register reference.  Do it.
+             */
+            inst->src[r].file = first->src[0].file;
+            inst->src[r].index = first->src[0].index;
+
+            /* Compose the reference's swizzle with the swizzle of the MOV
+             * that supplied each channel.  inst->src[r].swizzle is still the
+             * old swizzle while this loop runs; it is replaced afterwards.
+             */
+            int swizzle = 0;
+            for (int i = 0; i < 4; i++) {
+               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
+               glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
+               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
+                           (3 * i));
+            }
+            inst->src[r].swizzle = swizzle;
+         }
+      }
+
+      switch (inst->op) {
+      case TGSI_OPCODE_BGNLOOP:
+      case TGSI_OPCODE_ENDLOOP:
+         /* End of a basic block, clear the ACP entirely. */
+         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
+         break;
+
+      case TGSI_OPCODE_IF:
+         ++level;
+         break;
+
+      case TGSI_OPCODE_ENDIF:
+      case TGSI_OPCODE_ELSE:
+         /* Clear all channels written inside the block from the ACP, but
+          * leaving those that were not touched.
+          */
+         for (int r = 0; r < this->next_temp; r++) {
+            for (int c = 0; c < 4; c++) {
+               if (!acp[4 * r + c])
+                  continue;
+
+               if (acp_level[4 * r + c] >= level)
+                  acp[4 * r + c] = NULL;
+            }
+         }
+         if (inst->op == TGSI_OPCODE_ENDIF)
+            --level;
+         break;
+
+      default:
+         /* Continuing the block, clear any written channels from
+          * the ACP.
+          */
+         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
+            /* Any temporary might be written, so no copy propagation
+             * across this instruction.
+             */
+            memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
+         } else if (inst->dst.file == PROGRAM_OUTPUT &&
+                    inst->dst.reladdr) {
+            /* Any output might be written, so no copy propagation
+             * from outputs across this instruction.
+             */
+            for (int r = 0; r < this->next_temp; r++) {
+               for (int c = 0; c < 4; c++) {
+                  if (!acp[4 * r + c])
+                     continue;
+
+                  if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
+                     acp[4 * r + c] = NULL;
+               }
+            }
+         } else if (inst->dst.file == PROGRAM_TEMPORARY ||
+                    inst->dst.file == PROGRAM_OUTPUT) {
+            /* Clear where it's used as dst. */
+            if (inst->dst.file == PROGRAM_TEMPORARY) {
+               for (int c = 0; c < 4; c++) {
+                  if (inst->dst.writemask & (1 << c)) {
+                     acp[4 * inst->dst.index + c] = NULL;
+                  }
+               }
+            }
+
+            /* Clear where it's used as src. */
+            for (int r = 0; r < this->next_temp; r++) {
+               for (int c = 0; c < 4; c++) {
+                  if (!acp[4 * r + c])
+                     continue;
+
+                  int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
+
+                  if (acp[4 * r + c]->src[0].file == inst->dst.file &&
+                      acp[4 * r + c]->src[0].index == inst->dst.index &&
+                      inst->dst.writemask & (1 << src_chan))
+                  {
+                     acp[4 * r + c] = NULL;
+                  }
+               }
+            }
+         }
+         break;
+      }
+
+      /* If this is a copy, add it to the ACP.
+       *
+       * A MOV that reads the same temporary it writes (for example
+       * MOV TEMP[1].xy, TEMP[1].yxxx) is deliberately not recorded: the
+       * invalidation above runs before this point, so such an entry would
+       * survive even though the instruction has just overwritten the very
+       * channels it names as its source, and later reads would be redirected
+       * to a channel that no longer holds the copied value.
+       */
+      if (inst->op == TGSI_OPCODE_MOV &&
+          inst->dst.file == PROGRAM_TEMPORARY &&
+          !(inst->dst.file == inst->src[0].file &&
+            inst->dst.index == inst->src[0].index) &&
+          !inst->dst.reladdr &&
+          !inst->saturate &&
+          !inst->src[0].reladdr &&
+          !inst->src[0].negate) {
+         for (int i = 0; i < 4; i++) {
+            if (inst->dst.writemask & (1 << i)) {
+               acp[4 * inst->dst.index + i] = inst;
+               acp_level[4 * inst->dst.index + i] = level;
+            }
+         }
+      }
+   }
+
+   ralloc_free(acp_level);
+   ralloc_free(acp);
+}
+
+/*
+ * Simple whole-register dead code elimination for PROGRAM_TEMPORARY
+ * registers.
+ *
+ * For every temporary, each instruction that writes it and whose position in
+ * the instruction list is greater than the value reported by
+ * get_last_temp_read() for that temporary is unlinked and deleted.
+ *
+ * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
+ * will occur.  As an example, a TXP production after copy propagation but
+ * before this pass:
+ *
+ * 0: MOV TEMP[1], INPUT[4].xyyy;
+ * 1: MOV TEMP[1].w, INPUT[4].wwww;
+ * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
+ *
+ * and after this pass:
+ *
+ * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
+ *
+ * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB)
+ * FIXME: doesn't eliminate all dead code inside of loops; it steps around them
+ */
+void
+glsl_to_tgsi_visitor::eliminate_dead_code(void)
+{
+   for (int temp = 0; temp < this->next_temp; temp++) {
+      const int last_read = get_last_temp_read(temp);
+      /* Position of the current instruction, counted over the list as it
+       * was when this walk started (removed instructions still count).
+       */
+      int position = 0;
+
+      foreach_iter(exec_list_iterator, iter, this->instructions) {
+         glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+         const bool writes_temp = inst->dst.file == PROGRAM_TEMPORARY &&
+                                  inst->dst.index == temp;
+
+         if (writes_temp && position > last_read) {
+            iter.remove();
+            delete inst;
+         }
+
+         position++;
+      }
+   }
+}
+
+/*
+ * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
+ * code elimination.  This is less primitive than eliminate_dead_code(), as it
+ * is per-channel and can detect consecutive writes without a read between them
+ * as dead code.  However, there is some dead code that can be eliminated by
+ * eliminate_dead_code() but not this function - for example, this function
+ * cannot eliminate an instruction writing to a register that is never read and
+ * is the only instruction writing to that register.
+ *
+ * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
+ * will occur.
+ *
+ * Returns the number of instructions that were removed outright.  Instructions
+ * that are only partially dead stay in the list with a reduced writemask and
+ * are not counted.
+ */
+int
+glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
+{
+   /* writes[4 * reg + chan] is the most recent not-yet-read write to channel
+    * chan of TEMP[reg] (NULL if none); write_level[] is the IF nesting depth
+    * that write is attributed to.
+    */
+   glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
+                                                      glsl_to_tgsi_instruction *,
+                                                      this->next_temp * 4);
+   int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
+   int level = 0;
+   int removed = 0;
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      assert(inst->dst.file != PROGRAM_TEMPORARY
+             || inst->dst.index < this->next_temp);
+
+      switch (inst->op) {
+      case TGSI_OPCODE_BGNLOOP:
+      case TGSI_OPCODE_ENDLOOP:
+         /* End of a basic block, clear the write array entirely.
+          * FIXME: This keeps us from killing dead code when the writes are
+          * on either side of a loop, even when the register isn't touched
+          * inside the loop.
+          */
+         memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
+         break;
+
+      case TGSI_OPCODE_ENDIF:
+         /* Writes recorded inside the block being closed are conditional.
+          * Re-file them at the enclosing nesting level.  If they kept the
+          * inner level, a write in a later, unrelated IF block at the same
+          * depth would look like an unconditional overwrite and would flag
+          * the earlier write as dead even though it is still live whenever
+          * the later condition is false.
+          */
+         for (int r = 0; r < this->next_temp; r++) {
+            for (int c = 0; c < 4; c++) {
+               if (!writes[4 * r + c])
+                  continue;
+
+               if (write_level[4 * r + c] >= level)
+                  write_level[4 * r + c] = level - 1;
+            }
+         }
+         --level;
+         break;
+
+      case TGSI_OPCODE_ELSE:
+         /* Clear all channels written inside the preceding if block from the
+          * write array, but leave those that were not touched.
+          *
+          * FIXME: This destroys opportunities to remove dead code inside of
+          * IF blocks that are followed by an ELSE block.
+          */
+         for (int r = 0; r < this->next_temp; r++) {
+            for (int c = 0; c < 4; c++) {
+               if (!writes[4 * r + c])
+                  continue;
+
+               if (write_level[4 * r + c] >= level)
+                  writes[4 * r + c] = NULL;
+            }
+         }
+         break;
+
+      case TGSI_OPCODE_IF:
+         ++level;
+         /* fallthrough to default case to mark the condition as read */
+
+      default:
+         /* Continuing the block, clear any channels from the write array that
+          * are read by this instruction.
+          */
+         for (unsigned i = 0; i < Elements(inst->src); i++) {
+            if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
+               /* Any temporary might be read, so no dead code elimination
+                * across this instruction.
+                */
+               memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
+            } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
+               /* Clear where it's used as src. */
+               int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
+               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
+               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
+               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);
+
+               for (int c = 0; c < 4; c++) {
+                  if (src_chans & (1 << c)) {
+                     writes[4 * inst->src[i].index + c] = NULL;
+                  }
+               }
+            }
+         }
+         break;
+      }
+
+      /* If this instruction writes to a temporary, add it to the write array.
+       * If there is already an instruction in the write array for one or more
+       * of the channels, flag that channel write as dead.
+       */
+      if (inst->dst.file == PROGRAM_TEMPORARY &&
+          !inst->dst.reladdr &&
+          !inst->saturate) {
+         for (int c = 0; c < 4; c++) {
+            if (inst->dst.writemask & (1 << c)) {
+               if (writes[4 * inst->dst.index + c]) {
+                  /* A pending write attributed to an outer level cannot be
+                   * killed by this (conditional) write; leave it in place.
+                   */
+                  if (write_level[4 * inst->dst.index + c] < level)
+                     continue;
+                  else
+                     writes[4 * inst->dst.index + c]->dead_mask |= (1 << c);
+               }
+               writes[4 * inst->dst.index + c] = inst;
+               write_level[4 * inst->dst.index + c] = level;
+            }
+         }
+      }
+   }
+
+   /* Anything still in the write array at this point is dead code. */
+   for (int r = 0; r < this->next_temp; r++) {
+      for (int c = 0; c < 4; c++) {
+         glsl_to_tgsi_instruction *inst = writes[4 * r + c];
+         if (inst)
+            inst->dead_mask |= (1 << c);
+      }
+   }
+
+   /* Now actually remove the instructions that are completely dead and update
+    * the writemask of other instructions with dead channels.
+    */
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      if (!inst->dead_mask || !inst->dst.writemask)
+         continue;
+      else if (inst->dead_mask == inst->dst.writemask) {
+         iter.remove();
+         delete inst;
+         removed++;
+      } else
+         inst->dst.writemask &= ~(inst->dead_mask);
+   }
+
+   ralloc_free(write_level);
+   ralloc_free(writes);
+
+   return removed;
+}
+
+/* Folds temporaries with non-overlapping usage into one another so that the
+ * program needs fewer registers.
+ *
+ * Produces optimal code only after copy propagation and dead code elimination
+ * have been run. */
+void
+glsl_to_tgsi_visitor::merge_registers(void)
+{
+   int *last_use = rzalloc_array(mem_ctx, int, this->next_temp);
+   int *first_def = rzalloc_array(mem_ctx, int, this->next_temp);
+
+   /* Cache the last-read and first-write positions of every temporary up
+    * front so the instruction list is not walked again for each pair. */
+   for (int t = 0; t < this->next_temp; t++) {
+      last_use[t] = get_last_temp_read(t);
+      first_def[t] = get_first_temp_write(t);
+   }
+
+   for (int keep = 0; keep < this->next_temp; keep++) {
+      /* A negative position marks a register that is unused (or that has
+       * already been merged away below). */
+      if (last_use[keep] < 0 || first_def[keep] < 0)
+         continue;
+
+      for (int victim = 0; victim < this->next_temp; victim++) {
+         if (last_use[victim] < 0 || first_def[victim] < 0)
+            continue;
+
+         /* Mergeable only when "keep" starts no later than "victim" and
+          * "victim" is first written at or after the last read of "keep". */
+         if (first_def[keep] > first_def[victim] ||
+             last_use[keep] > first_def[victim])
+            continue;
+
+         /* Rewrite every reference to "victim" so it names "keep". */
+         rename_temp_register(victim, keep);
+
+         /* "keep" now lives as long as "victim" did; retire "victim". */
+         last_use[keep] = last_use[victim];
+         first_def[victim] = -1;
+         last_use[victim] = -1;
+      }
+   }
+
+   ralloc_free(last_use);
+   ralloc_free(first_def);
+}
+
+/* Compacts the temporary register numbering: every temporary for which
+ * get_first_temp_read() reports a non-negative position is renamed to the
+ * lowest index not yet handed out, and next_temp is lowered to the number of
+ * temporaries kept. */
+void
+glsl_to_tgsi_visitor::renumber_registers(void)
+{
+   int next_free = 0;
+
+   for (int old_index = 0; old_index < this->next_temp; old_index++) {
+      if (get_first_temp_read(old_index) >= 0) {
+         if (old_index != next_free)
+            rename_temp_register(old_index, next_free);
+         next_free++;
+      }
+   }
+
+   this->next_temp = next_free;
+}
+
+/**
+ * Builds a fragment program which implements the current pixel transfer ops
+ * in front of the instructions of an existing shader, and stores the new
+ * visitor in fp->glsl_to_tgsi.
+ * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
+ *
+ * \param fp              fragment program that receives the new visitor and
+ *                        whose gl_program info is updated
+ * \param original        visitor holding the shader to be extended
+ * \param scale_and_bias  non-zero to emit the scale/bias MAD
+ * \param pixel_maps      non-zero to emit the pixel map texture look-ups
+ */
+extern "C" void
+get_pixel_transfer_visitor(struct st_fragment_program *fp,
+                           glsl_to_tgsi_visitor *original,
+                           int scale_and_bias, int pixel_maps)
+{
+   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
+   struct st_context *st = st_context(original->ctx);
+   struct gl_program *prog = &fp->Base.Base;
+   struct gl_program_parameter_list *params = _mesa_new_parameter_list();
+   st_src_reg coord, src0;
+   st_dst_reg dst0;
+   glsl_to_tgsi_instruction *inst;
+
+   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
+   v->ctx = original->ctx;
+   v->prog = prog;
+   v->glsl_version = original->glsl_version;
+   v->native_integers = original->native_integers;
+   v->options = original->options;
+   v->next_temp = original->next_temp;
+   v->num_address_regs = original->num_address_regs;
+   v->samplers_used = prog->SamplersUsed = original->samplers_used;
+   v->indirect_addr_temps = original->indirect_addr_temps;
+   v->indirect_addr_consts = original->indirect_addr_consts;
+   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
+
+   /*
+    * Get initial pixel color from the texture.
+    * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
+    */
+   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
+   src0 = v->get_temp(glsl_type::vec4_type);
+   dst0 = st_dst_reg(src0);
+   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
+   inst->sampler = 0;
+   inst->tex_target = TEXTURE_2D_INDEX;
+
+   prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
+   prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
+   v->samplers_used |= (1 << 0);
+
+   if (scale_and_bias) {
+      static const gl_state_index scale_state[STATE_LENGTH] =
+         { STATE_INTERNAL, STATE_PT_SCALE,
+           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
+      static const gl_state_index bias_state[STATE_LENGTH] =
+         { STATE_INTERNAL, STATE_PT_BIAS,
+           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
+      GLint scale_p, bias_p;
+      st_src_reg scale, bias;
+
+      scale_p = _mesa_add_state_reference(params, scale_state);
+      bias_p = _mesa_add_state_reference(params, bias_state);
+
+      /* MAD colorTemp, colorTemp, scale, bias; */
+      scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
+      bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
+      inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
+   }
+
+   if (pixel_maps) {
+      st_src_reg temp = v->get_temp(glsl_type::vec4_type);
+      st_dst_reg temp_dst = st_dst_reg(temp);
+
+      assert(st->pixel_xfer.pixelmap_texture);
+
+      /* With a little effort, we can do four pixel map look-ups with
+       * two TEX instructions:
+       */
+
+      /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
+      temp_dst.writemask = WRITEMASK_XY; /* write R,G */
+      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
+      inst->sampler = 1;
+      inst->tex_target = TEXTURE_2D_INDEX;
+
+      /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
+      src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
+      temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
+      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
+      inst->sampler = 1;
+      inst->tex_target = TEXTURE_2D_INDEX;
+
+      prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
+      v->samplers_used |= (1 << 1);
+
+      /* MOV colorTemp, temp; */
+      inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp);
+   }
+
+   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
+    * new visitor. */
+   foreach_iter(exec_list_iterator, iter, original->instructions) {
+      glsl_to_tgsi_instruction *orig_inst =
+         (glsl_to_tgsi_instruction *)iter.get();
+      glsl_to_tgsi_instruction *new_inst;
+      st_src_reg src_regs[3];
+
+      if (orig_inst->dst.file == PROGRAM_OUTPUT)
+         prog->OutputsWritten |= BITFIELD64_BIT(orig_inst->dst.index);
+
+      for (int i=0; i<3; i++) {
+         src_regs[i] = orig_inst->src[i];
+         if (src_regs[i].file == PROGRAM_INPUT &&
+             src_regs[i].index == FRAG_ATTRIB_COL0)
+         {
+            /* Reads of the incoming color are redirected to the temporary
+             * that holds the pixel-transferred color. */
+            src_regs[i].file = PROGRAM_TEMPORARY;
+            src_regs[i].index = src0.index;
+         }
+         else if (src_regs[i].file == PROGRAM_INPUT)
+            prog->InputsRead |= (1 << src_regs[i].index);
+      }
+
+      new_inst = v->emit(NULL, orig_inst->op, orig_inst->dst,
+                         src_regs[0], src_regs[1], src_regs[2]);
+
+      /* emit() is only handed the opcode and registers, so carry over the
+       * remaining per-instruction state that translation reads later;
+       * otherwise copied texture, saturating and call instructions would
+       * lose it. */
+      new_inst->saturate = orig_inst->saturate;
+      new_inst->sampler = orig_inst->sampler;
+      new_inst->tex_target = orig_inst->tex_target;
+      new_inst->tex_shadow = orig_inst->tex_shadow;
+      new_inst->function = orig_inst->function;
+   }
+
+   /* Make modifications to fragment program info. */
+   prog->Parameters = _mesa_combine_parameter_lists(params,
+                                                    original->prog->Parameters);
+   prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes);
+   prog->Varying = _mesa_clone_parameter_list(original->prog->Varying);
+   _mesa_free_parameter_list(params);
+   count_resources(v, prog);
+   fp->glsl_to_tgsi = v;
+}
+
+/**
+ * Make fragment program for glBitmap:
+ * Sample the texture and kill the fragment if the bit is 0.
+ * This program will be combined with the user's fragment program.
+ *
+ * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
+ *
+ * \param fp            fragment program that receives the new visitor (in
+ *                      fp->glsl_to_tgsi) and whose gl_program info is updated
+ * \param original      visitor holding the shader to be extended
+ * \param samplerIndex  sampler unit used for the bitmap texture
+ */
+extern "C" void
+get_bitmap_visitor(struct st_fragment_program *fp,
+                   glsl_to_tgsi_visitor *original, int samplerIndex)
+{
+   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
+   struct st_context *st = st_context(original->ctx);
+   struct gl_program *prog = &fp->Base.Base;
+   st_src_reg coord, src0;
+   st_dst_reg dst0;
+   glsl_to_tgsi_instruction *inst;
+
+   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
+   v->ctx = original->ctx;
+   v->prog = prog;
+   v->glsl_version = original->glsl_version;
+   v->native_integers = original->native_integers;
+   v->options = original->options;
+   v->next_temp = original->next_temp;
+   v->num_address_regs = original->num_address_regs;
+   v->samplers_used = prog->SamplersUsed = original->samplers_used;
+   v->indirect_addr_temps = original->indirect_addr_temps;
+   v->indirect_addr_consts = original->indirect_addr_consts;
+   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
+
+   /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
+   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
+   src0 = v->get_temp(glsl_type::vec4_type);
+   dst0 = st_dst_reg(src0);
+   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
+   inst->sampler = samplerIndex;
+   inst->tex_target = TEXTURE_2D_INDEX;
+
+   prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
+   prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
+   v->samplers_used |= (1 << samplerIndex);
+
+   /* KIL if -tmp0 < 0 # texel=0 -> keep / texel>0 -> discard */
+   src0.negate = NEGATE_XYZW;
+   if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
+      src0.swizzle = SWIZZLE_XXXX;
+   inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0);
+
+   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
+    * new visitor. */
+   foreach_iter(exec_list_iterator, iter, original->instructions) {
+      glsl_to_tgsi_instruction *orig_inst =
+         (glsl_to_tgsi_instruction *)iter.get();
+      glsl_to_tgsi_instruction *new_inst;
+      st_src_reg src_regs[3];
+
+      if (orig_inst->dst.file == PROGRAM_OUTPUT)
+         prog->OutputsWritten |= BITFIELD64_BIT(orig_inst->dst.index);
+
+      for (int i=0; i<3; i++) {
+         src_regs[i] = orig_inst->src[i];
+         if (src_regs[i].file == PROGRAM_INPUT)
+            prog->InputsRead |= (1 << src_regs[i].index);
+      }
+
+      new_inst = v->emit(NULL, orig_inst->op, orig_inst->dst,
+                         src_regs[0], src_regs[1], src_regs[2]);
+
+      /* emit() is only handed the opcode and registers, so carry over the
+       * remaining per-instruction state that translation reads later;
+       * otherwise copied texture, saturating and call instructions would
+       * lose it. */
+      new_inst->saturate = orig_inst->saturate;
+      new_inst->sampler = orig_inst->sampler;
+      new_inst->tex_target = orig_inst->tex_target;
+      new_inst->tex_shadow = orig_inst->tex_shadow;
+      new_inst->function = orig_inst->function;
+   }
+
+   /* Make modifications to fragment program info. */
+   prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
+   prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes);
+   prog->Varying = _mesa_clone_parameter_list(original->prog->Varying);
+   count_resources(v, prog);
+   fp->glsl_to_tgsi = v;
+}
+
+/* ------------------------- TGSI conversion stuff -------------------------- */
+struct label { /* One recorded branch; entries are appended by get_label(). */
+ unsigned branch_target; /* value passed to get_label(): the callee's sig_id for CAL, 0 for the other label opcodes */
+ unsigned token; /* slot whose address get_label() returns and which is handed to ureg_label_insn(); presumably filled with the token position to patch later - TODO confirm */
+};
+
+/**
+ * Intermediate state used during shader translation.
+ *
+ * The register tables are what dst_register() and src_register() index
+ * when mapping glsl_to_tgsi registers onto ureg registers.
+ */
+struct st_translate {
+ struct ureg_program *ureg; /**< ureg program that declarations and instructions are emitted into */
+
+ struct ureg_dst temps[MAX_TEMPS]; /**< By temporary index; an undefined entry is declared on first use */
+ struct ureg_src *constants; /**< Indexed for the parameter, uniform, state-var and constant files */
+ struct ureg_src *immediates; /**< Indexed for PROGRAM_IMMEDIATE */
+ struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; /**< Indexed through outputMapping[] */
+ struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; /**< Indexed through inputMapping[] */
+ struct ureg_dst address[1]; /**< address[0] is the register used for all indirect addressing */
+ struct ureg_src samplers[PIPE_MAX_SAMPLERS]; /**< Indexed by an instruction's sampler field */
+ struct ureg_src systemValues[SYSTEM_VALUE_MAX]; /**< Indexed for PROGRAM_SYSTEM_VALUE */
+
+ /* Extra info for handling point size clamping in vertex shader */
+ struct ureg_dst pointSizeResult; /**< Actual point size output register */
+ struct ureg_src pointSizeConst; /**< Point size range constant register */
+ GLint pointSizeOutIndex; /**< Temp point size output register */
+ GLboolean prevInstWrotePointSize; /**< Set by dst_register() when a vertex program writes VERT_RESULT_PSIZ */
+
+ const GLuint *inputMapping; /**< Mesa input index -> slot in inputs[] */
+ const GLuint *outputMapping; /**< Mesa output index -> slot in outputs[] */
+
+ /* For every instruction that contains a label (eg CALL), keep
+ * details so that we can go back afterwards and emit the correct
+ * tgsi instruction number for each label.
+ */
+ struct label *labels; /**< Array grown with realloc() by get_label() */
+ unsigned labels_size; /**< Allocated capacity of labels[], in entries */
+ unsigned labels_count; /**< Number of entries of labels[] in use */
+
+ /* Keep a record of the tgsi instruction number that each mesa
+ * instruction starts at, will be used to fix up labels after
+ * translation.
+ */
+ unsigned *insn; /**< Array grown with realloc() by set_insn_start() */
+ unsigned insn_size; /**< Allocated capacity of insn[], in entries */
+ unsigned insn_count; /**< Number of entries of insn[] in use */
+
+ unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
+
+ boolean error; /**< Set to TRUE when growing labels[] or insn[] fails */
+};
+
+/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x
+ *
+ * Indexed by the Mesa system value number.  Only the first two entries are
+ * given explicitly; any further entries up to SYSTEM_VALUE_MAX are
+ * zero-initialized.  NOTE(review): the initializer order must match the
+ * SYSTEM_VALUE_x enum order (presumably front face, then instance ID) -
+ * confirm against the enum definition.
+ */
+static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
+ TGSI_SEMANTIC_FACE,
+ TGSI_SEMANTIC_INSTANCEID
+};
+
+/**
+ * Make note of a branch to a label in the TGSI code.
+ * After we've emitted all instructions, we'll go over the list
+ * of labels built here and patch the TGSI code with the actual
+ * location of each label.
+ *
+ * \param t              translation state owning the labels[] array
+ * \param branch_target  value stored as the new label's branch target
+ * \return pointer to the token slot of the newly recorded label.  If the
+ *         labels[] array cannot be grown, t->error is set and a pointer to a
+ *         static dummy slot is returned instead; the existing array is left
+ *         untouched.
+ */
+static unsigned *get_label(struct st_translate *t, unsigned branch_target)
+{
+   unsigned i;
+
+   if (t->labels_count + 1 >= t->labels_size) {
+      /* Grow into temporaries first: on failure realloc() leaves the old
+       * block allocated, so t->labels and t->labels_size must keep
+       * describing it.  Overwriting them up front would leak the array and
+       * let a later call index a NULL pointer.
+       */
+      unsigned new_size = 1 << (util_logbase2(t->labels_size) + 1);
+      struct label *new_labels =
+         (struct label *)realloc(t->labels, new_size * sizeof(struct label));
+
+      if (new_labels == NULL) {
+         static unsigned dummy;
+         t->error = TRUE;
+         return &dummy;
+      }
+
+      t->labels = new_labels;
+      t->labels_size = new_size;
+   }
+
+   i = t->labels_count++;
+   t->labels[i].branch_target = branch_target;
+   return &t->labels[i].token;
+}
+
+/**
+ * Called prior to emitting the TGSI code for each instruction.
+ * Allocate additional space for instructions if needed.
+ * Update the insn[] array so the next glsl_to_tgsi_instruction points to
+ * the next TGSI instruction.
+ *
+ * \param t      translation state owning the insn[] array
+ * \param start  TGSI instruction number to record for the next instruction
+ *
+ * If the insn[] array cannot be grown, t->error is set, nothing is recorded
+ * and the existing array is left untouched.
+ */
+static void set_insn_start(struct st_translate *t, unsigned start)
+{
+   if (t->insn_count + 1 >= t->insn_size) {
+      /* Grow into temporaries first: on failure realloc() leaves the old
+       * block allocated, so t->insn and t->insn_size must keep describing
+       * it.  Overwriting them up front would leak the array and let a later
+       * call write through a NULL pointer.
+       */
+      unsigned new_size = 1 << (util_logbase2(t->insn_size) + 1);
+      unsigned *new_insn =
+         (unsigned *)realloc(t->insn, new_size * sizeof(t->insn[0]));
+
+      if (new_insn == NULL) {
+         t->error = TRUE;
+         return;
+      }
+
+      t->insn = new_insn;
+      t->insn_size = new_size;
+   }
+
+   t->insn[t->insn_count++] = start;
+}
+
+/**
+ * Declare a TGSI immediate for a glsl_to_tgsi constant/immediate.
+ *
+ * \param t       translation state; the immediate is declared in t->ureg
+ * \param values  the immediate's component values
+ * \param type    GL_FLOAT, GL_INT, GL_UNSIGNED_INT or GL_BOOL
+ * \param size    count passed through to the ureg declaration call
+ * \return the declared immediate, or an undefined source for any other type
+ */
+static struct ureg_src
+emit_immediate(struct st_translate *t,
+               gl_constant_value values[4],
+               int type, int size)
+{
+   struct ureg_program *prog = t->ureg;
+
+   if (type == GL_FLOAT)
+      return ureg_DECL_immediate(prog, &values[0].f, size);
+
+   if (type == GL_INT)
+      return ureg_DECL_immediate_int(prog, &values[0].i, size);
+
+   /* Booleans are declared through the unsigned path. */
+   if (type == GL_UNSIGNED_INT || type == GL_BOOL)
+      return ureg_DECL_immediate_uint(prog, &values[0].u, size);
+
+   assert(!"should not get here - type must be float, int, uint, or bool");
+   return ureg_src_undef();
+}
+
+/**
+ * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
+ *
+ * \param t      translation state holding the register tables
+ * \param file   register file of the destination
+ * \param index  register index within that file
+ * \return the matching ureg destination; an undefined destination for
+ *         PROGRAM_UNDEFINED and for files that are not handled here
+ *
+ * Temporaries are declared lazily on first use.  Writing VERT_RESULT_PSIZ
+ * from a vertex program also sets t->prevInstWrotePointSize.
+ */
+static struct ureg_dst
+dst_register(struct st_translate *t,
+             gl_register_file file,
+             GLuint index)
+{
+   switch(file) {
+   case PROGRAM_UNDEFINED:
+      return ureg_dst_undef();
+
+   case PROGRAM_TEMPORARY:
+      /* Same bounds check src_register() performs on temps[]. */
+      assert(index < Elements(t->temps));
+      if (ureg_dst_is_undef(t->temps[index]))
+         t->temps[index] = ureg_DECL_temporary(t->ureg);
+
+      return t->temps[index];
+
+   case PROGRAM_OUTPUT:
+      if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ)
+         t->prevInstWrotePointSize = GL_TRUE;
+
+      if (t->procType == TGSI_PROCESSOR_VERTEX)
+         assert(index < VERT_RESULT_MAX);
+      else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
+         assert(index < FRAG_RESULT_MAX);
+      else
+         assert(index < GEOM_RESULT_MAX);
+
+      assert(t->outputMapping[index] < Elements(t->outputs));
+
+      return t->outputs[t->outputMapping[index]];
+
+   case PROGRAM_ADDRESS:
+      assert(index < Elements(t->address));
+      return t->address[index];
+
+   default:
+      assert(!"unknown dst register file");
+      return ureg_dst_undef();
+   }
+}
+
+/**
+ * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
+ *
+ * \param t      translation state holding the register tables
+ * \param file   register file of the source
+ * \param index  register index within that file.  This is signed on purpose:
+ *               a state-var/constant reference may carry a negative index
+ *               (translate_src() restores it for relatively addressed
+ *               operands), and that case is mapped to constant 0 here.
+ * \return the matching ureg source; an undefined source for
+ *         PROGRAM_UNDEFINED and for files that are not handled here
+ *
+ * Temporaries are declared lazily on first use.
+ */
+static struct ureg_src
+src_register(struct st_translate *t,
+             gl_register_file file,
+             GLint index)
+{
+   switch(file) {
+   case PROGRAM_UNDEFINED:
+      return ureg_src_undef();
+
+   case PROGRAM_TEMPORARY:
+      assert(index >= 0);
+      assert(index < (GLint) Elements(t->temps));
+      if (ureg_dst_is_undef(t->temps[index]))
+         t->temps[index] = ureg_DECL_temporary(t->ureg);
+      return ureg_src(t->temps[index]);
+
+   case PROGRAM_NAMED_PARAM:
+   case PROGRAM_ENV_PARAM:
+   case PROGRAM_LOCAL_PARAM:
+   case PROGRAM_UNIFORM:
+      assert(index >= 0);
+      return t->constants[index];
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_CONSTANT:       /* ie, immediate */
+      /* With an unsigned index this test could never be true and a negative
+       * index would have read far outside constants[]. */
+      if (index < 0)
+         return ureg_DECL_constant(t->ureg, 0);
+      else
+         return t->constants[index];
+
+   case PROGRAM_IMMEDIATE:
+      return t->immediates[index];
+
+   case PROGRAM_INPUT:
+      assert(t->inputMapping[index] < Elements(t->inputs));
+      return t->inputs[t->inputMapping[index]];
+
+   case PROGRAM_OUTPUT:
+      assert(t->outputMapping[index] < Elements(t->outputs));
+      return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
+
+   case PROGRAM_ADDRESS:
+      return ureg_src(t->address[index]);
+
+   case PROGRAM_SYSTEM_VALUE:
+      assert(index >= 0);
+      assert(index < (GLint) Elements(t->systemValues));
+      return t->systemValues[index];
+
+   default:
+      assert(!"unknown src register file");
+      return ureg_src_undef();
+   }
+}
+
+/**
+ * Build the TGSI ureg_dst operand for an st_dst_reg.
+ *
+ * \param t         translation state
+ * \param dst_reg   destination register to translate
+ * \param saturate  true to mark the destination as saturating
+ * \return the register looked up through dst_register() with the writemask
+ *         applied, then saturation and indirect addressing through
+ *         t->address[0] when requested
+ */
+static struct ureg_dst
+translate_dst(struct st_translate *t,
+              const st_dst_reg *dst_reg,
+              bool saturate)
+{
+   struct ureg_dst result =
+      ureg_writemask(dst_register(t, dst_reg->file, dst_reg->index),
+                     dst_reg->writemask);
+
+   if (saturate)
+      result = ureg_saturate(result);
+
+   /* A relatively addressed destination is indexed by the address register. */
+   if (dst_reg->reladdr)
+      result = ureg_dst_indirect(result, ureg_src(t->address[0]));
+
+   return result;
+}
+
+/**
+ * Build the TGSI ureg_src operand for an st_src_reg.
+ *
+ * \param t        translation state
+ * \param src_reg  source register to translate
+ * \return the register looked up through src_register() with swizzle applied,
+ *         negated when all four negate bits are set, and set up for indirect
+ *         addressing through t->address[0] when the source has a reladdr
+ */
+static struct ureg_src
+translate_src(struct st_translate *t, const st_src_reg *src_reg)
+{
+   const unsigned swz_x = GET_SWZ(src_reg->swizzle, 0) & 0x3;
+   const unsigned swz_y = GET_SWZ(src_reg->swizzle, 1) & 0x3;
+   const unsigned swz_z = GET_SWZ(src_reg->swizzle, 2) & 0x3;
+   const unsigned swz_w = GET_SWZ(src_reg->swizzle, 3) & 0x3;
+   struct ureg_src result = src_register(t, src_reg->file, src_reg->index);
+
+   result = ureg_swizzle(result, swz_x, swz_y, swz_z, swz_w);
+
+   if ((src_reg->negate & 0xf) == NEGATE_XYZW)
+      result = ureg_negate(result);
+
+   if (src_reg->reladdr == NULL)
+      return result;
+
+   /* ureg_src_indirect() would normally be used here, but a g++ bug makes
+    * that inline C function erroneously set the src.Negate bit, so the
+    * indirect fields are filled in by hand instead. */
+   const struct ureg_src addr_reg = ureg_src(t->address[0]);
+   result.Indirect = 1;
+   result.IndirectFile = addr_reg.File;
+   result.IndirectIndex = addr_reg.Index;
+   result.IndirectSwizzle = addr_reg.SwizzleX;
+
+   /* A negative src_reg->index was set to zero in src_register(); put the
+    * real value back.  Inputs and outputs are skipped because they get
+    * remapped while const buffers don't. */
+   if (src_reg->file != PROGRAM_INPUT &&
+       src_reg->file != PROGRAM_OUTPUT)
+      result.Index = src_reg->index;
+
+   return result;
+}
+
+/**
+ * Emit one glsl_to_tgsi_instruction into the ureg program of \p t.
+ *
+ * Operands are translated with translate_dst()/translate_src().  Label
+ * opcodes record a label through get_label(), texture opcodes get the sampler
+ * appended as an extra source, and SCS has its writemask restricted to XY.
+ */
+static void
+compile_tgsi_instruction(struct st_translate *t,
+                         const glsl_to_tgsi_instruction *inst)
+{
+   struct ureg_program *ureg = t->ureg;
+   struct ureg_dst dst_ops[1];
+   struct ureg_src src_ops[4];
+   const unsigned dst_count = num_inst_dst_regs(inst->op);
+   unsigned src_count = num_inst_src_regs(inst->op);
+
+   if (dst_count)
+      dst_ops[0] = translate_dst(t, &inst->dst, inst->saturate);
+
+   for (GLuint s = 0; s < src_count; s++)
+      src_ops[s] = translate_src(t, &inst->src[s]);
+
+   switch(inst->op) {
+   case TGSI_OPCODE_BGNLOOP:
+   case TGSI_OPCODE_CAL:
+   case TGSI_OPCODE_ELSE:
+   case TGSI_OPCODE_ENDLOOP:
+   case TGSI_OPCODE_IF: {
+      assert(dst_count == 0);
+
+      /* Only CAL carries a real branch target: the callee's signature id. */
+      const unsigned target =
+         inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0;
+
+      ureg_label_insn(ureg, inst->op, src_ops, src_count,
+                      get_label(t, target));
+      return;
+   }
+
+   case TGSI_OPCODE_TEX:
+   case TGSI_OPCODE_TXB:
+   case TGSI_OPCODE_TXD:
+   case TGSI_OPCODE_TXL:
+   case TGSI_OPCODE_TXP:
+   case TGSI_OPCODE_TXQ:
+   case TGSI_OPCODE_TXF:
+      /* The sampler goes in as one more source operand. */
+      src_ops[src_count] = t->samplers[inst->sampler];
+      src_count++;
+      ureg_tex_insn(ureg, inst->op, dst_ops, dst_count,
+                    translate_texture_target(inst->tex_target,
+                                             inst->tex_shadow),
+                    src_ops, src_count);
+      return;
+
+   case TGSI_OPCODE_SCS:
+      dst_ops[0] = ureg_writemask(dst_ops[0], TGSI_WRITEMASK_XY);
+      ureg_insn(ureg, inst->op, dst_ops, dst_count, src_ops, src_count);
+      return;
+
+   default:
+      ureg_insn(ureg, inst->op, dst_ops, dst_count, src_ops, src_count);
+      return;
+   }
+}
+
+/**
+ * Emit the TGSI instructions that shift the fragment position to another
+ * pixel center convention: (adjX, adjY) is added to X and Y, and the result
+ * replaces the WPOS input for everything translated afterwards.
+ *
+ * \param t        translation state whose WPOS input slot is redirected
+ * \param program  not used by this function
+ * \param adjX     bias added to the X coordinate
+ * \param adjY     bias added to the Y coordinate
+ */
+static void
+emit_adjusted_wpos(struct st_translate *t,
+                   const struct gl_program *program,
+                   float adjX, float adjY)
+{
+   struct ureg_program *ureg = t->ureg;
+   struct ureg_dst adjusted = ureg_DECL_temporary(ureg);
+   const GLuint wpos_slot = t->inputMapping[FRAG_ATTRIB_WPOS];
+
+   /* Z and W get a zero bias so they pass through unchanged; the shader
+    * might also use gl_FragCoord.z and .w.
+    */
+   struct ureg_src bias = ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f);
+
+   ureg_ADD(ureg, adjusted, t->inputs[wpos_slot], bias);
+
+   t->inputs[wpos_slot] = ureg_src(adjusted);
+}
+
+
+/**
+ * Emit the TGSI instructions that transform the WPOS y coordinate.
+ * This code is unavoidable because it also depends on whether
+ * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
+ *
+ * \param t        translation state whose WPOS input slot is redirected
+ * \param program  program whose parameter list receives the state reference
+ * \param invert   selects which half of the transform constant is applied:
+ *                 .xy when true, .zw when false
+ */
+static void
+emit_wpos_inversion(struct st_translate *t,
+                    const struct gl_program *program,
+                    bool invert)
+{
+   struct ureg_program *ureg = t->ureg;
+
+   /* The fragment program reads the fragment position.  Every use of
+    * INPUT[WPOS] is replaced by a temporary holding INPUT[WPOS] with its
+    * y component transformed.
+    */
+   static const gl_state_index wposTransformState[STATE_LENGTH]
+      = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM,
+          (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
+
+   /* XXX: this modifies the incoming shader!  It has to happen before the
+    * constant declarations are emitted, or the new parameter is missed.
+    */
+   unsigned transform_param = _mesa_add_state_reference(program->Parameters,
+                                                        wposTransformState);
+
+   struct ureg_src transform = ureg_DECL_constant(ureg, transform_param);
+   const GLuint wpos_slot = t->inputMapping[FRAG_ATTRIB_WPOS];
+   struct ureg_src wpos_in = t->inputs[wpos_slot];
+   struct ureg_dst wpos_out;
+
+   /* Work in place when WPOS already lives in a temporary; otherwise copy
+    * it into a fresh one first:  MOV wpos_out, input[wpos]
+    */
+   if (wpos_in.File != TGSI_FILE_TEMPORARY) {
+      wpos_out = ureg_DECL_temporary(ureg);
+      ureg_MOV(ureg, wpos_out, wpos_in);
+   } else {
+      wpos_out = ureg_dst(wpos_in);
+   }
+
+   /* invert:   MAD wpos_out.y, wpos_in, transform.xxxx, transform.yyyy
+    * !invert:  MAD wpos_out.y, wpos_in, transform.zzzz, transform.wwww
+    */
+   const int scale_chan = invert ? 0 : 2;
+   const int bias_chan = invert ? 1 : 3;
+
+   ureg_MAD(ureg,
+            ureg_writemask(wpos_out, TGSI_WRITEMASK_Y),
+            wpos_in,
+            ureg_scalar(transform, scale_chan),
+            ureg_scalar(transform, bias_chan));
+
+   /* From here on the temporary stands in for the position input. */
+   t->inputs[wpos_slot] = ureg_src(wpos_out);
+}
+
+
+/**
+ * Emit fragment position/coordinate code.
+ *
+ * Reconciles the origin and pixel center conventions the fragment program
+ * asks for with what the driver reports through get_param(), setting ureg
+ * properties and emitting adjustment code as needed, then emits the y
+ * transform.
+ */
+static void
+emit_wpos(struct st_context *st,
+          struct st_translate *t,
+          const struct gl_program *program,
+          struct ureg_program *ureg)
+{
+   const struct gl_fragment_program *frag_prog =
+      (const struct gl_fragment_program *) program;
+   struct pipe_screen *screen = st->pipe->screen;
+   boolean flip_y = FALSE;
+
+   /* --- origin convention --- */
+   if (frag_prog->OriginUpperLeft) {
+      /* Shader wants upper-left; nothing to do if the driver has it. */
+      if (!screen->get_param(screen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
+         if (screen->get_param(screen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
+            /* Only lower-left is available: select it and invert Y. */
+            ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
+            flip_y = TRUE;
+         } else {
+            assert(0);
+         }
+      }
+   } else {
+      /* Shader wants lower-left. */
+      if (screen->get_param(screen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
+         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
+      } else if (screen->get_param(screen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
+         /* Only upper-left is available: invert Y. */
+         flip_y = TRUE;
+      } else {
+         assert(0);
+      }
+   }
+
+   /* --- pixel center convention --- */
+   if (frag_prog->PixelCenterInteger) {
+      /* Shader wants integer pixel centers. */
+      if (screen->get_param(screen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
+         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
+      } else if (screen->get_param(screen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
+         /* Driver only has half-integer centers: bias X and Y. */
+         const float adj_y = flip_y ? 0.5f : -0.5f;
+         emit_adjusted_wpos(t, program, 0.5f, adj_y);
+      } else {
+         assert(0);
+      }
+   } else {
+      /* Shader wants half-integer centers; nothing to do if supported. */
+      if (!screen->get_param(screen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
+         if (screen->get_param(screen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
+            /* Driver only has integer centers: select them, bias X and Y. */
+            const float adj_y = flip_y ? -0.5f : 0.5f;
+            ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
+            emit_adjusted_wpos(t, program, 0.5f, adj_y);
+         } else {
+            assert(0);
+         }
+      }
+   }
+
+   /* The y transform comes after the adjustment so that it can reuse the
+    * temporary written by the adjustment ADD instead of needing a MOV. */
+   emit_wpos_inversion(t, program, flip_y);
+}
+
+/**
+ * Convert the TGSI front-face input into the value GL shaders expect.
+ *
+ * OpenGL's fragment gl_FrontFace input is 1 for front-facing and 0 for
+ * back-facing, while TGSI supplies +1 for front and -1 for back.
+ * Saturating the TGSI value to [0,1] yields the GL value, so a single
+ * MOV_SAT into a fresh temporary is emitted and that temporary replaces
+ * the face input for the rest of the translation.
+ */
+static void
+emit_face_var(struct st_translate *t)
+{
+   struct ureg_program *ureg = t->ureg;
+   const GLuint face_slot = t->inputMapping[FRAG_ATTRIB_FACE];
+
+   /* Destination: a new temporary with the saturate modifier applied. */
+   struct ureg_dst clamped = ureg_DECL_temporary(ureg);
+   struct ureg_src raw_face = t->inputs[face_slot];
+   clamped = ureg_saturate(clamped);
+
+   /* MOV_SAT clamped, input[face] */
+   ureg_MOV(ureg, clamped, raw_face);
+
+   /* All later reads of the face input go through the clamped copy. */
+   t->inputs[face_slot] = ureg_src(clamped);
+}
+
+/**
+ * Emit a MOV that copies the vertex edge-flag input straight to the
+ * edge-flag output.
+ */
+static void
+emit_edgeflags(struct st_translate *t)
+{
+   const GLuint out_slot = t->outputMapping[VERT_RESULT_EDGE];
+   const GLuint in_slot = t->inputMapping[VERT_ATTRIB_EDGEFLAG];
+
+   ureg_MOV(t->ureg, t->outputs[out_slot], t->inputs[in_slot]);
+}
+
+/**
+ * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
+ *
+ * Declarations are emitted first (inputs, outputs, address register, system
+ * values, temporaries, constants, immediates, samplers), then every
+ * instruction held by the visitor is compiled in order, and finally branch
+ * labels are fixed up.
+ *
+ * \param ctx the GL context; used to look up the st_context and the
+ * texture image unit limit
+ * \param procType TGSI_PROCESSOR_FRAGMENT, TGSI_PROCESSOR_GEOMETRY or
+ * TGSI_PROCESSOR_VERTEX
+ * \param ureg the ureg program that receives declarations and instructions
+ * \param program the visitor holding the instructions and immediates to
+ * translate
+ * \param proginfo the gl_program supplying InputsRead, SystemValuesRead,
+ * Parameters and Id
+ * \param numInputs number of input registers used
+ * \param inputMapping maps Mesa fragment program inputs to TGSI generic
+ * input indexes
+ * \param inputSemanticName the TGSI_SEMANTIC flag for each input
+ * \param inputSemanticIndex the semantic index (ex: which texcoord) for
+ * each input
+ * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
+ * \param numOutputs number of output registers used
+ * \param outputMapping maps Mesa fragment program outputs to TGSI
+ * generic outputs
+ * \param outputSemanticName the TGSI_SEMANTIC flag for each output
+ * \param outputSemanticIndex the semantic index (ex: which texcoord) for
+ * each output
+ * \param passthrough_edgeflags if true (vertex programs only), emit a MOV
+ * copying the edge-flag input to the edge-flag
+ * output
+ *
+ * \return PIPE_OK, PIPE_ERROR_OUT_OF_MEMORY, or PIPE_ERROR_BAD_INPUT when a
+ * fragment output has a semantic other than POSITION/STENCIL/COLOR
+ */
+extern "C" enum pipe_error
+st_translate_program(
+ struct gl_context *ctx,
+ uint procType,
+ struct ureg_program *ureg,
+ glsl_to_tgsi_visitor *program,
+ const struct gl_program *proginfo,
+ GLuint numInputs,
+ const GLuint inputMapping[],
+ const ubyte inputSemanticName[],
+ const ubyte inputSemanticIndex[],
+ const GLuint interpMode[],
+ GLuint numOutputs,
+ const GLuint outputMapping[],
+ const ubyte outputSemanticName[],
+ const ubyte outputSemanticIndex[],
+ boolean passthrough_edgeflags)
+{
+ struct st_translate translate, *t;
+ unsigned i;
+ enum pipe_error ret = PIPE_OK;
+
+ /* t is not assigned yet; Elements() is presumably a sizeof-based element
+ * count, so t is only named here, not dereferenced -- TODO confirm.
+ */
+ assert(numInputs <= Elements(t->inputs));
+ assert(numOutputs <= Elements(t->outputs));
+
+ /* Zero the whole state so every pointer member starts out NULL. */
+ t = &translate;
+ memset(t, 0, sizeof *t);
+
+ t->procType = procType;
+ t->inputMapping = inputMapping;
+ t->outputMapping = outputMapping;
+ t->ureg = ureg;
+ t->pointSizeOutIndex = -1;
+ t->prevInstWrotePointSize = GL_FALSE;
+
+ /*
+ * Declare input attributes.
+ */
+ if (procType == TGSI_PROCESSOR_FRAGMENT) {
+ for (i = 0; i < numInputs; i++) {
+ t->inputs[i] = ureg_DECL_fs_input(ureg,
+ inputSemanticName[i],
+ inputSemanticIndex[i],
+ interpMode[i]);
+ }
+
+ if (proginfo->InputsRead & FRAG_BIT_WPOS) {
+ /* Must do this after setting up t->inputs, and before
+ * emitting constant references, below:
+ */
+ emit_wpos(st_context(ctx), t, proginfo, ureg);
+ }
+
+ if (proginfo->InputsRead & FRAG_BIT_FACE)
+ emit_face_var(t);
+
+ /*
+ * Declare output attributes.
+ */
+ for (i = 0; i < numOutputs; i++) {
+ switch (outputSemanticName[i]) {
+ case TGSI_SEMANTIC_POSITION:
+ t->outputs[i] = ureg_DECL_output(ureg,
+ TGSI_SEMANTIC_POSITION, /* Z/Depth */
+ outputSemanticIndex[i]);
+ t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
+ break;
+ case TGSI_SEMANTIC_STENCIL:
+ t->outputs[i] = ureg_DECL_output(ureg,
+ TGSI_SEMANTIC_STENCIL, /* Stencil */
+ outputSemanticIndex[i]);
+ t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ t->outputs[i] = ureg_DECL_output(ureg,
+ TGSI_SEMANTIC_COLOR,
+ outputSemanticIndex[i]);
+ break;
+ default:
+ /* Returns directly instead of going through "out:"; none of the
+ * arrays released there has been allocated by this function yet.
+ */
+ assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
+ return PIPE_ERROR_BAD_INPUT;
+ }
+ }
+ }
+ else if (procType == TGSI_PROCESSOR_GEOMETRY) {
+ for (i = 0; i < numInputs; i++) {
+ t->inputs[i] = ureg_DECL_gs_input(ureg,
+ i,
+ inputSemanticName[i],
+ inputSemanticIndex[i]);
+ }
+
+ for (i = 0; i < numOutputs; i++) {
+ t->outputs[i] = ureg_DECL_output(ureg,
+ outputSemanticName[i],
+ outputSemanticIndex[i]);
+ }
+ }
+ else {
+ assert(procType == TGSI_PROCESSOR_VERTEX);
+
+ for (i = 0; i < numInputs; i++) {
+ t->inputs[i] = ureg_DECL_vs_input(ureg, i);
+ }
+
+ for (i = 0; i < numOutputs; i++) {
+ t->outputs[i] = ureg_DECL_output(ureg,
+ outputSemanticName[i],
+ outputSemanticIndex[i]);
+ if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) {
+ /* Writing to the point size result register requires special
+ * handling to implement clamping.
+ */
+ static const gl_state_index pointSizeClampState[STATE_LENGTH]
+ = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
+ /* XXX: note we are modifying the incoming shader here! Need to
+ * do this before emitting the constant decls below, or this
+ * will be missed.
+ */
+ unsigned pointSizeClampConst =
+ _mesa_add_state_reference(proginfo->Parameters,
+ pointSizeClampState);
+ struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg);
+ /* Remember the real output and redirect writes to a temporary;
+ * the clamp emitted in the instruction loop copies it back.
+ */
+ t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst);
+ t->pointSizeResult = t->outputs[i];
+ t->pointSizeOutIndex = i;
+ t->outputs[i] = psizregtemp;
+ }
+ }
+ if (passthrough_edgeflags)
+ emit_edgeflags(t);
+ }
+
+ /* Declare address register.
+ */
+ if (program->num_address_regs > 0) {
+ assert(program->num_address_regs == 1);
+ t->address[0] = ureg_DECL_address(ureg);
+ }
+
+ /* Declare misc input registers
+ */
+ {
+ GLbitfield sysInputs = proginfo->SystemValuesRead;
+ unsigned numSys = 0;
+ /* Each handled bit is cleared, so the loop ends once no set bits
+ * remain.
+ */
+ for (i = 0; sysInputs; i++) {
+ if (sysInputs & (1 << i)) {
+ unsigned semName = mesa_sysval_to_semantic[i];
+ t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
+ numSys++;
+ sysInputs &= ~(1 << i);
+ }
+ }
+ }
+
+ if (program->indirect_addr_temps) {
+ /* If temps are accessed with indirect addressing, declare temporaries
+ * in sequential order. Else, we declare them on demand elsewhere.
+ * (Note: the number of temporaries is equal to program->next_temp)
+ */
+ for (i = 0; i < (unsigned)program->next_temp; i++) {
+ /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
+ t->temps[i] = ureg_DECL_temporary(t->ureg);
+ }
+ }
+
+ /* Emit constants and uniforms. TGSI uses a single index space for these,
+ * so we put all the translated regs in t->constants.
+ */
+ if (proginfo->Parameters) {
+ /* NOTE(review): with NumParameters == 0 this is a zero-byte request; if
+ * the allocator returns NULL for that, it is reported as out-of-memory
+ * -- confirm CALLOC's behaviour.
+ */
+ t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0]));
+ if (t->constants == NULL) {
+ ret = PIPE_ERROR_OUT_OF_MEMORY;
+ goto out;
+ }
+
+ for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
+ switch (proginfo->Parameters->Parameters[i].Type) {
+ case PROGRAM_ENV_PARAM:
+ case PROGRAM_LOCAL_PARAM:
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_NAMED_PARAM:
+ case PROGRAM_UNIFORM:
+ t->constants[i] = ureg_DECL_constant(ureg, i);
+ break;
+
+ /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
+ * addressing of the const buffer.
+ * FIXME: Be smarter and recognize param arrays:
+ * indirect addressing is only valid within the referenced
+ * array.
+ */
+ case PROGRAM_CONSTANT:
+ if (program->indirect_addr_consts)
+ t->constants[i] = ureg_DECL_constant(ureg, i);
+ else
+ t->constants[i] = emit_immediate(t,
+ proginfo->Parameters->ParameterValues[i],
+ proginfo->Parameters->Parameters[i].DataType,
+ 4);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ /* Emit immediate values.
+ */
+ /* NOTE(review): the same zero-byte allocation concern as for t->constants
+ * applies when num_immediates is 0.
+ */
+ t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src));
+ if (t->immediates == NULL) {
+ ret = PIPE_ERROR_OUT_OF_MEMORY;
+ goto out;
+ }
+ i = 0;
+ foreach_iter(exec_list_iterator, iter, program->immediates) {
+ immediate_storage *imm = (immediate_storage *)iter.get();
+ t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size);
+ }
+
+ /* texture samplers */
+ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+ if (program->samplers_used & (1 << i)) {
+ t->samplers[i] = ureg_DECL_sampler(ureg, i);
+ }
+ }
+
+ /* Emit each instruction in turn:
+ */
+ foreach_iter(exec_list_iterator, iter, program->instructions) {
+ set_insn_start(t, ureg_get_instruction_number(ureg));
+ compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get());
+
+ if (t->prevInstWrotePointSize && proginfo->Id) {
+ /* The previous instruction wrote to the (fake) vertex point size
+ * result register. Now we need to clamp that value to the min/max
+ * point size range, putting the result into the real point size
+ * register.
+ * Note that we can't do this easily at the end of program due to
+ * possible early return.
+ */
+ set_insn_start(t, ureg_get_instruction_number(ureg));
+ ureg_MAX(t->ureg,
+ ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
+ ureg_src(t->outputs[t->pointSizeOutIndex]),
+ ureg_swizzle(t->pointSizeConst, 1,1,1,1));
+ ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
+ ureg_src(t->outputs[t->pointSizeOutIndex]),
+ ureg_swizzle(t->pointSizeConst, 2,2,2,2));
+ }
+ t->prevInstWrotePointSize = GL_FALSE;
+ }
+
+ /* Fix up all emitted labels:
+ */
+ for (i = 0; i < t->labels_count; i++) {
+ ureg_fixup_label(ureg, t->labels[i].token,
+ t->insn[t->labels[i].branch_target]);
+ }
+
+ /* Common exit: t was zeroed above, so arrays that were never allocated
+ * are still NULL when they reach FREE().
+ */
+out:
+ FREE(t->insn);
+ FREE(t->labels);
+ FREE(t->constants);
+ FREE(t->immediates);
+
+ if (t->error) {
+ debug_printf("%s: translate error flag set\n", __FUNCTION__);
+ }
+
+ return ret;
+}
+/* ----------------------------- End TGSI code ------------------------------ */
+
+/**
+ * Convert a shader's GLSL IR into a Mesa gl_program, although without
+ * generating Mesa IR.
+ *
+ * The generated instructions live in a glsl_to_tgsi_visitor.  On success the
+ * visitor is stored in the st_vertex_program / st_fragment_program /
+ * st_geometry_program that wraps the returned program; on the early failure
+ * paths it is deleted here so it does not leak.
+ *
+ * \param ctx             the GL context
+ * \param shader_program  the shader program the linked shader belongs to
+ * \param shader          the linked shader whose IR is converted
+ * \return the new program, or NULL if the shader type is not handled or the
+ *         driver's NewProgram hook returned NULL
+ */
+static struct gl_program *
+get_mesa_program(struct gl_context *ctx,
+                 struct gl_shader_program *shader_program,
+                 struct gl_shader *shader)
+{
+   glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor();
+   struct gl_program *prog;
+   GLenum target;
+   const char *target_string;
+   bool progress;
+   struct gl_shader_compiler_options *options =
+      &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
+
+   switch (shader->Type) {
+   case GL_VERTEX_SHADER:
+      target = GL_VERTEX_PROGRAM_ARB;
+      target_string = "vertex";
+      break;
+   case GL_FRAGMENT_SHADER:
+      target = GL_FRAGMENT_PROGRAM_ARB;
+      target_string = "fragment";
+      break;
+   case GL_GEOMETRY_SHADER:
+      target = GL_GEOMETRY_PROGRAM_NV;
+      target_string = "geometry";
+      break;
+   default:
+      assert(!"should not be reached");
+      /* Nothing owns the visitor yet; release it before bailing out. */
+      delete v;
+      return NULL;
+   }
+
+   validate_ir_tree(shader->ir);
+
+   prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
+   if (!prog) {
+      /* Nothing owns the visitor yet; release it before bailing out. */
+      delete v;
+      return NULL;
+   }
+   prog->Parameters = _mesa_new_parameter_list();
+   prog->Varying = _mesa_new_parameter_list();
+   prog->Attributes = _mesa_new_parameter_list();
+   v->ctx = ctx;
+   v->prog = prog;
+   v->shader_program = shader_program;
+   v->options = options;
+   v->glsl_version = ctx->Const.GLSLVersion;
+   v->native_integers = ctx->Const.NativeIntegers;
+
+   add_uniforms_to_parameters_list(shader_program, shader, prog);
+
+   /* Emit intermediate IR for main(). */
+   visit_exec_list(shader->ir, v);
+
+   /* Now emit bodies for any functions that were used.  Visiting a body can
+    * register further signatures, so repeat until a pass emits nothing new.
+    */
+   do {
+      progress = false;
+
+      foreach_iter(exec_list_iterator, iter, v->function_signatures) {
+         function_entry *entry = (function_entry *)iter.get();
+
+         if (!entry->bgn_inst) {
+            v->current_function = entry;
+
+            entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB);
+            entry->bgn_inst->function = entry;
+
+            visit_exec_list(&entry->sig->body, v);
+
+            /* Make sure every subroutine ends with a RET. */
+            glsl_to_tgsi_instruction *last;
+            last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
+            if (last->op != TGSI_OPCODE_RET)
+               v->emit(NULL, TGSI_OPCODE_RET);
+
+            glsl_to_tgsi_instruction *end;
+            end = v->emit(NULL, TGSI_OPCODE_ENDSUB);
+            end->function = entry;
+
+            progress = true;
+         }
+      }
+   } while (progress);
+
+#if 0
+   /* Print out some information (for debugging purposes) used by the
+    * optimization passes. */
+   for (i=0; i < v->next_temp; i++) {
+      int fr = v->get_first_temp_read(i);
+      int fw = v->get_first_temp_write(i);
+      int lr = v->get_last_temp_read(i);
+      int lw = v->get_last_temp_write(i);
+
+      printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
+      assert(fw <= fr);
+   }
+#endif
+
+   /* Remove reads to output registers, and to varyings in vertex shaders. */
+   v->remove_output_reads(PROGRAM_OUTPUT);
+   if (target == GL_VERTEX_PROGRAM_ARB)
+      v->remove_output_reads(PROGRAM_VARYING);
+
+   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
+   v->simplify_cmp();
+   v->copy_propagate();
+   while (v->eliminate_dead_code_advanced());
+
+   /* FIXME: These passes to optimize temporary registers don't work when there
+    * is indirect addressing of the temporary register space.  We need proper
+    * array support so that we don't have to give up these passes in every
+    * shader that uses arrays.
+    */
+   if (!v->indirect_addr_temps) {
+      v->eliminate_dead_code();
+      v->merge_registers();
+      v->renumber_registers();
+   }
+
+   /* Write the END instruction. */
+   v->emit(NULL, TGSI_OPCODE_END);
+
+   if (ctx->Shader.Flags & GLSL_DUMP) {
+      printf("\n");
+      printf("GLSL IR for linked %s program %d:\n", target_string,
+             shader_program->Name);
+      _mesa_print_ir(shader->ir, NULL);
+      printf("\n");
+      printf("\n");
+   }
+
+   /* No Mesa IR is generated for this program. */
+   prog->Instructions = NULL;
+   prog->NumInstructions = 0;
+
+   do_set_program_inouts(shader->ir, prog);
+   count_resources(v, prog);
+
+   check_resources(ctx, shader_program, v, prog);
+
+   _mesa_reference_program(ctx, &shader->Program, prog);
+
+   struct st_vertex_program *stvp;
+   struct st_fragment_program *stfp;
+   struct st_geometry_program *stgp;
+
+   /* Hand the visitor over to the state tracker program object. */
+   switch (shader->Type) {
+   case GL_VERTEX_SHADER:
+      stvp = (struct st_vertex_program *)prog;
+      stvp->glsl_to_tgsi = v;
+      break;
+   case GL_FRAGMENT_SHADER:
+      stfp = (struct st_fragment_program *)prog;
+      stfp->glsl_to_tgsi = v;
+      break;
+   case GL_GEOMETRY_SHADER:
+      stgp = (struct st_geometry_program *)prog;
+      stgp->glsl_to_tgsi = v;
+      break;
+   default:
+      /* Not reachable: unknown types already returned in the first switch. */
+      assert(!"should not be reached");
+      return NULL;
+   }
+
+   return prog;
+}
+
+extern "C" {
+
+/**
+ * Allocate and initialize a gl_shader of the given type.
+ *
+ * \param ctx   the GL context, passed on to _mesa_init_shader()
+ * \param name  the shader object's name
+ * \param type  GL_FRAGMENT_SHADER, GL_VERTEX_SHADER or GL_GEOMETRY_SHADER_ARB
+ * \return the new shader, or NULL if the allocation failed
+ */
+struct gl_shader *
+st_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
+{
+   assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER ||
+          type == GL_GEOMETRY_SHADER_ARB);
+
+   struct gl_shader *sh = rzalloc(NULL, struct gl_shader);
+   if (!sh)
+      return sh;
+
+   sh->Type = type;
+   sh->Name = name;
+   _mesa_init_shader(ctx, sh);
+   return sh;
+}
+
+/**
+ * Allocate and initialize a gl_shader_program.
+ *
+ * \param ctx   the GL context, passed on to _mesa_init_shader_program()
+ * \param name  the program object's name
+ * \return the new shader program, or NULL if the allocation failed
+ */
+struct gl_shader_program *
+st_new_shader_program(struct gl_context *ctx, GLuint name)
+{
+   struct gl_shader_program *sp = rzalloc(NULL, struct gl_shader_program);
+   if (!sp)
+      return sp;
+
+   sp->Name = name;
+   _mesa_init_shader_program(ctx, sp);
+   return sp;
+}
+
+/**
+ * Link a shader.
+ * Called via ctx->Driver.LinkShader()
+ * This actually involves converting GLSL IR into an intermediate TGSI-like IR
+ * with code lowering and other optimizations.
+ *
+ * \param ctx   the GL context
+ * \param prog  the shader program; its LinkStatus must already be true
+ * \return GL_TRUE on success, GL_FALSE if the driver's ProgramStringNotify
+ *         hook rejected one of the generated programs
+ */
+GLboolean
+st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+   assert(prog->LinkStatus);
+
+   /* First pass: lower and optimize the IR of every linked stage until no
+    * pass reports further progress.
+    */
+   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
+      if (prog->_LinkedShaders[i] == NULL)
+         continue;
+
+      bool progress;
+      exec_list *ir = prog->_LinkedShaders[i]->ir;
+      const struct gl_shader_compiler_options *options =
+         &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
+
+      do {
+         progress = false;
+
+         /* Lowering */
+         do_mat_op_to_vec(ir);
+         lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
+                                 | LOG_TO_LOG2
+                                 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
+
+         progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
+
+         progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
+
+         progress = lower_quadop_vector(ir, false) || progress;
+
+         if (options->EmitNoIfs) {
+            progress = lower_discard(ir) || progress;
+            progress = lower_if_to_cond_assign(ir) || progress;
+         }
+
+         if (options->EmitNoNoise)
+            progress = lower_noise(ir) || progress;
+
+         /* If there are forms of indirect addressing that the driver
+          * cannot handle, perform the lowering pass.
+          */
+         if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
+             || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
+            progress =
+               lower_variable_index_to_cond_assign(ir,
+                                                   options->EmitNoIndirectInput,
+                                                   options->EmitNoIndirectOutput,
+                                                   options->EmitNoIndirectTemp,
+                                                   options->EmitNoIndirectUniform)
+               || progress;
+
+         progress = do_vec_index_to_cond_assign(ir) || progress;
+      } while (progress);
+
+      validate_ir_tree(ir);
+   }
+
+   /* Second pass: build a gl_program per stage and hand it to the driver. */
+   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
+      struct gl_program *linked_prog;
+
+      if (prog->_LinkedShaders[i] == NULL)
+         continue;
+
+      linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
+
+      if (linked_prog) {
+         bool ok = true;
+
+         switch (prog->_LinkedShaders[i]->Type) {
+         case GL_VERTEX_SHADER:
+            _mesa_reference_vertprog(ctx, &prog->VertexProgram,
+                                     (struct gl_vertex_program *)linked_prog);
+            ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
+                                                 linked_prog);
+            break;
+         case GL_FRAGMENT_SHADER:
+            _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
+                                     (struct gl_fragment_program *)linked_prog);
+            ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
+                                                 linked_prog);
+            break;
+         case GL_GEOMETRY_SHADER:
+            _mesa_reference_geomprog(ctx, &prog->GeometryProgram,
+                                     (struct gl_geometry_program *)linked_prog);
+            ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV,
+                                                 linked_prog);
+            break;
+         default:
+            break;
+         }
+         if (!ok) {
+            /* Drop the local reference on the failure path too, exactly as
+             * the success path does below; otherwise it is leaked.
+             */
+            _mesa_reference_program(ctx, &linked_prog, NULL);
+            return GL_FALSE;
+         }
+      }
+
+      /* The shader program holds its own references now; drop the local one. */
+      _mesa_reference_program(ctx, &linked_prog, NULL);
+   }
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Link a GLSL shader program.  Called via glLinkProgram().
+ *
+ * Clears the program's previous link data, refuses to link if any attached
+ * shader is uncompiled, then runs link_shaders() followed by the driver's
+ * LinkShader hook.  The outcome is left in prog->LinkStatus.
+ */
+void
+st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+   _mesa_clear_shader_program_data(ctx, prog);
+
+   prog->LinkStatus = GL_TRUE;
+
+   /* Every attached shader must have compiled successfully. */
+   for (unsigned int n = 0; n < prog->NumShaders; n++) {
+      if (prog->Shaders[n]->CompileStatus)
+         continue;
+
+      fail_link(prog, "linking with uncompiled shader");
+      prog->LinkStatus = GL_FALSE;
+   }
+
+   /* Start from a fresh varying list and no per-stage programs. */
+   prog->Varying = _mesa_new_parameter_list();
+   _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
+   _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
+   _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL);
+
+   if (prog->LinkStatus)
+      link_shaders(ctx, prog);
+
+   /* LinkStatus is re-read here, after link_shaders() has run. */
+   if (prog->LinkStatus && !ctx->Driver.LinkShader(ctx, prog))
+      prog->LinkStatus = GL_FALSE;
+
+   set_uniform_initializers(ctx, prog);
+
+   if (!(ctx->Shader.Flags & GLSL_DUMP))
+      return;
+
+   if (!prog->LinkStatus)
+      printf("GLSL shader program %d failed to link\n", prog->Name);
+
+   if (prog->InfoLog && prog->InfoLog[0] != 0) {
+      printf("GLSL shader program %d info log:\n", prog->Name);
+      printf("%s\n", prog->InfoLog);
+   }
+}
+
+} /* extern "C" */
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h
new file mode 100644
index 00000000000..d877471785d
--- /dev/null
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ * Copyright © 2011 Bryan Cain
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef ST_GLSL_TO_TGSI_H
+#define ST_GLSL_TO_TGSI_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "main/glheader.h"
+#include "tgsi/tgsi_ureg.h"
+
+/* Forward declarations for every struct that the prototypes below name.
+ * Without them a C compiler gives a struct first seen inside a parameter
+ * list prototype scope only, making it a distinct, incompatible type.
+ */
+struct gl_context;
+struct gl_program;
+struct gl_shader;
+struct gl_shader_program;
+struct glsl_to_tgsi_visitor;
+struct st_fragment_program;
+
+/** Translate the visitor's instructions into TGSI through \p ureg. */
+enum pipe_error st_translate_program(
+   struct gl_context *ctx,
+   uint procType,
+   struct ureg_program *ureg,
+   struct glsl_to_tgsi_visitor *program,
+   const struct gl_program *proginfo,
+   GLuint numInputs,
+   const GLuint inputMapping[],
+   const ubyte inputSemanticName[],
+   const ubyte inputSemanticIndex[],
+   const GLuint interpMode[],
+   GLuint numOutputs,
+   const GLuint outputMapping[],
+   const ubyte outputSemanticName[],
+   const ubyte outputSemanticIndex[],
+   boolean passthrough_edgeflags);
+
+void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v);
+void get_pixel_transfer_visitor(struct st_fragment_program *fp,
+                                struct glsl_to_tgsi_visitor *original,
+                                int scale_and_bias, int pixel_maps);
+void get_bitmap_visitor(struct st_fragment_program *fp,
+                        struct glsl_to_tgsi_visitor *original,
+                        int samplerIndex);
+
+struct gl_shader *st_new_shader(struct gl_context *ctx, GLuint name, GLuint type);
+
+struct gl_shader_program *
+st_new_shader_program(struct gl_context *ctx, GLuint name);
+
+void st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);
+GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ST_GLSL_TO_TGSI_H */
diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c
index 7bd82aae206..d5228d387f7 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -587,7 +587,7 @@ st_context_teximage(struct st_context_iface *stctxi,
internalFormat = GL_RGB;
texFormat = st_ChooseTextureFormat(ctx, internalFormat,
- GL_RGBA, GL_UNSIGNED_BYTE);
+ GL_BGRA, GL_UNSIGNED_BYTE);
_mesa_init_teximage_fields(ctx, target, texImage,
tex->width0, tex->height0, 1, 0,
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index a41e5b16a85..656c985d78f 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -267,7 +267,7 @@ src_register( struct st_translate *t,
/**
* Map mesa texture target to TGSI texture target.
*/
-static unsigned
+unsigned
translate_texture_target( GLuint textarget,
GLboolean shadow )
{
@@ -511,7 +511,7 @@ static void emit_ddy( struct st_translate *t,
-static unsigned
+unsigned
translate_opcode( unsigned op )
{
switch( op ) {
@@ -1207,7 +1207,7 @@ st_translate_mesa_program(
else
t->constants[i] =
ureg_DECL_immediate( ureg,
- program->Parameters->ParameterValues[i],
+ (const float*) program->Parameters->ParameterValues[i],
4 );
break;
default:
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h
index 0615e52ef62..0dbdf5f6159 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.h
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h
@@ -64,6 +64,12 @@ st_translate_mesa_program(
void
st_free_tokens(const struct tgsi_token *tokens);
+unsigned
+translate_opcode(unsigned op);
+
+unsigned
+translate_texture_target(GLuint textarget, GLboolean shadow);
+
#if defined __cplusplus
} /* extern "C" */
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index 132ebdbadc9..a4f47edfcd3 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -174,8 +174,8 @@ st_release_gp_variants(struct st_context *st, struct st_geometry_program *stgp)
* \param tokensOut destination for TGSI tokens
* \return pointer to cached pipe_shader object.
*/
-static void
-st_prepare_vertex_program(struct st_context *st,
+void
+st_prepare_vertex_program(struct gl_context *ctx,
struct st_vertex_program *stvp)
{
GLuint attr;
@@ -184,9 +184,10 @@ st_prepare_vertex_program(struct st_context *st,
stvp->num_outputs = 0;
if (stvp->Base.IsPositionInvariant)
- _mesa_insert_mvp_code(st->ctx, &stvp->Base);
+ _mesa_insert_mvp_code(ctx, &stvp->Base);
- assert(stvp->Base.Base.NumInstructions > 1);
+ if (!stvp->glsl_to_tgsi)
+ assert(stvp->Base.Base.NumInstructions > 1);
/*
* Determine number of inputs, the mappings between VERT_ATTRIB_x
@@ -292,10 +293,13 @@ st_translate_vertex_program(struct st_context *st,
enum pipe_error error;
unsigned num_outputs;
- st_prepare_vertex_program( st, stvp );
+ st_prepare_vertex_program(st->ctx, stvp);
- _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT);
- _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING);
+ if (!stvp->glsl_to_tgsi)
+ {
+ _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT);
+ _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING);
+ }
ureg = ureg_create( TGSI_PROCESSOR_VERTEX );
if (ureg == NULL) {
@@ -318,22 +322,41 @@ st_translate_vertex_program(struct st_context *st,
debug_printf("\n");
}
- error = st_translate_mesa_program(st->ctx,
- TGSI_PROCESSOR_VERTEX,
- ureg,
- &stvp->Base.Base,
- /* inputs */
- vpv->num_inputs,
- stvp->input_to_index,
- NULL, /* input semantic name */
- NULL, /* input semantic index */
- NULL,
- /* outputs */
- num_outputs,
- stvp->result_to_output,
- stvp->output_semantic_name,
- stvp->output_semantic_index,
- key->passthrough_edgeflags );
+ if (stvp->glsl_to_tgsi)
+ error = st_translate_program(st->ctx,
+ TGSI_PROCESSOR_VERTEX,
+ ureg,
+ stvp->glsl_to_tgsi,
+ &stvp->Base.Base,
+ /* inputs */
+ stvp->num_inputs,
+ stvp->input_to_index,
+ NULL, /* input semantic name */
+ NULL, /* input semantic index */
+ NULL, /* interp mode */
+ /* outputs */
+ stvp->num_outputs,
+ stvp->result_to_output,
+ stvp->output_semantic_name,
+ stvp->output_semantic_index,
+ key->passthrough_edgeflags );
+ else
+ error = st_translate_mesa_program(st->ctx,
+ TGSI_PROCESSOR_VERTEX,
+ ureg,
+ &stvp->Base.Base,
+ /* inputs */
+ vpv->num_inputs,
+ stvp->input_to_index,
+ NULL, /* input semantic name */
+ NULL, /* input semantic index */
+ NULL,
+ /* outputs */
+ num_outputs,
+ stvp->result_to_output,
+ stvp->output_semantic_name,
+ stvp->output_semantic_index,
+ key->passthrough_edgeflags );
if (error)
goto fail;
@@ -451,6 +474,7 @@ st_translate_fragment_program(struct st_context *st,
GLuint attr;
const GLbitfield inputsRead = stfp->Base.Base.InputsRead;
struct ureg_program *ureg;
+
GLboolean write_all = GL_FALSE;
ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
@@ -460,9 +484,9 @@ st_translate_fragment_program(struct st_context *st,
ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
uint fs_num_outputs = 0;
-
-
- _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT);
+
+ if (!stfp->glsl_to_tgsi)
+ _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT);
/*
* Convert Mesa program inputs to TGSI input register semantics.
@@ -605,21 +629,39 @@ st_translate_fragment_program(struct st_context *st,
if (write_all == GL_TRUE)
ureg_property_fs_color0_writes_all_cbufs(ureg, 1);
- st_translate_mesa_program(st->ctx,
- TGSI_PROCESSOR_FRAGMENT,
- ureg,
- &stfp->Base.Base,
- /* inputs */
- fs_num_inputs,
- inputMapping,
- input_semantic_name,
- input_semantic_index,
- interpMode,
- /* outputs */
- fs_num_outputs,
- outputMapping,
- fs_output_semantic_name,
- fs_output_semantic_index, FALSE );
+ if (stfp->glsl_to_tgsi)
+ st_translate_program(st->ctx,
+ TGSI_PROCESSOR_FRAGMENT,
+ ureg,
+ stfp->glsl_to_tgsi,
+ &stfp->Base.Base,
+ /* inputs */
+ fs_num_inputs,
+ inputMapping,
+ input_semantic_name,
+ input_semantic_index,
+ interpMode,
+ /* outputs */
+ fs_num_outputs,
+ outputMapping,
+ fs_output_semantic_name,
+ fs_output_semantic_index, FALSE );
+ else
+ st_translate_mesa_program(st->ctx,
+ TGSI_PROCESSOR_FRAGMENT,
+ ureg,
+ &stfp->Base.Base,
+ /* inputs */
+ fs_num_inputs,
+ inputMapping,
+ input_semantic_name,
+ input_semantic_index,
+ interpMode,
+ /* outputs */
+ fs_num_outputs,
+ outputMapping,
+ fs_output_semantic_name,
+ fs_output_semantic_index, FALSE );
stfp->tgsi.tokens = ureg_get_tokens( ureg, NULL );
ureg_destroy( ureg );
diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
index c4244df939e..699b6e8ccb7 100644
--- a/src/mesa/state_tracker/st_program.h
+++ b/src/mesa/state_tracker/st_program.h
@@ -38,6 +38,7 @@
#include "program/program.h"
#include "pipe/p_state.h"
#include "st_context.h"
+#include "st_glsl_to_tgsi.h"
/** Fragment program variant key */
@@ -83,6 +84,7 @@ struct st_fp_variant
struct st_fragment_program
{
struct gl_fragment_program Base;
+ struct glsl_to_tgsi_visitor* glsl_to_tgsi;
struct pipe_shader_state tgsi;
@@ -136,6 +138,7 @@ struct st_vp_variant
struct st_vertex_program
{
struct gl_vertex_program Base; /**< The Mesa vertex program */
+ struct glsl_to_tgsi_visitor* glsl_to_tgsi;
/** maps a Mesa VERT_ATTRIB_x to a packed TGSI input index */
GLuint input_to_index[VERT_ATTRIB_MAX];
@@ -184,6 +187,7 @@ struct st_gp_variant
struct st_geometry_program
{
struct gl_geometry_program Base; /**< The Mesa geometry program */
+ struct glsl_to_tgsi_visitor* glsl_to_tgsi;
/** map GP input back to VP output */
GLuint input_map[PIPE_MAX_SHADER_INPUTS];
@@ -276,6 +280,14 @@ st_get_gp_variant(struct st_context *st,
const struct st_gp_variant_key *key);
+extern void
+st_prepare_vertex_program(struct gl_context *ctx,
+ struct st_vertex_program *stvp);
+
+extern GLboolean
+st_prepare_fragment_program(struct gl_context *ctx,
+ struct st_fragment_program *stfp);
+
extern void
st_release_vp_variants( struct st_context *st,
diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c
index ffe7e256a56..232c286c1d1 100644
--- a/src/mesa/state_tracker/st_texture.c
+++ b/src/mesa/state_tracker/st_texture.c
@@ -221,8 +221,8 @@ st_texture_image_map(struct st_context *st, struct st_texture_image *stImage,
DBG("%s \n", __FUNCTION__);
- stImage->transfer = pipe_get_transfer(st->pipe, pt, stImage->level,
- stImage->face + zoffset,
+ stImage->transfer = pipe_get_transfer(st->pipe, pt, stImage->base.Level,
+ stImage->base.Face + zoffset,
usage, x, y, w, h);
if (stImage->transfer)
@@ -396,3 +396,23 @@ st_texture_image_copy(struct pipe_context *pipe,
}
}
+
+/**
+ * Create the 2D texture used to hold a color map/table.
+ *
+ * The texture is 256x256, uses the format st_choose_format() selects for
+ * GL_RGBA, and is bound for sampler-view use.
+ *
+ * \return whatever st_texture_create() returns for that request
+ */
+struct pipe_resource *
+st_create_color_map_texture(struct gl_context *ctx)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   const uint texSize = 256; /* simple, and usually perfect */
+
+   /* find an RGBA texture format */
+   const enum pipe_format format =
+      st_choose_format(pipe->screen, GL_RGBA, GL_NONE, GL_NONE,
+                       PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW);
+
+   /* create texture for color map/table */
+   return st_texture_create(st, PIPE_TEXTURE_2D, format, 0,
+                            texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW);
+}
+
diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h
index d50c3c9af79..50b7284e760 100644
--- a/src/mesa/state_tracker/st_texture.h
+++ b/src/mesa/state_tracker/st_texture.h
@@ -45,11 +45,6 @@ struct st_texture_image
{
struct gl_texture_image base;
- /* These aren't stored in gl_texture_image
- */
- GLuint level;
- GLuint face;
-
/* If stImage->pt != NULL, image data is stored here.
* Else if stImage->base.Data != NULL, image is stored there.
* Else there is no image data.
@@ -232,4 +227,8 @@ st_texture_image_copy(struct pipe_context *pipe,
struct pipe_resource *src, GLuint srcLevel,
GLuint face);
+
+extern struct pipe_resource *
+st_create_color_map_texture(struct gl_context *ctx);
+
#endif
diff --git a/src/mesa/swrast/s_aatritemp.h b/src/mesa/swrast/s_aatritemp.h
index 91d4f7a10ab..77b3ae6ec7a 100644
--- a/src/mesa/swrast/s_aatritemp.h
+++ b/src/mesa/swrast/s_aatritemp.h
@@ -181,13 +181,20 @@
const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS];
const GLfloat dxdy = majDx / majDy;
const GLfloat xAdj = dxdy < 0.0F ? -dxdy : 0.0F;
- GLfloat x = pMin[0] - (yMin - iyMin) * dxdy;
GLint iy;
- for (iy = iyMin; iy < iyMax; iy++, x += dxdy) {
+#ifdef _OPENMP
+#pragma omp parallel for schedule(dynamic) private(iy) firstprivate(span)
+#endif
+ for (iy = iyMin; iy < iyMax; iy++) {
+ GLfloat x = pMin[0] - (yMin - iy) * dxdy;
GLint ix, startX = (GLint) (x - xAdj);
GLuint count;
GLfloat coverage = 0.0F;
+#ifdef _OPENMP
+ /* each thread needs to use a different (global) SpanArrays variable */
+ span.array = SWRAST_CONTEXT(ctx)->SpanArrays + omp_get_thread_num();
+#endif
/* skip over fragments with zero coverage */
while (startX < MAX_WIDTH) {
coverage = compute_coveragef(pMin, pMid, pMax, startX, iy);
@@ -228,13 +235,12 @@
coverage = compute_coveragef(pMin, pMid, pMax, ix, iy);
}
- if (ix <= startX)
- continue;
-
- span.x = startX;
- span.y = iy;
- span.end = (GLuint) ix - (GLuint) startX;
- _swrast_write_rgba_span(ctx, &span);
+ if (ix > startX) {
+ span.x = startX;
+ span.y = iy;
+ span.end = (GLuint) ix - (GLuint) startX;
+ _swrast_write_rgba_span(ctx, &span);
+ }
}
}
else {
@@ -244,13 +250,20 @@
const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS];
const GLfloat dxdy = majDx / majDy;
const GLfloat xAdj = dxdy > 0 ? dxdy : 0.0F;
- GLfloat x = pMin[0] - (yMin - iyMin) * dxdy;
GLint iy;
- for (iy = iyMin; iy < iyMax; iy++, x += dxdy) {
+#ifdef _OPENMP
+#pragma omp parallel for schedule(dynamic) private(iy) firstprivate(span)
+#endif
+ for (iy = iyMin; iy < iyMax; iy++) {
+ GLfloat x = pMin[0] - (yMin - iy) * dxdy;
GLint ix, left, startX = (GLint) (x + xAdj);
GLuint count, n;
GLfloat coverage = 0.0F;
+#ifdef _OPENMP
+ /* each thread needs to use a different (global) SpanArrays variable */
+ span.array = SWRAST_CONTEXT(ctx)->SpanArrays + omp_get_thread_num();
+#endif
/* make sure we're not past the window edge */
if (startX >= ctx->DrawBuffer->_Xmax) {
startX = ctx->DrawBuffer->_Xmax - 1;
@@ -296,31 +309,30 @@
ATTRIB_LOOP_END
#endif
- if (startX <= ix)
- continue;
-
- n = (GLuint) startX - (GLuint) ix;
+ if (startX > ix) {
+ n = (GLuint) startX - (GLuint) ix;
- left = ix + 1;
+ left = ix + 1;
- /* shift all values to the left */
- /* XXX this is temporary */
- {
- SWspanarrays *array = span.array;
- GLint j;
- for (j = 0; j < (GLint) n; j++) {
- array->coverage[j] = array->coverage[j + left];
- COPY_CHAN4(array->rgba[j], array->rgba[j + left]);
+ /* shift all values to the left */
+ /* XXX this is temporary */
+ {
+ SWspanarrays *array = span.array;
+ GLint j;
+ for (j = 0; j < (GLint) n; j++) {
+ array->coverage[j] = array->coverage[j + left];
+ COPY_CHAN4(array->rgba[j], array->rgba[j + left]);
#ifdef DO_Z
- array->z[j] = array->z[j + left];
+ array->z[j] = array->z[j + left];
#endif
+ }
}
- }
- span.x = left;
- span.y = iy;
- span.end = n;
- _swrast_write_rgba_span(ctx, &span);
+ span.x = left;
+ span.y = iy;
+ span.end = n;
+ _swrast_write_rgba_span(ctx, &span);
+ }
}
}
}
diff --git a/src/mesa/swrast/s_context.c b/src/mesa/swrast/s_context.c
index def1531d7ff..792b528ee34 100644
--- a/src/mesa/swrast/s_context.c
+++ b/src/mesa/swrast/s_context.c
@@ -417,84 +417,6 @@ _swrast_validate_blend_func(struct gl_context *ctx, GLuint n, const GLubyte mask
swrast->BlendFunc( ctx, n, mask, src, dst, chanType );
}
-
-/**
- * Make sure we have texture image data for all the textures we may need
- * for subsequent rendering.
- */
-static void
-_swrast_validate_texture_images(struct gl_context *ctx)
-{
- SWcontext *swrast = SWRAST_CONTEXT(ctx);
- GLuint u;
-
- if (!swrast->ValidateTextureImage || !ctx->Texture._EnabledUnits) {
- /* no textures enabled, or no way to validate images! */
- return;
- }
-
- for (u = 0; u < ctx->Const.MaxTextureImageUnits; u++) {
- if (ctx->Texture.Unit[u]._ReallyEnabled) {
- struct gl_texture_object *texObj = ctx->Texture.Unit[u]._Current;
- ASSERT(texObj);
- if (texObj) {
- GLuint numFaces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
- GLuint face;
- for (face = 0; face < numFaces; face++) {
- GLint lvl;
- for (lvl = texObj->BaseLevel; lvl <= texObj->_MaxLevel; lvl++) {
- struct gl_texture_image *texImg = texObj->Image[face][lvl];
- if (texImg && !texImg->Data) {
- swrast->ValidateTextureImage(ctx, texObj, face, lvl);
- ASSERT(texObj->Image[face][lvl]->Data);
- }
- }
- }
- }
- }
- }
-}
-
-
-/**
- * Free the texture image data attached to all currently enabled
- * textures. Meant to be called by device drivers when transitioning
- * from software to hardware rendering.
- */
-void
-_swrast_eject_texture_images(struct gl_context *ctx)
-{
- GLuint u;
-
- if (!ctx->Texture._EnabledUnits) {
- /* no textures enabled */
- return;
- }
-
- for (u = 0; u < ctx->Const.MaxTextureImageUnits; u++) {
- if (ctx->Texture.Unit[u]._ReallyEnabled) {
- struct gl_texture_object *texObj = ctx->Texture.Unit[u]._Current;
- ASSERT(texObj);
- if (texObj) {
- GLuint numFaces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
- GLuint face;
- for (face = 0; face < numFaces; face++) {
- GLint lvl;
- for (lvl = texObj->BaseLevel; lvl <= texObj->_MaxLevel; lvl++) {
- struct gl_texture_image *texImg = texObj->Image[face][lvl];
- if (texImg && texImg->Data) {
- _mesa_free_texmemory(texImg->Data);
- texImg->Data = NULL;
- }
- }
- }
- }
- }
- }
-}
-
-
-
static void
_swrast_sleep( struct gl_context *ctx, GLbitfield new_state )
{
@@ -640,7 +562,6 @@ _swrast_validate_derived( struct gl_context *ctx )
if (swrast->NewState & (_NEW_TEXTURE | _NEW_PROGRAM)) {
_swrast_update_texture_samplers( ctx );
- _swrast_validate_texture_images(ctx);
}
if (swrast->NewState & (_NEW_COLOR | _NEW_PROGRAM))
@@ -772,6 +693,11 @@ _swrast_CreateContext( struct gl_context *ctx )
{
GLuint i;
SWcontext *swrast = (SWcontext *)CALLOC(sizeof(SWcontext));
+#ifdef _OPENMP
+ const GLint maxThreads = omp_get_max_threads();
+#else
+ const GLint maxThreads = 1;
+#endif
if (SWRAST_DEBUG) {
_mesa_debug(ctx, "_swrast_CreateContext\n");
@@ -806,19 +732,25 @@ _swrast_CreateContext( struct gl_context *ctx )
for (i = 0; i < MAX_TEXTURE_IMAGE_UNITS; i++)
swrast->TextureSample[i] = NULL;
- swrast->SpanArrays = MALLOC_STRUCT(sw_span_arrays);
+ /* SpanArrays is global and shared by all SWspan instances. However, when
+ * using multiple threads, it is necessary to have one SpanArrays instance
+ * per thread.
+ */
+ swrast->SpanArrays = (SWspanarrays *) MALLOC(maxThreads * sizeof(SWspanarrays));
if (!swrast->SpanArrays) {
FREE(swrast);
return GL_FALSE;
}
- swrast->SpanArrays->ChanType = CHAN_TYPE;
+ for(i = 0; i < maxThreads; i++) {
+ swrast->SpanArrays[i].ChanType = CHAN_TYPE;
#if CHAN_TYPE == GL_UNSIGNED_BYTE
- swrast->SpanArrays->rgba = swrast->SpanArrays->rgba8;
+ swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].rgba8;
#elif CHAN_TYPE == GL_UNSIGNED_SHORT
- swrast->SpanArrays->rgba = swrast->SpanArrays->rgba16;
+ swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].rgba16;
#else
- swrast->SpanArrays->rgba = swrast->SpanArrays->attribs[FRAG_ATTRIB_COL0];
+ swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].attribs[FRAG_ATTRIB_COL0];
#endif
+ }
/* init point span buffer */
swrast->PointSpan.primitive = GL_POINT;
@@ -826,7 +758,10 @@ _swrast_CreateContext( struct gl_context *ctx )
swrast->PointSpan.facing = 0;
swrast->PointSpan.array = swrast->SpanArrays;
- swrast->TexelBuffer = (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits *
+ /* TexelBuffer is also global and normally shared by all SWspan instances;
+ * when running with multiple threads, create one per thread.
+ */
+ swrast->TexelBuffer = (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * maxThreads *
MAX_WIDTH * 4 * sizeof(GLfloat));
if (!swrast->TexelBuffer) {
FREE(swrast->SpanArrays);
diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c
index db102ac7946..9a91be39970 100644
--- a/src/mesa/swrast/s_span.c
+++ b/src/mesa/swrast/s_span.c
@@ -212,10 +212,10 @@ interpolate_active_attribs(struct gl_context *ctx, SWspan *span, GLbitfield attr
static INLINE void
interpolate_int_colors(struct gl_context *ctx, SWspan *span)
{
+#if CHAN_BITS != 32
const GLuint n = span->end;
GLuint i;
-#if CHAN_BITS != 32
ASSERT(!(span->arrayMask & SPAN_RGBA));
#endif
diff --git a/src/mesa/swrast/s_stencil.c b/src/mesa/swrast/s_stencil.c
index 5bec71c057b..fa5093a3407 100644
--- a/src/mesa/swrast/s_stencil.c
+++ b/src/mesa/swrast/s_stencil.c
@@ -462,7 +462,8 @@ stencil_and_ztest_span(struct gl_context *ctx, SWspan *span, GLuint face)
* Some fragments passed the stencil test, apply depth test to them
* and apply Zpass and Zfail stencil ops.
*/
- if (ctx->Depth.Test == GL_FALSE) {
+ if (ctx->Depth.Test == GL_FALSE ||
+ ctx->DrawBuffer->_DepthBuffer == NULL) {
/*
* No depth buffer, just apply zpass stencil function to active pixels.
*/
diff --git a/src/mesa/swrast/s_texcombine.c b/src/mesa/swrast/s_texcombine.c
index 086ed0b33d7..80b9dff3cc2 100644
--- a/src/mesa/swrast/s_texcombine.c
+++ b/src/mesa/swrast/s_texcombine.c
@@ -48,7 +48,11 @@ typedef float (*float4_array)[4];
static INLINE float4_array
get_texel_array(SWcontext *swrast, GLuint unit)
{
+#ifdef _OPENMP
+ return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4 * omp_get_num_threads() + (MAX_WIDTH * 4 * omp_get_thread_num()));
+#else
return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4);
+#endif
}
diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c
index b1967e65417..86af4b7cfe2 100644
--- a/src/mesa/tnl/t_draw.c
+++ b/src/mesa/tnl/t_draw.c
@@ -280,10 +280,9 @@ static void bind_inputs( struct gl_context *ctx,
if (!inputs[i]->BufferObj->Pointer) {
bo[*nr_bo] = inputs[i]->BufferObj;
(*nr_bo)++;
- ctx->Driver.MapBuffer(ctx,
- GL_ARRAY_BUFFER,
- GL_READ_ONLY_ARB,
- inputs[i]->BufferObj);
+ ctx->Driver.MapBufferRange(ctx, 0, inputs[i]->BufferObj->Size,
+ GL_MAP_READ_BIT,
+ inputs[i]->BufferObj);
assert(inputs[i]->BufferObj->Pointer);
}
@@ -348,18 +347,32 @@ static void bind_indices( struct gl_context *ctx,
}
if (ib->obj->Name && !ib->obj->Pointer) {
+ unsigned map_size;
+
+ switch (ib->type) {
+ case GL_UNSIGNED_BYTE:
+ map_size = ib->count * sizeof(GLubyte);
+ break;
+ case GL_UNSIGNED_SHORT:
+ map_size = ib->count * sizeof(GLushort);
+ break;
+ case GL_UNSIGNED_INT:
+ map_size = ib->count * sizeof(GLuint);
+ break;
+ default:
+ assert(0);
+ map_size = 0;
+ }
+
bo[*nr_bo] = ib->obj;
(*nr_bo)++;
- ctx->Driver.MapBuffer(ctx,
- GL_ELEMENT_ARRAY_BUFFER,
- GL_READ_ONLY_ARB,
- ib->obj);
-
+ ptr = ctx->Driver.MapBufferRange(ctx, (GLsizeiptr) ib->ptr, map_size,
+ GL_MAP_READ_BIT, ib->obj);
assert(ib->obj->Pointer);
+ } else {
+ ptr = ib->ptr;
}
- ptr = ADD_POINTERS(ib->obj->Pointer, ib->ptr);
-
if (ib->type == GL_UNSIGNED_INT && VB->Primitive[0].basevertex == 0) {
VB->Elts = (GLuint *) ptr;
}
@@ -402,9 +415,7 @@ static void unmap_vbos( struct gl_context *ctx,
{
GLuint i;
for (i = 0; i < nr_bo; i++) {
- ctx->Driver.UnmapBuffer(ctx,
- 0, /* target -- I don't see why this would be needed */
- bo[i]);
+ ctx->Driver.UnmapBuffer(ctx, bo[i]);
}
}
diff --git a/src/mesa/tnl/t_pipeline.c b/src/mesa/tnl/t_pipeline.c
index 18f095f0d4b..881d5d5f535 100644
--- a/src/mesa/tnl/t_pipeline.c
+++ b/src/mesa/tnl/t_pipeline.c
@@ -146,7 +146,17 @@ void _tnl_run_pipeline( struct gl_context *ctx )
_tnl_notify_pipeline_output_change( ctx );
}
+#ifndef _OPENMP
+ /* Don't adjust FPU precision mode in case multiple threads are to be used.
+ * This would require that the additional threads also change the FPU mode,
+ * which is quite a mess as it would have to be done in all parallelized sections;
+ * otherwise the master thread and all other threads would run in different
+ * modes, producing inconsistent results.
+ * Note that no x64 implementation defines/uses START_FAST_MATH, so
+ * this "hack" is only used in i386 mode.
+ */
START_FAST_MATH(__tmp);
+#endif
for (i = 0; i < tnl->pipeline.nr_stages ; i++) {
struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
@@ -154,7 +164,9 @@ void _tnl_run_pipeline( struct gl_context *ctx )
break;
}
+#ifndef _OPENMP
END_FAST_MATH(__tmp);
+#endif
}
diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index 2b8d38ef283..8474c787a46 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -431,6 +431,24 @@ do { \
#include "vbo_attrib_tmp.h"
+/**
+ * Flush (draw) vertices.
+ * \param unmap - leave VBO unmapped after flushing?
+ */
+static void
+vbo_exec_FlushVertices_internal(struct vbo_exec_context *exec, GLboolean unmap)
+{
+ if (exec->vtx.vert_count || unmap) {
+ vbo_exec_vtx_flush( exec, unmap );
+ }
+
+ if (exec->vtx.vertex_size) {
+ vbo_exec_copy_to_current( exec );
+ reset_attrfv( exec );
+ }
+}
+
+
#if FEATURE_beginend
@@ -535,24 +553,6 @@ static void GLAPIENTRY vbo_exec_EvalPoint2( GLint i, GLint j )
/**
- * Flush (draw) vertices.
- * \param unmap - leave VBO unmapped after flushing?
- */
-static void
-vbo_exec_FlushVertices_internal(struct vbo_exec_context *exec, GLboolean unmap)
-{
- if (exec->vtx.vert_count || unmap) {
- vbo_exec_vtx_flush( exec, unmap );
- }
-
- if (exec->vtx.vertex_size) {
- vbo_exec_copy_to_current( exec );
- reset_attrfv( exec );
- }
-}
-
-
-/**
* Called via glBegin.
*/
static void GLAPIENTRY vbo_exec_Begin( GLenum mode )
@@ -947,7 +947,7 @@ void vbo_exec_vtx_destroy( struct vbo_exec_context *exec )
/* Free the vertex buffer. Unmap first if needed.
*/
if (_mesa_bufferobj_mapped(exec->vtx.bufferobj)) {
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, exec->vtx.bufferobj);
+ ctx->Driver.UnmapBuffer(ctx, exec->vtx.bufferobj);
}
_mesa_reference_buffer_object(ctx, &exec->vtx.bufferobj, NULL);
}
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index b908d5aea7e..18719d5f537 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -95,10 +95,25 @@ vbo_get_minmax_index(struct gl_context *ctx,
GLuint i;
if (_mesa_is_bufferobj(ib->obj)) {
- const GLvoid *map =
- ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB,
- GL_READ_ONLY, ib->obj);
- indices = ADD_POINTERS(map, ib->ptr);
+ unsigned map_size;
+
+ switch (ib->type) {
+ case GL_UNSIGNED_INT:
+ map_size = count * sizeof(GLuint);
+ break;
+ case GL_UNSIGNED_SHORT:
+ map_size = count * sizeof(GLushort);
+ break;
+ case GL_UNSIGNED_BYTE:
+ map_size = count * sizeof(GLubyte);
+ break;
+ default:
+ assert(0);
+ map_size = 0;
+ }
+
+ indices = ctx->Driver.MapBufferRange(ctx, (GLsizeiptr) ib->ptr, map_size,
+ GL_MAP_READ_BIT, ib->obj);
} else {
indices = ib->ptr;
}
@@ -176,7 +191,7 @@ vbo_get_minmax_index(struct gl_context *ctx,
}
if (_mesa_is_bufferobj(ib->obj)) {
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, ib->obj);
+ ctx->Driver.UnmapBuffer(ctx, ib->obj);
}
}
@@ -196,8 +211,8 @@ check_array_data(struct gl_context *ctx, struct gl_client_array *array,
if (!array->BufferObj->Pointer) {
/* need to map now */
array->BufferObj->Pointer =
- ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB,
- GL_READ_ONLY, array->BufferObj);
+ ctx->Driver.MapBufferRange(ctx, 0, array->BufferObj->Size,
+ GL_MAP_READ_BIT, array->BufferObj);
}
data = ADD_POINTERS(data, array->BufferObj->Pointer);
}
@@ -238,7 +253,7 @@ unmap_array_buffer(struct gl_context *ctx, struct gl_client_array *array)
if (array->Enabled &&
_mesa_is_bufferobj(array->BufferObj) &&
_mesa_bufferobj_mapped(array->BufferObj)) {
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, array->BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, array->BufferObj);
}
}
@@ -256,10 +271,10 @@ check_draw_elements_data(struct gl_context *ctx, GLsizei count, GLenum elemType,
GLint i, k;
if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) {
- elemMap = ctx->Driver.MapBuffer(ctx,
- GL_ELEMENT_ARRAY_BUFFER_ARB,
- GL_READ_ONLY,
- ctx->Array.ElementArrayBufferObj);
+ elemMap = ctx->Driver.MapBufferRange(ctx, 0,
+ ctx->Array.ElementArrayBufferObj->Size,
+ GL_MAP_READ_BIT,
+ ctx->Array.ElementArrayBufferObj);
elements = ADD_POINTERS(elements, elemMap);
}
@@ -296,8 +311,7 @@ check_draw_elements_data(struct gl_context *ctx, GLsizei count, GLenum elemType,
}
if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) {
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB,
- ctx->Array.ElementArrayBufferObj);
+ ctx->Driver.UnmapBuffer(ctx, ctx->Array.ElementArrayBufferObj);
}
unmap_array_buffer(ctx, &arrayObj->Vertex);
@@ -351,8 +365,8 @@ print_draw_arrays(struct gl_context *ctx,
bufName);
if (bufName) {
- GLubyte *p = ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB,
- GL_READ_ONLY_ARB, bufObj);
+ GLubyte *p = ctx->Driver.MapBufferRange(ctx, 0, bufObj->Size,
+ GL_MAP_READ_BIT, bufObj);
int offset = (int) (GLintptr) exec->array.inputs[i]->Ptr;
float *f = (float *) (p + offset);
int *k = (int *) f;
@@ -364,7 +378,7 @@ print_draw_arrays(struct gl_context *ctx,
for (i = 0; i < n; i++) {
printf(" float[%d] = 0x%08x %f\n", i, k[i], f[i]);
}
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, bufObj);
+ ctx->Driver.UnmapBuffer(ctx, bufObj);
}
}
}
@@ -715,10 +729,11 @@ vbo_exec_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count,
static void
dump_element_buffer(struct gl_context *ctx, GLenum type)
{
- const GLvoid *map = ctx->Driver.MapBuffer(ctx,
- GL_ELEMENT_ARRAY_BUFFER_ARB,
- GL_READ_ONLY,
- ctx->Array.ElementArrayBufferObj);
+ const GLvoid *map =
+ ctx->Driver.MapBufferRange(ctx, 0,
+ ctx->Array.ElementArrayBufferObj->Size,
+ GL_MAP_READ_BIT,
+ ctx->Array.ElementArrayBufferObj);
switch (type) {
case GL_UNSIGNED_BYTE:
{
@@ -760,8 +775,7 @@ dump_element_buffer(struct gl_context *ctx, GLenum type)
;
}
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB,
- ctx->Array.ElementArrayBufferObj);
+ ctx->Driver.UnmapBuffer(ctx, ctx->Array.ElementArrayBufferObj);
}
@@ -909,11 +923,10 @@ vbo_exec_DrawRangeElementsBaseVertex(GLenum mode,
if (0)
_mesa_print_arrays(ctx);
-#ifdef DEBUG
/* 'end' was out of bounds, but now let's check the actual array
* indexes to see if any of them are out of bounds.
*/
- {
+ if (0) {
GLuint max = _mesa_max_buffer_index(ctx, count, type, indices,
ctx->Array.ElementArrayBufferObj);
if (max >= ctx->Array.ArrayObj->_MaxElement) {
@@ -934,7 +947,6 @@ vbo_exec_DrawRangeElementsBaseVertex(GLenum mode,
* upper bound wrong.
*/
}
-#endif
/* Set 'end' to the max possible legal value */
assert(ctx->Array.ArrayObj->_MaxElement >= 1);
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 7e8d8602093..8ffaaaa4876 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -260,8 +260,6 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
static void
vbo_exec_vtx_unmap( struct vbo_exec_context *exec )
{
- GLenum target = GL_ARRAY_BUFFER_ARB;
-
if (_mesa_is_bufferobj(exec->vtx.bufferobj)) {
struct gl_context *ctx = exec->ctx;
@@ -270,8 +268,7 @@ vbo_exec_vtx_unmap( struct vbo_exec_context *exec )
GLsizeiptr length = (exec->vtx.buffer_ptr - exec->vtx.buffer_map) * sizeof(float);
if (length)
- ctx->Driver.FlushMappedBufferRange(ctx, target,
- offset, length,
+ ctx->Driver.FlushMappedBufferRange(ctx, offset, length,
exec->vtx.bufferobj);
}
@@ -281,7 +278,7 @@ vbo_exec_vtx_unmap( struct vbo_exec_context *exec )
assert(exec->vtx.buffer_used <= VBO_VERT_BUFFER_SIZE);
assert(exec->vtx.buffer_ptr != NULL);
- ctx->Driver.UnmapBuffer(ctx, target, exec->vtx.bufferobj);
+ ctx->Driver.UnmapBuffer(ctx, exec->vtx.bufferobj);
exec->vtx.buffer_map = NULL;
exec->vtx.buffer_ptr = NULL;
exec->vtx.max_vert = 0;
@@ -296,8 +293,6 @@ void
vbo_exec_vtx_map( struct vbo_exec_context *exec )
{
struct gl_context *ctx = exec->ctx;
- const GLenum target = GL_ARRAY_BUFFER_ARB;
- const GLenum access = GL_READ_WRITE_ARB; /* for MapBuffer */
const GLenum accessRange = GL_MAP_WRITE_BIT | /* for MapBufferRange */
GL_MAP_INVALIDATE_RANGE_BIT |
GL_MAP_UNSYNCHRONIZED_BIT |
@@ -311,12 +306,10 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
assert(!exec->vtx.buffer_map);
assert(!exec->vtx.buffer_ptr);
- if (VBO_VERT_BUFFER_SIZE > exec->vtx.buffer_used + 1024 &&
- ctx->Driver.MapBufferRange) {
+ if (VBO_VERT_BUFFER_SIZE > exec->vtx.buffer_used + 1024) {
/* The VBO exists and there's room for more */
exec->vtx.buffer_map =
(GLfloat *)ctx->Driver.MapBufferRange(ctx,
- target,
exec->vtx.buffer_used,
(VBO_VERT_BUFFER_SIZE -
exec->vtx.buffer_used),
@@ -329,20 +322,16 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
/* Need to allocate a new VBO */
exec->vtx.buffer_used = 0;
- ctx->Driver.BufferData(ctx, target,
+ ctx->Driver.BufferData(ctx, GL_ARRAY_BUFFER_ARB,
VBO_VERT_BUFFER_SIZE,
NULL, usage, exec->vtx.bufferobj);
- if (ctx->Driver.MapBufferRange)
- exec->vtx.buffer_map =
- (GLfloat *)ctx->Driver.MapBufferRange(ctx, target,
- 0, VBO_VERT_BUFFER_SIZE,
- accessRange,
- exec->vtx.bufferobj);
- if (!exec->vtx.buffer_map)
- exec->vtx.buffer_map =
- (GLfloat *)ctx->Driver.MapBuffer(ctx, target, access, exec->vtx.bufferobj);
+ exec->vtx.buffer_map =
+ (GLfloat *)ctx->Driver.MapBufferRange(ctx,
+ 0, VBO_VERT_BUFFER_SIZE,
+ accessRange,
+ exec->vtx.bufferobj);
assert(exec->vtx.buffer_map);
exec->vtx.buffer_ptr = exec->vtx.buffer_map;
}
diff --git a/src/mesa/vbo/vbo_rebase.c b/src/mesa/vbo/vbo_rebase.c
index 1de290ff602..a1eab752ad6 100644
--- a/src/mesa/vbo/vbo_rebase.c
+++ b/src/mesa/vbo/vbo_rebase.c
@@ -159,10 +159,8 @@ void vbo_rebase_prims( struct gl_context *ctx,
void *ptr;
if (map_ib)
- ctx->Driver.MapBuffer(ctx,
- GL_ELEMENT_ARRAY_BUFFER,
- GL_READ_ONLY_ARB,
- ib->obj);
+ ctx->Driver.MapBufferRange(ctx, 0, ib->obj->Size, GL_MAP_READ_BIT,
+ ib->obj);
ptr = ADD_POINTERS(ib->obj->Pointer, ib->ptr);
@@ -183,9 +181,7 @@ void vbo_rebase_prims( struct gl_context *ctx,
}
if (map_ib)
- ctx->Driver.UnmapBuffer(ctx,
- GL_ELEMENT_ARRAY_BUFFER,
- ib->obj);
+ ctx->Driver.UnmapBuffer(ctx, ib->obj);
tmp_ib.obj = ctx->Shared->NullBufferObj;
tmp_ib.ptr = tmp_indices;
diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
index 9041f791edd..ad36e93329c 100644
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -232,11 +232,10 @@ map_vertex_store(struct gl_context *ctx,
assert(vertex_store->bufferobj);
assert(!vertex_store->buffer);
vertex_store->buffer =
- (GLfloat *) ctx->Driver.MapBuffer(ctx,
- GL_ARRAY_BUFFER_ARB, /* not used */
- GL_WRITE_ONLY, /* not used */
- vertex_store->
- bufferobj);
+ (GLfloat *) ctx->Driver.MapBufferRange(ctx, 0,
+ vertex_store->bufferobj->Size,
+ GL_MAP_WRITE_BIT, /* not used */
+ vertex_store->bufferobj);
assert(vertex_store->buffer);
return vertex_store->buffer + vertex_store->used;
@@ -247,7 +246,7 @@ static void
unmap_vertex_store(struct gl_context *ctx,
struct vbo_save_vertex_store *vertex_store)
{
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, vertex_store->bufferobj);
+ ctx->Driver.UnmapBuffer(ctx, vertex_store->bufferobj);
vertex_store->buffer = NULL;
}
diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
index a37af73e0db..6cda831aa85 100644
--- a/src/mesa/vbo/vbo_save_draw.c
+++ b/src/mesa/vbo/vbo_save_draw.c
@@ -70,7 +70,7 @@ _playback_copy_to_current(struct gl_context *ctx,
else
offset = node->buffer_offset;
- ctx->Driver.GetBufferSubData( ctx, 0, offset,
+ ctx->Driver.GetBufferSubData( ctx, offset,
node->vertex_size * sizeof(GLfloat),
data, node->vertex_store->bufferobj );
@@ -217,10 +217,11 @@ static void
vbo_save_loopback_vertex_list(struct gl_context *ctx,
const struct vbo_save_vertex_list *list)
{
- const char *buffer = ctx->Driver.MapBuffer(ctx,
- GL_ARRAY_BUFFER_ARB,
- GL_READ_ONLY, /* ? */
- list->vertex_store->bufferobj);
+ const char *buffer =
+ ctx->Driver.MapBufferRange(ctx, 0,
+ list->vertex_store->bufferobj->Size,
+ GL_MAP_READ_BIT, /* ? */
+ list->vertex_store->bufferobj);
vbo_loopback_vertex_list(ctx,
(const GLfloat *)(buffer + list->buffer_offset),
@@ -230,8 +231,7 @@ vbo_save_loopback_vertex_list(struct gl_context *ctx,
list->wrap_count,
list->vertex_size);
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB,
- list->vertex_store->bufferobj);
+ ctx->Driver.UnmapBuffer(ctx, list->vertex_store->bufferobj);
}
diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c
index ecca1171673..40906e38917 100644
--- a/src/mesa/vbo/vbo_split_copy.c
+++ b/src/mesa/vbo/vbo_split_copy.c
@@ -444,7 +444,7 @@ replay_init( struct copy_context *copy )
copy->vertex_size += attr_size(copy->array[i]);
if (_mesa_is_bufferobj(vbo) && !_mesa_bufferobj_mapped(vbo))
- ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY, vbo);
+ ctx->Driver.MapBufferRange(ctx, 0, vbo->Size, GL_MAP_READ_BIT, vbo);
copy->varying[j].src_ptr = ADD_POINTERS(vbo->Pointer,
copy->array[i]->Ptr);
@@ -459,8 +459,8 @@ replay_init( struct copy_context *copy )
*/
if (_mesa_is_bufferobj(copy->ib->obj) &&
!_mesa_bufferobj_mapped(copy->ib->obj))
- ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY,
- copy->ib->obj);
+ ctx->Driver.MapBufferRange(ctx, 0, copy->ib->obj->Size, GL_MAP_READ_BIT,
+ copy->ib->obj);
srcptr = (const GLubyte *) ADD_POINTERS(copy->ib->obj->Pointer,
copy->ib->ptr);
@@ -564,14 +564,14 @@ replay_finish( struct copy_context *copy )
for (i = 0; i < copy->nr_varying; i++) {
struct gl_buffer_object *vbo = copy->varying[i].array->BufferObj;
if (_mesa_is_bufferobj(vbo) && _mesa_bufferobj_mapped(vbo))
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, vbo);
+ ctx->Driver.UnmapBuffer(ctx, vbo);
}
/* Unmap index buffer:
*/
if (_mesa_is_bufferobj(copy->ib->obj) &&
_mesa_bufferobj_mapped(copy->ib->obj)) {
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, copy->ib->obj);
+ ctx->Driver.UnmapBuffer(ctx, copy->ib->obj);
}
}
diff --git a/src/mesa/x86-64/xform4.S b/src/mesa/x86-64/xform4.S
index 6141e434679..5abd5a25de5 100644
--- a/src/mesa/x86-64/xform4.S
+++ b/src/mesa/x86-64/xform4.S
@@ -118,7 +118,7 @@ p4_constants:
.byte 0x00, 0x00, 0x00, 0x00
.byte 0x00, 0x00, 0x00, 0x00
.byte 0x00, 0x00, 0x00, 0x00
-.float 0f+1.0
+.float 1.0
.text
.align 16