diff options
Diffstat (limited to 'src/mesa')
194 files changed, 13461 insertions, 2658 deletions
diff --git a/src/mesa/Android.gen.mk b/src/mesa/Android.gen.mk new file mode 100644 index 00000000000..2a08184aee6 --- /dev/null +++ b/src/mesa/Android.gen.mk @@ -0,0 +1,131 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# included by core mesa Android.mk for source generation + +ifeq ($(LOCAL_MODULE_CLASS),) +LOCAL_MODULE_CLASS := STATIC_LIBRARIES +endif + +intermediates := $(call local-intermediates-dir) + +sources := \ + main/api_exec_es1.c \ + main/api_exec_es1_dispatch.h \ + main/api_exec_es1_remap_helper.h \ + main/api_exec_es2.c \ + main/api_exec_es2_dispatch.h \ + main/api_exec_es2_remap_helper.h \ + program/lex.yy.c \ + program/program_parse.tab.c + +LOCAL_SRC_FILES := $(filter-out $(sources), $(LOCAL_SRC_FILES)) + +LOCAL_C_INCLUDES += $(intermediates)/main + +ifeq ($(strip $(MESA_ENABLE_ASM)),true) +ifeq ($(TARGET_ARCH),x86) +sources += x86/matypes.h +LOCAL_C_INCLUDES += $(intermediates)/x86 +endif +endif + +sources += main/git_sha1.h + +sources := $(addprefix $(intermediates)/, $(sources)) +LOCAL_GENERATED_SOURCES += $(sources) + +glapi := $(MESA_TOP)/src/mapi/glapi/gen + +es_src_deps := \ + $(LOCAL_PATH)/main/APIspec.xml \ + $(LOCAL_PATH)/main/es_generator.py \ + $(LOCAL_PATH)/main/APIspecutil.py \ + $(LOCAL_PATH)/main/APIspec.py + +es_hdr_deps := \ + $(wildcard $(glapi)/*.py) \ + $(wildcard $(glapi)/*.xml) + +define es-gen + @mkdir -p $(dir $@) + @echo "Gen ES: $(PRIVATE_MODULE) <= $(notdir $(@))" + $(hide) $(PRIVATE_SCRIPT) $(1) $(PRIVATE_XML) > $@ +endef + +define local-l-to-c + @mkdir -p $(dir $@) + @echo "Mesa Lex: $(PRIVATE_MODULE) <= $<" + $(hide) $(LEX) -o$@ $< +endef + +define local-y-to-c-and-h + @mkdir -p $(dir $@) + @echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<" + $(hide) $(YACC) -o $@ $< +endef + +$(intermediates)/main/api_exec_%.c: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/main/es_generator.py +$(intermediates)/main/api_exec_%.c: PRIVATE_XML := -S $(LOCAL_PATH)/main/APIspec.xml +$(intermediates)/main/api_exec_%_dispatch.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(glapi)/gl_table.py +$(intermediates)/main/api_exec_%_dispatch.h: PRIVATE_XML := -f $(glapi)/gl_and_es_API.xml +$(intermediates)/main/api_exec_%_remap_helper.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(glapi)/remap_helper.py +$(intermediates)/main/api_exec_%_remap_helper.h: PRIVATE_XML := -f $(glapi)/gl_and_es_API.xml + +$(intermediates)/main/api_exec_es1.c: $(es_src_deps) + $(call es-gen,-V GLES1.1) + +$(intermediates)/main/api_exec_es2.c: $(es_src_deps) + $(call es-gen,-V GLES2.0) + +$(intermediates)/main/api_exec_%_dispatch.h: $(es_hdr_deps) + $(call es-gen, -c $* -m remap_table) + +$(intermediates)/main/api_exec_%_remap_helper.h: $(es_hdr_deps) + $(call es-gen, -c $*) + +$(intermediates)/program/program_parse.tab.c: $(LOCAL_PATH)/program/program_parse.y + $(local-y-to-c-and-h) + +$(intermediates)/program/lex.yy.c: $(LOCAL_PATH)/program/program_lexer.l + $(local-l-to-c) + +$(intermediates)/main/git_sha1.h: + @mkdir -p $(dir $@) + @echo "GIT-SHA1: $(PRIVATE_MODULE) <= git" + $(hide) touch $@ + $(hide) if which git > /dev/null; then \ + git --git-dir $(PRIVATE_PATH)/../../.git log -n 1 --oneline | \ + sed 's/^\([^ ]*\) .*/#define MESA_GIT_SHA1 "git-\1"/' \ + > $@; \ + fi + +matypes_deps := \ + $(BUILD_OUT_EXECUTABLES)/mesa_gen_matypes$(BUILD_EXECUTABLE_SUFFIX) \ + $(LOCAL_PATH)/main/mtypes.h \ + $(LOCAL_PATH)/tnl/t_context.h + +$(intermediates)/x86/matypes.h: $(matypes_deps) + @mkdir -p $(dir $@) + @echo "MATYPES: $(PRIVATE_MODULE) <= $(notdir $@)" + $(hide) $< > $@ diff --git a/src/mesa/Android.mk b/src/mesa/Android.mk new file mode 100644 index 00000000000..67808d491ac --- /dev/null +++ b/src/mesa/Android.mk @@ -0,0 +1,115 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu <[email protected]> +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Android.mk for core mesa + +LOCAL_PATH := $(call my-dir) + +include $(LOCAL_PATH)/sources.mak + +common_CFLAGS := \ + -DFEATURE_ES1=1 \ + -DFEATURE_ES2=1 + +common_C_INCLUDES := \ + $(MESA_TOP)/src/mapi \ + $(MESA_TOP)/src/glsl + +common_ASM := + +# --------------------------------------- +# Build mesa_gen_matypes for host +# --------------------------------------- + +ifeq ($(strip $(MESA_ENABLE_ASM)),true) +ifeq ($(TARGET_ARCH),x86) +common_ASM += $(X86_SOURCES) + +include $(CLEAR_VARS) +LOCAL_SRC_FILES := x86/gen_matypes.c +LOCAL_CFLAGS := $(common_CFLAGS) +LOCAL_C_INCLUDES := $(common_C_INCLUDES) +LOCAL_MODULE := mesa_gen_matypes +include $(MESA_COMMON_MK) +include $(BUILD_HOST_EXECUTABLE) + +endif # x86 +endif # MESA_ENABLE_ASM + +# --------------------------------------- +# Build libmesa_st_mesa +# --------------------------------------- + +ifeq ($(strip $(MESA_BUILD_GALLIUM)),true) +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(MESA_GALLIUM_SOURCES) \ + $(MESA_GALLIUM_CXX_SOURCES) \ + $(common_ASM) + +LOCAL_CFLAGS := $(common_CFLAGS) + +LOCAL_C_INCLUDES := \ + $(common_C_INCLUDES) \ + $(MESA_TOP)/src/gallium/include \ + $(MESA_TOP)/src/gallium/auxiliary + +LOCAL_MODULE := libmesa_st_mesa + +include $(LOCAL_PATH)/Android.gen.mk +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) +endif # MESA_BUILD_GALLIUM + +# --------------------------------------- +# Build libmesa_glsl_utils +# +# It is used to avoid circular dependency between core mesa and glsl. +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + program/hash_table.c \ + program/symbol_table.c + +LOCAL_MODULE := libmesa_glsl_utils + +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) + +# --------------------------------------- +# Build libmesa_glsl_utils for host +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + program/hash_table.c \ + program/symbol_table.c + +LOCAL_MODULE := libmesa_glsl_utils + +include $(MESA_COMMON_MK) +include $(BUILD_HOST_STATIC_LIBRARY) diff --git a/src/mesa/Makefile b/src/mesa/Makefile index a903a260ac9..0e15d61bd8d 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -12,11 +12,10 @@ DRICORE_OBJ_DIR := objs-dricore include sources.mak # adjust object dirs +DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS)) MESA_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_OBJECTS)) MESA_GALLIUM_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_GALLIUM_OBJECTS)) -DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS)) - # define preprocessor flags MESA_CPPFLAGS := $(API_DEFINES) $(DEFINES) @@ -68,6 +67,26 @@ $(DRICORE_OBJ_DIR)/%.o: %.S # then convenience libs (.a) and finally the device drivers: default: $(DEPENDS) asm_subdirs $(MESA_LIBS) $(DRICORE_LIBS) driver_subdirs +# include glapi_gen.mk for generating glapi headers for GLES +GLAPI := $(TOP)/src/mapi/glapi/gen +include $(GLAPI)/glapi_gen.mk + +main/api_exec_es1_dispatch.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_dispatch_deps) + $(call glapi_gen_dispatch,$<,es1) + +main/api_exec_es1_remap_helper.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_remap_deps) + $(call glapi_gen_remap,$<,es1) + +main/api_exec_es1.o: main/api_exec_es1_dispatch.h main/api_exec_es1_remap_helper.h + +main/api_exec_es2_dispatch.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_dispatch_deps) + $(call glapi_gen_dispatch,$<,es2) + +main/api_exec_es2_remap_helper.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_remap_deps) + $(call glapi_gen_remap,$<,es2) + +main/api_exec_es2.o: main/api_exec_es2_dispatch.h main/api_exec_es2_remap_helper.h + main/api_exec_es1.c: main/APIspec.xml main/es_generator.py main/APIspecutil.py main/APIspec.py $(PYTHON2) $(PYTHON_FLAGS) main/es_generator.py -S main/APIspec.xml -V GLES1.1 > $@ @@ -124,6 +143,8 @@ depend: $(ALL_SOURCES) @ touch depend @$(MKDEP) $(MKDEP_OPTIONS) -p$(MESA_OBJ_DIR)/ $(MESA_CPPFLAGS) \ $(ALL_SOURCES) > /dev/null 2>/dev/null + @$(MKDEP) $(MKDEP_OPTIONS) -a -p$(DRICORE_OBJ_DIR)/ $(MESA_CPPFLAGS) \ + $(ALL_SOURCES) > /dev/null 2>/dev/null ###################################################################### # Installation rules diff --git a/src/mesa/SConscript b/src/mesa/SConscript index 24e2155c387..b0c3334fa48 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -264,6 +264,7 @@ statetracker_sources = [ 'state_tracker/st_draw_feedback.c', 'state_tracker/st_extensions.c', 'state_tracker/st_format.c', + 'state_tracker/st_glsl_to_tgsi.cpp', 'state_tracker/st_gen_mipmap.c', 'state_tracker/st_manager.c', 'state_tracker/st_mesa_to_tgsi.c', @@ -292,6 +293,7 @@ program_sources = [ 'program/prog_instruction.c', 'program/prog_noise.c', 'program/prog_optimize.c', + 'program/prog_opt_constant_fold.c', 'program/prog_parameter.c', 'program/prog_parameter_layout.c', 'program/prog_print.c', @@ -346,28 +348,28 @@ if env['gles']: GLAPI = '#src/mapi/glapi/' gles_headers = [] gles_headers += env.CodeGenerate( - target = 'es1api/main/dispatch.h', + target = 'main/api_exec_es1_dispatch.h', script = GLAPI + 'gen/gl_table.py', - source = GLAPI + 'gen-es/es1_API.xml', - command = python_cmd + ' $SCRIPT -c -m remap_table -f $SOURCE > $TARGET', + source = GLAPI + 'gen/gl_and_es_API.xml', + command = python_cmd + ' $SCRIPT -c es1 -m remap_table -f $SOURCE > $TARGET', ) gles_headers += env.CodeGenerate( - target = 'es1api/main/remap_helper.h', + target = 'main/api_exec_es1_remap_helper.h', script = GLAPI + 'gen/remap_helper.py', - source = GLAPI + 'gen-es/es1_API.xml', - command = python_cmd + ' $SCRIPT -f $SOURCE > $TARGET', + source = GLAPI + 'gen/gl_and_es_API.xml', + command = python_cmd + ' $SCRIPT -c es1 -f $SOURCE > $TARGET', ) gles_headers += env.CodeGenerate( - target = 'es2api/main/dispatch.h', + target = 'main/api_exec_es2_dispatch.h', script = GLAPI + 'gen/gl_table.py', - source = GLAPI + 'gen-es/es2_API.xml', - command = python_cmd + ' $SCRIPT -c -m remap_table -f $SOURCE > $TARGET', + source = GLAPI + 'gen/gl_and_es_API.xml', + command = python_cmd + ' $SCRIPT -c es2 -m remap_table -f $SOURCE > $TARGET', ) gles_headers += env.CodeGenerate( - target = 'es2api/main/remap_helper.h', + target = 'main/api_exec_es2_remap_helper.h', script = GLAPI + 'gen/remap_helper.py', - source = GLAPI + 'gen-es/es2_API.xml', - command = python_cmd + ' $SCRIPT -f $SOURCE > $TARGET', + source = GLAPI + 'gen/gl_and_es_API.xml', + command = python_cmd + ' $SCRIPT -c es2 -f $SOURCE > $TARGET', ) env.Depends(gles_sources, gles_headers) diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 8ab129dd73d..a6174ee2f56 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -95,8 +95,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->TexSubImage2D = _mesa_store_texsubimage2d; driver->TexSubImage3D = _mesa_store_texsubimage3d; driver->GetTexImage = _mesa_get_teximage; - driver->CopyTexImage1D = _mesa_meta_CopyTexImage1D; - driver->CopyTexImage2D = _mesa_meta_CopyTexImage2D; driver->CopyTexSubImage1D = _mesa_meta_CopyTexSubImage1D; driver->CopyTexSubImage2D = _mesa_meta_CopyTexSubImage2D; driver->CopyTexSubImage3D = _mesa_meta_CopyTexSubImage3D; @@ -250,10 +248,10 @@ _mesa_init_driver_state(struct gl_context *ctx) GLuint i; for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { ctx->Driver.ColorMaskIndexed(ctx, i, - ctx->Color.ColorMask[0][RCOMP], - ctx->Color.ColorMask[0][GCOMP], - ctx->Color.ColorMask[0][BCOMP], - ctx->Color.ColorMask[0][ACOMP]); + ctx->Color.ColorMask[i][RCOMP], + ctx->Color.ColorMask[i][GCOMP], + ctx->Color.ColorMask[i][BCOMP], + ctx->Color.ColorMask[i][ACOMP]); } } else { @@ -288,7 +286,10 @@ _mesa_init_driver_state(struct gl_context *ctx) ctx->Driver.Enable(ctx, GL_TEXTURE_CUBE_MAP, GL_FALSE); ctx->Driver.Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); - ctx->Driver.Fogfv(ctx, GL_FOG_MODE, 0); + { + GLfloat mode = (GLfloat) ctx->Fog.Mode; + ctx->Driver.Fogfv(ctx, GL_FOG_MODE, &mode); + } ctx->Driver.Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density); ctx->Driver.Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start); ctx->Driver.Fogfv(ctx, GL_FOG_END, &ctx->Fog.End); diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 0e58aeca3f5..291d912121b 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -62,6 +62,7 @@ #include "main/teximage.h" #include "main/texparam.h" #include "main/texstate.h" +#include "main/uniforms.h" #include "main/varray.h" #include "main/viewport.h" #include "program/program.h" @@ -72,63 +73,36 @@ /** Return offset in bytes of the field within a vertex struct */ #define OFFSET(FIELD) ((void *) offsetof(struct vertex, FIELD)) - -/** - * Flags passed to _mesa_meta_begin(). - */ -/*@{*/ -#define META_ALL ~0x0 -#define META_ALPHA_TEST 0x1 -#define META_BLEND 0x2 /**< includes logicop */ -#define META_COLOR_MASK 0x4 -#define META_DEPTH_TEST 0x8 -#define META_FOG 0x10 -#define META_PIXEL_STORE 0x20 -#define META_PIXEL_TRANSFER 0x40 -#define META_RASTERIZATION 0x80 -#define META_SCISSOR 0x100 -#define META_SHADER 0x200 -#define META_STENCIL_TEST 0x400 -#define META_TRANSFORM 0x800 /**< modelview, projection, clip planes */ -#define META_TEXTURE 0x1000 -#define META_VERTEX 0x2000 -#define META_VIEWPORT 0x4000 -#define META_CLAMP_FRAGMENT_COLOR 0x8000 -#define META_CLAMP_VERTEX_COLOR 0x10000 -#define META_CONDITIONAL_RENDER 0x20000 -/*@}*/ - - /** * State which we may save/restore across meta ops. * XXX this may be incomplete... */ struct save_state { - GLbitfield SavedState; /**< bitmask of META_* flags */ + GLbitfield SavedState; /**< bitmask of MESA_META_* flags */ - /** META_ALPHA_TEST */ + /** MESA_META_ALPHA_TEST */ GLboolean AlphaEnabled; GLenum AlphaFunc; GLclampf AlphaRef; - /** META_BLEND */ + /** MESA_META_BLEND */ GLbitfield BlendEnabled; GLboolean ColorLogicOpEnabled; - /** META_COLOR_MASK */ + /** MESA_META_COLOR_MASK */ GLubyte ColorMask[MAX_DRAW_BUFFERS][4]; - /** META_DEPTH_TEST */ + /** MESA_META_DEPTH_TEST */ struct gl_depthbuffer_attrib Depth; - /** META_FOG */ + /** MESA_META_FOG */ GLboolean Fog; - /** META_PIXEL_STORE */ + /** MESA_META_PIXEL_STORE */ struct gl_pixelstore_attrib Pack, Unpack; - /** META_PIXEL_TRANSFER */ + /** MESA_META_PIXEL_TRANSFER */ GLfloat RedBias, RedScale; GLfloat GreenBias, GreenScale; GLfloat BlueBias, BlueScale; @@ -136,17 +110,17 @@ struct save_state GLfloat DepthBias, DepthScale; GLboolean MapColorFlag; - /** META_RASTERIZATION */ + /** MESA_META_RASTERIZATION */ GLenum FrontPolygonMode, BackPolygonMode; GLboolean PolygonOffset; GLboolean PolygonSmooth; GLboolean PolygonStipple; GLboolean PolygonCull; - /** META_SCISSOR */ + /** MESA_META_SCISSOR */ struct gl_scissor_attrib Scissor; - /** META_SHADER */ + /** MESA_META_SHADER */ GLboolean VertexProgramEnabled; struct gl_vertex_program *VertexProgram; GLboolean FragmentProgramEnabled; @@ -156,17 +130,19 @@ struct save_state struct gl_shader_program *FragmentShader; struct gl_shader_program *ActiveShader; - /** META_STENCIL_TEST */ + /** MESA_META_STENCIL_TEST */ struct gl_stencil_attrib Stencil; - /** META_TRANSFORM */ + /** MESA_META_TRANSFORM */ GLenum MatrixMode; GLfloat ModelviewMatrix[16]; GLfloat ProjectionMatrix[16]; GLfloat TextureMatrix[16]; + + /** MESA_META_CLIP */ GLbitfield ClipPlanesEnabled; - /** META_TEXTURE */ + /** MESA_META_TEXTURE */ GLuint ActiveUnit; GLuint ClientActiveUnit; /** for unit[0] only */ @@ -176,21 +152,21 @@ struct save_state GLbitfield TexGenEnabled[MAX_TEXTURE_UNITS]; GLuint EnvMode; /* unit[0] only */ - /** META_VERTEX */ + /** MESA_META_VERTEX */ struct gl_array_object *ArrayObj; struct gl_buffer_object *ArrayBufferObj; - /** META_VIEWPORT */ + /** MESA_META_VIEWPORT */ GLint ViewportX, ViewportY, ViewportW, ViewportH; GLclampd DepthNear, DepthFar; - /** META_CLAMP_FRAGMENT_COLOR */ + /** MESA_META_CLAMP_FRAGMENT_COLOR */ GLenum ClampFragmentColor; - /** META_CLAMP_VERTEX_COLOR */ + /** MESA_META_CLAMP_VERTEX_COLOR */ GLenum ClampVertexColor; - /** META_CONDITIONAL_RENDER */ + /** MESA_META_CONDITIONAL_RENDER */ struct gl_query_object *CondRenderQuery; GLenum CondRenderMode; @@ -235,6 +211,8 @@ struct clear_state { GLuint ArrayObj; GLuint VBO; + GLuint ShaderProg; + GLint ColorLocation; }; @@ -336,10 +314,10 @@ _mesa_meta_free(struct gl_context *ctx) * Enter meta state. This is like a light-weight version of glPushAttrib * but it also resets most GL state back to default values. * - * \param state bitmask of META_* flags indicating which attribute groups + * \param state bitmask of MESA_META_* flags indicating which attribute groups * to save and reset to their defaults */ -static void +void _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) { struct save_state *save; @@ -351,7 +329,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) memset(save, 0, sizeof(*save)); save->SavedState = state; - if (state & META_ALPHA_TEST) { + if (state & MESA_META_ALPHA_TEST) { save->AlphaEnabled = ctx->Color.AlphaEnabled; save->AlphaFunc = ctx->Color.AlphaFunc; save->AlphaRef = ctx->Color.AlphaRef; @@ -359,7 +337,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_set_enable(ctx, GL_ALPHA_TEST, GL_FALSE); } - if (state & META_BLEND) { + if (state & MESA_META_BLEND) { save->BlendEnabled = ctx->Color.BlendEnabled; if (ctx->Color.BlendEnabled) { if (ctx->Extensions.EXT_draw_buffers2) { @@ -377,7 +355,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_set_enable(ctx, GL_COLOR_LOGIC_OP, GL_FALSE); } - if (state & META_COLOR_MASK) { + if (state & MESA_META_COLOR_MASK) { memcpy(save->ColorMask, ctx->Color.ColorMask, sizeof(ctx->Color.ColorMask)); if (!ctx->Color.ColorMask[0][0] || @@ -387,26 +365,26 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); } - if (state & META_DEPTH_TEST) { + if (state & MESA_META_DEPTH_TEST) { save->Depth = ctx->Depth; /* struct copy */ if (ctx->Depth.Test) _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_FALSE); } - if (state & META_FOG) { + if (state & MESA_META_FOG) { save->Fog = ctx->Fog.Enabled; if (ctx->Fog.Enabled) _mesa_set_enable(ctx, GL_FOG, GL_FALSE); } - if (state & META_PIXEL_STORE) { + if (state & MESA_META_PIXEL_STORE) { save->Pack = ctx->Pack; save->Unpack = ctx->Unpack; ctx->Pack = ctx->DefaultPacking; ctx->Unpack = ctx->DefaultPacking; } - if (state & META_PIXEL_TRANSFER) { + if (state & MESA_META_PIXEL_TRANSFER) { save->RedScale = ctx->Pixel.RedScale; save->RedBias = ctx->Pixel.RedBias; save->GreenScale = ctx->Pixel.GreenScale; @@ -429,7 +407,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) ctx->NewState |=_NEW_PIXEL; } - if (state & META_RASTERIZATION) { + if (state & MESA_META_RASTERIZATION) { save->FrontPolygonMode = ctx->Polygon.FrontMode; save->BackPolygonMode = ctx->Polygon.BackMode; save->PolygonOffset = ctx->Polygon.OffsetFill; @@ -443,12 +421,12 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_set_enable(ctx, GL_CULL_FACE, GL_FALSE); } - if (state & META_SCISSOR) { + if (state & MESA_META_SCISSOR) { save->Scissor = ctx->Scissor; /* struct copy */ _mesa_set_enable(ctx, GL_SCISSOR_TEST, GL_FALSE); } - if (state & META_SHADER) { + if (state & MESA_META_SHADER) { if (ctx->Extensions.ARB_vertex_program) { save->VertexProgramEnabled = ctx->VertexProgram.Enabled; _mesa_reference_vertprog(ctx, &save->VertexProgram, @@ -477,14 +455,14 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) } } - if (state & META_STENCIL_TEST) { + if (state & MESA_META_STENCIL_TEST) { save->Stencil = ctx->Stencil; /* struct copy */ if (ctx->Stencil.Enabled) _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_FALSE); /* NOTE: other stencil state not reset */ } - if (state & META_TEXTURE) { + if (state & MESA_META_TEXTURE) { GLuint u, tgt; save->ActiveUnit = ctx->Texture.CurrentUnit; @@ -523,7 +501,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); } - if (state & META_TRANSFORM) { + if (state & MESA_META_TRANSFORM) { GLuint activeTexture = ctx->Texture.CurrentUnit; memcpy(save->ModelviewMatrix, ctx->ModelviewMatrixStack.Top->m, 16 * sizeof(GLfloat)); @@ -544,6 +522,9 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_Ortho(0.0, ctx->DrawBuffer->Width, 0.0, ctx->DrawBuffer->Height, -1.0, 1.0); + } + + if (state & MESA_META_CLIP) { save->ClipPlanesEnabled = ctx->Transform.ClipPlanesEnabled; if (ctx->Transform.ClipPlanesEnabled) { GLuint i; @@ -553,7 +534,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) } } - if (state & META_VERTEX) { + if (state & MESA_META_VERTEX) { /* save vertex array object state */ _mesa_reference_array_object(ctx, &save->ArrayObj, ctx->Array.ArrayObj); @@ -562,7 +543,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) /* set some default state? */ } - if (state & META_VIEWPORT) { + if (state & MESA_META_VIEWPORT) { /* save viewport state */ save->ViewportX = ctx->Viewport.X; save->ViewportY = ctx->Viewport.Y; @@ -583,7 +564,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_DepthRange(0.0, 1.0); } - if (state & META_CLAMP_FRAGMENT_COLOR) { + if (state & MESA_META_CLAMP_FRAGMENT_COLOR) { save->ClampFragmentColor = ctx->Color.ClampFragmentColor; /* Generally in here we want to do clamping according to whether @@ -594,7 +575,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE); } - if (state & META_CLAMP_VERTEX_COLOR) { + if (state & MESA_META_CLAMP_VERTEX_COLOR) { save->ClampVertexColor = ctx->Light.ClampVertexColor; /* Generally in here we never want vertex color clamping -- @@ -603,7 +584,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_ClampColorARB(GL_CLAMP_VERTEX_COLOR, GL_FALSE); } - if (state & META_CONDITIONAL_RENDER) { + if (state & MESA_META_CONDITIONAL_RENDER) { save->CondRenderQuery = ctx->Query.CondRenderQuery; save->CondRenderMode = ctx->Query.CondRenderMode; @@ -623,19 +604,19 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) /** * Leave meta state. This is like a light-weight version of glPopAttrib(). */ -static void +void _mesa_meta_end(struct gl_context *ctx) { struct save_state *save = &ctx->Meta->Save[--ctx->Meta->SaveStackDepth]; const GLbitfield state = save->SavedState; - if (state & META_ALPHA_TEST) { + if (state & MESA_META_ALPHA_TEST) { if (ctx->Color.AlphaEnabled != save->AlphaEnabled) _mesa_set_enable(ctx, GL_ALPHA_TEST, save->AlphaEnabled); _mesa_AlphaFunc(save->AlphaFunc, save->AlphaRef); } - if (state & META_BLEND) { + if (state & MESA_META_BLEND) { if (ctx->Color.BlendEnabled != save->BlendEnabled) { if (ctx->Extensions.EXT_draw_buffers2) { GLuint i; @@ -651,7 +632,7 @@ _mesa_meta_end(struct gl_context *ctx) _mesa_set_enable(ctx, GL_COLOR_LOGIC_OP, save->ColorLogicOpEnabled); } - if (state & META_COLOR_MASK) { + if (state & MESA_META_COLOR_MASK) { GLuint i; for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { if (!TEST_EQ_4V(ctx->Color.ColorMask[i], save->ColorMask[i])) { @@ -670,23 +651,23 @@ _mesa_meta_end(struct gl_context *ctx) } } - if (state & META_DEPTH_TEST) { + if (state & MESA_META_DEPTH_TEST) { if (ctx->Depth.Test != save->Depth.Test) _mesa_set_enable(ctx, GL_DEPTH_TEST, save->Depth.Test); _mesa_DepthFunc(save->Depth.Func); _mesa_DepthMask(save->Depth.Mask); } - if (state & META_FOG) { + if (state & MESA_META_FOG) { _mesa_set_enable(ctx, GL_FOG, save->Fog); } - if (state & META_PIXEL_STORE) { + if (state & MESA_META_PIXEL_STORE) { ctx->Pack = save->Pack; ctx->Unpack = save->Unpack; } - if (state & META_PIXEL_TRANSFER) { + if (state & MESA_META_PIXEL_TRANSFER) { ctx->Pixel.RedScale = save->RedScale; ctx->Pixel.RedBias = save->RedBias; ctx->Pixel.GreenScale = save->GreenScale; @@ -700,7 +681,7 @@ _mesa_meta_end(struct gl_context *ctx) ctx->NewState |=_NEW_PIXEL; } - if (state & META_RASTERIZATION) { + if (state & MESA_META_RASTERIZATION) { _mesa_PolygonMode(GL_FRONT, save->FrontPolygonMode); _mesa_PolygonMode(GL_BACK, save->BackPolygonMode); _mesa_set_enable(ctx, GL_POLYGON_STIPPLE, save->PolygonStipple); @@ -709,13 +690,13 @@ _mesa_meta_end(struct gl_context *ctx) _mesa_set_enable(ctx, GL_CULL_FACE, save->PolygonCull); } - if (state & META_SCISSOR) { + if (state & MESA_META_SCISSOR) { _mesa_set_enable(ctx, GL_SCISSOR_TEST, save->Scissor.Enabled); _mesa_Scissor(save->Scissor.X, save->Scissor.Y, save->Scissor.Width, save->Scissor.Height); } - if (state & META_SHADER) { + if (state & MESA_META_SHADER) { if (ctx->Extensions.ARB_vertex_program) { _mesa_set_enable(ctx, GL_VERTEX_PROGRAM_ARB, save->VertexProgramEnabled); @@ -747,7 +728,7 @@ _mesa_meta_end(struct gl_context *ctx) save->ActiveShader); } - if (state & META_STENCIL_TEST) { + if (state & MESA_META_STENCIL_TEST) { const struct gl_stencil_attrib *stencil = &save->Stencil; _mesa_set_enable(ctx, GL_STENCIL_TEST, stencil->Enabled); @@ -778,7 +759,7 @@ _mesa_meta_end(struct gl_context *ctx) stencil->ZPassFunc[1]); } - if (state & META_TEXTURE) { + if (state & MESA_META_TEXTURE) { GLuint u, tgt; ASSERT(ctx->Texture.CurrentUnit == 0); @@ -829,7 +810,7 @@ _mesa_meta_end(struct gl_context *ctx) _mesa_ClientActiveTextureARB(GL_TEXTURE0 + save->ClientActiveUnit); } - if (state & META_TRANSFORM) { + if (state & MESA_META_TRANSFORM) { GLuint activeTexture = ctx->Texture.CurrentUnit; _mesa_ActiveTextureARB(GL_TEXTURE0); _mesa_MatrixMode(GL_TEXTURE); @@ -843,7 +824,9 @@ _mesa_meta_end(struct gl_context *ctx) _mesa_LoadMatrixf(save->ProjectionMatrix); _mesa_MatrixMode(save->MatrixMode); + } + if (state & MESA_META_CLIP) { if (save->ClipPlanesEnabled) { GLuint i; for (i = 0; i < ctx->Const.MaxClipPlanes; i++) { @@ -854,7 +837,7 @@ _mesa_meta_end(struct gl_context *ctx) } } - if (state & META_VERTEX) { + if (state & MESA_META_VERTEX) { /* restore vertex buffer object */ _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, save->ArrayBufferObj->Name); _mesa_reference_buffer_object(ctx, &save->ArrayBufferObj, NULL); @@ -864,7 +847,7 @@ _mesa_meta_end(struct gl_context *ctx) _mesa_reference_array_object(ctx, &save->ArrayObj, NULL); } - if (state & META_VIEWPORT) { + if (state & MESA_META_VIEWPORT) { if (save->ViewportX != ctx->Viewport.X || save->ViewportY != ctx->Viewport.Y || save->ViewportW != ctx->Viewport.Width || @@ -875,15 +858,15 @@ _mesa_meta_end(struct gl_context *ctx) _mesa_DepthRange(save->DepthNear, save->DepthFar); } - if (state & META_CLAMP_FRAGMENT_COLOR) { + if (state & MESA_META_CLAMP_FRAGMENT_COLOR) { _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, save->ClampFragmentColor); } - if (state & META_CLAMP_VERTEX_COLOR) { + if (state & MESA_META_CLAMP_VERTEX_COLOR) { _mesa_ClampColorARB(GL_CLAMP_VERTEX_COLOR, save->ClampVertexColor); } - if (state & META_CONDITIONAL_RENDER) { + if (state & MESA_META_CONDITIONAL_RENDER) { if (save->CondRenderQuery) _mesa_BeginConditionalRender(save->CondRenderQuery->Id, save->CondRenderMode); @@ -1349,7 +1332,7 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx, } /* only scissor effects blit so save/clear all other relevant state */ - _mesa_meta_begin(ctx, ~META_SCISSOR); + _mesa_meta_begin(ctx, ~MESA_META_SCISSOR); if (blit->ArrayObj == 0) { /* one-time setup */ @@ -1478,15 +1461,15 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers) }; struct vertex verts[4]; /* save all state but scissor, pixel pack/unpack */ - GLbitfield metaSave = (META_ALL - - META_SCISSOR - - META_PIXEL_STORE - - META_CONDITIONAL_RENDER); + GLbitfield metaSave = (MESA_META_ALL - + MESA_META_SCISSOR - + MESA_META_PIXEL_STORE - + MESA_META_CONDITIONAL_RENDER); const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1; if (buffers & BUFFER_BITS_COLOR) { /* if clearing color buffers, don't save/restore colormask */ - metaSave -= META_COLOR_MASK; + metaSave -= MESA_META_COLOR_MASK; } _mesa_meta_begin(ctx, metaSave); @@ -1521,7 +1504,7 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers) _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE); } else { - ASSERT(metaSave & META_COLOR_MASK); + ASSERT(metaSave & MESA_META_COLOR_MASK); _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); } @@ -1589,10 +1572,166 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers) _mesa_meta_end(ctx); } +static void +meta_glsl_clear_init(struct gl_context *ctx, struct clear_state *clear) +{ + const char *vs_source = + "attribute vec4 position;\n" + "void main()\n" + "{\n" + " gl_Position = position;\n" + "}\n"; + const char *fs_source = + "uniform vec4 color;\n" + "void main()\n" + "{\n" + " gl_FragColor = color;\n" + "}\n"; + GLuint vs, fs; + + if (clear->ArrayObj != 0) + return; + + /* create vertex array object */ + _mesa_GenVertexArrays(1, &clear->ArrayObj); + _mesa_BindVertexArray(clear->ArrayObj); + + /* create vertex array buffer */ + _mesa_GenBuffersARB(1, &clear->VBO); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO); + + /* setup vertex arrays */ + _mesa_VertexAttribPointerARB(0, 3, GL_FLOAT, GL_FALSE, 0, (void *)0); + _mesa_EnableVertexAttribArrayARB(0); + + vs = _mesa_CreateShaderObjectARB(GL_VERTEX_SHADER); + _mesa_ShaderSourceARB(vs, 1, &vs_source, NULL); + _mesa_CompileShaderARB(vs); + + fs = _mesa_CreateShaderObjectARB(GL_FRAGMENT_SHADER); + _mesa_ShaderSourceARB(fs, 1, &fs_source, NULL); + _mesa_CompileShaderARB(fs); + + clear->ShaderProg = _mesa_CreateProgramObjectARB(); + _mesa_AttachShader(clear->ShaderProg, fs); + _mesa_AttachShader(clear->ShaderProg, vs); + _mesa_BindAttribLocationARB(clear->ShaderProg, 0, "position"); + _mesa_LinkProgramARB(clear->ShaderProg); + + clear->ColorLocation = _mesa_GetUniformLocationARB(clear->ShaderProg, + "color"); +} + +/** + * Meta implementation of ctx->Driver.Clear() in terms of polygon rendering. + */ +void +_mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers) +{ + struct clear_state *clear = &ctx->Meta->Clear; + GLbitfield metaSave; + const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1; + struct gl_framebuffer *fb = ctx->DrawBuffer; + const float x0 = ((float)fb->_Xmin / fb->Width) * 2.0f - 1.0f; + const float y0 = ((float)fb->_Ymin / fb->Height) * 2.0f - 1.0f; + const float x1 = ((float)fb->_Xmax / fb->Width) * 2.0f - 1.0f; + const float y1 = ((float)fb->_Ymax / fb->Height) * 2.0f - 1.0f; + const float z = -invert_z(ctx->Depth.Clear); + struct vertex { + GLfloat x, y, z; + } verts[4]; + + metaSave = (MESA_META_ALPHA_TEST | + MESA_META_BLEND | + MESA_META_DEPTH_TEST | + MESA_META_RASTERIZATION | + MESA_META_SHADER | + MESA_META_STENCIL_TEST | + MESA_META_VERTEX | + MESA_META_VIEWPORT | + MESA_META_CLIP | + MESA_META_CLAMP_FRAGMENT_COLOR); + + if (!(buffers & BUFFER_BITS_COLOR)) { + /* We'll use colormask to disable color writes. Otherwise, + * respect color mask + */ + metaSave |= MESA_META_COLOR_MASK; + } + + _mesa_meta_begin(ctx, metaSave); + + meta_glsl_clear_init(ctx, clear); + + _mesa_UseProgramObjectARB(clear->ShaderProg); + _mesa_Uniform4fvARB(clear->ColorLocation, 1, + ctx->Color.ClearColorUnclamped); + + _mesa_BindVertexArray(clear->ArrayObj); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO); + + /* GL_COLOR_BUFFER_BIT */ + if (buffers & BUFFER_BITS_COLOR) { + /* leave colormask, glDrawBuffer state as-is */ + + /* Clears never have the color clamped. */ + _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE); + } + else { + ASSERT(metaSave & MESA_META_COLOR_MASK); + _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + } + + /* GL_DEPTH_BUFFER_BIT */ + if (buffers & BUFFER_BIT_DEPTH) { + _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_TRUE); + _mesa_DepthFunc(GL_ALWAYS); + _mesa_DepthMask(GL_TRUE); + } + else { + assert(!ctx->Depth.Test); + } + + /* GL_STENCIL_BUFFER_BIT */ + if (buffers & BUFFER_BIT_STENCIL) { + _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_TRUE); + _mesa_StencilOpSeparate(GL_FRONT_AND_BACK, + GL_REPLACE, GL_REPLACE, GL_REPLACE); + _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS, + ctx->Stencil.Clear & stencilMax, + ctx->Stencil.WriteMask[0]); + } + else { + assert(!ctx->Stencil.Enabled); + } + + /* vertex positions */ + verts[0].x = x0; + verts[0].y = y0; + verts[0].z = z; + verts[1].x = x1; + verts[1].y = y0; + verts[1].z = z; + verts[2].x = x1; + verts[2].y = y1; + verts[2].z = z; + verts[3].x = x0; + verts[3].y = y1; + verts[3].z = z; + + /* upload new vertex data */ + _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts), verts, + GL_DYNAMIC_DRAW_ARB); + + /* draw quad */ + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + + _mesa_meta_end(ctx); +} /** * Meta implementation of ctx->Driver.CopyPixels() in terms - * of texture mapping and polygon rendering. + * of texture mapping and polygon rendering and GLSL shaders. */ void _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcX, GLint srcY, @@ -1621,12 +1760,13 @@ _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcX, GLint srcY, /* Most GL state applies to glCopyPixels, but a there's a few things * we need to override: */ - _mesa_meta_begin(ctx, (META_RASTERIZATION | - META_SHADER | - META_TEXTURE | - META_TRANSFORM | - META_VERTEX | - META_VIEWPORT)); + _mesa_meta_begin(ctx, (MESA_META_RASTERIZATION | + MESA_META_SHADER | + MESA_META_TEXTURE | + MESA_META_TRANSFORM | + MESA_META_CLIP | + MESA_META_VERTEX | + MESA_META_VIEWPORT)); if (copypix->ArrayObj == 0) { /* one-time setup */ @@ -1901,10 +2041,10 @@ _mesa_meta_DrawPixels(struct gl_context *ctx, * in [0,1]. */ texIntFormat = GL_ALPHA; - metaExtraSave = (META_COLOR_MASK | - META_DEPTH_TEST | - META_SHADER | - META_STENCIL_TEST); + metaExtraSave = (MESA_META_COLOR_MASK | + MESA_META_DEPTH_TEST | + MESA_META_SHADER | + MESA_META_STENCIL_TEST); } else { fallback = GL_TRUE; @@ -1914,7 +2054,7 @@ _mesa_meta_DrawPixels(struct gl_context *ctx, if (ctx->Extensions.ARB_depth_texture && ctx->Extensions.ARB_fragment_program) { texIntFormat = GL_DEPTH_COMPONENT; - metaExtraSave = (META_SHADER); + metaExtraSave = (MESA_META_SHADER); } else { fallback = GL_TRUE; @@ -1942,13 +2082,14 @@ _mesa_meta_DrawPixels(struct gl_context *ctx, /* Most GL state applies to glDrawPixels (like blending, stencil, etc), * but a there's a few things we need to override: */ - _mesa_meta_begin(ctx, (META_RASTERIZATION | - META_SHADER | - META_TEXTURE | - META_TRANSFORM | - META_VERTEX | - META_VIEWPORT | - META_CLAMP_FRAGMENT_COLOR | + _mesa_meta_begin(ctx, (MESA_META_RASTERIZATION | + MESA_META_SHADER | + MESA_META_TEXTURE | + MESA_META_TRANSFORM | + MESA_META_CLIP | + MESA_META_VERTEX | + MESA_META_VIEWPORT | + MESA_META_CLAMP_FRAGMENT_COLOR | metaExtraSave)); newTex = alloc_texture(tex, width, height, texIntFormat); @@ -2149,14 +2290,15 @@ _mesa_meta_Bitmap(struct gl_context *ctx, /* Most GL state applies to glBitmap (like blending, stencil, etc), * but a there's a few things we need to override: */ - _mesa_meta_begin(ctx, (META_ALPHA_TEST | - META_PIXEL_STORE | - META_RASTERIZATION | - META_SHADER | - META_TEXTURE | - META_TRANSFORM | - META_VERTEX | - META_VIEWPORT)); + _mesa_meta_begin(ctx, (MESA_META_ALPHA_TEST | + MESA_META_PIXEL_STORE | + MESA_META_RASTERIZATION | + MESA_META_SHADER | + MESA_META_TEXTURE | + MESA_META_TRANSFORM | + MESA_META_CLIP | + MESA_META_VERTEX | + MESA_META_VIEWPORT)); if (bitmap->ArrayObj == 0) { /* one-time setup */ @@ -2282,7 +2424,9 @@ _mesa_meta_check_generate_mipmap_fallback(struct gl_context *ctx, GLenum target, /* check for fallbacks */ if (!ctx->Extensions.EXT_framebuffer_object || - target == GL_TEXTURE_3D) { + target == GL_TEXTURE_3D || + target == GL_TEXTURE_1D_ARRAY || + target == GL_TEXTURE_2D_ARRAY) { return GL_TRUE; } @@ -2334,7 +2478,8 @@ _mesa_meta_check_generate_mipmap_fallback(struct gl_context *ctx, GLenum target, /** * Called via ctx->Driver.GenerateMipmap() - * Note: texture borders and 3D texture support not yet complete. + * Note: We don't yet support 3D textures, 1D/2D array textures or texture + * borders. */ void _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, @@ -2374,7 +2519,7 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, faceTarget = target; } - _mesa_meta_begin(ctx, META_ALL); + _mesa_meta_begin(ctx, MESA_META_ALL); if (original_active_unit != 0) _mesa_BindTexture(target, texObj->Name); @@ -2678,119 +2823,6 @@ get_temp_image_type(struct gl_context *ctx, GLenum baseFormat) /** - * Helper for _mesa_meta_CopyTexImage1/2D() functions. - * Have to be careful with locking and meta state for pixel transfer. - */ -static void -copy_tex_image(struct gl_context *ctx, GLuint dims, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLsizei height, GLint border) -{ - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; - GLenum format, type; - GLint bpp; - void *buf; - struct gl_renderbuffer *read_rb = ctx->ReadBuffer->_ColorReadBuffer; - - texObj = _mesa_get_current_tex_object(ctx, target); - texImage = _mesa_get_tex_image(ctx, texObj, target, level); - - /* Choose format/type for temporary image buffer */ - format = _mesa_base_tex_format(ctx, internalFormat); - - if (format == GL_LUMINANCE && - _mesa_get_format_base_format(read_rb->Format) != GL_LUMINANCE) { - /* The glReadPixels() path will convert RGB to luminance by - * summing R+G+B. glCopyTexImage() is supposed to behave as - * glCopyPixels, which doesn't do that change, and instead - * leaves it up to glTexImage which converts RGB to luminance by - * just taking the R channel. To avoid glReadPixels() trashing - * our data, use RGBA for our temporary image. - */ - format = GL_RGBA; - } - - type = get_temp_image_type(ctx, format); - bpp = _mesa_bytes_per_pixel(format, type); - if (bpp <= 0) { - _mesa_problem(ctx, "Bad bpp in meta copy_tex_image()"); - return; - } - - /* - * Alloc image buffer (XXX could use a PBO) - */ - buf = malloc(width * height * bpp); - if (!buf) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims); - return; - } - - _mesa_unlock_texture(ctx, texObj); /* need to unlock first */ - - /* - * Read image from framebuffer (disable pixel transfer ops) - */ - _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER); - ctx->Driver.ReadPixels(ctx, x, y, width, height, - format, type, &ctx->Pack, buf); - _mesa_meta_end(ctx); - - if (texImage->Data) { - ctx->Driver.FreeTexImageData(ctx, texImage); - } - - /* The texture's format was already chosen in _mesa_CopyTexImage() */ - ASSERT(texImage->TexFormat != MESA_FORMAT_NONE); - - /* - * Store texture data (with pixel transfer ops) - */ - _mesa_meta_begin(ctx, META_PIXEL_STORE); - - _mesa_update_state(ctx); /* to update pixel transfer state */ - - if (target == GL_TEXTURE_1D) { - ctx->Driver.TexImage1D(ctx, target, level, internalFormat, - width, border, format, type, - buf, &ctx->Unpack, texObj, texImage); - } - else { - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, format, type, - buf, &ctx->Unpack, texObj, texImage); - } - _mesa_meta_end(ctx); - - _mesa_lock_texture(ctx, texObj); /* re-lock */ - - free(buf); -} - - -void -_mesa_meta_CopyTexImage1D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLint border) -{ - copy_tex_image(ctx, 1, target, level, internalFormat, x, y, - width, 1, border); -} - - -void -_mesa_meta_CopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLsizei height, GLint border) -{ - copy_tex_image(ctx, 2, target, level, internalFormat, x, y, - width, height, border); -} - - - -/** * Helper for _mesa_meta_CopyTexSubImage1/2/3D() functions. * Have to be careful with locking and meta state for pixel transfer. */ @@ -2812,6 +2844,16 @@ copy_tex_sub_image(struct gl_context *ctx, /* Choose format/type for temporary image buffer */ format = _mesa_get_format_base_format(texImage->TexFormat); + if (format == GL_LUMINANCE || + format == GL_LUMINANCE_ALPHA || + format == GL_INTENSITY) { + /* We don't want to use GL_LUMINANCE, GL_INTENSITY, etc. for the + * temp image buffer because glReadPixels will do L=R+G+B which is + * not what we want (should be L=R). + */ + format = GL_RGBA; + } + type = get_temp_image_type(ctx, format); bpp = _mesa_bytes_per_pixel(format, type); if (bpp <= 0) { @@ -2833,7 +2875,7 @@ copy_tex_sub_image(struct gl_context *ctx, /* * Read image from framebuffer (disable pixel transfer ops) */ - _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER); + _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE | MESA_META_PIXEL_TRANSFER); ctx->Driver.ReadPixels(ctx, x, y, width, height, format, type, &ctx->Pack, buf); _mesa_meta_end(ctx); @@ -2843,7 +2885,7 @@ copy_tex_sub_image(struct gl_context *ctx, /* * Store texture data (with pixel transfer ops) */ - _mesa_meta_begin(ctx, META_PIXEL_STORE); + _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE); if (target == GL_TEXTURE_1D) { ctx->Driver.TexSubImage1D(ctx, target, level, xoffset, width, format, type, buf, @@ -2915,7 +2957,7 @@ _mesa_meta_CopyColorTable(struct gl_context *ctx, /* * Read image from framebuffer (disable pixel transfer ops) */ - _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER); + _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE | MESA_META_PIXEL_TRANSFER); ctx->Driver.ReadPixels(ctx, x, y, width, 1, GL_RGBA, GL_FLOAT, &ctx->Pack, buf); @@ -2942,7 +2984,7 @@ _mesa_meta_CopyColorSubTable(struct gl_context *ctx,GLenum target, GLsizei start /* * Read image from framebuffer (disable pixel transfer ops) */ - _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER); + _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE | MESA_META_PIXEL_TRANSFER); ctx->Driver.ReadPixels(ctx, x, y, width, 1, GL_RGBA, GL_FLOAT, &ctx->Pack, buf); diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h index b0797d3d91a..ac20e370eb8 100644 --- a/src/mesa/drivers/common/meta.h +++ b/src/mesa/drivers/common/meta.h @@ -26,6 +26,33 @@ #ifndef META_H #define META_H +/** + * \name Flags for meta operations + * \{ + * + * These flags are passed to _mesa_meta_begin(). + */ +#define MESA_META_ALL ~0x0 +#define MESA_META_ALPHA_TEST 0x1 +#define MESA_META_BLEND 0x2 /**< includes logicop */ +#define MESA_META_COLOR_MASK 0x4 +#define MESA_META_DEPTH_TEST 0x8 +#define MESA_META_FOG 0x10 +#define MESA_META_PIXEL_STORE 0x20 +#define MESA_META_PIXEL_TRANSFER 0x40 +#define MESA_META_RASTERIZATION 0x80 +#define MESA_META_SCISSOR 0x100 +#define MESA_META_SHADER 0x200 +#define MESA_META_STENCIL_TEST 0x400 +#define MESA_META_TRANSFORM 0x800 /**< modelview/projection matrix state */ +#define MESA_META_TEXTURE 0x1000 +#define MESA_META_VERTEX 0x2000 +#define MESA_META_VIEWPORT 0x4000 +#define MESA_META_CLAMP_FRAGMENT_COLOR 0x8000 +#define MESA_META_CLAMP_VERTEX_COLOR 0x10000 +#define MESA_META_CONDITIONAL_RENDER 0x20000 +#define MESA_META_CLIP 0x40000 +/**\}*/ extern void _mesa_meta_init(struct gl_context *ctx); @@ -34,6 +61,12 @@ extern void _mesa_meta_free(struct gl_context *ctx); extern void +_mesa_meta_begin(struct gl_context *ctx, GLbitfield state); + +extern void +_mesa_meta_end(struct gl_context *ctx); + +extern void _mesa_meta_BlitFramebuffer(struct gl_context *ctx, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, @@ -43,6 +76,9 @@ extern void _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers); extern void +_mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers); + +extern void _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, GLsizei width, GLsizei height, GLint dstx, GLint dsty, GLenum type); @@ -69,16 +105,6 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, struct gl_texture_object *texObj); extern void -_mesa_meta_CopyTexImage1D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLint border); - -extern void -_mesa_meta_CopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLsizei height, GLint border); - -extern void _mesa_meta_CopyTexSubImage1D(struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width); diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c index 77967ac2a43..12dd31bb162 100644 --- a/src/mesa/drivers/dri/common/xmlconfig.c +++ b/src/mesa/drivers/dri/common/xmlconfig.c @@ -567,7 +567,7 @@ static void parseOptInfoAttr (struct OptInfoData *data, const XML_Char **attr) { } else defaultVal = attrVal[OA_DEFAULT]; if (!parseValue (&cache->values[opt], cache->info[opt].type, defaultVal)) - XML_FATAL ("illegal default value: %s.", defaultVal); + XML_FATAL ("illegal default value for %s: %s.", cache->info[opt].name, defaultVal); if (attrVal[OA_VALID]) { if (cache->info[opt].type == DRI_BOOL) diff --git a/src/mesa/drivers/dri/common/xmlpool.h b/src/mesa/drivers/dri/common/xmlpool.h index 587517ea10a..ffea430024d 100644 --- a/src/mesa/drivers/dri/common/xmlpool.h +++ b/src/mesa/drivers/dri/common/xmlpool.h @@ -60,7 +60,7 @@ #define DRI_CONF_OPT_BEGIN(name,type,def) \ "<option name=\""#name"\" type=\""#type"\" default=\""#def"\">\n" -/** \brief Begin an option definition with qouted default value */ +/** \brief Begin an option definition with quoted default value */ #define DRI_CONF_OPT_BEGIN_Q(name,type,def) \ "<option name=\""#name"\" type=\""#type"\" default="#def">\n" diff --git a/src/mesa/drivers/dri/common/xmlpool/options.h b/src/mesa/drivers/dri/common/xmlpool/options.h index d76595578c7..1e584ba086a 100644 --- a/src/mesa/drivers/dri/common/xmlpool/options.h +++ b/src/mesa/drivers/dri/common/xmlpool/options.h @@ -425,6 +425,66 @@ DRI_CONF_OPT_BEGIN(hyperz,bool,def) \ DRI_CONF_DESC(sv,"Använd HyperZ för att maximera prestandan") \ DRI_CONF_OPT_END +#define DRI_CONF_PP_CELSHADE(def) \ +DRI_CONF_OPT_BEGIN_V(pp_celshade,enum,def,"0:1") \ + DRI_CONF_DESC(en,"A post-processing filter to cel-shade the output") \ + DRI_CONF_DESC(de,"A post-processing filter to cel-shade the output") \ + DRI_CONF_DESC(es,"A post-processing filter to cel-shade the output") \ + DRI_CONF_DESC(nl,"A post-processing filter to cel-shade the output") \ + DRI_CONF_DESC(fr,"A post-processing filter to cel-shade the output") \ + DRI_CONF_DESC(sv,"A post-processing filter to cel-shade the output") \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_NORED(def) \ +DRI_CONF_OPT_BEGIN_V(pp_nored,enum,def,"0:1") \ + DRI_CONF_DESC(en,"A post-processing filter to remove the red channel") \ + DRI_CONF_DESC(de,"A post-processing filter to remove the red channel") \ + DRI_CONF_DESC(es,"A post-processing filter to remove the red channel") \ + DRI_CONF_DESC(nl,"A post-processing filter to remove the red channel") \ + DRI_CONF_DESC(fr,"A post-processing filter to remove the red channel") \ + DRI_CONF_DESC(sv,"A post-processing filter to remove the red channel") \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_NOGREEN(def) \ +DRI_CONF_OPT_BEGIN_V(pp_nogreen,enum,def,"0:1") \ + DRI_CONF_DESC(en,"A post-processing filter to remove the green channel") \ + DRI_CONF_DESC(de,"A post-processing filter to remove the green channel") \ + DRI_CONF_DESC(es,"A post-processing filter to remove the green channel") \ + DRI_CONF_DESC(nl,"A post-processing filter to remove the green channel") \ + DRI_CONF_DESC(fr,"A post-processing filter to remove the green channel") \ + DRI_CONF_DESC(sv,"A post-processing filter to remove the green channel") \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_NOBLUE(def) \ +DRI_CONF_OPT_BEGIN_V(pp_noblue,enum,def,"0:1") \ + DRI_CONF_DESC(en,"A post-processing filter to remove the blue channel") \ + DRI_CONF_DESC(de,"A post-processing filter to remove the blue channel") \ + DRI_CONF_DESC(es,"A post-processing filter to remove the blue channel") \ + DRI_CONF_DESC(nl,"A post-processing filter to remove the blue channel") \ + DRI_CONF_DESC(fr,"A post-processing filter to remove the blue channel") \ + DRI_CONF_DESC(sv,"A post-processing filter to remove the blue channel") \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_JIMENEZMLAA(def,min,max) \ +DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa,int,def, # min ":" # max ) \ + DRI_CONF_DESC(en,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \ + DRI_CONF_DESC(de,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \ + DRI_CONF_DESC(es,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \ + DRI_CONF_DESC(nl,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \ + DRI_CONF_DESC(fr,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \ + DRI_CONF_DESC(sv,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_JIMENEZMLAA_COLOR(def,min,max) \ +DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa_color,int,def, # min ":" # max ) \ + DRI_CONF_DESC(en,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \ + DRI_CONF_DESC(de,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \ + DRI_CONF_DESC(es,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \ + DRI_CONF_DESC(nl,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \ + DRI_CONF_DESC(fr,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \ + DRI_CONF_DESC(sv,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \ +DRI_CONF_OPT_END + #define DRI_CONF_MAX_TEXTURE_UNITS(def,min,max) \ DRI_CONF_OPT_BEGIN_V(texture_units,int,def, # min ":" # max ) \ DRI_CONF_DESC(en,"Number of texture units used") \ diff --git a/src/mesa/drivers/dri/common/xmlpool/t_options.h b/src/mesa/drivers/dri/common/xmlpool/t_options.h index 5fd6ec65bf8..2427aa77f5b 100644 --- a/src/mesa/drivers/dri/common/xmlpool/t_options.h +++ b/src/mesa/drivers/dri/common/xmlpool/t_options.h @@ -191,6 +191,36 @@ DRI_CONF_OPT_BEGIN(hyperz,bool,def) \ DRI_CONF_DESC(en,gettext("Use HyperZ to boost performance")) \ DRI_CONF_OPT_END +#define DRI_CONF_PP_CELSHADE(def) \ +DRI_CONF_OPT_BEGIN_V(pp_celshade,enum,def,"0:1") \ + DRI_CONF_DESC(en,gettext("A post-processing filter to cel-shade the output")) \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_NORED(def) \ +DRI_CONF_OPT_BEGIN_V(pp_nored,enum,def,"0:1") \ + DRI_CONF_DESC(en,gettext("A post-processing filter to remove the red channel")) \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_NOGREEN(def) \ +DRI_CONF_OPT_BEGIN_V(pp_nogreen,enum,def,"0:1") \ + DRI_CONF_DESC(en,gettext("A post-processing filter to remove the green channel")) \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_NOBLUE(def) \ +DRI_CONF_OPT_BEGIN_V(pp_noblue,enum,def,"0:1") \ + DRI_CONF_DESC(en,gettext("A post-processing filter to remove the blue channel")) \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_JIMENEZMLAA(def,min,max) \ +DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa,int,def, # min ":" # max ) \ + DRI_CONF_DESC(en,gettext("Morphological anti-aliasing based on Jimenez\\\' MLAA. 0 to disable, 8 for default quality")) \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_JIMENEZMLAA_COLOR(def,min,max) \ +DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa_color,int,def, # min ":" # max ) \ + DRI_CONF_DESC(en,gettext("Morphological anti-aliasing based on Jimenez\\\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps")) \ +DRI_CONF_OPT_END + #define DRI_CONF_MAX_TEXTURE_UNITS(def,min,max) \ DRI_CONF_OPT_BEGIN_V(texture_units,int,def, # min ":" # max ) \ DRI_CONF_DESC(en,gettext("Number of texture units used")) \ diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 6d43726beb1..ed5286fd7d9 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -881,6 +881,12 @@ i830_invalidate_state(struct intel_context *intel, GLuint new_state) i830_update_provoking_vertex(&intel->ctx); } +static bool +i830_is_hiz_depth_format(struct intel_context *intel, gl_format format) +{ + return false; +} + void i830InitVtbl(struct i830_context *i830) { @@ -898,4 +904,5 @@ i830InitVtbl(struct i830_context *i830) i830->intel.vtbl.finish_batch = intel_finish_vb; i830->intel.vtbl.invalidate_state = i830_invalidate_state; i830->intel.vtbl.render_target_supported = i830_render_target_supported; + i830->intel.vtbl.is_hiz_depth_format = i830_is_hiz_depth_format; } diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 6e1d7092237..d155b85ffca 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -175,10 +175,8 @@ src_vector(struct i915_fragment_program *p, case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: case PROGRAM_UNIFORM: - src = - i915_emit_param4fv(p, - program->Base.Parameters->ParameterValues[source-> - Index]); + src = i915_emit_param4fv(p, + &program->Base.Parameters->ParameterValues[source->Index][0].f); break; default: @@ -303,7 +301,7 @@ do { \ /* * TODO: consider moving this into core */ -static void calc_live_regs( struct i915_fragment_program *p ) +static bool calc_live_regs( struct i915_fragment_program *p ) { const struct gl_fragment_program *program = &p->FragProg; GLuint regsUsed = 0xffff0000; @@ -317,6 +315,9 @@ static void calc_live_regs( struct i915_fragment_program *p ) /* Register is written to: unmark as live for this and preceeding ops */ if (inst->DstReg.File == PROGRAM_TEMPORARY) { + if (inst->DstReg.Index > 16) + return false; + live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask; if (live_components[inst->DstReg.Index] == 0) regsUsed &= ~(1 << inst->DstReg.Index); @@ -327,6 +328,9 @@ static void calc_live_regs( struct i915_fragment_program *p ) if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) { unsigned c; + if (inst->SrcReg[a].Index > 16) + return false; + regsUsed |= 1 << inst->SrcReg[a].Index; for (c = 0; c < 4; c++) { @@ -340,6 +344,8 @@ static void calc_live_regs( struct i915_fragment_program *p ) p->usedRegs[i] = regsUsed; } + + return true; } static GLuint get_live_regs( struct i915_fragment_program *p, @@ -394,7 +400,10 @@ upload_program(struct i915_fragment_program *p) /* Not always needed: */ - calc_live_regs(p); + if (!calc_live_regs(p)) { + i915_program_error(p, "Could not allocate registers"); + return; + } while (1) { GLuint src0, src1, src2, flags; diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c index ca1949b223e..0a600d30bef 100644 --- a/src/mesa/drivers/dri/i915/i915_program.c +++ b/src/mesa/drivers/dri/i915/i915_program.c @@ -442,14 +442,16 @@ i915_emit_param4fv(struct i915_fragment_program * p, const GLfloat * values) void i915_program_error(struct i915_fragment_program *p, const char *fmt, ...) { - va_list args; + if (unlikely((INTEL_DEBUG & (DEBUG_WM | DEBUG_FALLBACKS)) != 0)) { + va_list args; - fprintf(stderr, "i915_program_error: "); - va_start(args, fmt); - vfprintf(stderr, fmt, args); - va_end(args); + fprintf(stderr, "i915_program_error: "); + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); - fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + } p->error = 1; } diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 44f28cd9d15..d9c885da65b 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -124,7 +124,11 @@ CXX_SOURCES = \ brw_fs_reg_allocate.cpp \ brw_fs_schedule_instructions.cpp \ brw_fs_vector_splitting.cpp \ - brw_shader.cpp + brw_shader.cpp \ + brw_vec4.cpp \ + brw_vec4_emit.cpp \ + brw_vec4_reg_allocate.cpp \ + brw_vec4_visitor.cpp ASM_SOURCES = diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 471015cf9d0..df63fe1d52c 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -212,6 +212,7 @@ enum state_struct_type { AUB_TRACE_BINDING_TABLE = 0x101, AUB_TRACE_SURFACE_STATE = 0x102, AUB_TRACE_VS_CONSTANTS = 0x103, + AUB_TRACE_WM_CONSTANTS = 0x104, }; /** Subclass of Mesa vertex program */ @@ -247,6 +248,7 @@ enum param_conversion { PARAM_CONVERT_F2I, PARAM_CONVERT_F2U, PARAM_CONVERT_F2B, + PARAM_CONVERT_ZERO, }; /* Data about a particular attempt to compile a program. Note that @@ -310,12 +312,20 @@ struct brw_vs_prog_data { GLuint total_grf; GLbitfield64 outputs_written; GLuint nr_params; /**< number of float params/constants */ + GLuint total_scratch; GLuint inputs_read; /* Used for calculating urb partitions: */ GLuint urb_entry_size; + + const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */ + enum param_conversion param_convert[MAX_UNIFORMS * 4]; + const float *pull_param[MAX_UNIFORMS * 4]; + enum param_conversion pull_param_convert[MAX_UNIFORMS * 4]; + + bool uses_new_param_layout; }; @@ -528,7 +538,7 @@ struct brw_context * the CURBE, the depth buffer, and a query BO. */ drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16]; - int validated_bo_count; + unsigned int validated_bo_count; } state; struct brw_cache cache; @@ -662,6 +672,7 @@ struct brw_context struct brw_vs_prog_data *prog_data; int8_t *constant_map; /* variable array following prog_data */ + drm_intel_bo *scratch_bo; drm_intel_bo *const_bo; /** Offset in the program cache to the VS program */ uint32_t prog_offset; @@ -674,6 +685,23 @@ struct brw_context uint32_t push_const_offset; /* Offset in the batchbuffer */ int push_const_size; /* in 256-bit register increments */ + + /** @{ register allocator */ + + struct ra_regs *regs; + + /** + * Array of the ra classes for the unaligned contiguous register + * block sizes used. + */ + int *classes; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + /** @} */ } vs; struct { @@ -726,7 +754,6 @@ struct brw_context GLuint render_surf; GLuint nr_surfaces; - GLuint max_threads; drm_intel_bo *scratch_bo; GLuint sampler_count; @@ -747,6 +774,29 @@ struct brw_context * Pre-gen6, push constants live in the CURBE. */ uint32_t push_const_offset; + + /** @{ register allocator */ + + struct ra_regs *regs; + + /** Array of the ra classes for the unaligned contiguous + * register block sizes used. + */ + int *classes; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + + /** + * ra class for the aligned pairs we use for PLN, which doesn't + * appear in *classes. + */ + int aligned_pairs_class; + + /** @} */ } wm; @@ -827,6 +877,10 @@ void brw_validate_textures( struct brw_context *brw ); */ void brwInitFragProgFuncs( struct dd_function_table *functions ); +int brw_get_scratch_size(int size); +void brw_get_scratch_bo(struct intel_context *intel, + drm_intel_bo **scratch_bo, int size); + /* brw_urb.c */ @@ -874,7 +928,7 @@ brw_fragment_program_const(const struct gl_fragment_program *p) } static inline -float convert_param(enum param_conversion conversion, float param) +float convert_param(enum param_conversion conversion, const float *param) { union { float f; @@ -884,21 +938,23 @@ float convert_param(enum param_conversion conversion, float param) switch (conversion) { case PARAM_NO_CONVERT: - return param; + return *param; case PARAM_CONVERT_F2I: - fi.i = param; + fi.i = *param; return fi.f; case PARAM_CONVERT_F2U: - fi.u = param; + fi.u = *param; return fi.f; case PARAM_CONVERT_F2B: - if (param != 0.0) + if (*param != 0.0) fi.i = 1; else fi.i = 0; return fi.f; + case PARAM_CONVERT_ZERO: + return 0.0; default: - return param; + return *param; } } diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index ae11c487a2c..960be10006e 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -203,7 +203,7 @@ static void prepare_constant_buffer(struct brw_context *brw) /* copy float constants */ for (i = 0; i < brw->wm.prog_data->nr_params; i++) { buf[offset + i] = convert_param(brw->wm.prog_data->param_convert[i], - *brw->wm.prog_data->param[i]); + brw->wm.prog_data->param[i]); } } @@ -244,15 +244,22 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; - /* Load the subset of push constants that will get used when - * we also have a pull constant buffer. - */ - for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { - if (brw->vs.constant_map[i] != -1) { - assert(brw->vs.constant_map[i] <= nr); - memcpy(buf + offset + brw->vs.constant_map[i] * 4, - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); + if (brw->vs.prog_data->uses_new_param_layout) { + for (i = 0; i < brw->vs.prog_data->nr_params; i++) { + buf[offset + i] = convert_param(brw->vs.prog_data->param_convert[i], + brw->vs.prog_data->param[i]); + } + } else { + /* Load the subset of push constants that will get used when + * we also have a pull constant buffer. + */ + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + assert(brw->vs.constant_map[i] <= nr); + memcpy(buf + offset + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + } } } } diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 0a3027d04ad..d1799c0ab4f 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -557,58 +557,93 @@ #define BRW_WE_ALL 1 /** @} */ -#define BRW_OPCODE_MOV 1 -#define BRW_OPCODE_SEL 2 -#define BRW_OPCODE_NOT 4 -#define BRW_OPCODE_AND 5 -#define BRW_OPCODE_OR 6 -#define BRW_OPCODE_XOR 7 -#define BRW_OPCODE_SHR 8 -#define BRW_OPCODE_SHL 9 -#define BRW_OPCODE_RSR 10 -#define BRW_OPCODE_RSL 11 -#define BRW_OPCODE_ASR 12 -#define BRW_OPCODE_CMP 16 -#define BRW_OPCODE_CMPN 17 -#define BRW_OPCODE_JMPI 32 -#define BRW_OPCODE_IF 34 -#define BRW_OPCODE_IFF 35 -#define BRW_OPCODE_ELSE 36 -#define BRW_OPCODE_ENDIF 37 -#define BRW_OPCODE_DO 38 -#define BRW_OPCODE_WHILE 39 -#define BRW_OPCODE_BREAK 40 -#define BRW_OPCODE_CONTINUE 41 -#define BRW_OPCODE_HALT 42 -#define BRW_OPCODE_MSAVE 44 -#define BRW_OPCODE_MRESTORE 45 -#define BRW_OPCODE_PUSH 46 -#define BRW_OPCODE_POP 47 -#define BRW_OPCODE_WAIT 48 -#define BRW_OPCODE_SEND 49 -#define BRW_OPCODE_SENDC 50 -#define BRW_OPCODE_MATH 56 -#define BRW_OPCODE_ADD 64 -#define BRW_OPCODE_MUL 65 -#define BRW_OPCODE_AVG 66 -#define BRW_OPCODE_FRC 67 -#define BRW_OPCODE_RNDU 68 -#define BRW_OPCODE_RNDD 69 -#define BRW_OPCODE_RNDE 70 -#define BRW_OPCODE_RNDZ 71 -#define BRW_OPCODE_MAC 72 -#define BRW_OPCODE_MACH 73 -#define BRW_OPCODE_LZD 74 -#define BRW_OPCODE_SAD2 80 -#define BRW_OPCODE_SADA2 81 -#define BRW_OPCODE_DP4 84 -#define BRW_OPCODE_DPH 85 -#define BRW_OPCODE_DP3 86 -#define BRW_OPCODE_DP2 87 -#define BRW_OPCODE_DPA2 88 -#define BRW_OPCODE_LINE 89 -#define BRW_OPCODE_PLN 90 -#define BRW_OPCODE_NOP 126 +enum opcode { + /* These are the actual hardware opcodes. */ + BRW_OPCODE_MOV = 1, + BRW_OPCODE_SEL = 2, + BRW_OPCODE_NOT = 4, + BRW_OPCODE_AND = 5, + BRW_OPCODE_OR = 6, + BRW_OPCODE_XOR = 7, + BRW_OPCODE_SHR = 8, + BRW_OPCODE_SHL = 9, + BRW_OPCODE_RSR = 10, + BRW_OPCODE_RSL = 11, + BRW_OPCODE_ASR = 12, + BRW_OPCODE_CMP = 16, + BRW_OPCODE_CMPN = 17, + BRW_OPCODE_JMPI = 32, + BRW_OPCODE_IF = 34, + BRW_OPCODE_IFF = 35, + BRW_OPCODE_ELSE = 36, + BRW_OPCODE_ENDIF = 37, + BRW_OPCODE_DO = 38, + BRW_OPCODE_WHILE = 39, + BRW_OPCODE_BREAK = 40, + BRW_OPCODE_CONTINUE = 41, + BRW_OPCODE_HALT = 42, + BRW_OPCODE_MSAVE = 44, + BRW_OPCODE_MRESTORE = 45, + BRW_OPCODE_PUSH = 46, + BRW_OPCODE_POP = 47, + BRW_OPCODE_WAIT = 48, + BRW_OPCODE_SEND = 49, + BRW_OPCODE_SENDC = 50, + BRW_OPCODE_MATH = 56, + BRW_OPCODE_ADD = 64, + BRW_OPCODE_MUL = 65, + BRW_OPCODE_AVG = 66, + BRW_OPCODE_FRC = 67, + BRW_OPCODE_RNDU = 68, + BRW_OPCODE_RNDD = 69, + BRW_OPCODE_RNDE = 70, + BRW_OPCODE_RNDZ = 71, + BRW_OPCODE_MAC = 72, + BRW_OPCODE_MACH = 73, + BRW_OPCODE_LZD = 74, + BRW_OPCODE_SAD2 = 80, + BRW_OPCODE_SADA2 = 81, + BRW_OPCODE_DP4 = 84, + BRW_OPCODE_DPH = 85, + BRW_OPCODE_DP3 = 86, + BRW_OPCODE_DP2 = 87, + BRW_OPCODE_DPA2 = 88, + BRW_OPCODE_LINE = 89, + BRW_OPCODE_PLN = 90, + BRW_OPCODE_NOP = 126, + + /* These are compiler backend opcodes that get translated into other + * instructions. + */ + FS_OPCODE_FB_WRITE = 128, + SHADER_OPCODE_RCP, + SHADER_OPCODE_RSQ, + SHADER_OPCODE_SQRT, + SHADER_OPCODE_EXP2, + SHADER_OPCODE_LOG2, + SHADER_OPCODE_POW, + SHADER_OPCODE_SIN, + SHADER_OPCODE_COS, + FS_OPCODE_DDX, + FS_OPCODE_DDY, + FS_OPCODE_PIXEL_X, + FS_OPCODE_PIXEL_Y, + FS_OPCODE_CINTERP, + FS_OPCODE_LINTERP, + FS_OPCODE_TEX, + FS_OPCODE_TXB, + FS_OPCODE_TXD, + FS_OPCODE_TXL, + FS_OPCODE_TXS, + FS_OPCODE_DISCARD, + FS_OPCODE_SPILL, + FS_OPCODE_UNSPILL, + FS_OPCODE_PULL_CONSTANT_LOAD, + + VS_OPCODE_URB_WRITE, + VS_OPCODE_SCRATCH_READ, + VS_OPCODE_SCRATCH_WRITE, +}; #define BRW_PREDICATE_NONE 0 #define BRW_PREDICATE_NORMAL 1 @@ -734,7 +769,6 @@ #define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0 #define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1 #define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 -#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2 #define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 #define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 #define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 @@ -747,6 +781,7 @@ #define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4 #define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5 #define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10 /* for GEN5 only */ #define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index af41c848308..927b0b4acc9 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -309,6 +309,35 @@ char *target_function[16] = { [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner" }; +char *target_function_gen6[16] = { + [BRW_MESSAGE_TARGET_NULL] = "null", + [BRW_MESSAGE_TARGET_MATH] = "math", + [BRW_MESSAGE_TARGET_SAMPLER] = "sampler", + [BRW_MESSAGE_TARGET_GATEWAY] = "gateway", + [GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE] = "sampler", + [GEN6_MESSAGE_TARGET_DP_RENDER_CACHE] = "render", + [GEN6_MESSAGE_TARGET_DP_CONST_CACHE] = "const", + [BRW_MESSAGE_TARGET_URB] = "urb", + [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner" +}; + +char *dp_rc_msg_type_gen6[16] = { + [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read", + [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read", + [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read", + [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read", + [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read", + [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read", + [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write", + [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write", + [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write", + [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write", + [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write", + [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write", + [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write", + [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write", +}; + char *math_function[16] = { [BRW_MATH_FUNCTION_INV] = "inv", [BRW_MATH_FUNCTION_LOG] = "log", @@ -927,8 +956,14 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) newline (file); pad (file, 16); space = 0; - err |= control (file, "target function", target_function, - target, &space); + + if (gen >= 6) { + err |= control (file, "target function", target_function_gen6, + target, &space); + } else { + err |= control (file, "target function", target_function, + target, &space); + } switch (target) { case BRW_MESSAGE_TARGET_MATH: @@ -985,9 +1020,16 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) inst->bits3.dp_read.msg_type); } break; + case BRW_MESSAGE_TARGET_DATAPORT_WRITE: if (gen >= 6) { - format (file, " (%d, %d, %d, %d, %d, %d)", + format (file, " ("); + + err |= control (file, "DP rc message type", + dp_rc_msg_type_gen6, + inst->bits3.gen6_dp.msg_type, &space); + + format (file, ", %d, %d, %d, %d, %d, %d)", inst->bits3.gen6_dp.binding_table_index, inst->bits3.gen6_dp.msg_control, inst->bits3.gen6_dp.msg_type, @@ -1003,6 +1045,7 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) inst->bits3.dp_write.send_commit_msg); } break; + case BRW_MESSAGE_TARGET_URB: if (gen >= 5) { format (file, " %d", inst->bits3.urb_gen5.offset); diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 56a46ced6e3..7bc69c612e3 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -689,17 +689,17 @@ static void brw_prepare_indices(struct brw_context *brw) * rebase it into a temporary. */ if ((get_size(index_buffer->type) - 1) & offset) { - GLubyte *map = ctx->Driver.MapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER_ARB, - GL_DYNAMIC_DRAW_ARB, - bufferobj); - map += offset; + GLubyte *map = ctx->Driver.MapBufferRange(ctx, + offset, + ib_size, + GL_MAP_WRITE_BIT, + bufferobj); intel_upload_data(&brw->intel, map, ib_size, ib_type_size, &bo, &offset); brw->ib.start_vertex_offset = offset / ib_type_size; - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj); + ctx->Driver.UnmapBuffer(ctx, bufferobj); } else { /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading * the index buffer state when we're just moving the start index diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 72d50eadbce..af50305fc2b 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -44,6 +44,9 @@ #define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) #define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) #define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) +#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1) +#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2) +#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3) #define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) @@ -798,6 +801,12 @@ void brw_init_compile(struct brw_context *, struct brw_compile *p, void *mem_ctx); const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz ); +struct brw_instruction *brw_next_insn(struct brw_compile *p, GLuint opcode); +void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg dest); +void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg reg); + /* Helpers for regular instructions: */ @@ -852,6 +861,27 @@ ROUND(RNDE) /* Helpers for SEND instruction: */ +void brw_set_dp_read_message(struct brw_compile *p, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint target_cache, + GLuint msg_length, + GLuint response_length); + +void brw_set_dp_write_message(struct brw_compile *p, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint msg_length, + GLboolean header_present, + GLuint pixel_scoreboard_clear, + GLuint response_length, + GLuint end_of_thread, + GLuint send_commit_msg); + void brw_urb_WRITE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index e7370f36064..c5013de7ec1 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -89,9 +89,9 @@ gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) } -static void brw_set_dest(struct brw_compile *p, - struct brw_instruction *insn, - struct brw_reg dest) +void +brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg dest) { if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && dest.file != BRW_MESSAGE_REGISTER_FILE) @@ -221,9 +221,9 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg) /* 10. Check destination issues. */ } -static void brw_set_src0(struct brw_compile *p, - struct brw_instruction *insn, - struct brw_reg reg) +void +brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg reg) { if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) assert(reg.nr < 128); @@ -504,17 +504,18 @@ static void brw_set_urb_message( struct brw_compile *p, } } -static void brw_set_dp_write_message( struct brw_compile *p, - struct brw_instruction *insn, - GLuint binding_table_index, - GLuint msg_control, - GLuint msg_type, - GLuint msg_length, - GLboolean header_present, - GLuint pixel_scoreboard_clear, - GLuint response_length, - GLuint end_of_thread, - GLuint send_commit_msg) +void +brw_set_dp_write_message(struct brw_compile *p, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint msg_length, + GLboolean header_present, + GLuint pixel_scoreboard_clear, + GLuint response_length, + GLuint end_of_thread, + GLuint send_commit_msg) { struct brw_context *brw = p->brw; struct intel_context *intel = &brw->intel; @@ -570,7 +571,7 @@ static void brw_set_dp_write_message( struct brw_compile *p, } } -static void +void brw_set_dp_read_message(struct brw_compile *p, struct brw_instruction *insn, GLuint binding_table_index, @@ -709,9 +710,9 @@ static void brw_set_sampler_message(struct brw_compile *p, } - -static struct brw_instruction *next_insn( struct brw_compile *p, - GLuint opcode ) +#define next_insn brw_next_insn +struct brw_instruction * +brw_next_insn(struct brw_compile *p, GLuint opcode) { struct brw_instruction *insn; @@ -732,7 +733,6 @@ static struct brw_instruction *next_insn( struct brw_compile *p, return insn; } - static struct brw_instruction *brw_alu1( struct brw_compile *p, GLuint opcode, struct brw_reg dest, @@ -1341,8 +1341,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, brw_set_src1(p, insn, brw_imm_ud(0)); insn->bits3.break_cont.jip = br * (do_insn - insn); - insn->header.execution_size = do_insn->header.execution_size; - assert(insn->header.execution_size == BRW_EXECUTE_8); + insn->header.execution_size = BRW_EXECUTE_8; } else if (intel->gen == 6) { insn = next_insn(p, BRW_OPCODE_WHILE); @@ -1351,8 +1350,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - insn->header.execution_size = do_insn->header.execution_size; - assert(insn->header.execution_size == BRW_EXECUTE_8); + insn->header.execution_size = BRW_EXECUTE_8; } else { if (p->single_program_flow) { insn = next_insn(p, BRW_OPCODE_ADD); @@ -2246,10 +2244,13 @@ void brw_urb_WRITE(struct brw_compile *p, if (intel->gen == 7) { /* Enable Channel Masks in the URB_WRITE_HWORD message header */ + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5), BRW_REGISTER_TYPE_UD), retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD), brw_imm_ud(0xff00)); + brw_pop_insn_state(p); } insn = next_insn(p, BRW_OPCODE_SEND); @@ -2311,7 +2312,7 @@ brw_find_loop_end(struct brw_compile *p, int start) if (insn->header.opcode == BRW_OPCODE_WHILE) { int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count : insn->bits3.break_cont.jip; - if (ip + jip / br < start) + if (ip + jip / br <= start) return ip; } } diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index b5ea943387d..0b0445ea142 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -143,20 +143,21 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) return 0; switch (inst->opcode) { - case FS_OPCODE_RCP: - case FS_OPCODE_RSQ: - case FS_OPCODE_SQRT: - case FS_OPCODE_EXP2: - case FS_OPCODE_LOG2: - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: return 1 * c->dispatch_width / 8; - case FS_OPCODE_POW: + case SHADER_OPCODE_POW: return 2 * c->dispatch_width / 8; case FS_OPCODE_TEX: case FS_OPCODE_TXB: case FS_OPCODE_TXD: case FS_OPCODE_TXL: + case FS_OPCODE_TXS: return 1; case FS_OPCODE_FB_WRITE: return 2; @@ -181,29 +182,26 @@ fs_visitor::virtual_grf_alloc(int size) virtual_grf_array_size *= 2; virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, virtual_grf_array_size); - - /* This slot is always unused. */ - virtual_grf_sizes[0] = 0; } virtual_grf_sizes[virtual_grf_next] = size; return virtual_grf_next++; } /** Fixed HW reg constructor. */ -fs_reg::fs_reg(enum register_file file, int hw_reg) +fs_reg::fs_reg(enum register_file file, int reg) { init(); this->file = file; - this->hw_reg = hw_reg; + this->reg = reg; this->type = BRW_REGISTER_TYPE_F; } /** Fixed HW reg constructor. */ -fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type) +fs_reg::fs_reg(enum register_file file, int reg, uint32_t type) { init(); this->file = file; - this->hw_reg = hw_reg; + this->reg = reg; this->type = type; } @@ -242,11 +240,12 @@ import_uniforms_callback(const void *key, * This brings in those uniform definitions */ void -fs_visitor::import_uniforms(struct hash_table *src_variable_ht) +fs_visitor::import_uniforms(fs_visitor *v) { - hash_table_call_foreach(src_variable_ht, + hash_table_call_foreach(v->variable_ht, import_uniforms_callback, variable_ht); + this->params_remap = v->params_remap; } /* Our support for uniforms is piggy-backed on the struct @@ -281,23 +280,27 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type) assert(param < ARRAY_SIZE(c->prog_data.param)); - switch (type->base_type) { - case GLSL_TYPE_FLOAT: + if (ctx->Const.NativeIntegers) { c->prog_data.param_convert[param] = PARAM_NO_CONVERT; - break; - case GLSL_TYPE_UINT: - c->prog_data.param_convert[param] = PARAM_CONVERT_F2U; - break; - case GLSL_TYPE_INT: - c->prog_data.param_convert[param] = PARAM_CONVERT_F2I; - break; - case GLSL_TYPE_BOOL: - c->prog_data.param_convert[param] = PARAM_CONVERT_F2B; - break; - default: - assert(!"not reached"); - c->prog_data.param_convert[param] = PARAM_NO_CONVERT; - break; + } else { + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + c->prog_data.param_convert[param] = PARAM_NO_CONVERT; + break; + case GLSL_TYPE_UINT: + c->prog_data.param_convert[param] = PARAM_CONVERT_F2U; + break; + case GLSL_TYPE_INT: + c->prog_data.param_convert[param] = PARAM_CONVERT_F2I; + break; + case GLSL_TYPE_BOOL: + c->prog_data.param_convert[param] = PARAM_CONVERT_F2B; + break; + default: + assert(!"not reached"); + c->prog_data.param_convert[param] = PARAM_NO_CONVERT; + break; + } } this->param_index[param] = loc; this->param_offset[param] = i; @@ -463,9 +466,21 @@ fs_visitor::emit_general_interpolation(ir_variable *ir) } else { /* Perspective interpolation case. */ for (unsigned int k = 0; k < type->vector_elements; k++) { - struct brw_reg interp = interp_reg(location, k); - emit(FS_OPCODE_LINTERP, attr, - this->delta_x, this->delta_y, fs_reg(interp)); + /* FINISHME: At some point we probably want to push + * this farther by giving similar treatment to the + * other potentially constant components of the + * attribute, as well as making brw_vs_constval.c + * handle varyings other than gl_TexCoord. + */ + if (location >= FRAG_ATTRIB_TEX0 && + location <= FRAG_ATTRIB_TEX7 && + k == 3 && !(c->key.proj_attrib_mask & (1 << location))) { + emit(BRW_OPCODE_MOV, attr, fs_reg(1.0f)); + } else { + struct brw_reg interp = interp_reg(location, k); + emit(FS_OPCODE_LINTERP, attr, + this->delta_x, this->delta_y, fs_reg(interp)); + } attr.reg_offset++; } @@ -512,16 +527,16 @@ fs_visitor::emit_frontfacing_interpolation(ir_variable *ir) } fs_inst * -fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) +fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src) { switch (opcode) { - case FS_OPCODE_RCP: - case FS_OPCODE_RSQ: - case FS_OPCODE_SQRT: - case FS_OPCODE_EXP2: - case FS_OPCODE_LOG2: - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: break; default: assert(!"not reached: bad math opcode"); @@ -555,12 +570,12 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) } fs_inst * -fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1) +fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) { int base_mrf = 2; fs_inst *inst; - assert(opcode == FS_OPCODE_POW); + assert(opcode == SHADER_OPCODE_POW); if (intel->gen >= 6) { /* Can't do hstride == 0 args to gen6 math, so expand it out. @@ -605,7 +620,7 @@ fs_visitor::setup_paramvalues_refs() /* Set up the pointers to ParamValues now that that array is finalized. */ for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { c->prog_data.param[i] = - fp->Base.Parameters->ParameterValues[this->param_index[i]] + + (const float *)fp->Base.Parameters->ParameterValues[this->param_index[i]] + this->param_offset[i]; } } @@ -621,12 +636,12 @@ fs_visitor::assign_curb_setup() } /* Map the offsets in the UNIFORM file to fixed HW regs. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == UNIFORM) { - int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs + constant_nr / 8, constant_nr % 8); @@ -684,8 +699,8 @@ fs_visitor::assign_urb_setup() /* Offset all the urb_setup[] index by the actual position of the * setup regs, now that the location of the constants has been chosen. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->opcode == FS_OPCODE_LINTERP) { assert(inst->src[2].file == FIXED_HW_REG); @@ -739,8 +754,8 @@ fs_visitor::split_virtual_grfs() split_grf[this->delta_x.reg] = false; } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; /* Texturing produces 4 contiguous registers, so no splitting. */ if (inst->is_tex()) { @@ -763,8 +778,8 @@ fs_visitor::split_virtual_grfs() } } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->dst.file == GRF && split_grf[inst->dst.reg] && @@ -786,6 +801,86 @@ fs_visitor::split_virtual_grfs() this->live_intervals_valid = false; } +bool +fs_visitor::remove_dead_constants() +{ + if (c->dispatch_width == 8) { + this->params_remap = ralloc_array(mem_ctx, int, c->prog_data.nr_params); + + for (unsigned int i = 0; i < c->prog_data.nr_params; i++) + this->params_remap[i] = -1; + + /* Find which params are still in use. */ + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; + + for (int i = 0; i < 3; i++) { + int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; + + if (inst->src[i].file != UNIFORM) + continue; + + assert(constant_nr < (int)c->prog_data.nr_params); + + /* For now, set this to non-negative. We'll give it the + * actual new number in a moment, in order to keep the + * register numbers nicely ordered. + */ + this->params_remap[constant_nr] = 0; + } + } + + /* Figure out what the new numbers for the params will be. At some + * point when we're doing uniform array access, we're going to want + * to keep the distinction between .reg and .reg_offset, but for + * now we don't care. + */ + unsigned int new_nr_params = 0; + for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { + if (this->params_remap[i] != -1) { + this->params_remap[i] = new_nr_params++; + } + } + + /* Update the list of params to be uploaded to match our new numbering. */ + for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { + int remapped = this->params_remap[i]; + + if (remapped == -1) + continue; + + /* We've already done setup_paramvalues_refs() so no need to worry + * about param_index and param_offset. + */ + c->prog_data.param[remapped] = c->prog_data.param[i]; + c->prog_data.param_convert[remapped] = c->prog_data.param_convert[i]; + } + + c->prog_data.nr_params = new_nr_params; + } else { + /* This should have been generated in the 8-wide pass already. */ + assert(this->params_remap); + } + + /* Now do the renumbering of the shader to remove unused params. */ + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; + + for (int i = 0; i < 3; i++) { + int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; + + if (inst->src[i].file != UNIFORM) + continue; + + assert(this->params_remap[constant_nr] != -1); + inst->src[i].reg = this->params_remap[constant_nr]; + inst->src[i].reg_offset = 0; + } + } + + return true; +} + /** * Choose accesses from the UNIFORM file to demote to using the pull * constant buffer. @@ -815,14 +910,14 @@ fs_visitor::setup_pull_constants() int pull_uniform_base = max_uniform_components; int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base; - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (int i = 0; i < 3; i++) { if (inst->src[i].file != UNIFORM) continue; - int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset; if (uniform_nr < pull_uniform_base) continue; @@ -871,8 +966,8 @@ fs_visitor::calculate_live_intervals() } int ip = 0; - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->opcode == BRW_OPCODE_DO) { if (loop_depth++ == 0) @@ -892,7 +987,7 @@ fs_visitor::calculate_live_intervals() } } else { for (unsigned int i = 0; i < 3; i++) { - if (inst->src[i].file == GRF && inst->src[i].reg != 0) { + if (inst->src[i].file == GRF) { int reg = inst->src[i].reg; if (!loop_depth) { @@ -908,7 +1003,7 @@ fs_visitor::calculate_live_intervals() } } } - if (inst->dst.file == GRF && inst->dst.reg != 0) { + if (inst->dst.file == GRF) { int reg = inst->dst.reg; if (!loop_depth) { @@ -945,8 +1040,8 @@ fs_visitor::propagate_constants() calculate_live_intervals(); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->opcode != BRW_OPCODE_MOV || inst->predicated || @@ -965,11 +1060,9 @@ fs_visitor::propagate_constants() /* Found a move of a constant to a GRF. Find anything else using the GRF * before it's written, and replace it with the constant if we can. */ - exec_list_iterator scan_iter = iter; - scan_iter.next(); - for (; scan_iter.has_next(); scan_iter.next()) { - fs_inst *scan_inst = (fs_inst *)scan_iter.get(); - + for (fs_inst *scan_inst = (fs_inst *)inst->next; + !scan_inst->is_tail_sentinel(); + scan_inst = (fs_inst *)scan_inst->next) { if (scan_inst->opcode == BRW_OPCODE_DO || scan_inst->opcode == BRW_OPCODE_WHILE || scan_inst->opcode == BRW_OPCODE_ELSE || @@ -1046,6 +1139,24 @@ fs_visitor::propagate_constants() progress = true; } break; + + case SHADER_OPCODE_RCP: + /* The hardware doesn't do math on immediate values + * (because why are you doing that, seriously?), but + * the correct answer is to just constant fold it + * anyway. + */ + assert(i == 0); + if (inst->src[0].imm.f != 0.0f) { + scan_inst->opcode = BRW_OPCODE_MOV; + scan_inst->src[0] = inst->src[0]; + scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f; + progress = true; + } + break; + + default: + break; } } @@ -1063,6 +1174,49 @@ fs_visitor::propagate_constants() return progress; } + + +/** + * Attempts to move immediate constants into the immediate + * constant slot of following instructions. + * + * Immediate constants are a bit tricky -- they have to be in the last + * operand slot, you can't do abs/negate on them, + */ + +bool +fs_visitor::opt_algebraic() +{ + bool progress = false; + + calculate_live_intervals(); + + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; + + switch (inst->opcode) { + case BRW_OPCODE_MUL: + if (inst->src[1].file != IMM) + continue; + + /* a * 1.0 = a */ + if (inst->src[1].type == BRW_REGISTER_TYPE_F && + inst->src[1].imm.f == 1.0) { + inst->opcode = BRW_OPCODE_MOV; + inst->src[1] = reg_undef; + progress = true; + break; + } + + break; + default: + break; + } + } + + return progress; +} + /** * Must be called after calculate_live_intervales() to remove unused * writes to registers -- register allocation will fail otherwise @@ -1077,8 +1231,8 @@ fs_visitor::dead_code_eliminate() calculate_live_intervals(); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) { inst->remove(); @@ -1101,8 +1255,8 @@ fs_visitor::register_coalesce() int if_depth = 0; int loop_depth = 0; - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; /* Make sure that we dominate the instructions we're going to * scan for interfering with our coalescing, or we won't have @@ -1123,6 +1277,8 @@ fs_visitor::register_coalesce() case BRW_OPCODE_ENDIF: if_depth--; break; + default: + break; } if (loop_depth || if_depth) continue; @@ -1130,7 +1286,8 @@ fs_visitor::register_coalesce() if (inst->opcode != BRW_OPCODE_MOV || inst->predicated || inst->saturate || - inst->dst.file != GRF || inst->src[0].file != GRF || + inst->dst.file != GRF || (inst->src[0].file != GRF && + inst->src[0].file != UNIFORM)|| inst->dst.type != inst->src[0].type) continue; @@ -1141,11 +1298,10 @@ fs_visitor::register_coalesce() * program. */ bool interfered = false; - exec_list_iterator scan_iter = iter; - scan_iter.next(); - for (; scan_iter.has_next(); scan_iter.next()) { - fs_inst *scan_inst = (fs_inst *)scan_iter.get(); + for (fs_inst *scan_inst = (fs_inst *)inst->next; + !scan_inst->is_tail_sentinel(); + scan_inst = (fs_inst *)scan_inst->next) { if (scan_inst->dst.file == GRF) { if (scan_inst->dst.reg == inst->dst.reg && (scan_inst->dst.reg_offset == inst->dst.reg_offset || @@ -1153,7 +1309,8 @@ fs_visitor::register_coalesce() interfered = true; break; } - if (scan_inst->dst.reg == inst->src[0].reg && + if (inst->src[0].file == GRF && + scan_inst->dst.reg == inst->src[0].reg && (scan_inst->dst.reg_offset == inst->src[0].reg_offset || scan_inst->is_tex())) { interfered = true; @@ -1161,10 +1318,13 @@ fs_visitor::register_coalesce() } } - /* The gen6 MATH instruction can't handle source modifiers, so avoid - * coalescing those for now. We should do something more specific. + /* The gen6 MATH instruction can't handle source modifiers or + * unusual register regions, so avoid coalescing those for + * now. We should do something more specific. */ - if (intel->gen >= 6 && scan_inst->is_math() && has_source_modifiers) { + if (intel->gen >= 6 && + scan_inst->is_math() && + (has_source_modifiers || inst->src[0].file == UNIFORM)) { interfered = true; break; } @@ -1176,19 +1336,17 @@ fs_visitor::register_coalesce() /* Rewrite the later usage to point at the source of the move to * be removed. */ - for (exec_list_iterator scan_iter = iter; scan_iter.has_next(); - scan_iter.next()) { - fs_inst *scan_inst = (fs_inst *)scan_iter.get(); - + for (fs_inst *scan_inst = inst; + !scan_inst->is_tail_sentinel(); + scan_inst = (fs_inst *)scan_inst->next) { for (int i = 0; i < 3; i++) { if (scan_inst->src[i].file == GRF && scan_inst->src[i].reg == inst->dst.reg && scan_inst->src[i].reg_offset == inst->dst.reg_offset) { - scan_inst->src[i].reg = inst->src[0].reg; - scan_inst->src[i].reg_offset = inst->src[0].reg_offset; - scan_inst->src[i].abs |= inst->src[0].abs; - scan_inst->src[i].negate ^= inst->src[0].negate; - scan_inst->src[i].smear = inst->src[0].smear; + fs_reg new_src = inst->src[0]; + new_src.negate ^= scan_inst->src[i].negate; + new_src.abs |= scan_inst->src[i].abs; + scan_inst->src[i] = new_src; } } } @@ -1212,8 +1370,8 @@ fs_visitor::compute_to_mrf() calculate_live_intervals(); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; int ip = next_ip; next_ip++; @@ -1228,9 +1386,9 @@ fs_visitor::compute_to_mrf() /* Work out which hardware MRF registers are written by this * instruction. */ - int mrf_low = inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int mrf_low = inst->dst.reg & ~BRW_MRF_COMPR4; int mrf_high; - if (inst->dst.hw_reg & BRW_MRF_COMPR4) { + if (inst->dst.reg & BRW_MRF_COMPR4) { mrf_high = mrf_low + 4; } else if (c->dispatch_width == 16 && (!inst->force_uncompressed && !inst->force_sechalf)) { @@ -1297,7 +1455,7 @@ fs_visitor::compute_to_mrf() if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) { /* Found the creator of our MRF's source value. */ scan_inst->dst.file = MRF; - scan_inst->dst.hw_reg = inst->dst.hw_reg; + scan_inst->dst.reg = inst->dst.reg; scan_inst->saturate |= inst->saturate; inst->remove(); progress = true; @@ -1334,10 +1492,10 @@ fs_visitor::compute_to_mrf() /* If somebody else writes our MRF here, we can't * compute-to-MRF before that. */ - int scan_mrf_low = scan_inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int scan_mrf_low = scan_inst->dst.reg & ~BRW_MRF_COMPR4; int scan_mrf_high; - if (scan_inst->dst.hw_reg & BRW_MRF_COMPR4) { + if (scan_inst->dst.reg & BRW_MRF_COMPR4) { scan_mrf_high = scan_mrf_low + 4; } else if (c->dispatch_width == 16 && (!scan_inst->force_uncompressed && @@ -1392,8 +1550,8 @@ fs_visitor::remove_duplicate_mrf_writes() memset(last_mrf_move, 0, sizeof(last_mrf_move)); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; switch (inst->opcode) { case BRW_OPCODE_DO: @@ -1409,7 +1567,7 @@ fs_visitor::remove_duplicate_mrf_writes() if (inst->opcode == BRW_OPCODE_MOV && inst->dst.file == MRF) { - fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg]; + fs_inst *prev_inst = last_mrf_move[inst->dst.reg]; if (prev_inst && inst->equals(prev_inst)) { inst->remove(); progress = true; @@ -1419,7 +1577,7 @@ fs_visitor::remove_duplicate_mrf_writes() /* Clear out the last-write records for MRFs that were overwritten. */ if (inst->dst.file == MRF) { - last_mrf_move[inst->dst.hw_reg] = NULL; + last_mrf_move[inst->dst.reg] = NULL; } if (inst->mlen > 0) { @@ -1445,7 +1603,7 @@ fs_visitor::remove_duplicate_mrf_writes() inst->dst.file == MRF && inst->src[0].file == GRF && !inst->predicated) { - last_mrf_move[inst->dst.hw_reg] = inst; + last_mrf_move[inst->dst.reg] = inst; } } @@ -1527,8 +1685,8 @@ fs_visitor::run() /* Generate FS IR for main(). (the visitor only descends into * functions called "main"). */ - foreach_iter(exec_list_iterator, iter, *shader->ir) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &*shader->ir) { + ir_instruction *ir = (ir_instruction *)node; base_ir = ir; this->result = reg_undef; ir->accept(this); @@ -1550,11 +1708,14 @@ fs_visitor::run() progress = remove_duplicate_mrf_writes() || progress; progress = propagate_constants() || progress; + progress = opt_algebraic() || progress; progress = register_coalesce() || progress; progress = compute_to_mrf() || progress; progress = dead_code_eliminate() || progress; } while (progress); + remove_dead_constants(); + schedule_instructions(); assign_curb_setup(); @@ -1563,7 +1724,7 @@ fs_visitor::run() if (0) { /* Debug of register spilling: Go spill everything. */ int virtual_grf_count = virtual_grf_next; - for (int i = 1; i < virtual_grf_count; i++) { + for (int i = 0; i < virtual_grf_count; i++) { spill_reg(i); } } @@ -1625,7 +1786,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c, fs_visitor v(c, prog, shader); if (!v.run()) { prog->LinkStatus = GL_FALSE; - prog->InfoLog = ralloc_strdup(prog, v.fail_msg); + ralloc_strcat(&prog->InfoLog, v.fail_msg); return false; } @@ -1633,7 +1794,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c, if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) { c->dispatch_width = 16; fs_visitor v2(c, prog, shader); - v2.import_uniforms(v.variable_ht); + v2.import_uniforms(&v); v2.run(); } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 2bf850e5dea..10f45f30fe9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -25,6 +25,8 @@ * */ +#include "brw_shader.h" + extern "C" { #include <sys/types.h> @@ -51,37 +53,10 @@ enum register_file { MRF = BRW_MESSAGE_REGISTER_FILE, IMM = BRW_IMMEDIATE_VALUE, FIXED_HW_REG, /* a struct brw_reg */ - UNIFORM, /* prog_data->params[hw_reg] */ + UNIFORM, /* prog_data->params[reg] */ BAD_FILE }; -enum fs_opcodes { - FS_OPCODE_FB_WRITE = 256, - FS_OPCODE_RCP, - FS_OPCODE_RSQ, - FS_OPCODE_SQRT, - FS_OPCODE_EXP2, - FS_OPCODE_LOG2, - FS_OPCODE_POW, - FS_OPCODE_SIN, - FS_OPCODE_COS, - FS_OPCODE_DDX, - FS_OPCODE_DDY, - FS_OPCODE_PIXEL_X, - FS_OPCODE_PIXEL_Y, - FS_OPCODE_CINTERP, - FS_OPCODE_LINTERP, - FS_OPCODE_TEX, - FS_OPCODE_TXB, - FS_OPCODE_TXD, - FS_OPCODE_TXL, - FS_OPCODE_DISCARD, - FS_OPCODE_SPILL, - FS_OPCODE_UNSPILL, - FS_OPCODE_PULL_CONSTANT_LOAD, -}; - - class fs_reg { public: /* Callers of this ralloc-based new need not call delete. It's @@ -99,7 +74,6 @@ public: void init() { memset(this, 0, sizeof(*this)); - this->hw_reg = -1; this->smear = -1; } @@ -146,8 +120,8 @@ public: this->type = fixed_hw_reg.type; } - fs_reg(enum register_file file, int hw_reg); - fs_reg(enum register_file file, int hw_reg, uint32_t type); + fs_reg(enum register_file file, int reg); + fs_reg(enum register_file file, int reg, uint32_t type); fs_reg(class fs_visitor *v, const struct glsl_type *type); bool equals(fs_reg *r) @@ -155,7 +129,6 @@ public: return (file == r->file && reg == r->reg && reg_offset == r->reg_offset && - hw_reg == r->hw_reg && type == r->type && negate == r->negate && abs == r->abs && @@ -167,12 +140,17 @@ public: /** Register file: ARF, GRF, MRF, IMM. */ enum register_file file; - /** virtual register number. 0 = fixed hw reg */ + /** + * Register number. For ARF/MRF, it's the hardware register. For + * GRF, it's a virtual register number until register allocation + */ int reg; - /** Offset within the virtual register. */ + /** + * For virtual registers, this is a hardware register offset from + * the start of the register block (for example, a constant index + * in an array access). + */ int reg_offset; - /** HW register number. Generally unset until register allocation. */ - int hw_reg; /** Register type. BRW_REGISTER_TYPE_* */ int type; bool negate; @@ -224,13 +202,13 @@ public: init(); } - fs_inst(int opcode) + fs_inst(enum opcode opcode) { init(); this->opcode = opcode; } - fs_inst(int opcode, fs_reg dst) + fs_inst(enum opcode opcode, fs_reg dst) { init(); this->opcode = opcode; @@ -240,7 +218,7 @@ public: assert(dst.reg_offset >= 0); } - fs_inst(int opcode, fs_reg dst, fs_reg src0) + fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0) { init(); this->opcode = opcode; @@ -253,7 +231,7 @@ public: assert(src[0].reg_offset >= 0); } - fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) + fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) { init(); this->opcode = opcode; @@ -269,7 +247,7 @@ public: assert(src[1].reg_offset >= 0); } - fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) + fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) { init(); this->opcode = opcode; @@ -313,22 +291,23 @@ public: return (opcode == FS_OPCODE_TEX || opcode == FS_OPCODE_TXB || opcode == FS_OPCODE_TXD || - opcode == FS_OPCODE_TXL); + opcode == FS_OPCODE_TXL || + opcode == FS_OPCODE_TXS); } bool is_math() { - return (opcode == FS_OPCODE_RCP || - opcode == FS_OPCODE_RSQ || - opcode == FS_OPCODE_SQRT || - opcode == FS_OPCODE_EXP2 || - opcode == FS_OPCODE_LOG2 || - opcode == FS_OPCODE_SIN || - opcode == FS_OPCODE_COS || - opcode == FS_OPCODE_POW); + return (opcode == SHADER_OPCODE_RCP || + opcode == SHADER_OPCODE_RSQ || + opcode == SHADER_OPCODE_SQRT || + opcode == SHADER_OPCODE_EXP2 || + opcode == SHADER_OPCODE_LOG2 || + opcode == SHADER_OPCODE_SIN || + opcode == SHADER_OPCODE_COS || + opcode == SHADER_OPCODE_POW); } - int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ + enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ fs_reg dst; fs_reg src[3]; bool saturate; @@ -402,7 +381,7 @@ public: this->base_ir = NULL; this->virtual_grf_sizes = NULL; - this->virtual_grf_next = 1; + this->virtual_grf_next = 0; this->virtual_grf_array_size = 0; this->virtual_grf_def = NULL; this->virtual_grf_use = NULL; @@ -421,7 +400,7 @@ public: fs_reg *variable_storage(ir_variable *var); int virtual_grf_alloc(int size); - void import_uniforms(struct hash_table *src_variable_ht); + void import_uniforms(fs_visitor *v); void visit(ir_variable *ir); void visit(ir_assignment *ir); @@ -445,27 +424,28 @@ public: fs_inst *emit(fs_inst inst); - fs_inst *emit(int opcode) + fs_inst *emit(enum opcode opcode) { return emit(fs_inst(opcode)); } - fs_inst *emit(int opcode, fs_reg dst) + fs_inst *emit(enum opcode opcode, fs_reg dst) { return emit(fs_inst(opcode, dst)); } - fs_inst *emit(int opcode, fs_reg dst, fs_reg src0) + fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0) { return emit(fs_inst(opcode, dst, src0)); } - fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) + fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) { return emit(fs_inst(opcode, dst, src0, src1)); } - fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) + fs_inst *emit(enum opcode opcode, fs_reg dst, + fs_reg src0, fs_reg src1, fs_reg src2) { return emit(fs_inst(opcode, dst, src0, src1, src2)); } @@ -485,9 +465,11 @@ public: void setup_pull_constants(); void calculate_live_intervals(); bool propagate_constants(); + bool opt_algebraic(); bool register_coalesce(); bool compute_to_mrf(); bool dead_code_eliminate(); + bool remove_dead_constants(); bool remove_duplicate_mrf_writes(); bool virtual_grf_interferes(int a, int b); void schedule_instructions(); @@ -524,8 +506,8 @@ public: int sampler); fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, int sampler); - fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0); - fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1); + fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0); + fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); bool try_emit_saturate(ir_expression *ir); void emit_bool_to_cond_code(ir_rvalue *condition); void emit_if_gen6(ir_if *ir); @@ -565,6 +547,13 @@ public: int *virtual_grf_use; bool live_intervals_valid; + /* This is the map from UNIFORM hw_reg + reg_offset as generated by + * the visitor to the packed uniform number after + * remove_dead_constants() that represents the actual uploaded + * uniform index. + */ + int *params_remap; + struct hash_table *variable_ht; ir_variable *frag_color, *frag_data, *frag_depth; int first_non_payload_grf; diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 1d89b8f1d11..28efbd3605f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -59,7 +59,8 @@ fs_visitor::generate_fb_write(fs_inst *inst) if (inst->target > 0) { /* Set the render target index for choosing BLEND_STATE. */ - brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2), + brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, + inst->base_mrf, 2), BRW_REGISTER_TYPE_UD), brw_imm_ud(inst->target)); } @@ -145,43 +146,12 @@ void fs_visitor::generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src) { - int op; - - switch (inst->opcode) { - case FS_OPCODE_RCP: - op = BRW_MATH_FUNCTION_INV; - break; - case FS_OPCODE_RSQ: - op = BRW_MATH_FUNCTION_RSQ; - break; - case FS_OPCODE_SQRT: - op = BRW_MATH_FUNCTION_SQRT; - break; - case FS_OPCODE_EXP2: - op = BRW_MATH_FUNCTION_EXP; - break; - case FS_OPCODE_LOG2: - op = BRW_MATH_FUNCTION_LOG; - break; - case FS_OPCODE_POW: - op = BRW_MATH_FUNCTION_POW; - break; - case FS_OPCODE_SIN: - op = BRW_MATH_FUNCTION_SIN; - break; - case FS_OPCODE_COS: - op = BRW_MATH_FUNCTION_COS; - break; - default: - assert(!"not reached: unknown math function"); - op = 0; - break; - } + int op = brw_math_function(inst->opcode); if (intel->gen >= 6) { assert(inst->mlen == 0); - if (inst->opcode == FS_OPCODE_POW) { + if (inst->opcode == SHADER_OPCODE_POW) { brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math2(p, dst, op, src[0], src[1]); @@ -272,10 +242,16 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; } break; + case FS_OPCODE_TXS: + msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; + break; case FS_OPCODE_TXD: /* There is no sample_d_c message; comparisons are done manually */ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; break; + default: + assert(!"not reached"); + break; } } else { switch (inst->opcode) { @@ -316,6 +292,14 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) assert(inst->mlen == 7 || inst->mlen == 10); msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS; break; + case FS_OPCODE_TXS: + assert(inst->mlen == 3); + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO; + simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; + break; + default: + assert(!"not reached"); + break; } } assert(msg_type != -1); @@ -537,11 +521,9 @@ brw_reg_from_fs_reg(fs_reg *reg) case ARF: case MRF: if (reg->smear == -1) { - brw_reg = brw_vec8_reg(reg->file, - reg->hw_reg, 0); + brw_reg = brw_vec8_reg(reg->file, reg->reg, 0); } else { - brw_reg = brw_vec1_reg(reg->file, - reg->hw_reg, reg->smear); + brw_reg = brw_vec1_reg(reg->file, reg->reg, reg->smear); } brw_reg = retype(brw_reg, reg->type); if (reg->sechalf) @@ -608,8 +590,8 @@ fs_visitor::generate_code() prog->Name, c->dispatch_width); } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; struct brw_reg src[3], dst; if (unlikely(INTEL_DEBUG & DEBUG_WM)) { @@ -656,6 +638,11 @@ fs_visitor::generate_code() case BRW_OPCODE_MUL: brw_MUL(p, dst, src[0], src[1]); break; + case BRW_OPCODE_MACH: + brw_set_acc_write_control(p, 1); + brw_MACH(p, dst, src[0], src[1]); + brw_set_acc_write_control(p, 0); + break; case BRW_OPCODE_FRC: brw_FRC(p, dst, src[0]); @@ -770,14 +757,14 @@ fs_visitor::generate_code() } break; - case FS_OPCODE_RCP: - case FS_OPCODE_RSQ: - case FS_OPCODE_SQRT: - case FS_OPCODE_EXP2: - case FS_OPCODE_LOG2: - case FS_OPCODE_POW: - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_POW: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: generate_math(inst, dst, src); break; case FS_OPCODE_PIXEL_X: @@ -796,6 +783,7 @@ fs_visitor::generate_code() case FS_OPCODE_TXB: case FS_OPCODE_TXD: case FS_OPCODE_TXL: + case FS_OPCODE_TXS: generate_tex(inst, dst, src[0]); break; case FS_OPCODE_DISCARD: diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index b4689d2c293..7c5414ac26c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -25,23 +25,6 @@ * */ -extern "C" { - -#include <sys/types.h> - -#include "main/macros.h" -#include "main/shaderobj.h" -#include "main/uniforms.h" -#include "program/prog_parameter.h" -#include "program/prog_print.h" -#include "program/prog_optimize.h" -#include "program/register_allocate.h" -#include "program/sampler.h" -#include "program/hash_table.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_wm.h" -} #include "brw_fs.h" #include "../glsl/glsl_types.h" #include "../glsl/ir_optimization.h" @@ -50,45 +33,115 @@ extern "C" { static void assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width) { - if (reg->file == GRF && reg->reg != 0) { + if (reg->file == GRF) { assert(reg->reg_offset >= 0); - reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width; - reg->reg = 0; + reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width; + reg->reg_offset = 0; } } void fs_visitor::assign_regs_trivial() { - int last_grf = 0; - int hw_reg_mapping[this->virtual_grf_next]; + int hw_reg_mapping[this->virtual_grf_next + 1]; int i; int reg_width = c->dispatch_width / 8; - hw_reg_mapping[0] = 0; /* Note that compressed instructions require alignment to 2 registers. */ - hw_reg_mapping[1] = ALIGN(this->first_non_payload_grf, reg_width); - for (i = 2; i < this->virtual_grf_next; i++) { + hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width); + for (i = 1; i <= this->virtual_grf_next; i++) { hw_reg_mapping[i] = (hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1] * reg_width); } - last_grf = hw_reg_mapping[i - 1] + (this->virtual_grf_sizes[i - 1] * - reg_width); + this->grf_used = hw_reg_mapping[this->virtual_grf_next]; - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; assign_reg(hw_reg_mapping, &inst->dst, reg_width); assign_reg(hw_reg_mapping, &inst->src[0], reg_width); assign_reg(hw_reg_mapping, &inst->src[1], reg_width); } - if (last_grf >= BRW_MAX_GRF) { + if (this->grf_used >= BRW_MAX_GRF) { fail("Ran out of regs on trivial allocator (%d/%d)\n", - last_grf, BRW_MAX_GRF); + this->grf_used, BRW_MAX_GRF); + } + +} + +static void +brw_alloc_reg_set_for_classes(struct brw_context *brw, + int *class_sizes, + int class_count, + int reg_width, + int base_reg_count) +{ + struct intel_context *intel = &brw->intel; + + /* Compute the total number of registers across all classes. */ + int ra_reg_count = 0; + for (int i = 0; i < class_count; i++) { + ra_reg_count += base_reg_count - (class_sizes[i] - 1); + } + + ralloc_free(brw->wm.ra_reg_to_grf); + brw->wm.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count); + ralloc_free(brw->wm.regs); + brw->wm.regs = ra_alloc_reg_set(ra_reg_count); + ralloc_free(brw->wm.classes); + brw->wm.classes = ralloc_array(brw, int, class_count + 1); + + brw->wm.aligned_pairs_class = -1; + + /* Now, add the registers to their classes, and add the conflicts + * between them and the base GRF registers (and also each other). + */ + int reg = 0; + int pairs_base_reg = 0; + int pairs_reg_count = 0; + for (int i = 0; i < class_count; i++) { + int class_reg_count = base_reg_count - (class_sizes[i] - 1); + brw->wm.classes[i] = ra_alloc_reg_class(brw->wm.regs); + + /* Save this off for the aligned pair class at the end. */ + if (class_sizes[i] == 2) { + pairs_base_reg = reg; + pairs_reg_count = class_reg_count; + } + + for (int j = 0; j < class_reg_count; j++) { + ra_class_add_reg(brw->wm.regs, brw->wm.classes[i], reg); + + brw->wm.ra_reg_to_grf[reg] = j; + + for (int base_reg = j; + base_reg < j + class_sizes[i]; + base_reg++) { + ra_add_transitive_reg_conflict(brw->wm.regs, base_reg, reg); + } + + reg++; + } + } + assert(reg == ra_reg_count); + + /* Add a special class for aligned pairs, which we'll put delta_x/y + * in on gen5 so that we can do PLN. + */ + if (brw->has_pln && reg_width == 1 && intel->gen < 6) { + brw->wm.aligned_pairs_class = ra_alloc_reg_class(brw->wm.regs); + + for (int i = 0; i < pairs_reg_count; i++) { + if ((brw->wm.ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) { + ra_class_add_reg(brw->wm.regs, brw->wm.aligned_pairs_class, + pairs_base_reg + i); + } + } + class_count++; } - this->grf_used = last_grf + reg_width; + ra_set_finalize(brw->wm.regs); } bool @@ -101,12 +154,11 @@ fs_visitor::assign_regs() * for reg_width == 2. */ int reg_width = c->dispatch_width / 8; - int hw_reg_mapping[this->virtual_grf_next + 1]; + int hw_reg_mapping[this->virtual_grf_next]; int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width); int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width; int class_sizes[base_reg_count]; int class_count = 0; - int aligned_pair_class = -1; calculate_live_intervals(); @@ -125,7 +177,7 @@ fs_visitor::assign_regs() */ class_sizes[class_count++] = 2; } - for (int r = 1; r < this->virtual_grf_next; r++) { + for (int r = 0; r < this->virtual_grf_next; r++) { int i; for (i = 0; i < class_count; i++) { @@ -141,94 +193,26 @@ fs_visitor::assign_regs() } } - int ra_reg_count = 0; - int class_base_reg[class_count]; - int class_reg_count[class_count]; - int classes[class_count + 1]; - - for (int i = 0; i < class_count; i++) { - class_base_reg[i] = ra_reg_count; - class_reg_count[i] = base_reg_count - (class_sizes[i] - 1); - ra_reg_count += class_reg_count[i]; - } - - struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count); - for (int i = 0; i < class_count; i++) { - classes[i] = ra_alloc_reg_class(regs); - - for (int i_r = 0; i_r < class_reg_count[i]; i_r++) { - ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r); - } + brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, + reg_width, base_reg_count); - /* Add conflicts between our contiguous registers aliasing - * base regs and other register classes' contiguous registers - * that alias base regs, or the base regs themselves for classes[0]. - */ - for (int c = 0; c <= i; c++) { - for (int i_r = 0; i_r < class_reg_count[i]; i_r++) { - for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1)); - c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]); - c_r++) { - - if (0) { - printf("%d/%d conflicts %d/%d\n", - class_sizes[i], first_assigned_grf + i_r, - class_sizes[c], first_assigned_grf + c_r); - } - - ra_add_reg_conflict(regs, - class_base_reg[i] + i_r, - class_base_reg[c] + c_r); - } - } - } - } - - /* Add a special class for aligned pairs, which we'll put delta_x/y - * in on gen5 so that we can do PLN. - */ - if (brw->has_pln && reg_width == 1 && intel->gen < 6) { - int reg_count = (base_reg_count - 1) / 2; - int unaligned_pair_class = 1; - assert(class_sizes[unaligned_pair_class] == 2); - - aligned_pair_class = class_count; - classes[aligned_pair_class] = ra_alloc_reg_class(regs); - class_sizes[aligned_pair_class] = 2; - class_base_reg[aligned_pair_class] = 0; - class_reg_count[aligned_pair_class] = 0; - int start = (first_assigned_grf & 1) ? 1 : 0; - - for (int i = 0; i < reg_count; i++) { - ra_class_add_reg(regs, classes[aligned_pair_class], - class_base_reg[unaligned_pair_class] + i * 2 + start); - } - class_count++; - } - - ra_set_finalize(regs); - - struct ra_graph *g = ra_alloc_interference_graph(regs, + struct ra_graph *g = ra_alloc_interference_graph(brw->wm.regs, this->virtual_grf_next); - /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1 - * with nodes. - */ - ra_set_node_class(g, 0, classes[0]); - for (int i = 1; i < this->virtual_grf_next; i++) { + for (int i = 0; i < this->virtual_grf_next; i++) { for (int c = 0; c < class_count; c++) { if (class_sizes[c] == this->virtual_grf_sizes[i]) { - if (aligned_pair_class >= 0 && + if (brw->wm.aligned_pairs_class >= 0 && this->delta_x.reg == i) { - ra_set_node_class(g, i, classes[aligned_pair_class]); + ra_set_node_class(g, i, brw->wm.aligned_pairs_class); } else { - ra_set_node_class(g, i, classes[c]); + ra_set_node_class(g, i, brw->wm.classes[c]); } break; } } - for (int j = 1; j < i; j++) { + for (int j = 0; j < i; j++) { if (virtual_grf_interferes(i, j)) { ra_add_node_interference(g, i, j); } @@ -253,7 +237,6 @@ fs_visitor::assign_regs() ralloc_free(g); - ralloc_free(regs); return false; } @@ -263,28 +246,18 @@ fs_visitor::assign_regs() * numbers. */ this->grf_used = first_assigned_grf; - hw_reg_mapping[0] = 0; /* unused */ - for (int i = 1; i < this->virtual_grf_next; i++) { + for (int i = 0; i < this->virtual_grf_next; i++) { int reg = ra_get_node_reg(g, i); - int hw_reg = -1; - - for (int c = 0; c < class_count; c++) { - if (reg >= class_base_reg[c] && - reg < class_base_reg[c] + class_reg_count[c]) { - hw_reg = reg - class_base_reg[c]; - break; - } - } - assert(hw_reg >= 0); - hw_reg_mapping[i] = first_assigned_grf + hw_reg * reg_width; + hw_reg_mapping[i] = (first_assigned_grf + + brw->wm.ra_reg_to_grf[reg] * reg_width); this->grf_used = MAX2(this->grf_used, hw_reg_mapping[i] + this->virtual_grf_sizes[i] * reg_width); } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; assign_reg(hw_reg_mapping, &inst->dst, reg_width); assign_reg(hw_reg_mapping, &inst->src[0], reg_width); @@ -292,7 +265,6 @@ fs_visitor::assign_regs() } ralloc_free(g); - ralloc_free(regs); return true; } @@ -336,8 +308,8 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) * spill/unspill we'll have to do, and guess that the insides of * loops run 10 times. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == GRF) { @@ -370,6 +342,9 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) if (inst->dst.file == GRF) no_spill[inst->dst.reg] = true; break; + + default: + break; } } @@ -394,8 +369,8 @@ fs_visitor::spill_reg(int spill_reg) * virtual grf of the same size. For most instructions, though, we * could just spill/unspill the GRF being accessed. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == GRF && diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index d8218c26edb..0ea4e5c36f0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -25,21 +25,6 @@ * */ -extern "C" { - -#include <sys/types.h> - -#include "main/macros.h" -#include "main/shaderobj.h" -#include "main/uniforms.h" -#include "program/prog_optimize.h" -#include "program/register_allocate.h" -#include "program/sampler.h" -#include "program/hash_table.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_wm.h" -} #include "brw_fs.h" #include "../glsl/glsl_types.h" #include "../glsl/ir_optimization.h" @@ -84,26 +69,26 @@ public: int math_latency = 22; switch (inst->opcode) { - case FS_OPCODE_RCP: + case SHADER_OPCODE_RCP: this->latency = 1 * chans * math_latency; break; - case FS_OPCODE_RSQ: + case SHADER_OPCODE_RSQ: this->latency = 2 * chans * math_latency; break; - case FS_OPCODE_SQRT: - case FS_OPCODE_LOG2: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_LOG2: /* full precision log. partial is 2. */ this->latency = 3 * chans * math_latency; break; - case FS_OPCODE_EXP2: + case SHADER_OPCODE_EXP2: /* full precision. partial is 3, same throughput. */ this->latency = 4 * chans * math_latency; break; - case FS_OPCODE_POW: + case SHADER_OPCODE_POW: this->latency = 8 * chans * math_latency; break; - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: /* minimum latency, max is 12 rounds. */ this->latency = 5 * chans * math_latency; break; @@ -283,8 +268,8 @@ instruction_scheduler::calculate_deps() memset(last_mrf_write, 0, sizeof(last_mrf_write)); /* top-to-bottom dependencies: RAW and WAW. */ - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list(node, &instructions) { + schedule_node *n = (schedule_node *)node; fs_inst *inst = n->inst; /* read-after-write deps. */ @@ -321,12 +306,12 @@ instruction_scheduler::calculate_deps() add_dep(last_grf_write[inst->dst.reg], n); last_grf_write[inst->dst.reg] = n; } else if (inst->dst.file == MRF) { - int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int reg = inst->dst.reg & ~BRW_MRF_COMPR4; add_dep(last_mrf_write[reg], n); last_mrf_write[reg] = n; if (is_compressed(inst)) { - if (inst->dst.hw_reg & BRW_MRF_COMPR4) + if (inst->dst.reg & BRW_MRF_COMPR4) reg += 4; else reg++; @@ -401,12 +386,12 @@ instruction_scheduler::calculate_deps() if (inst->dst.file == GRF) { last_grf_write[inst->dst.reg] = n; } else if (inst->dst.file == MRF) { - int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int reg = inst->dst.reg & ~BRW_MRF_COMPR4; last_mrf_write[reg] = n; if (is_compressed(inst)) { - if (inst->dst.hw_reg & BRW_MRF_COMPR4) + if (inst->dst.reg & BRW_MRF_COMPR4) reg += 4; else reg++; @@ -437,8 +422,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header) int time = 0; /* Remove non-DAG heads from the list. */ - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list_safe(node, &instructions) { + schedule_node *n = (schedule_node *)node; if (n->parent_count != 0) n->remove(); } @@ -447,8 +432,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header) schedule_node *chosen = NULL; int chosen_time = 0; - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list(node, &instructions) { + schedule_node *n = (schedule_node *)node; if (!chosen || n->unblocked_time < chosen_time) { chosen = n; @@ -490,8 +475,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header) * progress until the first is done. */ if (chosen->inst->is_math()) { - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list(node, &instructions) { + schedule_node *n = (schedule_node *)node; if (n->inst->is_math()) n->unblocked_time = MAX2(n->unblocked_time, diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp index 530ffa26580..a9a60c2fd8a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp @@ -122,8 +122,8 @@ ir_vector_reference_visitor::get_variable_entry(ir_variable *var) break; } - foreach_iter(exec_list_iterator, iter, this->variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list(node, &this->variable_list) { + variable_entry *entry = (variable_entry *)node; if (entry->var == var) return entry; } @@ -222,8 +222,8 @@ ir_vector_splitting_visitor::get_splitting_entry(ir_variable *var) if (!var->type->is_vector()) return NULL; - foreach_iter(exec_list_iterator, iter, *this->variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list(node, &*this->variable_list) { + variable_entry *entry = (variable_entry *)node; if (entry->var == var) { return entry; } @@ -341,8 +341,8 @@ brw_do_vector_splitting(exec_list *instructions) visit_list_elements(&refs, instructions); /* Trim out variables we can't split. */ - foreach_iter(exec_list_iterator, iter, refs.variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list_safe(node, &refs.variable_list) { + variable_entry *entry = (variable_entry *)node; if (debug) { printf("vector %s@%p: decl %d, whole_access %d\n", @@ -363,8 +363,8 @@ brw_do_vector_splitting(exec_list *instructions) /* Replace the decls of the vectors to be split with their split * components. */ - foreach_iter(exec_list_iterator, iter, refs.variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list(node, &refs.variable_list) { + variable_entry *entry = (variable_entry *)node; const struct glsl_type *type; type = glsl_type::get_instance(entry->var->type->base_type, 1, 1); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index cbe5cf428c5..cdaf543c88b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -142,9 +142,7 @@ fs_visitor::visit(ir_dereference_array *ir) this->result.type = brw_type_for_base_type(ir->type); if (index) { - assert(this->result.file == UNIFORM || - (this->result.file == GRF && - this->result.reg != 0)); + assert(this->result.file == UNIFORM || this->result.file == GRF); this->result.reg_offset += index->value.i[0] * element_size; } else { assert(!"FINISHME: non-constant array element"); @@ -252,14 +250,14 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_rcp: - emit_math(FS_OPCODE_RCP, this->result, op[0]); + emit_math(SHADER_OPCODE_RCP, this->result, op[0]); break; case ir_unop_exp2: - emit_math(FS_OPCODE_EXP2, this->result, op[0]); + emit_math(SHADER_OPCODE_EXP2, this->result, op[0]); break; case ir_unop_log2: - emit_math(FS_OPCODE_LOG2, this->result, op[0]); + emit_math(SHADER_OPCODE_LOG2, this->result, op[0]); break; case ir_unop_exp: case ir_unop_log: @@ -267,11 +265,11 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_sin: case ir_unop_sin_reduced: - emit_math(FS_OPCODE_SIN, this->result, op[0]); + emit_math(SHADER_OPCODE_SIN, this->result, op[0]); break; case ir_unop_cos: case ir_unop_cos_reduced: - emit_math(FS_OPCODE_COS, this->result, op[0]); + emit_math(SHADER_OPCODE_COS, this->result, op[0]); break; case ir_unop_dFdx: @@ -289,7 +287,23 @@ fs_visitor::visit(ir_expression *ir) break; case ir_binop_mul: - emit(BRW_OPCODE_MUL, this->result, op[0], op[1]); + if (ir->type->is_integer()) { + /* For integer multiplication, the MUL uses the low 16 bits + * of one of the operands (src0 on gen6, src1 on gen7). The + * MACH accumulates in the contribution of the upper 16 bits + * of that operand. + * + * FINISHME: Emit just the MUL if we know an operand is small + * enough. + */ + struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D); + + emit(BRW_OPCODE_MUL, acc, op[0], op[1]); + emit(BRW_OPCODE_MACH, reg_null_d, op[0], op[1]); + emit(BRW_OPCODE_MOV, this->result, fs_reg(acc)); + } else { + emit(BRW_OPCODE_MUL, this->result, op[0], op[1]); + } break; case ir_binop_div: assert(!"not reached: should be handled by ir_div_to_mul_rcp"); @@ -342,11 +356,11 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_sqrt: - emit_math(FS_OPCODE_SQRT, this->result, op[0]); + emit_math(SHADER_OPCODE_SQRT, this->result, op[0]); break; case ir_unop_rsq: - emit_math(FS_OPCODE_RSQ, this->result, op[0]); + emit_math(SHADER_OPCODE_RSQ, this->result, op[0]); break; case ir_unop_i2u: @@ -425,7 +439,7 @@ fs_visitor::visit(ir_expression *ir) break; case ir_binop_pow: - emit_math(FS_OPCODE_POW, this->result, op[0], op[1]); + emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]); break; case ir_unop_bit_not: @@ -496,7 +510,7 @@ fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, void fs_visitor::visit(ir_assignment *ir) { - struct fs_reg l, r; + fs_reg l, r; fs_inst *inst; /* FINISHME: arrays on the lhs */ @@ -603,9 +617,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ mlen += 3; } else if (ir->op == ir_txd) { + this->result = reg_undef; ir->lod_info.grad.dPdx->accept(this); fs_reg dPdx = this->result; + this->result = reg_undef; ir->lod_info.grad.dPdy->accept(this); fs_reg dPdy = this->result; @@ -620,6 +636,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, * dPdx = dudx, dvdx, drdx * dPdy = dudy, dvdy, drdy * + * 1-arg: Does not exist. + * * 2-arg: dudx dvdx dudy dvdy * dPdx.x dPdx.y dPdy.x dPdy.y * m4 m5 m6 m7 @@ -631,18 +649,26 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); dPdx.reg_offset++; - mlen++; } + mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2); for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) { emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); dPdy.reg_offset++; - mlen++; } + mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2); + } else if (ir->op == ir_txs) { + /* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. */ + simd16 = true; + this->result = reg_undef; + ir->lod_info.lod->accept(this); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result); + mlen += 2; } else { /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod * instructions. We'll need to do SIMD16 here. */ + simd16 = true; assert(ir->op == ir_txb || ir->op == ir_txl); for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { @@ -671,16 +697,19 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, /* The unused upper half. */ mlen++; + } + if (simd16) { /* Now, since we're doing simd16, the return is 2 interleaved * vec4s where the odd-indexed ones are junk. We'll need to move * this weirdness around to the expected layout. */ - simd16 = true; orig_dst = dst; - dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, - 2)); - dst.type = BRW_REGISTER_TYPE_F; + const glsl_type *vec_type = + glsl_type::get_instance(ir->type->base_type, 4, 1); + dst = fs_reg(this, glsl_type::get_array_instance(vec_type, 2)); + dst.type = intel->is_g4x ? brw_type_for_base_type(ir->type) + : BRW_REGISTER_TYPE_F; } fs_inst *inst = NULL; @@ -697,6 +726,9 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break; + case ir_txs: + inst = emit(FS_OPCODE_TXS, dst); + break; case ir_txf: assert(!"GLSL 1.30 features unsupported"); break; @@ -732,6 +764,8 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, int base_mrf = 2; int reg_width = c->dispatch_width / 8; bool header_present = false; + const int vector_elements = + ir->coordinate ? ir->coordinate->type->vector_elements : 0; if (ir->offset) { /* The offsets set up by the ir_texture visitor are in the @@ -742,7 +776,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, base_mrf--; } - for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { + for (int i = 0; i < vector_elements; i++) { fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * reg_width), coordinate); @@ -750,7 +784,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, inst->saturate = true; coordinate.reg_offset++; } - mlen += ir->coordinate->type->vector_elements * reg_width; + mlen += vector_elements * reg_width; if (ir->shadow_comparitor && ir->op != ir_txd) { mlen = MAX2(mlen, header_present + 4 * reg_width); @@ -786,9 +820,11 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, inst = emit(FS_OPCODE_TXL, dst); break; case ir_txd: { + this->result = reg_undef; ir->lod_info.grad.dPdx->accept(this); fs_reg dPdx = this->result; + this->result = reg_undef; ir->lod_info.grad.dPdy->accept(this); fs_reg dPdy = this->result; @@ -816,6 +852,13 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, inst = emit(FS_OPCODE_TXD, dst); break; } + case ir_txs: + this->result = reg_undef; + ir->lod_info.lod->accept(this); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result); + mlen += reg_width; + inst = emit(FS_OPCODE_TXS, dst); + break; case ir_txf: assert(!"GLSL 1.30 features unsupported"); break; @@ -850,6 +893,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, } if (ir->shadow_comparitor && ir->op != ir_txd) { + this->result = reg_undef; ir->shadow_comparitor->accept(this); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen += reg_width; @@ -860,11 +904,13 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_tex: break; case ir_txb: + this->result = reg_undef; ir->lod_info.bias->accept(this); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen += reg_width; break; case ir_txl: + this->result = reg_undef; ir->lod_info.lod->accept(this); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen += reg_width; @@ -873,9 +919,11 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, if (c->dispatch_width == 16) fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); + this->result = reg_undef; ir->lod_info.grad.dPdx->accept(this); fs_reg dPdx = this->result; + this->result = reg_undef; ir->lod_info.grad.dPdy->accept(this); fs_reg dPdy = this->result; @@ -900,13 +948,19 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, } break; } + case ir_txs: + this->result = reg_undef; + ir->lod_info.lod->accept(this); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result); + mlen += reg_width; + break; case ir_txf: assert(!"GLSL 1.30 features unsupported"); break; } /* Set up the coordinate (except for TXD where it was done earlier) */ - if (ir->op != ir_txd) { + if (ir->op != ir_txd && ir->op != ir_txs) { for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate); @@ -924,7 +978,8 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break; case ir_txl: inst = emit(FS_OPCODE_TXL, dst); break; case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break; - case ir_txf: assert(!"TXF unsupported."); + case ir_txf: assert(!"TXF unsupported."); break; + case ir_txs: inst = emit(FS_OPCODE_TXS, dst); break; } inst->base_mrf = base_mrf; inst->mlen = mlen; @@ -959,7 +1014,8 @@ fs_visitor::visit(ir_texture *ir) } this->result = reg_undef; - ir->coordinate->accept(this); + if (ir->coordinate) + ir->coordinate->accept(this); fs_reg coordinate = this->result; if (ir->offset != NULL) { @@ -1000,7 +1056,8 @@ fs_visitor::visit(ir_texture *ir) * texture coordinates. We use the program parameter state * tracking to get the scaling factor. */ - if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { + if (intel->gen < 6 && + ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { struct gl_program_parameter_list *params = c->fp->program.Base.Parameters; int tokens[STATE_LENGTH] = { STATE_INTERNAL, @@ -1046,7 +1103,7 @@ fs_visitor::visit(ir_texture *ir) /* Writemasking doesn't eliminate channels on SIMD8 texture * samples, so don't worry about them. */ - fs_reg dst = fs_reg(this, glsl_type::vec4_type); + fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1)); if (intel->gen >= 7) { inst = emit_texture_gen7(ir, dst, coordinate, sampler); @@ -1070,6 +1127,7 @@ fs_visitor::visit(ir_texture *ir) if (hw_compare_supported) { inst->shadow_compare = true; } else { + this->result = reg_undef; ir->shadow_comparitor->accept(this); fs_reg ref = this->result; @@ -1465,8 +1523,8 @@ fs_visitor::visit(ir_if *ir) inst->predicated = true; } - foreach_iter(exec_list_iterator, iter, ir->then_instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &ir->then_instructions) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; ir->accept(this); @@ -1475,8 +1533,8 @@ fs_visitor::visit(ir_if *ir) if (!ir->else_instructions.is_empty()) { emit(BRW_OPCODE_ELSE); - foreach_iter(exec_list_iterator, iter, ir->else_instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &ir->else_instructions) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; ir->accept(this); @@ -1526,8 +1584,8 @@ fs_visitor::visit(ir_loop *ir) inst->predicated = true; } - foreach_iter(exec_list_iterator, iter, ir->body_instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &ir->body_instructions) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; @@ -1583,8 +1641,8 @@ fs_visitor::visit(ir_function *ir) assert(sig); - foreach_iter(exec_list_iterator, iter, sig->body) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &sig->body) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; ir->accept(this); @@ -1684,7 +1742,7 @@ fs_visitor::emit_interpolation_setup_gen4() interp_reg(FRAG_ATTRIB_WPOS, 3)); /* Compute the pixel 1/W value from wpos.w. */ this->pixel_w = fs_reg(this, glsl_type::float_type); - emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); + emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w); this->current_annotation = NULL; } @@ -1721,7 +1779,7 @@ fs_visitor::emit_interpolation_setup_gen6() this->current_annotation = "compute pos.w"; this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); this->wpos_w = fs_reg(this, glsl_type::float_type); - emit_math(FS_OPCODE_RCP, this->wpos_w, this->pixel_w); + emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w); this->delta_x = fs_reg(brw_vec8_grf(2, 0)); this->delta_y = fs_reg(brw_vec8_grf(3, 0)); @@ -1733,6 +1791,7 @@ void fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color) { int reg_width = c->dispatch_width / 8; + fs_inst *inst; if (c->dispatch_width == 8 || intel->gen == 6) { /* SIMD8 write looks like: @@ -1751,8 +1810,10 @@ fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color) * m + 6: a0 * m + 7: a1 */ - emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index * reg_width), - color); + inst = emit(BRW_OPCODE_MOV, + fs_reg(MRF, first_color_mrf + index * reg_width), + color); + inst->saturate = c->key.clamp_fragment_color; } else { /* pre-gen6 SIMD16 single source DP write looks like: * m + 0: r0 @@ -1770,16 +1831,22 @@ fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color) * usual destination + 1 for the second half we get * destination + 4. */ - emit(BRW_OPCODE_MOV, - fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), color); + inst = emit(BRW_OPCODE_MOV, + fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), + color); + inst->saturate = c->key.clamp_fragment_color; } else { push_force_uncompressed(); - emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), color); + inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), + color); + inst->saturate = c->key.clamp_fragment_color; pop_force_uncompressed(); push_force_sechalf(); color.sechalf = true; - emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), color); + inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), + color); + inst->saturate = c->key.clamp_fragment_color; pop_force_sechalf(); color.sechalf = false; } diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 03cebbb824b..f7e6e7c81d1 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -46,7 +46,7 @@ static void upload_drawing_rect(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE); + OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); OUT_BATCH(0); /* xmin, ymin */ OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) | ((ctx->DrawBuffer->Height - 1) << 16)); diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 6674f1640c8..09b5be4c96e 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -226,6 +226,34 @@ static GLboolean brwProgramStringNotify( struct gl_context *ctx, return GL_TRUE; } +/* Per-thread scratch space is a power-of-two multiple of 1KB. */ +int +brw_get_scratch_size(int size) +{ + int i; + + for (i = 1024; i < size; i *= 2) + ; + + return i; +} + +void +brw_get_scratch_bo(struct intel_context *intel, + drm_intel_bo **scratch_bo, int size) +{ + drm_intel_bo *old_bo = *scratch_bo; + + if (old_bo && old_bo->size < size) { + drm_intel_bo_unreference(old_bo); + old_bo = NULL; + } + + if (!old_bo) { + *scratch_bo = drm_intel_bo_alloc(intel->bufmgr, "scratch bo", size, 4096); + } +} + void brwInitFragProgFuncs( struct dd_function_table *functions ) { assert(functions->ProgramStringNotify == _tnl_program_string); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 9471883fb2b..3ff6bbaed47 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -24,6 +24,7 @@ extern "C" { #include "main/macros.h" #include "brw_context.h" +#include "brw_vs.h" } #include "brw_fs.h" #include "../glsl/ir_optimization.h" @@ -67,6 +68,9 @@ brw_shader_precompile(struct gl_context *ctx, struct gl_shader_program *prog) if (!brw_fs_precompile(ctx, prog)) return false; + if (!brw_vs_precompile(ctx, prog)) + return false; + return true; } @@ -75,10 +79,15 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) { struct brw_context *brw = brw_context(ctx); struct intel_context *intel = &brw->intel; + unsigned int stage; + + for (stage = 0; stage < ARRAY_SIZE(prog->_LinkedShaders); stage++) { + struct brw_shader *shader = + (struct brw_shader *)prog->_LinkedShaders[stage]; + + if (!shader) + continue; - struct brw_shader *shader = - (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; - if (shader != NULL) { void *mem_ctx = ralloc_context(NULL); bool progress; @@ -106,18 +115,22 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) brw_do_cubemap_normalize(shader->ir); lower_noise(shader->ir); lower_quadop_vector(shader->ir, false); + + bool input = true; + bool output = stage == MESA_SHADER_FRAGMENT; + bool temp = stage == MESA_SHADER_FRAGMENT; + bool uniform = true; + lower_variable_index_to_cond_assign(shader->ir, - GL_TRUE, /* input */ - GL_TRUE, /* output */ - GL_TRUE, /* temp */ - GL_TRUE /* uniform */ - ); + input, output, temp, uniform); do { progress = false; - brw_do_channel_expressions(shader->ir); - brw_do_vector_splitting(shader->ir); + if (stage == MESA_SHADER_FRAGMENT) { + brw_do_channel_expressions(shader->ir); + brw_do_vector_splitting(shader->ir); + } progress = do_lower_jumps(shader->ir, true, true, true, /* main return */ @@ -192,3 +205,29 @@ brw_conditional_for_comparison(unsigned int op) return BRW_CONDITIONAL_NZ; } } + +uint32_t +brw_math_function(enum opcode op) +{ + switch (op) { + case SHADER_OPCODE_RCP: + return BRW_MATH_FUNCTION_INV; + case SHADER_OPCODE_RSQ: + return BRW_MATH_FUNCTION_RSQ; + case SHADER_OPCODE_SQRT: + return BRW_MATH_FUNCTION_SQRT; + case SHADER_OPCODE_EXP2: + return BRW_MATH_FUNCTION_EXP; + case SHADER_OPCODE_LOG2: + return BRW_MATH_FUNCTION_LOG; + case SHADER_OPCODE_POW: + return BRW_MATH_FUNCTION_POW; + case SHADER_OPCODE_SIN: + return BRW_MATH_FUNCTION_SIN; + case SHADER_OPCODE_COS: + return BRW_MATH_FUNCTION_COS; + default: + assert(!"not reached: unknown math function"); + return 0; + } +} diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 4c568a26caa..1054d7a589e 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -21,5 +21,11 @@ * IN THE SOFTWARE. */ +#include <stdint.h> +#include "brw_defines.h" + +#pragma once + int brw_type_for_base_type(const struct glsl_type *type); uint32_t brw_conditional_for_comparison(unsigned int op); +uint32_t brw_math_function(enum opcode op); diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index b9e5cc1a534..cb7a3ef73d3 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -455,6 +455,23 @@ dump_vs_constants(struct brw_context *brw, uint32_t offset, uint32_t size) } } +static void +dump_wm_constants(struct brw_context *brw, uint32_t offset, uint32_t size) +{ + const char *name = "WM_CONST"; + struct intel_context *intel = &brw->intel; + uint32_t *as_uint = intel->batch.bo->virtual + offset; + float *as_float = intel->batch.bo->virtual + offset; + int i; + + for (i = 0; i < size / 4; i += 4) { + batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n", + i / 4, + as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3], + as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]); + } +} + static void dump_binding_table(struct brw_context *brw, uint32_t offset, uint32_t size) { @@ -602,6 +619,9 @@ dump_state_batch(struct brw_context *brw) case AUB_TRACE_VS_CONSTANTS: dump_vs_constants(brw, offset, size); break; + case AUB_TRACE_WM_CONSTANTS: + dump_wm_constants(brw, offset, size); + break; default: break; } diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index f462f32b19a..46a417a08ed 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -60,7 +60,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel, * given in Volume 1 of the BSpec. */ h0 = ALIGN(mt->height0, align_h); - h1 = ALIGN(minify(h0), align_h); + h1 = ALIGN(minify(mt->height0), align_h); qpitch = (h0 + h1 + (intel->gen >= 7 ? 12 : 11) * align_h); if (mt->compressed) qpitch /= 4; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp new file mode 100644 index 00000000000..760bc1f7acd --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -0,0 +1,161 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_vec4.h" +extern "C" { +#include "main/macros.h" +#include "program/prog_parameter.h" +} + +#define MAX_INSTRUCTION (1 << 30) + +namespace brw { + +void +vec4_visitor::calculate_live_intervals() +{ + int *def = ralloc_array(mem_ctx, int, virtual_grf_count); + int *use = ralloc_array(mem_ctx, int, virtual_grf_count); + int loop_depth = 0; + int loop_start = 0; + + if (this->live_intervals_valid) + return; + + for (int i = 0; i < virtual_grf_count; i++) { + def[i] = MAX_INSTRUCTION; + use[i] = -1; + } + + int ip = 0; + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + if (inst->opcode == BRW_OPCODE_DO) { + if (loop_depth++ == 0) + loop_start = ip; + } else if (inst->opcode == BRW_OPCODE_WHILE) { + loop_depth--; + + if (loop_depth == 0) { + /* Patches up the use of vars marked for being live across + * the whole loop. + */ + for (int i = 0; i < virtual_grf_count; i++) { + if (use[i] == loop_start) { + use[i] = ip; + } + } + } + } else { + for (unsigned int i = 0; i < 3; i++) { + if (inst->src[i].file == GRF) { + int reg = inst->src[i].reg; + + if (!loop_depth) { + use[reg] = ip; + } else { + def[reg] = MIN2(loop_start, def[reg]); + use[reg] = loop_start; + + /* Nobody else is going to go smash our start to + * later in the loop now, because def[reg] now + * points before the bb header. + */ + } + } + } + if (inst->dst.file == GRF) { + int reg = inst->dst.reg; + + if (!loop_depth) { + def[reg] = MIN2(def[reg], ip); + } else { + def[reg] = MIN2(def[reg], loop_start); + } + } + } + + ip++; + } + + ralloc_free(this->virtual_grf_def); + ralloc_free(this->virtual_grf_use); + this->virtual_grf_def = def; + this->virtual_grf_use = use; + + this->live_intervals_valid = true; +} + +bool +vec4_visitor::virtual_grf_interferes(int a, int b) +{ + int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]); + int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]); + + /* We can't handle dead register writes here, without iterating + * over the whole instruction stream to find every single dead + * write to that register to compare to the live interval of the + * other register. Just assert that dead_code_eliminate() has been + * called. + */ + assert((this->virtual_grf_use[a] != -1 || + this->virtual_grf_def[a] == MAX_INSTRUCTION) && + (this->virtual_grf_use[b] != -1 || + this->virtual_grf_def[b] == MAX_INSTRUCTION)); + + return start < end; +} + +/** + * Must be called after calculate_live_intervales() to remove unused + * writes to registers -- register allocation will fail otherwise + * because something deffed but not used won't be considered to + * interfere with other regs. + */ +bool +vec4_visitor::dead_code_eliminate() +{ + bool progress = false; + int pc = 0; + + calculate_live_intervals(); + + foreach_list_safe(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) { + inst->remove(); + progress = true; + } + + pc++; + } + + if (progress) + live_intervals_valid = false; + + return progress; +} + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h new file mode 100644 index 00000000000..1db910e2b99 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -0,0 +1,489 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef BRW_VEC4_H +#define BRW_VEC4_H + +#include <stdint.h> +#include "brw_shader.h" +#include "main/compiler.h" +#include "program/hash_table.h" + +extern "C" { +#include "brw_vs.h" +#include "brw_context.h" +#include "brw_eu.h" +}; + +#include "../glsl/ir.h" + +namespace brw { + +class dst_reg; + +/** + * Common helper for constructing swizzles. When only a subset of + * channels of a vec4 are used, we don't want to reference the other + * channels, as that will tell optimization passes that those other + * channels are used. + */ +static int +swizzle_for_size(int size) +{ + int size_swizzles[4] = { + BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), + BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), + }; + + assert((size >= 1) && (size <= 4)); + return size_swizzles[size - 1]; +} + +enum register_file { + ARF = BRW_ARCHITECTURE_REGISTER_FILE, + GRF = BRW_GENERAL_REGISTER_FILE, + MRF = BRW_MESSAGE_REGISTER_FILE, + IMM = BRW_IMMEDIATE_VALUE, + HW_REG, /* a struct brw_reg */ + ATTR, + UNIFORM, /* prog_data->params[hw_reg] */ + BAD_FILE +}; + +class reg +{ +public: + /** Register file: ARF, GRF, MRF, IMM. */ + enum register_file file; + /** virtual register number. 0 = fixed hw reg */ + int reg; + /** Offset within the virtual register. */ + int reg_offset; + /** Register type. BRW_REGISTER_TYPE_* */ + int type; + bool sechalf; + struct brw_reg fixed_hw_reg; + int smear; /* -1, or a channel of the reg to smear to all channels. */ + + /** Value for file == BRW_IMMMEDIATE_FILE */ + union { + int32_t i; + uint32_t u; + float f; + } imm; +}; + +class src_reg : public reg +{ +public: + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = ralloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + void init() + { + memset(this, 0, sizeof(*this)); + + this->file = BAD_FILE; + } + + src_reg(register_file file, int reg, const glsl_type *type) + { + init(); + + this->file = file; + this->reg = reg; + if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) + this->swizzle = swizzle_for_size(type->vector_elements); + else + this->swizzle = SWIZZLE_XYZW; + } + + /** Generic unset register constructor. */ + src_reg() + { + init(); + } + + src_reg(float f) + { + init(); + + this->file = IMM; + this->type = BRW_REGISTER_TYPE_F; + this->imm.f = f; + } + + src_reg(uint32_t u) + { + init(); + + this->file = IMM; + this->type = BRW_REGISTER_TYPE_UD; + this->imm.f = u; + } + + src_reg(int32_t i) + { + init(); + + this->file = IMM; + this->type = BRW_REGISTER_TYPE_D; + this->imm.i = i; + } + + src_reg(class vec4_visitor *v, const struct glsl_type *type); + + explicit src_reg(dst_reg reg); + + GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */ + bool negate; + bool abs; + + src_reg *reladdr; +}; + +class dst_reg : public reg +{ +public: + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = ralloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + void init() + { + memset(this, 0, sizeof(*this)); + this->file = BAD_FILE; + this->writemask = WRITEMASK_XYZW; + } + + dst_reg() + { + init(); + } + + dst_reg(register_file file, int reg) + { + init(); + + this->file = file; + this->reg = reg; + } + + dst_reg(struct brw_reg reg) + { + init(); + + this->file = HW_REG; + this->fixed_hw_reg = reg; + } + + dst_reg(class vec4_visitor *v, const struct glsl_type *type); + + explicit dst_reg(src_reg reg); + + int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ + + src_reg *reladdr; +}; + +class vec4_instruction : public exec_node { +public: + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = rzalloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + struct brw_reg get_dst(void); + struct brw_reg get_src(int i); + + enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ + dst_reg dst; + src_reg src[3]; + + bool saturate; + bool predicate_inverse; + uint32_t predicate; + + int conditional_mod; /**< BRW_CONDITIONAL_* */ + + int sampler; + int target; /**< MRT target. */ + bool shadow_compare; + + bool eot; + bool header_present; + int mlen; /**< SEND message length */ + int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */ + + uint32_t offset; /* spill/unspill offset */ + /** @{ + * Annotation for the generated IR. One of the two can be set. + */ + ir_instruction *ir; + const char *annotation; +}; + +class vec4_visitor : public ir_visitor +{ +public: + vec4_visitor(struct brw_vs_compile *c, + struct gl_shader_program *prog, struct brw_shader *shader); + ~vec4_visitor(); + + dst_reg dst_null_f() + { + return dst_reg(brw_null_reg()); + } + + dst_reg dst_null_d() + { + return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + } + + dst_reg dst_null_cmp() + { + if (intel->gen > 4) + return dst_null_d(); + else + return dst_null_f(); + } + + struct brw_context *brw; + const struct gl_vertex_program *vp; + struct intel_context *intel; + struct gl_context *ctx; + struct brw_vs_compile *c; + struct brw_vs_prog_data *prog_data; + struct brw_compile *p; + struct brw_shader *shader; + struct gl_shader_program *prog; + void *mem_ctx; + exec_list instructions; + + char *fail_msg; + bool failed; + + /** + * GLSL IR currently being processed, which is associated with our + * driver IR instructions for debugging purposes. + */ + ir_instruction *base_ir; + const char *current_annotation; + + int *virtual_grf_sizes; + int virtual_grf_count; + int virtual_grf_array_size; + int first_non_payload_grf; + int *virtual_grf_def; + int *virtual_grf_use; + bool live_intervals_valid; + + dst_reg *variable_storage(ir_variable *var); + + void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr); + + src_reg src_reg_for_float(float val); + + /** + * \name Visit methods + * + * As typical for the visitor pattern, there must be one \c visit method for + * each concrete subclass of \c ir_instruction. Virtual base classes within + * the hierarchy should not have \c visit methods. + */ + /*@{*/ + virtual void visit(ir_variable *); + virtual void visit(ir_loop *); + virtual void visit(ir_loop_jump *); + virtual void visit(ir_function_signature *); + virtual void visit(ir_function *); + virtual void visit(ir_expression *); + virtual void visit(ir_swizzle *); + virtual void visit(ir_dereference_variable *); + virtual void visit(ir_dereference_array *); + virtual void visit(ir_dereference_record *); + virtual void visit(ir_assignment *); + virtual void visit(ir_constant *); + virtual void visit(ir_call *); + virtual void visit(ir_return *); + virtual void visit(ir_discard *); + virtual void visit(ir_texture *); + virtual void visit(ir_if *); + /*@}*/ + + src_reg result; + + /* Regs for vertex results. Generated at ir_variable visiting time + * for the ir->location's used. + */ + dst_reg output_reg[VERT_RESULT_MAX]; + int uniform_size[MAX_UNIFORMS]; + int uniforms; + + struct hash_table *variable_ht; + + bool run(void); + void fail(const char *msg, ...); + + int virtual_grf_alloc(int size); + int setup_uniform_values(int loc, const glsl_type *type); + void setup_builtin_uniform_values(ir_variable *ir); + int setup_attributes(int payload_reg); + int setup_uniforms(int payload_reg); + void setup_payload(); + void reg_allocate_trivial(); + void reg_allocate(); + void move_grf_array_access_to_scratch(); + void calculate_live_intervals(); + bool dead_code_eliminate(); + bool virtual_grf_interferes(int a, int b); + + vec4_instruction *emit(enum opcode opcode); + + vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0); + + vec4_instruction *emit(enum opcode opcode, dst_reg dst, + src_reg src0, src_reg src1); + + vec4_instruction *emit(enum opcode opcode, dst_reg dst, + src_reg src0, src_reg src1, src_reg src2); + + bool try_rewrite_rhs_to_dst(ir_assignment *ir, + dst_reg dst, + src_reg src, + vec4_instruction *pre_rhs_inst, + vec4_instruction *last_rhs_inst); + + /** Walks an exec_list of ir_instruction and sends it through this visitor. */ + void visit_instructions(const exec_list *list); + + void emit_bool_to_cond_code(ir_rvalue *ir); + void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1); + void emit_if_gen6(ir_if *ir); + + void emit_block_move(dst_reg *dst, src_reg *src, + const struct glsl_type *type, bool predicated); + + void emit_constant_values(dst_reg *dst, ir_constant *value); + + /** + * Emit the correct dot-product instruction for the type of arguments + */ + void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements); + + void emit_scalar(ir_instruction *ir, enum prog_opcode op, + dst_reg dst, src_reg src0); + + void emit_scalar(ir_instruction *ir, enum prog_opcode op, + dst_reg dst, src_reg src0, src_reg src1); + + void emit_scs(ir_instruction *ir, enum prog_opcode op, + dst_reg dst, const src_reg &src); + + void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src); + void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src); + void emit_math(enum opcode opcode, dst_reg dst, src_reg src); + void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); + void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); + void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); + + int emit_vue_header_gen6(int header_mrf); + int emit_vue_header_gen4(int header_mrf); + void emit_urb_writes(void); + + src_reg get_scratch_offset(vec4_instruction *inst, + src_reg *reladdr, int reg_offset); + void emit_scratch_read(vec4_instruction *inst, + dst_reg dst, + src_reg orig_src, + int base_offset); + void emit_scratch_write(vec4_instruction *inst, + src_reg temp, + dst_reg orig_dst, + int base_offset); + + GLboolean try_emit_sat(ir_expression *ir); + + bool process_move_condition(ir_rvalue *ir); + + void generate_code(); + void generate_vs_instruction(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg *src); + + void generate_math1_gen4(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src); + void generate_math1_gen6(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src); + void generate_math2_gen4(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1); + void generate_math2_gen6(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1); + + void generate_urb_write(vec4_instruction *inst); + void generate_oword_dual_block_offsets(struct brw_reg m1, + struct brw_reg index); + void generate_scratch_write(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg index); + void generate_scratch_read(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg index); +}; + +} /* namespace brw */ + +#endif /* BRW_VEC4_H */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp new file mode 100644 index 00000000000..65ac7d9dc09 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -0,0 +1,854 @@ +/* Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_vec4.h" +#include "../glsl/ir_print_visitor.h" + +extern "C" { +#include "brw_eu.h" +}; + +using namespace brw; + +namespace brw { + +int +vec4_visitor::setup_attributes(int payload_reg) +{ + int nr_attributes; + int attribute_map[VERT_ATTRIB_MAX]; + + nr_attributes = 0; + for (int i = 0; i < VERT_ATTRIB_MAX; i++) { + if (prog_data->inputs_read & BITFIELD64_BIT(i)) { + attribute_map[i] = payload_reg + nr_attributes; + nr_attributes++; + + /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED + * attributes come in as floating point conversions of the + * integer values. + */ + if (c->key.gl_fixed_input_size[i] != 0) { + struct brw_reg reg = brw_vec8_grf(attribute_map[i], 0); + + brw_MUL(p, + brw_writemask(reg, (1 << c->key.gl_fixed_input_size[i]) - 1), + reg, brw_imm_f(1.0 / 65536.0)); + } + } + } + + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + for (int i = 0; i < 3; i++) { + if (inst->src[i].file != ATTR) + continue; + + int grf = attribute_map[inst->src[i].reg + inst->src[i].reg_offset]; + + struct brw_reg reg = brw_vec8_grf(grf, 0); + reg.dw1.bits.swizzle = inst->src[i].swizzle; + if (inst->src[i].abs) + reg = brw_abs(reg); + if (inst->src[i].negate) + reg = negate(reg); + + inst->src[i].file = HW_REG; + inst->src[i].fixed_hw_reg = reg; + } + } + + /* The BSpec says we always have to read at least one thing from + * the VF, and it appears that the hardware wedges otherwise. + */ + if (nr_attributes == 0) + nr_attributes = 1; + + prog_data->urb_read_length = (nr_attributes + 1) / 2; + + return payload_reg + nr_attributes; +} + +int +vec4_visitor::setup_uniforms(int reg) +{ + /* User clip planes from curbe: + */ + if (c->key.nr_userclip) { + if (intel->gen >= 6) { + for (int i = 0; i < c->key.nr_userclip; i++) { + c->userplane[i] = stride(brw_vec4_grf(reg + i / 2, + (i % 2) * 4), 0, 4, 1); + } + reg += ALIGN(c->key.nr_userclip, 2) / 2; + } else { + for (int i = 0; i < c->key.nr_userclip; i++) { + c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2, + (i % 2) * 4), 0, 4, 1); + } + reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2; + } + } + + /* The pre-gen6 VS requires that some push constants get loaded no + * matter what, or the GPU would hang. + */ + if (intel->gen < 6 && this->uniforms == 0) { + this->uniform_size[this->uniforms] = 1; + + for (unsigned int i = 0; i < 4; i++) { + unsigned int slot = this->uniforms * 4 + i; + + c->prog_data.param[slot] = NULL; + c->prog_data.param_convert[slot] = PARAM_CONVERT_ZERO; + } + + this->uniforms++; + reg++; + } else { + reg += ALIGN(uniforms, 2) / 2; + } + + /* for now, we are not doing any elimination of unused slots, nor + * are we packing our uniforms. + */ + c->prog_data.nr_params = this->uniforms * 4; + + c->prog_data.curb_read_length = reg - 1; + c->prog_data.uses_new_param_layout = true; + + return reg; +} + +void +vec4_visitor::setup_payload(void) +{ + int reg = 0; + + /* The payload always contains important data in g0, which contains + * the URB handles that are passed on to the URB write at the end + * of the thread. So, we always start push constants at g1. + */ + reg++; + + reg = setup_uniforms(reg); + + reg = setup_attributes(reg); + + this->first_non_payload_grf = reg; +} + +struct brw_reg +vec4_instruction::get_dst(void) +{ + struct brw_reg brw_reg; + + switch (dst.file) { + case GRF: + brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0); + brw_reg = retype(brw_reg, dst.type); + brw_reg.dw1.bits.writemask = dst.writemask; + break; + + case HW_REG: + brw_reg = dst.fixed_hw_reg; + break; + + case BAD_FILE: + brw_reg = brw_null_reg(); + break; + + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; + } + return brw_reg; +} + +struct brw_reg +vec4_instruction::get_src(int i) +{ + struct brw_reg brw_reg; + + switch (src[i].file) { + case GRF: + brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0); + brw_reg = retype(brw_reg, src[i].type); + brw_reg.dw1.bits.swizzle = src[i].swizzle; + if (src[i].abs) + brw_reg = brw_abs(brw_reg); + if (src[i].negate) + brw_reg = negate(brw_reg); + break; + + case IMM: + switch (src[i].type) { + case BRW_REGISTER_TYPE_F: + brw_reg = brw_imm_f(src[i].imm.f); + break; + case BRW_REGISTER_TYPE_D: + brw_reg = brw_imm_d(src[i].imm.i); + break; + case BRW_REGISTER_TYPE_UD: + brw_reg = brw_imm_ud(src[i].imm.u); + break; + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; + } + break; + + case UNIFORM: + brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2, + ((src[i].reg + src[i].reg_offset) % 2) * 4), + 0, 4, 1); + brw_reg = retype(brw_reg, src[i].type); + brw_reg.dw1.bits.swizzle = src[i].swizzle; + if (src[i].abs) + brw_reg = brw_abs(brw_reg); + if (src[i].negate) + brw_reg = negate(brw_reg); + break; + + case HW_REG: + brw_reg = src[i].fixed_hw_reg; + break; + + case BAD_FILE: + /* Probably unused. */ + brw_reg = brw_null_reg(); + break; + case ATTR: + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; + } + + return brw_reg; +} + +void +vec4_visitor::generate_math1_gen4(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src) +{ + brw_math(p, + dst, + brw_math_function(inst->opcode), + BRW_MATH_SATURATE_NONE, + inst->base_mrf, + src, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); +} + +static void +check_gen6_math_src_arg(struct brw_reg src) +{ + /* Source swizzles are ignored. */ + assert(!src.abs); + assert(!src.negate); + assert(src.dw1.bits.swizzle = BRW_SWIZZLE_XYZW); +} + +void +vec4_visitor::generate_math1_gen6(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src) +{ + /* Can't do writemask because math can't be align16. */ + assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); + check_gen6_math_src_arg(src); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_math(p, + dst, + brw_math_function(inst->opcode), + BRW_MATH_SATURATE_NONE, + inst->base_mrf, + src, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); + brw_set_access_mode(p, BRW_ALIGN_16); +} + +void +vec4_visitor::generate_math2_gen6(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) +{ + /* Can't do writemask because math can't be align16. */ + assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); + /* Source swizzles are ignored. */ + check_gen6_math_src_arg(src0); + check_gen6_math_src_arg(src1); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_math2(p, + dst, + brw_math_function(inst->opcode), + src0, src1); + brw_set_access_mode(p, BRW_ALIGN_16); +} + +void +vec4_visitor::generate_math2_gen4(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) +{ + /* Can't do writemask because math can't be align16. */ + assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); + + brw_MOV(p, brw_message_reg(inst->base_mrf + 1), src1); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_math(p, + dst, + brw_math_function(inst->opcode), + BRW_MATH_SATURATE_NONE, + inst->base_mrf, + src0, + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + brw_set_access_mode(p, BRW_ALIGN_16); +} + +void +vec4_visitor::generate_urb_write(vec4_instruction *inst) +{ + brw_urb_WRITE(p, + brw_null_reg(), /* dest */ + inst->base_mrf, /* starting mrf reg nr */ + brw_vec8_grf(0, 0), /* src */ + false, /* allocate */ + true, /* used */ + inst->mlen, + 0, /* response len */ + inst->eot, /* eot */ + inst->eot, /* writes complete */ + inst->offset, /* urb destination offset */ + BRW_URB_SWIZZLE_INTERLEAVE); +} + +void +vec4_visitor::generate_oword_dual_block_offsets(struct brw_reg m1, + struct brw_reg index) +{ + int second_vertex_offset; + + if (intel->gen >= 6) + second_vertex_offset = 1; + else + second_vertex_offset = 16; + + m1 = retype(m1, BRW_REGISTER_TYPE_D); + + /* Set up M1 (message payload). Only the block offsets in M1.0 and + * M1.4 are used, and the rest are ignored. + */ + struct brw_reg m1_0 = suboffset(vec1(m1), 0); + struct brw_reg m1_4 = suboffset(vec1(m1), 4); + struct brw_reg index_0 = suboffset(vec1(index), 0); + struct brw_reg index_4 = suboffset(vec1(index), 4); + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_access_mode(p, BRW_ALIGN_1); + + brw_MOV(p, m1_0, index_0); + + brw_set_predicate_inverse(p, true); + if (index.file == BRW_IMMEDIATE_VALUE) { + index_4.dw1.ud++; + brw_MOV(p, m1_4, index_4); + } else { + brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset)); + } + + brw_pop_insn_state(p); +} + +void +vec4_visitor::generate_scratch_read(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg index) +{ + if (intel->gen >= 6) { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, + retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D), + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D)); + brw_pop_insn_state(p); + } + + generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), + index); + + uint32_t msg_type; + + if (intel->gen >= 6) + msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + else if (intel->gen == 5 || intel->is_g4x) + msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + else + msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + + /* Each of the 8 channel enables is considered for whether each + * dword is written. + */ + struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, send, dst); + brw_set_src0(p, send, brw_message_reg(inst->base_mrf)); + brw_set_dp_read_message(p, send, + 255, /* binding table index: stateless access */ + BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, + msg_type, + BRW_DATAPORT_READ_TARGET_RENDER_CACHE, + 2, /* mlen */ + 1 /* rlen */); +} + +void +vec4_visitor::generate_scratch_write(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg index) +{ + /* If the instruction is predicated, we'll predicate the send, not + * the header setup. + */ + brw_set_predicate_control(p, false); + + if (intel->gen >= 6) { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, + retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D), + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D)); + brw_pop_insn_state(p); + } + + generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), + index); + + brw_MOV(p, + retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D), + retype(src, BRW_REGISTER_TYPE_D)); + + uint32_t msg_type; + + if (intel->gen >= 6) + msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; + else + msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; + + brw_set_predicate_control(p, inst->predicate); + + /* Each of the 8 channel enables is considered for whether each + * dword is written. + */ + struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, send, dst); + brw_set_src0(p, send, brw_message_reg(inst->base_mrf)); + brw_set_dp_write_message(p, send, + 255, /* binding table index: stateless access */ + BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, + msg_type, + 3, /* mlen */ + true, /* header present */ + false, /* pixel scoreboard */ + 0, /* rlen */ + false, /* eot */ + false /* commit */); +} + +void +vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, + struct brw_reg dst, + struct brw_reg *src) +{ + vec4_instruction *inst = (vec4_instruction *)instruction; + + switch (inst->opcode) { + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: + if (intel->gen >= 6) { + generate_math1_gen6(inst, dst, src[0]); + } else { + generate_math1_gen4(inst, dst, src[0]); + } + break; + + case SHADER_OPCODE_POW: + if (intel->gen >= 6) { + generate_math2_gen6(inst, dst, src[0], src[1]); + } else { + generate_math2_gen4(inst, dst, src[0], src[1]); + } + break; + + case VS_OPCODE_URB_WRITE: + generate_urb_write(inst); + break; + + case VS_OPCODE_SCRATCH_READ: + generate_scratch_read(inst, dst, src[0]); + break; + + case VS_OPCODE_SCRATCH_WRITE: + generate_scratch_write(inst, dst, src[0], src[1]); + break; + + default: + if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { + fail("unsupported opcode in `%s' in VS\n", + brw_opcodes[inst->opcode].name); + } else { + fail("Unsupported opcode %d in VS", inst->opcode); + } + } +} + +bool +vec4_visitor::run() +{ + /* Generate VS IR for main(). (the visitor only descends into + * functions called "main"). + */ + visit_instructions(shader->ir); + + emit_urb_writes(); + + /* Before any optimization, push array accesses out to scratch + * space where we need them to be. This pass may allocate new + * virtual GRFs, so we want to do it early. It also makes sure + * that we have reladdr computations available for CSE, since we'll + * often do repeated subexpressions for those. + */ + move_grf_array_access_to_scratch(); + + bool progress; + do { + progress = false; + progress = dead_code_eliminate() || progress; + } while (progress); + + if (failed) + return false; + + setup_payload(); + reg_allocate(); + + if (failed) + return false; + + brw_set_access_mode(p, BRW_ALIGN_16); + + generate_code(); + + return !failed; +} + +void +vec4_visitor::generate_code() +{ + int last_native_inst = p->nr_insn; + const char *last_annotation_string = NULL; + ir_instruction *last_annotation_ir = NULL; + + int loop_stack_array_size = 16; + int loop_stack_depth = 0; + brw_instruction **loop_stack = + rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size); + int *if_depth_in_loop = + rzalloc_array(this->mem_ctx, int, loop_stack_array_size); + + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + printf("Native code for vertex shader %d:\n", prog->Name); + } + + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + struct brw_reg src[3], dst; + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + if (last_annotation_ir != inst->ir) { + last_annotation_ir = inst->ir; + if (last_annotation_ir) { + printf(" "); + last_annotation_ir->print(); + printf("\n"); + } + } + if (last_annotation_string != inst->annotation) { + last_annotation_string = inst->annotation; + if (last_annotation_string) + printf(" %s\n", last_annotation_string); + } + } + + for (unsigned int i = 0; i < 3; i++) { + src[i] = inst->get_src(i); + } + dst = inst->get_dst(); + + brw_set_conditionalmod(p, inst->conditional_mod); + brw_set_predicate_control(p, inst->predicate); + brw_set_predicate_inverse(p, inst->predicate_inverse); + brw_set_saturate(p, inst->saturate); + + switch (inst->opcode) { + case BRW_OPCODE_MOV: + brw_MOV(p, dst, src[0]); + break; + case BRW_OPCODE_ADD: + brw_ADD(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_MUL: + brw_MUL(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_MACH: + brw_set_acc_write_control(p, 1); + brw_MACH(p, dst, src[0], src[1]); + brw_set_acc_write_control(p, 0); + break; + + case BRW_OPCODE_FRC: + brw_FRC(p, dst, src[0]); + break; + case BRW_OPCODE_RNDD: + brw_RNDD(p, dst, src[0]); + break; + case BRW_OPCODE_RNDE: + brw_RNDE(p, dst, src[0]); + break; + case BRW_OPCODE_RNDZ: + brw_RNDZ(p, dst, src[0]); + break; + + case BRW_OPCODE_AND: + brw_AND(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_OR: + brw_OR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_XOR: + brw_XOR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_NOT: + brw_NOT(p, dst, src[0]); + break; + case BRW_OPCODE_ASR: + brw_ASR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_SHR: + brw_SHR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_SHL: + brw_SHL(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_CMP: + brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); + break; + case BRW_OPCODE_SEL: + brw_SEL(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_DP4: + brw_DP4(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_DP3: + brw_DP3(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_DP2: + brw_DP2(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_IF: + if (inst->src[0].file != BAD_FILE) { + /* The instruction has an embedded compare (only allowed on gen6) */ + assert(intel->gen == 6); + gen6_IF(p, inst->conditional_mod, src[0], src[1]); + } else { + struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8); + brw_inst->header.predicate_control = inst->predicate; + } + if_depth_in_loop[loop_stack_depth]++; + break; + + case BRW_OPCODE_ELSE: + brw_ELSE(p); + break; + case BRW_OPCODE_ENDIF: + brw_ENDIF(p); + if_depth_in_loop[loop_stack_depth]--; + break; + + case BRW_OPCODE_DO: + loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); + if (loop_stack_array_size <= loop_stack_depth) { + loop_stack_array_size *= 2; + loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *, + loop_stack_array_size); + if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int, + loop_stack_array_size); + } + if_depth_in_loop[loop_stack_depth] = 0; + break; + + case BRW_OPCODE_BREAK: + brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case BRW_OPCODE_CONTINUE: + /* FINISHME: We need to write the loop instruction support still. */ + if (intel->gen >= 6) + gen6_CONT(p, loop_stack[loop_stack_depth - 1]); + else + brw_CONT(p, if_depth_in_loop[loop_stack_depth]); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + + case BRW_OPCODE_WHILE: { + struct brw_instruction *inst0, *inst1; + GLuint br = 1; + + if (intel->gen >= 5) + br = 2; + + assert(loop_stack_depth > 0); + loop_stack_depth--; + inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); + if (intel->gen < 6) { + /* patch all the BREAK/CONT instructions from last BGNLOOP */ + while (inst0 > loop_stack[loop_stack_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + } + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + } + } + } + } + break; + + default: + generate_vs_instruction(inst, dst, src); + break; + } + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { + if (0) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *)&p->store[i])[3], + ((uint32_t *)&p->store[i])[2], + ((uint32_t *)&p->store[i])[1], + ((uint32_t *)&p->store[i])[0]); + } + brw_disasm(stdout, &p->store[i], intel->gen); + } + } + + last_native_inst = p->nr_insn; + } + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + printf("\n"); + } + + ralloc_free(loop_stack); + ralloc_free(if_depth_in_loop); + + brw_set_uip_jip(p); + + /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS + * emit issues, it doesn't get the jump distances into the output, + * which is often something we want to debug. So this is here in + * case you're doing that. + */ + if (0) { + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + for (unsigned int i = 0; i < p->nr_insn; i++) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *)&p->store[i])[3], + ((uint32_t *)&p->store[i])[2], + ((uint32_t *)&p->store[i])[1], + ((uint32_t *)&p->store[i])[0]); + brw_disasm(stdout, &p->store[i], intel->gen); + } + } + } +} + +extern "C" { + +bool +brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c) +{ + if (!prog) + return false; + + struct brw_shader *shader = + (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; + if (!shader) + return false; + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + printf("GLSL IR for native vertex shader %d:\n", prog->Name); + _mesa_print_ir(shader->ir, NULL); + printf("\n\n"); + } + + vec4_visitor v(c, prog, shader); + if (!v.run()) { + prog->LinkStatus = GL_FALSE; + ralloc_strcat(&prog->InfoLog, v.fail_msg); + return false; + } + + return true; +} + +} /* extern "C" */ + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp new file mode 100644 index 00000000000..3f052ff64cf --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -0,0 +1,234 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +extern "C" { +#include "main/macros.h" +#include "program/register_allocate.h" +} /* extern "C" */ + +#include "brw_vec4.h" +#include "../glsl/ir_print_visitor.h" + +using namespace brw; + +namespace brw { + +static void +assign(int *reg_hw_locations, reg *reg) +{ + if (reg->file == GRF) { + reg->reg = reg_hw_locations[reg->reg]; + } +} + +void +vec4_visitor::reg_allocate_trivial() +{ + int hw_reg_mapping[this->virtual_grf_count]; + bool virtual_grf_used[this->virtual_grf_count]; + int i; + int next; + + /* Calculate which virtual GRFs are actually in use after whatever + * optimization passes have occurred. + */ + for (int i = 0; i < this->virtual_grf_count; i++) { + virtual_grf_used[i] = false; + } + + foreach_iter(exec_list_iterator, iter, this->instructions) { + vec4_instruction *inst = (vec4_instruction *)iter.get(); + + if (inst->dst.file == GRF) + virtual_grf_used[inst->dst.reg] = true; + + for (int i = 0; i < 3; i++) { + if (inst->src[i].file == GRF) + virtual_grf_used[inst->src[i].reg] = true; + } + } + + hw_reg_mapping[0] = this->first_non_payload_grf; + next = hw_reg_mapping[0] + this->virtual_grf_sizes[0]; + for (i = 1; i < this->virtual_grf_count; i++) { + if (virtual_grf_used[i]) { + hw_reg_mapping[i] = next; + next += this->virtual_grf_sizes[i]; + } + } + prog_data->total_grf = next; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + vec4_instruction *inst = (vec4_instruction *)iter.get(); + + assign(hw_reg_mapping, &inst->dst); + assign(hw_reg_mapping, &inst->src[0]); + assign(hw_reg_mapping, &inst->src[1]); + assign(hw_reg_mapping, &inst->src[2]); + } + + if (prog_data->total_grf > BRW_MAX_GRF) { + fail("Ran out of regs on trivial allocator (%d/%d)\n", + prog_data->total_grf, BRW_MAX_GRF); + } +} + +static void +brw_alloc_reg_set_for_classes(struct brw_context *brw, + int *class_sizes, + int class_count, + int base_reg_count) +{ + /* Compute the total number of registers across all classes. */ + int ra_reg_count = 0; + for (int i = 0; i < class_count; i++) { + ra_reg_count += base_reg_count - (class_sizes[i] - 1); + } + + ralloc_free(brw->vs.ra_reg_to_grf); + brw->vs.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count); + ralloc_free(brw->vs.regs); + brw->vs.regs = ra_alloc_reg_set(ra_reg_count); + ralloc_free(brw->vs.classes); + brw->vs.classes = ralloc_array(brw, int, class_count + 1); + + /* Now, add the registers to their classes, and add the conflicts + * between them and the base GRF registers (and also each other). + */ + int reg = 0; + for (int i = 0; i < class_count; i++) { + int class_reg_count = base_reg_count - (class_sizes[i] - 1); + brw->vs.classes[i] = ra_alloc_reg_class(brw->vs.regs); + + for (int j = 0; j < class_reg_count; j++) { + ra_class_add_reg(brw->vs.regs, brw->vs.classes[i], reg); + + brw->vs.ra_reg_to_grf[reg] = j; + + for (int base_reg = j; + base_reg < j + class_sizes[i]; + base_reg++) { + ra_add_transitive_reg_conflict(brw->vs.regs, base_reg, reg); + } + + reg++; + } + } + assert(reg == ra_reg_count); + + ra_set_finalize(brw->vs.regs); +} + +void +vec4_visitor::reg_allocate() +{ + int hw_reg_mapping[virtual_grf_count]; + int first_assigned_grf = this->first_non_payload_grf; + int base_reg_count = BRW_MAX_GRF - first_assigned_grf; + int class_sizes[base_reg_count]; + int class_count = 0; + + /* Using the trivial allocator can be useful in debugging undefined + * register access as a result of broken optimization passes. + */ + if (0) { + reg_allocate_trivial(); + return; + } + + calculate_live_intervals(); + + /* Set up the register classes. + * + * The base registers store a vec4. However, we'll need larger + * storage for arrays, structures, and matrices, which will be sets + * of contiguous registers. + */ + class_sizes[class_count++] = 1; + + for (int r = 0; r < virtual_grf_count; r++) { + int i; + + for (i = 0; i < class_count; i++) { + if (class_sizes[i] == this->virtual_grf_sizes[r]) + break; + } + if (i == class_count) { + if (this->virtual_grf_sizes[r] >= base_reg_count) { + fail("Object too large to register allocate.\n"); + } + + class_sizes[class_count++] = this->virtual_grf_sizes[r]; + } + } + + brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, base_reg_count); + + struct ra_graph *g = ra_alloc_interference_graph(brw->vs.regs, + virtual_grf_count); + + for (int i = 0; i < virtual_grf_count; i++) { + for (int c = 0; c < class_count; c++) { + if (class_sizes[c] == this->virtual_grf_sizes[i]) { + ra_set_node_class(g, i, brw->vs.classes[c]); + break; + } + } + + for (int j = 0; j < i; j++) { + if (virtual_grf_interferes(i, j)) { + ra_add_node_interference(g, i, j); + } + } + } + + if (!ra_allocate_no_spills(g)) { + ralloc_free(g); + fail("No register spilling support yet\n"); + } + + /* Get the chosen virtual registers for each node, and map virtual + * regs in the register classes back down to real hardware reg + * numbers. + */ + prog_data->total_grf = first_assigned_grf; + for (int i = 0; i < virtual_grf_count; i++) { + int reg = ra_get_node_reg(g, i); + + hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg]; + prog_data->total_grf = MAX2(prog_data->total_grf, hw_reg_mapping[i] + 1); + } + + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + assign(hw_reg_mapping, &inst->dst); + assign(hw_reg_mapping, &inst->src[0]); + assign(hw_reg_mapping, &inst->src[1]); + assign(hw_reg_mapping, &inst->src[2]); + } + + ralloc_free(g); +} + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp new file mode 100644 index 00000000000..b3a07bd0539 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -0,0 +1,2156 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_vec4.h" +extern "C" { +#include "main/macros.h" +#include "program/prog_parameter.h" +} + +namespace brw { + +src_reg::src_reg(dst_reg reg) +{ + init(); + + this->file = reg.file; + this->reg = reg.reg; + this->reg_offset = reg.reg_offset; + this->type = reg.type; + this->reladdr = reg.reladdr; + this->fixed_hw_reg = reg.fixed_hw_reg; + + int swizzles[4]; + int next_chan = 0; + int last = 0; + + for (int i = 0; i < 4; i++) { + if (!(reg.writemask & (1 << i))) + continue; + + swizzles[next_chan++] = last = i; + } + + for (; next_chan < 4; next_chan++) { + swizzles[next_chan] = last; + } + + this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], + swizzles[2], swizzles[3]); +} + +dst_reg::dst_reg(src_reg reg) +{ + init(); + + this->file = reg.file; + this->reg = reg.reg; + this->reg_offset = reg.reg_offset; + this->type = reg.type; + this->writemask = WRITEMASK_XYZW; + this->reladdr = reg.reladdr; + this->fixed_hw_reg = reg.fixed_hw_reg; +} + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode, dst_reg dst, + src_reg src0, src_reg src1, src_reg src2) +{ + vec4_instruction *inst = new(mem_ctx) vec4_instruction(); + + inst->opcode = opcode; + inst->dst = dst; + inst->src[0] = src0; + inst->src[1] = src1; + inst->src[2] = src2; + inst->ir = this->base_ir; + inst->annotation = this->current_annotation; + + this->instructions.push_tail(inst); + + return inst; +} + + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1) +{ + return emit(opcode, dst, src0, src1, src_reg()); +} + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0) +{ + assert(dst.writemask != 0); + return emit(opcode, dst, src0, src_reg(), src_reg()); +} + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode) +{ + return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg()); +} + +void +vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements) +{ + static enum opcode dot_opcodes[] = { + BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4 + }; + + emit(dot_opcodes[elements - 2], dst, src0, src1); +} + +void +vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src) +{ + /* The gen6 math instruction ignores the source modifiers -- + * swizzle, abs, negate, and at least some parts of the register + * region description. + */ + src_reg temp_src = src_reg(this, glsl_type::vec4_type); + emit(BRW_OPCODE_MOV, dst_reg(temp_src), src); + + if (dst.writemask != WRITEMASK_XYZW) { + /* The gen6 math instruction must be align1, so we can't do + * writemasks. + */ + dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type); + + emit(opcode, temp_dst, temp_src); + + emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst)); + } else { + emit(opcode, dst, temp_src); + } +} + +void +vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src) +{ + vec4_instruction *inst = emit(opcode, dst, src); + inst->base_mrf = 1; + inst->mlen = 1; +} + +void +vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src) +{ + switch (opcode) { + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: + break; + default: + assert(!"not reached: bad math opcode"); + return; + } + + if (intel->gen >= 6) { + return emit_math1_gen6(opcode, dst, src); + } else { + return emit_math1_gen4(opcode, dst, src); + } +} + +void +vec4_visitor::emit_math2_gen6(enum opcode opcode, + dst_reg dst, src_reg src0, src_reg src1) +{ + src_reg expanded; + + /* The gen6 math instruction ignores the source modifiers -- + * swizzle, abs, negate, and at least some parts of the register + * region description. Move the sources to temporaries to make it + * generally work. + */ + + expanded = src_reg(this, glsl_type::vec4_type); + emit(BRW_OPCODE_MOV, dst_reg(expanded), src0); + src0 = expanded; + + expanded = src_reg(this, glsl_type::vec4_type); + emit(BRW_OPCODE_MOV, dst_reg(expanded), src1); + src1 = expanded; + + if (dst.writemask != WRITEMASK_XYZW) { + /* The gen6 math instruction must be align1, so we can't do + * writemasks. + */ + dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type); + + emit(opcode, temp_dst, src0, src1); + + emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst)); + } else { + emit(opcode, dst, src0, src1); + } +} + +void +vec4_visitor::emit_math2_gen4(enum opcode opcode, + dst_reg dst, src_reg src0, src_reg src1) +{ + vec4_instruction *inst = emit(opcode, dst, src0, src1); + inst->base_mrf = 1; + inst->mlen = 2; +} + +void +vec4_visitor::emit_math(enum opcode opcode, + dst_reg dst, src_reg src0, src_reg src1) +{ + assert(opcode == SHADER_OPCODE_POW); + + if (intel->gen >= 6) { + return emit_math2_gen6(opcode, dst, src0, src1); + } else { + return emit_math2_gen4(opcode, dst, src0, src1); + } +} + +void +vec4_visitor::visit_instructions(const exec_list *list) +{ + foreach_list(node, list) { + ir_instruction *ir = (ir_instruction *)node; + + base_ir = ir; + ir->accept(this); + } +} + + +static int +type_size(const struct glsl_type *type) +{ + unsigned int i; + int size; + + switch (type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + if (type->is_matrix()) { + return type->matrix_columns; + } else { + /* Regardless of size of vector, it gets a vec4. This is bad + * packing for things like floats, but otherwise arrays become a + * mess. Hopefully a later pass over the code can pack scalars + * down if appropriate. + */ + return 1; + } + case GLSL_TYPE_ARRAY: + assert(type->length > 0); + return type_size(type->fields.array) * type->length; + case GLSL_TYPE_STRUCT: + size = 0; + for (i = 0; i < type->length; i++) { + size += type_size(type->fields.structure[i].type); + } + return size; + case GLSL_TYPE_SAMPLER: + /* Samplers take up one slot in UNIFORMS[], but they're baked in + * at link time. + */ + return 1; + default: + assert(0); + return 0; + } +} + +int +vec4_visitor::virtual_grf_alloc(int size) +{ + if (virtual_grf_array_size <= virtual_grf_count) { + if (virtual_grf_array_size == 0) + virtual_grf_array_size = 16; + else + virtual_grf_array_size *= 2; + virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, + virtual_grf_array_size); + } + virtual_grf_sizes[virtual_grf_count] = size; + return virtual_grf_count++; +} + +src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) +{ + init(); + + this->file = GRF; + this->reg = v->virtual_grf_alloc(type_size(type)); + + if (type->is_array() || type->is_record()) { + this->swizzle = BRW_SWIZZLE_NOOP; + } else { + this->swizzle = swizzle_for_size(type->vector_elements); + } + + this->type = brw_type_for_base_type(type); +} + +dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) +{ + init(); + + this->file = GRF; + this->reg = v->virtual_grf_alloc(type_size(type)); + + if (type->is_array() || type->is_record()) { + this->writemask = WRITEMASK_XYZW; + } else { + this->writemask = (1 << type->vector_elements) - 1; + } + + this->type = brw_type_for_base_type(type); +} + +/* Our support for uniforms is piggy-backed on the struct + * gl_fragment_program, because that's where the values actually + * get stored, rather than in some global gl_shader_program uniform + * store. + */ +int +vec4_visitor::setup_uniform_values(int loc, const glsl_type *type) +{ + unsigned int offset = 0; + float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f; + + if (type->is_matrix()) { + const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, + type->vector_elements, + 1); + + for (unsigned int i = 0; i < type->matrix_columns; i++) { + offset += setup_uniform_values(loc + offset, column); + } + + return offset; + } + + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_BOOL: + for (unsigned int i = 0; i < type->vector_elements; i++) { + int slot = this->uniforms * 4 + i; + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; + break; + case GLSL_TYPE_UINT: + c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U; + break; + case GLSL_TYPE_INT: + c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I; + break; + case GLSL_TYPE_BOOL: + c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B; + break; + default: + assert(!"not reached"); + c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; + break; + } + c->prog_data.param[slot] = &values[i]; + } + + for (unsigned int i = type->vector_elements; i < 4; i++) { + c->prog_data.param_convert[this->uniforms * 4 + i] = + PARAM_CONVERT_ZERO; + c->prog_data.param[this->uniforms * 4 + i] = NULL; + } + + this->uniform_size[this->uniforms] = type->vector_elements; + this->uniforms++; + + return 1; + + case GLSL_TYPE_STRUCT: + for (unsigned int i = 0; i < type->length; i++) { + offset += setup_uniform_values(loc + offset, + type->fields.structure[i].type); + } + return offset; + + case GLSL_TYPE_ARRAY: + for (unsigned int i = 0; i < type->length; i++) { + offset += setup_uniform_values(loc + offset, type->fields.array); + } + return offset; + + case GLSL_TYPE_SAMPLER: + /* The sampler takes up a slot, but we don't use any values from it. */ + return 1; + + default: + assert(!"not reached"); + return 0; + } +} + +/* Our support for builtin uniforms is even scarier than non-builtin. + * It sits on top of the PROG_STATE_VAR parameters that are + * automatically updated from GL context state. + */ +void +vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) +{ + const ir_state_slot *const slots = ir->state_slots; + assert(ir->state_slots != NULL); + + for (unsigned int i = 0; i < ir->num_state_slots; i++) { + /* This state reference has already been setup by ir_to_mesa, + * but we'll get the same index back here. We can reference + * ParameterValues directly, since unlike brw_fs.cpp, we never + * add new state references during compile. + */ + int index = _mesa_add_state_reference(this->vp->Base.Parameters, + (gl_state_index *)slots[i].tokens); + float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f; + + this->uniform_size[this->uniforms] = 0; + /* Add each of the unique swizzled channels of the element. + * This will end up matching the size of the glsl_type of this field. + */ + int last_swiz = -1; + for (unsigned int j = 0; j < 4; j++) { + int swiz = GET_SWZ(slots[i].swizzle, j); + last_swiz = swiz; + + c->prog_data.param[this->uniforms * 4 + j] = &values[swiz]; + c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT; + if (swiz <= last_swiz) + this->uniform_size[this->uniforms]++; + } + this->uniforms++; + } +} + +dst_reg * +vec4_visitor::variable_storage(ir_variable *var) +{ + return (dst_reg *)hash_table_find(this->variable_ht, var); +} + +void +vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir) +{ + ir_expression *expr = ir->as_expression(); + + if (expr) { + src_reg op[2]; + vec4_instruction *inst; + + assert(expr->get_num_operands() <= 2); + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + assert(expr->operands[i]->type->is_scalar()); + + expr->operands[i]->accept(this); + op[i] = this->result; + } + + switch (expr->operation) { + case ir_unop_logic_not: + inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1)); + inst->conditional_mod = BRW_CONDITIONAL_Z; + break; + + case ir_binop_logic_xor: + inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_binop_logic_or: + inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_binop_logic_and: + inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_unop_f2b: + if (intel->gen >= 6) { + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f)); + } else { + inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]); + } + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_unop_i2b: + if (intel->gen >= 6) { + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + } else { + inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]); + } + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_binop_greater: + case ir_binop_gequal: + case ir_binop_less: + case ir_binop_lequal: + case ir_binop_equal: + case ir_binop_all_equal: + case ir_binop_nequal: + case ir_binop_any_nequal: + inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); + break; + + default: + assert(!"not reached"); + break; + } + return; + } + + ir->accept(this); + + if (intel->gen >= 6) { + vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(), + this->result, src_reg(1)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } else { + vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } +} + +/** + * Emit a gen6 IF statement with the comparison folded into the IF + * instruction. + */ +void +vec4_visitor::emit_if_gen6(ir_if *ir) +{ + ir_expression *expr = ir->condition->as_expression(); + + if (expr) { + src_reg op[2]; + vec4_instruction *inst; + dst_reg temp; + + assert(expr->get_num_operands() <= 2); + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + expr->operands[i]->accept(this); + op[i] = this->result; + } + + switch (expr->operation) { + case ir_unop_logic_not: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_Z; + return; + + case ir_binop_logic_xor: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_binop_logic_or: + temp = dst_reg(this, glsl_type::bool_type); + emit(BRW_OPCODE_OR, temp, op[0], op[1]); + inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_binop_logic_and: + temp = dst_reg(this, glsl_type::bool_type); + emit(BRW_OPCODE_AND, temp, op[0], op[1]); + inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_unop_f2b: + inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_unop_i2b: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_binop_greater: + case ir_binop_gequal: + case ir_binop_less: + case ir_binop_lequal: + case ir_binop_equal: + case ir_binop_nequal: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); + return; + + case ir_binop_all_equal: + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_Z; + + inst = emit(BRW_OPCODE_IF); + inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; + return; + + case ir_binop_any_nequal: + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + inst = emit(BRW_OPCODE_IF); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + return; + + case ir_unop_any: + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + inst = emit(BRW_OPCODE_IF); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + return; + + default: + assert(!"not reached"); + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + } + return; + } + + ir->condition->accept(this); + + vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(), + this->result, src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; +} + +void +vec4_visitor::visit(ir_variable *ir) +{ + dst_reg *reg = NULL; + + if (variable_storage(ir)) + return; + + switch (ir->mode) { + case ir_var_in: + reg = new(mem_ctx) dst_reg(ATTR, ir->location); + break; + + case ir_var_out: + reg = new(mem_ctx) dst_reg(this, ir->type); + + for (int i = 0; i < type_size(ir->type); i++) { + output_reg[ir->location + i] = *reg; + output_reg[ir->location + i].reg_offset = i; + output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F; + } + break; + + case ir_var_auto: + case ir_var_temporary: + reg = new(mem_ctx) dst_reg(this, ir->type); + break; + + case ir_var_uniform: + reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms); + + if (!strncmp(ir->name, "gl_", 3)) { + setup_builtin_uniform_values(ir); + } else { + setup_uniform_values(ir->location, ir->type); + } + break; + + default: + assert(!"not reached"); + } + + reg->type = brw_type_for_base_type(ir->type); + hash_table_insert(this->variable_ht, reg, ir); +} + +void +vec4_visitor::visit(ir_loop *ir) +{ + dst_reg counter; + + /* We don't want debugging output to print the whole body of the + * loop as the annotation. + */ + this->base_ir = NULL; + + if (ir->counter != NULL) { + this->base_ir = ir->counter; + ir->counter->accept(this); + counter = *(variable_storage(ir->counter)); + + if (ir->from != NULL) { + this->base_ir = ir->from; + ir->from->accept(this); + + emit(BRW_OPCODE_MOV, counter, this->result); + } + } + + emit(BRW_OPCODE_DO); + + if (ir->to) { + this->base_ir = ir->to; + ir->to->accept(this); + + vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst_null_d(), + src_reg(counter), this->result); + inst->conditional_mod = brw_conditional_for_comparison(ir->cmp); + + inst = emit(BRW_OPCODE_BREAK); + inst->predicate = BRW_PREDICATE_NORMAL; + } + + visit_instructions(&ir->body_instructions); + + + if (ir->increment) { + this->base_ir = ir->increment; + ir->increment->accept(this); + emit(BRW_OPCODE_ADD, counter, src_reg(counter), this->result); + } + + emit(BRW_OPCODE_WHILE); +} + +void +vec4_visitor::visit(ir_loop_jump *ir) +{ + switch (ir->mode) { + case ir_loop_jump::jump_break: + emit(BRW_OPCODE_BREAK); + break; + case ir_loop_jump::jump_continue: + emit(BRW_OPCODE_CONTINUE); + break; + } +} + + +void +vec4_visitor::visit(ir_function_signature *ir) +{ + assert(0); + (void)ir; +} + +void +vec4_visitor::visit(ir_function *ir) +{ + /* Ignore function bodies other than main() -- we shouldn't see calls to + * them since they should all be inlined. + */ + if (strcmp(ir->name, "main") == 0) { + const ir_function_signature *sig; + exec_list empty; + + sig = ir->matching_signature(&empty); + + assert(sig); + + visit_instructions(&sig->body); + } +} + +GLboolean +vec4_visitor::try_emit_sat(ir_expression *ir) +{ + ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); + if (!sat_src) + return false; + + sat_src->accept(this); + src_reg src = this->result; + + this->result = src_reg(this, ir->type); + vec4_instruction *inst; + inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src); + inst->saturate = true; + + return true; +} + +void +vec4_visitor::emit_bool_comparison(unsigned int op, + dst_reg dst, src_reg src0, src_reg src1) +{ + /* original gen4 does destination conversion before comparison. */ + if (intel->gen < 5) + dst.type = src0.type; + + vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1); + inst->conditional_mod = brw_conditional_for_comparison(op); + + dst.type = BRW_REGISTER_TYPE_D; + emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1)); +} + +void +vec4_visitor::visit(ir_expression *ir) +{ + unsigned int operand; + src_reg op[Elements(ir->operands)]; + src_reg result_src; + dst_reg result_dst; + vec4_instruction *inst; + + if (try_emit_sat(ir)) + return; + + for (operand = 0; operand < ir->get_num_operands(); operand++) { + this->result.file = BAD_FILE; + ir->operands[operand]->accept(this); + if (this->result.file == BAD_FILE) { + printf("Failed to get tree for expression operand:\n"); + ir->operands[operand]->print(); + exit(1); + } + op[operand] = this->result; + + /* Matrix expression operands should have been broken down to vector + * operations already. + */ + assert(!ir->operands[operand]->type->is_matrix()); + } + + int vector_elements = ir->operands[0]->type->vector_elements; + if (ir->operands[1]) { + vector_elements = MAX2(vector_elements, + ir->operands[1]->type->vector_elements); + } + + this->result.file = BAD_FILE; + + /* Storage for our result. Ideally for an assignment we'd be using + * the actual storage for the result here, instead. + */ + result_src = src_reg(this, ir->type); + /* convenience for the emit functions below. */ + result_dst = dst_reg(result_src); + /* If nothing special happens, this is the result. */ + this->result = result_src; + /* Limit writes to the channels that will be used by result_src later. + * This does limit this temp's use as a temporary for multi-instruction + * sequences. + */ + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + + switch (ir->operation) { + case ir_unop_logic_not: + /* Note that BRW_OPCODE_NOT is not appropriate here, since it is + * ones complement of the whole register, not just bit 0. + */ + emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1)); + break; + case ir_unop_neg: + op[0].negate = !op[0].negate; + this->result = op[0]; + break; + case ir_unop_abs: + op[0].abs = true; + op[0].negate = false; + this->result = op[0]; + break; + + case ir_unop_sign: + emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f)); + + inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_G; + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f)); + inst->predicate = BRW_PREDICATE_NORMAL; + + inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_L; + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f)); + inst->predicate = BRW_PREDICATE_NORMAL; + + break; + + case ir_unop_rcp: + emit_math(SHADER_OPCODE_RCP, result_dst, op[0]); + break; + + case ir_unop_exp2: + emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]); + break; + case ir_unop_log2: + emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]); + break; + case ir_unop_exp: + case ir_unop_log: + assert(!"not reached: should be handled by ir_explog_to_explog2"); + break; + case ir_unop_sin: + case ir_unop_sin_reduced: + emit_math(SHADER_OPCODE_SIN, result_dst, op[0]); + break; + case ir_unop_cos: + case ir_unop_cos_reduced: + emit_math(SHADER_OPCODE_COS, result_dst, op[0]); + break; + + case ir_unop_dFdx: + case ir_unop_dFdy: + assert(!"derivatives not valid in vertex shader"); + break; + + case ir_unop_noise: + assert(!"not reached: should be handled by lower_noise"); + break; + + case ir_binop_add: + emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]); + break; + case ir_binop_sub: + assert(!"not reached: should be handled by ir_sub_to_add_neg"); + break; + + case ir_binop_mul: + if (ir->type->is_integer()) { + /* For integer multiplication, the MUL uses the low 16 bits + * of one of the operands (src0 on gen6, src1 on gen7). The + * MACH accumulates in the contribution of the upper 16 bits + * of that operand. + * + * FINISHME: Emit just the MUL if we know an operand is small + * enough. + */ + struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D); + + emit(BRW_OPCODE_MUL, acc, op[0], op[1]); + emit(BRW_OPCODE_MACH, dst_null_d(), op[0], op[1]); + emit(BRW_OPCODE_MOV, result_dst, src_reg(acc)); + } else { + emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]); + } + break; + case ir_binop_div: + assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + case ir_binop_mod: + assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + break; + + case ir_binop_less: + case ir_binop_greater: + case ir_binop_lequal: + case ir_binop_gequal: + case ir_binop_equal: + case ir_binop_nequal: { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); + inst->conditional_mod = brw_conditional_for_comparison(ir->operation); + emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1)); + break; + } + + case ir_binop_all_equal: + /* "==" operator producing a scalar boolean. */ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_Z; + + emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); + inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; + } else { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_Z; + emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); + } + break; + case ir_binop_any_nequal: + /* "!=" operator producing a scalar boolean. */ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + } else { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); + } + break; + + case ir_unop_any: + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); + + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + break; + + case ir_binop_logic_xor: + emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); + break; + + case ir_binop_logic_or: + emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); + break; + + case ir_binop_logic_and: + emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); + break; + + case ir_binop_dot: + assert(ir->operands[0]->type->is_vector()); + assert(ir->operands[0]->type == ir->operands[1]->type); + emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements); + break; + + case ir_unop_sqrt: + emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]); + break; + case ir_unop_rsq: + emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]); + break; + case ir_unop_i2f: + case ir_unop_i2u: + case ir_unop_u2i: + case ir_unop_u2f: + case ir_unop_b2f: + case ir_unop_b2i: + case ir_unop_f2i: + emit(BRW_OPCODE_MOV, result_dst, op[0]); + break; + case ir_unop_f2b: + case ir_unop_i2b: { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1)); + break; + } + + case ir_unop_trunc: + emit(BRW_OPCODE_RNDZ, result_dst, op[0]); + break; + case ir_unop_ceil: + op[0].negate = !op[0].negate; + inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); + this->result.negate = true; + break; + case ir_unop_floor: + inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); + break; + case ir_unop_fract: + inst = emit(BRW_OPCODE_FRC, result_dst, op[0]); + break; + case ir_unop_round_even: + emit(BRW_OPCODE_RNDE, result_dst, op[0]); + break; + + case ir_binop_min: + inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_L; + + inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); + inst->predicate = BRW_PREDICATE_NORMAL; + break; + case ir_binop_max: + inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_G; + + inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); + inst->predicate = BRW_PREDICATE_NORMAL; + break; + + case ir_binop_pow: + emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]); + break; + + case ir_unop_bit_not: + inst = emit(BRW_OPCODE_NOT, result_dst, op[0]); + break; + case ir_binop_bit_and: + inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); + break; + case ir_binop_bit_xor: + inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); + break; + case ir_binop_bit_or: + inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); + break; + + case ir_binop_lshift: + case ir_binop_rshift: + assert(!"GLSL 1.30 features unsupported"); + break; + + case ir_quadop_vector: + assert(!"not reached: should be handled by lower_quadop_vector"); + break; + } +} + + +void +vec4_visitor::visit(ir_swizzle *ir) +{ + src_reg src; + int i = 0; + int swizzle[4]; + + /* Note that this is only swizzles in expressions, not those on the left + * hand side of an assignment, which do write masking. See ir_assignment + * for that. + */ + + ir->val->accept(this); + src = this->result; + assert(src.file != BAD_FILE); + + for (i = 0; i < ir->type->vector_elements; i++) { + switch (i) { + case 0: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x); + break; + case 1: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y); + break; + case 2: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z); + break; + case 3: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w); + break; + } + } + for (; i < 4; i++) { + /* Replicate the last channel out. */ + swizzle[i] = swizzle[ir->type->vector_elements - 1]; + } + + src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); + + this->result = src; +} + +void +vec4_visitor::visit(ir_dereference_variable *ir) +{ + const struct glsl_type *type = ir->type; + dst_reg *reg = variable_storage(ir->var); + + if (!reg) { + fail("Failed to find variable storage for %s\n", ir->var->name); + this->result = src_reg(brw_null_reg()); + return; + } + + this->result = src_reg(*reg); + + if (type->is_scalar() || type->is_vector() || type->is_matrix()) + this->result.swizzle = swizzle_for_size(type->vector_elements); +} + +void +vec4_visitor::visit(ir_dereference_array *ir) +{ + ir_constant *constant_index; + src_reg src; + int element_size = type_size(ir->type); + + constant_index = ir->array_index->constant_expression_value(); + + ir->array->accept(this); + src = this->result; + + if (constant_index) { + src.reg_offset += constant_index->value.i[0] * element_size; + } else { + /* Variable index array dereference. It eats the "vec4" of the + * base of the array and an index that offsets the Mesa register + * index. + */ + ir->array_index->accept(this); + + src_reg index_reg; + + if (element_size == 1) { + index_reg = this->result; + } else { + index_reg = src_reg(this, glsl_type::int_type); + + emit(BRW_OPCODE_MUL, dst_reg(index_reg), + this->result, src_reg(element_size)); + } + + if (src.reladdr) { + src_reg temp = src_reg(this, glsl_type::int_type); + + emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg); + + index_reg = temp; + } + + src.reladdr = ralloc(mem_ctx, src_reg); + memcpy(src.reladdr, &index_reg, sizeof(index_reg)); + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + src.swizzle = swizzle_for_size(ir->type->vector_elements); + else + src.swizzle = BRW_SWIZZLE_NOOP; + src.type = brw_type_for_base_type(ir->type); + + this->result = src; +} + +void +vec4_visitor::visit(ir_dereference_record *ir) +{ + unsigned int i; + const glsl_type *struct_type = ir->record->type; + int offset = 0; + + ir->record->accept(this); + + for (i = 0; i < struct_type->length; i++) { + if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) + break; + offset += type_size(struct_type->fields.structure[i].type); + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + this->result.swizzle = swizzle_for_size(ir->type->vector_elements); + else + this->result.swizzle = BRW_SWIZZLE_NOOP; + this->result.type = brw_type_for_base_type(ir->type); + + this->result.reg_offset += offset; +} + +/** + * We want to be careful in assignment setup to hit the actual storage + * instead of potentially using a temporary like we might with the + * ir_dereference handler. + */ +static dst_reg +get_assignment_lhs(ir_dereference *ir, vec4_visitor *v) +{ + /* The LHS must be a dereference. If the LHS is a variable indexed array + * access of a vector, it must be separated into a series conditional moves + * before reaching this point (see ir_vec_index_to_cond_assign). + */ + assert(ir->as_dereference()); + ir_dereference_array *deref_array = ir->as_dereference_array(); + if (deref_array) { + assert(!deref_array->array->type->is_vector()); + } + + /* Use the rvalue deref handler for the most part. We'll ignore + * swizzles in it and write swizzles using writemask, though. + */ + ir->accept(v); + return dst_reg(v->result); +} + +void +vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src, + const struct glsl_type *type, bool predicated) +{ + if (type->base_type == GLSL_TYPE_STRUCT) { + for (unsigned int i = 0; i < type->length; i++) { + emit_block_move(dst, src, type->fields.structure[i].type, predicated); + } + return; + } + + if (type->is_array()) { + for (unsigned int i = 0; i < type->length; i++) { + emit_block_move(dst, src, type->fields.array, predicated); + } + return; + } + + if (type->is_matrix()) { + const struct glsl_type *vec_type; + + vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, + type->vector_elements, 1); + + for (int i = 0; i < type->matrix_columns; i++) { + emit_block_move(dst, src, vec_type, predicated); + } + return; + } + + assert(type->is_scalar() || type->is_vector()); + + dst->type = brw_type_for_base_type(type); + src->type = dst->type; + + dst->writemask = (1 << type->vector_elements) - 1; + + /* Do we need to worry about swizzling a swizzle? */ + assert(src->swizzle = BRW_SWIZZLE_NOOP); + src->swizzle = swizzle_for_size(type->vector_elements); + + vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src); + if (predicated) + inst->predicate = BRW_PREDICATE_NORMAL; + + dst->reg_offset++; + src->reg_offset++; +} + + +/* If the RHS processing resulted in an instruction generating a + * temporary value, and it would be easy to rewrite the instruction to + * generate its result right into the LHS instead, do so. This ends + * up reliably removing instructions where it can be tricky to do so + * later without real UD chain information. + */ +bool +vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, + dst_reg dst, + src_reg src, + vec4_instruction *pre_rhs_inst, + vec4_instruction *last_rhs_inst) +{ + /* This could be supported, but it would take more smarts. */ + if (ir->condition) + return false; + + if (pre_rhs_inst == last_rhs_inst) + return false; /* No instructions generated to work with. */ + + /* Make sure the last instruction generated our source reg. */ + if (src.file != GRF || + src.file != last_rhs_inst->dst.file || + src.reg != last_rhs_inst->dst.reg || + src.reg_offset != last_rhs_inst->dst.reg_offset || + src.reladdr || + src.abs || + src.negate || + last_rhs_inst->predicate != BRW_PREDICATE_NONE) + return false; + + /* Check that that last instruction fully initialized the channels + * we want to use, in the order we want to use them. We could + * potentially reswizzle the operands of many instructions so that + * we could handle out of order channels, but don't yet. + */ + for (int i = 0; i < 4; i++) { + if (dst.writemask & (1 << i)) { + if (!(last_rhs_inst->dst.writemask & (1 << i))) + return false; + + if (BRW_GET_SWZ(src.swizzle, i) != i) + return false; + } + } + + /* Success! Rewrite the instruction. */ + last_rhs_inst->dst.file = dst.file; + last_rhs_inst->dst.reg = dst.reg; + last_rhs_inst->dst.reg_offset = dst.reg_offset; + last_rhs_inst->dst.reladdr = dst.reladdr; + last_rhs_inst->dst.writemask &= dst.writemask; + + return true; +} + +void +vec4_visitor::visit(ir_assignment *ir) +{ + dst_reg dst = get_assignment_lhs(ir->lhs, this); + + if (!ir->lhs->type->is_scalar() && + !ir->lhs->type->is_vector()) { + ir->rhs->accept(this); + src_reg src = this->result; + + if (ir->condition) { + emit_bool_to_cond_code(ir->condition); + } + + emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL); + return; + } + + /* Now we're down to just a scalar/vector with writemasks. */ + int i; + + vec4_instruction *pre_rhs_inst, *last_rhs_inst; + pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail(); + + ir->rhs->accept(this); + + last_rhs_inst = (vec4_instruction *)this->instructions.get_tail(); + + src_reg src = this->result; + + int swizzles[4]; + int first_enabled_chan = 0; + int src_chan = 0; + + assert(ir->lhs->type->is_vector() || + ir->lhs->type->is_scalar()); + dst.writemask = ir->write_mask; + + for (int i = 0; i < 4; i++) { + if (dst.writemask & (1 << i)) { + first_enabled_chan = BRW_GET_SWZ(src.swizzle, i); + break; + } + } + + /* Swizzle a small RHS vector into the channels being written. + * + * glsl ir treats write_mask as dictating how many channels are + * present on the RHS while in our instructions we need to make + * those channels appear in the slots of the vec4 they're written to. + */ + for (int i = 0; i < 4; i++) { + if (dst.writemask & (1 << i)) + swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++); + else + swizzles[i] = first_enabled_chan; + } + src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], + swizzles[2], swizzles[3]); + + if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) { + return; + } + + if (ir->condition) { + emit_bool_to_cond_code(ir->condition); + } + + for (i = 0; i < type_size(ir->lhs->type); i++) { + vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src); + + if (ir->condition) + inst->predicate = BRW_PREDICATE_NORMAL; + + dst.reg_offset++; + src.reg_offset++; + } +} + +void +vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir) +{ + if (ir->type->base_type == GLSL_TYPE_STRUCT) { + foreach_list(node, &ir->components) { + ir_constant *field_value = (ir_constant *)node; + + emit_constant_values(dst, field_value); + } + return; + } + + if (ir->type->is_array()) { + for (unsigned int i = 0; i < ir->type->length; i++) { + emit_constant_values(dst, ir->array_elements[i]); + } + return; + } + + if (ir->type->is_matrix()) { + for (int i = 0; i < ir->type->matrix_columns; i++) { + for (int j = 0; j < ir->type->vector_elements; j++) { + dst->writemask = 1 << j; + dst->type = BRW_REGISTER_TYPE_F; + + emit(BRW_OPCODE_MOV, *dst, + src_reg(ir->value.f[i * ir->type->vector_elements + j])); + } + dst->reg_offset++; + } + return; + } + + for (int i = 0; i < ir->type->vector_elements; i++) { + dst->writemask = 1 << i; + dst->type = brw_type_for_base_type(ir->type); + + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i])); + break; + case GLSL_TYPE_INT: + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i])); + break; + case GLSL_TYPE_UINT: + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i])); + break; + case GLSL_TYPE_BOOL: + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i])); + break; + default: + assert(!"Non-float/uint/int/bool constant"); + break; + } + } + dst->reg_offset++; +} + +void +vec4_visitor::visit(ir_constant *ir) +{ + dst_reg dst = dst_reg(this, ir->type); + this->result = src_reg(dst); + + emit_constant_values(&dst, ir); +} + +void +vec4_visitor::visit(ir_call *ir) +{ + assert(!"not reached"); +} + +void +vec4_visitor::visit(ir_texture *ir) +{ + /* FINISHME: Implement vertex texturing. + * + * With 0 vertex samplers available, the linker will reject + * programs that do vertex texturing, but after our visitor has + * run. + */ +} + +void +vec4_visitor::visit(ir_return *ir) +{ + assert(!"not reached"); +} + +void +vec4_visitor::visit(ir_discard *ir) +{ + assert(!"not reached"); +} + +void +vec4_visitor::visit(ir_if *ir) +{ + /* Don't point the annotation at the if statement, because then it plus + * the then and else blocks get printed. + */ + this->base_ir = ir->condition; + + if (intel->gen == 6) { + emit_if_gen6(ir); + } else { + emit_bool_to_cond_code(ir->condition); + vec4_instruction *inst = emit(BRW_OPCODE_IF); + inst->predicate = BRW_PREDICATE_NORMAL; + } + + visit_instructions(&ir->then_instructions); + + if (!ir->else_instructions.is_empty()) { + this->base_ir = ir->condition; + emit(BRW_OPCODE_ELSE); + + visit_instructions(&ir->else_instructions); + } + + this->base_ir = ir->condition; + emit(BRW_OPCODE_ENDIF); +} + +int +vec4_visitor::emit_vue_header_gen4(int header_mrf) +{ + /* Get the position */ + src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]); + + /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */ + dst_reg ndc = dst_reg(this, glsl_type::vec4_type); + + current_annotation = "NDC"; + dst_reg ndc_w = ndc; + ndc_w.writemask = WRITEMASK_W; + src_reg pos_w = pos; + pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); + emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w); + + dst_reg ndc_xyz = ndc; + ndc_xyz.writemask = WRITEMASK_XYZ; + + emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w)); + + if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || + c->key.nr_userclip || brw->has_negative_rhw_bug) { + dst_reg header1 = dst_reg(this, glsl_type::uvec4_type); + GLuint i; + + emit(BRW_OPCODE_MOV, header1, 0u); + + if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { + assert(!"finishme: psiz"); + src_reg psiz; + + header1.writemask = WRITEMASK_W; + emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11); + emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8); + } + + for (i = 0; i < c->key.nr_userclip; i++) { + vec4_instruction *inst; + + inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()), + pos, src_reg(c->userplane[i])); + inst->conditional_mod = BRW_CONDITIONAL_L; + + emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i); + inst->predicate = BRW_PREDICATE_NORMAL; + } + + /* i965 clipping workaround: + * 1) Test for -ve rhw + * 2) If set, + * set ndc = (0,0,0,0) + * set ucp[6] = 1 + * + * Later, clipping will detect ucp[6] and ensure the primitive is + * clipped against all fixed planes. + */ + if (brw->has_negative_rhw_bug) { +#if 0 + /* FINISHME */ + brw_CMP(p, + vec8(brw_null_reg()), + BRW_CONDITIONAL_L, + brw_swizzle1(ndc, 3), + brw_imm_f(0)); + + brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6)); + brw_MOV(p, ndc, brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +#endif + } + + header1.writemask = WRITEMASK_XYZW; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1)); + } else { + emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++), + BRW_REGISTER_TYPE_UD), 0u); + } + + if (intel->gen == 5) { + /* There are 20 DWs (D0-D19) in VUE header on Ironlake: + * dword 0-3 (m1) of the header is indices, point width, clip flags. + * dword 4-7 (m2) is the ndc position (set above) + * dword 8-11 (m3) of the vertex header is the 4D space position + * dword 12-19 (m4,m5) of the vertex header is the user clip distance. + * m6 is a pad so that the vertex element data is aligned + * m7 is the first vertex data we fill. + */ + current_annotation = "NDC"; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); + + current_annotation = "gl_Position"; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); + + /* user clip distance. */ + header_mrf += 2; + + /* Pad so that vertex element data is aligned. */ + header_mrf++; + } else { + /* There are 8 dwords in VUE header pre-Ironlake: + * dword 0-3 (m1) is indices, point width, clip flags. + * dword 4-7 (m2) is ndc position (set above) + * + * dword 8-11 (m3) is the first vertex data. + */ + current_annotation = "NDC"; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); + + current_annotation = "gl_Position"; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); + } + + return header_mrf; +} + +int +vec4_visitor::emit_vue_header_gen6(int header_mrf) +{ + struct brw_reg reg; + + /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge: + * dword 0-3 (m2) of the header is indices, point width, clip flags. + * dword 4-7 (m3) is the 4D space position + * dword 8-15 (m4,m5) of the vertex header is the user clip distance if + * enabled. + * + * m4 or 6 is the first vertex element data we fill. + */ + + current_annotation = "indices, point width, clip flags"; + reg = brw_message_reg(header_mrf++); + emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)); + if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { + emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W), + src_reg(output_reg[VERT_RESULT_PSIZ])); + } + + current_annotation = "gl_Position"; + emit(BRW_OPCODE_MOV, + brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS])); + + current_annotation = "user clip distances"; + if (c->key.nr_userclip) { + for (int i = 0; i < c->key.nr_userclip; i++) { + struct brw_reg m; + if (i < 4) + m = brw_message_reg(header_mrf); + else + m = brw_message_reg(header_mrf + 1); + + emit(BRW_OPCODE_DP4, + dst_reg(brw_writemask(m, 1 << (i & 3))), + src_reg(c->userplane[i])); + } + header_mrf += 2; + } + + current_annotation = NULL; + + return header_mrf; +} + +static int +align_interleaved_urb_mlen(struct brw_context *brw, int mlen) +{ + struct intel_context *intel = &brw->intel; + + if (intel->gen >= 6) { + /* URB data written (does not include the message header reg) must + * be a multiple of 256 bits, or 2 VS registers. See vol5c.5, + * section 5.4.3.2.2: URB_INTERLEAVED. + * + * URB entries are allocated on a multiple of 1024 bits, so an + * extra 128 bits written here to make the end align to 256 is + * no problem. + */ + if ((mlen % 2) != 1) + mlen++; + } + + return mlen; +} + +/** + * Generates the VUE payload plus the 1 or 2 URB write instructions to + * complete the VS thread. + * + * The VUE layout is documented in Volume 2a. + */ +void +vec4_visitor::emit_urb_writes() +{ + /* MRF 0 is reserved for the debugger, so start with message header + * in MRF 1. + */ + int base_mrf = 1; + int mrf = base_mrf; + int urb_entry_size; + uint64_t outputs_remaining = c->prog_data.outputs_written; + /* In the process of generating our URB write message contents, we + * may need to unspill a register or load from an array. Those + * reads would use MRFs 14-15. + */ + int max_usable_mrf = 13; + + /* FINISHME: edgeflag */ + + /* First mrf is the g0-based message header containing URB handles and such, + * which is implied in VS_OPCODE_URB_WRITE. + */ + mrf++; + + if (intel->gen >= 6) { + mrf = emit_vue_header_gen6(mrf); + } else { + mrf = emit_vue_header_gen4(mrf); + } + + /* Set up the VUE data for the first URB write */ + int attr; + for (attr = 0; attr < VERT_RESULT_MAX; attr++) { + if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) + continue; + + outputs_remaining &= ~BITFIELD64_BIT(attr); + + /* This is set up in the VUE header. */ + if (attr == VERT_RESULT_HPOS) + continue; + + /* This is loaded into the VUE header, and thus doesn't occupy + * an attribute slot. + */ + if (attr == VERT_RESULT_PSIZ) + continue; + + vec4_instruction *inst = emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), + src_reg(output_reg[attr])); + + if ((attr == VERT_RESULT_COL0 || + attr == VERT_RESULT_COL1 || + attr == VERT_RESULT_BFC0 || + attr == VERT_RESULT_BFC1) && + c->key.clamp_vertex_color) { + inst->saturate = true; + } + + /* If this was MRF 15, we can't fit anything more into this URB + * WRITE. Note that base_mrf of 1 means that MRF 15 is an + * even-numbered amount of URB write data, which will meet + * gen6's requirements for length alignment. + */ + if (mrf > max_usable_mrf) { + attr++; + break; + } + } + + vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); + inst->base_mrf = base_mrf; + inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); + inst->eot = !outputs_remaining; + + urb_entry_size = mrf - base_mrf; + + /* Optional second URB write */ + if (outputs_remaining) { + mrf = base_mrf + 1; + + for (; attr < VERT_RESULT_MAX; attr++) { + if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) + continue; + + assert(mrf < max_usable_mrf); + + emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); + } + + inst = emit(VS_OPCODE_URB_WRITE); + inst->base_mrf = base_mrf; + inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); + inst->eot = true; + /* URB destination offset. In the previous write, we got MRFs + * 2-13 minus the one header MRF, so 12 regs. URB offset is in + * URB row increments, and each of our MRFs is half of one of + * those, since we're doing interleaved writes. + */ + inst->offset = (max_usable_mrf - base_mrf) / 2; + + urb_entry_size += mrf - base_mrf; + } + + if (intel->gen == 6) + c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8; + else + c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4; +} + +src_reg +vec4_visitor::get_scratch_offset(vec4_instruction *inst, + src_reg *reladdr, int reg_offset) +{ + /* Because we store the values to scratch interleaved like our + * vertex data, we need to scale the vec4 index by 2. + */ + int message_header_scale = 2; + + /* Pre-gen6, the message header uses byte offsets instead of vec4 + * (16-byte) offset units. + */ + if (intel->gen < 6) + message_header_scale *= 16; + + if (reladdr) { + src_reg index = src_reg(this, glsl_type::int_type); + + vec4_instruction *add = emit(BRW_OPCODE_ADD, + dst_reg(index), + *reladdr, + src_reg(reg_offset)); + /* Move our new instruction from the tail to its correct place. */ + add->remove(); + inst->insert_before(add); + + vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index), + index, src_reg(message_header_scale)); + mul->remove(); + inst->insert_before(mul); + + return index; + } else { + return src_reg(reg_offset * message_header_scale); + } +} + +/** + * Emits an instruction before @inst to load the value named by @orig_src + * from scratch space at @base_offset to @temp. + */ +void +vec4_visitor::emit_scratch_read(vec4_instruction *inst, + dst_reg temp, src_reg orig_src, + int base_offset) +{ + int reg_offset = base_offset + orig_src.reg_offset; + src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset); + + vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ, + temp, index); + + scratch_read_inst->base_mrf = 14; + scratch_read_inst->mlen = 1; + /* Move our instruction from the tail to its correct place. */ + scratch_read_inst->remove(); + inst->insert_before(scratch_read_inst); +} + +/** + * Emits an instruction after @inst to store the value to be written + * to @orig_dst to scratch space at @base_offset, from @temp. + */ +void +vec4_visitor::emit_scratch_write(vec4_instruction *inst, + src_reg temp, dst_reg orig_dst, + int base_offset) +{ + int reg_offset = base_offset + orig_dst.reg_offset; + src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset); + + dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0), + orig_dst.writemask)); + vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE, + dst, temp, index); + scratch_write_inst->base_mrf = 13; + scratch_write_inst->mlen = 2; + scratch_write_inst->predicate = inst->predicate; + /* Move our instruction from the tail to its correct place. */ + scratch_write_inst->remove(); + inst->insert_after(scratch_write_inst); +} + +/** + * We can't generally support array access in GRF space, because a + * single instruction's destination can only span 2 contiguous + * registers. So, we send all GRF arrays that get variable index + * access to scratch space. + */ +void +vec4_visitor::move_grf_array_access_to_scratch() +{ + int scratch_loc[this->virtual_grf_count]; + + for (int i = 0; i < this->virtual_grf_count; i++) { + scratch_loc[i] = -1; + } + + /* First, calculate the set of virtual GRFs that need to be punted + * to scratch due to having any array access on them, and where in + * scratch. + */ + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + if (inst->dst.file == GRF && inst->dst.reladdr && + scratch_loc[inst->dst.reg] == -1) { + scratch_loc[inst->dst.reg] = c->last_scratch; + c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4; + } + + for (int i = 0 ; i < 3; i++) { + src_reg *src = &inst->src[i]; + + if (src->file == GRF && src->reladdr && + scratch_loc[src->reg] == -1) { + scratch_loc[src->reg] = c->last_scratch; + c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4; + } + } + } + + /* Now, for anything that will be accessed through scratch, rewrite + * it to load/store. Note that this is a _safe list walk, because + * we may generate a new scratch_write instruction after the one + * we're processing. + */ + foreach_list_safe(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + /* Set up the annotation tracking for new generated instructions. */ + base_ir = inst->ir; + current_annotation = inst->annotation; + + if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) { + src_reg temp = src_reg(this, glsl_type::vec4_type); + + emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]); + + inst->dst.file = temp.file; + inst->dst.reg = temp.reg; + inst->dst.reg_offset = temp.reg_offset; + inst->dst.reladdr = NULL; + } + + for (int i = 0 ; i < 3; i++) { + if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1) + continue; + + dst_reg temp = dst_reg(this, glsl_type::vec4_type); + + emit_scratch_read(inst, temp, inst->src[i], + scratch_loc[inst->src[i].reg]); + + inst->src[i].file = temp.file; + inst->src[i].reg = temp.reg; + inst->src[i].reg_offset = temp.reg_offset; + inst->src[i].reladdr = NULL; + } + } +} + + +vec4_visitor::vec4_visitor(struct brw_vs_compile *c, + struct gl_shader_program *prog, + struct brw_shader *shader) +{ + this->c = c; + this->p = &c->func; + this->brw = p->brw; + this->intel = &brw->intel; + this->ctx = &intel->ctx; + this->prog = prog; + this->shader = shader; + + this->mem_ctx = ralloc_context(NULL); + this->failed = false; + + this->base_ir = NULL; + this->current_annotation = NULL; + + this->c = c; + this->vp = prog->VertexProgram; + this->prog_data = &c->prog_data; + + this->variable_ht = hash_table_ctor(0, + hash_table_pointer_hash, + hash_table_pointer_compare); + + this->virtual_grf_def = NULL; + this->virtual_grf_use = NULL; + this->virtual_grf_sizes = NULL; + this->virtual_grf_count = 0; + this->virtual_grf_array_size = 0; + this->live_intervals_valid = false; + + this->uniforms = 0; + + this->variable_ht = hash_table_ctor(0, + hash_table_pointer_hash, + hash_table_pointer_compare); +} + +vec4_visitor::~vec4_visitor() +{ + ralloc_free(this->mem_ctx); + hash_table_dtor(this->variable_ht); +} + + +void +vec4_visitor::fail(const char *format, ...) +{ + va_list va; + char *msg; + + if (failed) + return; + + failed = true; + + va_start(va, format); + msg = ralloc_vasprintf(mem_ctx, format, va); + va_end(va); + msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg); + + this->fail_msg = msg; + + if (INTEL_DEBUG & DEBUG_VS) { + fprintf(stderr, "%s", msg); + } +} + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index a9ad5311fe3..3373e707d98 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -30,6 +30,7 @@ */ +#include "main/compiler.h" #include "brw_context.h" #include "brw_vs.h" #include "brw_util.h" @@ -39,17 +40,21 @@ #include "../glsl/ralloc.h" -static void do_vs_prog( struct brw_context *brw, - struct brw_vertex_program *vp, - struct brw_vs_prog_key *key ) +static bool +do_vs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_vertex_program *vp, + struct brw_vs_prog_key *key) { struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; GLuint program_size; const GLuint *program; struct brw_vs_compile c; void *mem_ctx; int aux_size; int i; + static int new_vs = -1; memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); @@ -85,7 +90,25 @@ static void do_vs_prog( struct brw_context *brw, /* Emit GEN4 code. */ - brw_vs_emit(&c); + if (new_vs == -1) + new_vs = getenv("INTEL_NEW_VS") != NULL; + + if (new_vs && prog) { + if (!brw_vs_emit(prog, &c)) { + ralloc_free(mem_ctx); + return false; + } + } else { + brw_old_vs_emit(&c); + } + + /* Scratch space is used for register spilling */ + if (c.last_scratch) { + c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch); + + brw_get_scratch_bo(intel, &brw->vs.scratch_bo, + c.prog_data.total_scratch * brw->vs_max_threads); + } /* get the program */ @@ -111,6 +134,8 @@ static void do_vs_prog( struct brw_context *brw, &c.prog_data, aux_size, &brw->vs.prog_offset, &brw->vs.prog_data); ralloc_free(mem_ctx); + + return true; } @@ -155,13 +180,15 @@ static void brw_upload_vs_prog(struct brw_context *brw) if (!brw_search_cache(&brw->cache, BRW_VS_PROG, &key, sizeof(key), &brw->vs.prog_offset, &brw->vs.prog_data)) { - do_vs_prog(brw, vp, &key); + bool success = do_vs_prog(brw, ctx->Shader.CurrentVertexProgram, + vp, &key); + + assert(success); } brw->vs.constant_map = ((int8_t *)brw->vs.prog_data + sizeof(*brw->vs.prog_data)); } - /* See brw_vs.c: */ const struct brw_tracked_state brw_vs_prog = { @@ -174,3 +201,30 @@ const struct brw_tracked_state brw_vs_prog = { }, .prepare = brw_upload_vs_prog }; + +bool +brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_vs_prog_key key; + struct gl_vertex_program *vp = prog->VertexProgram; + struct brw_vertex_program *bvp = brw_vertex_program(vp); + uint32_t old_prog_offset = brw->vs.prog_offset; + struct brw_vs_prog_data *old_prog_data = brw->vs.prog_data; + bool success; + + if (!vp) + return true; + + memset(&key, 0, sizeof(key)); + + key.program_string_id = bvp->id; + key.clamp_vertex_color = true; + + success = do_vs_prog(brw, prog, bvp, &key); + + brw->vs.prog_offset = old_prog_offset; + brw->vs.prog_data = old_prog_data; + + return success; +} diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 432994a8534..beccb381ee2 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -66,6 +66,7 @@ struct brw_vs_compile { GLuint first_output; GLuint nr_outputs; GLuint first_overflow_output; /**< VERT_ATTRIB_x */ + GLuint last_scratch; GLuint first_tmp; GLuint last_tmp; @@ -92,6 +93,8 @@ struct brw_vs_compile { GLboolean needs_stack; }; -void brw_vs_emit( struct brw_vs_compile *c ); +bool brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c); +void brw_old_vs_emit(struct brw_vs_compile *c); +bool brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog); #endif diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c index 9fdfebe9f76..47cc0a7da7a 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_constval.c +++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c @@ -194,19 +194,11 @@ static void calc_wm_input_sizes( struct brw_context *brw ) /* BRW_NEW_VERTEX_PROGRAM */ const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); - /* BRW_NEW_FRAGMENT_PROGRAM */ - struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram; /* BRW_NEW_INPUT_DIMENSIONS */ struct tracker t; GLuint insn; GLuint i; - /* If we're going to go through brw_fs.cpp, we don't end up using - * brw->wm.input_size_masks. - */ - if (prog && prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) - return; - memset(&t, 0, sizeof(t)); /* _NEW_LIGHT */ @@ -246,9 +238,7 @@ static void calc_wm_input_sizes( struct brw_context *brw ) const struct brw_tracked_state brw_wm_input_sizes = { .dirty = { .mesa = _NEW_LIGHT, - .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_INPUT_DIMENSIONS), + .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS, .cache = 0 }, .prepare = calc_wm_input_sizes diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 9d733344a26..bfee811e13d 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1096,31 +1096,6 @@ static void emit_lrp_noalias(struct brw_vs_compile *c, brw_MAC(p, dst, arg0, arg1); } -/** 3 or 4-component vector normalization */ -static void emit_nrm( struct brw_vs_compile *c, - struct brw_reg dst, - struct brw_reg arg0, - int num_comps) -{ - struct brw_compile *p = &c->func; - struct brw_reg tmp = get_tmp(c); - - /* tmp = dot(arg0, arg0) */ - if (num_comps == 3) - brw_DP3(p, tmp, arg0, arg0); - else - brw_DP4(p, tmp, arg0, arg0); - - /* tmp = 1 / sqrt(tmp) */ - emit_math1(c, BRW_MATH_FUNCTION_RSQ, tmp, tmp, BRW_MATH_PRECISION_FULL); - - /* dst = arg0 * tmp */ - brw_MUL(p, dst, arg0, tmp); - - release_tmp(c, tmp); -} - - static struct brw_reg get_constant(struct brw_vs_compile *c, const struct prog_instruction *inst, @@ -1359,7 +1334,7 @@ get_src_reg( struct brw_vs_compile *c, if (component >= 0) { params = c->vp->program.Base.Parameters; - f = params->ParameterValues[src->Index][component]; + f = params->ParameterValues[src->Index][component].f; if (src->Abs) f = fabs(f); @@ -1821,6 +1796,9 @@ accumulator_contains(struct brw_vs_compile *c, struct brw_reg val) if (val.address_mode != BRW_ADDRESS_DIRECT) return GL_FALSE; + if (val.negate || val.abs) + return GL_FALSE; + switch (prev_insn->header.opcode) { case BRW_OPCODE_MOV: case BRW_OPCODE_MAC: @@ -1900,7 +1878,7 @@ brw_vs_rescale_gl_fixed(struct brw_vs_compile *c) /* Emit the vertex program instructions here. */ -void brw_vs_emit(struct brw_vs_compile *c ) +void brw_old_vs_emit(struct brw_vs_compile *c ) { #define MAX_IF_DEPTH 32 #define MAX_LOOP_DEPTH 32 @@ -1980,9 +1958,22 @@ void brw_vs_emit(struct brw_vs_compile *c ) const struct prog_src_register *src = &inst->SrcReg[i]; index = src->Index; file = src->File; - if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) - args[i] = c->output_regs[index].reg; - else + if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) { + /* Can't just make get_arg "do the right thing" here because + * other callers of get_arg and get_src_reg don't expect any + * special behavior for the c->output_regs[index].used_in_src + * case. + */ + args[i] = c->output_regs[index].reg; + args[i].dw1.bits.swizzle = + BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0), + GET_SWZ(src->Swizzle, 1), + GET_SWZ(src->Swizzle, 2), + GET_SWZ(src->Swizzle, 3)); + + /* Note this is ok for non-swizzle ARB_vp instructions */ + args[i].negate = src->Negate ? 1 : 0; + } else args[i] = get_arg(c, inst, i); } @@ -1993,7 +1984,11 @@ void brw_vs_emit(struct brw_vs_compile *c ) index = inst->DstReg.Index; file = inst->DstReg.File; if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) - dst = c->output_regs[index].reg; + /* Can't just make get_dst "do the right thing" here because other + * callers of get_dst don't expect any special behavior for the + * c->output_regs[index].used_in_src case. + */ + dst = brw_writemask(c->output_regs[index].reg, inst->DstReg.WriteMask); else dst = get_dst(c, inst->DstReg); @@ -2025,12 +2020,6 @@ void brw_vs_emit(struct brw_vs_compile *c ) case OPCODE_DPH: brw_DPH(p, dst, args[0], args[1]); break; - case OPCODE_NRM3: - emit_nrm(c, dst, args[0], 3); - break; - case OPCODE_NRM4: - emit_nrm(c, dst, args[0], 4); - break; case OPCODE_DST: unalias2(c, dst, args[0], args[1], emit_dst_noalias); break; diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index fc4373ab311..29b3e47ab0c 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -77,6 +77,16 @@ brw_prepare_vs_unit(struct brw_context *brw) else vs->thread1.binding_table_entry_count = brw->vs.nr_surfaces; + if (brw->vs.prog_data->total_scratch != 0) { + vs->thread2.scratch_space_base_pointer = + brw->vs.scratch_bo->offset >> 10; /* reloc */ + vs->thread2.per_thread_scratch_space = + ffs(brw->vs.prog_data->total_scratch) - 11; + } else { + vs->thread2.scratch_space_base_pointer = 0; + vs->thread2.per_thread_scratch_space = 0; + } + vs->thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; vs->thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; vs->thread3.dispatch_grf_start_reg = 1; diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 55dbd4fa8b0..40360b23fff 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -213,6 +213,7 @@ static void brw_new_batch( struct intel_context *intel ) brw->state_batch_count = 0; brw->vb.nr_current_buffers = 0; + brw->ib.type = -1; /* Mark that the current program cache BO has been used by the GPU. * It will be reallocated if we need to put new programs in for the diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index b0dfdd536aa..e76832515fe 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -206,10 +206,6 @@ bool do_wm_prog(struct brw_context *brw, */ return false; } - c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN); - c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN); - c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG); - c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF); } else { void *instruction = c->instruction; void *prog_instructions = c->prog_instructions; @@ -232,6 +228,13 @@ bool do_wm_prog(struct brw_context *brw, if (!brw_wm_fs_emit(brw, c, prog)) return false; } else { + if (!c->instruction) { + c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN); + c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN); + c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG); + c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF); + } + /* Fallback for fixed function and ARB_fp shaders. */ c->dispatch_width = 16; brw_wm_payload_setup(brw, c); @@ -241,29 +244,10 @@ bool do_wm_prog(struct brw_context *brw, /* Scratch space is used for register spilling */ if (c->last_scratch) { - uint32_t total_scratch; - - /* Per-thread scratch space is power-of-two sized. */ - for (c->prog_data.total_scratch = 1024; - c->prog_data.total_scratch <= c->last_scratch; - c->prog_data.total_scratch *= 2) { - /* empty */ - } - total_scratch = c->prog_data.total_scratch * brw->wm_max_threads; + c->prog_data.total_scratch = brw_get_scratch_size(c->last_scratch); - if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) { - drm_intel_bo_unreference(brw->wm.scratch_bo); - brw->wm.scratch_bo = NULL; - } - if (brw->wm.scratch_bo == NULL) { - brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr, - "wm scratch", - total_scratch, - 4096); - } - } - else { - c->prog_data.total_scratch = 0; + brw_get_scratch_bo(intel, &brw->wm.scratch_bo, + c->prog_data.total_scratch * brw->wm_max_threads); } if (unlikely(INTEL_DEBUG & DEBUG_WM)) diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index f61757a8cac..6ea4a7d6e50 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1094,9 +1094,16 @@ void emit_tex(struct brw_wm_compile *c, if (intel->gen < 5 && c->dispatch_width == 8) nr_texcoords = 3; - /* For shadow comparisons, we have to supply u,v,r. */ - if (shadow) - nr_texcoords = 3; + if (shadow) { + if (intel->gen < 7) { + /* For shadow comparisons, we have to supply u,v,r. */ + nr_texcoords = 3; + } else { + /* On Ivybridge, the shadow comparitor comes first. Just load it. */ + brw_MOV(p, brw_message_reg(cur_mrf), arg[2]); + cur_mrf += mrf_per_channel; + } + } /* Emit the texcoords. */ for (i = 0; i < nr_texcoords; i++) { @@ -1113,7 +1120,7 @@ void emit_tex(struct brw_wm_compile *c, } /* Fill in the shadow comparison reference value. */ - if (shadow) { + if (shadow && intel->gen < 7) { if (intel->gen >= 5) { /* Fill in the cube map array index value. */ brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 7cd3edad235..bd46bd8de43 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -535,15 +535,15 @@ static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, GLfloat s3) { struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; - GLfloat values[4]; + gl_constant_value values[4]; GLuint idx; GLuint swizzle; struct prog_src_register reg; - values[0] = s0; - values[1] = s1; - values[2] = s2; - values[3] = s3; + values[0].f = s0; + values[1].f = s1; + values[2].f = s2; + values[3].f = s3; idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle ); reg = src_reg(PROGRAM_STATE_VAR, idx); @@ -664,6 +664,8 @@ static void precalc_lit( struct brw_wm_compile *c, static void precalc_tex( struct brw_wm_compile *c, const struct prog_instruction *inst ) { + struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; struct prog_src_register coord; struct prog_dst_register tmpcoord = { 0 }; const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; @@ -727,7 +729,7 @@ static void precalc_tex( struct brw_wm_compile *c, release_temp(c, tmp0); release_temp(c, tmp1); } - else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) { + else if (intel->gen < 6 && inst->TexSrcTarget == TEXTURE_RECT_INDEX) { struct prog_src_register scale = search_or_add_param5( c, STATE_INTERNAL, diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index f78bdc31866..ccf9dc2bc18 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -205,14 +205,14 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, case PROGRAM_CONSTANT: /* These are invarient: */ - ref = get_const_ref(c, &plist->ParameterValues[idx][component]); + ref = get_const_ref(c, &plist->ParameterValues[idx][component].f); break; case PROGRAM_STATE_VAR: case PROGRAM_UNIFORM: /* These may change from run to run: */ - ref = get_param_ref(c, &plist->ParameterValues[idx][component] ); + ref = get_param_ref(c, &plist->ParameterValues[idx][component].f ); break; default: diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 98146136703..6834ebad780 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -289,6 +289,13 @@ static void brw_update_sampler_state(struct brw_context *brw, sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 6); sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 6); + /* On Gen6+, the sampler can handle non-normalized texture + * rectangle coordinates natively + */ + if (intel->gen >= 6 && texObj->Target == GL_TEXTURE_RECTANGLE) { + sampler->ss3.non_normalized_coord = 1; + } + upload_default_color(brw, gl_sampler, unit); if (intel->gen >= 6) { diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index fb4fb146f8d..ad909789d82 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -342,7 +342,7 @@ prepare_wm_pull_constants(struct brw_context *brw) constants = brw->wm.const_bo->virtual; for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) { constants[i] = convert_param(brw->wm.prog_data->pull_param_convert[i], - *brw->wm.prog_data->pull_param[i]); + brw->wm.prog_data->pull_param[i]); } drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo); diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index fb4cdbaadf9..b94121e8437 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -81,12 +81,21 @@ gen6_prepare_vs_push_constants(struct brw_context *brw) params_uploaded++; } - for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { - if (brw->vs.constant_map[i] != -1) { - memcpy(param + brw->vs.constant_map[i] * 4, - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); - params_uploaded++; + if (brw->vs.prog_data->uses_new_param_layout) { + for (i = 0; i < brw->vs.prog_data->nr_params; i++) { + *param = convert_param(brw->vs.prog_data->param_convert[i], + brw->vs.prog_data->param[i]); + param++; + } + params_uploaded += brw->vs.prog_data->nr_params / 4; + } else { + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + memcpy(param + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + params_uploaded++; + } } } @@ -151,7 +160,15 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | GEN6_VS_FLOATING_POINT_MODE_ALT | (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - OUT_BATCH(0); /* scratch space base offset */ + + if (brw->vs.prog_data->total_scratch) { + OUT_RELOC(brw->vs.scratch_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + ffs(brw->vs.prog_data->total_scratch) - 11); + } else { + OUT_BATCH(0); + } + OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); @@ -160,6 +177,32 @@ upload_vs_state(struct brw_context *brw) GEN6_VS_STATISTICS_ENABLE | GEN6_VS_ENABLE); ADVANCE_BATCH(); + + /* Based on my reading of the simulator, the VS constants don't get + * pulled into the VS FF unit until an appropriate pipeline flush + * happens, and instead the 3DSTATE_CONSTANT_VS packet just adds + * references to them into a little FIFO. The flushes are common, + * but don't reliably happen between this and a 3DPRIMITIVE, causing + * the primitive to use the wrong constants. Then the FIFO + * containing the constant setup gets added to again on the next + * constants change, and eventually when a flush does happen the + * unit is overwhelmed by constant changes and dies. + * + * To avoid this, send a PIPE_CONTROL down the line that will + * update the unit immediately loading the constants. The flush + * type bits here were those set by the STATE_BASE_ADDRESS whose + * move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the + * bug reports that led to this workaround, and may be more than + * what is strictly required to avoid the issue. + */ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_INSTRUCTION_FLUSH | + PIPE_CONTROL_STATE_CACHE_INVALIDATE); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); } const struct brw_tracked_state gen6_vs_state = { diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 185da9c355f..07e9995f53b 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -54,14 +54,14 @@ gen6_prepare_wm_push_constants(struct brw_context *brw) float *constants; unsigned int i; - constants = brw_state_batch(brw, AUB_TRACE_NO_TYPE, + constants = brw_state_batch(brw, AUB_TRACE_WM_CONSTANTS, brw->wm.prog_data->nr_params * sizeof(float), 32, &brw->wm.push_const_offset); for (i = 0; i < brw->wm.prog_data->nr_params; i++) { constants[i] = convert_param(brw->wm.prog_data->param_convert[i], - *brw->wm.prog_data->param[i]); + brw->wm.prog_data->param[i]); } if (0) { diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c index e787c21f4d1..aee67c87472 100644 --- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c @@ -157,6 +157,13 @@ gen7_update_sampler_state(struct brw_context *brw, int unit, sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 8); sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 8); + /* The sampler can handle non-normalized texture rectangle coordinates + * natively + */ + if (texObj->Target == GL_TEXTURE_RECTANGLE) { + sampler->ss3.non_normalized_coord = 1; + } + upload_default_color(brw, gl_sampler, unit); sampler->ss2.default_color_pointer = brw->wm.sdc_offset[unit] >> 5; diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index 0fad3d2fb68..f3cd5d15bf0 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -71,7 +71,15 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | GEN6_VS_FLOATING_POINT_MODE_ALT | (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - OUT_BATCH(0); /* scratch space base offset */ + + if (brw->vs.prog_data->total_scratch) { + OUT_RELOC(brw->vs.scratch_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + ffs(brw->vs.prog_data->total_scratch) - 11); + } else { + OUT_BATCH(0); + } + OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index a102ca772b3..55a603e887a 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -58,7 +58,7 @@ gen7_prepare_wm_constants(struct brw_context *brw) for (i = 0; i < brw->wm.prog_data->nr_params; i++) { constants[i] = convert_param(brw->wm.prog_data->param_convert[i], - *brw->wm.prog_data->param[i]); + brw->wm.prog_data->param[i]); } if (0) { @@ -228,7 +228,13 @@ upload_ps_state(struct brw_context *brw) OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); OUT_BATCH(brw->wm.prog_offset); OUT_BATCH(dw2); - OUT_BATCH(0); /* scratch space base offset */ + if (brw->wm.prog_data->total_scratch) { + OUT_RELOC(brw->wm.scratch_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + ffs(brw->wm.prog_data->total_scratch) - 11); + } else { + OUT_BATCH(0); + } OUT_BATCH(dw4); OUT_BATCH(dw5); OUT_BATCH(0); /* kernel 1 pointer */ diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index b61a2ffef19..db4343be10c 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -308,12 +308,29 @@ emit: * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. * - * XXX: There is also a workaround that would appear to apply to this - * workaround, but it doesn't appear to be necessary so far: + * And the workaround for these two requires this workaround first: * - * Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent + * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent * BEFORE the pipe-control with a post-sync op and no write-cache * flushes. + * + * And this last workaround is tricky because of the requirements on + * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM + * volume 2 part 1: + * + * "1 of the following must also be set: + * - Render Target Cache Flush Enable ([12] of DW1) + * - Depth Cache Flush Enable ([0] of DW1) + * - Stall at Pixel Scoreboard ([1] of DW1) + * - Depth Stall ([13] of DW1) + * - Post-Sync Operation ([13] of DW1) + * - Notify Enable ([8] of DW1)" + * + * The cache flushes require the workaround flush that triggered this + * one, so we can't use it. Depth stall would trigger the same. + * Post-sync nonzero is what triggered this second workaround, so we + * can't use that one either. Notify enable is IRQs, which aren't + * really our business. That leaves only stall at scoreboard. */ void intel_emit_post_sync_nonzero_flush(struct intel_context *intel) @@ -323,9 +340,17 @@ intel_emit_post_sync_nonzero_flush(struct intel_context *intel) BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL); OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); OUT_RELOC(intel->batch.workaround_bo, - I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT, 0); + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); OUT_BATCH(0); /* write data */ ADVANCE_BATCH(); @@ -365,6 +390,7 @@ intel_batchbuffer_emit_mi_flush(struct intel_context *intel) OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH | PIPE_CONTROL_WRITE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_TC_FLUSH | PIPE_CONTROL_NO_WRITE); OUT_BATCH(0); /* write address */ OUT_BATCH(0); /* write data */ diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 30be1b9382f..b18dd2922d9 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -541,8 +541,8 @@ intel_set_teximage_alpha_to_one(struct gl_context *ctx, /* get dest x/y in destination texture */ intel_miptree_get_image_offset(intel_image->mt, - intel_image->level, - intel_image->face, + intel_image->base.Level, + intel_image->base.Face, 0, &image_x, &image_y); diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index 439d6fc8247..d908975fc87 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -41,8 +41,7 @@ #include "intel_regions.h" static GLboolean -intel_bufferobj_unmap(struct gl_context * ctx, - GLenum target, struct gl_buffer_object *obj); +intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj); /** Allocates a new drm_intel_bo to store the data for the buffer object. */ static void @@ -122,7 +121,7 @@ intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj) * (though it does if you call glDeleteBuffers) */ if (obj->Pointer) - intel_bufferobj_unmap(ctx, 0, obj); + intel_bufferobj_unmap(ctx, obj); free(intel_obj->sys_buffer); if (intel_obj->region) { @@ -203,7 +202,6 @@ intel_bufferobj_data(struct gl_context * ctx, */ static void intel_bufferobj_subdata(struct gl_context * ctx, - GLenum target, GLintptrARB offset, GLsizeiptrARB size, const GLvoid * data, struct gl_buffer_object *obj) @@ -276,82 +274,28 @@ intel_bufferobj_subdata(struct gl_context * ctx, */ static void intel_bufferobj_get_subdata(struct gl_context * ctx, - GLenum target, GLintptrARB offset, GLsizeiptrARB size, GLvoid * data, struct gl_buffer_object *obj) { struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + struct intel_context *intel = intel_context(ctx); assert(intel_obj); if (intel_obj->sys_buffer) memcpy(data, (char *)intel_obj->sys_buffer + offset, size); - else - drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data); -} - - - -/** - * Called via glMapBufferARB(). - */ -static void * -intel_bufferobj_map(struct gl_context * ctx, - GLenum target, - GLenum access, struct gl_buffer_object *obj) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - GLboolean read_only = (access == GL_READ_ONLY_ARB); - GLboolean write_only = (access == GL_WRITE_ONLY_ARB); - - assert(intel_obj); - - if (intel_obj->sys_buffer) { - if (!read_only && intel_obj->source) { - release_buffer(intel_obj); - } - - if (!intel_obj->buffer || intel_obj->source) { - obj->Pointer = intel_obj->sys_buffer; - obj->Length = obj->Size; - obj->Offset = 0; - return obj->Pointer; + else { + if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) { + intel_batchbuffer_flush(intel); } - - free(intel_obj->sys_buffer); - intel_obj->sys_buffer = NULL; - } - - /* Flush any existing batchbuffer that might reference this data. */ - if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) - intel_flush(ctx); - - if (intel_obj->region) - intel_bufferobj_cow(intel, intel_obj); - - if (intel_obj->buffer == NULL) { - obj->Pointer = NULL; - return NULL; - } - - if (write_only) { - drm_intel_gem_bo_map_gtt(intel_obj->buffer); - intel_obj->mapped_gtt = GL_TRUE; - } else { - drm_intel_bo_map(intel_obj->buffer, !read_only); - intel_obj->mapped_gtt = GL_FALSE; + drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data); } +} - obj->Pointer = intel_obj->buffer->virtual; - obj->Length = obj->Size; - obj->Offset = 0; - return obj->Pointer; -} /** - * Called via glMapBufferRange(). + * Called via glMapBufferRange and glMapBuffer * * The goal of this extension is to allow apps to accumulate their rendering * at the same time as they accumulate their buffer object. Without it, @@ -368,12 +312,11 @@ intel_bufferobj_map(struct gl_context * ctx, */ static void * intel_bufferobj_map_range(struct gl_context * ctx, - GLenum target, GLintptr offset, GLsizeiptr length, + GLintptr offset, GLsizeiptr length, GLbitfield access, struct gl_buffer_object *obj) { struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - GLboolean read_only = (access == GL_READ_ONLY_ARB); assert(intel_obj); @@ -385,6 +328,9 @@ intel_bufferobj_map_range(struct gl_context * ctx, obj->AccessFlags = access; if (intel_obj->sys_buffer) { + const bool read_only = + (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_READ_BIT; + if (!read_only && intel_obj->source) release_buffer(intel_obj); @@ -468,7 +414,7 @@ intel_bufferobj_map_range(struct gl_context * ctx, * would defeat the point. */ static void -intel_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target, +intel_bufferobj_flush_mapped_range(struct gl_context *ctx, GLintptr offset, GLsizeiptr length, struct gl_buffer_object *obj) { @@ -502,8 +448,7 @@ intel_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target, * Called via glUnmapBuffer(). */ static GLboolean -intel_bufferobj_unmap(struct gl_context * ctx, - GLenum target, struct gl_buffer_object *obj) +intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj) { struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); @@ -758,23 +703,23 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx, * not overlap. */ if (src == dst) { - char *ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER, - GL_READ_WRITE, dst); + char *ptr = intel_bufferobj_map_range(ctx, 0, dst->Size, + GL_MAP_READ_BIT, dst); memmove(ptr + write_offset, ptr + read_offset, size); - intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst); + intel_bufferobj_unmap(ctx, dst); } else { const char *src_ptr; char *dst_ptr; - src_ptr = intel_bufferobj_map(ctx, GL_COPY_READ_BUFFER, - GL_READ_ONLY, src); - dst_ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER, - GL_WRITE_ONLY, dst); + src_ptr = intel_bufferobj_map_range(ctx, 0, src->Size, + GL_MAP_READ_BIT, src); + dst_ptr = intel_bufferobj_map_range(ctx, 0, dst->Size, + GL_MAP_WRITE_BIT, dst); memcpy(dst_ptr + write_offset, src_ptr + read_offset, size); - intel_bufferobj_unmap(ctx, GL_COPY_READ_BUFFER, src); - intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst); + intel_bufferobj_unmap(ctx, src); + intel_bufferobj_unmap(ctx, dst); } return; } @@ -924,7 +869,6 @@ intelInitBufferObjectFuncs(struct dd_function_table *functions) functions->BufferData = intel_bufferobj_data; functions->BufferSubData = intel_bufferobj_subdata; functions->GetBufferSubData = intel_bufferobj_get_subdata; - functions->MapBuffer = intel_bufferobj_map; functions->MapBufferRange = intel_bufferobj_map_range; functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range; functions->UnmapBuffer = intel_bufferobj_unmap; diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index dfca03c14bf..76d33f9b37e 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -116,13 +116,13 @@ intelClear(struct gl_context *ctx, GLbitfield mask) } /* HW color buffers (front, back, aux, generic FBO, etc) */ - if (colorMask == ~0) { + if (intel->gen < 6 && colorMask == ~0) { /* clear all R,G,B,A */ blit_mask |= (mask & BUFFER_BITS_COLOR); } else { /* glColorMask in effect */ - tri_mask |= (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT)); + tri_mask |= (mask & BUFFER_BITS_COLOR); } /* Make sure we have up to date buffers before we start looking at @@ -143,6 +143,12 @@ intelClear(struct gl_context *ctx, GLbitfield mask) */ tri_mask |= BUFFER_BIT_STENCIL; } + else if (intel->has_separate_stencil && + stencilRegion->tiling == I915_TILING_NONE) { + /* The stencil buffer is actually W tiled, which the hardware + * cannot blit to. */ + tri_mask |= BUFFER_BIT_STENCIL; + } else { /* clearing all stencil bits, use blitting */ blit_mask |= BUFFER_BIT_STENCIL; @@ -182,7 +188,10 @@ intelClear(struct gl_context *ctx, GLbitfield mask) if (tri_mask) { debug_mask("tri", tri_mask); - _mesa_meta_Clear(&intel->ctx, tri_mask); + if (ctx->Extensions.ARB_fragment_shader) + _mesa_meta_glsl_Clear(&intel->ctx, tri_mask); + else + _mesa_meta_Clear(&intel->ctx, tri_mask); } } diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 2ba13632569..14342ef6246 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -1439,7 +1439,12 @@ intel_verify_dri2_has_hiz(struct intel_context *intel, assert(stencil_rb->Base.Format == MESA_FORMAT_S8); assert(depth_rb && depth_rb->Base.Format == MESA_FORMAT_X8_Z24); - if (stencil_rb->region->tiling == I915_TILING_Y) { + if (stencil_rb->region->tiling == I915_TILING_NONE) { + /* + * The stencil buffer is actually W tiled. The region's tiling is + * I915_TILING_NONE, however, because the GTT is incapable of W + * fencing. + */ intel->intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_TRUE; return; } else { @@ -1449,6 +1454,13 @@ intel_verify_dri2_has_hiz(struct intel_context *intel, * a combined depth/stencil buffer. Discard the hiz buffer too. */ intel->intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_FALSE; + if (intel->must_use_separate_stencil) { + _mesa_problem(&intel->ctx, + "intel_context requires separate stencil, but the " + "DRIscreen does not support it. You may need to " + "upgrade the Intel X driver to 2.16.0"); + abort(); + } /* 1. Discard depth and stencil renderbuffers. */ _mesa_remove_renderbuffer(fb, BUFFER_DEPTH); @@ -1527,7 +1539,7 @@ intel_verify_dri2_has_hiz(struct intel_context *intel, * Presently, however, no verification or clean up is necessary, and * execution should not reach here. If the framebuffer still has a hiz * region, then we have already set dri2_has_hiz to true after - * confirming above that the stencil buffer is Y tiled. + * confirming above that the stencil buffer is W tiled. */ assert(0); } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 55bcc757873..754f9f202d1 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -173,6 +173,9 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer if (irb->Base.Format == MESA_FORMAT_S8) { /* + * The stencil buffer is W tiled. However, we request from the kernel a + * non-tiled buffer because the GTT is incapable of W fencing. + * * The stencil buffer has quirky pitch requirements. From Vol 2a, * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch": * The pitch must be set to 2x the value computed based on width, as @@ -180,14 +183,13 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer * To accomplish this, we resort to the nasty hack of doubling the drm * region's cpp and halving its height. * - * If we neglect to double the pitch, then drm_intel_gem_bo_map_gtt() - * maps the memory incorrectly. + * If we neglect to double the pitch, then render corruption occurs. */ irb->region = intel_region_alloc(intel->intelScreen, - I915_TILING_Y, + I915_TILING_NONE, cpp * 2, - width, - height / 2, + ALIGN(width, 64), + ALIGN((height + 1) / 2, 64), GL_TRUE); if (!irb->region) return false; @@ -594,17 +596,15 @@ intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb, struct intel_texture_image *intel_image, int zoffset) { - struct intel_mipmap_tree *mt = intel_image->mt; unsigned int dst_x, dst_y; /* compute offset of the particular 2D image within the texture region */ intel_miptree_get_image_offset(intel_image->mt, - intel_image->level, - intel_image->face, + intel_image->base.Level, + intel_image->base.Face, zoffset, &dst_x, &dst_y); - irb->draw_offset = (dst_y * mt->region->pitch + dst_x) * mt->cpp; irb->draw_x = dst_x; irb->draw_y = dst_y; } @@ -645,6 +645,22 @@ intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb, } } +#ifndef I915 +static bool +need_tile_offset_workaround(struct brw_context *brw, + struct intel_renderbuffer *irb) +{ + uint32_t tile_x, tile_y; + + if (brw->has_surface_tile_offset) + return false; + + intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y); + + return tile_x != 0 || tile_y != 0; +} +#endif + /** * Called by glFramebufferTexture[123]DEXT() (and other places) to * prepare for rendering into texture memory. This might be called @@ -698,8 +714,7 @@ intel_render_texture(struct gl_context * ctx, intel_image->used_as_render_target = GL_TRUE; #ifndef I915 - if (!brw_context(ctx)->has_surface_tile_offset && - (irb->draw_offset & 4095) != 0) { + if (need_tile_offset_workaround(brw_context(ctx), irb)) { /* Original gen4 hardware couldn't draw to a non-tile-aligned * destination in a miptree unless you actually setup your * renderbuffer as a miptree and used the fragile @@ -713,8 +728,8 @@ intel_render_texture(struct gl_context * ctx, new_mt = intel_miptree_create(intel, image->TexObject->Target, intel_image->base.TexFormat, - intel_image->level, - intel_image->level, + intel_image->base.Level, + intel_image->base.Level, intel_image->base.Width, intel_image->base.Height, intel_image->base.Depth, @@ -722,8 +737,8 @@ intel_render_texture(struct gl_context * ctx, intel_miptree_image_copy(intel, new_mt, - intel_image->face, - intel_image->level, + intel_image->base.Face, + intel_image->base.Level, old_mt); intel_miptree_release(intel, &intel_image->mt); diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index f7f99a4f00c..2487994fde5 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -58,7 +58,6 @@ struct intel_renderbuffer /** \} */ - GLuint draw_offset; /**< Offset of drawing address within the region */ GLuint draw_x, draw_y; /**< Offset of drawing within the region */ }; diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index 4e711de1ce1..f36240d7f1d 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -227,7 +227,7 @@ intel_miptree_match_image(struct intel_mipmap_tree *mt, struct gl_texture_image *image) { struct intel_texture_image *intelImage = intel_texture_image(image); - GLuint level = intelImage->level; + GLuint level = intelImage->base.Level; /* Images with borders are never pulled into mipmap trees. */ if (image->Border) diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index 86d0ef2d748..d9873a303ee 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -74,9 +74,9 @@ static const GLubyte *map_pbo( struct gl_context *ctx, return NULL; } - buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - GL_READ_ONLY_ARB, - unpack->BufferObj); + buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size, + GL_MAP_READ_BIT, + unpack->BufferObj); if (!buf) { _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)"); return NULL; @@ -292,8 +292,7 @@ out: if (_mesa_is_bufferobj(unpack->BufferObj)) { /* done with PBO so unmap it now */ - ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - unpack->BufferObj); + ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj); } intel_check_front_buffer_rendering(intel); diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h index 5aa629150cf..a98a669af21 100644 --- a/src/mesa/drivers/dri/intel/intel_reg.h +++ b/src/mesa/drivers/dri/intel/intel_reg.h @@ -75,6 +75,7 @@ #define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4) #define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3) #define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2) +#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) #define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) #define PIPE_CONTROL_PPGTT_WRITE (0 << 2) #define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2) diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h index b2013af1a29..9dd6a525566 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.h +++ b/src/mesa/drivers/dri/intel/intel_screen.h @@ -63,9 +63,12 @@ * x8_z24 and s8). * * Eventually, intel_update_renderbuffers() makes a DRI2 request for - * DRI2BufferStencil and DRI2BufferHiz. If the returned buffers are Y tiled, - * then we joyfully set intel_screen.dri2_has_hiz to true and continue as if - * nothing happend. + * DRI2BufferStencil and DRI2BufferHiz. If the stencil buffer's tiling is + * I915_TILING_NONE [1], then we joyfully set intel_screen.dri2_has_hiz to + * true and continue as if nothing happend. + * + * [1] The stencil buffer is actually W tiled. However, we request from the + * kernel a non-tiled buffer because the GTT is incapable of W fencing. * * If the buffers are X tiled, however, the handshake has failed and we must * clean up. diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 153803fba09..2e1c80c4766 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -131,38 +131,84 @@ intel_set_span_functions(struct intel_context *intel, int miny = 0; \ int maxx = rb->Width; \ int maxy = rb->Height; \ - int stride = rb->RowStride; \ - uint8_t *buf = rb->Data; \ + \ + /* \ + * Here we ignore rb->Data and rb->RowStride as set by \ + * intelSpanRenderStart. Since intel_offset_S8 decodes the W tile \ + * manually, the region's *real* base address and stride is \ + * required. \ + */ \ + struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ + uint8_t *buf = irb->region->buffer->virtual; \ + unsigned stride = irb->region->pitch; \ + unsigned height = 2 * irb->region->height; \ + bool flip = rb->Name == 0; \ + int y_scale = flip ? -1 : 1; \ + int y_bias = flip ? (height - 1) : 0; \ -/* Don't flip y. */ #undef Y_FLIP -#define Y_FLIP(y) y +#define Y_FLIP(y) (y_scale * (y) + y_bias) /** * \brief Get pointer offset into stencil buffer. * - * The stencil buffer interleaves two rows into one. Yay for crazy hardware. - * The table below demonstrates how the pointer arithmetic behaves for a buffer - * with positive stride (s=stride). - * - * x | y | byte offset - * -------------------------- - * 0 | 0 | 0 - * 0 | 1 | 1 - * 1 | 0 | 2 - * 1 | 1 | 3 - * ... | ... | ... - * 0 | 2 | s - * 0 | 3 | s + 1 - * 1 | 2 | s + 2 - * 1 | 3 | s + 3 + * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we + * must decode the tile's layout in software. * + * See + * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile + * Format. + * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm * + * Even though the returned offset is always positive, the return type is + * signed due to + * commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137 + * mesa: Fix return type of _mesa_get_format_bytes() (#37351) */ static inline intptr_t -intel_offset_S8(int stride, GLint x, GLint y) +intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y) { - return 2 * ((y / 2) * stride + x) + y % 2; + uint32_t tile_size = 4096; + uint32_t tile_width = 64; + uint32_t tile_height = 64; + uint32_t row_size = 64 * stride; + + uint32_t tile_x = x / tile_width; + uint32_t tile_y = y / tile_height; + + /* The byte's address relative to the tile's base addres. */ + uint32_t byte_x = x % tile_width; + uint32_t byte_y = y % tile_height; + + uintptr_t u = tile_y * row_size + + tile_x * tile_size + + 512 * (byte_x / 8) + + 64 * (byte_y / 8) + + 32 * ((byte_y / 4) % 2) + + 16 * ((byte_x / 4) % 2) + + 8 * ((byte_y / 2) % 2) + + 4 * ((byte_x / 2) % 2) + + 2 * (byte_y % 2) + + 1 * (byte_x % 2); + + /* + * Errata for Gen5: + * + * An additional offset is needed which is not documented in the PRM. + * + * if ((byte_x / 8) % 2 == 1) { + * if ((byte_y / 8) % 2) == 0) { + * u += 64; + * } else { + * u -= 64; + * } + * } + * + * The offset is expressed more tersely as + * u += ((int) x & 0x8) * (8 - (((int) y & 0x8) << 1)); + */ + + return u; } #define WRITE_STENCIL(x, y, src) buf[intel_offset_S8(stride, x, y)] = src; diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c index 21c4a1dddba..ee0cd252375 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.c +++ b/src/mesa/drivers/dri/intel/intel_tex.c @@ -95,17 +95,12 @@ intelGenerateMipmap(struct gl_context *ctx, GLenum target, if (!_mesa_is_format_compressed(first_image->TexFormat)) { GLuint nr_faces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; GLuint face, i; - /* Update the level information in our private data in the new images, - * since it didn't get set as part of a normal TexImage path. - */ for (face = 0; face < nr_faces; face++) { for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) { struct intel_texture_image *intelImage = intel_texture_image(texObj->Image[face][i]); if (!intelImage) break; - intelImage->level = i; - intelImage->face = face; /* Unreference the miptree to signal that the new Data is a * bare pointer from mesa. */ diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 1a3643da593..600bd1251e0 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -118,8 +118,8 @@ intel_copy_texsubimage(struct intel_context *intel, /* get dest x/y in destination texture */ intel_miptree_get_image_offset(intelImage->mt, - intelImage->level, - intelImage->face, + intelImage->base.Level, + intelImage->base.Face, 0, &image_x, &image_y); @@ -164,101 +164,6 @@ intel_copy_texsubimage(struct intel_context *intel, static void -intelCopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLint border) -{ - struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - int srcx, srcy, dstx, dsty, height; - - if (border) - goto fail; - - /* Setup or redefine the texture object, mipmap tree and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage1D(ctx, target, level, internalFormat, - width, border, - GL_RGBA, CHAN_TYPE, NULL, - &ctx->DefaultPacking, texObj, texImage); - srcx = x; - srcy = y; - dstx = 0; - dsty = 0; - height = 1; - if (!_mesa_clip_copytexsubimage(ctx, - &dstx, &dsty, - &srcx, &srcy, - &width, &height)) - return; - - if (!intel_copy_texsubimage(intel_context(ctx), target, - intel_texture_image(texImage), - internalFormat, 0, 0, x, y, width, height)) - goto fail; - - return; - - fail: - fallback_debug("%s - fallback to swrast\n", __FUNCTION__); - _mesa_meta_CopyTexImage1D(ctx, target, level, internalFormat, x, y, - width, border); -} - - -static void -intelCopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border) -{ - struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - int srcx, srcy, dstx, dsty; - - if (border) - goto fail; - - /* Setup or redefine the texture object, mipmap tree and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, - GL_RGBA, GL_UNSIGNED_BYTE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - srcx = x; - srcy = y; - dstx = 0; - dsty = 0; - if (!_mesa_clip_copytexsubimage(ctx, - &dstx, &dsty, - &srcx, &srcy, - &width, &height)) - return; - - if (!intel_copy_texsubimage(intel_context(ctx), target, - intel_texture_image(texImage), - internalFormat, 0, 0, x, y, width, height)) - goto fail; - - return; - - fail: - fallback_debug("%s - fallback to swrast\n", __FUNCTION__); - _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y, - width, height, border); -} - - -static void intelCopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width) { @@ -312,8 +217,6 @@ intelCopyTexSubImage2D(struct gl_context * ctx, GLenum target, GLint level, void intelInitTextureCopyImageFuncs(struct dd_function_table *functions) { - functions->CopyTexImage1D = intelCopyTexImage1D; - functions->CopyTexImage2D = intelCopyTexImage2D; functions->CopyTexSubImage1D = intelCopyTexSubImage1D; functions->CopyTexSubImage2D = intelCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 1f8b885bbec..4ee66847255 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -63,7 +63,7 @@ intel_miptree_create_for_teximage(struct intel_context *intel, if (intelImage->base.Border) return NULL; - if (intelImage->level > intelObj->base.BaseLevel && + if (intelImage->base.Level > intelObj->base.BaseLevel && (intelImage->base.Width == 1 || (intelObj->base.Target != GL_TEXTURE_1D && intelImage->base.Height == 1) || @@ -74,19 +74,19 @@ intel_miptree_create_for_teximage(struct intel_context *intel, * likely base level width/height/depth for a full mipmap stack * from this info, so just allocate this one level. */ - firstLevel = intelImage->level; - lastLevel = intelImage->level; + firstLevel = intelImage->base.Level; + lastLevel = intelImage->base.Level; } else { /* If this image disrespects BaseLevel, allocate from level zero. * Usually BaseLevel == 0, so it's unlikely to happen. */ - if (intelImage->level < intelObj->base.BaseLevel) + if (intelImage->base.Level < intelObj->base.BaseLevel) firstLevel = 0; else firstLevel = intelObj->base.BaseLevel; /* Figure out image dimensions at start level. */ - for (i = intelImage->level; i > firstLevel; i--) { + for (i = intelImage->base.Level; i > firstLevel; i--) { width <<= 1; if (height != 1) height <<= 1; @@ -101,7 +101,7 @@ intel_miptree_create_for_teximage(struct intel_context *intel, */ if ((intelObj->base.Sampler.MinFilter == GL_NEAREST || intelObj->base.Sampler.MinFilter == GL_LINEAR) && - intelImage->level == firstLevel && + intelImage->base.Level == firstLevel && (intel->gen < 4 || firstLevel == 0)) { lastLevel = firstLevel; } else { @@ -186,8 +186,8 @@ try_pbo_upload(struct intel_context *intel, else src_stride = width; - intel_miptree_get_image_offset(intelImage->mt, intelImage->level, - intelImage->face, 0, + intel_miptree_get_image_offset(intelImage->mt, intelImage->base.Level, + intelImage->base.Face, 0, &dst_x, &dst_y); dst_stride = intelImage->mt->region->pitch; @@ -243,8 +243,8 @@ try_pbo_zcopy(struct intel_context *intel, else src_stride = width; - intel_miptree_get_image_offset(intelImage->mt, intelImage->level, - intelImage->face, 0, + intel_miptree_get_image_offset(intelImage->mt, intelImage->base.Level, + intelImage->base.Face, 0, &dst_x, &dst_y); dst_stride = intelImage->mt->region->pitch; @@ -407,9 +407,6 @@ intelTexImage(struct gl_context * ctx, DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), level, width, height, depth, border); - intelImage->face = _mesa_tex_target_to_face(target); - intelImage->level = level; - if (_mesa_is_format_compressed(texImage->TexFormat)) { texelBytes = 0; } @@ -514,8 +511,8 @@ intelTexImage(struct gl_context * ctx, } texImage->Data = intel_miptree_image_map(intel, intelImage->mt, - intelImage->face, - intelImage->level, + intelImage->base.Face, + intelImage->base.Level, &dstRowStride, intelImage->base.ImageOffsets); } @@ -684,8 +681,8 @@ intel_get_tex_image(struct gl_context * ctx, GLenum target, GLint level, intelImage->base.Data = intel_miptree_image_map(intel, intelImage->mt, - intelImage->face, - intelImage->level, + intelImage->base.Face, + intelImage->base.Level, &intelImage->base.RowStride, intelImage->base.ImageOffsets); intelImage->base.RowStride /= intelImage->mt->cpp; @@ -816,8 +813,6 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, rb->region->width, rb->region->height, 1, 0, internalFormat, texFormat); - intelImage->face = _mesa_tex_target_to_face(target); - intelImage->level = level; texImage->RowStride = rb->region->pitch; intel_miptree_reference(&intelImage->mt, intelObj->mt); @@ -874,8 +869,6 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, image->region->width, image->region->height, 1, 0, image->internal_format, image->format); - intelImage->face = _mesa_tex_target_to_face(target); - intelImage->level = 0; texImage->RowStride = image->region->pitch; intel_miptree_reference(&intelImage->mt, intelObj->mt); diff --git a/src/mesa/drivers/dri/intel/intel_tex_obj.h b/src/mesa/drivers/dri/intel/intel_tex_obj.h index a9ae2ec5429..e7a4318b8d8 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_obj.h +++ b/src/mesa/drivers/dri/intel/intel_tex_obj.h @@ -52,11 +52,6 @@ struct intel_texture_image { struct gl_texture_image base; - /* These aren't stored in gl_texture_image - */ - GLuint level; - GLuint face; - /* If intelImage->mt != NULL, image data is stored here. * Else if intelImage->base.Data != NULL, image is stored there. * Else there is no image data. diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c index 8b43c406cf9..5fd2cc36234 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c @@ -113,7 +113,7 @@ intelTexSubimage(struct gl_context * ctx, dstRowStride = pitch; intel_miptree_get_image_offset(intelImage->mt, level, - intelImage->face, 0, + intelImage->base.Face, 0, &blit_x, &blit_y); blit_x += xoffset; blit_y += yoffset; @@ -122,8 +122,8 @@ intelTexSubimage(struct gl_context * ctx, } else { texImage->Data = intel_miptree_image_map(intel, intelImage->mt, - intelImage->face, - intelImage->level, + intelImage->base.Face, + intelImage->base.Level, &dstRowStride, texImage->ImageOffsets); } diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c index 7135a6276fe..31ac689ad77 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c @@ -42,8 +42,8 @@ copy_image_data_to_tree(struct intel_context *intel, */ intel_miptree_image_copy(intel, intelObj->mt, - intelImage->face, - intelImage->level, intelImage->mt); + intelImage->base.Face, + intelImage->base.Level, intelImage->mt); intel_miptree_release(intel, &intelImage->mt); } @@ -54,8 +54,8 @@ copy_image_data_to_tree(struct intel_context *intel, */ intel_miptree_image_data(intel, intelObj->mt, - intelImage->face, - intelImage->level, + intelImage->base.Face, + intelImage->base.Level, intelImage->base.Data, intelImage->base.RowStride, intelImage->base.RowStride * @@ -177,8 +177,8 @@ intel_tex_map_level_images(struct intel_context *intel, intelImage->base.Data = intel_miptree_image_map(intel, intelImage->mt, - intelImage->face, - intelImage->level, + intelImage->base.Face, + intelImage->base.Level, &intelImage->base.RowStride, intelImage->base.ImageOffsets); /* convert stride to texels, not bytes */ diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c index e60b91f64be..433590c4181 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c @@ -107,7 +107,7 @@ nouveau_bufferobj_data(struct gl_context *ctx, GLenum target, GLsizeiptrARB size } static void -nouveau_bufferobj_subdata(struct gl_context *ctx, GLenum target, GLintptrARB offset, +nouveau_bufferobj_subdata(struct gl_context *ctx, GLintptrARB offset, GLsizeiptrARB size, const GLvoid *data, struct gl_buffer_object *obj) { @@ -115,7 +115,7 @@ nouveau_bufferobj_subdata(struct gl_context *ctx, GLenum target, GLintptrARB off } static void -nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLenum target, GLintptrARB offset, +nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLintptrARB offset, GLsizeiptrARB size, GLvoid *data, struct gl_buffer_object *obj) { @@ -123,23 +123,6 @@ nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLenum target, GLintptrARB } static void * -nouveau_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access, - struct gl_buffer_object *obj) -{ - unsigned flags = 0; - - if (access == GL_READ_ONLY_ARB || - access == GL_READ_WRITE_ARB) - flags |= GL_MAP_READ_BIT; - if (access == GL_WRITE_ONLY_ARB || - access == GL_READ_WRITE_ARB) - flags |= GL_MAP_WRITE_BIT; - - return ctx->Driver.MapBufferRange(ctx, target, 0, obj->Size, flags, - obj); -} - -static void * nouveau_bufferobj_map_range(struct gl_context *ctx, GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access, struct gl_buffer_object *obj) @@ -169,7 +152,7 @@ nouveau_bufferobj_map_range(struct gl_context *ctx, GLenum target, GLintptr offs } static GLboolean -nouveau_bufferobj_unmap(struct gl_context *ctx, GLenum target, struct gl_buffer_object *obj) +nouveau_bufferobj_unmap(struct gl_context *ctx, struct gl_buffer_object *obj) { assert(obj->Pointer); @@ -189,7 +172,6 @@ nouveau_bufferobj_functions_init(struct dd_function_table *functions) functions->BufferData = nouveau_bufferobj_data; functions->BufferSubData = nouveau_bufferobj_subdata; functions->GetBufferSubData = nouveau_bufferobj_get_subdata; - functions->MapBuffer = nouveau_bufferobj_map; functions->MapBufferRange = nouveau_bufferobj_map_range; functions->UnmapBuffer = nouveau_bufferobj_unmap; } diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c index 02201cb53d6..44a794da396 100644 --- a/src/mesa/drivers/dri/r200/r200_ioctl.c +++ b/src/mesa/drivers/dri/r200/r200_ioctl.c @@ -185,7 +185,6 @@ static void r200Clear( struct gl_context *ctx, GLbitfield mask ) r200ContextPtr rmesa = R200_CONTEXT(ctx); __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLuint flags = 0; - GLuint color_mask = 0; GLuint orig_mask = mask; if ( R200_DEBUG & RADEON_IOCTL ) { @@ -206,13 +205,11 @@ static void r200Clear( struct gl_context *ctx, GLbitfield mask ) if ( mask & BUFFER_BIT_FRONT_LEFT ) { flags |= RADEON_FRONT; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_FRONT_LEFT; } if ( mask & BUFFER_BIT_BACK_LEFT ) { flags |= RADEON_BACK; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_BACK_LEFT; } diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c index d42e8f12041..91e77f9f7da 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.c +++ b/src/mesa/drivers/dri/r200/r200_tex.c @@ -527,7 +527,6 @@ void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *fu functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c index 7adf9ad73ed..8c9bd6d00b2 100644 --- a/src/mesa/drivers/dri/r200/r200_texstate.c +++ b/src/mesa/drivers/dri/r200/r200_texstate.c @@ -773,18 +773,12 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format struct radeon_renderbuffer *rb; radeon_texture_image *rImage; radeonContextPtr radeon; - r200ContextPtr rmesa; struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, format; gl_format texFormat; - format = GL_UNSIGNED_BYTE; - internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); - radeon = pDRICtx->driverPrivate; - rmesa = pDRICtx->driverPrivate; rfb = dPriv->driverPrivate; texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit]; diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c index 63e03b0e0c7..cf44d7f459c 100644 --- a/src/mesa/drivers/dri/r200/r200_vertprog.c +++ b/src/mesa/drivers/dri/r200/r200_vertprog.c @@ -126,10 +126,10 @@ static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_ case PROGRAM_NAMED_PARAM: //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name); case PROGRAM_CONSTANT: - *fcmd++ = paramList->ParameterValues[pi][0]; - *fcmd++ = paramList->ParameterValues[pi][1]; - *fcmd++ = paramList->ParameterValues[pi][2]; - *fcmd++ = paramList->ParameterValues[pi][3]; + *fcmd++ = paramList->ParameterValues[pi][0].f; + *fcmd++ = paramList->ParameterValues[pi][1].f; + *fcmd++ = paramList->ParameterValues[pi][2].f; + *fcmd++ = paramList->ParameterValues[pi][3].f; break; default: _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index b24274259f4..39dcb21d4f4 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -561,28 +561,29 @@ static int peephole_add_presub_add( struct rc_instruction * inst_add) { unsigned dstmask = inst_add->U.I.DstReg.WriteMask; - struct rc_src_register * src1 = NULL; - unsigned int i; - - if (!is_presub_candidate(c, inst_add)) - return 0; + unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; + unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) return 0; - /* XXX This isn't fully implemented, is it? */ - /* src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */ - for (i = 0; i < 2; i++) { - if (inst_add->U.I.SrcReg[i].Abs) - return 0; + /* src0 and src1 can't have absolute values */ + if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) + return 0; - /* XXX This looks weird, but it's basically what was here before this commit (see git blame): */ - if ((inst_add->U.I.SrcReg[i].Negate & dstmask) != dstmask && !src1) { - src1 = &inst_add->U.I.SrcReg[i]; - } - } + /* presub_replace_add() assumes only one is negative */ + if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) + return 0; + + /* if src0 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) + return 0; - if (!src1) + /* if src1 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) + return 0; + + if (!is_presub_candidate(c, inst_add)) return 0; if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { @@ -615,7 +616,7 @@ static void presub_replace_inv( * of the add instruction must have the constatnt 1 swizzle. This function * does not check const registers to see if their value is 1.0, so it should * be called after the constant_folding optimization. - * @return + * @return * 0 if the ADD instruction is still part of the program. * 1 if the ADD instruction is no longer part of the program. */ diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index 0c4d8537c61..5587c16dd44 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -84,7 +84,8 @@ static void r300FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde GLboolean mapped_named_bo = GL_FALSE; if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size, + GL_MAP_READ_BIT, mesa_ind_buf->obj); mapped_named_bo = GL_TRUE; assert(mesa_ind_buf->obj->Pointer != NULL); } @@ -138,7 +139,7 @@ static void r300FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde r300->ind_buf.count = mesa_ind_buf->count; if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj); } } @@ -163,7 +164,10 @@ static void r300SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde GLboolean mapped_named_bo = GL_FALSE; if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + ctx->Driver.MapBufferRange(ctx, 0, + mesa_ind_buf->obj->Size, + GL_MAP_READ_BIT, + mesa_ind_buf->obj); assert(mesa_ind_buf->obj->Pointer != NULL); mapped_named_bo = GL_TRUE; } @@ -184,7 +188,7 @@ static void r300SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde r300->ind_buf.count = mesa_ind_buf->count; if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj); } } else { r300FixupIndexBuffer(ctx, mesa_ind_buf); @@ -235,7 +239,8 @@ static void r300ConvertAttrib(struct gl_context *ctx, int count, const struct gl if (input->BufferObj->Name) { if (!input->BufferObj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size, + GL_MAP_READ_BIT, input->BufferObj); mapped_named_bo = GL_TRUE; } @@ -286,7 +291,7 @@ static void r300ConvertAttrib(struct gl_context *ctx, int count, const struct gl radeon_bo_unmap(attr->bo); if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + ctx->Driver.UnmapBuffer(ctx, input->BufferObj); } } @@ -302,7 +307,8 @@ static void r300AlignDataToDword(struct gl_context *ctx, const struct gl_client_ radeon_bo_map(attr->bo, 1); if (!input->BufferObj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size, + GL_MAP_READ_BIT, input->BufferObj); mapped_named_bo = GL_TRUE; } @@ -321,7 +327,7 @@ static void r300AlignDataToDword(struct gl_context *ctx, const struct gl_client_ } if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + ctx->Driver.UnmapBuffer(ctx, input->BufferObj); } radeon_bo_unmap(attr->bo); diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 590d9afe14a..93d8fe185ef 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -379,7 +379,6 @@ void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index e24ad6f088d..e4388a021ed 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -427,13 +427,8 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, type, format; gl_format texFormat; - type = GL_BGRA; - format = GL_UNSIGNED_BYTE; - internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); - radeon = pDRICtx->driverPrivate; rmesa = pDRICtx->driverPrivate; diff --git a/src/mesa/drivers/dri/r600/evergreen_fragprog.c b/src/mesa/drivers/dri/r600/evergreen_fragprog.c index e527c379b62..cc584ca2b35 100644 --- a/src/mesa/drivers/dri/r600/evergreen_fragprog.c +++ b/src/mesa/drivers/dri/r600/evergreen_fragprog.c @@ -752,10 +752,10 @@ GLboolean evergreenSetupFPconstants(struct gl_context * ctx) unNumParamData = paramList->NumParameters; for(ui=0; ui<unNumParamData; ui++) { - evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } /* alloc multiple of 16 constants */ diff --git a/src/mesa/drivers/dri/r600/evergreen_render.c b/src/mesa/drivers/dri/r600/evergreen_render.c index 4507be29d86..74563caf47c 100644 --- a/src/mesa/drivers/dri/r600/evergreen_render.c +++ b/src/mesa/drivers/dri/r600/evergreen_render.c @@ -403,7 +403,8 @@ static void evergreenConvertAttrib(struct gl_context *ctx, int count, { if (!input->BufferObj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size, + GL_MAP_READ_BIT, input->BufferObj); mapped_named_bo = GL_TRUE; } @@ -456,7 +457,7 @@ static void evergreenConvertAttrib(struct gl_context *ctx, int count, if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + ctx->Driver.UnmapBuffer(ctx, input->BufferObj); } } @@ -470,7 +471,8 @@ static void evergreenFixupIndexBuffer(struct gl_context *ctx, const struct _mesa if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size, + GL_MAP_READ_BIT, mesa_ind_buf->obj); mapped_named_bo = GL_TRUE; assert(mesa_ind_buf->obj->Pointer != NULL); } @@ -531,7 +533,7 @@ static void evergreenFixupIndexBuffer(struct gl_context *ctx, const struct _mesa if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj); } } @@ -606,7 +608,8 @@ static void evergreenSetupIndexBuffer(struct gl_context *ctx, const struct _mesa if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size, + GL_MAP_READ_BIT, mesa_ind_buf->obj); assert(mesa_ind_buf->obj->Pointer != NULL); mapped_named_bo = GL_TRUE; } @@ -629,7 +632,7 @@ static void evergreenSetupIndexBuffer(struct gl_context *ctx, const struct _mesa if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj); } } else @@ -655,7 +658,8 @@ static void evergreenAlignDataToDword(struct gl_context *ctx, if (!input->BufferObj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size, + GL_MAP_READ_BIT, input->BufferObj->obj); mapped_named_bo = GL_TRUE; } @@ -675,7 +679,7 @@ static void evergreenAlignDataToDword(struct gl_context *ctx, radeon_bo_unmap(attr->bo); if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + ctx->Driver.UnmapBuffer(ctx, input->BufferObj); } attr->stride = dst_stride; diff --git a/src/mesa/drivers/dri/r600/evergreen_tex.c b/src/mesa/drivers/dri/r600/evergreen_tex.c index 33a5f277683..d240a216817 100644 --- a/src/mesa/drivers/dri/r600/evergreen_tex.c +++ b/src/mesa/drivers/dri/r600/evergreen_tex.c @@ -1288,19 +1288,12 @@ void evergreenSetTexBuffer(__DRIcontext *pDRICtx, GLint target, GLint glx_textur struct radeon_renderbuffer *rb; radeon_texture_image *rImage; radeonContextPtr radeon; - context_t *rmesa; struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, type, format; gl_format texFormat; - type = GL_BGRA; - format = GL_UNSIGNED_BYTE; - internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); - radeon = pDRICtx->driverPrivate; - rmesa = pDRICtx->driverPrivate; rfb = dPriv->driverPrivate; texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit]; @@ -1688,7 +1681,6 @@ void evergreenInitTextureFuncs(radeonContextPtr radeon, struct dd_function_table functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r600/evergreen_vertprog.c b/src/mesa/drivers/dri/r600/evergreen_vertprog.c index 018869b9996..117916ac78f 100644 --- a/src/mesa/drivers/dri/r600/evergreen_vertprog.c +++ b/src/mesa/drivers/dri/r600/evergreen_vertprog.c @@ -684,17 +684,17 @@ GLboolean evergreenSetupVPconstants(struct gl_context * ctx) for(ui=0; ui<unNumParamData; ui++) { if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) { - evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0]; - evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1]; - evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2]; - evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3]; + evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f; + evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f; + evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f; + evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f; } else { - evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } } diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c index ce2f7779563..74f048b1062 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c @@ -259,13 +259,11 @@ static int r600_cs_process_relocs(struct radeon_cs_int *csi, uint32_t * reloc_chunk, uint32_t * length_dw_reloc_chunk) { - struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm; struct r600_cs_reloc_legacy *relocs; int i, j, r; uint32_t offset_dw = 0; - csm = (struct r600_cs_manager_legacy*)csi->csm; relocs = (struct r600_cs_reloc_legacy *)csi->relocs; restart: for (i = 0; i < csi->crelocs; i++) { diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c index eb7ed30c7a3..3efa1d197fa 100644 --- a/src/mesa/drivers/dri/r600/r600_tex.c +++ b/src/mesa/drivers/dri/r600/r600_tex.c @@ -470,7 +470,6 @@ void r600InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 949db29c189..65fae7195fd 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -1141,13 +1141,8 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, type, format; gl_format texFormat; - type = GL_BGRA; - format = GL_UNSIGNED_BYTE; - internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); - radeon = pDRICtx->driverPrivate; rmesa = pDRICtx->driverPrivate; diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 40494cd6af0..6f9834e68fe 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -778,10 +778,10 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx) unNumParamData = paramList->NumParameters; for(ui=0; ui<unNumParamData; ui++) { - r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } /* Load fp constants to gpu */ diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 0f7a7a46b71..a565c9f2087 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -490,7 +490,8 @@ static void r700ConvertAttrib(struct gl_context *ctx, int count, { if (!input->BufferObj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size, + GL_MAP_READ_BIT, input->BufferObj); mapped_named_bo = GL_TRUE; } @@ -543,7 +544,7 @@ static void r700ConvertAttrib(struct gl_context *ctx, int count, if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + ctx->Driver.UnmapBuffer(ctx, input->BufferObj); } } @@ -564,7 +565,8 @@ static void r700AlignDataToDword(struct gl_context *ctx, if (!input->BufferObj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size, + GL_MAP_READ_BIT, input->BufferObj); mapped_named_bo = GL_TRUE; } @@ -584,7 +586,7 @@ static void r700AlignDataToDword(struct gl_context *ctx, radeon_bo_unmap(attr->bo); if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + ctx->Driver.UnmapBuffer(ctx, input->BufferObj); } attr->stride = dst_stride; @@ -727,7 +729,8 @@ static void r700FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size, + GL_MAP_READ_BIT, mesa_ind_buf->obj); mapped_named_bo = GL_TRUE; assert(mesa_ind_buf->obj->Pointer != NULL); } @@ -788,7 +791,7 @@ static void r700FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj); } } @@ -813,7 +816,8 @@ static void r700SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size, + GL_MAP_READ_BIT, mesa_ind_buf->obj); assert(mesa_ind_buf->obj->Pointer != NULL); mapped_named_bo = GL_TRUE; } @@ -836,7 +840,7 @@ static void r700SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj); } } else diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 7d4be9180a0..b1e2742b27d 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -720,17 +720,17 @@ GLboolean r700SetupVertexProgram(struct gl_context * ctx) for(ui=0; ui<unNumParamData; ui++) { if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) { - r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0]; - r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1]; - r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2]; - r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3]; + r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f; + r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f; + r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f; + r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f; } else { - r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } } diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h index 607b7470d4b..a74c6c7a575 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h +++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h @@ -78,6 +78,9 @@ static inline uint32_t radeon_gem_name_bo(struct radeon_bo *dummy) static inline void *radeon_bo_manager_gem_ctor(int fd) { + fprintf(stderr, "[%s:%u] Mesa built without Radeon libdrm support.\n", + __func__, __LINE__); + return NULL; } @@ -87,6 +90,9 @@ static inline void radeon_bo_manager_gem_dtor(void *dummy) static inline void *radeon_cs_manager_gem_ctor(int fd) { + fprintf(stderr, "[%s:%u] Mesa built without Radeon libdrm support.\n", + __func__, __LINE__); + return NULL; } diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c index 0d1af726c07..7b59c0377f8 100644 --- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c +++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c @@ -130,7 +130,6 @@ radeonBufferData(struct gl_context * ctx, */ static void radeonBufferSubData(struct gl_context * ctx, - GLenum target, GLintptrARB offset, GLsizeiptrARB size, const GLvoid * data, @@ -155,7 +154,6 @@ radeonBufferSubData(struct gl_context * ctx, */ static void radeonGetBufferSubData(struct gl_context * ctx, - GLenum target, GLintptrARB offset, GLsizeiptrARB size, GLvoid * data, @@ -171,17 +169,18 @@ radeonGetBufferSubData(struct gl_context * ctx, } /** - * Called via glMapBufferARB() + * Called via glMapBuffer() and glMapBufferRange() */ static void * -radeonMapBuffer(struct gl_context * ctx, - GLenum target, - GLenum access, - struct gl_buffer_object *obj) +radeonMapBufferRange(struct gl_context * ctx, + GLintptr offset, GLsizeiptr length, + GLbitfield access, struct gl_buffer_object *obj) { struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj); + const GLboolean write_only = + (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_WRITE_BIT; - if (access == GL_WRITE_ONLY_ARB) { + if (write_only) { ctx->Driver.Flush(ctx); } @@ -190,12 +189,13 @@ radeonMapBuffer(struct gl_context * ctx, return NULL; } - radeon_bo_map(radeon_obj->bo, access == GL_WRITE_ONLY_ARB); + obj->Offset = offset; + obj->Length = length; + obj->AccessFlags = access; - obj->Pointer = radeon_obj->bo->ptr; - obj->Length = obj->Size; - obj->Offset = 0; + radeon_bo_map(radeon_obj->bo, write_only); + obj->Pointer = radeon_obj->bo->ptr + offset; return obj->Pointer; } @@ -205,7 +205,6 @@ radeonMapBuffer(struct gl_context * ctx, */ static GLboolean radeonUnmapBuffer(struct gl_context * ctx, - GLenum target, struct gl_buffer_object *obj) { struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj); @@ -229,6 +228,6 @@ radeonInitBufferObjectFuncs(struct dd_function_table *functions) functions->BufferData = radeonBufferData; functions->BufferSubData = radeonBufferSubData; functions->GetBufferSubData = radeonGetBufferSubData; - functions->MapBuffer = radeonMapBuffer; + functions->MapBufferRange = radeonMapBufferRange; functions->UnmapBuffer = radeonUnmapBuffer; } diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index bfc307ca987..e7a6623cf84 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -436,7 +436,6 @@ void radeonCopyBuffer( __DRIdrawable *dPriv, const drm_clip_rect_t *rect) { radeonContextPtr rmesa; - struct radeon_framebuffer *rfb; GLint nbox, i, ret; assert(dPriv); @@ -447,8 +446,6 @@ void radeonCopyBuffer( __DRIdrawable *dPriv, LOCK_HARDWARE(rmesa); - rfb = dPriv->driverPrivate; - if ( RADEON_DEBUG & RADEON_IOCTL ) { fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx ); } @@ -527,8 +524,6 @@ static GLboolean radeonPageFlip( __DRIdrawable *dPriv ) { radeonContextPtr radeon; GLint ret; - __DRIscreen *psp; - struct radeon_renderbuffer *rrb; struct radeon_framebuffer *rfb; assert(dPriv); @@ -537,9 +532,6 @@ static GLboolean radeonPageFlip( __DRIdrawable *dPriv ) radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; rfb = dPriv->driverPrivate; - rrb = (void *)rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer; - - psp = dPriv->driScreenPriv; LOCK_HARDWARE(radeon); diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index bf8925f61d0..c08b79484af 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -515,7 +515,6 @@ void radeon_prepare_render(radeonContextPtr radeon) __DRIcontext *driContext = radeon->dri.context; __DRIdrawable *drawable; __DRIscreen *screen; - struct radeon_framebuffer *draw; screen = driContext->driScreenPriv; if (!screen->dri2.loader) @@ -527,7 +526,6 @@ void radeon_prepare_render(radeonContextPtr radeon) radeon_update_renderbuffers(driContext, drawable, GL_FALSE); /* Intel driver does the equivalent of this, no clue if it is needed:*/ - draw = drawable->driverPrivate; radeon_draw_buffer(radeon->glCtx, radeon->glCtx->DrawBuffer); driContext->dri2.draw_stamp = drawable->dri2.stamp; diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c index c2722a4e195..5595b705b15 100644 --- a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c +++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c @@ -218,11 +218,9 @@ static int cs_end(struct radeon_cs_int *cs, static int cs_process_relocs(struct radeon_cs_int *cs) { - struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm; struct cs_reloc_legacy *relocs; int i, j, r; - csm = (struct cs_manager_legacy*)cs->csm; relocs = (struct cs_reloc_legacy *)cs->relocs; restart: for (i = 0; i < cs->crelocs; i++) diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c index a91d8727792..c23e9c2d2a2 100644 --- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c +++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c @@ -560,7 +560,6 @@ static void radeonClear( struct gl_context *ctx, GLbitfield mask ) r100ContextPtr rmesa = R100_CONTEXT(ctx); __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLuint flags = 0; - GLuint color_mask = 0; GLuint orig_mask = mask; if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) { @@ -582,13 +581,11 @@ static void radeonClear( struct gl_context *ctx, GLbitfield mask ) if ( mask & BUFFER_BIT_FRONT_LEFT ) { flags |= RADEON_FRONT; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_FRONT_LEFT; } if ( mask & BUFFER_BIT_BACK_LEFT ) { flags |= RADEON_BACK; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_BACK_LEFT; } diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c index 7b6bd36dcf7..ae8a212f806 100644 --- a/src/mesa/drivers/dri/radeon/radeon_lock.c +++ b/src/mesa/drivers/dri/radeon/radeon_lock.c @@ -114,16 +114,6 @@ void radeon_lock_hardware(radeonContextPtr radeon ) { char ret = 0; - struct radeon_framebuffer *rfb = NULL; - struct radeon_renderbuffer *rrb = NULL; - - if (radeon_get_drawable(radeon)) { - rfb = radeon_get_drawable(radeon)->driverPrivate; - - if (rfb) - rrb = radeon_get_renderbuffer(&rfb->base, - rfb->base._ColorDrawBufferIndexes[0]); - } if (!radeon->radeonScreen->driScreen->dri2.enabled) { if (ATOMIC_INC_AND_FETCH(radeon->dri.hwLockCount) > 1) diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c index 25a8ddf7b6a..a0b5506ae76 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex.c @@ -455,7 +455,6 @@ void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table * functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c index f14dfa25d40..94ff3c4a727 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c @@ -141,61 +141,6 @@ do_copy_texsubimage(struct gl_context *ctx, } void -radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border) -{ - struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - int srcx, srcy, dstx, dsty; - - radeonContextPtr radeon = RADEON_CONTEXT(ctx); - radeon_prepare_render(radeon); - - if (border) - goto fail; - - /* Setup or redefine the texture object, mipmap tree and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, - GL_RGBA, GL_UNSIGNED_BYTE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - srcx = x; - srcy = y; - dstx = 0; - dsty = 0; - if (!_mesa_clip_copytexsubimage(ctx, - &dstx, &dsty, - &srcx, &srcy, - &width, &height)) { - return; - } - - if (!do_copy_texsubimage(ctx, target, level, - radeon_tex_obj(texObj), (radeon_texture_image *)texImage, - 0, 0, x, y, width, height)) { - goto fail; - } - - return; - -fail: - radeon_print(RADEON_FALLBACKS, RADEON_NORMAL, - "Falling back to sw for glCopyTexImage2D (internalFormat %s, border %d)\n", - _mesa_lookup_enum_by_nr(internalFormat), border); - - _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y, - width, height, border); -} - -void radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index 9ba98e303a7..430309392a0 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -648,18 +648,12 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_form struct radeon_renderbuffer *rb; radeon_texture_image *rImage; radeonContextPtr radeon; - r100ContextPtr rmesa; struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, format; gl_format texFormat; - format = GL_UNSIGNED_BYTE; - internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? GL_RGB : GL_RGBA); - radeon = pDRICtx->driverPrivate; - rmesa = pDRICtx->driverPrivate; rfb = dPriv->driverPrivate; texUnit = _mesa_get_current_tex_unit(radeon->glCtx); @@ -1018,7 +1012,7 @@ static GLboolean radeon_validate_texgen( struct gl_context *ctx, GLuint unit ) static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int unit) { const struct gl_texture_image *firstImage; - GLint log2Width, log2Height, log2Depth, texelBytes; + GLint log2Width, log2Height, texelBytes; if ( t->bo ) { return GL_TRUE; @@ -1033,7 +1027,6 @@ static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int log2Width = firstImage->WidthLog2; log2Height = firstImage->HeightLog2; - log2Depth = firstImage->DepthLog2; texelBytes = _mesa_get_format_bytes(firstImage->TexFormat); if (!t->image_override) { diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index ce0df32bfe4..ad7e4c146a4 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -787,18 +787,6 @@ static void radeon_teximage( radeon_print(RADEON_TEXTURE, RADEON_NORMAL, "%s %dd: texObj %p, texImage %p, face %d, level %d\n", __func__, dims, texObj, texImage, face, level); - { - struct radeon_bo *bo; - bo = !image->mt ? image->bo : image->mt->bo; - if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) { - radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, - "%s Calling teximage for texture that is " - "queued for GPU processing.\n", - __func__); - radeon_firevertices(rmesa); - } - } - t->validated = GL_FALSE; @@ -820,6 +808,18 @@ static void radeon_teximage( } } + { + struct radeon_bo *bo; + bo = !image->mt ? image->bo : image->mt->bo; + if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) { + radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, + "%s Calling teximage for texture that is " + "queued for GPU processing.\n", + __func__); + radeon_firevertices(rmesa); + } + } + /* Upload texture image; note that the spec allows pixels to be NULL */ if (compressed) { pixels = _mesa_validate_pbo_compressed_teximage( diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h index 538a07fbba8..6fc06d967dd 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.h +++ b/src/mesa/drivers/dri/radeon/radeon_texture.h @@ -126,11 +126,6 @@ void radeonGetCompressedTexImage(struct gl_context *ctx, GLenum target, GLint le struct gl_texture_object *texObj, struct gl_texture_image *texImage); -void radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border); - void radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c index 81f48f9d95a..81d000b3952 100644 --- a/src/mesa/drivers/x11/xm_dd.c +++ b/src/mesa/drivers/x11/xm_dd.c @@ -454,10 +454,10 @@ xmesa_DrawPixels_8R8G8B( struct gl_context *ctx, "glDrawPixels(invalid PBO access)"); return; } - buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, - GL_PIXEL_UNPACK_BUFFER_EXT, - GL_READ_ONLY_ARB, - unpack->BufferObj); + buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, + unpack->BufferObj->Size, + GL_MAP_READ_BIT, + unpack->BufferObj); if (!buf) { /* buffer is already mapped - that's an error */ _mesa_error(ctx, GL_INVALID_OPERATION, @@ -508,8 +508,7 @@ xmesa_DrawPixels_8R8G8B( struct gl_context *ctx, } if (_mesa_is_bufferobj(unpack->BufferObj)) { - ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - unpack->BufferObj); + ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj); } } else { @@ -589,10 +588,10 @@ xmesa_DrawPixels_5R6G5B( struct gl_context *ctx, "glDrawPixels(invalid PBO access)"); return; } - buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, - GL_PIXEL_UNPACK_BUFFER_EXT, - GL_READ_ONLY_ARB, - unpack->BufferObj); + buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, + unpack->BufferObj->Size, + GL_MAP_READ_BIT, + unpack->BufferObj); if (!buf) { /* buffer is already mapped - that's an error */ _mesa_error(ctx, GL_INVALID_OPERATION, @@ -642,8 +641,7 @@ xmesa_DrawPixels_5R6G5B( struct gl_context *ctx, } if (unpack->BufferObj->Name) { - ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - unpack->BufferObj); + ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj); } } else { diff --git a/src/mesa/main/.gitignore b/src/mesa/main/.gitignore index 2575f44df4a..d0744e3f0d7 100644 --- a/src/mesa/main/.gitignore +++ b/src/mesa/main/.gitignore @@ -4,3 +4,7 @@ get_es1.c get_es2.c git_sha1.h git_sha1.h.tmp +api_exec_es1_dispatch.h +api_exec_es1_remap_helper.h +api_exec_es2_dispatch.h +api_exec_es2_remap_helper.h diff --git a/src/mesa/main/api_arrayelt.c b/src/mesa/main/api_arrayelt.c index f88da845853..b93a057e68b 100644 --- a/src/mesa/main/api_arrayelt.c +++ b/src/mesa/main/api_arrayelt.c @@ -1602,10 +1602,10 @@ void _ae_map_vbos( struct gl_context *ctx ) _ae_update_state(ctx); for (i = 0; i < actx->nr_vbos; i++) - ctx->Driver.MapBuffer(ctx, - GL_ARRAY_BUFFER_ARB, - GL_DYNAMIC_DRAW_ARB, - actx->vbo[i]); + ctx->Driver.MapBufferRange(ctx, 0, + actx->vbo[i]->Size, + GL_MAP_READ_BIT, + actx->vbo[i]); if (actx->nr_vbos) actx->mapped_vbos = GL_TRUE; @@ -1622,9 +1622,7 @@ void _ae_unmap_vbos( struct gl_context *ctx ) assert (!actx->NewState); for (i = 0; i < actx->nr_vbos; i++) - ctx->Driver.UnmapBuffer(ctx, - GL_ARRAY_BUFFER_ARB, - actx->vbo[i]); + ctx->Driver.UnmapBuffer(ctx, actx->vbo[i]); actx->mapped_vbos = GL_FALSE; } diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c index 2981d42297a..699b414f502 100644 --- a/src/mesa/main/api_validate.c +++ b/src/mesa/main/api_validate.c @@ -65,8 +65,8 @@ _mesa_max_buffer_index(struct gl_context *ctx, GLuint count, GLenum type, if (_mesa_is_bufferobj(elementBuf)) { /* elements are in a user-defined buffer object. need to map it */ - map = ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, - GL_READ_ONLY, elementBuf); + map = ctx->Driver.MapBufferRange(ctx, 0, elementBuf->Size, + GL_MAP_READ_BIT, elementBuf); /* Actual address is the sum of pointers */ indices = (const GLvoid *) ADD_POINTERS(map, (const GLubyte *) indices); } @@ -89,7 +89,7 @@ _mesa_max_buffer_index(struct gl_context *ctx, GLuint count, GLenum type, } if (map) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, elementBuf); + ctx->Driver.UnmapBuffer(ctx, elementBuf); } return max; diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index c52358ecb04..c453f9c8554 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -386,11 +386,11 @@ _mesa_buffer_data( struct gl_context *ctx, GLenum target, GLsizeiptrARB size, * \sa glBufferSubDataARB, dd_function_table::BufferSubData. */ static void -_mesa_buffer_subdata( struct gl_context *ctx, GLenum target, GLintptrARB offset, +_mesa_buffer_subdata( struct gl_context *ctx, GLintptrARB offset, GLsizeiptrARB size, const GLvoid * data, struct gl_buffer_object * bufObj ) { - (void) ctx; (void) target; + (void) ctx; /* this should have been caught in _mesa_BufferSubData() */ ASSERT(size + offset <= bufObj->Size); @@ -419,12 +419,11 @@ _mesa_buffer_subdata( struct gl_context *ctx, GLenum target, GLintptrARB offset, * \sa glBufferGetSubDataARB, dd_function_table::GetBufferSubData. */ static void -_mesa_buffer_get_subdata( struct gl_context *ctx, - GLenum target, GLintptrARB offset, +_mesa_buffer_get_subdata( struct gl_context *ctx, GLintptrARB offset, GLsizeiptrARB size, GLvoid * data, struct gl_buffer_object * bufObj ) { - (void) ctx; (void) target; + (void) ctx; if (bufObj->Data && ((GLsizeiptrARB) (size + offset) <= bufObj->Size)) { memcpy( data, (GLubyte *) bufObj->Data + offset, size ); @@ -433,49 +432,15 @@ _mesa_buffer_get_subdata( struct gl_context *ctx, /** - * Default callback for \c dd_function_tabel::MapBuffer(). - * - * The function parameters will have been already tested for errors. - * - * \param ctx GL context. - * \param target Buffer object target on which to operate. - * \param access Information about how the buffer will be accessed. - * \param bufObj Object to be mapped. - * \return A pointer to the object's internal data store that can be accessed - * by the processor - * - * \sa glMapBufferARB, dd_function_table::MapBuffer - */ -static void * -_mesa_buffer_map( struct gl_context *ctx, GLenum target, GLenum access, - struct gl_buffer_object *bufObj ) -{ - (void) ctx; - (void) target; - (void) access; - /* Just return a direct pointer to the data */ - if (_mesa_bufferobj_mapped(bufObj)) { - /* already mapped! */ - return NULL; - } - bufObj->Pointer = bufObj->Data; - bufObj->Length = bufObj->Size; - bufObj->Offset = 0; - return bufObj->Pointer; -} - - -/** * Default fallback for \c dd_function_table::MapBufferRange(). * Called via glMapBufferRange(). */ static void * -_mesa_buffer_map_range( struct gl_context *ctx, GLenum target, GLintptr offset, +_mesa_buffer_map_range( struct gl_context *ctx, GLintptr offset, GLsizeiptr length, GLbitfield access, struct gl_buffer_object *bufObj ) { (void) ctx; - (void) target; assert(!_mesa_bufferobj_mapped(bufObj)); /* Just return a direct pointer to the data */ bufObj->Pointer = bufObj->Data + offset; @@ -491,12 +456,11 @@ _mesa_buffer_map_range( struct gl_context *ctx, GLenum target, GLintptr offset, * Called via glFlushMappedBufferRange(). */ static void -_mesa_buffer_flush_mapped_range( struct gl_context *ctx, GLenum target, +_mesa_buffer_flush_mapped_range( struct gl_context *ctx, GLintptr offset, GLsizeiptr length, struct gl_buffer_object *obj ) { (void) ctx; - (void) target; (void) offset; (void) length; (void) obj; @@ -512,11 +476,9 @@ _mesa_buffer_flush_mapped_range( struct gl_context *ctx, GLenum target, * \sa glUnmapBufferARB, dd_function_table::UnmapBuffer */ static GLboolean -_mesa_buffer_unmap( struct gl_context *ctx, GLenum target, - struct gl_buffer_object *bufObj ) +_mesa_buffer_unmap( struct gl_context *ctx, struct gl_buffer_object *bufObj ) { (void) ctx; - (void) target; /* XXX we might assert here that bufObj->Pointer is non-null */ bufObj->Pointer = NULL; bufObj->Length = 0; @@ -543,16 +505,16 @@ _mesa_copy_buffer_subdata(struct gl_context *ctx, assert(!_mesa_bufferobj_mapped(src)); assert(!_mesa_bufferobj_mapped(dst)); - srcPtr = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_COPY_READ_BUFFER, - GL_READ_ONLY, src); - dstPtr = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_COPY_WRITE_BUFFER, - GL_WRITE_ONLY, dst); + srcPtr = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, src->Size, + GL_MAP_READ_BIT, src); + dstPtr = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, dst->Size, + GL_MAP_WRITE_BIT, dst); if (srcPtr && dstPtr) memcpy(dstPtr + writeOffset, srcPtr + readOffset, size); - ctx->Driver.UnmapBuffer(ctx, GL_COPY_READ_BUFFER, src); - ctx->Driver.UnmapBuffer(ctx, GL_COPY_WRITE_BUFFER, dst); + ctx->Driver.UnmapBuffer(ctx, src); + ctx->Driver.UnmapBuffer(ctx, dst); } @@ -712,7 +674,6 @@ _mesa_init_buffer_object_functions(struct dd_function_table *driver) driver->BufferData = _mesa_buffer_data; driver->BufferSubData = _mesa_buffer_subdata; driver->GetBufferSubData = _mesa_buffer_get_subdata; - driver->MapBuffer = _mesa_buffer_map; driver->UnmapBuffer = _mesa_buffer_unmap; /* GL_ARB_map_buffer_range */ @@ -774,7 +735,7 @@ _mesa_DeleteBuffersARB(GLsizei n, const GLuint *ids) if (_mesa_bufferobj_mapped(bufObj)) { /* if mapped, unmap it now */ - ctx->Driver.UnmapBuffer(ctx, 0, bufObj); + ctx->Driver.UnmapBuffer(ctx, bufObj); bufObj->AccessFlags = DEFAULT_ACCESS; bufObj->Pointer = NULL; } @@ -934,7 +895,7 @@ _mesa_BufferDataARB(GLenum target, GLsizeiptrARB size, if (_mesa_bufferobj_mapped(bufObj)) { /* Unmap the existing buffer. We'll replace it now. Not an error. */ - ctx->Driver.UnmapBuffer(ctx, target, bufObj); + ctx->Driver.UnmapBuffer(ctx, bufObj); bufObj->AccessFlags = DEFAULT_ACCESS; ASSERT(bufObj->Pointer == NULL); } @@ -980,7 +941,7 @@ _mesa_BufferSubDataARB(GLenum target, GLintptrARB offset, bufObj->Written = GL_TRUE; ASSERT(ctx->Driver.BufferSubData); - ctx->Driver.BufferSubData( ctx, target, offset, size, data, bufObj ); + ctx->Driver.BufferSubData( ctx, offset, size, data, bufObj ); } @@ -1000,7 +961,7 @@ _mesa_GetBufferSubDataARB(GLenum target, GLintptrARB offset, } ASSERT(ctx->Driver.GetBufferSubData); - ctx->Driver.GetBufferSubData( ctx, target, offset, size, data, bufObj ); + ctx->Driver.GetBufferSubData( ctx, offset, size, data, bufObj ); } @@ -1043,8 +1004,8 @@ _mesa_MapBufferARB(GLenum target, GLenum access) return NULL; } - ASSERT(ctx->Driver.MapBuffer); - map = ctx->Driver.MapBuffer( ctx, target, access, bufObj ); + ASSERT(ctx->Driver.MapBufferRange); + map = ctx->Driver.MapBufferRange(ctx, 0, bufObj->Size, accessFlags, bufObj); if (!map) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glMapBufferARB(map failed)"); return NULL; @@ -1147,7 +1108,7 @@ _mesa_UnmapBufferARB(GLenum target) } #endif - status = ctx->Driver.UnmapBuffer( ctx, target, bufObj ); + status = ctx->Driver.UnmapBuffer( ctx, bufObj ); bufObj->AccessFlags = DEFAULT_ACCESS; ASSERT(bufObj->Pointer == NULL); ASSERT(bufObj->Offset == 0); @@ -1451,8 +1412,7 @@ _mesa_MapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length, } ASSERT(ctx->Driver.MapBufferRange); - map = ctx->Driver.MapBufferRange(ctx, target, offset, length, - access, bufObj); + map = ctx->Driver.MapBufferRange(ctx, offset, length, access, bufObj); if (!map) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glMapBufferARB(map failed)"); } @@ -1535,7 +1495,7 @@ _mesa_FlushMappedBufferRange(GLenum target, GLintptr offset, GLsizeiptr length) ASSERT(bufObj->AccessFlags & GL_MAP_WRITE_BIT); if (ctx->Driver.FlushMappedBufferRange) - ctx->Driver.FlushMappedBufferRange(ctx, target, offset, length, bufObj); + ctx->Driver.FlushMappedBufferRange(ctx, offset, length, bufObj); } diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h index 743841be4ef..8ed1c6fa61f 100644 --- a/src/mesa/main/compiler.h +++ b/src/mesa/main/compiler.h @@ -45,9 +45,6 @@ #include <stdlib.h> #include <stdio.h> #include <string.h> -#if defined(__linux__) && defined(__i386__) -#include <fpu_control.h> -#endif #include <float.h> #include <stdarg.h> @@ -60,29 +57,7 @@ extern "C" { /** * Get standard integer types */ -#if defined(_MSC_VER) - typedef __int8 int8_t; - typedef unsigned __int8 uint8_t; - typedef __int16 int16_t; - typedef unsigned __int16 uint16_t; - typedef __int32 int32_t; - typedef unsigned __int32 uint32_t; - typedef __int64 int64_t; - typedef unsigned __int64 uint64_t; - -# if defined(_WIN64) - typedef __int64 intptr_t; - typedef unsigned __int64 uintptr_t; -# else - typedef __int32 intptr_t; - typedef unsigned __int32 uintptr_t; -# endif - -# define INT64_C(__val) __val##i64 -# define UINT64_C(__val) __val##ui64 -#else -# include <stdint.h> -#endif +#include <stdint.h> /** @@ -139,26 +114,28 @@ extern "C" { /** * Function inlining */ -#if defined(__GNUC__) -# define INLINE __inline__ -#elif defined(__MSC__) -# define INLINE __inline -#elif defined(_MSC_VER) -# define INLINE __inline -#elif defined(__ICL) -# define INLINE __inline -#elif defined(__INTEL_COMPILER) -# define INLINE inline -#elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100) -# define INLINE __inline -#elif defined(__SUNPRO_C) && defined(__C99FEATURES__) -# define INLINE inline -# define __inline inline -# define __inline__ inline -#elif (__STDC_VERSION__ >= 199901L) /* C99 */ -# define INLINE inline -#else -# define INLINE +#ifndef INLINE +# if defined(__GNUC__) +# define INLINE __inline__ +# elif defined(__MSC__) +# define INLINE __inline +# elif defined(_MSC_VER) +# define INLINE __inline +# elif defined(__ICL) +# define INLINE __inline +# elif defined(__INTEL_COMPILER) +# define INLINE inline +# elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100) +# define INLINE __inline +# elif defined(__SUNPRO_C) && defined(__C99FEATURES__) +# define INLINE inline +# define __inline inline +# define __inline__ inline +# elif (__STDC_VERSION__ >= 199901L) /* C99 */ +# define INLINE inline +# else +# define INLINE +# endif #endif diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 9fe6d527f92..fcf40ecf102 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -189,31 +189,22 @@ struct dd_function_table { /*@{*/ /** - * Choose texture format. - * - * This is called by the \c _mesa_store_tex[sub]image[123]d() fallback - * functions. The driver should examine \p internalFormat and return a - * gl_format value. + * Choose actual hardware texture format given the user-provided source + * image format and type and the desired internal format. In some + * cases, srcFormat and srcType can be GL_NONE. + * Called by glTexImage(), etc. */ - GLuint (*ChooseTextureFormat)( struct gl_context *ctx, GLint internalFormat, + gl_format (*ChooseTextureFormat)( struct gl_context *ctx, GLint internalFormat, GLenum srcFormat, GLenum srcType ); /** - * Called by glTexImage1D(). - * - * \param target user specified. - * \param format user specified. - * \param type user specified. - * \param pixels user specified. - * \param packing indicates the image packing of pixels. + * Called by glTexImage1D(). Simply copy the source texture data into the + * destination texture memory. The gl_texture_image fields, etc. will be + * fully initialized. + * The parameters are the same as glTexImage1D(), plus: + * \param packing describes how to unpack the source data. * \param texObj is the target texture object. - * \param texImage is the target texture image. It will have the texture \p - * width, \p height, \p depth, \p border and \p internalFormat information. - * - * \p retainInternalCopy is returned by this function and indicates whether - * core Mesa should keep an internal copy of the texture image. - * - * Drivers should call a fallback routine from texstore.c if needed. + * \param texImage is the target texture image. */ void (*TexImage1D)( struct gl_context *ctx, GLenum target, GLint level, GLint internalFormat, @@ -250,25 +241,9 @@ struct dd_function_table { struct gl_texture_image *texImage ); /** - * Called by glTexSubImage1D(). - * - * \param target user specified. - * \param level user specified. - * \param xoffset user specified. - * \param yoffset user specified. - * \param zoffset user specified. - * \param width user specified. - * \param height user specified. - * \param depth user specified. - * \param format user specified. - * \param type user specified. - * \param pixels user specified. - * \param packing indicates the image packing of pixels. - * \param texObj is the target texture object. - * \param texImage is the target texture image. It will have the texture \p - * width, \p height, \p border and \p internalFormat information. - * - * The driver should use a fallback routine from texstore.c if needed. + * Called by glTexSubImage1D(). Replace a subset of the target texture + * with new texel data. + * \sa dd_function_table::TexImage1D. */ void (*TexSubImage1D)( struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, GLsizei width, @@ -315,24 +290,6 @@ struct dd_function_table { struct gl_texture_image *texImage ); /** - * Called by glCopyTexImage1D(). - * - * Drivers should use a fallback routine from texstore.c if needed. - */ - void (*CopyTexImage1D)( struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLint border ); - - /** - * Called by glCopyTexImage2D(). - * - * Drivers should use a fallback routine from texstore.c if needed. - */ - void (*CopyTexImage2D)( struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLsizei height, GLint border ); - - /** * Called by glCopyTexSubImage1D(). * * Drivers should use a fallback routine from texstore.c if needed. @@ -741,17 +698,14 @@ struct dd_function_table { const GLvoid *data, GLenum usage, struct gl_buffer_object *obj ); - void (*BufferSubData)( struct gl_context *ctx, GLenum target, GLintptrARB offset, + void (*BufferSubData)( struct gl_context *ctx, GLintptrARB offset, GLsizeiptrARB size, const GLvoid *data, struct gl_buffer_object *obj ); - void (*GetBufferSubData)( struct gl_context *ctx, GLenum target, + void (*GetBufferSubData)( struct gl_context *ctx, GLintptrARB offset, GLsizeiptrARB size, GLvoid *data, struct gl_buffer_object *obj ); - void * (*MapBuffer)( struct gl_context *ctx, GLenum target, GLenum access, - struct gl_buffer_object *obj ); - void (*CopyBufferSubData)( struct gl_context *ctx, struct gl_buffer_object *src, struct gl_buffer_object *dst, @@ -760,15 +714,15 @@ struct dd_function_table { /* May return NULL if MESA_MAP_NOWAIT_BIT is set in access: */ - void * (*MapBufferRange)( struct gl_context *ctx, GLenum target, GLintptr offset, + void * (*MapBufferRange)( struct gl_context *ctx, GLintptr offset, GLsizeiptr length, GLbitfield access, struct gl_buffer_object *obj); - void (*FlushMappedBufferRange)(struct gl_context *ctx, GLenum target, + void (*FlushMappedBufferRange)(struct gl_context *ctx, GLintptr offset, GLsizeiptr length, struct gl_buffer_object *obj); - GLboolean (*UnmapBuffer)( struct gl_context *ctx, GLenum target, + GLboolean (*UnmapBuffer)( struct gl_context *ctx, struct gl_buffer_object *obj ); /*@}*/ diff --git a/src/mesa/main/debug.c b/src/mesa/main/debug.c index e7f6be99481..b1fc096f296 100644 --- a/src/mesa/main/debug.c +++ b/src/mesa/main/debug.c @@ -192,17 +192,6 @@ static void add_debug_flags( const char *debug ) if (strstr(debug, "flush")) MESA_DEBUG_FLAGS |= DEBUG_ALWAYS_FLUSH; -#if defined(_FPU_GETCW) && defined(_FPU_SETCW) - if (strstr(debug, "fpexceptions")) { - /* raise FP exceptions */ - fpu_control_t mask; - _FPU_GETCW(mask); - mask &= ~(_FPU_MASK_IM | _FPU_MASK_DM | _FPU_MASK_ZM - | _FPU_MASK_OM | _FPU_MASK_UM); - _FPU_SETCW(mask); - } -#endif - #else (void) debug; #endif diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index f9282398c21..6e075b4e54b 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -894,8 +894,8 @@ unpack_image(struct gl_context *ctx, GLuint dimensions, GLvoid *image; map = (GLubyte *) - ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - GL_READ_ONLY_ARB, unpack->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size, + GL_MAP_READ_BIT, unpack->BufferObj); if (!map) { /* unable to map src buffer! */ _mesa_error(ctx, GL_INVALID_OPERATION, "unable to map PBO"); @@ -906,8 +906,7 @@ unpack_image(struct gl_context *ctx, GLuint dimensions, image = _mesa_unpack_image(dimensions, width, height, depth, format, type, src, unpack); - ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - unpack->BufferObj); + ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj); if (!image) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "display list construction"); diff --git a/src/mesa/main/drawtex.c b/src/mesa/main/drawtex.c index 2089cdfcef9..83485a928d8 100644 --- a/src/mesa/main/drawtex.c +++ b/src/mesa/main/drawtex.c @@ -45,11 +45,15 @@ draw_texture(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, return; } + _mesa_set_vp_override(ctx, GL_TRUE); + if (ctx->NewState) _mesa_update_state(ctx); ASSERT(ctx->Driver.DrawTex); ctx->Driver.DrawTex(ctx, x, y, z, width, height); + + _mesa_set_vp_override(ctx, GL_FALSE); } diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c index aac8b9c5eaf..3ba4df6342f 100644 --- a/src/mesa/main/enable.c +++ b/src/mesa/main/enable.c @@ -5,7 +5,6 @@ /* * Mesa 3-D graphics library - * Version: 7.0.3 * * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. * @@ -560,7 +559,6 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) ctx->Polygon.OffsetLine = state; break; case GL_POLYGON_OFFSET_FILL: - /*case GL_POLYGON_OFFSET_EXT:*/ if (ctx->Polygon.OffsetFill == state) return; FLUSH_VERTICES(ctx, _NEW_POLYGON); @@ -643,9 +641,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) break; #endif - /* - * CLIENT STATE!!! - */ + /* client-side state */ case GL_VERTEX_ARRAY: case GL_NORMAL_ARRAY: case GL_COLOR_ARRAY: @@ -1174,7 +1170,6 @@ _mesa_IsEnabled( GLenum cap ) case GL_POLYGON_OFFSET_LINE: return ctx->Polygon.OffsetLine; case GL_POLYGON_OFFSET_FILL: - /*case GL_POLYGON_OFFSET_EXT:*/ return ctx->Polygon.OffsetFill; case GL_RESCALE_NORMAL_EXT: return ctx->Transform.RescaleNormals; @@ -1213,9 +1208,7 @@ _mesa_IsEnabled( GLenum cap ) } #endif - /* - * CLIENT STATE!!! - */ + /* client-side state */ case GL_VERTEX_ARRAY: return (ctx->Array.ArrayObj->Vertex.Enabled != 0); case GL_NORMAL_ARRAY: diff --git a/src/mesa/main/es_generator.py b/src/mesa/main/es_generator.py index c0b0a445806..cad3deaef94 100644 --- a/src/mesa/main/es_generator.py +++ b/src/mesa/main/es_generator.py @@ -681,10 +681,10 @@ print """ #if FEATURE_remap_table /* define esLocalRemapTable */ -#include "%sapi/main/dispatch.h" +#include "main/api_exec_%s_dispatch.h" #define need_MESA_remap_table -#include "%sapi/main/remap_helper.h" +#include "main/api_exec_%s_remap_helper.h" static void init_remap_table(void) diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index bc61c50a90f..14b0cf9acbd 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -81,6 +81,7 @@ static const struct extension extension_table[] = { { "GL_ARB_blend_func_extended", o(ARB_blend_func_extended), GL, 2009 }, { "GL_ARB_color_buffer_float", o(ARB_color_buffer_float), GL, 2004 }, { "GL_ARB_copy_buffer", o(ARB_copy_buffer), GL, 2008 }, + { "GL_ARB_conservative_depth", o(AMD_conservative_depth), GL, 2011 }, { "GL_ARB_depth_buffer_float", o(ARB_depth_buffer_float), GL, 2008 }, { "GL_ARB_depth_clamp", o(ARB_depth_clamp), GL, 2003 }, { "GL_ARB_depth_texture", o(ARB_depth_texture), GL, 2001 }, diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 84969360d92..0b48fc7eab0 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1984,10 +1984,26 @@ _mesa_FramebufferTexture1DEXT(GLenum target, GLenum attachment, { GET_CURRENT_CONTEXT(ctx); - if ((texture != 0) && (textarget != GL_TEXTURE_1D)) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glFramebufferTexture1DEXT(textarget)"); - return; + if (texture != 0) { + GLboolean error; + + switch (textarget) { + case GL_TEXTURE_1D: + error = GL_FALSE; + break; + case GL_TEXTURE_1D_ARRAY: + error = !ctx->Extensions.EXT_texture_array; + break; + default: + error = GL_TRUE; + } + + if (error) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glFramebufferTexture1DEXT(textarget=%s)", + _mesa_lookup_enum_by_nr(textarget)); + return; + } } framebuffer_texture(ctx, "1D", target, attachment, textarget, texture, @@ -2001,13 +2017,37 @@ _mesa_FramebufferTexture2DEXT(GLenum target, GLenum attachment, { GET_CURRENT_CONTEXT(ctx); - if ((texture != 0) && - (textarget != GL_TEXTURE_2D) && - (textarget != GL_TEXTURE_RECTANGLE_ARB) && - (!is_cube_face(textarget))) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glFramebufferTexture2DEXT(textarget=0x%x)", textarget); - return; + if (texture != 0) { + GLboolean error; + + switch (textarget) { + case GL_TEXTURE_2D: + error = GL_FALSE; + break; + case GL_TEXTURE_RECTANGLE: + error = !ctx->Extensions.NV_texture_rectangle; + break; + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + error = !ctx->Extensions.ARB_texture_cube_map; + break; + case GL_TEXTURE_2D_ARRAY: + error = !ctx->Extensions.EXT_texture_array; + break; + default: + error = GL_FALSE; + } + + if (error) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glFramebufferTexture2DEXT(textarget=%s)", + _mesa_lookup_enum_by_nr(textarget)); + return; + } } framebuffer_texture(ctx, "2D", target, attachment, textarget, texture, @@ -2023,7 +2063,7 @@ _mesa_FramebufferTexture3DEXT(GLenum target, GLenum attachment, GET_CURRENT_CONTEXT(ctx); if ((texture != 0) && (textarget != GL_TEXTURE_3D)) { - _mesa_error(ctx, GL_INVALID_ENUM, + _mesa_error(ctx, GL_INVALID_OPERATION, "glFramebufferTexture3DEXT(textarget)"); return; } @@ -2134,10 +2174,14 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, { const struct gl_renderbuffer_attachment *att; struct gl_framebuffer *buffer; + GLenum err; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); + /* The error differs in GL andd GLES. */ + err = ctx->API == API_OPENGL ? GL_INVALID_OPERATION : GL_INVALID_ENUM; + buffer = get_framebuffer_target(ctx, target); if (!buffer) { _mesa_error(ctx, GL_INVALID_ENUM, @@ -2188,7 +2232,12 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, } else { assert(att->Type == GL_NONE); - *params = 0; + if (ctx->API == API_OPENGL) { + *params = 0; + } else { + _mesa_error(ctx, GL_INVALID_ENUM, + "glGetFramebufferAttachmentParameterivEXT(pname)"); + } } return; case GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL_EXT: @@ -2196,7 +2245,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, *params = att->TextureLevel; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2214,7 +2263,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, } } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2232,7 +2281,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, } } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2246,7 +2295,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2267,7 +2316,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, return; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2301,7 +2350,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else if (att->Texture) { @@ -2337,6 +2386,8 @@ void GLAPIENTRY _mesa_GenerateMipmapEXT(GLenum target) { struct gl_texture_object *texObj; + GLboolean error; + GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); @@ -2346,12 +2397,22 @@ _mesa_GenerateMipmapEXT(GLenum target) case GL_TEXTURE_1D: case GL_TEXTURE_2D: case GL_TEXTURE_3D: + error = GL_FALSE; + break; case GL_TEXTURE_CUBE_MAP: - /* OK, legal value */ + error = !ctx->Extensions.ARB_texture_cube_map; + break; + case GL_TEXTURE_1D_ARRAY: + case GL_TEXTURE_2D_ARRAY: + error = !ctx->Extensions.EXT_texture_array; break; default: - /* XXX need to implement GL_TEXTURE_1D_ARRAY and GL_TEXTURE_2D_ARRAY */ - _mesa_error(ctx, GL_INVALID_ENUM, "glGenerateMipmapEXT(target)"); + error = GL_TRUE; + } + + if (error) { + _mesa_error(ctx, GL_INVALID_ENUM, "glGenerateMipmapEXT(target=%s)", + _mesa_lookup_enum_by_nr(target)); return; } diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp index 0b53c28f7ae..7cc17216884 100644 --- a/src/mesa/main/ff_fragment_shader.cpp +++ b/src/mesa/main/ff_fragment_shader.cpp @@ -330,8 +330,7 @@ static GLbitfield get_fp_input_mask( struct gl_context *ctx ) /* _NEW_RENDERMODE */ fp_inputs = (FRAG_BIT_COL0 | FRAG_BIT_TEX0); } - else if (!(vertexProgram || vertexShader) || - !ctx->VertexProgram._Current) { + else if (!(vertexProgram || vertexShader)) { /* Fixed function vertex logic */ /* _NEW_ARRAY */ GLbitfield varying_inputs = ctx->varying_vp_inputs; @@ -875,7 +874,8 @@ static struct ureg register_const4f( struct texenv_fragment_program *p, values[1] = s1; values[2] = s2; values[3] = s3; - idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, + idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, + (gl_constant_value *) values, 4, &swizzle ); r = make_ureg(PROGRAM_CONSTANT, idx); r.swz = swizzle; diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c index b8e49a3757f..2d2485c9e06 100644 --- a/src/mesa/main/ffvertex_prog.c +++ b/src/mesa/main/ffvertex_prog.c @@ -455,13 +455,13 @@ static struct ureg register_const4f( struct tnl_program *p, GLfloat s2, GLfloat s3) { - GLfloat values[4]; + gl_constant_value values[4]; GLint idx; GLuint swizzle; - values[0] = s0; - values[1] = s1; - values[2] = s2; - values[3] = s3; + values[0].f = s0; + values[1].f = s1; + values[2].f = s2; + values[3].f = s3; idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, &swizzle ); ASSERT(swizzle == SWIZZLE_NOOP); diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index e27569a6fac..23fa1b2c11e 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -548,6 +548,7 @@ _mesa_update_framebuffer_visual(struct gl_context *ctx, fb->Visual.rgbBits = fb->Visual.redBits + fb->Visual.greenBits + fb->Visual.blueBits; fb->Visual.samples = rb->NumSamples; + fb->Visual.sampleBuffers = rb->NumSamples > 0 ? 1 : 0; if (_mesa_get_format_color_encoding(fmt) == GL_SRGB) fb->Visual.sRGBCapable = ctx->Const.sRGBCapable; break; diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index 0492e1585c3..d32c68a53a4 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -1569,11 +1569,11 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu break; case GL_NUM_COMPRESSED_TEXTURE_FORMATS_ARB: - v->value_int = _mesa_get_compressed_formats(ctx, NULL, GL_FALSE); + v->value_int = _mesa_get_compressed_formats(ctx, NULL); break; case GL_COMPRESSED_TEXTURE_FORMATS_ARB: v->value_int_n.n = - _mesa_get_compressed_formats(ctx, v->value_int_n.ints, GL_FALSE); + _mesa_get_compressed_formats(ctx, v->value_int_n.ints); ASSERT(v->value_int_n.n <= 100); break; diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c index 0a572ec225d..8f097195922 100644 --- a/src/mesa/main/imports.c +++ b/src/mesa/main/imports.c @@ -753,7 +753,8 @@ _mesa_strdup( const char *s ) float _mesa_strtof( const char *s, char **end ) { -#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__) +#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__) && \ + !defined(ANDROID) static locale_t loc = NULL; if (!loc) { loc = newlocale(LC_CTYPE_MASK, "C", NULL); diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h index 3fa1db02aee..70defdc4327 100644 --- a/src/mesa/main/imports.h +++ b/src/mesa/main/imports.h @@ -134,7 +134,13 @@ typedef union { GLfloat f; GLint i; } fi_type; #define exp2f(f) ((float) exp2(f)) #define floorf(f) ((float) floor(f)) #define logf(f) ((float) log(f)) + +#ifdef ANDROID +#define log2f(f) (logf(f) * (float) (1.0 / M_LN2)) +#else #define log2f(f) ((float) log2(f)) +#endif + #define powf(x,y) ((float) pow(x,y)) #define sinf(f) ((float) sin(f)) #define sinhf(f) ((float) sinh(f)) @@ -562,7 +568,7 @@ _mesa_init_sqrt_table(void); #ifdef __GNUC__ -#ifdef __MINGW32__ +#if defined(__MINGW32__) || defined(ANDROID) #define ffs __builtin_ffs #define ffsll __builtin_ffsll #endif diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index b88118366b2..f2eb889feb4 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1279,6 +1279,9 @@ struct gl_texture_image GLboolean _IsPowerOfTwo; /**< Are all dimensions powers of two? */ struct gl_texture_object *TexObject; /**< Pointer back to parent object */ + GLuint Level; /**< Which mipmap level am I? */ + /** Cube map face: index into gl_texture_object::Image[] array */ + GLuint Face; FetchTexelFuncC FetchTexelc; /**< GLchan texel fetch function pointer */ FetchTexelFuncF FetchTexelf; /**< Float texel fetch function pointer */ @@ -2252,8 +2255,6 @@ struct gl_shader_state */ struct gl_shader_program *ActiveProgram; - void *MemPool; - GLbitfield Flags; /**< Mask of GLSL_x flags */ }; @@ -2719,6 +2720,12 @@ struct gl_constants GLuint GLSLVersion; /**< GLSL version supported (ex: 120 = 1.20) */ + /** + * Does the driver support real 32-bit integers? (Otherwise, integers are + * simulated via floats.) + */ + GLboolean NativeIntegers; + /** Which texture units support GL_ATI_envmap_bumpmap as targets */ GLbitfield SupportedBumpUnits; diff --git a/src/mesa/main/nvprogram.c b/src/mesa/main/nvprogram.c index dd198b8141a..7ff7645b7b7 100644 --- a/src/mesa/main/nvprogram.c +++ b/src/mesa/main/nvprogram.c @@ -812,7 +812,7 @@ _mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name, { struct gl_program *prog; struct gl_fragment_program *fragProg; - GLfloat *v; + gl_constant_value *v; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); @@ -834,10 +834,10 @@ _mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name, v = _mesa_lookup_parameter_value(fragProg->Base.Parameters, len, (char *) name); if (v) { - v[0] = x; - v[1] = y; - v[2] = z; - v[3] = w; + v[0].f = x; + v[1].f = y; + v[2].f = z; + v[3].f = w; return; } @@ -878,7 +878,7 @@ _mesa_GetProgramNamedParameterfvNV(GLuint id, GLsizei len, const GLubyte *name, { struct gl_program *prog; struct gl_fragment_program *fragProg; - const GLfloat *v; + const gl_constant_value *v; GET_CURRENT_CONTEXT(ctx); @@ -899,10 +899,10 @@ _mesa_GetProgramNamedParameterfvNV(GLuint id, GLsizei len, const GLubyte *name, v = _mesa_lookup_parameter_value(fragProg->Base.Parameters, len, (char *) name); if (v) { - params[0] = v[0]; - params[1] = v[1]; - params[2] = v[2]; - params[3] = v[3]; + params[0] = v[0].f; + params[1] = v[1].f; + params[2] = v[2].f; + params[3] = v[3].f; return; } diff --git a/src/mesa/main/pbo.c b/src/mesa/main/pbo.c index 15e0480e9f1..4e7e6f925cc 100644 --- a/src/mesa/main/pbo.c +++ b/src/mesa/main/pbo.c @@ -128,9 +128,10 @@ _mesa_map_pbo_source(struct gl_context *ctx, if (_mesa_is_bufferobj(unpack->BufferObj)) { /* unpack from PBO */ - buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - GL_READ_ONLY_ARB, - unpack->BufferObj); + buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, + unpack->BufferObj->Size, + GL_MAP_READ_BIT, + unpack->BufferObj); if (!buf) return NULL; @@ -201,8 +202,7 @@ _mesa_unmap_pbo_source(struct gl_context *ctx, { ASSERT(unpack != &ctx->Pack); /* catch pack/unpack mismatch */ if (_mesa_is_bufferobj(unpack->BufferObj)) { - ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - unpack->BufferObj); + ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj); } } @@ -224,9 +224,10 @@ _mesa_map_pbo_dest(struct gl_context *ctx, if (_mesa_is_bufferobj(pack->BufferObj)) { /* pack into PBO */ - buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT, - GL_WRITE_ONLY_ARB, - pack->BufferObj); + buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, + pack->BufferObj->Size, + GL_MAP_WRITE_BIT, + pack->BufferObj); if (!buf) return NULL; @@ -297,7 +298,7 @@ _mesa_unmap_pbo_dest(struct gl_context *ctx, { ASSERT(pack != &ctx->Unpack); /* catch pack/unpack mismatch */ if (_mesa_is_bufferobj(pack->BufferObj)) { - ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT, pack->BufferObj); + ctx->Driver.UnmapBuffer(ctx, pack->BufferObj); } } @@ -327,8 +328,9 @@ _mesa_validate_pbo_teximage(struct gl_context *ctx, GLuint dimensions, return NULL; } - buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - GL_READ_ONLY_ARB, unpack->BufferObj); + buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size, + GL_MAP_READ_BIT, + unpack->BufferObj); if (!buf) { _mesa_error(ctx, GL_INVALID_OPERATION, funcName, "(PBO is mapped)"); return NULL; @@ -364,8 +366,10 @@ _mesa_validate_pbo_compressed_teximage(struct gl_context *ctx, return NULL; } - buf = (GLubyte*) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - GL_READ_ONLY_ARB, packing->BufferObj); + buf = (GLubyte*) ctx->Driver.MapBufferRange(ctx, 0, + packing->BufferObj->Size, + GL_MAP_READ_BIT, + packing->BufferObj); if (!buf) { _mesa_error(ctx, GL_INVALID_OPERATION, funcName, "(PBO is mapped"); return NULL; @@ -384,8 +388,7 @@ _mesa_unmap_teximage_pbo(struct gl_context *ctx, const struct gl_pixelstore_attrib *unpack) { if (_mesa_is_bufferobj(unpack->BufferObj)) { - ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - unpack->BufferObj); + ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj); } } diff --git a/src/mesa/main/querymatrix.c b/src/mesa/main/querymatrix.c index 944ad435f7a..eaedf7cd238 100644 --- a/src/mesa/main/querymatrix.c +++ b/src/mesa/main/querymatrix.c @@ -73,7 +73,7 @@ fpclassify(double x) #elif defined(__APPLE__) || defined(__CYGWIN__) || defined(__FreeBSD__) || \ defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \ (defined(__sun) && defined(__C99FEATURES__)) || defined(__MINGW32__) || \ - (defined(__sun) && defined(__GNUC__)) + (defined(__sun) && defined(__GNUC__)) || defined(ANDROID) /* fpclassify is available. */ diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 8df25c3f988..74997eaaa77 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -1125,7 +1125,7 @@ static void validate_program(struct gl_context *ctx, GLuint program) { struct gl_shader_program *shProg; - char errMsg[100]; + char errMsg[100] = ""; shProg = _mesa_lookup_shader_program_err(ctx, program, "glValidateProgram"); if (!shProg) { diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c index 33d91ad594d..f128648f477 100644 --- a/src/mesa/main/shaderobj.c +++ b/src/mesa/main/shaderobj.c @@ -244,6 +244,8 @@ _mesa_init_shader_program(struct gl_context *ctx, struct gl_shader_program *prog prog->Geom.InputType = GL_TRIANGLES; prog->Geom.OutputType = GL_TRIANGLE_STRIP; #endif + + prog->InfoLog = ralloc_strdup(prog, ""); } /** @@ -283,6 +285,10 @@ _mesa_clear_shader_program_data(struct gl_context *ctx, _mesa_free_parameter_list(shProg->Varying); shProg->Varying = NULL; } + + assert(shProg->InfoLog != NULL); + ralloc_free(shProg->InfoLog); + shProg->InfoLog = ralloc_strdup(shProg, ""); } @@ -317,11 +323,6 @@ _mesa_free_shader_program_data(struct gl_context *ctx, shProg->Shaders = NULL; } - if (shProg->InfoLog) { - ralloc_free(shProg->InfoLog); - shProg->InfoLog = NULL; - } - /* Transform feedback varying vars */ for (i = 0; i < shProg->TransformFeedback.NumVarying; i++) { free(shProg->TransformFeedback.VaryingNames[i]); diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c index d84f59690c5..8b7159db09c 100644 --- a/src/mesa/main/shared.c +++ b/src/mesa/main/shared.c @@ -200,7 +200,7 @@ delete_bufferobj_cb(GLuint id, void *data, void *userData) struct gl_buffer_object *bufObj = (struct gl_buffer_object *) data; struct gl_context *ctx = (struct gl_context *) userData; if (_mesa_bufferobj_mapped(bufObj)) { - ctx->Driver.UnmapBuffer(ctx, 0, bufObj); + ctx->Driver.UnmapBuffer(ctx, bufObj); bufObj->Pointer = NULL; } _mesa_reference_buffer_object(ctx, &bufObj, NULL); diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c index d820ae92747..42bd1eee5ca 100644 --- a/src/mesa/main/texcompress.c +++ b/src/mesa/main/texcompress.c @@ -40,19 +40,192 @@ /** + * Get the GL base format of a specified GL compressed texture format + * + * From page 232 of the OpenGL 3.3 (Compatiblity Profile) spec: + * + * "Compressed Internal Format Base Internal Format Type + * --------------------------- -------------------- --------- + * COMPRESSED_ALPHA ALPHA Generic + * COMPRESSED_LUMINANCE LUMINANCE Generic + * COMPRESSED_LUMINANCE_ALPHA LUMINANCE_ALPHA Generic + * COMPRESSED_INTENSITY INTENSITY Generic + * COMPRESSED_RED RED Generic + * COMPRESSED_RG RG Generic + * COMPRESSED_RGB RGB Generic + * COMPRESSED_RGBA RGBA Generic + * COMPRESSED_SRGB RGB Generic + * COMPRESSED_SRGB_ALPHA RGBA Generic + * COMPRESSED_SLUMINANCE LUMINANCE Generic + * COMPRESSED_SLUMINANCE_ALPHA LUMINANCE_ALPHA Generic + * COMPRESSED_RED_RGTC1 RED Specific + * COMPRESSED_SIGNED_RED_RGTC1 RED Specific + * COMPRESSED_RG_RGTC2 RG Specific + * COMPRESSED_SIGNED_RG_RGTC2 RG Specific" + * + * \return + * The base format of \c format if \c format is a compressed format (either + * generic or specific. Otherwise 0 is returned. + */ +GLenum +_mesa_gl_compressed_format_base_format(GLenum format) +{ + switch (format) { + case GL_COMPRESSED_RED: + case GL_COMPRESSED_RED_RGTC1: + case GL_COMPRESSED_SIGNED_RED_RGTC1: + return GL_RED; + + case GL_COMPRESSED_RG: + case GL_COMPRESSED_RG_RGTC2: + case GL_COMPRESSED_SIGNED_RG_RGTC2: + return GL_RG; + + case GL_COMPRESSED_RGB: + case GL_COMPRESSED_SRGB: + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + case GL_COMPRESSED_RGB_FXT1_3DFX: + case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT: + return GL_RGB; + + case GL_COMPRESSED_RGBA: + case GL_COMPRESSED_SRGB_ALPHA: + case GL_COMPRESSED_RGBA_BPTC_UNORM_ARB: + case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB: + case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB: + case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB: + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + case GL_COMPRESSED_RGBA_FXT1_3DFX: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: + return GL_RGBA; + + case GL_COMPRESSED_ALPHA: + return GL_ALPHA; + + case GL_COMPRESSED_LUMINANCE: + case GL_COMPRESSED_SLUMINANCE: + case GL_COMPRESSED_LUMINANCE_LATC1_EXT: + case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT: + return GL_LUMINANCE; + + case GL_COMPRESSED_LUMINANCE_ALPHA: + case GL_COMPRESSED_SLUMINANCE_ALPHA: + case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT: + case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT: + case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI: + return GL_LUMINANCE_ALPHA; + + case GL_COMPRESSED_INTENSITY: + return GL_INTENSITY; + + default: + return 0; + } +} + +/** * Return list of (and count of) all specific texture compression * formats that are supported. * + * Some formats are \b not returned by this function. The + * \c GL_COMPRESSED_TEXTURE_FORMATS query only returns formats that are + * "suitable for general-purpose usage." All texture compression extensions + * have taken this to mean either linear RGB or linear RGBA. + * + * The GL_ARB_texture_compress_rgtc spec says: + * + * "19) Should the GL_NUM_COMPRESSED_TEXTURE_FORMATS and + * GL_COMPRESSED_TEXTURE_FORMATS queries return the RGTC formats? + * + * RESOLVED: No. + * + * The OpenGL 2.1 specification says "The only values returned + * by this query [GL_COMPRESSED_TEXTURE_FORMATS"] are those + * corresponding to formats suitable for general-purpose usage. + * The renderer will not enumerate formats with restrictions that + * need to be specifically understood prior to use." + * + * Compressed textures with just red or red-green components are + * not general-purpose so should not be returned by these queries + * because they have restrictions. + * + * Applications that seek to use the RGTC formats should do so + * by looking for this extension's name in the string returned by + * glGetString(GL_EXTENSIONS) rather than + * what GL_NUM_COMPRESSED_TEXTURE_FORMATS and + * GL_COMPRESSED_TEXTURE_FORMATS return." + * + * There is nearly identical wording in the GL_EXT_texture_compression_rgtc + * spec. + * + * The GL_EXT_texture_rRGB spec says: + * + * "22) Should the new COMPRESSED_SRGB_* formats be listed in an + * implementation's GL_COMPRESSED_TEXTURE_FORMATS list? + * + * RESOLVED: No. Section 3.8.1 says formats listed by + * GL_COMPRESSED_TEXTURE_FORMATS are "suitable for general-purpose + * usage." The non-linear distribution of red, green, and + * blue for these sRGB compressed formats makes them not really + * general-purpose." + * + * The GL_EXT_texture_compression_latc spec says: + * + * "16) Should the GL_NUM_COMPRESSED_TEXTURE_FORMATS and + * GL_COMPRESSED_TEXTURE_FORMATS queries return the LATC formats? + * + * RESOLVED: No. + * + * The OpenGL 2.1 specification says "The only values returned + * by this query [GL_COMPRESSED_TEXTURE_FORMATS"] are those + * corresponding to formats suitable for general-purpose usage. + * The renderer will not enumerate formats with restrictions that + * need to be specifically understood prior to use." + * + * Historically, OpenGL implementation have advertised the RGB and + * RGBA versions of the S3TC extensions compressed format tokens + * through this mechanism. + * + * The specification is not sufficiently clear about what "suitable + * for general-purpose usage" means. Historically that seems to mean + * unsigned RGB or unsigned RGBA. The DXT1 format supporting alpha + * (GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) is not exposed in the list (at + * least for NVIDIA drivers) because the alpha is always 1.0 expect + * when it is 0.0 when RGB is required to be black. NVIDIA's even + * limits itself to true linear RGB or RGBA formats, specifically + * not including EXT_texture_sRGB's sRGB S3TC compressed formats. + * + * Adding luminance and luminance-alpha texture formats (and + * certainly signed versions of luminance and luminance-alpha + * formats!) invites potential comptaibility problems with old + * applications using this mechanism since old applications are + * unlikely to expect non-RGB or non-RGBA formats to be advertised + * through this mechanism. However no specific misinteractions + * with old applications is known. + * + * Applications that seek to use the LATC formats should do so + * by looking for this extension's name in the string returned by + * glGetString(GL_EXTENSIONS) rather than + * what GL_NUM_COMPRESSED_TEXTURE_FORMATS and + * GL_COMPRESSED_TEXTURE_FORMATS return." + * + * There is no formal spec for GL_ATI_texture_compression_3dc. Since the + * formats added by this extension are luminance-alpha formats, it is + * reasonable to expect them to follow the same rules as + * GL_EXT_texture_compression_latc. At the very least, Catalyst 11.6 does not + * expose the 3dc formats through this mechanism. + * * \param ctx the GL context * \param formats the resulting format list (may be NULL). - * \param all if true return all formats, even those with some kind - * of restrictions/limitations (See GL_ARB_texture_compression - * spec for more info). * * \return number of formats. */ GLuint -_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all) +_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats) { GLuint n = 0; if (ctx->Extensions.TDFX_texture_compression_FXT1) { @@ -64,24 +237,15 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean a n += 2; } } - /* don't return RGTC - ARB_texture_compression_rgtc query 19 */ + if (ctx->Extensions.EXT_texture_compression_s3tc) { if (formats) { formats[n++] = GL_COMPRESSED_RGB_S3TC_DXT1_EXT; - /* This format has some restrictions/limitations and so should - * not be returned via the GL_COMPRESSED_TEXTURE_FORMATS query. - * Specifically, all transparent pixels become black. NVIDIA - * omits this format too. - */ - if (all) - formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT; formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT; formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT; } else { n += 3; - if (all) - n += 1; } } if (ctx->Extensions.S3_s3tc) { @@ -95,19 +259,6 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean a n += 4; } } -#if FEATURE_EXT_texture_sRGB - if (ctx->Extensions.EXT_texture_sRGB) { - if (formats) { - formats[n++] = GL_COMPRESSED_SRGB_S3TC_DXT1_EXT; - formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT; - formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT; - formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT; - } - else { - n += 4; - } - } -#endif /* FEATURE_EXT_texture_sRGB */ return n; #if FEATURE_ES1 || FEATURE_ES2 diff --git a/src/mesa/main/texcompress.h b/src/mesa/main/texcompress.h index 19b08bbadf6..375cf90c8a2 100644 --- a/src/mesa/main/texcompress.h +++ b/src/mesa/main/texcompress.h @@ -33,8 +33,11 @@ struct gl_context; #if _HAVE_FULL_GL +extern GLenum +_mesa_gl_compressed_format_base_format(GLenum format); + extern GLuint -_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all); +_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats); extern gl_format _mesa_glenum_to_compressed_format(GLenum format); diff --git a/src/mesa/main/texcompress_rgtc_tmp.h b/src/mesa/main/texcompress_rgtc_tmp.h index c8bf082a158..48bbd374e08 100644 --- a/src/mesa/main/texcompress_rgtc_tmp.h +++ b/src/mesa/main/texcompress_rgtc_tmp.h @@ -181,7 +181,7 @@ static void TAG(encode_rgtc_chan)(TYPE *blkaddr, TYPE srccolors[4][4], fprintf(stderr, "%d ", alphaenc1[i]); } fprintf(stderr, "cutVals "); - for (i = 0; i < 8; i++) { + for (i = 0; i < 7; i++) { fprintf(stderr, "%d ", acutValues[i]); } fprintf(stderr, "srcVals "); diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index 26c2ff98ba1..b2ebb0de475 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -441,8 +441,8 @@ _mesa_get_teximage(struct gl_context *ctx, GLenum target, GLint level, * texture data to the PBO if the PBO is in VRAM along with the texture. */ GLubyte *buf = (GLubyte *) - ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT, - GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, ctx->Pack.BufferObj->Size, + GL_MAP_WRITE_BIT, ctx->Pack.BufferObj); if (!buf) { /* out of memory or other unexpected error */ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage(map PBO failed)"); @@ -474,8 +474,7 @@ _mesa_get_teximage(struct gl_context *ctx, GLenum target, GLint level, } if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) { - ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT, - ctx->Pack.BufferObj); + ctx->Driver.UnmapBuffer(ctx, ctx->Pack.BufferObj); } } @@ -500,8 +499,8 @@ _mesa_get_compressed_teximage(struct gl_context *ctx, GLenum target, GLint level if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) { /* pack texture image into a PBO */ GLubyte *buf = (GLubyte *) - ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT, - GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, ctx->Pack.BufferObj->Size, + GL_MAP_WRITE_BIT, ctx->Pack.BufferObj); if (!buf) { /* out of memory or other unexpected error */ _mesa_error(ctx, GL_OUT_OF_MEMORY, @@ -531,8 +530,7 @@ _mesa_get_compressed_teximage(struct gl_context *ctx, GLenum target, GLint level } if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) { - ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT, - ctx->Pack.BufferObj); + ctx->Driver.UnmapBuffer(ctx, ctx->Pack.BufferObj); } } diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 6f53686e7ff..a005d2935fa 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -1,6 +1,5 @@ /* - * mesa 3-D graphics library - * Version: 7.6 + * Mesa 3-D graphics library * * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. * Copyright (C) 2009 VMware, Inc. All Rights Reserved. @@ -556,8 +555,6 @@ _mesa_tex_target_to_face(GLenum target) * \param target texture target. * \param level image level. * \param texImage texture image. - * - * This was basically prompted by the introduction of cube maps. */ void _mesa_set_tex_image(struct gl_texture_object *tObj, @@ -574,6 +571,8 @@ _mesa_set_tex_image(struct gl_texture_object *tObj, /* Set the 'back' pointer */ texImage->TexObject = tObj; + texImage->Level = level; + texImage->Face = face; } @@ -709,15 +708,13 @@ get_proxy_target(GLenum target) /** * Get the texture object that corresponds to the target of the given - * texture unit. + * texture unit. The target should have already been checked for validity. * * \param ctx GL context. * \param texUnit texture unit. * \param target texture target. * * \return pointer to the texture object on success, or NULL on failure. - * - * \sa gl_texture_unit. */ struct gl_texture_object * _mesa_select_tex_object(struct gl_context *ctx, @@ -2797,29 +2794,43 @@ copyteximage(struct gl_context *ctx, GLuint dims, _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims); } else { - gl_format texFormat; - - if (texImage->Data) { - ctx->Driver.FreeTexImageData( ctx, texImage ); - } + /* choose actual hw format */ + gl_format texFormat = _mesa_choose_texture_format(ctx, texObj, + target, level, + internalFormat, + GL_NONE, GL_NONE); - ASSERT(texImage->Data == NULL); + if (legal_texture_size(ctx, texFormat, width, height, 1)) { + GLint srcX = x, srcY = y, dstX = 0, dstY = 0; - texFormat = _mesa_choose_texture_format(ctx, texObj, target, level, - internalFormat, GL_NONE, - GL_NONE); + /* Free old texture image */ + ctx->Driver.FreeTexImageData(ctx, texImage); - if (legal_texture_size(ctx, texFormat, width, height, 1)) { _mesa_init_teximage_fields(ctx, target, texImage, width, height, 1, border, internalFormat, texFormat); - ASSERT(ctx->Driver.CopyTexImage2D); - if (dims == 1) - ctx->Driver.CopyTexImage1D(ctx, target, level, internalFormat, - x, y, width, border); - else - ctx->Driver.CopyTexImage2D(ctx, target, level, internalFormat, - x, y, width, height, border); + /* Allocate texture memory (no pixel data yet) */ + if (dims == 1) { + ctx->Driver.TexImage1D(ctx, target, level, internalFormat, + width, border, GL_NONE, GL_NONE, NULL, + &ctx->Unpack, texObj, texImage); + } + else { + ctx->Driver.TexImage2D(ctx, target, level, internalFormat, + width, height, border, GL_NONE, GL_NONE, + NULL, &ctx->Unpack, texObj, texImage); + } + + if (_mesa_clip_copytexsubimage(ctx, &dstX, &dstY, &srcX, &srcY, + &width, &height)) { + if (dims == 1) + ctx->Driver.CopyTexSubImage1D(ctx, target, level, dstX, + srcX, srcY, width); + + else + ctx->Driver.CopyTexSubImage2D(ctx, target, level, dstX, dstY, + srcX, srcY, width, height); + } check_gen_mipmap(ctx, target, texObj, level); @@ -2830,6 +2841,7 @@ copyteximage(struct gl_context *ctx, GLuint dims, ctx->NewState |= _NEW_TEXTURE; } else { + /* probably too large of image */ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims); } } diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index 3021716a0b6..078a43ab153 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -842,7 +842,7 @@ _mesa_GenTextures( GLsizei n, GLuint *textures ) struct gl_texture_object *texObj; GLuint name = first + i; GLenum target = 0; - texObj = (*ctx->Driver.NewTextureObject)( ctx, name, target); + texObj = ctx->Driver.NewTextureObject(ctx, name, target); if (!texObj) { _glthread_UNLOCK_MUTEX(ctx->Shared->Mutex); _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGenTextures"); @@ -1066,7 +1066,7 @@ _mesa_BindTexture( GLenum target, GLuint texName ) } else { /* if this is a new texture id, allocate a texture object now */ - newTexObj = (*ctx->Driver.NewTextureObject)(ctx, texName, target); + newTexObj = ctx->Driver.NewTextureObject(ctx, texName, target); if (!newTexObj) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBindTexture"); return; @@ -1108,7 +1108,7 @@ _mesa_BindTexture( GLenum target, GLuint texName ) /* Pass BindTexture call to device driver */ if (ctx->Driver.BindTexture) - (*ctx->Driver.BindTexture)( ctx, target, newTexObj ); + ctx->Driver.BindTexture(ctx, target, newTexObj); } diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 4b9dcb5d3b5..bbbb306b2d9 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -888,7 +888,7 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, texObj = _mesa_select_tex_object(ctx, texUnit, target); img = _mesa_select_tex_image(ctx, texObj, target, level); - if (!img || !img->TexFormat) { + if (!img || img->TexFormat == MESA_FORMAT_NONE) { /* undefined texture image */ if (pname == GL_TEXTURE_COMPONENTS) *params = 1; @@ -915,9 +915,23 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, *params = _mesa_compressed_format_to_glenum(ctx, texFormat); } else { - /* return the user's requested internal format */ - *params = img->InternalFormat; - } + /* If the true internal format is not compressed but the user + * requested a generic compressed format, we have to return the + * generic base format that matches. + * + * From page 119 (page 129 of the PDF) of the OpenGL 1.3 spec: + * + * "If no specific compressed format is available, + * internalformat is instead replaced by the corresponding base + * internal format." + * + * Otherwise just return the user's requested internal format + */ + const GLenum f = + _mesa_gl_compressed_format_base_format(img->InternalFormat); + + *params = (f != 0) ? f : img->InternalFormat; + } break; case GL_TEXTURE_BORDER: *params = img->Border; @@ -980,28 +994,21 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, *params = 0; break; case GL_TEXTURE_DEPTH_SIZE_ARB: - if (ctx->Extensions.ARB_depth_texture) - *params = _mesa_get_format_bits(texFormat, pname); - else + if (!ctx->Extensions.ARB_depth_texture) goto invalid_pname; + *params = _mesa_get_format_bits(texFormat, pname); break; case GL_TEXTURE_STENCIL_SIZE_EXT: - if (ctx->Extensions.EXT_packed_depth_stencil || - ctx->Extensions.ARB_framebuffer_object) { - *params = _mesa_get_format_bits(texFormat, pname); - } - else { + if (!ctx->Extensions.EXT_packed_depth_stencil && + !ctx->Extensions.ARB_framebuffer_object) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, pname); break; case GL_TEXTURE_SHARED_SIZE: - if (ctx->VersionMajor >= 3 || - ctx->Extensions.EXT_texture_shared_exponent) { - *params = texFormat == MESA_FORMAT_RGB9_E5_FLOAT ? 5 : 0; - } - else { + if (ctx->VersionMajor < 3 && + !ctx->Extensions.EXT_texture_shared_exponent) goto invalid_pname; - } + *params = texFormat == MESA_FORMAT_RGB9_E5_FLOAT ? 5 : 0; break; /* GL_ARB_texture_compression */ @@ -1022,67 +1029,46 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, /* GL_ARB_texture_float */ case GL_TEXTURE_RED_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_RED_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_RED_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_GREEN_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_GREEN_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_GREEN_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_BLUE_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_BLUE_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_BLUE_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_ALPHA_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_ALPHA_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_ALPHA_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_LUMINANCE_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_LUMINANCE_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_LUMINANCE_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_INTENSITY_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_INTENSITY_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_INTENSITY_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_DEPTH_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_DEPTH_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_DEPTH_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; default: @@ -1104,7 +1090,6 @@ void GLAPIENTRY _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) { struct gl_texture_object *obj; - GLboolean error = GL_FALSE; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); @@ -1130,17 +1115,15 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) *params = ENUM_TO_FLOAT(obj->Sampler.WrapR); break; case GL_TEXTURE_BORDER_COLOR: - if(ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP)) + if (ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP)) _mesa_update_state_locked(ctx); - if(ctx->Color._ClampFragmentColor) - { + if (ctx->Color._ClampFragmentColor) { params[0] = CLAMP(obj->Sampler.BorderColor.f[0], 0.0F, 1.0F); params[1] = CLAMP(obj->Sampler.BorderColor.f[1], 0.0F, 1.0F); params[2] = CLAMP(obj->Sampler.BorderColor.f[2], 0.0F, 1.0F); params[3] = CLAMP(obj->Sampler.BorderColor.f[3], 0.0F, 1.0F); } - else - { + else { params[0] = obj->Sampler.BorderColor.f[0]; params[1] = obj->Sampler.BorderColor.f[1]; params[2] = obj->Sampler.BorderColor.f[2]; @@ -1148,14 +1131,8 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) } break; case GL_TEXTURE_RESIDENT: - { - GLboolean resident; - if (ctx->Driver.IsTextureResident) - resident = ctx->Driver.IsTextureResident(ctx, obj); - else - resident = GL_TRUE; - *params = ENUM_TO_FLOAT(resident); - } + *params = ctx->Driver.IsTextureResident ? + ctx->Driver.IsTextureResident(ctx, obj) : 1.0F; break; case GL_TEXTURE_PRIORITY: *params = obj->Priority; @@ -1173,49 +1150,37 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) *params = (GLfloat) obj->MaxLevel; break; case GL_TEXTURE_MAX_ANISOTROPY_EXT: - if (ctx->Extensions.EXT_texture_filter_anisotropic) { - *params = obj->Sampler.MaxAnisotropy; - } - else - error = GL_TRUE; + if (!ctx->Extensions.EXT_texture_filter_anisotropic) + goto invalid_pname; + *params = obj->Sampler.MaxAnisotropy; break; case GL_TEXTURE_COMPARE_FAIL_VALUE_ARB: - if (ctx->Extensions.ARB_shadow_ambient) { - *params = obj->Sampler.CompareFailValue; - } - else - error = GL_TRUE; + if (!ctx->Extensions.ARB_shadow_ambient) + goto invalid_pname; + *params = obj->Sampler.CompareFailValue; break; case GL_GENERATE_MIPMAP_SGIS: *params = (GLfloat) obj->GenerateMipmap; break; case GL_TEXTURE_COMPARE_MODE_ARB: - if (ctx->Extensions.ARB_shadow) { - *params = (GLfloat) obj->Sampler.CompareMode; - } - else - error = GL_TRUE; + if (!ctx->Extensions.ARB_shadow) + goto invalid_pname; + *params = (GLfloat) obj->Sampler.CompareMode; break; case GL_TEXTURE_COMPARE_FUNC_ARB: - if (ctx->Extensions.ARB_shadow) { - *params = (GLfloat) obj->Sampler.CompareFunc; - } - else - error = GL_TRUE; + if (!ctx->Extensions.ARB_shadow) + goto invalid_pname; + *params = (GLfloat) obj->Sampler.CompareFunc; break; case GL_DEPTH_TEXTURE_MODE_ARB: - if (ctx->Extensions.ARB_depth_texture) { - *params = (GLfloat) obj->Sampler.DepthMode; - } - else - error = GL_TRUE; + if (!ctx->Extensions.ARB_depth_texture) + goto invalid_pname; + *params = (GLfloat) obj->Sampler.DepthMode; break; case GL_TEXTURE_LOD_BIAS: - if (ctx->Extensions.EXT_texture_lod_bias) { - *params = obj->Sampler.LodBias; - } - else - error = GL_TRUE; + if (!ctx->Extensions.EXT_texture_lod_bias) + goto invalid_pname; + *params = obj->Sampler.LodBias; break; #if FEATURE_OES_draw_texture case GL_TEXTURE_CROP_RECT_OES: @@ -1230,45 +1195,40 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) case GL_TEXTURE_SWIZZLE_G_EXT: case GL_TEXTURE_SWIZZLE_B_EXT: case GL_TEXTURE_SWIZZLE_A_EXT: - if (ctx->Extensions.EXT_texture_swizzle) { - GLuint comp = pname - GL_TEXTURE_SWIZZLE_R_EXT; - *params = (GLfloat) obj->Swizzle[comp]; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_swizzle) + goto invalid_pname; + *params = (GLfloat) obj->Swizzle[pname - GL_TEXTURE_SWIZZLE_R_EXT]; break; case GL_TEXTURE_SWIZZLE_RGBA_EXT: - if (ctx->Extensions.EXT_texture_swizzle) { + if (!ctx->Extensions.EXT_texture_swizzle) { + goto invalid_pname; + } + else { GLuint comp; for (comp = 0; comp < 4; comp++) { params[comp] = (GLfloat) obj->Swizzle[comp]; } } - else { - error = GL_TRUE; - } break; case GL_TEXTURE_CUBE_MAP_SEAMLESS: - if (ctx->Extensions.AMD_seamless_cubemap_per_texture) { + if (!ctx->Extensions.AMD_seamless_cubemap_per_texture) + goto invalid_pname; *params = (GLfloat) obj->Sampler.CubeMapSeamless; - } - else { - error = GL_TRUE; - } + break; default: - error = GL_TRUE; - break; + goto invalid_pname; } - if (error) - _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameterfv(pname=0x%x)", - pname); + /* no error if we get here */ + _mesa_unlock_texture(ctx, obj); + return; +invalid_pname: _mesa_unlock_texture(ctx, obj); + _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameterfv(pname=0x%x)", pname); } @@ -1276,13 +1236,12 @@ void GLAPIENTRY _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) { struct gl_texture_object *obj; - GLboolean error = GL_FALSE; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); - obj = get_texobj(ctx, target, GL_TRUE); - if (!obj) - return; + obj = get_texobj(ctx, target, GL_TRUE); + if (!obj) + return; _mesa_lock_texture(ctx, obj); switch (pname) { @@ -1315,14 +1274,8 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) } break;; case GL_TEXTURE_RESIDENT: - { - GLboolean resident; - if (ctx->Driver.IsTextureResident) - resident = ctx->Driver.IsTextureResident(ctx, obj); - else - resident = GL_TRUE; - *params = (GLint) resident; - } + *params = ctx->Driver.IsTextureResident ? + ctx->Driver.IsTextureResident(ctx, obj) : 1; break;; case GL_TEXTURE_PRIORITY: *params = FLOAT_TO_INT(obj->Priority); @@ -1340,55 +1293,37 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) *params = obj->MaxLevel; break;; case GL_TEXTURE_MAX_ANISOTROPY_EXT: - if (ctx->Extensions.EXT_texture_filter_anisotropic) { - *params = (GLint) obj->Sampler.MaxAnisotropy; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_filter_anisotropic) + goto invalid_pname; + *params = (GLint) obj->Sampler.MaxAnisotropy; break; case GL_TEXTURE_COMPARE_FAIL_VALUE_ARB: - if (ctx->Extensions.ARB_shadow_ambient) { - *params = (GLint) FLOAT_TO_INT(obj->Sampler.CompareFailValue); - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.ARB_shadow_ambient) + goto invalid_pname; + *params = (GLint) FLOAT_TO_INT(obj->Sampler.CompareFailValue); break; case GL_GENERATE_MIPMAP_SGIS: *params = (GLint) obj->GenerateMipmap; break; case GL_TEXTURE_COMPARE_MODE_ARB: - if (ctx->Extensions.ARB_shadow) { - *params = (GLint) obj->Sampler.CompareMode; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.ARB_shadow) + goto invalid_pname; + *params = (GLint) obj->Sampler.CompareMode; break; case GL_TEXTURE_COMPARE_FUNC_ARB: - if (ctx->Extensions.ARB_shadow) { - *params = (GLint) obj->Sampler.CompareFunc; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.ARB_shadow) + goto invalid_pname; + *params = (GLint) obj->Sampler.CompareFunc; break; case GL_DEPTH_TEXTURE_MODE_ARB: - if (ctx->Extensions.ARB_depth_texture) { - *params = (GLint) obj->Sampler.DepthMode; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.ARB_depth_texture) + goto invalid_pname; + *params = (GLint) obj->Sampler.DepthMode; break; case GL_TEXTURE_LOD_BIAS: - if (ctx->Extensions.EXT_texture_lod_bias) { - *params = (GLint) obj->Sampler.LodBias; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_lod_bias) + goto invalid_pname; + *params = (GLint) obj->Sampler.LodBias; break; #if FEATURE_OES_draw_texture case GL_TEXTURE_CROP_RECT_OES: @@ -1402,41 +1337,34 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) case GL_TEXTURE_SWIZZLE_G_EXT: case GL_TEXTURE_SWIZZLE_B_EXT: case GL_TEXTURE_SWIZZLE_A_EXT: - if (ctx->Extensions.EXT_texture_swizzle) { - GLuint comp = pname - GL_TEXTURE_SWIZZLE_R_EXT; - *params = obj->Swizzle[comp]; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_swizzle) + goto invalid_pname; + *params = obj->Swizzle[pname - GL_TEXTURE_SWIZZLE_R_EXT]; break; case GL_TEXTURE_SWIZZLE_RGBA_EXT: - if (ctx->Extensions.EXT_texture_swizzle) { - COPY_4V(params, obj->Swizzle); - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_swizzle) + goto invalid_pname; + COPY_4V(params, obj->Swizzle); break; case GL_TEXTURE_CUBE_MAP_SEAMLESS: - if (ctx->Extensions.AMD_seamless_cubemap_per_texture) { - *params = (GLint) obj->Sampler.CubeMapSeamless; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.AMD_seamless_cubemap_per_texture) + goto invalid_pname; + *params = (GLint) obj->Sampler.CubeMapSeamless; + break; default: - ; /* silence warnings */ + goto invalid_pname; } - if (error) - _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameteriv(pname=0x%x)", - pname); + /* no error if we get here */ + _mesa_unlock_texture(ctx, obj); + return; +invalid_pname: _mesa_unlock_texture(ctx, obj); + _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameteriv(pname=0x%x)", pname); } @@ -1449,6 +1377,8 @@ _mesa_GetTexParameterIiv(GLenum target, GLenum pname, GLint *params) ASSERT_OUTSIDE_BEGIN_END(ctx); texObj = get_texobj(ctx, target, GL_TRUE); + if (!texObj) + return; switch (pname) { case GL_TEXTURE_BORDER_COLOR: @@ -1469,6 +1399,8 @@ _mesa_GetTexParameterIuiv(GLenum target, GLenum pname, GLuint *params) ASSERT_OUTSIDE_BEGIN_END(ctx); texObj = get_texobj(ctx, target, GL_TRUE); + if (!texObj) + return; switch (pname) { case GL_TEXTURE_BORDER_COLOR: diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index 6e1e63bdfb0..c4aeaa8f16d 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -4577,8 +4577,7 @@ texture_row_stride(const struct gl_texture_image *texImage) /** - * This is the software fallback for Driver.TexImage1D() - * and Driver.CopyTexImage1D(). + * This is the software fallback for Driver.TexImage1D(). * \sa _mesa_store_teximage2d() */ void @@ -4629,8 +4628,7 @@ _mesa_store_teximage1d(struct gl_context *ctx, GLenum target, GLint level, /** - * This is the software fallback for Driver.TexImage2D() - * and Driver.CopyTexImage2D(). + * This is the software fallback for Driver.TexImage2D(). * * This function is oriented toward storing images in main memory, rather * than VRAM. Device driver's can easily plug in their own replacement. @@ -4684,8 +4682,7 @@ _mesa_store_teximage2d(struct gl_context *ctx, GLenum target, GLint level, /** - * This is the software fallback for Driver.TexImage3D() - * and Driver.CopyTexImage3D(). + * This is the software fallback for Driver.TexImage3D(). * \sa _mesa_store_teximage2d() */ void diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c index dd069a3a4d1..cda840fe2d2 100644 --- a/src/mesa/main/uniforms.c +++ b/src/mesa/main/uniforms.c @@ -429,7 +429,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, for (i = 0; i < rows; i++) { const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { - params[k++] = prog->Parameters->ParameterValues[base][j]; + params[k++] = prog->Parameters->ParameterValues[base][j].f; } } } @@ -442,7 +442,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { params[k++] = (GLdouble) - prog->Parameters->ParameterValues[base][j]; + prog->Parameters->ParameterValues[base][j].f; } } } @@ -454,8 +454,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, for (i = 0; i < rows; i++) { const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { - params[k++] = (GLint) - prog->Parameters->ParameterValues[base][j]; + params[k++] = ctx->Const.NativeIntegers ? + prog->Parameters->ParameterValues[base][j].i : + (GLint) prog->Parameters->ParameterValues[base][j].f; } } } @@ -467,8 +468,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, for (i = 0; i < rows; i++) { const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { - params[k++] = (GLuint) - prog->Parameters->ParameterValues[base][j]; + params[k++] = ctx->Const.NativeIntegers ? + prog->Parameters->ParameterValues[base][j].u : + (GLuint) prog->Parameters->ParameterValues[base][j].f; } } } @@ -670,7 +672,7 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program, /* loop over number of samplers to change */ for (i = 0; i < count; i++) { GLuint sampler = (GLuint) - program->Parameters->ParameterValues[index + offset + i][0]; + program->Parameters->ParameterValues[index+offset + i][0].f; GLuint texUnit = ((GLuint *) values)[i]; /* check that the sampler (tex unit index) is legal */ @@ -735,42 +737,52 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program, /* loop over number of array elements */ for (k = 0; k < count; k++) { - GLfloat *uniformVal; + gl_constant_value *uniformVal; if (offset + k >= slots) { /* Extra array data is ignored */ break; } - /* uniformVal (the destination) is always float[4] */ + /* uniformVal (the destination) is always gl_constant_value[4] */ uniformVal = program->Parameters->ParameterValues[index + offset + k]; if (basicType == GL_INT) { - /* convert user's ints to floats */ const GLint *iValues = ((const GLint *) values) + k * elems; for (i = 0; i < elems; i++) { - uniformVal[i] = (GLfloat) iValues[i]; + if (!ctx->Const.NativeIntegers) + uniformVal[i].f = (GLfloat) iValues[i]; + else + uniformVal[i].i = iValues[i]; } } else if (basicType == GL_UNSIGNED_INT) { - /* convert user's uints to floats */ const GLuint *iValues = ((const GLuint *) values) + k * elems; for (i = 0; i < elems; i++) { - uniformVal[i] = (GLfloat) iValues[i]; + if (!ctx->Const.NativeIntegers) + uniformVal[i].f = (GLfloat)(GLuint) iValues[i]; + else + uniformVal[i].u = iValues[i]; } } else { const GLfloat *fValues = ((const GLfloat *) values) + k * elems; assert(basicType == GL_FLOAT); for (i = 0; i < elems; i++) { - uniformVal[i] = fValues[i]; + uniformVal[i].f = fValues[i]; } } - /* if the uniform is bool-valued, convert to 1.0 or 0.0 */ + /* if the uniform is bool-valued, convert to 1 or 0 */ if (isUniformBool) { for (i = 0; i < elems; i++) { - uniformVal[i] = uniformVal[i] ? 1.0f : 0.0f; + if (basicType == GL_FLOAT) + uniformVal[i].b = uniformVal[i].f != 0.0f ? 1 : 0; + else + uniformVal[i].b = uniformVal[i].u ? 1 : 0; + + if (!ctx->Const.NativeIntegers) + uniformVal[i].f = uniformVal[i].b ? 1.0f : 0.0f; } } } @@ -936,7 +948,7 @@ set_program_uniform_matrix(struct gl_context *ctx, struct gl_program *program, /* Ignore writes beyond the end of (the used part of) an array */ return; } - v = program->Parameters->ParameterValues[index + offset]; + v = (GLfloat *) program->Parameters->ParameterValues[index + offset]; for (row = 0; row < rows; row++) { if (transpose) { v[row] = values[src + row * cols + col]; diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index d8e5a3a9772..6820e4c6ba7 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -134,7 +134,7 @@ src_reg::src_reg(dst_reg reg) this->index = reg.index; this->swizzle = SWIZZLE_XYZW; this->negate = 0; - this->reladdr = NULL; + this->reladdr = reg.reladdr; } dst_reg::dst_reg(src_reg reg) @@ -297,11 +297,11 @@ public: /** * Emit the correct dot-product instruction for the type of arguments */ - void emit_dp(ir_instruction *ir, - dst_reg dst, - src_reg src0, - src_reg src1, - unsigned elements); + ir_to_mesa_instruction * emit_dp(ir_instruction *ir, + dst_reg dst, + src_reg src0, + src_reg src1, + unsigned elements); void emit_scalar(ir_instruction *ir, enum prog_opcode op, dst_reg dst, src_reg src0); @@ -312,9 +312,11 @@ public: void emit_scs(ir_instruction *ir, enum prog_opcode op, dst_reg dst, const src_reg &src); - GLboolean try_emit_mad(ir_expression *ir, + bool try_emit_mad(ir_expression *ir, int mul_operand); - GLboolean try_emit_sat(ir_expression *ir); + bool try_emit_mad_for_and_not(ir_expression *ir, + int mul_operand); + bool try_emit_sat(ir_expression *ir); void emit_swz(ir_expression *ir); @@ -331,20 +333,6 @@ dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP); dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X); -static void -fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); - -static void -fail_link(struct gl_shader_program *prog, const char *fmt, ...) -{ - va_list args; - va_start(args, fmt); - ralloc_vasprintf_append(&prog->InfoLog, fmt, args); - va_end(args); - - prog->LinkStatus = GL_FALSE; -} - static int swizzle_for_size(int size) { @@ -422,7 +410,7 @@ ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op) return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); } -void +ir_to_mesa_instruction * ir_to_mesa_visitor::emit_dp(ir_instruction *ir, dst_reg dst, src_reg src0, src_reg src1, unsigned elements) @@ -431,7 +419,7 @@ ir_to_mesa_visitor::emit_dp(ir_instruction *ir, OPCODE_DP2, OPCODE_DP3, OPCODE_DP4 }; - emit(ir, dot_opcodes[elements - 2], dst, src0, src1); + return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); } /** @@ -593,13 +581,13 @@ ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op, } } -struct src_reg +src_reg ir_to_mesa_visitor::src_reg_for_float(float val) { src_reg src(PROGRAM_CONSTANT, -1, NULL); src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - &val, 1, &src.swizzle); + (const gl_constant_value *)&val, 1, &src.swizzle); return src; } @@ -655,8 +643,6 @@ src_reg ir_to_mesa_visitor::get_temp(const glsl_type *type) { src_reg src; - int swizzle[4]; - int i; src.file = PROGRAM_TEMPORARY; src.index = next_temp; @@ -666,12 +652,7 @@ ir_to_mesa_visitor::get_temp(const glsl_type *type) if (type->is_array() || type->is_record()) { src.swizzle = SWIZZLE_NOOP; } else { - for (i = 0; i < type->vector_elements; i++) - swizzle[i] = i; - for (; i < 4; i++) - swizzle[i] = type->vector_elements - 1; - src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], - swizzle[2], swizzle[3]); + src.swizzle = swizzle_for_size(type->vector_elements); } src.negate = 0; @@ -744,7 +725,7 @@ ir_to_mesa_visitor::visit(ir_variable *ir) } } - struct variable_storage *storage; + variable_storage *storage; dst_reg dst; if (i == ir->num_state_slots) { /* We'll set the index later. */ @@ -789,10 +770,11 @@ ir_to_mesa_visitor::visit(ir_variable *ir) if (storage->file == PROGRAM_TEMPORARY && dst.index != storage->index + (int) ir->num_state_slots) { - fail_link(this->shader_program, - "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", - ir->name, dst.index - storage->index, - type_size(ir->type)); + linker_error(this->shader_program, + "failed to load builtin uniform `%s' " + "(%d/%d regs loaded)\n", + ir->name, dst.index - storage->index, + type_size(ir->type)); } } } @@ -889,7 +871,7 @@ ir_to_mesa_visitor::visit(ir_function *ir) } } -GLboolean +bool ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand) { int nonmul_operand = 1 - mul_operand; @@ -912,7 +894,47 @@ ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand) return true; } -GLboolean +/** + * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b)) + * + * The logic values are 1.0 for true and 0.0 for false. Logical-and is + * implemented using multiplication, and logical-or is implemented using + * addition. Logical-not can be implemented as (true - x), or (1.0 - x). + * As result, the logical expression (a & !b) can be rewritten as: + * + * - a * !b + * - a * (1 - b) + * - (a * 1) - (a * b) + * - a + -(a * b) + * - a + (a * -b) + * + * This final expression can be implemented as a single MAD(a, -b, a) + * instruction. + */ +bool +ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) +{ + const int other_operand = 1 - try_operand; + src_reg a, b; + + ir_expression *expr = ir->operands[try_operand]->as_expression(); + if (!expr || expr->operation != ir_unop_logic_not) + return false; + + ir->operands[other_operand]->accept(this); + a = this->result; + expr->operands[0]->accept(this); + b = this->result; + + b.negate = ~b.negate; + + this->result = get_temp(ir->type); + emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a); + + return true; +} + +bool ir_to_mesa_visitor::try_emit_sat(ir_expression *ir) { /* Saturates were only introduced to vertex programs in @@ -928,10 +950,30 @@ ir_to_mesa_visitor::try_emit_sat(ir_expression *ir) sat_src->accept(this); src_reg src = this->result; - this->result = get_temp(ir->type); - ir_to_mesa_instruction *inst; - inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src); - inst->saturate = true; + /* If we generated an expression instruction into a temporary in + * processing the saturate's operand, apply the saturate to that + * instruction. Otherwise, generate a MOV to do the saturate. + * + * Note that we have to be careful to only do this optimization if + * the instruction in question was what generated src->result. For + * example, ir_dereference_array might generate a MUL instruction + * to create the reladdr, and return us a src reg using that + * reladdr. That MUL result is not the value we're trying to + * saturate. + */ + ir_expression *sat_src_expr = sat_src->as_expression(); + ir_to_mesa_instruction *new_inst; + new_inst = (ir_to_mesa_instruction *)this->instructions.get_tail(); + if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul || + sat_src_expr->operation == ir_binop_add || + sat_src_expr->operation == ir_binop_dot)) { + new_inst->saturate = true; + } else { + this->result = get_temp(ir->type); + ir_to_mesa_instruction *inst; + inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src); + inst->saturate = true; + } return true; } @@ -1088,6 +1130,16 @@ ir_to_mesa_visitor::visit(ir_expression *ir) if (try_emit_mad(ir, 0)) return; } + + /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) + */ + if (ir->operation == ir_binop_logic_and) { + if (try_emit_mad_for_and_not(ir, 1)) + return; + if (try_emit_mad_for_and_not(ir, 0)) + return; + } + if (try_emit_sat(ir)) return; @@ -1135,7 +1187,13 @@ ir_to_mesa_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: - emit(ir, OPCODE_SEQ, result_dst, op[0], src_reg_for_float(0.0)); + /* Previously 'SEQ dst, src, 0.0' was used for this. However, many + * older GPUs implement SEQ using multiple instructions (i915 uses two + * SGE instructions and a MUL instruction). Since our logic values are + * 0.0 and 1.0, 1-x also implements !x. + */ + op[0].negate = ~op[0].negate; + emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0)); break; case ir_unop_neg: op[0].negate = ~op[0].negate; @@ -1231,8 +1289,19 @@ ir_to_mesa_visitor::visit(ir_expression *ir) ir->operands[1]->type->is_vector()) { src_reg temp = get_temp(glsl_type::vec4_type); emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero becomes 1.0, and positive values become zero. + */ emit_dp(ir, result_dst, temp, temp, vector_elements); - emit(ir, OPCODE_SEQ, result_dst, result_src, src_reg_for_float(0.0)); + + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero becomes 1.0, and negative values become zero. This + * achieved using SGE. + */ + src_reg sge_src = result_src; + sge_src.negate = ~sge_src.negate; + emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0)); } else { emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]); } @@ -1243,29 +1312,83 @@ ir_to_mesa_visitor::visit(ir_expression *ir) ir->operands[1]->type->is_vector()) { src_reg temp = get_temp(glsl_type::vec4_type); emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]); - emit_dp(ir, result_dst, temp, temp, vector_elements); - emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0)); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero stays zero, and positive values become 1.0. + */ + ir_to_mesa_instruction *const dp = + emit_dp(ir, result_dst, temp, temp, vector_elements); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + dp->saturate = true; + } else { + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero stays zero, and negative values become 1.0. This + * achieved using SLT. + */ + src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0)); + } } else { emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); } break; - case ir_unop_any: + case ir_unop_any: { assert(ir->operands[0]->type->is_vector()); - emit_dp(ir, result_dst, op[0], op[0], - ir->operands[0]->type->vector_elements); - emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0)); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero stays zero, and positive values become 1.0. + */ + ir_to_mesa_instruction *const dp = + emit_dp(ir, result_dst, op[0], op[0], + ir->operands[0]->type->vector_elements); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + dp->saturate = true; + } else { + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero stays zero, and negative values become 1.0. This + * is achieved using SLT. + */ + src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0)); + } break; + } case ir_binop_logic_xor: emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); break; - case ir_binop_logic_or: - /* This could be a saturated add and skip the SNE. */ - emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); - emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0)); + case ir_binop_logic_or: { + /* After the addition, the value will be an integer on the + * range [0,2]. Zero stays zero, and positive values become 1.0. + */ + ir_to_mesa_instruction *add = + emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + add->saturate = true; + } else { + /* Negating the result of the addition gives values on the range + * [-2, 0]. Zero stays zero, and negative values become 1.0. This + * is achieved using SLT. + */ + src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0)); + } break; + } case ir_binop_logic_and: /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ @@ -1496,6 +1619,18 @@ ir_to_mesa_visitor::visit(ir_dereference_array *ir) this->result, src_reg_for_float(element_size)); } + /* If there was already a relative address register involved, add the + * new and the old together to get the new offset. + */ + if (src.reladdr != NULL) { + src_reg accum_reg = get_temp(glsl_type::float_type); + + emit(ir, OPCODE_ADD, dst_reg(accum_reg), + index_reg, *src.reladdr); + + index_reg = accum_reg; + } + src.reladdr = ralloc(mem_ctx, src_reg); memcpy(src.reladdr, &index_reg, sizeof(index_reg)); } @@ -1796,7 +1931,7 @@ ir_to_mesa_visitor::visit(ir_constant *ir) src = src_reg(PROGRAM_CONSTANT, -1, NULL); src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - values, + (gl_constant_value *) values, ir->type->vector_elements, &src.swizzle); emit(ir, OPCODE_MOV, mat_column, src); @@ -1834,7 +1969,7 @@ ir_to_mesa_visitor::visit(ir_constant *ir) this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type); this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, - values, + (gl_constant_value *) values, ir->type->vector_elements, &this->result.swizzle); } @@ -1969,7 +2104,10 @@ ir_to_mesa_visitor::visit(ir_texture *ir) ir_to_mesa_instruction *inst = NULL; prog_opcode opcode = OPCODE_NOP; - ir->coordinate->accept(this); + if (ir->op == ir_txs) + this->result = src_reg_for_float(0.0); + else + ir->coordinate->accept(this); /* Put our coords in a temp. We'll need to modify them for shadow, * projection, or LOD, so the only case we'd use it as is is if @@ -1993,6 +2131,7 @@ ir_to_mesa_visitor::visit(ir_texture *ir) switch (ir->op) { case ir_tex: + case ir_txs: opcode = OPCODE_TEX; break; case ir_txb: @@ -2401,29 +2540,32 @@ check_resources(const struct gl_context *ctx, case GL_VERTEX_PROGRAM_ARB: if (_mesa_bitcount(prog->SamplersUsed) > ctx->Const.MaxVertexTextureImageUnits) { - fail_link(shader_program, "Too many vertex shader texture samplers"); + linker_error(shader_program, + "Too many vertex shader texture samplers"); } if (prog->Parameters->NumParameters > MAX_UNIFORMS) { - fail_link(shader_program, "Too many vertex shader constants"); + linker_error(shader_program, "Too many vertex shader constants"); } break; case MESA_GEOMETRY_PROGRAM: if (_mesa_bitcount(prog->SamplersUsed) > ctx->Const.MaxGeometryTextureImageUnits) { - fail_link(shader_program, "Too many geometry shader texture samplers"); + linker_error(shader_program, + "Too many geometry shader texture samplers"); } if (prog->Parameters->NumParameters > MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) { - fail_link(shader_program, "Too many geometry shader constants"); + linker_error(shader_program, "Too many geometry shader constants"); } break; case GL_FRAGMENT_PROGRAM_ARB: if (_mesa_bitcount(prog->SamplersUsed) > ctx->Const.MaxTextureImageUnits) { - fail_link(shader_program, "Too many fragment shader texture samplers"); + linker_error(shader_program, + "Too many fragment shader texture samplers"); } if (prog->Parameters->NumParameters > MAX_UNIFORMS) { - fail_link(shader_program, "Too many fragment shader constants"); + linker_error(shader_program, "Too many fragment shader constants"); } break; default: @@ -2531,16 +2673,17 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, */ if (file == PROGRAM_SAMPLER) { for (unsigned int j = 0; j < size / 4; j++) - prog->Parameters->ParameterValues[index + j][0] = next_sampler++; + prog->Parameters->ParameterValues[index + j][0].f = next_sampler++; } /* The location chosen in the Parameters list here (returned * from _mesa_add_uniform) has to match what the linker chose. */ if (index != parameter_index) { - fail_link(shader_program, "Allocation of uniform `%s' to target " - "failed (%d vs %d)\n", - uniform->Name, index, parameter_index); + linker_error(shader_program, + "Allocation of uniform `%s' to target failed " + "(%d vs %d)\n", + uniform->Name, index, parameter_index); } } } @@ -2573,8 +2716,8 @@ set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, int loc = _mesa_get_uniform_location(ctx, shader_program, name); if (loc == -1) { - fail_link(shader_program, - "Couldn't find uniform for initializer %s\n", name); + linker_error(shader_program, + "Couldn't find uniform for initializer %s\n", name); return; } @@ -2974,11 +3117,31 @@ get_mesa_program(struct gl_context *ctx, if (mesa_inst->SrcReg[src].RelAddr) prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File; - if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) { - fail_link(shader_program, "Couldn't flatten if statement\n"); - } - switch (mesa_inst->Opcode) { + case OPCODE_IF: + if (options->EmitNoIfs) { + linker_warning(shader_program, + "Couldn't flatten if-statement. " + "This will likely result in software " + "rasterization.\n"); + } + break; + case OPCODE_BGNLOOP: + if (options->EmitNoLoops) { + linker_warning(shader_program, + "Couldn't unroll loop. " + "This will likely result in software " + "rasterization.\n"); + } + break; + case OPCODE_CONT: + if (options->EmitNoCont) { + linker_warning(shader_program, + "Couldn't lower continue-statement. " + "This will likely result in software " + "rasterization.\n"); + } + break; case OPCODE_BGNSUB: inst->function->inst = i; mesa_inst->Comment = strdup(inst->function->sig->function_name()); @@ -3246,7 +3409,7 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) for (i = 0; i < prog->NumShaders; i++) { if (!prog->Shaders[i]->CompileStatus) { - fail_link(prog, "linking with uncompiled shader"); + linker_error(prog, "linking with uncompiled shader"); prog->LinkStatus = GL_FALSE; } } diff --git a/src/mesa/program/nvfragparse.c b/src/mesa/program/nvfragparse.c index 8516b5fc1ff..ce72c610d89 100644 --- a/src/mesa/program/nvfragparse.c +++ b/src/mesa/program/nvfragparse.c @@ -472,8 +472,9 @@ Parse_ScalarConstant(struct parse_state *parseState, GLfloat *number) const GLfloat *constant; if (!Parse_Identifier(parseState, ident)) RETURN_ERROR1("Expected an identifier"); - constant = _mesa_lookup_parameter_value(parseState->parameters, - -1, (const char *) ident); + constant = (GLfloat *)_mesa_lookup_parameter_value(parseState->parameters, + -1, + (const char *) ident); /* XXX Check that it's a constant and not a parameter */ if (!constant) { RETURN_ERROR1("Undefined symbol"); @@ -1039,7 +1040,8 @@ Parse_VectorSrc(struct parse_state *parseState, if (!Parse_ScalarConstant(parseState, values)) RETURN_ERROR; paramIndex = _mesa_add_unnamed_constant(parseState->parameters, - values, 4, NULL); + (gl_constant_value *) values, + 4, NULL); srcReg->File = PROGRAM_NAMED_PARAM; srcReg->Index = paramIndex; } @@ -1051,7 +1053,8 @@ Parse_VectorSrc(struct parse_state *parseState, if (!Parse_VectorConstant(parseState, values)) RETURN_ERROR; paramIndex = _mesa_add_unnamed_constant(parseState->parameters, - values, 4, NULL); + (gl_constant_value *) values, + 4, NULL); srcReg->File = PROGRAM_NAMED_PARAM; srcReg->Index = paramIndex; } @@ -1145,7 +1148,8 @@ Parse_ScalarSrcReg(struct parse_state *parseState, if (!Parse_VectorConstant(parseState, values)) RETURN_ERROR; paramIndex = _mesa_add_unnamed_constant(parseState->parameters, - values, 4, NULL); + (gl_constant_value *) values, + 4, NULL); srcReg->File = PROGRAM_NAMED_PARAM; srcReg->Index = paramIndex; } @@ -1170,7 +1174,8 @@ Parse_ScalarSrcReg(struct parse_state *parseState, if (!Parse_ScalarConstant(parseState, values)) RETURN_ERROR; paramIndex = _mesa_add_unnamed_constant(parseState->parameters, - values, 4, NULL); + (gl_constant_value *) values, + 4, NULL); srcReg->Index = paramIndex; srcReg->File = PROGRAM_NAMED_PARAM; needSuffix = GL_FALSE; @@ -1296,7 +1301,8 @@ Parse_InstructionSequence(struct parse_state *parseState, RETURN_ERROR2(id, "already defined"); } _mesa_add_named_parameter(parseState->parameters, - (const char *) id, value); + (const char *) id, + (gl_constant_value *) value); } else if (Parse_String(parseState, "DECLARE")) { GLubyte id[100]; @@ -1315,7 +1321,8 @@ Parse_InstructionSequence(struct parse_state *parseState, RETURN_ERROR2(id, "already declared"); } _mesa_add_named_parameter(parseState->parameters, - (const char *) id, value); + (const char *) id, + (gl_constant_value *) value); } else if (Parse_String(parseState, "END")) { inst->Opcode = OPCODE_END; diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c index e7553c69dbe..77f842a1630 100644 --- a/src/mesa/program/prog_execute.c +++ b/src/mesa/program/prog_execute.c @@ -157,7 +157,7 @@ get_src_register_pointer(const struct prog_src_register *source, case PROGRAM_NAMED_PARAM: if (reg >= (GLint) prog->Parameters->NumParameters) return ZeroVec; - return prog->Parameters->ParameterValues[reg]; + return (GLfloat *) prog->Parameters->ParameterValues[reg]; case PROGRAM_SYSTEM_VALUE: assert(reg < Elements(machine->SystemValues)); @@ -639,7 +639,7 @@ _mesa_execute_program(struct gl_context * ctx, struct gl_program_machine *machine) { const GLuint numInst = program->NumInstructions; - const GLuint maxExec = 10000; + const GLuint maxExec = 65536; GLuint pc, numExec = 0; machine->CurProgram = program; @@ -1651,6 +1651,14 @@ _mesa_execute_program(struct gl_context * ctx, GLfloat texcoord[4], color[4]; fetch_vector4(&inst->SrcReg[0], machine, texcoord); + /* For TEX, texcoord.Q should not be used and its value should not + * matter (at most, we pass coord.xyz to texture3D() in GLSL). + * Set Q=1 so that FetchTexelDeriv() doesn't get a garbage value + * which is effectively what happens when the texcoord swizzle + * is .xyzz + */ + texcoord[3] = 1.0f; + fetch_texel(ctx, machine, inst, texcoord, 0.0, color); if (DEBUG_PROG) { diff --git a/src/mesa/program/prog_opt_constant_fold.c b/src/mesa/program/prog_opt_constant_fold.c new file mode 100644 index 00000000000..e2418b55451 --- /dev/null +++ b/src/mesa/program/prog_opt_constant_fold.c @@ -0,0 +1,451 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "main/glheader.h" +#include "main/context.h" +#include "main/macros.h" +#include "program.h" +#include "prog_instruction.h" +#include "prog_optimize.h" +#include "prog_parameter.h" +#include <stdbool.h> + +static bool +src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs) +{ + unsigned i; + + for (i = 0; i < num_srcs; i++) { + if (inst->SrcReg[i].File != PROGRAM_CONSTANT) + return false; + } + + return true; +} + +static struct prog_src_register +src_reg_for_float(struct gl_program *prog, float val) +{ + struct prog_src_register src; + unsigned swiz; + + memset(&src, 0, sizeof(src)); + + src.File = PROGRAM_CONSTANT; + src.Index = _mesa_add_unnamed_constant(prog->Parameters, + (gl_constant_value *) &val, 1, &swiz); + src.Swizzle = swiz; + return src; +} + +static struct prog_src_register +src_reg_for_vec4(struct gl_program *prog, const float *val) +{ + struct prog_src_register src; + unsigned swiz; + + memset(&src, 0, sizeof(src)); + + src.File = PROGRAM_CONSTANT; + src.Index = _mesa_add_unnamed_constant(prog->Parameters, + (gl_constant_value *) val, 4, &swiz); + src.Swizzle = swiz; + return src; +} + +static bool +src_regs_are_same(const struct prog_src_register *a, + const struct prog_src_register *b) +{ + return (a->File == b->File) + && (a->Index == b->Index) + && (a->Swizzle == b->Swizzle) + && (a->Abs == b->Abs) + && (a->Negate == b->Negate) + && (a->RelAddr == 0) + && (b->RelAddr == 0); +} + +static void +get_value(struct gl_program *prog, struct prog_src_register *r, float *data) +{ + const gl_constant_value *const value = + prog->Parameters->ParameterValues[r->Index]; + + data[0] = value[GET_SWZ(r->Swizzle, 0)].f; + data[1] = value[GET_SWZ(r->Swizzle, 1)].f; + data[2] = value[GET_SWZ(r->Swizzle, 2)].f; + data[3] = value[GET_SWZ(r->Swizzle, 3)].f; + + if (r->Abs) { + data[0] = fabsf(data[0]); + data[1] = fabsf(data[1]); + data[2] = fabsf(data[2]); + data[3] = fabsf(data[3]); + } + + if (r->Negate & 0x01) { + data[0] = -data[0]; + } + + if (r->Negate & 0x02) { + data[1] = -data[1]; + } + + if (r->Negate & 0x04) { + data[2] = -data[2]; + } + + if (r->Negate & 0x08) { + data[3] = -data[3]; + } +} + +/** + * Try to replace instructions that produce a constant result with simple moves + * + * The hope is that a following copy propagation pass will eliminate the + * unnecessary move instructions. + */ +GLboolean +_mesa_constant_fold(struct gl_program *prog) +{ + bool progress = false; + unsigned i; + + for (i = 0; i < prog->NumInstructions; i++) { + struct prog_instruction *const inst = &prog->Instructions[i]; + + switch (inst->Opcode) { + case OPCODE_ADD: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = a[0] + b[0]; + result[1] = a[1] + b[1]; + result[2] = a[2] + b[2]; + result[3] = a[3] + b[3]; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_CMP: + /* FINISHME: We could also optimize CMP instructions where the first + * FINISHME: source is a constant that is either all < 0.0 or all + * FINISHME: >= 0.0. + */ + if (src_regs_are_constant(inst, 3)) { + float a[4]; + float b[4]; + float c[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + get_value(prog, &inst->SrcReg[2], c); + + result[0] = a[0] < 0.0f ? b[0] : c[0]; + result[1] = a[1] < 0.0f ? b[1] : c[1]; + result[2] = a[2] < 0.0f ? b[2] : c[2]; + result[3] = a[3] < 0.0f ? b[3] : c[3]; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + inst->SrcReg[2].File = PROGRAM_UNDEFINED; + inst->SrcReg[2].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_DP2: + case OPCODE_DP3: + case OPCODE_DP4: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + /* It seems like a loop could be used here, but we cleverly put + * DP2A between DP2 and DP3. Subtracting DP2 (or similar) from + * the opcode results in various failures of the loop control. + */ + result = (a[0] * b[0]) + (a[1] * b[1]); + + if (inst->Opcode >= OPCODE_DP3) + result += a[2] * b[2]; + + if (inst->Opcode == OPCODE_DP4) + result += a[3] * b[3]; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_MUL: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = a[0] * b[0]; + result[1] = a[1] * b[1]; + result[2] = a[2] * b[2]; + result[3] = a[3] * b[3]; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SEQ: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] == b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] == b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] == b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] == b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SGE: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SGT: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] > b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] > b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] > b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] > b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SLE: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] <= b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] <= b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] <= b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] <= b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SLT: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] < b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] < b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] < b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] < b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SNE: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] != b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] != b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] != b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] != b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + default: + break; + } + } + + return progress; +} diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c index 8a40fa69eca..25d9684b137 100644 --- a/src/mesa/program/prog_optimize.c +++ b/src/mesa/program/prog_optimize.c @@ -472,8 +472,7 @@ can_downward_mov_be_modifed(const struct prog_instruction *mov) mov->SrcReg[0].HasIndex2 == 0 && mov->SrcReg[0].RelAddr2 == 0 && mov->DstReg.RelAddr == 0 && - mov->DstReg.CondMask == COND_TR && - mov->SaturateMode == SATURATE_OFF; + mov->DstReg.CondMask == COND_TR; } @@ -482,7 +481,8 @@ can_upward_mov_be_modifed(const struct prog_instruction *mov) { return can_downward_mov_be_modifed(mov) && - mov->DstReg.File == PROGRAM_TEMPORARY; + mov->DstReg.File == PROGRAM_TEMPORARY && + mov->SaturateMode == SATURATE_OFF; } @@ -657,6 +657,8 @@ _mesa_merge_mov_into_inst(struct prog_instruction *inst, if (mask != (inst->DstReg.WriteMask & mask)) return GL_FALSE; + inst->SaturateMode |= mov->SaturateMode; + /* Depending on the instruction, we may need to recompute the swizzles. * Also, some other instructions (like TEX) are not linear. We will only * consider completely active sources and destinations @@ -1319,6 +1321,15 @@ _mesa_simplify_cmp(struct gl_program * program) inst->Opcode = OPCODE_MOV; inst->SrcReg[0] = inst->SrcReg[1]; + + /* Unused operands are expected to have the file set to + * PROGRAM_UNDEFINED. This is how _mesa_init_instructions initializes + * all of the sources. + */ + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + inst->SrcReg[2].File = PROGRAM_UNDEFINED; + inst->SrcReg[2].Swizzle = SWIZZLE_NOOP; } } if (dbg) { @@ -1347,6 +1358,8 @@ _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program) any_change = GL_TRUE; if (_mesa_remove_dead_code_local(program)) any_change = GL_TRUE; + + any_change = _mesa_constant_fold(program) || any_change; _mesa_reallocate_registers(program); } while (any_change); } diff --git a/src/mesa/program/prog_optimize.h b/src/mesa/program/prog_optimize.h index 463f5fc51c4..9854fb7a491 100644 --- a/src/mesa/program/prog_optimize.h +++ b/src/mesa/program/prog_optimize.h @@ -44,4 +44,7 @@ _mesa_find_temp_intervals(const struct prog_instruction *instructions, extern void _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program); +extern GLboolean +_mesa_constant_fold(struct gl_program *prog); + #endif diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c index 3570cab118b..49b3ffbdd5c 100644 --- a/src/mesa/program/prog_parameter.c +++ b/src/mesa/program/prog_parameter.c @@ -56,8 +56,8 @@ _mesa_new_parameter_list_sized(unsigned size) p->Parameters = (struct gl_program_parameter *) calloc(1, size * sizeof(struct gl_program_parameter)); - p->ParameterValues = (GLfloat (*)[4]) - _mesa_align_malloc(size * 4 *sizeof(GLfloat), 16); + p->ParameterValues = (gl_constant_value (*)[4]) + _mesa_align_malloc(size * 4 *sizeof(gl_constant_value), 16); if ((p->Parameters == NULL) || (p->ParameterValues == NULL)) { @@ -101,14 +101,15 @@ _mesa_free_parameter_list(struct gl_program_parameter_list *paramList) * \param name the parameter name, will be duplicated/copied! * \param size number of elements in 'values' vector (1..4, or more) * \param datatype GL_FLOAT, GL_FLOAT_VECx, GL_INT, GL_INT_VECx or GL_NONE. - * \param values initial parameter value, up to 4 GLfloats, or NULL + * \param values initial parameter value, up to 4 gl_constant_values, or NULL * \param state state indexes, or NULL * \return index of new parameter in the list, or -1 if error (out of mem) */ GLint _mesa_add_parameter(struct gl_program_parameter_list *paramList, gl_register_file type, const char *name, - GLuint size, GLenum datatype, const GLfloat *values, + GLuint size, GLenum datatype, + const gl_constant_value *values, const gl_state_index state[STATE_LENGTH], GLbitfield flags) { @@ -127,10 +128,10 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, oldNum * sizeof(struct gl_program_parameter), paramList->Size * sizeof(struct gl_program_parameter)); - paramList->ParameterValues = (GLfloat (*)[4]) + paramList->ParameterValues = (gl_constant_value (*)[4]) _mesa_align_realloc(paramList->ParameterValues, /* old buf */ - oldNum * 4 * sizeof(GLfloat), /* old size */ - paramList->Size * 4 *sizeof(GLfloat), /* new sz */ + oldNum * 4 * sizeof(gl_constant_value),/* old sz */ + paramList->Size*4*sizeof(gl_constant_value),/*new*/ 16); } @@ -142,7 +143,7 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, return -1; } else { - GLuint i; + GLuint i, j; paramList->NumParameters = oldNum + sz4; @@ -163,7 +164,8 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, } else { /* silence valgrind */ - ASSIGN_4V(paramList->ParameterValues[oldNum + i], 0, 0, 0, 0); + for (j = 0; j < 4; j++) + paramList->ParameterValues[oldNum + i][j].f = 0; } size -= 4; } @@ -184,7 +186,7 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, */ GLint _mesa_add_named_parameter(struct gl_program_parameter_list *paramList, - const char *name, const GLfloat values[4]) + const char *name, const gl_constant_value values[4]) { return _mesa_add_parameter(paramList, PROGRAM_NAMED_PARAM, name, 4, GL_NONE, values, NULL, 0x0); @@ -204,17 +206,17 @@ _mesa_add_named_parameter(struct gl_program_parameter_list *paramList, */ GLint _mesa_add_named_constant(struct gl_program_parameter_list *paramList, - const char *name, const GLfloat values[4], + const char *name, const gl_constant_value values[4], GLuint size) { /* first check if this is a duplicate constant */ GLint pos; for (pos = 0; pos < (GLint)paramList->NumParameters; pos++) { - const GLfloat *pvals = paramList->ParameterValues[pos]; - if (pvals[0] == values[0] && - pvals[1] == values[1] && - pvals[2] == values[2] && - pvals[3] == values[3] && + const gl_constant_value *pvals = paramList->ParameterValues[pos]; + if (pvals[0].u == values[0].u && + pvals[1].u == values[1].u && + pvals[2].u == values[2].u && + pvals[3].u == values[3].u && strcmp(paramList->Parameters[pos].Name, name) == 0) { /* Same name and value is already in the param list - reuse it */ return pos; @@ -239,9 +241,9 @@ _mesa_add_named_constant(struct gl_program_parameter_list *paramList, * \return index/position of the new parameter in the parameter list. */ GLint -_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, - const GLfloat values[4], GLuint size, - GLuint *swizzleOut) +_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList, + const gl_constant_value values[4], GLuint size, + GLenum datatype, GLuint *swizzleOut) { GLint pos; ASSERT(size >= 1); @@ -262,7 +264,7 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, struct gl_program_parameter *p = paramList->Parameters + pos; if (p->Type == PROGRAM_CONSTANT && p->Size + size <= 4) { /* ok, found room */ - GLfloat *pVal = paramList->ParameterValues[pos]; + gl_constant_value *pVal = paramList->ParameterValues[pos]; GLuint swz = p->Size; /* 1, 2 or 3 for Y, Z, W */ pVal[p->Size] = values[0]; p->Size++; @@ -274,7 +276,7 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, /* add a new parameter to store this constant */ pos = _mesa_add_parameter(paramList, PROGRAM_CONSTANT, NULL, - size, GL_NONE, values, NULL, 0x0); + size, datatype, values, NULL, 0x0); if (pos >= 0 && swizzleOut) { if (size == 1) *swizzleOut = SWIZZLE_XXXX; @@ -285,6 +287,28 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, } /** + * Add a new unnamed constant to the parameter list. This will be used + * when a fragment/vertex program contains something like this: + * MOV r, { 0, 1, 2, 3 }; + * If swizzleOut is non-null we'll search the parameter list for an + * existing instance of the constant which matches with a swizzle. + * + * \param paramList the parameter list + * \param values four float values + * \param swizzleOut returns swizzle mask for accessing the constant + * \return index/position of the new parameter in the parameter list. + * \sa _mesa_add_typed_unnamed_constant + */ +GLint +_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, + const gl_constant_value values[4], GLuint size, + GLuint *swizzleOut) +{ + return _mesa_add_typed_unnamed_constant(paramList, values, size, GL_NONE, + swizzleOut); +} + +/** * Add parameter representing a varying variable. */ GLint @@ -401,7 +425,7 @@ _mesa_add_state_reference(struct gl_program_parameter_list *paramList, * Lookup a parameter value by name in the given parameter list. * \return pointer to the float[4] values. */ -GLfloat * +gl_constant_value * _mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList, GLsizei nameLen, const char *name) { @@ -465,7 +489,7 @@ _mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList, */ GLboolean _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, - const GLfloat v[], GLuint vSize, + const gl_constant_value v[], GLuint vSize, GLint *posOut, GLuint *swizzleOut) { GLuint i; @@ -484,7 +508,7 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, /* swizzle not allowed */ GLuint j, match = 0; for (j = 0; j < vSize; j++) { - if (v[j] == list->ParameterValues[i][j]) + if (v[j].u == list->ParameterValues[i][j].u) match++; } if (match == vSize) { @@ -498,7 +522,7 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, /* look for v[0] anywhere within float[4] value */ GLuint j; for (j = 0; j < list->Parameters[i].Size; j++) { - if (list->ParameterValues[i][j] == v[0]) { + if (list->ParameterValues[i][j].u == v[0].u) { /* found it */ *posOut = i; *swizzleOut = MAKE_SWIZZLE4(j, j, j, j); @@ -511,13 +535,13 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, GLuint swz[4]; GLuint match = 0, j, k; for (j = 0; j < vSize; j++) { - if (v[j] == list->ParameterValues[i][j]) { + if (v[j].u == list->ParameterValues[i][j].u) { swz[j] = j; match++; } else { for (k = 0; k < list->Parameters[i].Size; k++) { - if (v[j] == list->ParameterValues[i][k]) { + if (v[j].u == list->ParameterValues[i][k].u) { swz[j] = k; match++; break; diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h index 10cbbe57a6c..1a5ed343937 100644 --- a/src/mesa/program/prog_parameter.h +++ b/src/mesa/program/prog_parameter.h @@ -47,6 +47,17 @@ /*@}*/ +/** + * Actual data for constant values of parameters. + */ +typedef union gl_constant_value +{ + GLfloat f; + GLboolean b; + GLint i; + GLuint u; +} gl_constant_value; + /** * Program parameter. @@ -81,7 +92,7 @@ struct gl_program_parameter_list GLuint Size; /**< allocated size of Parameters, ParameterValues */ GLuint NumParameters; /**< number of parameters in arrays */ struct gl_program_parameter *Parameters; /**< Array [Size] */ - GLfloat (*ParameterValues)[4]; /**< Array [Size] of GLfloat[4] */ + gl_constant_value (*ParameterValues)[4]; /**< Array [Size] of constant[4] */ GLbitfield StateFlags; /**< _NEW_* flags indicating which state changes might invalidate ParameterValues[] */ }; @@ -112,22 +123,28 @@ _mesa_num_parameters(const struct gl_program_parameter_list *list) extern GLint _mesa_add_parameter(struct gl_program_parameter_list *paramList, gl_register_file type, const char *name, - GLuint size, GLenum datatype, const GLfloat *values, + GLuint size, GLenum datatype, + const gl_constant_value *values, const gl_state_index state[STATE_LENGTH], GLbitfield flags); extern GLint _mesa_add_named_parameter(struct gl_program_parameter_list *paramList, - const char *name, const GLfloat values[4]); + const char *name, const gl_constant_value values[4]); extern GLint _mesa_add_named_constant(struct gl_program_parameter_list *paramList, - const char *name, const GLfloat values[4], + const char *name, const gl_constant_value values[4], GLuint size); extern GLint +_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList, + const gl_constant_value values[4], GLuint size, + GLenum datatype, GLuint *swizzleOut); + +extern GLint _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, - const GLfloat values[4], GLuint size, + const gl_constant_value values[4], GLuint size, GLuint *swizzleOut); extern GLint @@ -143,7 +160,7 @@ extern GLint _mesa_add_state_reference(struct gl_program_parameter_list *paramList, const gl_state_index stateTokens[STATE_LENGTH]); -extern GLfloat * +extern gl_constant_value * _mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList, GLsizei nameLen, const char *name); @@ -153,7 +170,7 @@ _mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList, extern GLboolean _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, - const GLfloat v[], GLuint vSize, + const gl_constant_value v[], GLuint vSize, GLint *posOut, GLuint *swizzleOut); extern GLuint diff --git a/src/mesa/program/prog_parameter_layout.c b/src/mesa/program/prog_parameter_layout.c index 90a9771080c..28fca3b92d9 100644 --- a/src/mesa/program/prog_parameter_layout.c +++ b/src/mesa/program/prog_parameter_layout.c @@ -182,7 +182,7 @@ _mesa_layout_parameters(struct asm_parser_state *state) switch (p->Type) { case PROGRAM_CONSTANT: { - const float *const v = + const gl_constant_value *const v = state->prog->Parameters->ParameterValues[idx]; inst->Base.SrcReg[i].Index = diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c index 7c3b4909e73..70412b1fa6a 100644 --- a/src/mesa/program/prog_print.c +++ b/src/mesa/program/prog_print.c @@ -985,7 +985,7 @@ _mesa_fprint_parameter_list(FILE *f, fprintf(f, "dirty state flags: 0x%x\n", list->StateFlags); for (i = 0; i < list->NumParameters; i++){ struct gl_program_parameter *param = list->Parameters + i; - const GLfloat *v = list->ParameterValues[i]; + const GLfloat *v = (GLfloat *) list->ParameterValues[i]; fprintf(f, "param[%d] sz=%d %s %s = {%.3g, %.3g, %.3g, %.3g}", i, param->Size, _mesa_register_file_name(list->Parameters[i].Type), diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c index 16f9690e865..6aa2409e85e 100644 --- a/src/mesa/program/prog_statevars.c +++ b/src/mesa/program/prog_statevars.c @@ -1111,7 +1111,7 @@ _mesa_load_state_parameters(struct gl_context *ctx, if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) { _mesa_fetch_state(ctx, paramList->Parameters[i].StateIndexes, - paramList->ParameterValues[i]); + ¶mList->ParameterValues[i][0].f); } } } diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index adca094ee89..ecff2344a44 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -388,8 +388,9 @@ _mesa_delete_program(struct gl_context *ctx, struct gl_program *prog) if (prog->String) free(prog->String); - _mesa_free_instructions(prog->Instructions, prog->NumInstructions); - + if (prog->Instructions) { + _mesa_free_instructions(prog->Instructions, prog->NumInstructions); + } if (prog->Parameters) { _mesa_free_parameter_list(prog->Parameters); } @@ -1031,7 +1032,8 @@ _mesa_postprocess_program(struct gl_context *ctx, struct gl_program *prog) GLuint i; GLuint whiteSwizzle; GLint whiteIndex = _mesa_add_unnamed_constant(prog->Parameters, - white, 4, &whiteSwizzle); + (gl_constant_value *) white, + 4, &whiteSwizzle); (void) whiteIndex; diff --git a/src/mesa/program/program_parse.y b/src/mesa/program/program_parse.y index dbf5abaa617..dec35038be5 100644 --- a/src/mesa/program/program_parse.y +++ b/src/mesa/program/program_parse.y @@ -1854,64 +1854,64 @@ paramConstUse: paramConstScalarUse | paramConstVector; paramConstScalarDecl: signedFloatConstant { $$.count = 4; - $$.data[0] = $1; - $$.data[1] = $1; - $$.data[2] = $1; - $$.data[3] = $1; + $$.data[0].f = $1; + $$.data[1].f = $1; + $$.data[2].f = $1; + $$.data[3].f = $1; } ; paramConstScalarUse: REAL { $$.count = 1; - $$.data[0] = $1; - $$.data[1] = $1; - $$.data[2] = $1; - $$.data[3] = $1; + $$.data[0].f = $1; + $$.data[1].f = $1; + $$.data[2].f = $1; + $$.data[3].f = $1; } | INTEGER { $$.count = 1; - $$.data[0] = (float) $1; - $$.data[1] = (float) $1; - $$.data[2] = (float) $1; - $$.data[3] = (float) $1; + $$.data[0].f = (float) $1; + $$.data[1].f = (float) $1; + $$.data[2].f = (float) $1; + $$.data[3].f = (float) $1; } ; paramConstVector: '{' signedFloatConstant '}' { $$.count = 4; - $$.data[0] = $2; - $$.data[1] = 0.0f; - $$.data[2] = 0.0f; - $$.data[3] = 1.0f; + $$.data[0].f = $2; + $$.data[1].f = 0.0f; + $$.data[2].f = 0.0f; + $$.data[3].f = 1.0f; } | '{' signedFloatConstant ',' signedFloatConstant '}' { $$.count = 4; - $$.data[0] = $2; - $$.data[1] = $4; - $$.data[2] = 0.0f; - $$.data[3] = 1.0f; + $$.data[0].f = $2; + $$.data[1].f = $4; + $$.data[2].f = 0.0f; + $$.data[3].f = 1.0f; } | '{' signedFloatConstant ',' signedFloatConstant ',' signedFloatConstant '}' { $$.count = 4; - $$.data[0] = $2; - $$.data[1] = $4; - $$.data[2] = $6; - $$.data[3] = 1.0f; + $$.data[0].f = $2; + $$.data[1].f = $4; + $$.data[2].f = $6; + $$.data[3].f = 1.0f; } | '{' signedFloatConstant ',' signedFloatConstant ',' signedFloatConstant ',' signedFloatConstant '}' { $$.count = 4; - $$.data[0] = $2; - $$.data[1] = $4; - $$.data[2] = $6; - $$.data[3] = $8; + $$.data[0].f = $2; + $$.data[1].f = $4; + $$.data[2].f = $6; + $$.data[3].f = $8; } ; diff --git a/src/mesa/program/program_parser.h b/src/mesa/program/program_parser.h index 8e5aaee95e5..5637598f3b3 100644 --- a/src/mesa/program/program_parser.h +++ b/src/mesa/program/program_parser.h @@ -23,6 +23,7 @@ #pragma once #include "main/config.h" +#include "program/prog_parameter.h" struct gl_context; @@ -96,7 +97,7 @@ struct asm_symbol { struct asm_vector { unsigned count; - float data[4]; + gl_constant_value data[4]; }; diff --git a/src/mesa/program/register_allocate.c b/src/mesa/program/register_allocate.c index de96eb42c9b..f5b5174fc18 100644 --- a/src/mesa/program/register_allocate.c +++ b/src/mesa/program/register_allocate.c @@ -200,6 +200,27 @@ ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2) } } +/** + * Adds a conflict between base_reg and reg, and also between reg and + * anything that base_reg conflicts with. + * + * This can simplify code for setting up multiple register classes + * which are aggregates of some base hardware registers, compared to + * explicitly using ra_add_reg_conflict. + */ +void +ra_add_transitive_reg_conflict(struct ra_regs *regs, + unsigned int base_reg, unsigned int reg) +{ + int i; + + ra_add_reg_conflict(regs, reg, base_reg); + + for (i = 0; i < regs->regs[base_reg].num_conflicts; i++) { + ra_add_reg_conflict(regs, reg, regs->regs[base_reg].conflict_list[i]); + } +} + unsigned int ra_alloc_reg_class(struct ra_regs *regs) { diff --git a/src/mesa/program/register_allocate.h b/src/mesa/program/register_allocate.h index 5b95833f394..ee2e58a4756 100644 --- a/src/mesa/program/register_allocate.h +++ b/src/mesa/program/register_allocate.h @@ -40,6 +40,8 @@ struct ra_regs *ra_alloc_reg_set(unsigned int count); unsigned int ra_alloc_reg_class(struct ra_regs *regs); void ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2); +void ra_add_transitive_reg_conflict(struct ra_regs *regs, + unsigned int base_reg, unsigned int reg); void ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int reg); void ra_set_finalize(struct ra_regs *regs); /** @} */ diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp index 1457d1199fa..e8d34c670a9 100644 --- a/src/mesa/program/sampler.cpp +++ b/src/mesa/program/sampler.cpp @@ -132,6 +132,6 @@ _mesa_get_sampler_uniform_value(class ir_dereference *sampler, index += getname.offset; - return prog->Parameters->ParameterValues[index][0]; + return prog->Parameters->ParameterValues[index][0].f; } } diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak index 4b2ec08bbb0..5e77e0f5919 100644 --- a/src/mesa/sources.mak +++ b/src/mesa/sources.mak @@ -251,6 +251,7 @@ PROGRAM_SOURCES = \ program/prog_instruction.c \ program/prog_noise.c \ program/prog_optimize.c \ + program/prog_opt_constant_fold.c \ program/prog_parameter.c \ program/prog_parameter_layout.c \ program/prog_print.c \ @@ -336,7 +337,8 @@ MESA_GALLIUM_SOURCES = \ MESA_GALLIUM_CXX_SOURCES = \ $(MAIN_CXX_SOURCES) \ - $(SHADER_CXX_SOURCES) + $(SHADER_CXX_SOURCES) \ + state_tracker/st_glsl_to_tgsi.cpp # All the core C sources, for dependency checking ALL_SOURCES = \ diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c index 1f833d28212..12b5bc5ba79 100644 --- a/src/mesa/state_tracker/st_atom_pixeltransfer.c +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c @@ -84,26 +84,6 @@ make_state_key(struct gl_context *ctx, struct state_key *key) } -static struct pipe_resource * -create_color_map_texture(struct gl_context *ctx) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - struct pipe_resource *pt; - enum pipe_format format; - const uint texSize = 256; /* simple, and usually perfect */ - - /* find an RGBA texture format */ - format = st_choose_format(pipe->screen, GL_RGBA, GL_NONE, GL_NONE, - PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); - - /* create texture for color map/table */ - pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0, - texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW); - return pt; -} - - /** * Update the pixelmap texture with the contents of the R/G/B/A pixel maps. */ @@ -219,7 +199,7 @@ get_pixel_transfer_program(struct gl_context *ctx, const struct state_key *key) /* create the colormap/texture now if not already done */ if (!st->pixel_xfer.pixelmap_texture) { - st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx); + st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx); st->pixel_xfer.pixelmap_sampler_view = st_create_texture_sampler_view(st->pipe, st->pixel_xfer.pixelmap_texture); diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c index 800a9f1f0e0..3115a2511ce 100644 --- a/src/mesa/state_tracker/st_atom_texture.c +++ b/src/mesa/state_tracker/st_atom_texture.c @@ -221,9 +221,9 @@ update_single_texture(struct st_context *st, struct pipe_sampler_view **sampler_ if ((samp->sRGBDecode == GL_SKIP_DECODE_EXT) && (_mesa_get_format_color_encoding(texFormat) == GL_SRGB)) { - /* don't do sRGB->RGB conversion. Interpret the texture - * texture data as linear values. - */ + /* Don't do sRGB->RGB conversion. Interpret the texture data as + * linear values. + */ const gl_format linearFormat = _mesa_get_srgb_format_linear(texFormat); firstImageFormat = st_mesa_format_to_pipe_format(linearFormat); diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 49b196032b9..beb5e7cab31 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -172,6 +172,23 @@ make_bitmap_fragment_program(struct gl_context *ctx, GLuint samplerIndex) } +static struct gl_program * +make_bitmap_fragment_program_glsl(struct st_context *st, + struct st_fragment_program *orig, + GLuint samplerIndex) +{ + struct gl_context *ctx = st->ctx; + struct st_fragment_program *fp = (struct st_fragment_program *) + ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); + + if (!fp) + return NULL; + + get_bitmap_visitor(fp, orig->glsl_to_tgsi, samplerIndex); + return &fp->Base.Base; +} + + static int find_free_bit(uint bitfield) { @@ -199,6 +216,7 @@ st_make_bitmap_fragment_program(struct st_context *st, GLuint *bitmap_sampler) { struct st_fragment_program *bitmap_prog; + struct st_fragment_program *stfpIn = (struct st_fragment_program *) fpIn; struct gl_program *newProg; uint sampler; @@ -207,13 +225,18 @@ st_make_bitmap_fragment_program(struct st_context *st, * with the bitmap sampler/kill instructions. */ sampler = find_free_bit(fpIn->Base.SamplersUsed); - bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler); + + if (stfpIn->glsl_to_tgsi) + newProg = make_bitmap_fragment_program_glsl(st, stfpIn, sampler); + else { + bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler); - newProg = _mesa_combine_programs(st->ctx, - &bitmap_prog->Base.Base, - &fpIn->Base); - /* done with this after combining */ - st_reference_fragprog(st, &bitmap_prog, NULL); + newProg = _mesa_combine_programs(st->ctx, + &bitmap_prog->Base.Base, + &fpIn->Base); + /* done with this after combining */ + st_reference_fragprog(st, &bitmap_prog, NULL); + } #if 0 { @@ -328,8 +351,8 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized, if(!normalized) { - sRight = width; - tBot = height; + sRight = (GLfloat) width; + tBot = (GLfloat) height; } /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as @@ -381,7 +404,7 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized, /* same for all verts: */ for (i = 0; i < 4; i++) { st->bitmap.vertices[i][0][2] = z; - st->bitmap.vertices[i][0][3] = 1.0; + st->bitmap.vertices[i][0][3] = 1.0f; st->bitmap.vertices[i][1][0] = color[0]; st->bitmap.vertices[i][1][1] = color[1]; st->bitmap.vertices[i][1][2] = color[2]; @@ -513,7 +536,7 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, cso_set_vertex_elements(cso, 3, st->velems_util_draw); /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */ - z = z * 2.0 - 1.0; + z = z * 2.0f - 1.0f; /* draw textured quad */ offset = setup_bitmap_vertex_data(st, diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c index 416be194d11..750f541b5dd 100644 --- a/src/mesa/state_tracker/st_cb_blit.c +++ b/src/mesa/state_tracker/st_cb_blit.c @@ -62,6 +62,84 @@ st_destroy_blit(struct st_context *st) #if FEATURE_EXT_framebuffer_blit static void +st_BlitFramebuffer_resolve(struct gl_context *ctx, + GLbitfield mask, + struct pipe_resolve_info *info) +{ + const GLbitfield depthStencil = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + + struct st_context *st = st_context(ctx); + + struct st_renderbuffer *srcRb, *dstRb; + + if (mask & GL_COLOR_BUFFER_BIT) { + srcRb = st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer); + dstRb = st_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]); + + info->mask = PIPE_MASK_RGBA; + + info->src.res = srcRb->texture; + info->src.layer = srcRb->surface->u.tex.first_layer; + info->dst.res = dstRb->texture; + info->dst.level = dstRb->surface->u.tex.level; + info->dst.layer = dstRb->surface->u.tex.first_layer; + + st->pipe->resource_resolve(st->pipe, info); + } + + if (mask & depthStencil) { + struct gl_renderbuffer_attachment *srcDepth, *srcStencil; + struct gl_renderbuffer_attachment *dstDepth, *dstStencil; + boolean combined; + + srcDepth = &ctx->ReadBuffer->Attachment[BUFFER_DEPTH]; + dstDepth = &ctx->DrawBuffer->Attachment[BUFFER_DEPTH]; + srcStencil = &ctx->ReadBuffer->Attachment[BUFFER_STENCIL]; + dstStencil = &ctx->DrawBuffer->Attachment[BUFFER_STENCIL]; + + combined = + st_is_depth_stencil_combined(srcDepth, srcStencil) && + st_is_depth_stencil_combined(dstDepth, dstStencil); + + if ((mask & GL_DEPTH_BUFFER_BIT) || combined) { + /* resolve depth and, if combined and requested, stencil as well */ + srcRb = st_renderbuffer(srcDepth->Renderbuffer); + dstRb = st_renderbuffer(dstDepth->Renderbuffer); + + info->mask = (mask & GL_DEPTH_BUFFER_BIT) ? PIPE_MASK_Z : 0; + if (combined && (mask & GL_STENCIL_BUFFER_BIT)) { + mask &= ~GL_STENCIL_BUFFER_BIT; + info->mask |= PIPE_MASK_S; + } + + info->src.res = srcRb->texture; + info->src.layer = srcRb->surface->u.tex.first_layer; + info->dst.res = dstRb->texture; + info->dst.level = dstRb->surface->u.tex.level; + info->dst.layer = dstRb->surface->u.tex.first_layer; + + st->pipe->resource_resolve(st->pipe, info); + } + + if (mask & GL_STENCIL_BUFFER_BIT) { + /* resolve separate stencil buffer */ + srcRb = st_renderbuffer(srcStencil->Renderbuffer); + dstRb = st_renderbuffer(dstStencil->Renderbuffer); + + info->mask = PIPE_MASK_S; + + info->src.res = srcRb->texture; + info->src.layer = srcRb->surface->u.tex.first_layer; + info->dst.res = dstRb->texture; + info->dst.level = dstRb->surface->u.tex.level; + info->dst.layer = dstRb->surface->u.tex.first_layer; + + st->pipe->resource_resolve(st->pipe, info); + } + } +} + +static void st_BlitFramebuffer(struct gl_context *ctx, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, @@ -95,6 +173,42 @@ st_BlitFramebuffer(struct gl_context *ctx, srcY1 = readFB->Height - srcY1; } + /* Disable conditional rendering. */ + if (st->render_condition) { + st->pipe->render_condition(st->pipe, NULL, 0); + } + + if (readFB->Visual.sampleBuffers > drawFB->Visual.sampleBuffers) { + struct pipe_resolve_info info; + + if (dstX0 < dstX1) { + info.dst.x0 = dstX0; + info.dst.x1 = dstX1; + info.src.x0 = srcX0; + info.src.x1 = srcX1; + } else { + info.dst.x0 = dstX1; + info.dst.x1 = dstX0; + info.src.x0 = srcX1; + info.src.x1 = srcX0; + } + if (dstY0 < dstY1) { + info.dst.y0 = dstY0; + info.dst.y1 = dstY1; + info.src.y0 = srcY0; + info.src.y1 = srcY1; + } else { + info.dst.y0 = dstY1; + info.dst.y1 = dstY0; + info.src.y0 = srcY1; + info.src.y1 = srcY0; + } + + st_BlitFramebuffer_resolve(ctx, mask, &info); /* filter doesn't apply */ + + goto done; + } + if (srcY0 > srcY1 && dstY0 > dstY1) { /* Both src and dst are upside down. Swap Y to make it * right-side up to increase odds of using a fast path. @@ -109,11 +223,6 @@ st_BlitFramebuffer(struct gl_context *ctx, dstY1 = tmp; } - /* Disable conditional rendering. */ - if (st->render_condition) { - st->pipe->render_condition(st->pipe, NULL, 0); - } - if (mask & GL_COLOR_BUFFER_BIT) { struct gl_renderbuffer_attachment *srcAtt = &readFB->Attachment[readFB->_ColorReadBufferIndex]; diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c index 7374bb0acc5..a451b44049e 100644 --- a/src/mesa/state_tracker/st_cb_bufferobjects.c +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c @@ -93,7 +93,6 @@ st_bufferobj_free(struct gl_context *ctx, struct gl_buffer_object *obj) */ static void st_bufferobj_subdata(struct gl_context *ctx, - GLenum target, GLintptrARB offset, GLsizeiptrARB size, const GLvoid * data, struct gl_buffer_object *obj) @@ -133,7 +132,6 @@ st_bufferobj_subdata(struct gl_context *ctx, */ static void st_bufferobj_get_subdata(struct gl_context *ctx, - GLenum target, GLintptrARB offset, GLsizeiptrARB size, GLvoid * data, struct gl_buffer_object *obj) @@ -238,52 +236,10 @@ static long st_bufferobj_zero_length = 0; /** - * Called via glMapBufferARB(). - */ -static void * -st_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access, - struct gl_buffer_object *obj) -{ - struct st_buffer_object *st_obj = st_buffer_object(obj); - uint flags; - - switch (access) { - case GL_WRITE_ONLY: - flags = PIPE_TRANSFER_WRITE; - break; - case GL_READ_ONLY: - flags = PIPE_TRANSFER_READ; - break; - case GL_READ_WRITE: - default: - flags = PIPE_TRANSFER_READ_WRITE; - break; - } - - /* Handle zero-size buffers here rather than in drivers */ - if (obj->Size == 0) { - obj->Pointer = &st_bufferobj_zero_length; - } - else { - obj->Pointer = pipe_buffer_map(st_context(ctx)->pipe, - st_obj->buffer, - flags, - &st_obj->transfer); - } - - if (obj->Pointer) { - obj->Offset = 0; - obj->Length = obj->Size; - } - return obj->Pointer; -} - - -/** * Called via glMapBufferRange(). */ static void * -st_bufferobj_map_range(struct gl_context *ctx, GLenum target, +st_bufferobj_map_range(struct gl_context *ctx, GLintptr offset, GLsizeiptr length, GLbitfield access, struct gl_buffer_object *obj) { @@ -353,7 +309,7 @@ st_bufferobj_map_range(struct gl_context *ctx, GLenum target, static void -st_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target, +st_bufferobj_flush_mapped_range(struct gl_context *ctx, GLintptr offset, GLsizeiptr length, struct gl_buffer_object *obj) { @@ -378,7 +334,7 @@ st_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target, * Called via glUnmapBufferARB(). */ static GLboolean -st_bufferobj_unmap(struct gl_context *ctx, GLenum target, struct gl_buffer_object *obj) +st_bufferobj_unmap(struct gl_context *ctx, struct gl_buffer_object *obj) { struct pipe_context *pipe = st_context(ctx)->pipe; struct st_buffer_object *st_obj = st_buffer_object(obj); @@ -444,7 +400,6 @@ st_init_bufferobject_functions(struct dd_function_table *functions) functions->BufferData = st_bufferobj_data; functions->BufferSubData = st_bufferobj_subdata; functions->GetBufferSubData = st_bufferobj_get_subdata; - functions->MapBuffer = st_bufferobj_map; functions->MapBufferRange = st_bufferobj_map_range; functions->FlushMappedBufferRange = st_bufferobj_flush_mapped_range; functions->UnmapBuffer = st_bufferobj_unmap; diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 1d908c0317a..390c518699f 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -94,6 +94,46 @@ is_passthrough_program(const struct gl_fragment_program *prog) } +/** + * Returns a fragment program which implements the current pixel transfer ops. + */ +static struct gl_fragment_program * +get_glsl_pixel_transfer_program(struct st_context *st, + struct st_fragment_program *orig) +{ + int pixelMaps = 0, scaleAndBias = 0; + struct gl_context *ctx = st->ctx; + struct st_fragment_program *fp = (struct st_fragment_program *) + ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); + + if (!fp) + return NULL; + + if (ctx->Pixel.RedBias != 0.0 || ctx->Pixel.RedScale != 1.0 || + ctx->Pixel.GreenBias != 0.0 || ctx->Pixel.GreenScale != 1.0 || + ctx->Pixel.BlueBias != 0.0 || ctx->Pixel.BlueScale != 1.0 || + ctx->Pixel.AlphaBias != 0.0 || ctx->Pixel.AlphaScale != 1.0) { + scaleAndBias = 1; + } + + pixelMaps = ctx->Pixel.MapColorFlag; + + if (pixelMaps) { + /* create the colormap/texture now if not already done */ + if (!st->pixel_xfer.pixelmap_texture) { + st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx); + st->pixel_xfer.pixelmap_sampler_view = + st_create_texture_sampler_view(st->pipe, + st->pixel_xfer.pixelmap_texture); + } + } + + get_pixel_transfer_visitor(fp, orig->glsl_to_tgsi, + scaleAndBias, pixelMaps); + + return &fp->Base; +} + /** * Make fragment shader for glDraw/CopyPixels. This shader is made @@ -107,11 +147,15 @@ st_make_drawpix_fragment_program(struct st_context *st, struct gl_fragment_program **fpOut) { struct gl_program *newProg; + struct st_fragment_program *stfp = (struct st_fragment_program *) fpIn; if (is_passthrough_program(fpIn)) { newProg = (struct gl_program *) _mesa_clone_fragment_program(st->ctx, &st->pixel_xfer.program->Base); } + else if (stfp->glsl_to_tgsi != NULL) { + newProg = (struct gl_program *) get_glsl_pixel_transfer_program(st, stfp); + } else { #if 0 /* debug */ diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index 32694975d17..2abb4d8f082 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -44,6 +44,7 @@ #include "st_program.h" #include "st_mesa_to_tgsi.h" #include "st_cb_program.h" +#include "st_glsl_to_tgsi.h" @@ -129,6 +130,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog) { struct st_vertex_program *stvp = (struct st_vertex_program *) prog; st_release_vp_variants( st, stvp ); + + if (stvp->glsl_to_tgsi) + free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi); } break; case MESA_GEOMETRY_PROGRAM: @@ -137,6 +141,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog) (struct st_geometry_program *) prog; st_release_gp_variants(st, stgp); + + if (stgp->glsl_to_tgsi) + free_glsl_to_tgsi_visitor(stgp->glsl_to_tgsi); if (stgp->tgsi.tokens) { st_free_tokens((void *) stgp->tgsi.tokens); @@ -151,6 +158,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog) st_release_fp_variants(st, stfp); + if (stfp->glsl_to_tgsi) + free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi); + if (stfp->tgsi.tokens) { st_free_tokens(stfp->tgsi.tokens); stfp->tgsi.tokens = NULL; @@ -242,4 +252,8 @@ st_init_program_functions(struct dd_function_table *functions) functions->DeleteProgram = st_delete_program; functions->IsProgramNative = st_is_program_native; functions->ProgramStringNotify = st_program_string_notify; + + functions->NewShader = st_new_shader; + functions->NewShaderProgram = st_new_shader_program; + functions->LinkShader = st_link_shader; } diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 6907cfc03cf..a3b2ba9e78d 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -344,7 +344,7 @@ guess_and_alloc_texture(struct st_context *st, stImage->base.Width2, stImage->base.Height2, stImage->base.Depth2, - stImage->level, + stImage->base.Level, &width, &height, &depth)) { /* we can't determine the image size at level=0 */ stObj->width0 = stObj->height0 = stObj->depth0 = 0; @@ -367,7 +367,7 @@ guess_and_alloc_texture(struct st_context *st, stImage->base._BaseFormat == GL_DEPTH_COMPONENT || stImage->base._BaseFormat == GL_DEPTH_STENCIL_EXT) && !stObj->base.GenerateMipmap && - stImage->level == 0) { + stImage->base.Level == 0) { /* only alloc space for a single mipmap level */ lastLevel = 0; } @@ -506,8 +506,8 @@ st_TexImage(struct gl_context * ctx, assert(texImage->Depth == depth); } - stImage->face = _mesa_tex_target_to_face(target); - stImage->level = level; + stImage->base.Face = _mesa_tex_target_to_face(target); + stImage->base.Level = level; _mesa_set_fetch_functions(texImage, dims); @@ -529,7 +529,7 @@ st_TexImage(struct gl_context * ctx, if (stObj->pt) { if (level > (GLint) stObj->pt->last_level || !st_texture_match_image(stObj->pt, &stImage->base, - stImage->face, stImage->level)) { + stImage->base.Face, stImage->base.Level)) { DBG("release it\n"); pipe_resource_reference(&stObj->pt, NULL); assert(!stObj->pt); @@ -563,7 +563,7 @@ st_TexImage(struct gl_context * ctx, */ if (stObj->pt && st_texture_match_image(stObj->pt, &stImage->base, - stImage->face, stImage->level)) { + stImage->base.Face, stImage->base.Level)) { pipe_resource_reference(&stImage->pt, stObj->pt); assert(stImage->pt); @@ -1466,34 +1466,6 @@ st_copy_texsubimage(struct gl_context *ctx, depth/stencil samples per pixel? Need some transfer clarifications. */ assert(sample_count < 2); - if (srcX < 0) { - width -= -srcX; - destX += -srcX; - srcX = 0; - } - - if (srcY < 0) { - height -= -srcY; - destY += -srcY; - srcY = 0; - } - - if (destX < 0) { - width -= -destX; - srcX += -destX; - destX = 0; - } - - if (destY < 0) { - height -= -destY; - srcY += -destY; - destY = 0; - } - - if (width < 0 || height < 0) - return; - - assert(strb); assert(strb->surface); assert(stImage->pt); @@ -1529,8 +1501,8 @@ st_copy_texsubimage(struct gl_context *ctx, pipe->resource_copy_region(pipe, /* dest */ stImage->pt, - stImage->level, - destX, destY, destZ + stImage->face, + stImage->base.Level, + destX, destY, destZ + stImage->base.Face, /* src */ strb->texture, strb->surface->u.tex.level, @@ -1552,9 +1524,9 @@ st_copy_texsubimage(struct gl_context *ctx, memset(&surf_tmpl, 0, sizeof(surf_tmpl)); surf_tmpl.format = util_format_linear(stImage->pt->format); surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; - surf_tmpl.u.tex.level = stImage->level; - surf_tmpl.u.tex.first_layer = stImage->face + destZ; - surf_tmpl.u.tex.last_layer = stImage->face + destZ; + surf_tmpl.u.tex.level = stImage->base.Level; + surf_tmpl.u.tex.first_layer = stImage->base.Face + destZ; + surf_tmpl.u.tex.last_layer = stImage->base.Face + destZ; dest_surface = pipe->create_surface(pipe, stImage->pt, &surf_tmpl); @@ -1610,59 +1582,6 @@ st_copy_texsubimage(struct gl_context *ctx, static void -st_CopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLint border) -{ - struct gl_texture_unit *texUnit = - &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - - /* Setup or redefine the texture object, texture and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage1D(ctx, target, level, internalFormat, - width, border, - GL_RGBA, CHAN_TYPE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - st_copy_texsubimage(ctx, target, level, - 0, 0, 0, /* destX,Y,Z */ - x, y, width, 1); /* src X, Y, size */ -} - - -static void -st_CopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border) -{ - struct gl_texture_unit *texUnit = - &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - - /* Setup or redefine the texture object, texture and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, - GL_RGBA, CHAN_TYPE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - st_copy_texsubimage(ctx, target, level, - 0, 0, 0, /* destX,Y,Z */ - x, y, width, height); /* src X, Y, size */ -} - - -static void st_CopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width) { @@ -1710,7 +1629,7 @@ copy_image_data_to_texture(struct st_context *st, /* debug checks */ { const struct gl_texture_image *dstImage = - stObj->base.Image[stImage->face][dstLevel]; + stObj->base.Image[stImage->base.Face][dstLevel]; assert(dstImage); assert(dstImage->Width == stImage->base.Width); assert(dstImage->Height == stImage->base.Height); @@ -1722,15 +1641,15 @@ copy_image_data_to_texture(struct st_context *st, */ st_texture_image_copy(st->pipe, stObj->pt, dstLevel, /* dest texture, level */ - stImage->pt, stImage->level, /* src texture, level */ - stImage->face); + stImage->pt, stImage->base.Level, /* src texture, level */ + stImage->base.Face); pipe_resource_reference(&stImage->pt, NULL); } else if (stImage->base.Data) { st_texture_image_data(st, stObj->pt, - stImage->face, + stImage->base.Face, dstLevel, stImage->base.Data, stImage->base.RowStride * @@ -1947,8 +1866,6 @@ st_init_texture_functions(struct dd_function_table *functions) functions->CompressedTexSubImage1D = st_CompressedTexSubImage1D; functions->CompressedTexSubImage2D = st_CompressedTexSubImage2D; functions->CompressedTexSubImage3D = st_CompressedTexSubImage3D; - functions->CopyTexImage1D = st_CopyTexImage1D; - functions->CopyTexImage2D = st_CopyTexImage2D; functions->CopyTexSubImage1D = st_CopyTexSubImage1D; functions->CopyTexSubImage2D = st_CopyTexSubImage2D; functions->CopyTexSubImage3D = st_CopyTexSubImage3D; diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 99b231d9706..8e900934054 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -208,6 +208,15 @@ void st_init_limits(struct st_context *st) } +static GLboolean st_get_s3tc_override(void) +{ + const char *override = _mesa_getenv("force_s3tc_enable"); + if (override && !strcmp(override, "true")) + return GL_TRUE; + return GL_FALSE; +} + + /** * Use pipe_screen::get_param() to query PIPE_CAP_ values to determine * which GL extensions are supported. @@ -219,6 +228,7 @@ void st_init_extensions(struct st_context *st) { struct pipe_screen *screen = st->pipe->screen; struct gl_context *ctx = st->ctx; + int i; /* * Extensions that are supported by all Gallium drivers: @@ -426,7 +436,7 @@ void st_init_extensions(struct st_context *st) if (screen->is_format_supported(screen, PIPE_FORMAT_DXT5_RGBA, PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW) && - ctx->Mesa_DXTn) { + (ctx->Mesa_DXTn || st_get_s3tc_override())) { ctx->Extensions.EXT_texture_compression_s3tc = GL_TRUE; ctx->Extensions.S3_s3tc = GL_TRUE; } @@ -596,6 +606,16 @@ void st_init_extensions(struct st_context *st) ctx->Extensions.EXT_packed_float = GL_TRUE; } + /* Maximum sample count. */ + for (i = 16; i > 0; --i) { + if (screen->is_format_supported(screen, PIPE_FORMAT_B8G8R8A8_UNORM, + PIPE_TEXTURE_2D, i, + PIPE_BIND_RENDER_TARGET)) { + ctx->Const.MaxSamples = i; + break; + } + } + if (screen->get_param(screen, PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE)) { ctx->Extensions.ARB_seamless_cube_map = GL_TRUE; ctx->Extensions.AMD_seamless_cubemap_per_texture = GL_TRUE; diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index b0911294a7c..82ca4af7fe4 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -453,7 +453,6 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target, srcImage->TexFormat); stImage = st_texture_image(dstImage); - stImage->level = dstLevel; pipe_resource_reference(&stImage->pt, pt); } diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp new file mode 100644 index 00000000000..9cac30995af --- /dev/null +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -0,0 +1,5142 @@ +/* + * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. + * Copyright (C) 2008 VMware, Inc. All Rights Reserved. + * Copyright © 2010 Intel Corporation + * Copyright © 2011 Bryan Cain + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file glsl_to_tgsi.cpp + * + * Translate GLSL IR to TGSI. + */ + +#include <stdio.h> +#include "main/compiler.h" +#include "ir.h" +#include "ir_visitor.h" +#include "ir_print_visitor.h" +#include "ir_expression_flattening.h" +#include "glsl_types.h" +#include "glsl_parser_extras.h" +#include "../glsl/program.h" +#include "ir_optimization.h" +#include "ast.h" + +extern "C" { +#include "main/mtypes.h" +#include "main/shaderapi.h" +#include "main/shaderobj.h" +#include "main/uniforms.h" +#include "program/hash_table.h" +#include "program/prog_instruction.h" +#include "program/prog_optimize.h" +#include "program/prog_print.h" +#include "program/program.h" +#include "program/prog_uniform.h" +#include "program/prog_parameter.h" +#include "program/sampler.h" + +#include "pipe/p_compiler.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_state.h" +#include "util/u_math.h" +#include "tgsi/tgsi_ureg.h" +#include "tgsi/tgsi_info.h" +#include "st_context.h" +#include "st_program.h" +#include "st_glsl_to_tgsi.h" +#include "st_mesa_to_tgsi.h" +} + +#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX +#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ + (1 << PROGRAM_ENV_PARAM) | \ + (1 << PROGRAM_STATE_VAR) | \ + (1 << PROGRAM_NAMED_PARAM) | \ + (1 << PROGRAM_CONSTANT) | \ + (1 << PROGRAM_UNIFORM)) + +#define MAX_TEMPS 4096 + +class st_src_reg; +class st_dst_reg; + +static int swizzle_for_size(int size); + +/** + * This struct is a corresponding struct to TGSI ureg_src. + */ +class st_src_reg { +public: + st_src_reg(gl_register_file file, int index, const glsl_type *type) + { + this->file = file; + this->index = index; + if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) + this->swizzle = swizzle_for_size(type->vector_elements); + else + this->swizzle = SWIZZLE_XYZW; + this->negate = 0; + this->type = type ? type->base_type : GLSL_TYPE_ERROR; + this->reladdr = NULL; + } + + st_src_reg(gl_register_file file, int index, int type) + { + this->type = type; + this->file = file; + this->index = index; + this->swizzle = SWIZZLE_XYZW; + this->negate = 0; + this->reladdr = NULL; + } + + st_src_reg() + { + this->type = GLSL_TYPE_ERROR; + this->file = PROGRAM_UNDEFINED; + this->index = 0; + this->swizzle = 0; + this->negate = 0; + this->reladdr = NULL; + } + + explicit st_src_reg(st_dst_reg reg); + + gl_register_file file; /**< PROGRAM_* from Mesa */ + int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ + GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ + int negate; /**< NEGATE_XYZW mask from mesa */ + int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ + /** Register index should be offset by the integer in this reg. */ + st_src_reg *reladdr; +}; + +class st_dst_reg { +public: + st_dst_reg(gl_register_file file, int writemask, int type) + { + this->file = file; + this->index = 0; + this->writemask = writemask; + this->cond_mask = COND_TR; + this->reladdr = NULL; + this->type = type; + } + + st_dst_reg() + { + this->type = GLSL_TYPE_ERROR; + this->file = PROGRAM_UNDEFINED; + this->index = 0; + this->writemask = 0; + this->cond_mask = COND_TR; + this->reladdr = NULL; + } + + explicit st_dst_reg(st_src_reg reg); + + gl_register_file file; /**< PROGRAM_* from Mesa */ + int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ + int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ + GLuint cond_mask:4; + int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ + /** Register index should be offset by the integer in this reg. */ + st_src_reg *reladdr; +}; + +st_src_reg::st_src_reg(st_dst_reg reg) +{ + this->type = reg.type; + this->file = reg.file; + this->index = reg.index; + this->swizzle = SWIZZLE_XYZW; + this->negate = 0; + this->reladdr = reg.reladdr; +} + +st_dst_reg::st_dst_reg(st_src_reg reg) +{ + this->type = reg.type; + this->file = reg.file; + this->index = reg.index; + this->writemask = WRITEMASK_XYZW; + this->cond_mask = COND_TR; + this->reladdr = reg.reladdr; +} + +class glsl_to_tgsi_instruction : public exec_node { +public: + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = rzalloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + unsigned op; + st_dst_reg dst; + st_src_reg src[3]; + /** Pointer to the ir source this tree came from for debugging */ + ir_instruction *ir; + GLboolean cond_update; + bool saturate; + int sampler; /**< sampler index */ + int tex_target; /**< One of TEXTURE_*_INDEX */ + GLboolean tex_shadow; + int dead_mask; /**< Used in dead code elimination */ + + class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ +}; + +class variable_storage : public exec_node { +public: + variable_storage(ir_variable *var, gl_register_file file, int index) + : file(file), index(index), var(var) + { + /* empty */ + } + + gl_register_file file; + int index; + ir_variable *var; /* variable that maps to this, if any */ +}; + +class immediate_storage : public exec_node { +public: + immediate_storage(gl_constant_value *values, int size, int type) + { + memcpy(this->values, values, size * sizeof(gl_constant_value)); + this->size = size; + this->type = type; + } + + gl_constant_value values[4]; + int size; /**< Number of components (1-4) */ + int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ +}; + +class function_entry : public exec_node { +public: + ir_function_signature *sig; + + /** + * identifier of this function signature used by the program. + * + * At the point that TGSI instructions for function calls are + * generated, we don't know the address of the first instruction of + * the function body. So we make the BranchTarget that is called a + * small integer and rewrite them during set_branchtargets(). + */ + int sig_id; + + /** + * Pointer to first instruction of the function body. + * + * Set during function body emits after main() is processed. + */ + glsl_to_tgsi_instruction *bgn_inst; + + /** + * Index of the first instruction of the function body in actual TGSI. + * + * Set after conversion from glsl_to_tgsi_instruction to TGSI. + */ + int inst; + + /** Storage for the return value. */ + st_src_reg return_reg; +}; + +class glsl_to_tgsi_visitor : public ir_visitor { +public: + glsl_to_tgsi_visitor(); + ~glsl_to_tgsi_visitor(); + + function_entry *current_function; + + struct gl_context *ctx; + struct gl_program *prog; + struct gl_shader_program *shader_program; + struct gl_shader_compiler_options *options; + + int next_temp; + + int num_address_regs; + int samplers_used; + bool indirect_addr_temps; + bool indirect_addr_consts; + + int glsl_version; + bool native_integers; + + variable_storage *find_variable_storage(ir_variable *var); + + int add_constant(gl_register_file file, gl_constant_value values[4], + int size, int datatype, GLuint *swizzle_out); + + function_entry *get_function_signature(ir_function_signature *sig); + + st_src_reg get_temp(const glsl_type *type); + void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); + + st_src_reg st_src_reg_for_float(float val); + st_src_reg st_src_reg_for_int(int val); + st_src_reg st_src_reg_for_type(int type, int val); + + /** + * \name Visit methods + * + * As typical for the visitor pattern, there must be one \c visit method for + * each concrete subclass of \c ir_instruction. Virtual base classes within + * the hierarchy should not have \c visit methods. + */ + /*@{*/ + virtual void visit(ir_variable *); + virtual void visit(ir_loop *); + virtual void visit(ir_loop_jump *); + virtual void visit(ir_function_signature *); + virtual void visit(ir_function *); + virtual void visit(ir_expression *); + virtual void visit(ir_swizzle *); + virtual void visit(ir_dereference_variable *); + virtual void visit(ir_dereference_array *); + virtual void visit(ir_dereference_record *); + virtual void visit(ir_assignment *); + virtual void visit(ir_constant *); + virtual void visit(ir_call *); + virtual void visit(ir_return *); + virtual void visit(ir_discard *); + virtual void visit(ir_texture *); + virtual void visit(ir_if *); + /*@}*/ + + st_src_reg result; + + /** List of variable_storage */ + exec_list variables; + + /** List of immediate_storage */ + exec_list immediates; + int num_immediates; + + /** List of function_entry */ + exec_list function_signatures; + int next_signature_id; + + /** List of glsl_to_tgsi_instruction */ + exec_list instructions; + + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op); + + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_src_reg src0); + + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_src_reg src0, st_src_reg src1); + + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1, st_src_reg src2); + + unsigned get_opcode(ir_instruction *ir, unsigned op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1); + + /** + * Emit the correct dot-product instruction for the type of arguments + */ + glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir, + st_dst_reg dst, + st_src_reg src0, + st_src_reg src1, + unsigned elements); + + void emit_scalar(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_src_reg src0); + + void emit_scalar(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_src_reg src0, st_src_reg src1); + + void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0); + + void emit_scs(ir_instruction *ir, unsigned op, + st_dst_reg dst, const st_src_reg &src); + + bool try_emit_mad(ir_expression *ir, + int mul_operand); + bool try_emit_mad_for_and_not(ir_expression *ir, + int mul_operand); + bool try_emit_sat(ir_expression *ir); + + void emit_swz(ir_expression *ir); + + bool process_move_condition(ir_rvalue *ir); + + void remove_output_reads(gl_register_file type); + void simplify_cmp(void); + + void rename_temp_register(int index, int new_index); + int get_first_temp_read(int index); + int get_first_temp_write(int index); + int get_last_temp_read(int index); + int get_last_temp_write(int index); + + void copy_propagate(void); + void eliminate_dead_code(void); + int eliminate_dead_code_advanced(void); + void merge_registers(void); + void renumber_registers(void); + + void *mem_ctx; +}; + +static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); + +static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); + +static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT); + +static void +fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); + +static void +fail_link(struct gl_shader_program *prog, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + ralloc_vasprintf_append(&prog->InfoLog, fmt, args); + va_end(args); + + prog->LinkStatus = GL_FALSE; +} + +static int +swizzle_for_size(int size) +{ + int size_swizzles[4] = { + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), + }; + + assert((size >= 1) && (size <= 4)); + return size_swizzles[size - 1]; +} + +static bool +is_tex_instruction(unsigned opcode) +{ + const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); + return info->is_tex; +} + +static unsigned +num_inst_dst_regs(unsigned opcode) +{ + const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); + return info->num_dst; +} + +static unsigned +num_inst_src_regs(unsigned opcode) +{ + const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); + return info->is_tex ? info->num_src - 1 : info->num_src; +} + +glsl_to_tgsi_instruction * +glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1, st_src_reg src2) +{ + glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); + int num_reladdr = 0, i; + + op = get_opcode(ir, op, dst, src0, src1); + + /* If we have to do relative addressing, we want to load the ARL + * reg directly for one of the regs, and preload the other reladdr + * sources into temps. + */ + num_reladdr += dst.reladdr != NULL; + num_reladdr += src0.reladdr != NULL; + num_reladdr += src1.reladdr != NULL; + num_reladdr += src2.reladdr != NULL; + + reladdr_to_temp(ir, &src2, &num_reladdr); + reladdr_to_temp(ir, &src1, &num_reladdr); + reladdr_to_temp(ir, &src0, &num_reladdr); + + if (dst.reladdr) { + emit_arl(ir, address_reg, *dst.reladdr); + num_reladdr--; + } + assert(num_reladdr == 0); + + inst->op = op; + inst->dst = dst; + inst->src[0] = src0; + inst->src[1] = src1; + inst->src[2] = src2; + inst->ir = ir; + inst->dead_mask = 0; + + inst->function = NULL; + + if (op == TGSI_OPCODE_ARL) + this->num_address_regs = 1; + + /* Update indirect addressing status used by TGSI */ + if (dst.reladdr) { + switch(dst.file) { + case PROGRAM_TEMPORARY: + this->indirect_addr_temps = true; + break; + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + this->indirect_addr_consts = true; + break; + case PROGRAM_IMMEDIATE: + assert(!"immediates should not have indirect addressing"); + break; + default: + break; + } + } + else { + for (i=0; i<3; i++) { + if(inst->src[i].reladdr) { + switch(inst->src[i].file) { + case PROGRAM_TEMPORARY: + this->indirect_addr_temps = true; + break; + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + this->indirect_addr_consts = true; + break; + case PROGRAM_IMMEDIATE: + assert(!"immediates should not have indirect addressing"); + break; + default: + break; + } + } + } + } + + this->instructions.push_tail(inst); + + return inst; +} + + +glsl_to_tgsi_instruction * +glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_src_reg src0, st_src_reg src1) +{ + return emit(ir, op, dst, src0, src1, undef_src); +} + +glsl_to_tgsi_instruction * +glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_src_reg src0) +{ + assert(dst.writemask != 0); + return emit(ir, op, dst, src0, undef_src, undef_src); +} + +glsl_to_tgsi_instruction * +glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op) +{ + return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); +} + +/** + * Determines whether to use an integer, unsigned integer, or float opcode + * based on the operands and input opcode, then emits the result. + * + * TODO: type checking for remaining TGSI opcodes + */ +unsigned +glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1) +{ + int type = GLSL_TYPE_FLOAT; + + if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) + type = GLSL_TYPE_FLOAT; + else if (native_integers) + type = src0.type; + +#define case4(c, f, i, u) \ + case TGSI_OPCODE_##c: \ + if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \ + else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \ + else op = TGSI_OPCODE_##f; \ + break; +#define case3(f, i, u) case4(f, f, i, u) +#define case2fi(f, i) case4(f, f, i, i) +#define case2iu(i, u) case4(i, LAST, i, u) + + switch(op) { + case2fi(ADD, UADD); + case2fi(MUL, UMUL); + case2fi(MAD, UMAD); + case3(DIV, IDIV, UDIV); + case3(MAX, IMAX, UMAX); + case3(MIN, IMIN, UMIN); + case2iu(MOD, UMOD); + + case2fi(SEQ, USEQ); + case2fi(SNE, USNE); + case3(SGE, ISGE, USGE); + case3(SLT, ISLT, USLT); + + case2iu(SHL, SHL); + case2iu(ISHR, USHR); + case2iu(NOT, NOT); + case2iu(AND, AND); + case2iu(OR, OR); + case2iu(XOR, XOR); + + default: break; + } + + assert(op != TGSI_OPCODE_LAST); + return op; +} + +glsl_to_tgsi_instruction * +glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, + st_dst_reg dst, st_src_reg src0, st_src_reg src1, + unsigned elements) +{ + static const unsigned dot_opcodes[] = { + TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4 + }; + + return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); +} + +/** + * Emits TGSI scalar opcodes to produce unique answers across channels. + * + * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X + * channel determines the result across all channels. So to do a vec4 + * of this operation, we want to emit a scalar per source channel used + * to produce dest channels. + */ +void +glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, + st_dst_reg dst, + st_src_reg orig_src0, st_src_reg orig_src1) +{ + int i, j; + int done_mask = ~dst.writemask; + + /* TGSI RCP is a scalar operation splatting results to all channels, + * like ARB_fp/vp. So emit as many RCPs as necessary to cover our + * dst channels. + */ + for (i = 0; i < 4; i++) { + GLuint this_mask = (1 << i); + glsl_to_tgsi_instruction *inst; + st_src_reg src0 = orig_src0; + st_src_reg src1 = orig_src1; + + if (done_mask & this_mask) + continue; + + GLuint src0_swiz = GET_SWZ(src0.swizzle, i); + GLuint src1_swiz = GET_SWZ(src1.swizzle, i); + for (j = i + 1; j < 4; j++) { + /* If there is another enabled component in the destination that is + * derived from the same inputs, generate its value on this pass as + * well. + */ + if (!(done_mask & (1 << j)) && + GET_SWZ(src0.swizzle, j) == src0_swiz && + GET_SWZ(src1.swizzle, j) == src1_swiz) { + this_mask |= (1 << j); + } + } + src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, + src0_swiz, src0_swiz); + src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, + src1_swiz, src1_swiz); + + inst = emit(ir, op, dst, src0, src1); + inst->dst.writemask = this_mask; + done_mask |= this_mask; + } +} + +void +glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_src_reg src0) +{ + st_src_reg undef = undef_src; + + undef.swizzle = SWIZZLE_XXXX; + + emit_scalar(ir, op, dst, src0, undef); +} + +void +glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, + st_dst_reg dst, st_src_reg src0) +{ + st_src_reg tmp = get_temp(glsl_type::float_type); + + if (src0.type == GLSL_TYPE_INT) + emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0); + else if (src0.type == GLSL_TYPE_UINT) + emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0); + else + tmp = src0; + + emit(NULL, TGSI_OPCODE_ARL, dst, tmp); +} + +/** + * Emit an TGSI_OPCODE_SCS instruction + * + * The \c SCS opcode functions a bit differently than the other TGSI opcodes. + * Instead of splatting its result across all four components of the + * destination, it writes one value to the \c x component and another value to + * the \c y component. + * + * \param ir IR instruction being processed + * \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending + * on which value is desired. + * \param dst Destination register + * \param src Source register + */ +void +glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, + st_dst_reg dst, + const st_src_reg &src) +{ + /* Vertex programs cannot use the SCS opcode. + */ + if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) { + emit_scalar(ir, op, dst, src); + return; + } + + const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1; + const unsigned scs_mask = (1U << component); + int done_mask = ~dst.writemask; + st_src_reg tmp; + + assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS); + + /* If there are compnents in the destination that differ from the component + * that will be written by the SCS instrution, we'll need a temporary. + */ + if (scs_mask != unsigned(dst.writemask)) { + tmp = get_temp(glsl_type::vec4_type); + } + + for (unsigned i = 0; i < 4; i++) { + unsigned this_mask = (1U << i); + st_src_reg src0 = src; + + if ((done_mask & this_mask) != 0) + continue; + + /* The source swizzle specified which component of the source generates + * sine / cosine for the current component in the destination. The SCS + * instruction requires that this value be swizzle to the X component. + * Replace the current swizzle with a swizzle that puts the source in + * the X component. + */ + unsigned src0_swiz = GET_SWZ(src.swizzle, i); + + src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, + src0_swiz, src0_swiz); + for (unsigned j = i + 1; j < 4; j++) { + /* If there is another enabled component in the destination that is + * derived from the same inputs, generate its value on this pass as + * well. + */ + if (!(done_mask & (1 << j)) && + GET_SWZ(src0.swizzle, j) == src0_swiz) { + this_mask |= (1 << j); + } + } + + if (this_mask != scs_mask) { + glsl_to_tgsi_instruction *inst; + st_dst_reg tmp_dst = st_dst_reg(tmp); + + /* Emit the SCS instruction. + */ + inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0); + inst->dst.writemask = scs_mask; + + /* Move the result of the SCS instruction to the desired location in + * the destination. + */ + tmp.swizzle = MAKE_SWIZZLE4(component, component, + component, component); + inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp); + inst->dst.writemask = this_mask; + } else { + /* Emit the SCS instruction to write directly to the destination. + */ + glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0); + inst->dst.writemask = scs_mask; + } + + done_mask |= this_mask; + } +} + +int +glsl_to_tgsi_visitor::add_constant(gl_register_file file, + gl_constant_value values[4], int size, int datatype, + GLuint *swizzle_out) +{ + if (file == PROGRAM_CONSTANT) { + return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values, + size, datatype, swizzle_out); + } else { + int index = 0; + immediate_storage *entry; + assert(file == PROGRAM_IMMEDIATE); + + /* Search immediate storage to see if we already have an identical + * immediate that we can use instead of adding a duplicate entry. + */ + foreach_iter(exec_list_iterator, iter, this->immediates) { + entry = (immediate_storage *)iter.get(); + + if (entry->size == size && + entry->type == datatype && + !memcmp(entry->values, values, size * sizeof(gl_constant_value))) { + return index; + } + index++; + } + + /* Add this immediate to the list. */ + entry = new(mem_ctx) immediate_storage(values, size, datatype); + this->immediates.push_tail(entry); + this->num_immediates++; + return index; + } +} + +st_src_reg +glsl_to_tgsi_visitor::st_src_reg_for_float(float val) +{ + st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT); + union gl_constant_value uval; + + uval.f = val; + src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle); + + return src; +} + +st_src_reg +glsl_to_tgsi_visitor::st_src_reg_for_int(int val) +{ + st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT); + union gl_constant_value uval; + + assert(native_integers); + + uval.i = val; + src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle); + + return src; +} + +st_src_reg +glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val) +{ + if (native_integers) + return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : + st_src_reg_for_int(val); + else + return st_src_reg_for_float(val); +} + +static int +type_size(const struct glsl_type *type) +{ + unsigned int i; + int size; + + switch (type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + if (type->is_matrix()) { + return type->matrix_columns; + } else { + /* Regardless of size of vector, it gets a vec4. This is bad + * packing for things like floats, but otherwise arrays become a + * mess. Hopefully a later pass over the code can pack scalars + * down if appropriate. + */ + return 1; + } + case GLSL_TYPE_ARRAY: + assert(type->length > 0); + return type_size(type->fields.array) * type->length; + case GLSL_TYPE_STRUCT: + size = 0; + for (i = 0; i < type->length; i++) { + size += type_size(type->fields.structure[i].type); + } + return size; + case GLSL_TYPE_SAMPLER: + /* Samplers take up one slot in UNIFORMS[], but they're baked in + * at link time. + */ + return 1; + default: + assert(0); + return 0; + } +} + +/** + * In the initial pass of codegen, we assign temporary numbers to + * intermediate results. (not SSA -- variable assignments will reuse + * storage). + */ +st_src_reg +glsl_to_tgsi_visitor::get_temp(const glsl_type *type) +{ + st_src_reg src; + + src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT; + src.file = PROGRAM_TEMPORARY; + src.index = next_temp; + src.reladdr = NULL; + next_temp += type_size(type); + + if (type->is_array() || type->is_record()) { + src.swizzle = SWIZZLE_NOOP; + } else { + src.swizzle = swizzle_for_size(type->vector_elements); + } + src.negate = 0; + + return src; +} + +variable_storage * +glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var) +{ + + variable_storage *entry; + + foreach_iter(exec_list_iterator, iter, this->variables) { + entry = (variable_storage *)iter.get(); + + if (entry->var == var) + return entry; + } + + return NULL; +} + +void +glsl_to_tgsi_visitor::visit(ir_variable *ir) +{ + if (strcmp(ir->name, "gl_FragCoord") == 0) { + struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; + + fp->OriginUpperLeft = ir->origin_upper_left; + fp->PixelCenterInteger = ir->pixel_center_integer; + + } else if (strcmp(ir->name, "gl_FragDepth") == 0) { + struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; + switch (ir->depth_layout) { + case ir_depth_layout_none: + fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE; + break; + case ir_depth_layout_any: + fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY; + break; + case ir_depth_layout_greater: + fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER; + break; + case ir_depth_layout_less: + fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS; + break; + case ir_depth_layout_unchanged: + fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED; + break; + default: + assert(0); + break; + } + } + + if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { + unsigned int i; + const ir_state_slot *const slots = ir->state_slots; + assert(ir->state_slots != NULL); + + /* Check if this statevar's setup in the STATE file exactly + * matches how we'll want to reference it as a + * struct/array/whatever. If not, then we need to move it into + * temporary storage and hope that it'll get copy-propagated + * out. + */ + for (i = 0; i < ir->num_state_slots; i++) { + if (slots[i].swizzle != SWIZZLE_XYZW) { + break; + } + } + + variable_storage *storage; + st_dst_reg dst; + if (i == ir->num_state_slots) { + /* We'll set the index later. */ + storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); + this->variables.push_tail(storage); + + dst = undef_dst; + } else { + /* The variable_storage constructor allocates slots based on the size + * of the type. However, this had better match the number of state + * elements that we're going to copy into the new temporary. + */ + assert((int) ir->num_state_slots == type_size(ir->type)); + + storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, + this->next_temp); + this->variables.push_tail(storage); + this->next_temp += type_size(ir->type); + + dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, + native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT)); + } + + + for (unsigned int i = 0; i < ir->num_state_slots; i++) { + int index = _mesa_add_state_reference(this->prog->Parameters, + (gl_state_index *)slots[i].tokens); + + if (storage->file == PROGRAM_STATE_VAR) { + if (storage->index == -1) { + storage->index = index; + } else { + assert(index == storage->index + (int)i); + } + } else { + st_src_reg src(PROGRAM_STATE_VAR, index, + native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT); + src.swizzle = slots[i].swizzle; + emit(ir, TGSI_OPCODE_MOV, dst, src); + /* even a float takes up a whole vec4 reg in a struct/array. */ + dst.index++; + } + } + + if (storage->file == PROGRAM_TEMPORARY && + dst.index != storage->index + (int) ir->num_state_slots) { + fail_link(this->shader_program, + "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", + ir->name, dst.index - storage->index, + type_size(ir->type)); + } + } +} + +void +glsl_to_tgsi_visitor::visit(ir_loop *ir) +{ + ir_dereference_variable *counter = NULL; + + if (ir->counter != NULL) + counter = new(ir) ir_dereference_variable(ir->counter); + + if (ir->from != NULL) { + assert(ir->counter != NULL); + + ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); + + a->accept(this); + delete a; + } + + emit(NULL, TGSI_OPCODE_BGNLOOP); + + if (ir->to) { + ir_expression *e = + new(ir) ir_expression(ir->cmp, glsl_type::bool_type, + counter, ir->to); + ir_if *if_stmt = new(ir) ir_if(e); + + ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); + + if_stmt->then_instructions.push_tail(brk); + + if_stmt->accept(this); + + delete if_stmt; + delete e; + delete brk; + } + + visit_exec_list(&ir->body_instructions, this); + + if (ir->increment) { + ir_expression *e = + new(ir) ir_expression(ir_binop_add, counter->type, + counter, ir->increment); + + ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); + + a->accept(this); + delete a; + delete e; + } + + emit(NULL, TGSI_OPCODE_ENDLOOP); +} + +void +glsl_to_tgsi_visitor::visit(ir_loop_jump *ir) +{ + switch (ir->mode) { + case ir_loop_jump::jump_break: + emit(NULL, TGSI_OPCODE_BRK); + break; + case ir_loop_jump::jump_continue: + emit(NULL, TGSI_OPCODE_CONT); + break; + } +} + + +void +glsl_to_tgsi_visitor::visit(ir_function_signature *ir) +{ + assert(0); + (void)ir; +} + +void +glsl_to_tgsi_visitor::visit(ir_function *ir) +{ + /* Ignore function bodies other than main() -- we shouldn't see calls to + * them since they should all be inlined before we get to glsl_to_tgsi. + */ + if (strcmp(ir->name, "main") == 0) { + const ir_function_signature *sig; + exec_list empty; + + sig = ir->matching_signature(&empty); + + assert(sig); + + foreach_iter(exec_list_iterator, iter, sig->body) { + ir_instruction *ir = (ir_instruction *)iter.get(); + + ir->accept(this); + } + } +} + +bool +glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) +{ + int nonmul_operand = 1 - mul_operand; + st_src_reg a, b, c; + st_dst_reg result_dst; + + ir_expression *expr = ir->operands[mul_operand]->as_expression(); + if (!expr || expr->operation != ir_binop_mul) + return false; + + expr->operands[0]->accept(this); + a = this->result; + expr->operands[1]->accept(this); + b = this->result; + ir->operands[nonmul_operand]->accept(this); + c = this->result; + + this->result = get_temp(ir->type); + result_dst = st_dst_reg(this->result); + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); + + return true; +} + +/** + * Emit MAD(a, -b, a) instead of AND(a, NOT(b)) + * + * The logic values are 1.0 for true and 0.0 for false. Logical-and is + * implemented using multiplication, and logical-or is implemented using + * addition. Logical-not can be implemented as (true - x), or (1.0 - x). + * As result, the logical expression (a & !b) can be rewritten as: + * + * - a * !b + * - a * (1 - b) + * - (a * 1) - (a * b) + * - a + -(a * b) + * - a + (a * -b) + * + * This final expression can be implemented as a single MAD(a, -b, a) + * instruction. + */ +bool +glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) +{ + const int other_operand = 1 - try_operand; + st_src_reg a, b; + + ir_expression *expr = ir->operands[try_operand]->as_expression(); + if (!expr || expr->operation != ir_unop_logic_not) + return false; + + ir->operands[other_operand]->accept(this); + a = this->result; + expr->operands[0]->accept(this); + b = this->result; + + b.negate = ~b.negate; + + this->result = get_temp(ir->type); + emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a); + + return true; +} + +bool +glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) +{ + /* Saturates were only introduced to vertex programs in + * NV_vertex_program3, so don't give them to drivers in the VP. + */ + if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) + return false; + + ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); + if (!sat_src) + return false; + + sat_src->accept(this); + st_src_reg src = this->result; + + /* If we generated an expression instruction into a temporary in + * processing the saturate's operand, apply the saturate to that + * instruction. Otherwise, generate a MOV to do the saturate. + * + * Note that we have to be careful to only do this optimization if + * the instruction in question was what generated src->result. For + * example, ir_dereference_array might generate a MUL instruction + * to create the reladdr, and return us a src reg using that + * reladdr. That MUL result is not the value we're trying to + * saturate. + */ + ir_expression *sat_src_expr = sat_src->as_expression(); + if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul || + sat_src_expr->operation == ir_binop_add || + sat_src_expr->operation == ir_binop_dot)) { + glsl_to_tgsi_instruction *new_inst; + new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + new_inst->saturate = true; + } else { + this->result = get_temp(ir->type); + st_dst_reg result_dst = st_dst_reg(this->result); + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + glsl_to_tgsi_instruction *inst; + inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); + inst->saturate = true; + } + + return true; +} + +void +glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, + st_src_reg *reg, int *num_reladdr) +{ + if (!reg->reladdr) + return; + + emit_arl(ir, address_reg, *reg->reladdr); + + if (*num_reladdr != 1) { + st_src_reg temp = get_temp(glsl_type::vec4_type); + + emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg); + *reg = temp; + } + + (*num_reladdr)--; +} + +void +glsl_to_tgsi_visitor::visit(ir_expression *ir) +{ + unsigned int operand; + st_src_reg op[Elements(ir->operands)]; + st_src_reg result_src; + st_dst_reg result_dst; + + /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c) + */ + if (ir->operation == ir_binop_add) { + if (try_emit_mad(ir, 1)) + return; + if (try_emit_mad(ir, 0)) + return; + } + + /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) + */ + if (ir->operation == ir_binop_logic_and) { + if (try_emit_mad_for_and_not(ir, 1)) + return; + if (try_emit_mad_for_and_not(ir, 0)) + return; + } + + if (try_emit_sat(ir)) + return; + + if (ir->operation == ir_quadop_vector) + assert(!"ir_quadop_vector should have been lowered"); + + for (operand = 0; operand < ir->get_num_operands(); operand++) { + this->result.file = PROGRAM_UNDEFINED; + ir->operands[operand]->accept(this); + if (this->result.file == PROGRAM_UNDEFINED) { + ir_print_visitor v; + printf("Failed to get tree for expression operand:\n"); + ir->operands[operand]->accept(&v); + exit(1); + } + op[operand] = this->result; + + /* Matrix expression operands should have been broken down to vector + * operations already. + */ + assert(!ir->operands[operand]->type->is_matrix()); + } + + int vector_elements = ir->operands[0]->type->vector_elements; + if (ir->operands[1]) { + vector_elements = MAX2(vector_elements, + ir->operands[1]->type->vector_elements); + } + + this->result.file = PROGRAM_UNDEFINED; + + /* Storage for our result. Ideally for an assignment we'd be using + * the actual storage for the result here, instead. + */ + result_src = get_temp(ir->type); + /* convenience for the emit functions below. */ + result_dst = st_dst_reg(result_src); + /* Limit writes to the channels that will be used by result_src later. + * This does limit this temp's use as a temporary for multi-instruction + * sequences. + */ + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + + switch (ir->operation) { + case ir_unop_logic_not: + if (result_dst.type != GLSL_TYPE_FLOAT) + emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0)); + else { + /* Previously 'SEQ dst, src, 0.0' was used for this. However, many + * older GPUs implement SEQ using multiple instructions (i915 uses two + * SGE instructions and a MUL instruction). Since our logic values are + * 0.0 and 1.0, 1-x also implements !x. + */ + op[0].negate = ~op[0].negate; + emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0)); + } + break; + case ir_unop_neg: + assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT); + if (result_dst.type == GLSL_TYPE_INT) + emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); + else { + op[0].negate = ~op[0].negate; + result_src = op[0]; + } + break; + case ir_unop_abs: + assert(result_dst.type == GLSL_TYPE_FLOAT); + emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]); + break; + case ir_unop_sign: + emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]); + break; + case ir_unop_rcp: + emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]); + break; + + case ir_unop_exp2: + emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]); + break; + case ir_unop_exp: + case ir_unop_log: + assert(!"not reached: should be handled by ir_explog_to_explog2"); + break; + case ir_unop_log2: + emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]); + break; + case ir_unop_sin: + emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]); + break; + case ir_unop_cos: + emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]); + break; + case ir_unop_sin_reduced: + emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]); + break; + case ir_unop_cos_reduced: + emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]); + break; + + case ir_unop_dFdx: + emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]); + break; + case ir_unop_dFdy: + op[0].negate = ~op[0].negate; + emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]); + break; + + case ir_unop_noise: { + /* At some point, a motivated person could add a better + * implementation of noise. Currently not even the nvidia + * binary drivers do anything more than this. In any case, the + * place to do this is in the GL state tracker, not the poor + * driver. + */ + emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); + break; + } + + case ir_binop_add: + emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); + break; + case ir_binop_sub: + emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]); + break; + + case ir_binop_mul: + emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); + break; + case ir_binop_div: + if (result_dst.type == GLSL_TYPE_FLOAT) + assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + else + emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); + break; + case ir_binop_mod: + if (result_dst.type == GLSL_TYPE_FLOAT) + assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + else + emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]); + break; + + case ir_binop_less: + emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); + break; + case ir_binop_greater: + emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]); + break; + case ir_binop_lequal: + emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]); + break; + case ir_binop_gequal: + emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); + break; + case ir_binop_equal: + emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); + break; + case ir_binop_nequal: + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); + break; + case ir_binop_all_equal: + /* "==" operator producing a scalar boolean. */ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + st_src_reg temp = get_temp(native_integers ? + glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : + glsl_type::vec4_type); + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero becomes 1.0, and positive values become zero. + */ + emit_dp(ir, result_dst, temp, temp, vector_elements); + + if (result_dst.type == GLSL_TYPE_FLOAT) { + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero becomes 1.0, and negative values become zero. + * This is achieved using SGE. + */ + st_src_reg sge_src = result_src; + sge_src.negate = ~sge_src.negate; + emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0)); + } else { + /* The TGSI negate flag doesn't work for integers, so use SEQ 0 + * instead. + */ + emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_int(0)); + } + } else { + emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); + } + break; + case ir_binop_any_nequal: + /* "!=" operator producing a scalar boolean. */ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + st_src_reg temp = get_temp(native_integers ? + glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : + glsl_type::vec4_type); + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero stays zero, and positive values become 1.0. + */ + glsl_to_tgsi_instruction *const dp = + emit_dp(ir, result_dst, temp, temp, vector_elements); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && + result_dst.type == GLSL_TYPE_FLOAT) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + dp->saturate = true; + } else if (result_dst.type == GLSL_TYPE_FLOAT) { + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero stays zero, and negative values become 1.0. This + * achieved using SLT. + */ + st_src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + } else { + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); + } + } else { + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); + } + break; + + case ir_unop_any: { + assert(ir->operands[0]->type->is_vector()); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero stays zero, and positive values become 1.0. + */ + glsl_to_tgsi_instruction *const dp = + emit_dp(ir, result_dst, op[0], op[0], + ir->operands[0]->type->vector_elements); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && + result_dst.type == GLSL_TYPE_FLOAT) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + dp->saturate = true; + } else if (result_dst.type == GLSL_TYPE_FLOAT) { + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero stays zero, and negative values become 1.0. This + * is achieved using SLT. + */ + st_src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + } + else { + /* Use SNE 0 if integers are being used as boolean values. */ + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); + } + break; + } + + case ir_binop_logic_xor: + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); + break; + + case ir_binop_logic_or: { + /* After the addition, the value will be an integer on the + * range [0,2]. Zero stays zero, and positive values become 1.0. + */ + glsl_to_tgsi_instruction *add = + emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && + result_dst.type == GLSL_TYPE_FLOAT) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate if floats are being used as boolean values. + */ + add->saturate = true; + } else if (result_dst.type == GLSL_TYPE_FLOAT) { + /* Negating the result of the addition gives values on the range + * [-2, 0]. Zero stays zero, and negative values become 1.0. This + * is achieved using SLT. + */ + st_src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); + } else { + /* Use an SNE on the result of the addition. Zero stays zero, + * 1 stays 1, and 2 becomes 1. + */ + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0)); + } + break; + } + + case ir_binop_logic_and: + /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ + emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); + break; + + case ir_binop_dot: + assert(ir->operands[0]->type->is_vector()); + assert(ir->operands[0]->type == ir->operands[1]->type); + emit_dp(ir, result_dst, op[0], op[1], + ir->operands[0]->type->vector_elements); + break; + + case ir_unop_sqrt: + /* sqrt(x) = x * rsq(x). */ + emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); + emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]); + /* For incoming channels <= 0, set the result to 0. */ + op[0].negate = ~op[0].negate; + emit(ir, TGSI_OPCODE_CMP, result_dst, + op[0], result_src, st_src_reg_for_float(0.0)); + break; + case ir_unop_rsq: + emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); + break; + case ir_unop_i2f: + case ir_unop_b2f: + if (native_integers) { + emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]); + break; + } + case ir_unop_i2u: + case ir_unop_u2i: + /* Converting between signed and unsigned integers is a no-op. */ + case ir_unop_b2i: + /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */ + result_src = op[0]; + break; + case ir_unop_f2i: + if (native_integers) + emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]); + else + emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); + break; + case ir_unop_f2b: + case ir_unop_i2b: + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], + st_src_reg_for_type(result_dst.type, 0)); + break; + case ir_unop_trunc: + emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); + break; + case ir_unop_ceil: + op[0].negate = ~op[0].negate; + emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); + result_src.negate = ~result_src.negate; + break; + case ir_unop_floor: + emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); + break; + case ir_unop_fract: + emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]); + break; + + case ir_binop_min: + emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]); + break; + case ir_binop_max: + emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]); + break; + case ir_binop_pow: + emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]); + break; + + case ir_unop_bit_not: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); + break; + } + case ir_unop_u2f: + if (native_integers) { + emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]); + break; + } + case ir_binop_lshift: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]); + break; + } + case ir_binop_rshift: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]); + break; + } + case ir_binop_bit_and: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_AND, result_dst, op[0]); + break; + } + case ir_binop_bit_xor: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]); + break; + } + case ir_binop_bit_or: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_OR, result_dst, op[0]); + break; + } + case ir_unop_round_even: + assert(!"GLSL 1.30 features unsupported"); + break; + + case ir_quadop_vector: + /* This operation should have already been handled. + */ + assert(!"Should not get here."); + break; + } + + this->result = result_src; +} + + +void +glsl_to_tgsi_visitor::visit(ir_swizzle *ir) +{ + st_src_reg src; + int i; + int swizzle[4]; + + /* Note that this is only swizzles in expressions, not those on the left + * hand side of an assignment, which do write masking. See ir_assignment + * for that. + */ + + ir->val->accept(this); + src = this->result; + assert(src.file != PROGRAM_UNDEFINED); + + for (i = 0; i < 4; i++) { + if (i < ir->type->vector_elements) { + switch (i) { + case 0: + swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); + break; + case 1: + swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); + break; + case 2: + swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); + break; + case 3: + swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); + break; + } + } else { + /* If the type is smaller than a vec4, replicate the last + * channel out. + */ + swizzle[i] = swizzle[ir->type->vector_elements - 1]; + } + } + + src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); + + this->result = src; +} + +void +glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) +{ + variable_storage *entry = find_variable_storage(ir->var); + ir_variable *var = ir->var; + + if (!entry) { + switch (var->mode) { + case ir_var_uniform: + entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, + var->location); + this->variables.push_tail(entry); + break; + case ir_var_in: + case ir_var_inout: + /* The linker assigns locations for varyings and attributes, + * including deprecated builtins (like gl_Color), user-assign + * generic attributes (glBindVertexLocation), and + * user-defined varyings. + * + * FINISHME: We would hit this path for function arguments. Fix! + */ + assert(var->location != -1); + entry = new(mem_ctx) variable_storage(var, + PROGRAM_INPUT, + var->location); + if (this->prog->Target == GL_VERTEX_PROGRAM_ARB && + var->location >= VERT_ATTRIB_GENERIC0) { + _mesa_add_attribute(this->prog->Attributes, + var->name, + _mesa_sizeof_glsl_type(var->type->gl_type), + var->type->gl_type, + var->location - VERT_ATTRIB_GENERIC0); + } + break; + case ir_var_out: + assert(var->location != -1); + entry = new(mem_ctx) variable_storage(var, + PROGRAM_OUTPUT, + var->location); + break; + case ir_var_system_value: + entry = new(mem_ctx) variable_storage(var, + PROGRAM_SYSTEM_VALUE, + var->location); + break; + case ir_var_auto: + case ir_var_temporary: + entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, + this->next_temp); + this->variables.push_tail(entry); + + next_temp += type_size(var->type); + break; + } + + if (!entry) { + printf("Failed to make storage for %s\n", var->name); + exit(1); + } + } + + this->result = st_src_reg(entry->file, entry->index, var->type); + if (!native_integers) + this->result.type = GLSL_TYPE_FLOAT; +} + +void +glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) +{ + ir_constant *index; + st_src_reg src; + int element_size = type_size(ir->type); + + index = ir->array_index->constant_expression_value(); + + ir->array->accept(this); + src = this->result; + + if (index) { + src.index += index->value.i[0] * element_size; + } else { + /* Variable index array dereference. It eats the "vec4" of the + * base of the array and an index that offsets the TGSI register + * index. + */ + ir->array_index->accept(this); + + st_src_reg index_reg; + + if (element_size == 1) { + index_reg = this->result; + } else { + index_reg = get_temp(glsl_type::float_type); + + emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), + this->result, st_src_reg_for_float(element_size)); + } + + /* If there was already a relative address register involved, add the + * new and the old together to get the new offset. + */ + if (src.reladdr != NULL) { + st_src_reg accum_reg = get_temp(glsl_type::float_type); + + emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), + index_reg, *src.reladdr); + + index_reg = accum_reg; + } + + src.reladdr = ralloc(mem_ctx, st_src_reg); + memcpy(src.reladdr, &index_reg, sizeof(index_reg)); + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + src.swizzle = swizzle_for_size(ir->type->vector_elements); + else + src.swizzle = SWIZZLE_NOOP; + + this->result = src; +} + +void +glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) +{ + unsigned int i; + const glsl_type *struct_type = ir->record->type; + int offset = 0; + + ir->record->accept(this); + + for (i = 0; i < struct_type->length; i++) { + if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) + break; + offset += type_size(struct_type->fields.structure[i].type); + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + this->result.swizzle = swizzle_for_size(ir->type->vector_elements); + else + this->result.swizzle = SWIZZLE_NOOP; + + this->result.index += offset; +} + +/** + * We want to be careful in assignment setup to hit the actual storage + * instead of potentially using a temporary like we might with the + * ir_dereference handler. + */ +static st_dst_reg +get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v) +{ + /* The LHS must be a dereference. If the LHS is a variable indexed array + * access of a vector, it must be separated into a series conditional moves + * before reaching this point (see ir_vec_index_to_cond_assign). + */ + assert(ir->as_dereference()); + ir_dereference_array *deref_array = ir->as_dereference_array(); + if (deref_array) { + assert(!deref_array->array->type->is_vector()); + } + + /* Use the rvalue deref handler for the most part. We'll ignore + * swizzles in it and write swizzles using writemask, though. + */ + ir->accept(v); + return st_dst_reg(v->result); +} + +/** + * Process the condition of a conditional assignment + * + * Examines the condition of a conditional assignment to generate the optimal + * first operand of a \c CMP instruction. If the condition is a relational + * operator with 0 (e.g., \c ir_binop_less), the value being compared will be + * used as the source for the \c CMP instruction. Otherwise the comparison + * is processed to a boolean result, and the boolean result is used as the + * operand to the CMP instruction. + */ +bool +glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) +{ + ir_rvalue *src_ir = ir; + bool negate = true; + bool switch_order = false; + + ir_expression *const expr = ir->as_expression(); + if ((expr != NULL) && (expr->get_num_operands() == 2)) { + bool zero_on_left = false; + + if (expr->operands[0]->is_zero()) { + src_ir = expr->operands[1]; + zero_on_left = true; + } else if (expr->operands[1]->is_zero()) { + src_ir = expr->operands[0]; + zero_on_left = false; + } + + /* a is - 0 + - 0 + + * (a < 0) T F F ( a < 0) T F F + * (0 < a) F F T (-a < 0) F F T + * (a <= 0) T T F (-a < 0) F F T (swap order of other operands) + * (0 <= a) F T T ( a < 0) T F F (swap order of other operands) + * (a > 0) F F T (-a < 0) F F T + * (0 > a) T F F ( a < 0) T F F + * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) + * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) + * + * Note that exchanging the order of 0 and 'a' in the comparison simply + * means that the value of 'a' should be negated. + */ + if (src_ir != ir) { + switch (expr->operation) { + case ir_binop_less: + switch_order = false; + negate = zero_on_left; + break; + + case ir_binop_greater: + switch_order = false; + negate = !zero_on_left; + break; + + case ir_binop_lequal: + switch_order = true; + negate = !zero_on_left; + break; + + case ir_binop_gequal: + switch_order = true; + negate = zero_on_left; + break; + + default: + /* This isn't the right kind of comparison afterall, so make sure + * the whole condition is visited. + */ + src_ir = ir; + break; + } + } + } + + src_ir->accept(this); + + /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the + * condition we produced is 0.0 or 1.0. By flipping the sign, we can + * choose which value TGSI_OPCODE_CMP produces without an extra instruction + * computing the condition. + */ + if (negate) + this->result.negate = ~this->result.negate; + + return switch_order; +} + +void +glsl_to_tgsi_visitor::visit(ir_assignment *ir) +{ + st_dst_reg l; + st_src_reg r; + int i; + + ir->rhs->accept(this); + r = this->result; + + l = get_assignment_lhs(ir->lhs, this); + + /* FINISHME: This should really set to the correct maximal writemask for each + * FINISHME: component written (in the loops below). This case can only + * FINISHME: occur for matrices, arrays, and structures. + */ + if (ir->write_mask == 0) { + assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); + l.writemask = WRITEMASK_XYZW; + } else if (ir->lhs->type->is_scalar() && + ir->lhs->variable_referenced()->mode == ir_var_out) { + /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the + * FINISHME: W component of fragment shader output zero, work correctly. + */ + l.writemask = WRITEMASK_XYZW; + } else { + int swizzles[4]; + int first_enabled_chan = 0; + int rhs_chan = 0; + + l.writemask = ir->write_mask; + + for (int i = 0; i < 4; i++) { + if (l.writemask & (1 << i)) { + first_enabled_chan = GET_SWZ(r.swizzle, i); + break; + } + } + + /* Swizzle a small RHS vector into the channels being written. + * + * glsl ir treats write_mask as dictating how many channels are + * present on the RHS while TGSI treats write_mask as just + * showing which channels of the vec4 RHS get written. + */ + for (int i = 0; i < 4; i++) { + if (l.writemask & (1 << i)) + swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); + else + swizzles[i] = first_enabled_chan; + } + r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], + swizzles[2], swizzles[3]); + } + + assert(l.file != PROGRAM_UNDEFINED); + assert(r.file != PROGRAM_UNDEFINED); + + if (ir->condition) { + const bool switch_order = this->process_move_condition(ir->condition); + st_src_reg condition = this->result; + + for (i = 0; i < type_size(ir->lhs->type); i++) { + st_src_reg l_src = st_src_reg(l); + l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements); + + if (switch_order) { + emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r); + } else { + emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src); + } + + l.index++; + r.index++; + } + } else if (ir->rhs->as_expression() && + this->instructions.get_tail() && + ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && + type_size(ir->lhs->type) == 1 && + l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) { + /* To avoid emitting an extra MOV when assigning an expression to a + * variable, emit the last instruction of the expression again, but + * replace the destination register with the target of the assignment. + * Dead code elimination will remove the original instruction. + */ + glsl_to_tgsi_instruction *inst, *new_inst; + inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); + new_inst->saturate = inst->saturate; + } else { + for (i = 0; i < type_size(ir->lhs->type); i++) { + emit(ir, TGSI_OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } +} + + +void +glsl_to_tgsi_visitor::visit(ir_constant *ir) +{ + st_src_reg src; + GLfloat stack_vals[4] = { 0 }; + gl_constant_value *values = (gl_constant_value *) stack_vals; + GLenum gl_type = GL_NONE; + unsigned int i; + static int in_array = 0; + gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; + + /* Unfortunately, 4 floats is all we can get into + * _mesa_add_typed_unnamed_constant. So, make a temp to store an + * aggregate constant and move each constant value into it. If we + * get lucky, copy propagation will eliminate the extra moves. + */ + if (ir->type->base_type == GLSL_TYPE_STRUCT) { + st_src_reg temp_base = get_temp(ir->type); + st_dst_reg temp = st_dst_reg(temp_base); + + foreach_iter(exec_list_iterator, iter, ir->components) { + ir_constant *field_value = (ir_constant *)iter.get(); + int size = type_size(field_value->type); + + assert(size > 0); + + field_value->accept(this); + src = this->result; + + for (i = 0; i < (unsigned int)size; i++) { + emit(ir, TGSI_OPCODE_MOV, temp, src); + + src.index++; + temp.index++; + } + } + this->result = temp_base; + return; + } + + if (ir->type->is_array()) { + st_src_reg temp_base = get_temp(ir->type); + st_dst_reg temp = st_dst_reg(temp_base); + int size = type_size(ir->type->fields.array); + + assert(size > 0); + in_array++; + + for (i = 0; i < ir->type->length; i++) { + ir->array_elements[i]->accept(this); + src = this->result; + for (int j = 0; j < size; j++) { + emit(ir, TGSI_OPCODE_MOV, temp, src); + + src.index++; + temp.index++; + } + } + this->result = temp_base; + in_array--; + return; + } + + if (ir->type->is_matrix()) { + st_src_reg mat = get_temp(ir->type); + st_dst_reg mat_column = st_dst_reg(mat); + + for (i = 0; i < ir->type->matrix_columns; i++) { + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; + + src = st_src_reg(file, -1, ir->type->base_type); + src.index = add_constant(file, + values, + ir->type->vector_elements, + GL_FLOAT, + &src.swizzle); + emit(ir, TGSI_OPCODE_MOV, mat_column, src); + + mat_column.index++; + } + + this->result = mat; + return; + } + + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + gl_type = GL_FLOAT; + for (i = 0; i < ir->type->vector_elements; i++) { + values[i].f = ir->value.f[i]; + } + break; + case GLSL_TYPE_UINT: + gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT; + for (i = 0; i < ir->type->vector_elements; i++) { + if (native_integers) + values[i].u = ir->value.u[i]; + else + values[i].f = ir->value.u[i]; + } + break; + case GLSL_TYPE_INT: + gl_type = native_integers ? GL_INT : GL_FLOAT; + for (i = 0; i < ir->type->vector_elements; i++) { + if (native_integers) + values[i].i = ir->value.i[i]; + else + values[i].f = ir->value.i[i]; + } + break; + case GLSL_TYPE_BOOL: + gl_type = native_integers ? GL_BOOL : GL_FLOAT; + for (i = 0; i < ir->type->vector_elements; i++) { + if (native_integers) + values[i].b = ir->value.b[i]; + else + values[i].f = ir->value.b[i]; + } + break; + default: + assert(!"Non-float/uint/int/bool constant"); + } + + this->result = st_src_reg(file, -1, ir->type); + this->result.index = add_constant(file, + values, + ir->type->vector_elements, + gl_type, + &this->result.swizzle); +} + +function_entry * +glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) +{ + function_entry *entry; + + foreach_iter(exec_list_iterator, iter, this->function_signatures) { + entry = (function_entry *)iter.get(); + + if (entry->sig == sig) + return entry; + } + + entry = ralloc(mem_ctx, function_entry); + entry->sig = sig; + entry->sig_id = this->next_signature_id++; + entry->bgn_inst = NULL; + + /* Allocate storage for all the parameters. */ + foreach_iter(exec_list_iterator, iter, sig->parameters) { + ir_variable *param = (ir_variable *)iter.get(); + variable_storage *storage; + + storage = find_variable_storage(param); + assert(!storage); + + storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY, + this->next_temp); + this->variables.push_tail(storage); + + this->next_temp += type_size(param->type); + } + + if (!sig->return_type->is_void()) { + entry->return_reg = get_temp(sig->return_type); + } else { + entry->return_reg = undef_src; + } + + this->function_signatures.push_tail(entry); + return entry; +} + +void +glsl_to_tgsi_visitor::visit(ir_call *ir) +{ + glsl_to_tgsi_instruction *call_inst; + ir_function_signature *sig = ir->get_callee(); + function_entry *entry = get_function_signature(sig); + int i; + + /* Process in parameters. */ + exec_list_iterator sig_iter = sig->parameters.iterator(); + foreach_iter(exec_list_iterator, iter, *ir) { + ir_rvalue *param_rval = (ir_rvalue *)iter.get(); + ir_variable *param = (ir_variable *)sig_iter.get(); + + if (param->mode == ir_var_in || + param->mode == ir_var_inout) { + variable_storage *storage = find_variable_storage(param); + assert(storage); + + param_rval->accept(this); + st_src_reg r = this->result; + + st_dst_reg l; + l.file = storage->file; + l.index = storage->index; + l.reladdr = NULL; + l.writemask = WRITEMASK_XYZW; + l.cond_mask = COND_TR; + + for (i = 0; i < type_size(param->type); i++) { + emit(ir, TGSI_OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } + + sig_iter.next(); + } + assert(!sig_iter.has_next()); + + /* Emit call instruction */ + call_inst = emit(ir, TGSI_OPCODE_CAL); + call_inst->function = entry; + + /* Process out parameters. */ + sig_iter = sig->parameters.iterator(); + foreach_iter(exec_list_iterator, iter, *ir) { + ir_rvalue *param_rval = (ir_rvalue *)iter.get(); + ir_variable *param = (ir_variable *)sig_iter.get(); + + if (param->mode == ir_var_out || + param->mode == ir_var_inout) { + variable_storage *storage = find_variable_storage(param); + assert(storage); + + st_src_reg r; + r.file = storage->file; + r.index = storage->index; + r.reladdr = NULL; + r.swizzle = SWIZZLE_NOOP; + r.negate = 0; + + param_rval->accept(this); + st_dst_reg l = st_dst_reg(this->result); + + for (i = 0; i < type_size(param->type); i++) { + emit(ir, TGSI_OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } + + sig_iter.next(); + } + assert(!sig_iter.has_next()); + + /* Process return value. */ + this->result = entry->return_reg; +} + +void +glsl_to_tgsi_visitor::visit(ir_texture *ir) +{ + st_src_reg result_src, coord, lod_info, projector, dx, dy; + st_dst_reg result_dst, coord_dst; + glsl_to_tgsi_instruction *inst = NULL; + unsigned opcode = TGSI_OPCODE_NOP; + + if (ir->coordinate) { + ir->coordinate->accept(this); + + /* Put our coords in a temp. We'll need to modify them for shadow, + * projection, or LOD, so the only case we'd use it as is is if + * we're doing plain old texturing. The optimization passes on + * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. + */ + coord = get_temp(glsl_type::vec4_type); + coord_dst = st_dst_reg(coord); + emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); + } + + if (ir->projector) { + ir->projector->accept(this); + projector = this->result; + } + + /* Storage for our result. Ideally for an assignment we'd be using + * the actual storage for the result here, instead. + */ + result_src = get_temp(glsl_type::vec4_type); + result_dst = st_dst_reg(result_src); + + switch (ir->op) { + case ir_tex: + opcode = TGSI_OPCODE_TEX; + break; + case ir_txb: + opcode = TGSI_OPCODE_TXB; + ir->lod_info.bias->accept(this); + lod_info = this->result; + break; + case ir_txl: + opcode = TGSI_OPCODE_TXL; + ir->lod_info.lod->accept(this); + lod_info = this->result; + break; + case ir_txd: + opcode = TGSI_OPCODE_TXD; + ir->lod_info.grad.dPdx->accept(this); + dx = this->result; + ir->lod_info.grad.dPdy->accept(this); + dy = this->result; + break; + case ir_txs: + opcode = TGSI_OPCODE_TXQ; + ir->lod_info.lod->accept(this); + lod_info = this->result; + break; + case ir_txf: + opcode = TGSI_OPCODE_TXF; + ir->lod_info.lod->accept(this); + lod_info = this->result; + break; + } + + if (ir->projector) { + if (opcode == TGSI_OPCODE_TEX) { + /* Slot the projector in as the last component of the coord. */ + coord_dst.writemask = WRITEMASK_W; + emit(ir, TGSI_OPCODE_MOV, coord_dst, projector); + coord_dst.writemask = WRITEMASK_XYZW; + opcode = TGSI_OPCODE_TXP; + } else { + st_src_reg coord_w = coord; + coord_w.swizzle = SWIZZLE_WWWW; + + /* For the other TEX opcodes there's no projective version + * since the last slot is taken up by LOD info. Do the + * projective divide now. + */ + coord_dst.writemask = WRITEMASK_W; + emit(ir, TGSI_OPCODE_RCP, coord_dst, projector); + + /* In the case where we have to project the coordinates "by hand," + * the shadow comparator value must also be projected. + */ + st_src_reg tmp_src = coord; + if (ir->shadow_comparitor) { + /* Slot the shadow value in as the second to last component of the + * coord. + */ + ir->shadow_comparitor->accept(this); + + tmp_src = get_temp(glsl_type::vec4_type); + st_dst_reg tmp_dst = st_dst_reg(tmp_src); + + tmp_dst.writemask = WRITEMASK_Z; + emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); + + tmp_dst.writemask = WRITEMASK_XY; + emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord); + } + + coord_dst.writemask = WRITEMASK_XYZ; + emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); + + coord_dst.writemask = WRITEMASK_XYZW; + coord.swizzle = SWIZZLE_XYZW; + } + } + + /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow + * comparator was put in the correct place (and projected) by the code, + * above, that handles by-hand projection. + */ + if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { + /* Slot the shadow value in as the second to last component of the + * coord. + */ + ir->shadow_comparitor->accept(this); + coord_dst.writemask = WRITEMASK_Z; + emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); + coord_dst.writemask = WRITEMASK_XYZW; + } + + if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB || + opcode == TGSI_OPCODE_TXF) { + /* TGSI stores LOD or LOD bias in the last channel of the coords. */ + coord_dst.writemask = WRITEMASK_W; + emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); + coord_dst.writemask = WRITEMASK_XYZW; + } + + if (opcode == TGSI_OPCODE_TXD) + inst = emit(ir, opcode, result_dst, coord, dx, dy); + else if (opcode == TGSI_OPCODE_TXQ) + inst = emit(ir, opcode, result_dst, lod_info); + else + inst = emit(ir, opcode, result_dst, coord); + + if (ir->shadow_comparitor) + inst->tex_shadow = GL_TRUE; + + inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler, + this->shader_program, + this->prog); + + const glsl_type *sampler_type = ir->sampler->type; + + switch (sampler_type->sampler_dimensionality) { + case GLSL_SAMPLER_DIM_1D: + inst->tex_target = (sampler_type->sampler_array) + ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; + break; + case GLSL_SAMPLER_DIM_2D: + inst->tex_target = (sampler_type->sampler_array) + ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; + break; + case GLSL_SAMPLER_DIM_3D: + inst->tex_target = TEXTURE_3D_INDEX; + break; + case GLSL_SAMPLER_DIM_CUBE: + inst->tex_target = TEXTURE_CUBE_INDEX; + break; + case GLSL_SAMPLER_DIM_RECT: + inst->tex_target = TEXTURE_RECT_INDEX; + break; + case GLSL_SAMPLER_DIM_BUF: + assert(!"FINISHME: Implement ARB_texture_buffer_object"); + break; + default: + assert(!"Should not get here."); + } + + this->result = result_src; +} + +void +glsl_to_tgsi_visitor::visit(ir_return *ir) +{ + if (ir->get_value()) { + st_dst_reg l; + int i; + + assert(current_function); + + ir->get_value()->accept(this); + st_src_reg r = this->result; + + l = st_dst_reg(current_function->return_reg); + + for (i = 0; i < type_size(current_function->sig->return_type); i++) { + emit(ir, TGSI_OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } + + emit(ir, TGSI_OPCODE_RET); +} + +void +glsl_to_tgsi_visitor::visit(ir_discard *ir) +{ + struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; + + if (ir->condition) { + ir->condition->accept(this); + this->result.negate = ~this->result.negate; + emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result); + } else { + emit(ir, TGSI_OPCODE_KILP); + } + + fp->UsesKill = GL_TRUE; +} + +void +glsl_to_tgsi_visitor::visit(ir_if *ir) +{ + glsl_to_tgsi_instruction *cond_inst, *if_inst; + glsl_to_tgsi_instruction *prev_inst; + + prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + + ir->condition->accept(this); + assert(this->result.file != PROGRAM_UNDEFINED); + + if (this->options->EmitCondCodes) { + cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + + /* See if we actually generated any instruction for generating + * the condition. If not, then cook up a move to a temp so we + * have something to set cond_update on. + */ + if (cond_inst == prev_inst) { + st_src_reg temp = get_temp(glsl_type::bool_type); + cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result); + } + cond_inst->cond_update = GL_TRUE; + + if_inst = emit(ir->condition, TGSI_OPCODE_IF); + if_inst->dst.cond_mask = COND_NE; + } else { + if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result); + } + + this->instructions.push_tail(if_inst); + + visit_exec_list(&ir->then_instructions, this); + + if (!ir->else_instructions.is_empty()) { + emit(ir->condition, TGSI_OPCODE_ELSE); + visit_exec_list(&ir->else_instructions, this); + } + + if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF); +} + +glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() +{ + result.file = PROGRAM_UNDEFINED; + next_temp = 1; + next_signature_id = 1; + num_immediates = 0; + current_function = NULL; + num_address_regs = 0; + indirect_addr_temps = false; + indirect_addr_consts = false; + mem_ctx = ralloc_context(NULL); +} + +glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() +{ + ralloc_free(mem_ctx); +} + +extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) +{ + delete v; +} + + +/** + * Count resources used by the given gpu program (number of texture + * samplers, etc). + */ +static void +count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) +{ + v->samplers_used = 0; + + foreach_iter(exec_list_iterator, iter, v->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + if (is_tex_instruction(inst->op)) { + v->samplers_used |= 1 << inst->sampler; + + prog->SamplerTargets[inst->sampler] = + (gl_texture_index)inst->tex_target; + if (inst->tex_shadow) { + prog->ShadowSamplers |= 1 << inst->sampler; + } + } + } + + prog->SamplersUsed = v->samplers_used; + _mesa_update_shader_textures_used(prog); +} + + +/** + * Check if the given vertex/fragment/shader program is within the + * resource limits of the context (number of texture units, etc). + * If any of those checks fail, record a linker error. + * + * XXX more checks are needed... + */ +static void +check_resources(const struct gl_context *ctx, + struct gl_shader_program *shader_program, + glsl_to_tgsi_visitor *prog, + struct gl_program *proginfo) +{ + switch (proginfo->Target) { + case GL_VERTEX_PROGRAM_ARB: + if (_mesa_bitcount(prog->samplers_used) > + ctx->Const.MaxVertexTextureImageUnits) { + fail_link(shader_program, "Too many vertex shader texture samplers"); + } + if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { + fail_link(shader_program, "Too many vertex shader constants"); + } + break; + case MESA_GEOMETRY_PROGRAM: + if (_mesa_bitcount(prog->samplers_used) > + ctx->Const.MaxGeometryTextureImageUnits) { + fail_link(shader_program, "Too many geometry shader texture samplers"); + } + if (proginfo->Parameters->NumParameters > + MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) { + fail_link(shader_program, "Too many geometry shader constants"); + } + break; + case GL_FRAGMENT_PROGRAM_ARB: + if (_mesa_bitcount(prog->samplers_used) > + ctx->Const.MaxTextureImageUnits) { + fail_link(shader_program, "Too many fragment shader texture samplers"); + } + if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { + fail_link(shader_program, "Too many fragment shader constants"); + } + break; + default: + _mesa_problem(ctx, "unexpected program type in check_resources()"); + } +} + + + +struct uniform_sort { + struct gl_uniform *u; + int pos; +}; + +/* The shader_program->Uniforms list is almost sorted in increasing + * uniform->{Frag,Vert}Pos locations, but not quite when there are + * uniforms shared between targets. We need to add parameters in + * increasing order for the targets. + */ +static int +sort_uniforms(const void *a, const void *b) +{ + struct uniform_sort *u1 = (struct uniform_sort *)a; + struct uniform_sort *u2 = (struct uniform_sort *)b; + + return u1->pos - u2->pos; +} + +/* Add the uniforms to the parameters. The linker chose locations + * in our parameters lists (which weren't created yet), which the + * uniforms code will use to poke values into our parameters list + * when uniforms are updated. + */ +static void +add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, + struct gl_shader *shader, + struct gl_program *prog) +{ + unsigned int i; + unsigned int next_sampler = 0, num_uniforms = 0; + struct uniform_sort *sorted_uniforms; + + sorted_uniforms = ralloc_array(NULL, struct uniform_sort, + shader_program->Uniforms->NumUniforms); + + for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) { + struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i; + int parameter_index = -1; + + switch (shader->Type) { + case GL_VERTEX_SHADER: + parameter_index = uniform->VertPos; + break; + case GL_FRAGMENT_SHADER: + parameter_index = uniform->FragPos; + break; + case GL_GEOMETRY_SHADER: + parameter_index = uniform->GeomPos; + break; + } + + /* Only add uniforms used in our target. */ + if (parameter_index != -1) { + sorted_uniforms[num_uniforms].pos = parameter_index; + sorted_uniforms[num_uniforms].u = uniform; + num_uniforms++; + } + } + + qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort), + sort_uniforms); + + for (i = 0; i < num_uniforms; i++) { + struct gl_uniform *uniform = sorted_uniforms[i].u; + int parameter_index = sorted_uniforms[i].pos; + const glsl_type *type = uniform->Type; + unsigned int size; + + if (type->is_vector() || + type->is_scalar()) { + size = type->vector_elements; + } else { + size = type_size(type) * 4; + } + + gl_register_file file; + if (type->is_sampler() || + (type->is_array() && type->fields.array->is_sampler())) { + file = PROGRAM_SAMPLER; + } else { + file = PROGRAM_UNIFORM; + } + + GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1, + uniform->Name); + + if (index < 0) { + index = _mesa_add_parameter(prog->Parameters, file, + uniform->Name, size, type->gl_type, + NULL, NULL, 0x0); + + /* Sampler uniform values are stored in prog->SamplerUnits, + * and the entry in that array is selected by this index we + * store in ParameterValues[]. + */ + if (file == PROGRAM_SAMPLER) { + for (unsigned int j = 0; j < size / 4; j++) + prog->Parameters->ParameterValues[index + j][0].f = next_sampler++; + } + + /* The location chosen in the Parameters list here (returned + * from _mesa_add_uniform) has to match what the linker chose. + */ + if (index != parameter_index) { + fail_link(shader_program, "Allocation of uniform `%s' to target " + "failed (%d vs %d)\n", + uniform->Name, index, parameter_index); + } + } + } + + ralloc_free(sorted_uniforms); +} + +static void +set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, + struct gl_shader_program *shader_program, + const char *name, const glsl_type *type, + ir_constant *val) +{ + if (type->is_record()) { + ir_constant *field_constant; + + field_constant = (ir_constant *)val->components.get_head(); + + for (unsigned int i = 0; i < type->length; i++) { + const glsl_type *field_type = type->fields.structure[i].type; + const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name, + type->fields.structure[i].name); + set_uniform_initializer(ctx, mem_ctx, shader_program, field_name, + field_type, field_constant); + field_constant = (ir_constant *)field_constant->next; + } + return; + } + + int loc = _mesa_get_uniform_location(ctx, shader_program, name); + + if (loc == -1) { + fail_link(shader_program, + "Couldn't find uniform for initializer %s\n", name); + return; + } + + for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) { + ir_constant *element; + const glsl_type *element_type; + if (type->is_array()) { + element = val->array_elements[i]; + element_type = type->fields.array; + } else { + element = val; + element_type = type; + } + + void *values; + + if (element_type->base_type == GLSL_TYPE_BOOL) { + int *conv = ralloc_array(mem_ctx, int, element_type->components()); + for (unsigned int j = 0; j < element_type->components(); j++) { + conv[j] = element->value.b[j]; + } + values = (void *)conv; + element_type = glsl_type::get_instance(GLSL_TYPE_INT, + element_type->vector_elements, + 1); + } else { + values = &element->value; + } + + if (element_type->is_matrix()) { + _mesa_uniform_matrix(ctx, shader_program, + element_type->matrix_columns, + element_type->vector_elements, + loc, 1, GL_FALSE, (GLfloat *)values); + loc += element_type->matrix_columns; + } else { + _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns, + values, element_type->gl_type); + loc += type_size(element_type); + } + } +} + +static void +set_uniform_initializers(struct gl_context *ctx, + struct gl_shader_program *shader_program) +{ + void *mem_ctx = NULL; + + for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) { + struct gl_shader *shader = shader_program->_LinkedShaders[i]; + + if (shader == NULL) + continue; + + foreach_iter(exec_list_iterator, iter, *shader->ir) { + ir_instruction *ir = (ir_instruction *)iter.get(); + ir_variable *var = ir->as_variable(); + + if (!var || var->mode != ir_var_uniform || !var->constant_value) + continue; + + if (!mem_ctx) + mem_ctx = ralloc_context(NULL); + + set_uniform_initializer(ctx, mem_ctx, shader_program, var->name, + var->type, var->constant_value); + } + } + + ralloc_free(mem_ctx); +} + +/* + * Scan/rewrite program to remove reads of custom (output) registers. + * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING + * (for vertex shaders). + * In GLSL shaders, varying vars can be read and written. + * On some hardware, trying to read an output register causes trouble. + * So, rewrite the program to use a temporary register in this case. + * + * Based on _mesa_remove_output_reads from programopt.c. + */ +void +glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) +{ + GLuint i; + GLint outputMap[VERT_RESULT_MAX]; + GLint outputTypes[VERT_RESULT_MAX]; + GLuint numVaryingReads = 0; + GLboolean usedTemps[MAX_TEMPS]; + GLuint firstTemp = 0; + + _mesa_find_used_registers(prog, PROGRAM_TEMPORARY, + usedTemps, MAX_TEMPS); + + assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT); + assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING); + + for (i = 0; i < VERT_RESULT_MAX; i++) + outputMap[i] = -1; + + /* look for instructions which read from varying vars */ + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + const GLuint numSrc = num_inst_src_regs(inst->op); + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->src[j].file == type) { + /* replace the read with a temp reg */ + const GLuint var = inst->src[j].index; + if (outputMap[var] == -1) { + numVaryingReads++; + outputMap[var] = _mesa_find_free_register(usedTemps, + MAX_TEMPS, + firstTemp); + outputTypes[var] = inst->src[j].type; + firstTemp = outputMap[var] + 1; + } + inst->src[j].file = PROGRAM_TEMPORARY; + inst->src[j].index = outputMap[var]; + } + } + } + + if (numVaryingReads == 0) + return; /* nothing to be done */ + + /* look for instructions which write to the varying vars identified above */ + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) { + /* change inst to write to the temp reg, instead of the varying */ + inst->dst.file = PROGRAM_TEMPORARY; + inst->dst.index = outputMap[inst->dst.index]; + } + } + + /* insert new MOV instructions at the end */ + for (i = 0; i < VERT_RESULT_MAX; i++) { + if (outputMap[i] >= 0) { + /* MOV VAR[i], TEMP[tmp]; */ + st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]); + st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]); + dst.index = i; + this->emit(NULL, TGSI_OPCODE_MOV, dst, src); + } + } +} + +/** + * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which + * are read from the given src in this instruction + */ +static int +get_src_arg_mask(st_dst_reg dst, st_src_reg src) +{ + int read_mask = 0, comp; + + /* Now, given the src swizzle and the written channels, find which + * components are actually read + */ + for (comp = 0; comp < 4; ++comp) { + const unsigned coord = GET_SWZ(src.swizzle, comp); + ASSERT(coord < 4); + if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W) + read_mask |= 1 << coord; + } + + return read_mask; +} + +/** + * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP + * instruction is the first instruction to write to register T0. There are + * several lowering passes done in GLSL IR (e.g. branches and + * relative addressing) that create a large number of conditional assignments + * that ir_to_mesa converts to CMP instructions like the one mentioned above. + * + * Here is why this conversion is safe: + * CMP T0, T1 T2 T0 can be expanded to: + * if (T1 < 0.0) + * MOV T0, T2; + * else + * MOV T0, T0; + * + * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same + * as the original program. If (T1 < 0.0) evaluates to false, executing + * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. + * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 + * because any instruction that was going to read from T0 after this was going + * to read a garbage value anyway. + */ +void +glsl_to_tgsi_visitor::simplify_cmp(void) +{ + unsigned tempWrites[MAX_TEMPS]; + unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; + + memset(tempWrites, 0, sizeof(tempWrites)); + memset(outputWrites, 0, sizeof(outputWrites)); + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + unsigned prevWriteMask = 0; + + /* Give up if we encounter relative addressing or flow control. */ + if (inst->dst.reladdr || + tgsi_get_opcode_info(inst->op)->is_branch || + inst->op == TGSI_OPCODE_BGNSUB || + inst->op == TGSI_OPCODE_CONT || + inst->op == TGSI_OPCODE_END || + inst->op == TGSI_OPCODE_ENDSUB || + inst->op == TGSI_OPCODE_RET) { + return; + } + + if (inst->dst.file == PROGRAM_OUTPUT) { + assert(inst->dst.index < MAX_PROGRAM_OUTPUTS); + prevWriteMask = outputWrites[inst->dst.index]; + outputWrites[inst->dst.index] |= inst->dst.writemask; + } else if (inst->dst.file == PROGRAM_TEMPORARY) { + assert(inst->dst.index < MAX_TEMPS); + prevWriteMask = tempWrites[inst->dst.index]; + tempWrites[inst->dst.index] |= inst->dst.writemask; + } + + /* For a CMP to be considered a conditional write, the destination + * register and source register two must be the same. */ + if (inst->op == TGSI_OPCODE_CMP + && !(inst->dst.writemask & prevWriteMask) + && inst->src[2].file == inst->dst.file + && inst->src[2].index == inst->dst.index + && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) { + + inst->op = TGSI_OPCODE_MOV; + inst->src[0] = inst->src[1]; + } + } +} + +/* Replaces all references to a temporary register index with another index. */ +void +glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) +{ + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + unsigned j; + + for (j=0; j < num_inst_src_regs(inst->op); j++) { + if (inst->src[j].file == PROGRAM_TEMPORARY && + inst->src[j].index == index) { + inst->src[j].index = new_index; + } + } + + if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { + inst->dst.index = new_index; + } + } +} + +int +glsl_to_tgsi_visitor::get_first_temp_read(int index) +{ + int depth = 0; /* loop depth */ + int loop_start = -1; /* index of the first active BGNLOOP (if any) */ + unsigned i = 0, j; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + for (j=0; j < num_inst_src_regs(inst->op); j++) { + if (inst->src[j].file == PROGRAM_TEMPORARY && + inst->src[j].index == index) { + return (depth == 0) ? i : loop_start; + } + } + + if (inst->op == TGSI_OPCODE_BGNLOOP) { + if(depth++ == 0) + loop_start = i; + } else if (inst->op == TGSI_OPCODE_ENDLOOP) { + if (--depth == 0) + loop_start = -1; + } + assert(depth >= 0); + + i++; + } + + return -1; +} + +int +glsl_to_tgsi_visitor::get_first_temp_write(int index) +{ + int depth = 0; /* loop depth */ + int loop_start = -1; /* index of the first active BGNLOOP (if any) */ + int i = 0; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { + return (depth == 0) ? i : loop_start; + } + + if (inst->op == TGSI_OPCODE_BGNLOOP) { + if(depth++ == 0) + loop_start = i; + } else if (inst->op == TGSI_OPCODE_ENDLOOP) { + if (--depth == 0) + loop_start = -1; + } + assert(depth >= 0); + + i++; + } + + return -1; +} + +int +glsl_to_tgsi_visitor::get_last_temp_read(int index) +{ + int depth = 0; /* loop depth */ + int last = -1; /* index of last instruction that reads the temporary */ + unsigned i = 0, j; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + for (j=0; j < num_inst_src_regs(inst->op); j++) { + if (inst->src[j].file == PROGRAM_TEMPORARY && + inst->src[j].index == index) { + last = (depth == 0) ? i : -2; + } + } + + if (inst->op == TGSI_OPCODE_BGNLOOP) + depth++; + else if (inst->op == TGSI_OPCODE_ENDLOOP) + if (--depth == 0 && last == -2) + last = i; + assert(depth >= 0); + + i++; + } + + assert(last >= -1); + return last; +} + +int +glsl_to_tgsi_visitor::get_last_temp_write(int index) +{ + int depth = 0; /* loop depth */ + int last = -1; /* index of last instruction that writes to the temporary */ + int i = 0; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) + last = (depth == 0) ? i : -2; + + if (inst->op == TGSI_OPCODE_BGNLOOP) + depth++; + else if (inst->op == TGSI_OPCODE_ENDLOOP) + if (--depth == 0 && last == -2) + last = i; + assert(depth >= 0); + + i++; + } + + assert(last >= -1); + return last; +} + +/* + * On a basic block basis, tracks available PROGRAM_TEMPORARY register + * channels for copy propagation and updates following instructions to + * use the original versions. + * + * The glsl_to_tgsi_visitor lazily produces code assuming that this pass + * will occur. As an example, a TXP production before this pass: + * + * 0: MOV TEMP[1], INPUT[4].xyyy; + * 1: MOV TEMP[1].w, INPUT[4].wwww; + * 2: TXP TEMP[2], TEMP[1], texture[0], 2D; + * + * and after: + * + * 0: MOV TEMP[1], INPUT[4].xyyy; + * 1: MOV TEMP[1].w, INPUT[4].wwww; + * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; + * + * which allows for dead code elimination on TEMP[1]'s writes. + */ +void +glsl_to_tgsi_visitor::copy_propagate(void) +{ + glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx, + glsl_to_tgsi_instruction *, + this->next_temp * 4); + int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); + int level = 0; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + assert(inst->dst.file != PROGRAM_TEMPORARY + || inst->dst.index < this->next_temp); + + /* First, do any copy propagation possible into the src regs. */ + for (int r = 0; r < 3; r++) { + glsl_to_tgsi_instruction *first = NULL; + bool good = true; + int acp_base = inst->src[r].index * 4; + + if (inst->src[r].file != PROGRAM_TEMPORARY || + inst->src[r].reladdr) + continue; + + /* See if we can find entries in the ACP consisting of MOVs + * from the same src register for all the swizzled channels + * of this src register reference. + */ + for (int i = 0; i < 4; i++) { + int src_chan = GET_SWZ(inst->src[r].swizzle, i); + glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan]; + + if (!copy_chan) { + good = false; + break; + } + + assert(acp_level[acp_base + src_chan] <= level); + + if (!first) { + first = copy_chan; + } else { + if (first->src[0].file != copy_chan->src[0].file || + first->src[0].index != copy_chan->src[0].index) { + good = false; + break; + } + } + } + + if (good) { + /* We've now validated that we can copy-propagate to + * replace this src register reference. Do it. + */ + inst->src[r].file = first->src[0].file; + inst->src[r].index = first->src[0].index; + + int swizzle = 0; + for (int i = 0; i < 4; i++) { + int src_chan = GET_SWZ(inst->src[r].swizzle, i); + glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan]; + swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << + (3 * i)); + } + inst->src[r].swizzle = swizzle; + } + } + + switch (inst->op) { + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_ENDLOOP: + /* End of a basic block, clear the ACP entirely. */ + memset(acp, 0, sizeof(*acp) * this->next_temp * 4); + break; + + case TGSI_OPCODE_IF: + ++level; + break; + + case TGSI_OPCODE_ENDIF: + case TGSI_OPCODE_ELSE: + /* Clear all channels written inside the block from the ACP, but + * leaving those that were not touched. + */ + for (int r = 0; r < this->next_temp; r++) { + for (int c = 0; c < 4; c++) { + if (!acp[4 * r + c]) + continue; + + if (acp_level[4 * r + c] >= level) + acp[4 * r + c] = NULL; + } + } + if (inst->op == TGSI_OPCODE_ENDIF) + --level; + break; + + default: + /* Continuing the block, clear any written channels from + * the ACP. + */ + if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) { + /* Any temporary might be written, so no copy propagation + * across this instruction. + */ + memset(acp, 0, sizeof(*acp) * this->next_temp * 4); + } else if (inst->dst.file == PROGRAM_OUTPUT && + inst->dst.reladdr) { + /* Any output might be written, so no copy propagation + * from outputs across this instruction. + */ + for (int r = 0; r < this->next_temp; r++) { + for (int c = 0; c < 4; c++) { + if (!acp[4 * r + c]) + continue; + + if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT) + acp[4 * r + c] = NULL; + } + } + } else if (inst->dst.file == PROGRAM_TEMPORARY || + inst->dst.file == PROGRAM_OUTPUT) { + /* Clear where it's used as dst. */ + if (inst->dst.file == PROGRAM_TEMPORARY) { + for (int c = 0; c < 4; c++) { + if (inst->dst.writemask & (1 << c)) { + acp[4 * inst->dst.index + c] = NULL; + } + } + } + + /* Clear where it's used as src. */ + for (int r = 0; r < this->next_temp; r++) { + for (int c = 0; c < 4; c++) { + if (!acp[4 * r + c]) + continue; + + int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c); + + if (acp[4 * r + c]->src[0].file == inst->dst.file && + acp[4 * r + c]->src[0].index == inst->dst.index && + inst->dst.writemask & (1 << src_chan)) + { + acp[4 * r + c] = NULL; + } + } + } + } + break; + } + + /* If this is a copy, add it to the ACP. */ + if (inst->op == TGSI_OPCODE_MOV && + inst->dst.file == PROGRAM_TEMPORARY && + !inst->dst.reladdr && + !inst->saturate && + !inst->src[0].reladdr && + !inst->src[0].negate) { + for (int i = 0; i < 4; i++) { + if (inst->dst.writemask & (1 << i)) { + acp[4 * inst->dst.index + i] = inst; + acp_level[4 * inst->dst.index + i] = level; + } + } + } + } + + ralloc_free(acp_level); + ralloc_free(acp); +} + +/* + * Tracks available PROGRAM_TEMPORARY registers for dead code elimination. + * + * The glsl_to_tgsi_visitor lazily produces code assuming that this pass + * will occur. As an example, a TXP production after copy propagation but + * before this pass: + * + * 0: MOV TEMP[1], INPUT[4].xyyy; + * 1: MOV TEMP[1].w, INPUT[4].wwww; + * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; + * + * and after this pass: + * + * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; + * + * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB) + * FIXME: doesn't eliminate all dead code inside of loops; it steps around them + */ +void +glsl_to_tgsi_visitor::eliminate_dead_code(void) +{ + int i; + + for (i=0; i < this->next_temp; i++) { + int last_read = get_last_temp_read(i); + int j = 0; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i && + j > last_read) + { + iter.remove(); + delete inst; + } + + j++; + } + } +} + +/* + * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead + * code elimination. This is less primitive than eliminate_dead_code(), as it + * is per-channel and can detect consecutive writes without a read between them + * as dead code. However, there is some dead code that can be eliminated by + * eliminate_dead_code() but not this function - for example, this function + * cannot eliminate an instruction writing to a register that is never read and + * is the only instruction writing to that register. + * + * The glsl_to_tgsi_visitor lazily produces code assuming that this pass + * will occur. + */ +int +glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) +{ + glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx, + glsl_to_tgsi_instruction *, + this->next_temp * 4); + int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); + int level = 0; + int removed = 0; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + assert(inst->dst.file != PROGRAM_TEMPORARY + || inst->dst.index < this->next_temp); + + switch (inst->op) { + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_ENDLOOP: + /* End of a basic block, clear the write array entirely. + * FIXME: This keeps us from killing dead code when the writes are + * on either side of a loop, even when the register isn't touched + * inside the loop. + */ + memset(writes, 0, sizeof(*writes) * this->next_temp * 4); + break; + + case TGSI_OPCODE_ENDIF: + --level; + break; + + case TGSI_OPCODE_ELSE: + /* Clear all channels written inside the preceding if block from the + * write array, but leave those that were not touched. + * + * FIXME: This destroys opportunities to remove dead code inside of + * IF blocks that are followed by an ELSE block. + */ + for (int r = 0; r < this->next_temp; r++) { + for (int c = 0; c < 4; c++) { + if (!writes[4 * r + c]) + continue; + + if (write_level[4 * r + c] >= level) + writes[4 * r + c] = NULL; + } + } + break; + + case TGSI_OPCODE_IF: + ++level; + /* fallthrough to default case to mark the condition as read */ + + default: + /* Continuing the block, clear any channels from the write array that + * are read by this instruction. + */ + for (unsigned i = 0; i < Elements(inst->src); i++) { + if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){ + /* Any temporary might be read, so no dead code elimination + * across this instruction. + */ + memset(writes, 0, sizeof(*writes) * this->next_temp * 4); + } else if (inst->src[i].file == PROGRAM_TEMPORARY) { + /* Clear where it's used as src. */ + int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0); + src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1); + src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2); + src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3); + + for (int c = 0; c < 4; c++) { + if (src_chans & (1 << c)) { + writes[4 * inst->src[i].index + c] = NULL; + } + } + } + } + break; + } + + /* If this instruction writes to a temporary, add it to the write array. + * If there is already an instruction in the write array for one or more + * of the channels, flag that channel write as dead. + */ + if (inst->dst.file == PROGRAM_TEMPORARY && + !inst->dst.reladdr && + !inst->saturate) { + for (int c = 0; c < 4; c++) { + if (inst->dst.writemask & (1 << c)) { + if (writes[4 * inst->dst.index + c]) { + if (write_level[4 * inst->dst.index + c] < level) + continue; + else + writes[4 * inst->dst.index + c]->dead_mask |= (1 << c); + } + writes[4 * inst->dst.index + c] = inst; + write_level[4 * inst->dst.index + c] = level; + } + } + } + } + + /* Anything still in the write array at this point is dead code. */ + for (int r = 0; r < this->next_temp; r++) { + for (int c = 0; c < 4; c++) { + glsl_to_tgsi_instruction *inst = writes[4 * r + c]; + if (inst) + inst->dead_mask |= (1 << c); + } + } + + /* Now actually remove the instructions that are completely dead and update + * the writemask of other instructions with dead channels. + */ + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + if (!inst->dead_mask || !inst->dst.writemask) + continue; + else if (inst->dead_mask == inst->dst.writemask) { + iter.remove(); + delete inst; + removed++; + } else + inst->dst.writemask &= ~(inst->dead_mask); + } + + ralloc_free(write_level); + ralloc_free(writes); + + return removed; +} + +/* Merges temporary registers together where possible to reduce the number of + * registers needed to run a program. + * + * Produces optimal code only after copy propagation and dead code elimination + * have been run. */ +void +glsl_to_tgsi_visitor::merge_registers(void) +{ + int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp); + int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp); + int i, j; + + /* Read the indices of the last read and first write to each temp register + * into an array so that we don't have to traverse the instruction list as + * much. */ + for (i=0; i < this->next_temp; i++) { + last_reads[i] = get_last_temp_read(i); + first_writes[i] = get_first_temp_write(i); + } + + /* Start looking for registers with non-overlapping usages that can be + * merged together. */ + for (i=0; i < this->next_temp; i++) { + /* Don't touch unused registers. */ + if (last_reads[i] < 0 || first_writes[i] < 0) continue; + + for (j=0; j < this->next_temp; j++) { + /* Don't touch unused registers. */ + if (last_reads[j] < 0 || first_writes[j] < 0) continue; + + /* We can merge the two registers if the first write to j is after or + * in the same instruction as the last read from i. Note that the + * register at index i will always be used earlier or at the same time + * as the register at index j. */ + if (first_writes[i] <= first_writes[j] && + last_reads[i] <= first_writes[j]) + { + rename_temp_register(j, i); /* Replace all references to j with i.*/ + + /* Update the first_writes and last_reads arrays with the new + * values for the merged register index, and mark the newly unused + * register index as such. */ + last_reads[i] = last_reads[j]; + first_writes[j] = -1; + last_reads[j] = -1; + } + } + } + + ralloc_free(last_reads); + ralloc_free(first_writes); +} + +/* Reassign indices to temporary registers by reusing unused indices created + * by optimization passes. */ +void +glsl_to_tgsi_visitor::renumber_registers(void) +{ + int i = 0; + int new_index = 0; + + for (i=0; i < this->next_temp; i++) { + if (get_first_temp_read(i) < 0) continue; + if (i != new_index) + rename_temp_register(i, new_index); + new_index++; + } + + this->next_temp = new_index; +} + +/** + * Returns a fragment program which implements the current pixel transfer ops. + * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c. + */ +extern "C" void +get_pixel_transfer_visitor(struct st_fragment_program *fp, + glsl_to_tgsi_visitor *original, + int scale_and_bias, int pixel_maps) +{ + glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); + struct st_context *st = st_context(original->ctx); + struct gl_program *prog = &fp->Base.Base; + struct gl_program_parameter_list *params = _mesa_new_parameter_list(); + st_src_reg coord, src0; + st_dst_reg dst0; + glsl_to_tgsi_instruction *inst; + + /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ + v->ctx = original->ctx; + v->prog = prog; + v->glsl_version = original->glsl_version; + v->native_integers = original->native_integers; + v->options = original->options; + v->next_temp = original->next_temp; + v->num_address_regs = original->num_address_regs; + v->samplers_used = prog->SamplersUsed = original->samplers_used; + v->indirect_addr_temps = original->indirect_addr_temps; + v->indirect_addr_consts = original->indirect_addr_consts; + memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); + + /* + * Get initial pixel color from the texture. + * TEX colorTemp, fragment.texcoord[0], texture[0], 2D; + */ + coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); + src0 = v->get_temp(glsl_type::vec4_type); + dst0 = st_dst_reg(src0); + inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); + inst->sampler = 0; + inst->tex_target = TEXTURE_2D_INDEX; + + prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); + prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */ + v->samplers_used |= (1 << 0); + + if (scale_and_bias) { + static const gl_state_index scale_state[STATE_LENGTH] = + { STATE_INTERNAL, STATE_PT_SCALE, + (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; + static const gl_state_index bias_state[STATE_LENGTH] = + { STATE_INTERNAL, STATE_PT_BIAS, + (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; + GLint scale_p, bias_p; + st_src_reg scale, bias; + + scale_p = _mesa_add_state_reference(params, scale_state); + bias_p = _mesa_add_state_reference(params, bias_state); + + /* MAD colorTemp, colorTemp, scale, bias; */ + scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT); + bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT); + inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias); + } + + if (pixel_maps) { + st_src_reg temp = v->get_temp(glsl_type::vec4_type); + st_dst_reg temp_dst = st_dst_reg(temp); + + assert(st->pixel_xfer.pixelmap_texture); + + /* With a little effort, we can do four pixel map look-ups with + * two TEX instructions: + */ + + /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */ + temp_dst.writemask = WRITEMASK_XY; /* write R,G */ + inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); + inst->sampler = 1; + inst->tex_target = TEXTURE_2D_INDEX; + + /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */ + src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); + temp_dst.writemask = WRITEMASK_ZW; /* write B,A */ + inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); + inst->sampler = 1; + inst->tex_target = TEXTURE_2D_INDEX; + + prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */ + v->samplers_used |= (1 << 1); + + /* MOV colorTemp, temp; */ + inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp); + } + + /* Now copy the instructions from the original glsl_to_tgsi_visitor into the + * new visitor. */ + foreach_iter(exec_list_iterator, iter, original->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + st_src_reg src_regs[3]; + + if (inst->dst.file == PROGRAM_OUTPUT) + prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); + + for (int i=0; i<3; i++) { + src_regs[i] = inst->src[i]; + if (src_regs[i].file == PROGRAM_INPUT && + src_regs[i].index == FRAG_ATTRIB_COL0) + { + src_regs[i].file = PROGRAM_TEMPORARY; + src_regs[i].index = src0.index; + } + else if (src_regs[i].file == PROGRAM_INPUT) + prog->InputsRead |= (1 << src_regs[i].index); + } + + v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); + } + + /* Make modifications to fragment program info. */ + prog->Parameters = _mesa_combine_parameter_lists(params, + original->prog->Parameters); + prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes); + prog->Varying = _mesa_clone_parameter_list(original->prog->Varying); + _mesa_free_parameter_list(params); + count_resources(v, prog); + fp->glsl_to_tgsi = v; +} + +/** + * Make fragment program for glBitmap: + * Sample the texture and kill the fragment if the bit is 0. + * This program will be combined with the user's fragment program. + * + * Based on make_bitmap_fragment_program in st_cb_bitmap.c. + */ +extern "C" void +get_bitmap_visitor(struct st_fragment_program *fp, + glsl_to_tgsi_visitor *original, int samplerIndex) +{ + glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); + struct st_context *st = st_context(original->ctx); + struct gl_program *prog = &fp->Base.Base; + st_src_reg coord, src0; + st_dst_reg dst0; + glsl_to_tgsi_instruction *inst; + + /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ + v->ctx = original->ctx; + v->prog = prog; + v->glsl_version = original->glsl_version; + v->native_integers = original->native_integers; + v->options = original->options; + v->next_temp = original->next_temp; + v->num_address_regs = original->num_address_regs; + v->samplers_used = prog->SamplersUsed = original->samplers_used; + v->indirect_addr_temps = original->indirect_addr_temps; + v->indirect_addr_consts = original->indirect_addr_consts; + memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); + + /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ + coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); + src0 = v->get_temp(glsl_type::vec4_type); + dst0 = st_dst_reg(src0); + inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); + inst->sampler = samplerIndex; + inst->tex_target = TEXTURE_2D_INDEX; + + prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); + prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */ + v->samplers_used |= (1 << samplerIndex); + + /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */ + src0.negate = NEGATE_XYZW; + if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM) + src0.swizzle = SWIZZLE_XXXX; + inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0); + + /* Now copy the instructions from the original glsl_to_tgsi_visitor into the + * new visitor. */ + foreach_iter(exec_list_iterator, iter, original->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + st_src_reg src_regs[3]; + + if (inst->dst.file == PROGRAM_OUTPUT) + prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); + + for (int i=0; i<3; i++) { + src_regs[i] = inst->src[i]; + if (src_regs[i].file == PROGRAM_INPUT) + prog->InputsRead |= (1 << src_regs[i].index); + } + + v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); + } + + /* Make modifications to fragment program info. */ + prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters); + prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes); + prog->Varying = _mesa_clone_parameter_list(original->prog->Varying); + count_resources(v, prog); + fp->glsl_to_tgsi = v; +} + +/* ------------------------- TGSI conversion stuff -------------------------- */ +struct label { + unsigned branch_target; + unsigned token; +}; + +/** + * Intermediate state used during shader translation. + */ +struct st_translate { + struct ureg_program *ureg; + + struct ureg_dst temps[MAX_TEMPS]; + struct ureg_src *constants; + struct ureg_src *immediates; + struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; + struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; + struct ureg_dst address[1]; + struct ureg_src samplers[PIPE_MAX_SAMPLERS]; + struct ureg_src systemValues[SYSTEM_VALUE_MAX]; + + /* Extra info for handling point size clamping in vertex shader */ + struct ureg_dst pointSizeResult; /**< Actual point size output register */ + struct ureg_src pointSizeConst; /**< Point size range constant register */ + GLint pointSizeOutIndex; /**< Temp point size output register */ + GLboolean prevInstWrotePointSize; + + const GLuint *inputMapping; + const GLuint *outputMapping; + + /* For every instruction that contains a label (eg CALL), keep + * details so that we can go back afterwards and emit the correct + * tgsi instruction number for each label. + */ + struct label *labels; + unsigned labels_size; + unsigned labels_count; + + /* Keep a record of the tgsi instruction number that each mesa + * instruction starts at, will be used to fix up labels after + * translation. + */ + unsigned *insn; + unsigned insn_size; + unsigned insn_count; + + unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */ + + boolean error; +}; + +/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */ +static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { + TGSI_SEMANTIC_FACE, + TGSI_SEMANTIC_INSTANCEID +}; + +/** + * Make note of a branch to a label in the TGSI code. + * After we've emitted all instructions, we'll go over the list + * of labels built here and patch the TGSI code with the actual + * location of each label. + */ +static unsigned *get_label(struct st_translate *t, unsigned branch_target) +{ + unsigned i; + + if (t->labels_count + 1 >= t->labels_size) { + t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); + t->labels = (struct label *)realloc(t->labels, + t->labels_size * sizeof(struct label)); + if (t->labels == NULL) { + static unsigned dummy; + t->error = TRUE; + return &dummy; + } + } + + i = t->labels_count++; + t->labels[i].branch_target = branch_target; + return &t->labels[i].token; +} + +/** + * Called prior to emitting the TGSI code for each instruction. + * Allocate additional space for instructions if needed. + * Update the insn[] array so the next glsl_to_tgsi_instruction points to + * the next TGSI instruction. + */ +static void set_insn_start(struct st_translate *t, unsigned start) +{ + if (t->insn_count + 1 >= t->insn_size) { + t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); + t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0])); + if (t->insn == NULL) { + t->error = TRUE; + return; + } + } + + t->insn[t->insn_count++] = start; +} + +/** + * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. + */ +static struct ureg_src +emit_immediate(struct st_translate *t, + gl_constant_value values[4], + int type, int size) +{ + struct ureg_program *ureg = t->ureg; + + switch(type) + { + case GL_FLOAT: + return ureg_DECL_immediate(ureg, &values[0].f, size); + case GL_INT: + return ureg_DECL_immediate_int(ureg, &values[0].i, size); + case GL_UNSIGNED_INT: + case GL_BOOL: + return ureg_DECL_immediate_uint(ureg, &values[0].u, size); + default: + assert(!"should not get here - type must be float, int, uint, or bool"); + return ureg_src_undef(); + } +} + +/** + * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register. + */ +static struct ureg_dst +dst_register(struct st_translate *t, + gl_register_file file, + GLuint index) +{ + switch(file) { + case PROGRAM_UNDEFINED: + return ureg_dst_undef(); + + case PROGRAM_TEMPORARY: + if (ureg_dst_is_undef(t->temps[index])) + t->temps[index] = ureg_DECL_temporary(t->ureg); + + return t->temps[index]; + + case PROGRAM_OUTPUT: + if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ) + t->prevInstWrotePointSize = GL_TRUE; + + if (t->procType == TGSI_PROCESSOR_VERTEX) + assert(index < VERT_RESULT_MAX); + else if (t->procType == TGSI_PROCESSOR_FRAGMENT) + assert(index < FRAG_RESULT_MAX); + else + assert(index < GEOM_RESULT_MAX); + + assert(t->outputMapping[index] < Elements(t->outputs)); + + return t->outputs[t->outputMapping[index]]; + + case PROGRAM_ADDRESS: + return t->address[index]; + + default: + assert(!"unknown dst register file"); + return ureg_dst_undef(); + } +} + +/** + * Map a glsl_to_tgsi src register to a TGSI ureg_src register. + */ +static struct ureg_src +src_register(struct st_translate *t, + gl_register_file file, + GLuint index) +{ + switch(file) { + case PROGRAM_UNDEFINED: + return ureg_src_undef(); + + case PROGRAM_TEMPORARY: + assert(index >= 0); + assert(index < Elements(t->temps)); + if (ureg_dst_is_undef(t->temps[index])) + t->temps[index] = ureg_DECL_temporary(t->ureg); + return ureg_src(t->temps[index]); + + case PROGRAM_NAMED_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_UNIFORM: + assert(index >= 0); + return t->constants[index]; + case PROGRAM_STATE_VAR: + case PROGRAM_CONSTANT: /* ie, immediate */ + if (index < 0) + return ureg_DECL_constant(t->ureg, 0); + else + return t->constants[index]; + + case PROGRAM_IMMEDIATE: + return t->immediates[index]; + + case PROGRAM_INPUT: + assert(t->inputMapping[index] < Elements(t->inputs)); + return t->inputs[t->inputMapping[index]]; + + case PROGRAM_OUTPUT: + assert(t->outputMapping[index] < Elements(t->outputs)); + return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */ + + case PROGRAM_ADDRESS: + return ureg_src(t->address[index]); + + case PROGRAM_SYSTEM_VALUE: + assert(index < Elements(t->systemValues)); + return t->systemValues[index]; + + default: + assert(!"unknown src register file"); + return ureg_src_undef(); + } +} + +/** + * Create a TGSI ureg_dst register from an st_dst_reg. + */ +static struct ureg_dst +translate_dst(struct st_translate *t, + const st_dst_reg *dst_reg, + bool saturate) +{ + struct ureg_dst dst = dst_register(t, + dst_reg->file, + dst_reg->index); + + dst = ureg_writemask(dst, dst_reg->writemask); + + if (saturate) + dst = ureg_saturate(dst); + + if (dst_reg->reladdr != NULL) + dst = ureg_dst_indirect(dst, ureg_src(t->address[0])); + + return dst; +} + +/** + * Create a TGSI ureg_src register from an st_src_reg. + */ +static struct ureg_src +translate_src(struct st_translate *t, const st_src_reg *src_reg) +{ + struct ureg_src src = src_register(t, src_reg->file, src_reg->index); + + src = ureg_swizzle(src, + GET_SWZ(src_reg->swizzle, 0) & 0x3, + GET_SWZ(src_reg->swizzle, 1) & 0x3, + GET_SWZ(src_reg->swizzle, 2) & 0x3, + GET_SWZ(src_reg->swizzle, 3) & 0x3); + + if ((src_reg->negate & 0xf) == NEGATE_XYZW) + src = ureg_negate(src); + + if (src_reg->reladdr != NULL) { + /* Normally ureg_src_indirect() would be used here, but a stupid compiler + * bug in g++ makes ureg_src_indirect (an inline C function) erroneously + * set the bit for src.Negate. So we have to do the operation manually + * here to work around the compiler's problems. */ + /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/ + struct ureg_src addr = ureg_src(t->address[0]); + src.Indirect = 1; + src.IndirectFile = addr.File; + src.IndirectIndex = addr.Index; + src.IndirectSwizzle = addr.SwizzleX; + + if (src_reg->file != PROGRAM_INPUT && + src_reg->file != PROGRAM_OUTPUT) { + /* If src_reg->index was negative, it was set to zero in + * src_register(). Reassign it now. But don't do this + * for input/output regs since they get remapped while + * const buffers don't. + */ + src.Index = src_reg->index; + } + } + + return src; +} + +static void +compile_tgsi_instruction(struct st_translate *t, + const glsl_to_tgsi_instruction *inst) +{ + struct ureg_program *ureg = t->ureg; + GLuint i; + struct ureg_dst dst[1]; + struct ureg_src src[4]; + unsigned num_dst; + unsigned num_src; + + num_dst = num_inst_dst_regs(inst->op); + num_src = num_inst_src_regs(inst->op); + + if (num_dst) + dst[0] = translate_dst(t, + &inst->dst, + inst->saturate); + + for (i = 0; i < num_src; i++) + src[i] = translate_src(t, &inst->src[i]); + + switch(inst->op) { + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_CAL: + case TGSI_OPCODE_ELSE: + case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_IF: + assert(num_dst == 0); + ureg_label_insn(ureg, + inst->op, + src, num_src, + get_label(t, + inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0)); + return; + + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXD: + case TGSI_OPCODE_TXL: + case TGSI_OPCODE_TXP: + case TGSI_OPCODE_TXQ: + case TGSI_OPCODE_TXF: + src[num_src++] = t->samplers[inst->sampler]; + ureg_tex_insn(ureg, + inst->op, + dst, num_dst, + translate_texture_target(inst->tex_target, inst->tex_shadow), + src, num_src); + return; + + case TGSI_OPCODE_SCS: + dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY); + ureg_insn(ureg, inst->op, dst, num_dst, src, num_src); + break; + + default: + ureg_insn(ureg, + inst->op, + dst, num_dst, + src, num_src); + break; + } +} + +/** + * Emit the TGSI instructions to adjust the WPOS pixel center convention + * Basically, add (adjX, adjY) to the fragment position. + */ +static void +emit_adjusted_wpos(struct st_translate *t, + const struct gl_program *program, + float adjX, float adjY) +{ + struct ureg_program *ureg = t->ureg; + struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); + struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; + + /* Note that we bias X and Y and pass Z and W through unchanged. + * The shader might also use gl_FragCoord.w and .z. + */ + ureg_ADD(ureg, wpos_temp, wpos_input, + ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f)); + + t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); +} + + +/** + * Emit the TGSI instructions for inverting the WPOS y coordinate. + * This code is unavoidable because it also depends on whether + * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). + */ +static void +emit_wpos_inversion(struct st_translate *t, + const struct gl_program *program, + bool invert) +{ + struct ureg_program *ureg = t->ureg; + + /* Fragment program uses fragment position input. + * Need to replace instances of INPUT[WPOS] with temp T + * where T = INPUT[WPOS] by y is inverted. + */ + static const gl_state_index wposTransformState[STATE_LENGTH] + = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, + (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; + + /* XXX: note we are modifying the incoming shader here! Need to + * do this before emitting the constant decls below, or this + * will be missed: + */ + unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, + wposTransformState); + + struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst); + struct ureg_dst wpos_temp; + struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; + + /* MOV wpos_temp, input[wpos] + */ + if (wpos_input.File == TGSI_FILE_TEMPORARY) + wpos_temp = ureg_dst(wpos_input); + else { + wpos_temp = ureg_DECL_temporary(ureg); + ureg_MOV(ureg, wpos_temp, wpos_input); + } + + if (invert) { + /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy + */ + ureg_MAD(ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), + wpos_input, + ureg_scalar(wpostrans, 0), + ureg_scalar(wpostrans, 1)); + } else { + /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww + */ + ureg_MAD(ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), + wpos_input, + ureg_scalar(wpostrans, 2), + ureg_scalar(wpostrans, 3)); + } + + /* Use wpos_temp as position input from here on: + */ + t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); +} + + +/** + * Emit fragment position/ooordinate code. + */ +static void +emit_wpos(struct st_context *st, + struct st_translate *t, + const struct gl_program *program, + struct ureg_program *ureg) +{ + const struct gl_fragment_program *fp = + (const struct gl_fragment_program *) program; + struct pipe_screen *pscreen = st->pipe->screen; + boolean invert = FALSE; + + if (fp->OriginUpperLeft) { + /* Fragment shader wants origin in upper-left */ + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { + /* the driver supports upper-left origin */ + } + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { + /* the driver supports lower-left origin, need to invert Y */ + ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); + invert = TRUE; + } + else + assert(0); + } + else { + /* Fragment shader wants origin in lower-left */ + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) + /* the driver supports lower-left origin */ + ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) + /* the driver supports upper-left origin, need to invert Y */ + invert = TRUE; + else + assert(0); + } + + if (fp->PixelCenterInteger) { + /* Fragment shader wants pixel center integer */ + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) + /* the driver supports pixel center integer */ + ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) + /* the driver supports pixel center half integer, need to bias X,Y */ + emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f); + else + assert(0); + } + else { + /* Fragment shader wants pixel center half integer */ + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { + /* the driver supports pixel center half integer */ + } + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { + /* the driver supports pixel center integer, need to bias X,Y */ + ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); + emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f); + } + else + assert(0); + } + + /* we invert after adjustment so that we avoid the MOV to temporary, + * and reuse the adjustment ADD instead */ + emit_wpos_inversion(t, program, invert); +} + +/** + * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. + * TGSI uses +1 for front, -1 for back. + * This function converts the TGSI value to the GL value. Simply clamping/ + * saturating the value to [0,1] does the job. + */ +static void +emit_face_var(struct st_translate *t) +{ + struct ureg_program *ureg = t->ureg; + struct ureg_dst face_temp = ureg_DECL_temporary(ureg); + struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]]; + + /* MOV_SAT face_temp, input[face] */ + face_temp = ureg_saturate(face_temp); + ureg_MOV(ureg, face_temp, face_input); + + /* Use face_temp as face input from here on: */ + t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); +} + +static void +emit_edgeflags(struct st_translate *t) +{ + struct ureg_program *ureg = t->ureg; + struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]]; + struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; + + ureg_MOV(ureg, edge_dst, edge_src); +} + +/** + * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. + * \param program the program to translate + * \param numInputs number of input registers used + * \param inputMapping maps Mesa fragment program inputs to TGSI generic + * input indexes + * \param inputSemanticName the TGSI_SEMANTIC flag for each input + * \param inputSemanticIndex the semantic index (ex: which texcoord) for + * each input + * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input + * \param numOutputs number of output registers used + * \param outputMapping maps Mesa fragment program outputs to TGSI + * generic outputs + * \param outputSemanticName the TGSI_SEMANTIC flag for each output + * \param outputSemanticIndex the semantic index (ex: which texcoord) for + * each output + * + * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY + */ +extern "C" enum pipe_error +st_translate_program( + struct gl_context *ctx, + uint procType, + struct ureg_program *ureg, + glsl_to_tgsi_visitor *program, + const struct gl_program *proginfo, + GLuint numInputs, + const GLuint inputMapping[], + const ubyte inputSemanticName[], + const ubyte inputSemanticIndex[], + const GLuint interpMode[], + GLuint numOutputs, + const GLuint outputMapping[], + const ubyte outputSemanticName[], + const ubyte outputSemanticIndex[], + boolean passthrough_edgeflags) +{ + struct st_translate translate, *t; + unsigned i; + enum pipe_error ret = PIPE_OK; + + assert(numInputs <= Elements(t->inputs)); + assert(numOutputs <= Elements(t->outputs)); + + t = &translate; + memset(t, 0, sizeof *t); + + t->procType = procType; + t->inputMapping = inputMapping; + t->outputMapping = outputMapping; + t->ureg = ureg; + t->pointSizeOutIndex = -1; + t->prevInstWrotePointSize = GL_FALSE; + + /* + * Declare input attributes. + */ + if (procType == TGSI_PROCESSOR_FRAGMENT) { + for (i = 0; i < numInputs; i++) { + t->inputs[i] = ureg_DECL_fs_input(ureg, + inputSemanticName[i], + inputSemanticIndex[i], + interpMode[i]); + } + + if (proginfo->InputsRead & FRAG_BIT_WPOS) { + /* Must do this after setting up t->inputs, and before + * emitting constant references, below: + */ + emit_wpos(st_context(ctx), t, proginfo, ureg); + } + + if (proginfo->InputsRead & FRAG_BIT_FACE) + emit_face_var(t); + + /* + * Declare output attributes. + */ + for (i = 0; i < numOutputs; i++) { + switch (outputSemanticName[i]) { + case TGSI_SEMANTIC_POSITION: + t->outputs[i] = ureg_DECL_output(ureg, + TGSI_SEMANTIC_POSITION, /* Z/Depth */ + outputSemanticIndex[i]); + t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z); + break; + case TGSI_SEMANTIC_STENCIL: + t->outputs[i] = ureg_DECL_output(ureg, + TGSI_SEMANTIC_STENCIL, /* Stencil */ + outputSemanticIndex[i]); + t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y); + break; + case TGSI_SEMANTIC_COLOR: + t->outputs[i] = ureg_DECL_output(ureg, + TGSI_SEMANTIC_COLOR, + outputSemanticIndex[i]); + break; + default: + assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR"); + return PIPE_ERROR_BAD_INPUT; + } + } + } + else if (procType == TGSI_PROCESSOR_GEOMETRY) { + for (i = 0; i < numInputs; i++) { + t->inputs[i] = ureg_DECL_gs_input(ureg, + i, + inputSemanticName[i], + inputSemanticIndex[i]); + } + + for (i = 0; i < numOutputs; i++) { + t->outputs[i] = ureg_DECL_output(ureg, + outputSemanticName[i], + outputSemanticIndex[i]); + } + } + else { + assert(procType == TGSI_PROCESSOR_VERTEX); + + for (i = 0; i < numInputs; i++) { + t->inputs[i] = ureg_DECL_vs_input(ureg, i); + } + + for (i = 0; i < numOutputs; i++) { + t->outputs[i] = ureg_DECL_output(ureg, + outputSemanticName[i], + outputSemanticIndex[i]); + if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) { + /* Writing to the point size result register requires special + * handling to implement clamping. + */ + static const gl_state_index pointSizeClampState[STATE_LENGTH] + = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; + /* XXX: note we are modifying the incoming shader here! Need to + * do this before emitting the constant decls below, or this + * will be missed. + */ + unsigned pointSizeClampConst = + _mesa_add_state_reference(proginfo->Parameters, + pointSizeClampState); + struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg); + t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst); + t->pointSizeResult = t->outputs[i]; + t->pointSizeOutIndex = i; + t->outputs[i] = psizregtemp; + } + } + if (passthrough_edgeflags) + emit_edgeflags(t); + } + + /* Declare address register. + */ + if (program->num_address_regs > 0) { + assert(program->num_address_regs == 1); + t->address[0] = ureg_DECL_address(ureg); + } + + /* Declare misc input registers + */ + { + GLbitfield sysInputs = proginfo->SystemValuesRead; + unsigned numSys = 0; + for (i = 0; sysInputs; i++) { + if (sysInputs & (1 << i)) { + unsigned semName = mesa_sysval_to_semantic[i]; + t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0); + numSys++; + sysInputs &= ~(1 << i); + } + } + } + + if (program->indirect_addr_temps) { + /* If temps are accessed with indirect addressing, declare temporaries + * in sequential order. Else, we declare them on demand elsewhere. + * (Note: the number of temporaries is equal to program->next_temp) + */ + for (i = 0; i < (unsigned)program->next_temp; i++) { + /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ + t->temps[i] = ureg_DECL_temporary(t->ureg); + } + } + + /* Emit constants and uniforms. TGSI uses a single index space for these, + * so we put all the translated regs in t->constants. + */ + if (proginfo->Parameters) { + t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0])); + if (t->constants == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out; + } + + for (i = 0; i < proginfo->Parameters->NumParameters; i++) { + switch (proginfo->Parameters->Parameters[i].Type) { + case PROGRAM_ENV_PARAM: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_UNIFORM: + t->constants[i] = ureg_DECL_constant(ureg, i); + break; + + /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect + * addressing of the const buffer. + * FIXME: Be smarter and recognize param arrays: + * indirect addressing is only valid within the referenced + * array. + */ + case PROGRAM_CONSTANT: + if (program->indirect_addr_consts) + t->constants[i] = ureg_DECL_constant(ureg, i); + else + t->constants[i] = emit_immediate(t, + proginfo->Parameters->ParameterValues[i], + proginfo->Parameters->Parameters[i].DataType, + 4); + break; + default: + break; + } + } + } + + /* Emit immediate values. + */ + t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src)); + if (t->immediates == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out; + } + i = 0; + foreach_iter(exec_list_iterator, iter, program->immediates) { + immediate_storage *imm = (immediate_storage *)iter.get(); + t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size); + } + + /* texture samplers */ + for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { + if (program->samplers_used & (1 << i)) { + t->samplers[i] = ureg_DECL_sampler(ureg, i); + } + } + + /* Emit each instruction in turn: + */ + foreach_iter(exec_list_iterator, iter, program->instructions) { + set_insn_start(t, ureg_get_instruction_number(ureg)); + compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get()); + + if (t->prevInstWrotePointSize && proginfo->Id) { + /* The previous instruction wrote to the (fake) vertex point size + * result register. Now we need to clamp that value to the min/max + * point size range, putting the result into the real point size + * register. + * Note that we can't do this easily at the end of program due to + * possible early return. + */ + set_insn_start(t, ureg_get_instruction_number(ureg)); + ureg_MAX(t->ureg, + ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), + ureg_src(t->outputs[t->pointSizeOutIndex]), + ureg_swizzle(t->pointSizeConst, 1,1,1,1)); + ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), + ureg_src(t->outputs[t->pointSizeOutIndex]), + ureg_swizzle(t->pointSizeConst, 2,2,2,2)); + } + t->prevInstWrotePointSize = GL_FALSE; + } + + /* Fix up all emitted labels: + */ + for (i = 0; i < t->labels_count; i++) { + ureg_fixup_label(ureg, t->labels[i].token, + t->insn[t->labels[i].branch_target]); + } + +out: + FREE(t->insn); + FREE(t->labels); + FREE(t->constants); + FREE(t->immediates); + + if (t->error) { + debug_printf("%s: translate error flag set\n", __FUNCTION__); + } + + return ret; +} +/* ----------------------------- End TGSI code ------------------------------ */ + +/** + * Convert a shader's GLSL IR into a Mesa gl_program, although without + * generating Mesa IR. + */ +static struct gl_program * +get_mesa_program(struct gl_context *ctx, + struct gl_shader_program *shader_program, + struct gl_shader *shader) +{ + glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); + struct gl_program *prog; + GLenum target; + const char *target_string; + bool progress; + struct gl_shader_compiler_options *options = + &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)]; + + switch (shader->Type) { + case GL_VERTEX_SHADER: + target = GL_VERTEX_PROGRAM_ARB; + target_string = "vertex"; + break; + case GL_FRAGMENT_SHADER: + target = GL_FRAGMENT_PROGRAM_ARB; + target_string = "fragment"; + break; + case GL_GEOMETRY_SHADER: + target = GL_GEOMETRY_PROGRAM_NV; + target_string = "geometry"; + break; + default: + assert(!"should not be reached"); + return NULL; + } + + validate_ir_tree(shader->ir); + + prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name); + if (!prog) + return NULL; + prog->Parameters = _mesa_new_parameter_list(); + prog->Varying = _mesa_new_parameter_list(); + prog->Attributes = _mesa_new_parameter_list(); + v->ctx = ctx; + v->prog = prog; + v->shader_program = shader_program; + v->options = options; + v->glsl_version = ctx->Const.GLSLVersion; + v->native_integers = ctx->Const.NativeIntegers; + + add_uniforms_to_parameters_list(shader_program, shader, prog); + + /* Emit intermediate IR for main(). */ + visit_exec_list(shader->ir, v); + + /* Now emit bodies for any functions that were used. */ + do { + progress = GL_FALSE; + + foreach_iter(exec_list_iterator, iter, v->function_signatures) { + function_entry *entry = (function_entry *)iter.get(); + + if (!entry->bgn_inst) { + v->current_function = entry; + + entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB); + entry->bgn_inst->function = entry; + + visit_exec_list(&entry->sig->body, v); + + glsl_to_tgsi_instruction *last; + last = (glsl_to_tgsi_instruction *)v->instructions.get_tail(); + if (last->op != TGSI_OPCODE_RET) + v->emit(NULL, TGSI_OPCODE_RET); + + glsl_to_tgsi_instruction *end; + end = v->emit(NULL, TGSI_OPCODE_ENDSUB); + end->function = entry; + + progress = GL_TRUE; + } + } + } while (progress); + +#if 0 + /* Print out some information (for debugging purposes) used by the + * optimization passes. */ + for (i=0; i < v->next_temp; i++) { + int fr = v->get_first_temp_read(i); + int fw = v->get_first_temp_write(i); + int lr = v->get_last_temp_read(i); + int lw = v->get_last_temp_write(i); + + printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw); + assert(fw <= fr); + } +#endif + + /* Remove reads to output registers, and to varyings in vertex shaders. */ + v->remove_output_reads(PROGRAM_OUTPUT); + if (target == GL_VERTEX_PROGRAM_ARB) + v->remove_output_reads(PROGRAM_VARYING); + + /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ + v->simplify_cmp(); + v->copy_propagate(); + while (v->eliminate_dead_code_advanced()); + + /* FIXME: These passes to optimize temporary registers don't work when there + * is indirect addressing of the temporary register space. We need proper + * array support so that we don't have to give up these passes in every + * shader that uses arrays. + */ + if (!v->indirect_addr_temps) { + v->eliminate_dead_code(); + v->merge_registers(); + v->renumber_registers(); + } + + /* Write the END instruction. */ + v->emit(NULL, TGSI_OPCODE_END); + + if (ctx->Shader.Flags & GLSL_DUMP) { + printf("\n"); + printf("GLSL IR for linked %s program %d:\n", target_string, + shader_program->Name); + _mesa_print_ir(shader->ir, NULL); + printf("\n"); + printf("\n"); + } + + prog->Instructions = NULL; + prog->NumInstructions = 0; + + do_set_program_inouts(shader->ir, prog); + count_resources(v, prog); + + check_resources(ctx, shader_program, v, prog); + + _mesa_reference_program(ctx, &shader->Program, prog); + + struct st_vertex_program *stvp; + struct st_fragment_program *stfp; + struct st_geometry_program *stgp; + + switch (shader->Type) { + case GL_VERTEX_SHADER: + stvp = (struct st_vertex_program *)prog; + stvp->glsl_to_tgsi = v; + break; + case GL_FRAGMENT_SHADER: + stfp = (struct st_fragment_program *)prog; + stfp->glsl_to_tgsi = v; + break; + case GL_GEOMETRY_SHADER: + stgp = (struct st_geometry_program *)prog; + stgp->glsl_to_tgsi = v; + break; + default: + assert(!"should not be reached"); + return NULL; + } + + return prog; +} + +extern "C" { + +struct gl_shader * +st_new_shader(struct gl_context *ctx, GLuint name, GLuint type) +{ + struct gl_shader *shader; + assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER || + type == GL_GEOMETRY_SHADER_ARB); + shader = rzalloc(NULL, struct gl_shader); + if (shader) { + shader->Type = type; + shader->Name = name; + _mesa_init_shader(ctx, shader); + } + return shader; +} + +struct gl_shader_program * +st_new_shader_program(struct gl_context *ctx, GLuint name) +{ + struct gl_shader_program *shProg; + shProg = rzalloc(NULL, struct gl_shader_program); + if (shProg) { + shProg->Name = name; + _mesa_init_shader_program(ctx, shProg); + } + return shProg; +} + +/** + * Link a shader. + * Called via ctx->Driver.LinkShader() + * This actually involves converting GLSL IR into an intermediate TGSI-like IR + * with code lowering and other optimizations. + */ +GLboolean +st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) +{ + assert(prog->LinkStatus); + + for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { + if (prog->_LinkedShaders[i] == NULL) + continue; + + bool progress; + exec_list *ir = prog->_LinkedShaders[i]->ir; + const struct gl_shader_compiler_options *options = + &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)]; + + do { + progress = false; + + /* Lowering */ + do_mat_op_to_vec(ir); + lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 + | LOG_TO_LOG2 + | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); + + progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; + + progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress; + + progress = lower_quadop_vector(ir, false) || progress; + + if (options->EmitNoIfs) { + progress = lower_discard(ir) || progress; + progress = lower_if_to_cond_assign(ir) || progress; + } + + if (options->EmitNoNoise) + progress = lower_noise(ir) || progress; + + /* If there are forms of indirect addressing that the driver + * cannot handle, perform the lowering pass. + */ + if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput + || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) + progress = + lower_variable_index_to_cond_assign(ir, + options->EmitNoIndirectInput, + options->EmitNoIndirectOutput, + options->EmitNoIndirectTemp, + options->EmitNoIndirectUniform) + || progress; + + progress = do_vec_index_to_cond_assign(ir) || progress; + } while (progress); + + validate_ir_tree(ir); + } + + for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { + struct gl_program *linked_prog; + + if (prog->_LinkedShaders[i] == NULL) + continue; + + linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]); + + if (linked_prog) { + bool ok = true; + + switch (prog->_LinkedShaders[i]->Type) { + case GL_VERTEX_SHADER: + _mesa_reference_vertprog(ctx, &prog->VertexProgram, + (struct gl_vertex_program *)linked_prog); + ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, + linked_prog); + break; + case GL_FRAGMENT_SHADER: + _mesa_reference_fragprog(ctx, &prog->FragmentProgram, + (struct gl_fragment_program *)linked_prog); + ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB, + linked_prog); + break; + case GL_GEOMETRY_SHADER: + _mesa_reference_geomprog(ctx, &prog->GeometryProgram, + (struct gl_geometry_program *)linked_prog); + ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV, + linked_prog); + break; + } + if (!ok) { + return GL_FALSE; + } + } + + _mesa_reference_program(ctx, &linked_prog, NULL); + } + + return GL_TRUE; +} + + +/** + * Link a GLSL shader program. Called via glLinkProgram(). + */ +void +st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) +{ + unsigned int i; + + _mesa_clear_shader_program_data(ctx, prog); + + prog->LinkStatus = GL_TRUE; + + for (i = 0; i < prog->NumShaders; i++) { + if (!prog->Shaders[i]->CompileStatus) { + fail_link(prog, "linking with uncompiled shader"); + prog->LinkStatus = GL_FALSE; + } + } + + prog->Varying = _mesa_new_parameter_list(); + _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL); + _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL); + _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL); + + if (prog->LinkStatus) { + link_shaders(ctx, prog); + } + + if (prog->LinkStatus) { + if (!ctx->Driver.LinkShader(ctx, prog)) { + prog->LinkStatus = GL_FALSE; + } + } + + set_uniform_initializers(ctx, prog); + + if (ctx->Shader.Flags & GLSL_DUMP) { + if (!prog->LinkStatus) { + printf("GLSL shader program %d failed to link\n", prog->Name); + } + + if (prog->InfoLog && prog->InfoLog[0] != 0) { + printf("GLSL shader program %d info log:\n", prog->Name); + printf("%s\n", prog->InfoLog); + } + } +} + +} /* extern "C" */ diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h new file mode 100644 index 00000000000..d877471785d --- /dev/null +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h @@ -0,0 +1,72 @@ +/* + * Copyright © 2010 Intel Corporation + * Copyright © 2011 Bryan Cain + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "main/glheader.h" +#include "tgsi/tgsi_ureg.h" + +struct gl_context; +struct gl_shader; +struct gl_shader_program; +struct glsl_to_tgsi_visitor; + +enum pipe_error st_translate_program( + struct gl_context *ctx, + uint procType, + struct ureg_program *ureg, + struct glsl_to_tgsi_visitor *program, + const struct gl_program *proginfo, + GLuint numInputs, + const GLuint inputMapping[], + const ubyte inputSemanticName[], + const ubyte inputSemanticIndex[], + const GLuint interpMode[], + GLuint numOutputs, + const GLuint outputMapping[], + const ubyte outputSemanticName[], + const ubyte outputSemanticIndex[], + boolean passthrough_edgeflags); + +void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v); +void get_pixel_transfer_visitor(struct st_fragment_program *fp, + struct glsl_to_tgsi_visitor *original, + int scale_and_bias, int pixel_maps); +void get_bitmap_visitor(struct st_fragment_program *fp, + struct glsl_to_tgsi_visitor *original, + int samplerIndex); + +struct gl_shader *st_new_shader(struct gl_context *ctx, GLuint name, GLuint type); + +struct gl_shader_program * +st_new_shader_program(struct gl_context *ctx, GLuint name); + +void st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog); +GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog); + +#ifdef __cplusplus +} +#endif diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index 7bd82aae206..d5228d387f7 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -587,7 +587,7 @@ st_context_teximage(struct st_context_iface *stctxi, internalFormat = GL_RGB; texFormat = st_ChooseTextureFormat(ctx, internalFormat, - GL_RGBA, GL_UNSIGNED_BYTE); + GL_BGRA, GL_UNSIGNED_BYTE); _mesa_init_teximage_fields(ctx, target, texImage, tex->width0, tex->height0, 1, 0, diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index a41e5b16a85..656c985d78f 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -267,7 +267,7 @@ src_register( struct st_translate *t, /** * Map mesa texture target to TGSI texture target. */ -static unsigned +unsigned translate_texture_target( GLuint textarget, GLboolean shadow ) { @@ -511,7 +511,7 @@ static void emit_ddy( struct st_translate *t, -static unsigned +unsigned translate_opcode( unsigned op ) { switch( op ) { @@ -1207,7 +1207,7 @@ st_translate_mesa_program( else t->constants[i] = ureg_DECL_immediate( ureg, - program->Parameters->ParameterValues[i], + (const float*) program->Parameters->ParameterValues[i], 4 ); break; default: diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h index 0615e52ef62..0dbdf5f6159 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.h +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h @@ -64,6 +64,12 @@ st_translate_mesa_program( void st_free_tokens(const struct tgsi_token *tokens); +unsigned +translate_opcode(unsigned op); + +unsigned +translate_texture_target(GLuint textarget, GLboolean shadow); + #if defined __cplusplus } /* extern "C" */ diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 132ebdbadc9..a4f47edfcd3 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -174,8 +174,8 @@ st_release_gp_variants(struct st_context *st, struct st_geometry_program *stgp) * \param tokensOut destination for TGSI tokens * \return pointer to cached pipe_shader object. */ -static void -st_prepare_vertex_program(struct st_context *st, +void +st_prepare_vertex_program(struct gl_context *ctx, struct st_vertex_program *stvp) { GLuint attr; @@ -184,9 +184,10 @@ st_prepare_vertex_program(struct st_context *st, stvp->num_outputs = 0; if (stvp->Base.IsPositionInvariant) - _mesa_insert_mvp_code(st->ctx, &stvp->Base); + _mesa_insert_mvp_code(ctx, &stvp->Base); - assert(stvp->Base.Base.NumInstructions > 1); + if (!stvp->glsl_to_tgsi) + assert(stvp->Base.Base.NumInstructions > 1); /* * Determine number of inputs, the mappings between VERT_ATTRIB_x @@ -292,10 +293,13 @@ st_translate_vertex_program(struct st_context *st, enum pipe_error error; unsigned num_outputs; - st_prepare_vertex_program( st, stvp ); + st_prepare_vertex_program(st->ctx, stvp); - _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT); - _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING); + if (!stvp->glsl_to_tgsi) + { + _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT); + _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING); + } ureg = ureg_create( TGSI_PROCESSOR_VERTEX ); if (ureg == NULL) { @@ -318,22 +322,41 @@ st_translate_vertex_program(struct st_context *st, debug_printf("\n"); } - error = st_translate_mesa_program(st->ctx, - TGSI_PROCESSOR_VERTEX, - ureg, - &stvp->Base.Base, - /* inputs */ - vpv->num_inputs, - stvp->input_to_index, - NULL, /* input semantic name */ - NULL, /* input semantic index */ - NULL, - /* outputs */ - num_outputs, - stvp->result_to_output, - stvp->output_semantic_name, - stvp->output_semantic_index, - key->passthrough_edgeflags ); + if (stvp->glsl_to_tgsi) + error = st_translate_program(st->ctx, + TGSI_PROCESSOR_VERTEX, + ureg, + stvp->glsl_to_tgsi, + &stvp->Base.Base, + /* inputs */ + stvp->num_inputs, + stvp->input_to_index, + NULL, /* input semantic name */ + NULL, /* input semantic index */ + NULL, /* interp mode */ + /* outputs */ + stvp->num_outputs, + stvp->result_to_output, + stvp->output_semantic_name, + stvp->output_semantic_index, + key->passthrough_edgeflags ); + else + error = st_translate_mesa_program(st->ctx, + TGSI_PROCESSOR_VERTEX, + ureg, + &stvp->Base.Base, + /* inputs */ + vpv->num_inputs, + stvp->input_to_index, + NULL, /* input semantic name */ + NULL, /* input semantic index */ + NULL, + /* outputs */ + num_outputs, + stvp->result_to_output, + stvp->output_semantic_name, + stvp->output_semantic_index, + key->passthrough_edgeflags ); if (error) goto fail; @@ -451,6 +474,7 @@ st_translate_fragment_program(struct st_context *st, GLuint attr; const GLbitfield inputsRead = stfp->Base.Base.InputsRead; struct ureg_program *ureg; + GLboolean write_all = GL_FALSE; ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; @@ -460,9 +484,9 @@ st_translate_fragment_program(struct st_context *st, ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; uint fs_num_outputs = 0; - - - _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT); + + if (!stfp->glsl_to_tgsi) + _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT); /* * Convert Mesa program inputs to TGSI input register semantics. @@ -605,21 +629,39 @@ st_translate_fragment_program(struct st_context *st, if (write_all == GL_TRUE) ureg_property_fs_color0_writes_all_cbufs(ureg, 1); - st_translate_mesa_program(st->ctx, - TGSI_PROCESSOR_FRAGMENT, - ureg, - &stfp->Base.Base, - /* inputs */ - fs_num_inputs, - inputMapping, - input_semantic_name, - input_semantic_index, - interpMode, - /* outputs */ - fs_num_outputs, - outputMapping, - fs_output_semantic_name, - fs_output_semantic_index, FALSE ); + if (stfp->glsl_to_tgsi) + st_translate_program(st->ctx, + TGSI_PROCESSOR_FRAGMENT, + ureg, + stfp->glsl_to_tgsi, + &stfp->Base.Base, + /* inputs */ + fs_num_inputs, + inputMapping, + input_semantic_name, + input_semantic_index, + interpMode, + /* outputs */ + fs_num_outputs, + outputMapping, + fs_output_semantic_name, + fs_output_semantic_index, FALSE ); + else + st_translate_mesa_program(st->ctx, + TGSI_PROCESSOR_FRAGMENT, + ureg, + &stfp->Base.Base, + /* inputs */ + fs_num_inputs, + inputMapping, + input_semantic_name, + input_semantic_index, + interpMode, + /* outputs */ + fs_num_outputs, + outputMapping, + fs_output_semantic_name, + fs_output_semantic_index, FALSE ); stfp->tgsi.tokens = ureg_get_tokens( ureg, NULL ); ureg_destroy( ureg ); diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index c4244df939e..699b6e8ccb7 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -38,6 +38,7 @@ #include "program/program.h" #include "pipe/p_state.h" #include "st_context.h" +#include "st_glsl_to_tgsi.h" /** Fragment program variant key */ @@ -83,6 +84,7 @@ struct st_fp_variant struct st_fragment_program { struct gl_fragment_program Base; + struct glsl_to_tgsi_visitor* glsl_to_tgsi; struct pipe_shader_state tgsi; @@ -136,6 +138,7 @@ struct st_vp_variant struct st_vertex_program { struct gl_vertex_program Base; /**< The Mesa vertex program */ + struct glsl_to_tgsi_visitor* glsl_to_tgsi; /** maps a Mesa VERT_ATTRIB_x to a packed TGSI input index */ GLuint input_to_index[VERT_ATTRIB_MAX]; @@ -184,6 +187,7 @@ struct st_gp_variant struct st_geometry_program { struct gl_geometry_program Base; /**< The Mesa geometry program */ + struct glsl_to_tgsi_visitor* glsl_to_tgsi; /** map GP input back to VP output */ GLuint input_map[PIPE_MAX_SHADER_INPUTS]; @@ -276,6 +280,14 @@ st_get_gp_variant(struct st_context *st, const struct st_gp_variant_key *key); +extern void +st_prepare_vertex_program(struct gl_context *ctx, + struct st_vertex_program *stvp); + +extern GLboolean +st_prepare_fragment_program(struct gl_context *ctx, + struct st_fragment_program *stfp); + extern void st_release_vp_variants( struct st_context *st, diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index ffe7e256a56..232c286c1d1 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -221,8 +221,8 @@ st_texture_image_map(struct st_context *st, struct st_texture_image *stImage, DBG("%s \n", __FUNCTION__); - stImage->transfer = pipe_get_transfer(st->pipe, pt, stImage->level, - stImage->face + zoffset, + stImage->transfer = pipe_get_transfer(st->pipe, pt, stImage->base.Level, + stImage->base.Face + zoffset, usage, x, y, w, h); if (stImage->transfer) @@ -396,3 +396,23 @@ st_texture_image_copy(struct pipe_context *pipe, } } + +struct pipe_resource * +st_create_color_map_texture(struct gl_context *ctx) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct pipe_resource *pt; + enum pipe_format format; + const uint texSize = 256; /* simple, and usually perfect */ + + /* find an RGBA texture format */ + format = st_choose_format(pipe->screen, GL_RGBA, GL_NONE, GL_NONE, + PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); + + /* create texture for color map/table */ + pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0, + texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW); + return pt; +} + diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h index d50c3c9af79..50b7284e760 100644 --- a/src/mesa/state_tracker/st_texture.h +++ b/src/mesa/state_tracker/st_texture.h @@ -45,11 +45,6 @@ struct st_texture_image { struct gl_texture_image base; - /* These aren't stored in gl_texture_image - */ - GLuint level; - GLuint face; - /* If stImage->pt != NULL, image data is stored here. * Else if stImage->base.Data != NULL, image is stored there. * Else there is no image data. @@ -232,4 +227,8 @@ st_texture_image_copy(struct pipe_context *pipe, struct pipe_resource *src, GLuint srcLevel, GLuint face); + +extern struct pipe_resource * +st_create_color_map_texture(struct gl_context *ctx); + #endif diff --git a/src/mesa/swrast/s_aatritemp.h b/src/mesa/swrast/s_aatritemp.h index 91d4f7a10ab..77b3ae6ec7a 100644 --- a/src/mesa/swrast/s_aatritemp.h +++ b/src/mesa/swrast/s_aatritemp.h @@ -181,13 +181,20 @@ const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS]; const GLfloat dxdy = majDx / majDy; const GLfloat xAdj = dxdy < 0.0F ? -dxdy : 0.0F; - GLfloat x = pMin[0] - (yMin - iyMin) * dxdy; GLint iy; - for (iy = iyMin; iy < iyMax; iy++, x += dxdy) { +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) private(iy) firstprivate(span) +#endif + for (iy = iyMin; iy < iyMax; iy++) { + GLfloat x = pMin[0] - (yMin - iy) * dxdy; GLint ix, startX = (GLint) (x - xAdj); GLuint count; GLfloat coverage = 0.0F; +#ifdef _OPENMP + /* each thread needs to use a different (global) SpanArrays variable */ + span.array = SWRAST_CONTEXT(ctx)->SpanArrays + omp_get_thread_num(); +#endif /* skip over fragments with zero coverage */ while (startX < MAX_WIDTH) { coverage = compute_coveragef(pMin, pMid, pMax, startX, iy); @@ -228,13 +235,12 @@ coverage = compute_coveragef(pMin, pMid, pMax, ix, iy); } - if (ix <= startX) - continue; - - span.x = startX; - span.y = iy; - span.end = (GLuint) ix - (GLuint) startX; - _swrast_write_rgba_span(ctx, &span); + if (ix > startX) { + span.x = startX; + span.y = iy; + span.end = (GLuint) ix - (GLuint) startX; + _swrast_write_rgba_span(ctx, &span); + } } } else { @@ -244,13 +250,20 @@ const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS]; const GLfloat dxdy = majDx / majDy; const GLfloat xAdj = dxdy > 0 ? dxdy : 0.0F; - GLfloat x = pMin[0] - (yMin - iyMin) * dxdy; GLint iy; - for (iy = iyMin; iy < iyMax; iy++, x += dxdy) { +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) private(iy) firstprivate(span) +#endif + for (iy = iyMin; iy < iyMax; iy++) { + GLfloat x = pMin[0] - (yMin - iy) * dxdy; GLint ix, left, startX = (GLint) (x + xAdj); GLuint count, n; GLfloat coverage = 0.0F; +#ifdef _OPENMP + /* each thread needs to use a different (global) SpanArrays variable */ + span.array = SWRAST_CONTEXT(ctx)->SpanArrays + omp_get_thread_num(); +#endif /* make sure we're not past the window edge */ if (startX >= ctx->DrawBuffer->_Xmax) { startX = ctx->DrawBuffer->_Xmax - 1; @@ -296,31 +309,30 @@ ATTRIB_LOOP_END #endif - if (startX <= ix) - continue; - - n = (GLuint) startX - (GLuint) ix; + if (startX > ix) { + n = (GLuint) startX - (GLuint) ix; - left = ix + 1; + left = ix + 1; - /* shift all values to the left */ - /* XXX this is temporary */ - { - SWspanarrays *array = span.array; - GLint j; - for (j = 0; j < (GLint) n; j++) { - array->coverage[j] = array->coverage[j + left]; - COPY_CHAN4(array->rgba[j], array->rgba[j + left]); + /* shift all values to the left */ + /* XXX this is temporary */ + { + SWspanarrays *array = span.array; + GLint j; + for (j = 0; j < (GLint) n; j++) { + array->coverage[j] = array->coverage[j + left]; + COPY_CHAN4(array->rgba[j], array->rgba[j + left]); #ifdef DO_Z - array->z[j] = array->z[j + left]; + array->z[j] = array->z[j + left]; #endif + } } - } - span.x = left; - span.y = iy; - span.end = n; - _swrast_write_rgba_span(ctx, &span); + span.x = left; + span.y = iy; + span.end = n; + _swrast_write_rgba_span(ctx, &span); + } } } } diff --git a/src/mesa/swrast/s_context.c b/src/mesa/swrast/s_context.c index def1531d7ff..792b528ee34 100644 --- a/src/mesa/swrast/s_context.c +++ b/src/mesa/swrast/s_context.c @@ -417,84 +417,6 @@ _swrast_validate_blend_func(struct gl_context *ctx, GLuint n, const GLubyte mask swrast->BlendFunc( ctx, n, mask, src, dst, chanType ); } - -/** - * Make sure we have texture image data for all the textures we may need - * for subsequent rendering. - */ -static void -_swrast_validate_texture_images(struct gl_context *ctx) -{ - SWcontext *swrast = SWRAST_CONTEXT(ctx); - GLuint u; - - if (!swrast->ValidateTextureImage || !ctx->Texture._EnabledUnits) { - /* no textures enabled, or no way to validate images! */ - return; - } - - for (u = 0; u < ctx->Const.MaxTextureImageUnits; u++) { - if (ctx->Texture.Unit[u]._ReallyEnabled) { - struct gl_texture_object *texObj = ctx->Texture.Unit[u]._Current; - ASSERT(texObj); - if (texObj) { - GLuint numFaces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; - GLuint face; - for (face = 0; face < numFaces; face++) { - GLint lvl; - for (lvl = texObj->BaseLevel; lvl <= texObj->_MaxLevel; lvl++) { - struct gl_texture_image *texImg = texObj->Image[face][lvl]; - if (texImg && !texImg->Data) { - swrast->ValidateTextureImage(ctx, texObj, face, lvl); - ASSERT(texObj->Image[face][lvl]->Data); - } - } - } - } - } - } -} - - -/** - * Free the texture image data attached to all currently enabled - * textures. Meant to be called by device drivers when transitioning - * from software to hardware rendering. - */ -void -_swrast_eject_texture_images(struct gl_context *ctx) -{ - GLuint u; - - if (!ctx->Texture._EnabledUnits) { - /* no textures enabled */ - return; - } - - for (u = 0; u < ctx->Const.MaxTextureImageUnits; u++) { - if (ctx->Texture.Unit[u]._ReallyEnabled) { - struct gl_texture_object *texObj = ctx->Texture.Unit[u]._Current; - ASSERT(texObj); - if (texObj) { - GLuint numFaces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; - GLuint face; - for (face = 0; face < numFaces; face++) { - GLint lvl; - for (lvl = texObj->BaseLevel; lvl <= texObj->_MaxLevel; lvl++) { - struct gl_texture_image *texImg = texObj->Image[face][lvl]; - if (texImg && texImg->Data) { - _mesa_free_texmemory(texImg->Data); - texImg->Data = NULL; - } - } - } - } - } - } -} - - - static void _swrast_sleep( struct gl_context *ctx, GLbitfield new_state ) { @@ -640,7 +562,6 @@ _swrast_validate_derived( struct gl_context *ctx ) if (swrast->NewState & (_NEW_TEXTURE | _NEW_PROGRAM)) { _swrast_update_texture_samplers( ctx ); - _swrast_validate_texture_images(ctx); } if (swrast->NewState & (_NEW_COLOR | _NEW_PROGRAM)) @@ -772,6 +693,11 @@ _swrast_CreateContext( struct gl_context *ctx ) { GLuint i; SWcontext *swrast = (SWcontext *)CALLOC(sizeof(SWcontext)); +#ifdef _OPENMP + const GLint maxThreads = omp_get_max_threads(); +#else + const GLint maxThreads = 1; +#endif if (SWRAST_DEBUG) { _mesa_debug(ctx, "_swrast_CreateContext\n"); @@ -806,19 +732,25 @@ _swrast_CreateContext( struct gl_context *ctx ) for (i = 0; i < MAX_TEXTURE_IMAGE_UNITS; i++) swrast->TextureSample[i] = NULL; - swrast->SpanArrays = MALLOC_STRUCT(sw_span_arrays); + /* SpanArrays is global and shared by all SWspan instances. However, when + * using multiple threads, it is necessary to have one SpanArrays instance + * per thread. + */ + swrast->SpanArrays = (SWspanarrays *) MALLOC(maxThreads * sizeof(SWspanarrays)); if (!swrast->SpanArrays) { FREE(swrast); return GL_FALSE; } - swrast->SpanArrays->ChanType = CHAN_TYPE; + for(i = 0; i < maxThreads; i++) { + swrast->SpanArrays[i].ChanType = CHAN_TYPE; #if CHAN_TYPE == GL_UNSIGNED_BYTE - swrast->SpanArrays->rgba = swrast->SpanArrays->rgba8; + swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].rgba8; #elif CHAN_TYPE == GL_UNSIGNED_SHORT - swrast->SpanArrays->rgba = swrast->SpanArrays->rgba16; + swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].rgba16; #else - swrast->SpanArrays->rgba = swrast->SpanArrays->attribs[FRAG_ATTRIB_COL0]; + swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].attribs[FRAG_ATTRIB_COL0]; #endif + } /* init point span buffer */ swrast->PointSpan.primitive = GL_POINT; @@ -826,7 +758,10 @@ _swrast_CreateContext( struct gl_context *ctx ) swrast->PointSpan.facing = 0; swrast->PointSpan.array = swrast->SpanArrays; - swrast->TexelBuffer = (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * + /* TexelBuffer is also global and normally shared by all SWspan instances; + * when running with multiple threads, create one per thread. + */ + swrast->TexelBuffer = (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * maxThreads * MAX_WIDTH * 4 * sizeof(GLfloat)); if (!swrast->TexelBuffer) { FREE(swrast->SpanArrays); diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c index db102ac7946..9a91be39970 100644 --- a/src/mesa/swrast/s_span.c +++ b/src/mesa/swrast/s_span.c @@ -212,10 +212,10 @@ interpolate_active_attribs(struct gl_context *ctx, SWspan *span, GLbitfield attr static INLINE void interpolate_int_colors(struct gl_context *ctx, SWspan *span) { +#if CHAN_BITS != 32 const GLuint n = span->end; GLuint i; -#if CHAN_BITS != 32 ASSERT(!(span->arrayMask & SPAN_RGBA)); #endif diff --git a/src/mesa/swrast/s_stencil.c b/src/mesa/swrast/s_stencil.c index 5bec71c057b..fa5093a3407 100644 --- a/src/mesa/swrast/s_stencil.c +++ b/src/mesa/swrast/s_stencil.c @@ -462,7 +462,8 @@ stencil_and_ztest_span(struct gl_context *ctx, SWspan *span, GLuint face) * Some fragments passed the stencil test, apply depth test to them * and apply Zpass and Zfail stencil ops. */ - if (ctx->Depth.Test == GL_FALSE) { + if (ctx->Depth.Test == GL_FALSE || + ctx->DrawBuffer->_DepthBuffer == NULL) { /* * No depth buffer, just apply zpass stencil function to active pixels. */ diff --git a/src/mesa/swrast/s_texcombine.c b/src/mesa/swrast/s_texcombine.c index 086ed0b33d7..80b9dff3cc2 100644 --- a/src/mesa/swrast/s_texcombine.c +++ b/src/mesa/swrast/s_texcombine.c @@ -48,7 +48,11 @@ typedef float (*float4_array)[4]; static INLINE float4_array get_texel_array(SWcontext *swrast, GLuint unit) { +#ifdef _OPENMP + return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4 * omp_get_num_threads() + (MAX_WIDTH * 4 * omp_get_thread_num())); +#else return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4); +#endif } diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c index b1967e65417..86af4b7cfe2 100644 --- a/src/mesa/tnl/t_draw.c +++ b/src/mesa/tnl/t_draw.c @@ -280,10 +280,9 @@ static void bind_inputs( struct gl_context *ctx, if (!inputs[i]->BufferObj->Pointer) { bo[*nr_bo] = inputs[i]->BufferObj; (*nr_bo)++; - ctx->Driver.MapBuffer(ctx, - GL_ARRAY_BUFFER, - GL_READ_ONLY_ARB, - inputs[i]->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, inputs[i]->BufferObj->Size, + GL_MAP_READ_BIT, + inputs[i]->BufferObj); assert(inputs[i]->BufferObj->Pointer); } @@ -348,18 +347,32 @@ static void bind_indices( struct gl_context *ctx, } if (ib->obj->Name && !ib->obj->Pointer) { + unsigned map_size; + + switch (ib->type) { + case GL_UNSIGNED_BYTE: + map_size = ib->count * sizeof(GLubyte); + break; + case GL_UNSIGNED_SHORT: + map_size = ib->count * sizeof(GLushort); + break; + case GL_UNSIGNED_INT: + map_size = ib->count * sizeof(GLuint); + break; + default: + assert(0); + map_size = 0; + } + bo[*nr_bo] = ib->obj; (*nr_bo)++; - ctx->Driver.MapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER, - GL_READ_ONLY_ARB, - ib->obj); - + ptr = ctx->Driver.MapBufferRange(ctx, (GLsizeiptr) ib->ptr, map_size, + GL_MAP_READ_BIT, ib->obj); assert(ib->obj->Pointer); + } else { + ptr = ib->ptr; } - ptr = ADD_POINTERS(ib->obj->Pointer, ib->ptr); - if (ib->type == GL_UNSIGNED_INT && VB->Primitive[0].basevertex == 0) { VB->Elts = (GLuint *) ptr; } @@ -402,9 +415,7 @@ static void unmap_vbos( struct gl_context *ctx, { GLuint i; for (i = 0; i < nr_bo; i++) { - ctx->Driver.UnmapBuffer(ctx, - 0, /* target -- I don't see why this would be needed */ - bo[i]); + ctx->Driver.UnmapBuffer(ctx, bo[i]); } } diff --git a/src/mesa/tnl/t_pipeline.c b/src/mesa/tnl/t_pipeline.c index 18f095f0d4b..881d5d5f535 100644 --- a/src/mesa/tnl/t_pipeline.c +++ b/src/mesa/tnl/t_pipeline.c @@ -146,7 +146,17 @@ void _tnl_run_pipeline( struct gl_context *ctx ) _tnl_notify_pipeline_output_change( ctx ); } +#ifndef _OPENMP + /* Don't adjust FPU precision mode in case multiple threads are to be used. + * This would require that the additional threads also changed the FPU mode + * which is quite a mess as this had to be done in all parallelized sections; + * otherwise the master thread and all other threads are running in different + * modes, producing inconsistent results. + * Note that all x64 implementations don't define/use START_FAST_MATH, so + * this is "hack" is only used in i386 mode + */ START_FAST_MATH(__tmp); +#endif for (i = 0; i < tnl->pipeline.nr_stages ; i++) { struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i]; @@ -154,7 +164,9 @@ void _tnl_run_pipeline( struct gl_context *ctx ) break; } +#ifndef _OPENMP END_FAST_MATH(__tmp); +#endif } diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c index 2b8d38ef283..8474c787a46 100644 --- a/src/mesa/vbo/vbo_exec_api.c +++ b/src/mesa/vbo/vbo_exec_api.c @@ -431,6 +431,24 @@ do { \ #include "vbo_attrib_tmp.h" +/** + * Flush (draw) vertices. + * \param unmap - leave VBO unmapped after flushing? + */ +static void +vbo_exec_FlushVertices_internal(struct vbo_exec_context *exec, GLboolean unmap) +{ + if (exec->vtx.vert_count || unmap) { + vbo_exec_vtx_flush( exec, unmap ); + } + + if (exec->vtx.vertex_size) { + vbo_exec_copy_to_current( exec ); + reset_attrfv( exec ); + } +} + + #if FEATURE_beginend @@ -535,24 +553,6 @@ static void GLAPIENTRY vbo_exec_EvalPoint2( GLint i, GLint j ) /** - * Flush (draw) vertices. - * \param unmap - leave VBO unmapped after flushing? - */ -static void -vbo_exec_FlushVertices_internal(struct vbo_exec_context *exec, GLboolean unmap) -{ - if (exec->vtx.vert_count || unmap) { - vbo_exec_vtx_flush( exec, unmap ); - } - - if (exec->vtx.vertex_size) { - vbo_exec_copy_to_current( exec ); - reset_attrfv( exec ); - } -} - - -/** * Called via glBegin. */ static void GLAPIENTRY vbo_exec_Begin( GLenum mode ) @@ -947,7 +947,7 @@ void vbo_exec_vtx_destroy( struct vbo_exec_context *exec ) /* Free the vertex buffer. Unmap first if needed. */ if (_mesa_bufferobj_mapped(exec->vtx.bufferobj)) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, exec->vtx.bufferobj); + ctx->Driver.UnmapBuffer(ctx, exec->vtx.bufferobj); } _mesa_reference_buffer_object(ctx, &exec->vtx.bufferobj, NULL); } diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index b908d5aea7e..18719d5f537 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -95,10 +95,25 @@ vbo_get_minmax_index(struct gl_context *ctx, GLuint i; if (_mesa_is_bufferobj(ib->obj)) { - const GLvoid *map = - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, - GL_READ_ONLY, ib->obj); - indices = ADD_POINTERS(map, ib->ptr); + unsigned map_size; + + switch (ib->type) { + case GL_UNSIGNED_INT: + map_size = count * sizeof(GLuint); + break; + case GL_UNSIGNED_SHORT: + map_size = count * sizeof(GLushort); + break; + case GL_UNSIGNED_BYTE: + map_size = count * sizeof(GLubyte); + break; + default: + assert(0); + map_size = 0; + } + + indices = ctx->Driver.MapBufferRange(ctx, (GLsizeiptr) ib->ptr, map_size, + GL_MAP_READ_BIT, ib->obj); } else { indices = ib->ptr; } @@ -176,7 +191,7 @@ vbo_get_minmax_index(struct gl_context *ctx, } if (_mesa_is_bufferobj(ib->obj)) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, ib->obj); + ctx->Driver.UnmapBuffer(ctx, ib->obj); } } @@ -196,8 +211,8 @@ check_array_data(struct gl_context *ctx, struct gl_client_array *array, if (!array->BufferObj->Pointer) { /* need to map now */ array->BufferObj->Pointer = - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB, - GL_READ_ONLY, array->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, array->BufferObj->Size, + GL_MAP_READ_BIT, array->BufferObj); } data = ADD_POINTERS(data, array->BufferObj->Pointer); } @@ -238,7 +253,7 @@ unmap_array_buffer(struct gl_context *ctx, struct gl_client_array *array) if (array->Enabled && _mesa_is_bufferobj(array->BufferObj) && _mesa_bufferobj_mapped(array->BufferObj)) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, array->BufferObj); + ctx->Driver.UnmapBuffer(ctx, array->BufferObj); } } @@ -256,10 +271,10 @@ check_draw_elements_data(struct gl_context *ctx, GLsizei count, GLenum elemType, GLint i, k; if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) { - elemMap = ctx->Driver.MapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER_ARB, - GL_READ_ONLY, - ctx->Array.ElementArrayBufferObj); + elemMap = ctx->Driver.MapBufferRange(ctx, 0, + ctx->Array.ElementArrayBufferObj->Size, + GL_MAP_READ_BIT, + ctx->Array.ElementArrayBufferObj); elements = ADD_POINTERS(elements, elemMap); } @@ -296,8 +311,7 @@ check_draw_elements_data(struct gl_context *ctx, GLsizei count, GLenum elemType, } if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, - ctx->Array.ElementArrayBufferObj); + ctx->Driver.UnmapBuffer(ctx, ctx->Array.ElementArrayBufferObj); } unmap_array_buffer(ctx, &arrayObj->Vertex); @@ -351,8 +365,8 @@ print_draw_arrays(struct gl_context *ctx, bufName); if (bufName) { - GLubyte *p = ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB, - GL_READ_ONLY_ARB, bufObj); + GLubyte *p = ctx->Driver.MapBufferRange(ctx, 0, bufObj->Size, + GL_MAP_READ_BIT, bufObj); int offset = (int) (GLintptr) exec->array.inputs[i]->Ptr; float *f = (float *) (p + offset); int *k = (int *) f; @@ -364,7 +378,7 @@ print_draw_arrays(struct gl_context *ctx, for (i = 0; i < n; i++) { printf(" float[%d] = 0x%08x %f\n", i, k[i], f[i]); } - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, bufObj); + ctx->Driver.UnmapBuffer(ctx, bufObj); } } } @@ -715,10 +729,11 @@ vbo_exec_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count, static void dump_element_buffer(struct gl_context *ctx, GLenum type) { - const GLvoid *map = ctx->Driver.MapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER_ARB, - GL_READ_ONLY, - ctx->Array.ElementArrayBufferObj); + const GLvoid *map = + ctx->Driver.MapBufferRange(ctx, 0, + ctx->Array.ElementArrayBufferObj->Size, + GL_MAP_READ_BIT, + ctx->Array.ElementArrayBufferObj); switch (type) { case GL_UNSIGNED_BYTE: { @@ -760,8 +775,7 @@ dump_element_buffer(struct gl_context *ctx, GLenum type) ; } - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, - ctx->Array.ElementArrayBufferObj); + ctx->Driver.UnmapBuffer(ctx, ctx->Array.ElementArrayBufferObj); } @@ -909,11 +923,10 @@ vbo_exec_DrawRangeElementsBaseVertex(GLenum mode, if (0) _mesa_print_arrays(ctx); -#ifdef DEBUG /* 'end' was out of bounds, but now let's check the actual array * indexes to see if any of them are out of bounds. */ - { + if (0) { GLuint max = _mesa_max_buffer_index(ctx, count, type, indices, ctx->Array.ElementArrayBufferObj); if (max >= ctx->Array.ArrayObj->_MaxElement) { @@ -934,7 +947,6 @@ vbo_exec_DrawRangeElementsBaseVertex(GLenum mode, * upper bound wrong. */ } -#endif /* Set 'end' to the max possible legal value */ assert(ctx->Array.ArrayObj->_MaxElement >= 1); diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index 7e8d8602093..8ffaaaa4876 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -260,8 +260,6 @@ vbo_exec_bind_arrays( struct gl_context *ctx ) static void vbo_exec_vtx_unmap( struct vbo_exec_context *exec ) { - GLenum target = GL_ARRAY_BUFFER_ARB; - if (_mesa_is_bufferobj(exec->vtx.bufferobj)) { struct gl_context *ctx = exec->ctx; @@ -270,8 +268,7 @@ vbo_exec_vtx_unmap( struct vbo_exec_context *exec ) GLsizeiptr length = (exec->vtx.buffer_ptr - exec->vtx.buffer_map) * sizeof(float); if (length) - ctx->Driver.FlushMappedBufferRange(ctx, target, - offset, length, + ctx->Driver.FlushMappedBufferRange(ctx, offset, length, exec->vtx.bufferobj); } @@ -281,7 +278,7 @@ vbo_exec_vtx_unmap( struct vbo_exec_context *exec ) assert(exec->vtx.buffer_used <= VBO_VERT_BUFFER_SIZE); assert(exec->vtx.buffer_ptr != NULL); - ctx->Driver.UnmapBuffer(ctx, target, exec->vtx.bufferobj); + ctx->Driver.UnmapBuffer(ctx, exec->vtx.bufferobj); exec->vtx.buffer_map = NULL; exec->vtx.buffer_ptr = NULL; exec->vtx.max_vert = 0; @@ -296,8 +293,6 @@ void vbo_exec_vtx_map( struct vbo_exec_context *exec ) { struct gl_context *ctx = exec->ctx; - const GLenum target = GL_ARRAY_BUFFER_ARB; - const GLenum access = GL_READ_WRITE_ARB; /* for MapBuffer */ const GLenum accessRange = GL_MAP_WRITE_BIT | /* for MapBufferRange */ GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | @@ -311,12 +306,10 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec ) assert(!exec->vtx.buffer_map); assert(!exec->vtx.buffer_ptr); - if (VBO_VERT_BUFFER_SIZE > exec->vtx.buffer_used + 1024 && - ctx->Driver.MapBufferRange) { + if (VBO_VERT_BUFFER_SIZE > exec->vtx.buffer_used + 1024) { /* The VBO exists and there's room for more */ exec->vtx.buffer_map = (GLfloat *)ctx->Driver.MapBufferRange(ctx, - target, exec->vtx.buffer_used, (VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used), @@ -329,20 +322,16 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec ) /* Need to allocate a new VBO */ exec->vtx.buffer_used = 0; - ctx->Driver.BufferData(ctx, target, + ctx->Driver.BufferData(ctx, GL_ARRAY_BUFFER_ARB, VBO_VERT_BUFFER_SIZE, NULL, usage, exec->vtx.bufferobj); - if (ctx->Driver.MapBufferRange) - exec->vtx.buffer_map = - (GLfloat *)ctx->Driver.MapBufferRange(ctx, target, - 0, VBO_VERT_BUFFER_SIZE, - accessRange, - exec->vtx.bufferobj); - if (!exec->vtx.buffer_map) - exec->vtx.buffer_map = - (GLfloat *)ctx->Driver.MapBuffer(ctx, target, access, exec->vtx.bufferobj); + exec->vtx.buffer_map = + (GLfloat *)ctx->Driver.MapBufferRange(ctx, + 0, VBO_VERT_BUFFER_SIZE, + accessRange, + exec->vtx.bufferobj); assert(exec->vtx.buffer_map); exec->vtx.buffer_ptr = exec->vtx.buffer_map; } diff --git a/src/mesa/vbo/vbo_rebase.c b/src/mesa/vbo/vbo_rebase.c index 1de290ff602..a1eab752ad6 100644 --- a/src/mesa/vbo/vbo_rebase.c +++ b/src/mesa/vbo/vbo_rebase.c @@ -159,10 +159,8 @@ void vbo_rebase_prims( struct gl_context *ctx, void *ptr; if (map_ib) - ctx->Driver.MapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER, - GL_READ_ONLY_ARB, - ib->obj); + ctx->Driver.MapBufferRange(ctx, 0, ib->obj->Size, GL_MAP_READ_BIT, + ib->obj); ptr = ADD_POINTERS(ib->obj->Pointer, ib->ptr); @@ -183,9 +181,7 @@ void vbo_rebase_prims( struct gl_context *ctx, } if (map_ib) - ctx->Driver.UnmapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER, - ib->obj); + ctx->Driver.UnmapBuffer(ctx, ib->obj); tmp_ib.obj = ctx->Shared->NullBufferObj; tmp_ib.ptr = tmp_indices; diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c index 9041f791edd..ad36e93329c 100644 --- a/src/mesa/vbo/vbo_save_api.c +++ b/src/mesa/vbo/vbo_save_api.c @@ -232,11 +232,10 @@ map_vertex_store(struct gl_context *ctx, assert(vertex_store->bufferobj); assert(!vertex_store->buffer); vertex_store->buffer = - (GLfloat *) ctx->Driver.MapBuffer(ctx, - GL_ARRAY_BUFFER_ARB, /* not used */ - GL_WRITE_ONLY, /* not used */ - vertex_store-> - bufferobj); + (GLfloat *) ctx->Driver.MapBufferRange(ctx, 0, + vertex_store->bufferobj->Size, + GL_MAP_WRITE_BIT, /* not used */ + vertex_store->bufferobj); assert(vertex_store->buffer); return vertex_store->buffer + vertex_store->used; @@ -247,7 +246,7 @@ static void unmap_vertex_store(struct gl_context *ctx, struct vbo_save_vertex_store *vertex_store) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, vertex_store->bufferobj); + ctx->Driver.UnmapBuffer(ctx, vertex_store->bufferobj); vertex_store->buffer = NULL; } diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c index a37af73e0db..6cda831aa85 100644 --- a/src/mesa/vbo/vbo_save_draw.c +++ b/src/mesa/vbo/vbo_save_draw.c @@ -70,7 +70,7 @@ _playback_copy_to_current(struct gl_context *ctx, else offset = node->buffer_offset; - ctx->Driver.GetBufferSubData( ctx, 0, offset, + ctx->Driver.GetBufferSubData( ctx, offset, node->vertex_size * sizeof(GLfloat), data, node->vertex_store->bufferobj ); @@ -217,10 +217,11 @@ static void vbo_save_loopback_vertex_list(struct gl_context *ctx, const struct vbo_save_vertex_list *list) { - const char *buffer = ctx->Driver.MapBuffer(ctx, - GL_ARRAY_BUFFER_ARB, - GL_READ_ONLY, /* ? */ - list->vertex_store->bufferobj); + const char *buffer = + ctx->Driver.MapBufferRange(ctx, 0, + list->vertex_store->bufferobj->Size, + GL_MAP_READ_BIT, /* ? */ + list->vertex_store->bufferobj); vbo_loopback_vertex_list(ctx, (const GLfloat *)(buffer + list->buffer_offset), @@ -230,8 +231,7 @@ vbo_save_loopback_vertex_list(struct gl_context *ctx, list->wrap_count, list->vertex_size); - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, - list->vertex_store->bufferobj); + ctx->Driver.UnmapBuffer(ctx, list->vertex_store->bufferobj); } diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c index ecca1171673..40906e38917 100644 --- a/src/mesa/vbo/vbo_split_copy.c +++ b/src/mesa/vbo/vbo_split_copy.c @@ -444,7 +444,7 @@ replay_init( struct copy_context *copy ) copy->vertex_size += attr_size(copy->array[i]); if (_mesa_is_bufferobj(vbo) && !_mesa_bufferobj_mapped(vbo)) - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY, vbo); + ctx->Driver.MapBufferRange(ctx, 0, vbo->Size, GL_MAP_READ_BIT, vbo); copy->varying[j].src_ptr = ADD_POINTERS(vbo->Pointer, copy->array[i]->Ptr); @@ -459,8 +459,8 @@ replay_init( struct copy_context *copy ) */ if (_mesa_is_bufferobj(copy->ib->obj) && !_mesa_bufferobj_mapped(copy->ib->obj)) - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY, - copy->ib->obj); + ctx->Driver.MapBufferRange(ctx, 0, copy->ib->obj->Size, GL_MAP_READ_BIT, + copy->ib->obj); srcptr = (const GLubyte *) ADD_POINTERS(copy->ib->obj->Pointer, copy->ib->ptr); @@ -564,14 +564,14 @@ replay_finish( struct copy_context *copy ) for (i = 0; i < copy->nr_varying; i++) { struct gl_buffer_object *vbo = copy->varying[i].array->BufferObj; if (_mesa_is_bufferobj(vbo) && _mesa_bufferobj_mapped(vbo)) - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, vbo); + ctx->Driver.UnmapBuffer(ctx, vbo); } /* Unmap index buffer: */ if (_mesa_is_bufferobj(copy->ib->obj) && _mesa_bufferobj_mapped(copy->ib->obj)) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, copy->ib->obj); + ctx->Driver.UnmapBuffer(ctx, copy->ib->obj); } } diff --git a/src/mesa/x86-64/xform4.S b/src/mesa/x86-64/xform4.S index 6141e434679..5abd5a25de5 100644 --- a/src/mesa/x86-64/xform4.S +++ b/src/mesa/x86-64/xform4.S @@ -118,7 +118,7 @@ p4_constants: .byte 0x00, 0x00, 0x00, 0x00 .byte 0x00, 0x00, 0x00, 0x00 .byte 0x00, 0x00, 0x00, 0x00 -.float 0f+1.0 +.float 1.0 .text .align 16 |