summaryrefslogtreecommitdiffstats
path: root/src/mesa
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/Makefile7
-rw-r--r--src/mesa/SConscript1
-rw-r--r--src/mesa/drivers/dri/common/dri_metaops.c1
-rw-r--r--src/mesa/drivers/dri/i810/i810render.c2
-rw-r--r--src/mesa/drivers/dri/i915/Makefile2
-rw-r--r--src/mesa/drivers/dri/i915/intel_render.c2
-rw-r--r--src/mesa/drivers/dri/i965/Makefile2
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_util.c5
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_optimize.c588
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h124
-rw-r--r--src/mesa/drivers/dri/i965/brw_util.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c14
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.c30
-rw-r--r--src/mesa/drivers/dri/intel/intel_extensions_es2.c1
-rw-r--r--src/mesa/drivers/dri/intel/intel_pixel_bitmap.c1
-rw-r--r--src/mesa/drivers/dri/intel/intel_regions.c6
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_copy.c2
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_format.c36
-rw-r--r--src/mesa/drivers/dri/intel/server/i830_dri.h62
-rw-r--r--src/mesa/drivers/dri/intel/server/intel.h331
-rw-r--r--src/mesa/drivers/dri/mach64/mach64_ioctl.h3
-rw-r--r--src/mesa/drivers/dri/mga/mgarender.c2
-rw-r--r--src/mesa/drivers/dri/nouveau/nouveau_fbo.c7
-rw-r--r--src/mesa/drivers/dri/nouveau/nouveau_texture.c57
-rw-r--r--src/mesa/drivers/dri/nouveau/nv20_state_fb.c5
-rw-r--r--src/mesa/drivers/dri/nouveau/nv20_state_tex.c3
-rw-r--r--src/mesa/drivers/dri/r200/r200_swtcl.c2
-rw-r--r--src/mesa/drivers/dri/r200/r200_tcl.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c8
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c268
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c26
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.c26
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.h2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c128
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.h22
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.c43
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h5
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c39
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c260
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h10
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_optimize.c3
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c38
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c23
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.c2
-rw-r--r--src/mesa/drivers/dri/r300/r300_reg.h21
-rw-r--r--src/mesa/drivers/dri/r300/r300_render.c2
-rw-r--r--src/mesa/drivers/dri/r300/r300_texstate.c4
-rw-r--r--src/mesa/drivers/dri/r600/r600_blit.c33
-rw-r--r--src/mesa/drivers/dri/r600/r600_context.c4
-rw-r--r--src/mesa/drivers/dri/r600/r600_tex.c2
-rw-r--r--src/mesa/drivers/dri/r600/r600_texstate.c10
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.c149
-rw-r--r--src/mesa/drivers/dri/r600/r700_chip.c57
-rw-r--r--src/mesa/drivers/dri/r600/r700_clear.c3
-rw-r--r--src/mesa/drivers/dri/r600/r700_render.c65
-rw-r--r--src/mesa/drivers/dri/r600/r700_vertprog.c6
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_chipset.h2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common.c15
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common_context.c58
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common_context.h8
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_pixel_read.c3
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_screen.c74
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_screen.h7
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_span.c192
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_swtcl.c5
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_tcl.c2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_tex_copy.c6
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_texture.c2
-rw-r--r--src/mesa/drivers/dri/savage/savagerender.c2
-rw-r--r--src/mesa/drivers/dri/unichrome/via_render.c2
-rw-r--r--src/mesa/main/arbprogram.h1
-rw-r--r--src/mesa/main/arrayobj.h2
-rw-r--r--src/mesa/main/attrib.c1
-rw-r--r--src/mesa/main/bufferobj.h2
-rw-r--r--src/mesa/main/clear.c1
-rw-r--r--src/mesa/main/clear.h2
-rw-r--r--src/mesa/main/clip.h2
-rw-r--r--src/mesa/main/colormac.h2
-rw-r--r--src/mesa/main/config.h2
-rw-r--r--src/mesa/main/context.c15
-rw-r--r--src/mesa/main/convolve.c1
-rw-r--r--src/mesa/main/debug.h3
-rw-r--r--src/mesa/main/depthstencil.h1
-rw-r--r--src/mesa/main/fbobject.c5
-rw-r--r--src/mesa/main/fbobject.h4
-rw-r--r--src/mesa/main/fog.c1
-rw-r--r--src/mesa/main/formats.c1
-rw-r--r--src/mesa/main/formats.h2
-rw-r--r--src/mesa/main/framebuffer.c1
-rw-r--r--src/mesa/main/framebuffer.h1
-rw-r--r--src/mesa/main/get.h2
-rw-r--r--src/mesa/main/histogram.c1
-rw-r--r--src/mesa/main/image.c1
-rw-r--r--src/mesa/main/imports.c2
-rw-r--r--src/mesa/main/mm.c5
-rw-r--r--src/mesa/main/mm.h3
-rw-r--r--src/mesa/main/mtypes.h24
-rw-r--r--src/mesa/main/multisample.h1
-rw-r--r--src/mesa/main/nvprogram.h2
-rw-r--r--src/mesa/main/pixelstore.h1
-rw-r--r--src/mesa/main/querymatrix.c6
-rw-r--r--src/mesa/main/remap.h1
-rw-r--r--src/mesa/main/renderbuffer.h5
-rw-r--r--src/mesa/main/restart.h1
-rw-r--r--src/mesa/main/shared.c1
-rw-r--r--src/mesa/main/shared.h1
-rw-r--r--src/mesa/main/syncobj.h5
-rw-r--r--src/mesa/main/texcompress.c1
-rw-r--r--src/mesa/main/texcompress_fxt1.c2
-rw-r--r--src/mesa/main/texcompress_fxt1.h4
-rw-r--r--src/mesa/main/texcompress_s3tc.c2
-rw-r--r--src/mesa/main/texfetch.c2
-rw-r--r--src/mesa/main/texgen.h5
-rw-r--r--src/mesa/main/texgetimage.h1
-rw-r--r--src/mesa/main/texrender.c1
-rw-r--r--src/mesa/main/texrender.h1
-rw-r--r--src/mesa/main/texstate.h1
-rw-r--r--src/mesa/main/texstore.c1
-rw-r--r--src/mesa/main/uniforms.h4
-rw-r--r--src/mesa/main/viewport.h2
-rw-r--r--src/mesa/main/vtxfmt.h3
-rw-r--r--src/mesa/math/m_matrix.h2
-rw-r--r--src/mesa/math/m_translate.c2
-rw-r--r--src/mesa/math/m_translate.h3
-rw-r--r--src/mesa/math/m_xform.h4
-rw-r--r--src/mesa/program/arbprogparse.c2
-rw-r--r--src/mesa/program/hash_table.h2
-rw-r--r--src/mesa/program/nvfragparse.h1
-rw-r--r--src/mesa/program/nvvertparse.c5
-rw-r--r--src/mesa/program/nvvertparse.h1
-rw-r--r--src/mesa/program/prog_cache.h3
-rw-r--r--src/mesa/program/prog_execute.c56
-rw-r--r--src/mesa/program/prog_execute.h1
-rw-r--r--src/mesa/program/prog_instruction.h18
-rw-r--r--src/mesa/program/prog_noise.h2
-rw-r--r--src/mesa/program/prog_optimize.c637
-rw-r--r--src/mesa/program/prog_optimize.h1
-rw-r--r--src/mesa/program/prog_parameter_layout.c1
-rw-r--r--src/mesa/program/prog_print.c2
-rw-r--r--src/mesa/program/prog_print.h10
-rw-r--r--src/mesa/program/prog_uniform.h3
-rw-r--r--src/mesa/program/program.c11
-rw-r--r--src/mesa/program/program_parse.tab.c425
-rw-r--r--src/mesa/program/program_parse.y5
-rw-r--r--src/mesa/program/programopt.h1
-rw-r--r--src/mesa/slang/library/slang_common_builtin.gc2
-rw-r--r--src/mesa/slang/slang_builtin.h4
-rw-r--r--src/mesa/slang/slang_codegen.h6
-rw-r--r--src/mesa/slang/slang_compile.c1
-rw-r--r--src/mesa/slang/slang_compile.h11
-rw-r--r--src/mesa/slang/slang_compile_function.h8
-rw-r--r--src/mesa/slang/slang_compile_operation.h4
-rw-r--r--src/mesa/slang/slang_compile_struct.h3
-rw-r--r--src/mesa/slang/slang_compile_variable.h4
-rw-r--r--src/mesa/slang/slang_emit.h6
-rw-r--r--src/mesa/slang/slang_ir.h1
-rw-r--r--src/mesa/slang/slang_label.c2
-rw-r--r--src/mesa/slang/slang_label.h5
-rw-r--r--src/mesa/slang/slang_link.c23
-rw-r--r--src/mesa/slang/slang_link.h2
-rw-r--r--src/mesa/slang/slang_log.h2
-rw-r--r--src/mesa/slang/slang_print.h6
-rw-r--r--src/mesa/slang/slang_simplify.h7
-rw-r--r--src/mesa/slang/slang_utility.h2
-rw-r--r--src/mesa/slang/slang_vartable.h3
-rw-r--r--src/mesa/state_tracker/st_atom.h2
-rw-r--r--src/mesa/state_tracker/st_atom_constbuf.h3
-rw-r--r--src/mesa/state_tracker/st_atom_depth.c2
-rw-r--r--src/mesa/state_tracker/st_atom_pixeltransfer.c1
-rw-r--r--src/mesa/state_tracker/st_atom_shader.c1
-rw-r--r--src/mesa/state_tracker/st_atom_shader.h3
-rw-r--r--src/mesa/state_tracker/st_atom_stipple.c2
-rw-r--r--src/mesa/state_tracker/st_cache.h5
-rw-r--r--src/mesa/state_tracker/st_cb_bitmap.c1
-rw-r--r--src/mesa/state_tracker/st_cb_bitmap.h5
-rw-r--r--src/mesa/state_tracker/st_cb_blit.h6
-rw-r--r--src/mesa/state_tracker/st_cb_bufferobjects.h7
-rw-r--r--src/mesa/state_tracker/st_cb_clear.c1
-rw-r--r--src/mesa/state_tracker/st_cb_clear.h3
-rw-r--r--src/mesa/state_tracker/st_cb_condrender.h2
-rw-r--r--src/mesa/state_tracker/st_cb_drawpixels.h5
-rw-r--r--src/mesa/state_tracker/st_cb_drawtex.c1
-rw-r--r--src/mesa/state_tracker/st_cb_drawtex.h5
-rw-r--r--src/mesa/state_tracker/st_cb_eglimage.c1
-rw-r--r--src/mesa/state_tracker/st_cb_eglimage.h5
-rw-r--r--src/mesa/state_tracker/st_cb_fbo.h9
-rw-r--r--src/mesa/state_tracker/st_cb_feedback.h4
-rw-r--r--src/mesa/state_tracker/st_cb_flush.h6
-rw-r--r--src/mesa/state_tracker/st_cb_program.h4
-rw-r--r--src/mesa/state_tracker/st_cb_rasterpos.h4
-rw-r--r--src/mesa/state_tracker/st_cb_readpixels.h4
-rw-r--r--src/mesa/state_tracker/st_cb_strings.h2
-rw-r--r--src/mesa/state_tracker/st_cb_texture.h7
-rw-r--r--src/mesa/state_tracker/st_cb_viewport.h7
-rw-r--r--src/mesa/state_tracker/st_cb_xformfb.h4
-rw-r--r--src/mesa/state_tracker/st_context.c6
-rw-r--r--src/mesa/state_tracker/st_context.h15
-rw-r--r--src/mesa/state_tracker/st_debug.c4
-rw-r--r--src/mesa/state_tracker/st_draw.c169
-rw-r--r--src/mesa/state_tracker/st_draw.h7
-rw-r--r--src/mesa/state_tracker/st_extensions.h2
-rw-r--r--src/mesa/state_tracker/st_format.h5
-rw-r--r--src/mesa/state_tracker/st_gen_mipmap.h4
-rw-r--r--src/mesa/state_tracker/st_gl_api.h2
-rw-r--r--src/mesa/state_tracker/st_manager.h7
-rw-r--r--src/mesa/state_tracker/st_mesa_to_tgsi.c40
-rw-r--r--src/mesa/state_tracker/st_mesa_to_tgsi.h4
-rw-r--r--src/mesa/state_tracker/st_program.c1
-rw-r--r--src/mesa/state_tracker/st_program.h7
-rw-r--r--src/mesa/state_tracker/st_texture.c4
-rw-r--r--src/mesa/swrast/s_aaline.h2
-rw-r--r--src/mesa/swrast/s_aatriangle.h2
-rw-r--r--src/mesa/swrast/s_alpha.h3
-rw-r--r--src/mesa/swrast/s_atifragshader.c2
-rw-r--r--src/mesa/swrast/s_atifragshader.h3
-rw-r--r--src/mesa/swrast/s_blend.h3
-rw-r--r--src/mesa/swrast/s_context.c1
-rw-r--r--src/mesa/swrast/s_context.h1
-rw-r--r--src/mesa/swrast/s_depth.c1
-rw-r--r--src/mesa/swrast/s_depth.h3
-rw-r--r--src/mesa/swrast/s_feedback.c1
-rw-r--r--src/mesa/swrast/s_fog.c1
-rw-r--r--src/mesa/swrast/s_fog.h3
-rw-r--r--src/mesa/swrast/s_fragprog.c2
-rw-r--r--src/mesa/swrast/s_fragprog.h3
-rw-r--r--src/mesa/swrast/s_logic.h3
-rw-r--r--src/mesa/swrast/s_masking.h3
-rw-r--r--src/mesa/swrast/s_points.c1
-rw-r--r--src/mesa/swrast/s_readpix.c1
-rw-r--r--src/mesa/swrast/s_span.c18
-rw-r--r--src/mesa/swrast/s_stencil.h3
-rw-r--r--src/mesa/swrast/s_texcombine.h3
-rw-r--r--src/mesa/swrast/s_texfilter.h3
-rw-r--r--src/mesa/swrast/s_zoom.h3
-rw-r--r--src/mesa/swrast_setup/ss_context.h3
-rw-r--r--src/mesa/swrast_setup/ss_triangle.h2
-rw-r--r--src/mesa/swrast_setup/ss_vb.h1
-rw-r--r--src/mesa/tnl/t_context.h2
-rw-r--r--src/mesa/tnl/t_rasterpos.c1
-rw-r--r--src/mesa/tnl/t_vb_cull.c1
-rw-r--r--src/mesa/tnl/t_vb_fog.c1
-rw-r--r--src/mesa/tnl/t_vb_normals.c1
-rw-r--r--src/mesa/tnl/t_vb_program.c2
-rw-r--r--src/mesa/tnl/t_vb_render.c1
-rw-r--r--src/mesa/tnl/t_vb_texgen.c1
-rw-r--r--src/mesa/tnl/t_vb_texmat.c1
-rw-r--r--src/mesa/tnl/t_vb_vertex.c1
-rw-r--r--src/mesa/vbo/vbo_exec_draw.c2
-rw-r--r--src/mesa/vf/vf.h2
-rw-r--r--src/mesa/vf/vf_generic.c1
256 files changed, 3337 insertions, 1536 deletions
diff --git a/src/mesa/Makefile b/src/mesa/Makefile
index 3e0f010671c..7073c92240b 100644
--- a/src/mesa/Makefile
+++ b/src/mesa/Makefile
@@ -20,6 +20,13 @@ MESA_CPPFLAGS := $(API_DEFINES)
ES1_CPPFLAGS := -DFEATURE_ES1=1
ES2_CPPFLAGS := -DFEATURE_ES2=1
+ifeq ($(MESA_LLVM),1)
+MESA_CPPFLAGS += $(LLVM_CFLAGS)
+ES1_CPPFLAGS += $(LLVM_CFLAGS)
+ES2_CPPFLAGS += $(LLVM_CFLAGS)
+endif
+
+
include sources.mak
# adjust object dirs
diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index 79e9b4553b7..d31b957234b 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -80,6 +80,7 @@ if env['platform'] != 'winddk':
'main/pixelstore.c',
'main/points.c',
'main/polygon.c',
+ 'main/querymatrix.c',
'main/queryobj.c',
'main/rastpos.c',
'main/readpix.c',
diff --git a/src/mesa/drivers/dri/common/dri_metaops.c b/src/mesa/drivers/dri/common/dri_metaops.c
index 86e59a8e51c..a2f404b616f 100644
--- a/src/mesa/drivers/dri/common/dri_metaops.c
+++ b/src/mesa/drivers/dri/common/dri_metaops.c
@@ -29,6 +29,7 @@
#include "main/arbprogram.h"
#include "main/arrayobj.h"
#include "main/bufferobj.h"
+#include "main/context.h"
#include "main/enable.h"
#include "main/matrix.h"
#include "main/texstate.h"
diff --git a/src/mesa/drivers/dri/i810/i810render.c b/src/mesa/drivers/dri/i810/i810render.c
index b543d4f012c..205f0cebc1c 100644
--- a/src/mesa/drivers/dri/i810/i810render.c
+++ b/src/mesa/drivers/dri/i810/i810render.c
@@ -37,6 +37,8 @@
#include "main/imports.h"
#include "main/mtypes.h"
+#include "math/m_xform.h"
+
#include "tnl/t_context.h"
#include "i810screen.h"
diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile
index 71ee753748c..65fd658c047 100644
--- a/src/mesa/drivers/dri/i915/Makefile
+++ b/src/mesa/drivers/dri/i915/Makefile
@@ -56,7 +56,7 @@ C_SOURCES = \
ASM_SOURCES =
-DRIVER_DEFINES = -I../intel -I../intel/server -DI915 \
+DRIVER_DEFINES = -I../intel -DI915 \
$(shell pkg-config libdrm --atleast-version=2.3.1 \
&& echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP")
diff --git a/src/mesa/drivers/dri/i915/intel_render.c b/src/mesa/drivers/dri/i915/intel_render.c
index ec209391ab4..add0adacb56 100644
--- a/src/mesa/drivers/dri/i915/intel_render.c
+++ b/src/mesa/drivers/dri/i915/intel_render.c
@@ -37,6 +37,8 @@
#include "main/mtypes.h"
#include "main/enums.h"
+#include "math/m_xform.h"
+
#include "tnl/t_context.h"
#include "tnl/t_vertex.h"
#include "tnl/t_pipeline.h"
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 831981558d8..e381a5c714b 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -106,7 +106,7 @@ C_SOURCES = \
ASM_SOURCES =
-DRIVER_DEFINES = -I../intel -I../intel/server
+DRIVER_DEFINES = -I../intel
INCLUDES += $(INTEL_CFLAGS)
DRI_LIB_DEPS += $(INTEL_LIBS)
diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c
index a74bbc25643..d2ac1235e46 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_util.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_util.c
@@ -192,11 +192,6 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
brw_clip_project_vertex(c, dest_ptr );
}
-
-
-
-#define MAX_MRF 16
-
void brw_clip_emit_vue(struct brw_clip_compile *c,
struct brw_indirect vert,
GLboolean allocate,
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 6b20a2979f8..f7a68cead7c 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -604,6 +604,8 @@
#define BRW_ARF_NOTIFICATION_COUNT 0x90
#define BRW_ARF_IP 0xA0
+#define BRW_MRF_COMPR4 (1 << 7)
+
#define BRW_AMASK 0
#define BRW_IMASK 1
#define BRW_LMASK 2
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 31ff86cf731..ffdddd0a388 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -984,5 +984,7 @@ void brw_set_src1( struct brw_instruction *insn,
/* brw_optimize.c */
void brw_optimize(struct brw_compile *p);
+void brw_remove_duplicate_mrf_moves(struct brw_compile *p);
+void brw_remove_grf_to_mrf_moves(struct brw_compile *p);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_optimize.c b/src/mesa/drivers/dri/i965/brw_optimize.c
index a364b158209..8aa6fb6cc6f 100644
--- a/src/mesa/drivers/dri/i965/brw_optimize.c
+++ b/src/mesa/drivers/dri/i965/brw_optimize.c
@@ -32,6 +32,594 @@
#include "brw_defines.h"
#include "brw_eu.h"
+static const struct { /* Per-opcode description table, indexed by the BRW_OPCODE_* value. */
+ char *name; /* mnemonic string for the opcode */
+ int nsrc; /* source operand count; consulted by the GRF read check */
+ int ndst; /* destination operand count; 0 makes the write checks return early */
+ GLboolean is_arith; /* non-zero for opcodes reported by brw_is_arithmetic_inst() */
+} inst_opcode[128] = { /* opcodes not listed below stay zero-initialized */
+ [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
+
+ [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 }, /* NOTE(review): ndst = 1 here while "if" has ndst = 0 -- confirm intended */
+ [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
+};
+
+/* Report the is_arith flag that inst_opcode[] records for the opcode of
+ * the given instruction.
+ */
+static INLINE
+GLboolean brw_is_arithmetic_inst(const struct brw_instruction *inst)
+{
+   const int opcode = inst->header.opcode;
+
+   return inst_opcode[opcode].is_arith;
+}
+
+/* Decoding table for stride fields: entry i is the multiplier applied to the
+ * element size when an instruction's stride field holds the value i.
+ */
+static const GLuint inst_stride[7] = {
+   0, 1, 2, 4, 8, 16, 32
+};
+
+static const GLuint inst_type_size[8] = { /* element size per BRW_REGISTER_TYPE_* value (presumably bytes -- it is added to REG_SIZE-based offsets) */
+ [BRW_REGISTER_TYPE_UD] = 4,
+ [BRW_REGISTER_TYPE_D] = 4,
+ [BRW_REGISTER_TYPE_UW] = 2,
+ [BRW_REGISTER_TYPE_W] = 2,
+ [BRW_REGISTER_TYPE_UB] = 1,
+ [BRW_REGISTER_TYPE_B] = 1,
+ [BRW_REGISTER_TYPE_F] = 4
+}; /* any type value not listed above keeps a size of 0 */
+
+/* Conservatively tell whether inst may write into the GRF range
+ * [reg_index * REG_SIZE, reg_index * REG_SIZE + size).
+ *
+ *  - An opcode without destination (ndst == 0) never writes.
+ *  - An indirectly addressed GRF destination is always reported as a write.
+ *  - For SEND, the written length is the response_length field of the
+ *    message descriptor; gen selects which descriptor layout is read.
+ *  - Otherwise the length is execution size * element size * destination
+ *    horizontal stride.
+ *
+ * Returns GL_TRUE when the written interval intersects the queried range.
+ */
+static INLINE GLboolean
+brw_is_grf_written(const struct brw_instruction *inst,
+                   int reg_index, int size,
+                   int gen)
+{
+   if (inst_opcode[inst->header.opcode].ndst == 0)
+      return GL_FALSE;
+
+   /* The target of an indirect write is unknown here: assume it hits */
+   if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT)
+      if (inst->bits1.ia1.dest_reg_file == BRW_GENERAL_REGISTER_FILE)
+         return GL_TRUE;
+
+   if (inst->bits1.da1.dest_reg_file != BRW_GENERAL_REGISTER_FILE)
+      return GL_FALSE;
+
+   const int reg_start = reg_index * REG_SIZE;
+   const int reg_end = reg_start + size;
+
+   const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type];
+   const int write_start = inst->bits1.da1.dest_reg_nr*REG_SIZE
+                         + inst->bits1.da1.dest_subreg_nr;
+   int length, write_end;
+
+   /* SEND is specific */
+   if (inst->header.opcode == BRW_OPCODE_SEND) {
+      if (gen >= 5)
+         length = inst->bits3.generic_gen5.response_length*REG_SIZE;
+      else
+         length = inst->bits3.generic.response_length*REG_SIZE;
+   }
+   else {
+      length = 1 << inst->header.execution_size;
+      length *= type_size;
+      /* The stride field is an encoding, not an element count: decode it
+       * through inst_stride[] like the source strides, so that the widest
+       * destination stride is not under-estimated.
+       */
+      length *= inst_stride[inst->bits1.da1.dest_horiz_stride];
+   }
+
+   /* If the two intervals intersect, we overwrite the register */
+   write_end = write_start + length;
+   const int left = MAX2(write_start, reg_start);
+   const int right = MIN2(write_end, reg_end);
+
+   return left < right;
+}
+
+/* Specific path for message registers since we need to handle the compr4 case.
+ *
+ * Conservatively tell whether inst may write into the MRF range
+ * [reg_index * REG_SIZE, reg_index * REG_SIZE + size).
+ *
+ *  - An opcode without destination (ndst == 0) never writes.
+ *  - An indirectly addressed MRF destination is always reported as a write.
+ *  - A SEND whose src0 register file is non-zero is treated as implicitly
+ *    moving one register into the MRF named by destreg__conditionalmod.
+ *    This is tested before the destination register file, because the
+ *    explicit destination of such a SEND need not be a message register.
+ *  - A compr4 destination writes mrf_{i} and mrf_{i+4}.
+ */
+static INLINE GLboolean
+brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size)
+{
+   if (inst_opcode[inst->header.opcode].ndst == 0)
+      return GL_FALSE;
+
+   if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT)
+      if (inst->bits1.ia1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE)
+         return GL_TRUE;
+
+   const int reg_start = reg_index * REG_SIZE;
+   const int reg_end = reg_start + size;
+
+   /* SEND may perform an implicit mov to a mrf register */
+   if (inst->header.opcode == BRW_OPCODE_SEND &&
+       inst->bits1.da1.src0_reg_file != 0) {
+      const int mrf_start = inst->header.destreg__conditionalmod;
+      const int write_start = mrf_start * REG_SIZE;
+      const int write_end = write_start + REG_SIZE;
+      const int left = MAX2(write_start, reg_start);
+      const int right = MIN2(write_end, reg_end);
+
+      if (left < right)
+         return GL_TRUE;
+   }
+
+   if (inst->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE)
+      return GL_FALSE;
+
+   const int mrf_index = inst->bits1.da1.dest_reg_nr & 0x0f;
+   const int is_compr4 = inst->bits1.da1.dest_reg_nr & BRW_MRF_COMPR4;
+   const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type];
+   /* The stride field is an encoding: decode it through inst_stride[] */
+   const int stride = inst_stride[inst->bits1.da1.dest_horiz_stride];
+
+   /* compr4 used with an execution size other than 16 elements is
+    * unexpected: conservatively consider that the register is written.
+    */
+   if (is_compr4 && inst->header.execution_size != BRW_EXECUTE_16)
+      return GL_TRUE;
+
+   /* Here we write mrf_{i} and mrf_{i+4}, i.e. two times 8 elements */
+   if (is_compr4) {
+      const int length = 8 * type_size * stride;
+
+      /* First 8-way register */
+      const int write_start0 = mrf_index*REG_SIZE
+                             + inst->bits1.da1.dest_subreg_nr;
+      const int write_end0 = write_start0 + length;
+
+      /* Second 8-way register */
+      const int write_start1 = (mrf_index+4)*REG_SIZE
+                             + inst->bits1.da1.dest_subreg_nr;
+      const int write_end1 = write_start1 + length;
+
+      /* If the intervals intersect, we overwrite the register */
+      const int left0 = MAX2(write_start0, reg_start);
+      const int right0 = MIN2(write_end0, reg_end);
+      const int left1 = MAX2(write_start1, reg_start);
+      const int right1 = MIN2(write_end1, reg_end);
+
+      return left0 < right0 || left1 < right1;
+   }
+
+   const int length = (1 << inst->header.execution_size) * type_size * stride;
+
+   /* If the two intervals intersect, we write into the register */
+   const int write_start = inst->bits1.da1.dest_reg_nr*REG_SIZE
+                         + inst->bits1.da1.dest_subreg_nr;
+   const int write_end = write_start + length;
+   const int left = MAX2(write_start, reg_start);
+   const int right = MIN2(write_end, reg_end);
+
+   return left < right;
+}
+
+/* Tell whether inst reads from the MRF range
+ * [reg_index * REG_SIZE, reg_index * REG_SIZE + size).
+ *
+ * Every opcode other than SEND is reported as not reading; a SEND whose src0
+ * is not directly addressed is always reported as reading. Otherwise the
+ * read interval is derived from the msg_length descriptor field (gen selects
+ * the descriptor layout) and from destreg__conditionalmod.
+ *
+ * NOTE(review): the original comment said that a SEND using an implicit mov
+ * reads one register less, yet the shortened interval is used when
+ * src0_reg_file is 0. The length is also shortened by 1 after it has been
+ * scaled by REG_SIZE. Both are kept exactly as they were -- confirm intent.
+ */
+static INLINE GLboolean
+brw_is_mrf_read(const struct brw_instruction *inst,
+                int reg_index, int size, int gen)
+{
+   if (inst->header.opcode != BRW_OPCODE_SEND)
+      return GL_FALSE;
+   if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT)
+      return GL_TRUE;
+
+   const int range_lo = reg_index*REG_SIZE;
+   const int range_hi = range_lo + size;
+
+   int msg_len;
+   if (gen >= 5)
+      msg_len = inst->bits3.generic_gen5.msg_length*REG_SIZE;
+   else
+      msg_len = inst->bits3.generic.msg_length*REG_SIZE;
+
+   int first_mrf = inst->header.destreg__conditionalmod;
+   if (inst->bits1.da1.src0_reg_file == 0) {
+      msg_len -= 1;
+      first_mrf += 1;
+   }
+
+   const int read_lo = first_mrf * REG_SIZE;
+   const int read_hi = read_lo + msg_len;
+
+   /* The read happens when the two intervals overlap */
+   return MAX2(read_lo, range_lo) < MIN2(read_hi, range_hi);
+}
+
+/* Walk the elements of one source region and tell whether at least one of
+ * them intersects [reg_start, reg_end).
+ *
+ * region_start is the offset of the first element, exec_log2 and width_log2
+ * are the raw execution_size and width fields (used as shift counts), and
+ * hstride / vstride are the raw stride fields, decoded through inst_stride[].
+ * A stride field outside of that table cannot be decoded, so the region is
+ * conservatively reported as intersecting.
+ */
+static INLINE GLboolean
+brw_region_hits_range(int region_start, int type_size,
+                      int exec_log2, int width_log2,
+                      int hstride, int vstride,
+                      int reg_start, int reg_end)
+{
+   const int stride_num = (int) (sizeof(inst_stride) / sizeof(inst_stride[0]));
+   int i, j;
+
+   if (hstride < 0 || hstride >= stride_num ||
+       vstride < 0 || vstride >= stride_num)
+      return GL_TRUE;
+
+   const int elem_num = 1 << exec_log2;
+   const int width = 1 << width_log2;
+   const int row_num = elem_num >> width_log2;
+   const int hs = type_size*inst_stride[hstride];
+   const int vs = type_size*inst_stride[vstride];
+   int row_start = region_start;
+
+   for (j = 0; j < row_num; ++j) {
+      int elem_start = row_start;
+      for (i = 0; i < width; ++i) {
+         const int elem_end = elem_start + type_size;
+         const int left = MAX2(elem_start, reg_start);
+         const int right = MIN2(elem_end, reg_end);
+         if (left < right)
+            return GL_TRUE;
+         elem_start += hs;
+      }
+      row_start += vs;
+   }
+
+   return GL_FALSE;
+}
+
+/* Tell whether inst reads from the GRF range
+ * [reg_index * REG_SIZE, reg_index * REG_SIZE + size).
+ *
+ * Each source the opcode has (per inst_opcode[].nsrc) is examined in turn:
+ * an indirectly addressed GRF source is always reported as a read, a direct
+ * GRF source is scanned element by element over its register region. We are
+ * a bit too conservative since complete registers skipped by a region are
+ * not taken into account.
+ *
+ * A first source that does not live in the GRF no longer ends the check:
+ * the second source is still examined.
+ */
+static INLINE GLboolean
+brw_is_grf_read(const struct brw_instruction *inst, int reg_index, int size)
+{
+   const int nsrc = inst_opcode[inst->header.opcode].nsrc;
+   const int reg_start = reg_index*REG_SIZE;
+   const int reg_end = reg_start + size;
+
+   /* First source */
+   if (nsrc >= 1) {
+      if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT &&
+          inst->bits1.ia1.src0_reg_file == BRW_GENERAL_REGISTER_FILE)
+         return GL_TRUE;
+
+      if (inst->bits1.da1.src0_reg_file == BRW_GENERAL_REGISTER_FILE &&
+          brw_region_hits_range(inst->bits2.da1.src0_reg_nr*REG_SIZE
+                                + inst->bits2.da1.src0_subreg_nr,
+                                inst_type_size[inst->bits1.da1.src0_reg_type],
+                                inst->header.execution_size,
+                                inst->bits2.da1.src0_width,
+                                inst->bits2.da1.src0_horiz_stride,
+                                inst->bits2.da1.src0_vert_stride,
+                                reg_start, reg_end))
+         return GL_TRUE;
+   }
+
+   /* Second source */
+   if (nsrc >= 2) {
+      if (inst->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT &&
+          inst->bits1.ia1.src1_reg_file == BRW_GENERAL_REGISTER_FILE)
+         return GL_TRUE;
+
+      if (inst->bits1.da1.src1_reg_file == BRW_GENERAL_REGISTER_FILE &&
+          brw_region_hits_range(inst->bits3.da1.src1_reg_nr*REG_SIZE
+                                + inst->bits3.da1.src1_subreg_nr,
+                                inst_type_size[inst->bits1.da1.src1_reg_type],
+                                inst->header.execution_size,
+                                inst->bits3.da1.src1_width,
+                                inst->bits3.da1.src1_horiz_stride,
+                                inst->bits3.da1.src1_vert_stride,
+                                reg_start, reg_end))
+         return GL_TRUE;
+   }
+
+   return GL_FALSE;
+}
+
+/* Tell whether one of the following header fields is non-zero: dependency
+ * control, thread control, mask control, saturate or debug control.
+ */
+static INLINE GLboolean
+brw_is_control_done(const struct brw_instruction *mov) {
+   const GLboolean has_control =
+      mov->header.dependency_control ||
+      mov->header.thread_control ||
+      mov->header.mask_control ||
+      mov->header.saturate ||
+      mov->header.debug_control;
+
+   return has_control;
+}
+
+/* Tell whether the predicate_control header field of the instruction is set */
+static INLINE GLboolean
+brw_is_predicated(const struct brw_instruction *mov) {
+   const int predicate = mov->header.predicate_control;
+
+   return predicate != 0;
+}
+
+/* Recognize a _straight_ grf-to-mrf move: an unpredicated MOV without any
+ * control field set, whose destination is a directly addressed float MRF
+ * (unit stride, sub-register 0) and whose source is a directly addressed
+ * float GRF region with width 8, horizontal stride 1 and vertical stride 8,
+ * sub-register 0 and neither abs nor negate.
+ *
+ * On success, returns GL_TRUE and stores the source register number in
+ * *grf_index, the low four bits of the destination register number in
+ * *mrf_index and the state of the BRW_MRF_COMPR4 bit in *is_compr4.
+ * On failure, returns GL_FALSE and leaves the three outputs untouched.
+ */
+static INLINE GLboolean
+brw_is_grf_to_mrf_mov(const struct brw_instruction *mov,
+                      int *mrf_index,
+                      int *grf_index,
+                      GLboolean *is_compr4)
+{
+   /* Any other opcode with a GRF src0 and a MRF destination is not a copy
+    * and may depend on a second source the callers do not track.
+    */
+   if (mov->header.opcode != BRW_OPCODE_MOV)
+      return GL_FALSE;
+
+   /* brw_is_control_done() already covers debug_control */
+   if (brw_is_predicated(mov) || brw_is_control_done(mov))
+      return GL_FALSE;
+
+   if (mov->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT ||
+       mov->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE ||
+       mov->bits1.da1.dest_reg_type != BRW_REGISTER_TYPE_F ||
+       mov->bits1.da1.dest_horiz_stride != BRW_HORIZONTAL_STRIDE_1 ||
+       mov->bits1.da1.dest_subreg_nr != 0)
+      return GL_FALSE;
+
+   if (mov->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT ||
+       mov->bits1.da1.src0_reg_file != BRW_GENERAL_REGISTER_FILE ||
+       mov->bits1.da1.src0_reg_type != BRW_REGISTER_TYPE_F ||
+       mov->bits2.da1.src0_width != BRW_WIDTH_8 ||
+       mov->bits2.da1.src0_horiz_stride != BRW_HORIZONTAL_STRIDE_1 ||
+       mov->bits2.da1.src0_vert_stride != BRW_VERTICAL_STRIDE_8 ||
+       mov->bits2.da1.src0_subreg_nr != 0 ||
+       mov->bits2.da1.src0_abs != 0 ||
+       mov->bits2.da1.src0_negate != 0)
+      return GL_FALSE;
+
+   *grf_index = mov->bits2.da1.src0_reg_nr;
+   *mrf_index = mov->bits1.da1.dest_reg_nr & 0x0f;
+   *is_compr4 = (mov->bits1.da1.dest_reg_nr & BRW_MRF_COMPR4) != 0;
+   return GL_TRUE;
+}
+
+/* Tell whether inst is an arithmetic instruction (per inst_opcode[]) that
+ * plainly writes the float GRF grf_index: align1 access mode, execution_size
+ * field equal to 4, directly addressed destination with unit horizontal
+ * stride and sub-register 0, and no control field set. Predication is only
+ * accepted on a SEL instruction.
+ */
+static INLINE GLboolean
+brw_is_grf_straight_write(const struct brw_instruction *inst, int grf_index)
+{
+   /* remark: no problem to predicate a SEL instruction */
+   if (brw_is_predicated(inst) && inst->header.opcode != BRW_OPCODE_SEL)
+      return GL_FALSE;
+
+   if (brw_is_control_done(inst))
+      return GL_FALSE;
+
+   if (inst->header.execution_size != 4 ||
+       inst->header.access_mode != BRW_ALIGN_1)
+      return GL_FALSE;
+
+   if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT ||
+       inst->bits1.da1.dest_reg_file != BRW_GENERAL_REGISTER_FILE ||
+       inst->bits1.da1.dest_reg_type != BRW_REGISTER_TYPE_F ||
+       inst->bits1.da1.dest_horiz_stride != BRW_HORIZONTAL_STRIDE_1)
+      return GL_FALSE;
+
+   if (inst->bits1.da1.dest_reg_nr != grf_index ||
+       inst->bits1.da1.dest_subreg_nr != 0)
+      return GL_FALSE;
+
+   if (!brw_is_arithmetic_inst(inst))
+      return GL_FALSE;
+
+   return GL_TRUE;
+}
+
+/* Compare two instructions through their first four GLuint words; they are
+ * equal when all four words match.
+ */
+static INLINE GLboolean
+brw_inst_are_equal(const struct brw_instruction *src0,
+                   const struct brw_instruction *src1)
+{
+   const GLuint *lhs = (GLuint *) src0;
+   const GLuint *rhs = (GLuint *) src1;
+   int word;
+
+   for (word = 0; word < 4; ++word) {
+      if (lhs[word] != rhs[word])
+         return GL_FALSE;
+   }
+
+   return GL_TRUE;
+}
+
+/* Copy the first four GLuint words of the instruction src into dst */
+static INLINE void
+brw_inst_copy(struct brw_instruction *dst,
+              const struct brw_instruction *src)
+{
+   GLuint *out = (GLuint *) dst;
+   const GLuint *in = (GLuint *) src;
+   int word;
+
+   for (word = 0; word < 4; ++word)
+      out[word] = in[word];
+}
+
+/* Compact p->store in place: every instruction i with removeInst[i] set is
+ * dropped, the remaining ones keep their relative order, and p->nr_insn is
+ * updated to the number of instructions kept.
+ *
+ * removeInst must hold one flag per instruction currently in p->store.
+ */
+static void brw_remove_inst(struct brw_compile *p, const GLboolean *removeInst)
+{
+   int to = 0, from;
+
+   for (from = 0; from < p->nr_insn; ++from) {
+      if (removeInst[from])
+         continue;
+      if (to != from)
+         brw_inst_copy(p->store + to, p->store + from);
+      to++;
+   }
+
+   /* "to" was advanced once per kept instruction: no need to count again */
+   p->nr_insn = to;
+}
+
+/* The gen code emitter generates a lot of duplications in the
+ * grf-to-mrf moves, for example when texture sampling with the same
+ * coordinates from multiple textures. Here, we monitor identical
+ * grf-to-mrf mov instructions and remove the repeated ones where the
+ * operands and dst haven't changed in between.
+ *
+ * If the scratch array of removal flags cannot be allocated, the program
+ * is left untouched.
+ */
+void brw_remove_duplicate_mrf_moves(struct brw_compile *p)
+{
+   const int gen = p->brw->intel.gen;
+   int i, j;
+
+   GLboolean *removeInst = calloc(p->nr_insn, sizeof(GLboolean));
+   if (!removeInst)
+      return;
+
+   for (i = 0; i < p->nr_insn; i++) {
+      if (removeInst[i])
+         continue;
+
+      const struct brw_instruction *mov = p->store + i;
+      int mrf_index, grf_index;
+      GLboolean is_compr4;
+
+      /* Only consider _straight_ grf-to-mrf moves */
+      if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4))
+         continue;
+
+      const int mrf_index0 = mrf_index;
+      const int mrf_index1 = is_compr4 ? mrf_index0+4 : mrf_index0+1;
+      const int simd16_size = 2 * REG_SIZE;
+
+      for (j = i + 1; j < p->nr_insn; j++) {
+         const struct brw_instruction *inst = p->store + j;
+
+         /* Same instruction again while nothing changed: drop it */
+         if (brw_inst_are_equal(mov, inst)) {
+            removeInst[j] = GL_TRUE;
+            continue;
+         }
+
+         /* Stop as soon as the source or one of the two destination
+          * registers is overwritten
+          */
+         if (brw_is_grf_written(inst, grf_index, simd16_size, gen) ||
+             brw_is_mrf_written(inst, mrf_index0, REG_SIZE) ||
+             brw_is_mrf_written(inst, mrf_index1, REG_SIZE))
+            break;
+      }
+   }
+
+   brw_remove_inst(p, removeInst);
+   free(removeInst);
+}
+
+/* Replace moves to MRFs where the value moved is the result of a
+ * normal arithmetic operation with computation right into the MRF.
+ *
+ * For every straight grf-to-mrf mov, the closest earlier straight write of
+ * the source grf is looked up. When neither the grf nor the target mrfs are
+ * touched between that write and the mov, and the grf is not read after the
+ * mov, the earlier instruction is retargeted to the mrf and the mov is
+ * dropped.
+ *
+ * If the scratch flag array cannot be allocated, the pass is skipped and
+ * the program is left untouched.
+ */
+void brw_remove_grf_to_mrf_moves(struct brw_compile *p)
+{
+   int i, j, prev;
+   struct brw_context *brw = p->brw;
+   const int gen = brw->intel.gen;
+   const int simd16_size = 2*REG_SIZE;
+
+   /* calloc() may return NULL for a zero-sized request and assert() vanishes
+    * under NDEBUG, so bail out explicitly: this pass is only an optimization.
+    */
+   GLboolean *removeInst = calloc(p->nr_insn, sizeof(GLboolean));
+   if (removeInst == NULL)
+      return;
+
+   for (i = 0; i < p->nr_insn; i++) {
+      if (removeInst[i])
+         continue;
+
+      struct brw_instruction *grf_inst = NULL;
+      const struct brw_instruction *mov = p->store + i;
+      int mrf_index, grf_index;
+      GLboolean is_compr4;
+
+      /* Only consider _straight_ grf-to-mrf moves */
+      if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4))
+         continue;
+
+      /* Using comp4 enables a stride of 4 for this instruction */
+      const int mrf_index0 = mrf_index;
+      const int mrf_index1 = is_compr4 ? mrf_index+4 : mrf_index+1;
+
+      /* Look where the register has been set */
+      prev = i;
+      GLboolean potential_remove = GL_FALSE;
+      while (prev--) {
+
+         /* If _one_ instruction writes the grf, we try to remove the mov */
+         struct brw_instruction *inst = p->store + prev;
+         if (brw_is_grf_straight_write(inst, grf_index)) {
+            potential_remove = GL_TRUE;
+            grf_inst = inst;
+            break;
+         }
+
+      }
+
+      if (potential_remove == GL_FALSE)
+         continue;
+      /* Tentatively flag the mov; the two scans below may veto it */
+      removeInst[i] = GL_TRUE;
+
+      /* Monitor first the section of code between the grf computation and the
+       * mov. Here we cannot read or write both mrf and grf register
+       */
+      for (j = prev + 1; j < i; ++j) {
+         struct brw_instruction *inst = p->store + j;
+         if (removeInst[j])
+            continue;
+         if (brw_is_grf_written(inst, grf_index, simd16_size, gen) ||
+             brw_is_grf_read(inst, grf_index, simd16_size) ||
+             brw_is_mrf_written(inst, mrf_index0, REG_SIZE) ||
+             brw_is_mrf_written(inst, mrf_index1, REG_SIZE) ||
+             brw_is_mrf_read(inst, mrf_index0, REG_SIZE, gen) ||
+             brw_is_mrf_read(inst, mrf_index1, REG_SIZE, gen)) {
+            removeInst[i] = GL_FALSE;
+            break;
+         }
+      }
+
+      /* After the mov, we can read or write the mrf. If the grf is overwritten,
+       * we are done
+       */
+      for (j = i + 1; j < p->nr_insn; ++j) {
+         struct brw_instruction *inst = p->store + j;
+         if (removeInst[j])
+            continue;
+
+         if (brw_is_grf_read(inst, grf_index, simd16_size)) {
+            removeInst[i] = GL_FALSE;
+            break;
+         }
+
+         if (brw_is_grf_straight_write(inst, grf_index))
+            break;
+      }
+
+      /* Note that with the top down traversal, we can safely patch the
+       * instruction that computes the grf so that it writes the mrf directly
+       */
+      if (removeInst[i]) {
+         grf_inst->bits1.da1.dest_reg_file = mov->bits1.da1.dest_reg_file;
+         grf_inst->bits1.da1.dest_reg_nr = mov->bits1.da1.dest_reg_nr;
+      }
+   }
+
+   brw_remove_inst(p, removeInst);
+   free(removeInst);
+}
+
static GLboolean
is_single_channel_dp4(struct brw_instruction *insn)
{
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 40eece276b7..af08446f2d8 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -46,68 +46,68 @@ brw_add_validated_bo(struct brw_context *brw, drm_intel_bo *bo)
}
};
-const struct brw_tracked_state brw_blend_constant_color;
-const struct brw_tracked_state brw_cc_unit;
-const struct brw_tracked_state brw_check_fallback;
-const struct brw_tracked_state brw_clip_prog;
-const struct brw_tracked_state brw_clip_unit;
-const struct brw_tracked_state brw_vs_constants;
-const struct brw_tracked_state brw_wm_constants;
-const struct brw_tracked_state brw_constant_buffer;
-const struct brw_tracked_state brw_curbe_offsets;
-const struct brw_tracked_state brw_invarient_state;
-const struct brw_tracked_state brw_gs_prog;
-const struct brw_tracked_state brw_gs_unit;
-const struct brw_tracked_state brw_line_stipple;
-const struct brw_tracked_state brw_aa_line_parameters;
-const struct brw_tracked_state brw_pipelined_state_pointers;
-const struct brw_tracked_state brw_binding_table_pointers;
-const struct brw_tracked_state brw_depthbuffer;
-const struct brw_tracked_state brw_polygon_stipple_offset;
-const struct brw_tracked_state brw_polygon_stipple;
-const struct brw_tracked_state brw_program_parameters;
-const struct brw_tracked_state brw_recalculate_urb_fence;
-const struct brw_tracked_state brw_sf_prog;
-const struct brw_tracked_state brw_sf_unit;
-const struct brw_tracked_state brw_sf_vp;
-const struct brw_tracked_state brw_state_base_address;
-const struct brw_tracked_state brw_urb_fence;
-const struct brw_tracked_state brw_vertex_state;
-const struct brw_tracked_state brw_vs_surfaces;
-const struct brw_tracked_state brw_vs_prog;
-const struct brw_tracked_state brw_vs_unit;
-const struct brw_tracked_state brw_wm_input_sizes;
-const struct brw_tracked_state brw_wm_prog;
-const struct brw_tracked_state brw_wm_samplers;
-const struct brw_tracked_state brw_wm_constant_surface;
-const struct brw_tracked_state brw_wm_surfaces;
-const struct brw_tracked_state brw_wm_binding_table;
-const struct brw_tracked_state brw_wm_unit;
-
-const struct brw_tracked_state brw_psp_urb_cbs;
-
-const struct brw_tracked_state brw_pipe_control;
-
-const struct brw_tracked_state brw_drawing_rect;
-const struct brw_tracked_state brw_indices;
-const struct brw_tracked_state brw_vertices;
-const struct brw_tracked_state brw_index_buffer;
-const struct brw_tracked_state gen6_binding_table_pointers;
-const struct brw_tracked_state gen6_blend_state;
-const struct brw_tracked_state gen6_cc_state_pointers;
-const struct brw_tracked_state gen6_clip_state;
-const struct brw_tracked_state gen6_clip_vp;
-const struct brw_tracked_state gen6_color_calc_state;
-const struct brw_tracked_state gen6_depth_stencil_state;
-const struct brw_tracked_state gen6_gs_state;
-const struct brw_tracked_state gen6_sampler_state;
-const struct brw_tracked_state gen6_scissor_state;
-const struct brw_tracked_state gen6_sf_state;
-const struct brw_tracked_state gen6_sf_vp;
-const struct brw_tracked_state gen6_urb;
-const struct brw_tracked_state gen6_viewport_state;
-const struct brw_tracked_state gen6_vs_state;
-const struct brw_tracked_state gen6_wm_state;
+extern const struct brw_tracked_state brw_blend_constant_color;
+extern const struct brw_tracked_state brw_cc_unit;
+extern const struct brw_tracked_state brw_check_fallback;
+extern const struct brw_tracked_state brw_clip_prog;
+extern const struct brw_tracked_state brw_clip_unit;
+extern const struct brw_tracked_state brw_vs_constants;
+extern const struct brw_tracked_state brw_wm_constants;
+extern const struct brw_tracked_state brw_constant_buffer;
+extern const struct brw_tracked_state brw_curbe_offsets;
+extern const struct brw_tracked_state brw_invarient_state;
+extern const struct brw_tracked_state brw_gs_prog;
+extern const struct brw_tracked_state brw_gs_unit;
+extern const struct brw_tracked_state brw_line_stipple;
+extern const struct brw_tracked_state brw_aa_line_parameters;
+extern const struct brw_tracked_state brw_pipelined_state_pointers;
+extern const struct brw_tracked_state brw_binding_table_pointers;
+extern const struct brw_tracked_state brw_depthbuffer;
+extern const struct brw_tracked_state brw_polygon_stipple_offset;
+extern const struct brw_tracked_state brw_polygon_stipple;
+extern const struct brw_tracked_state brw_program_parameters;
+extern const struct brw_tracked_state brw_recalculate_urb_fence;
+extern const struct brw_tracked_state brw_sf_prog;
+extern const struct brw_tracked_state brw_sf_unit;
+extern const struct brw_tracked_state brw_sf_vp;
+extern const struct brw_tracked_state brw_state_base_address;
+extern const struct brw_tracked_state brw_urb_fence;
+extern const struct brw_tracked_state brw_vertex_state;
+extern const struct brw_tracked_state brw_vs_surfaces;
+extern const struct brw_tracked_state brw_vs_prog;
+extern const struct brw_tracked_state brw_vs_unit;
+extern const struct brw_tracked_state brw_wm_input_sizes;
+extern const struct brw_tracked_state brw_wm_prog;
+extern const struct brw_tracked_state brw_wm_samplers;
+extern const struct brw_tracked_state brw_wm_constant_surface;
+extern const struct brw_tracked_state brw_wm_surfaces;
+extern const struct brw_tracked_state brw_wm_binding_table;
+extern const struct brw_tracked_state brw_wm_unit;
+
+extern const struct brw_tracked_state brw_psp_urb_cbs;
+
+extern const struct brw_tracked_state brw_pipe_control;
+
+extern const struct brw_tracked_state brw_drawing_rect;
+extern const struct brw_tracked_state brw_indices;
+extern const struct brw_tracked_state brw_vertices;
+extern const struct brw_tracked_state brw_index_buffer;
+extern const struct brw_tracked_state gen6_binding_table_pointers;
+extern const struct brw_tracked_state gen6_blend_state;
+extern const struct brw_tracked_state gen6_cc_state_pointers;
+extern const struct brw_tracked_state gen6_clip_state;
+extern const struct brw_tracked_state gen6_clip_vp;
+extern const struct brw_tracked_state gen6_color_calc_state;
+extern const struct brw_tracked_state gen6_depth_stencil_state;
+extern const struct brw_tracked_state gen6_gs_state;
+extern const struct brw_tracked_state gen6_sampler_state;
+extern const struct brw_tracked_state gen6_scissor_state;
+extern const struct brw_tracked_state gen6_sf_state;
+extern const struct brw_tracked_state gen6_sf_vp;
+extern const struct brw_tracked_state gen6_urb;
+extern const struct brw_tracked_state gen6_viewport_state;
+extern const struct brw_tracked_state gen6_vs_state;
+extern const struct brw_tracked_state gen6_wm_state;
/***********************************************************************
* brw_state.c
diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c
index 1db2a210d45..e878da3850d 100644
--- a/src/mesa/drivers/dri/i965/brw_util.c
+++ b/src/mesa/drivers/dri/i965/brw_util.c
@@ -30,6 +30,8 @@
*/
+#include <assert.h>
+
#include "main/mtypes.h"
#include "program/prog_parameter.h"
#include "brw_util.h"
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index a1bee2e44ab..b6b558e9a69 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -44,6 +44,7 @@ static GLboolean
brw_vs_arg_can_be_immediate(enum prog_opcode opcode, int arg)
{
int opcode_array[] = {
+ [OPCODE_MOV] = 1,
[OPCODE_ADD] = 2,
[OPCODE_CMP] = 3,
[OPCODE_DP3] = 2,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 323cfac8fa7..d9fa2e63354 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -1283,7 +1283,7 @@ void emit_fb_write(struct brw_wm_compile *c,
* + 1 for the second half we get destination + 4.
*/
brw_MOV(p,
- brw_message_reg(nr + channel + (1 << 7)),
+ brw_message_reg(nr + channel + BRW_MRF_COMPR4),
arg0[channel]);
} else {
/* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
@@ -1712,12 +1712,20 @@ void brw_wm_emit( struct brw_wm_compile *c )
inst->dst[i]->spill_slot);
}
+ /* Only properly tested on ILK */
+ if (p->brw->intel.gen == 5) {
+ brw_remove_duplicate_mrf_moves(p);
+ if (c->dispatch_width == 16)
+ brw_remove_grf_to_mrf_moves(p);
+ }
+
if (INTEL_DEBUG & DEBUG_WM) {
int i;
- printf("wm-native:\n");
- for (i = 0; i < p->nr_insn; i++)
+ printf("wm-native:\n");
+ for (i = 0; i < p->nr_insn; i++)
brw_disasm(stderr, &p->store[i], p->brw->intel.gen);
printf("\n");
}
}
+
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 5f2035d79c9..e19f44035fd 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -29,6 +29,7 @@
#include "main/glheader.h"
#include "main/context.h"
#include "main/extensions.h"
+#include "main/fbobject.h"
#include "main/framebuffer.h"
#include "main/imports.h"
#include "main/points.h"
@@ -39,8 +40,6 @@
#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
-#include "i830_dri.h"
-
#include "intel_chipset.h"
#include "intel_buffers.h"
#include "intel_tex.h"
@@ -420,7 +419,7 @@ intel_prepare_render(struct intel_context *intel)
__DRIdrawable *drawable;
drawable = driContext->driDrawablePriv;
- if (drawable->dri2.stamp != driContext->dri2.draw_stamp) {
+ if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
if (drawable->lastStamp != drawable->dri2.stamp)
intel_update_renderbuffers(driContext, drawable);
intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer);
@@ -428,7 +427,7 @@ intel_prepare_render(struct intel_context *intel)
}
drawable = driContext->driReadablePriv;
- if (drawable->dri2.stamp != driContext->dri2.read_stamp) {
+ if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
if (drawable->lastStamp != drawable->dri2.stamp)
intel_update_renderbuffers(driContext, drawable);
driContext->dri2.read_stamp = drawable->dri2.stamp;
@@ -613,6 +612,7 @@ intelInitContext(struct intel_context *intel,
__DRIscreen *sPriv = driContextPriv->driScreenPriv;
struct intel_screen *intelScreen = sPriv->private;
int bo_reuse_mode;
+ __GLcontextModes visual;
/* we can't do anything without a connection to the device */
if (intelScreen->bufmgr == NULL)
@@ -624,6 +624,11 @@ intelInitContext(struct intel_context *intel,
functions->Viewport = intel_viewport;
}
+ if (mesaVis == NULL) {
+ memset(&visual, 0, sizeof visual);
+ mesaVis = &visual;
+ }
+
if (!_mesa_initialize_context_for_api(&intel->ctx, api, mesaVis, shareCtx,
functions, (void *) intel)) {
printf("%s: failed to init mesa context\n", __FUNCTION__);
@@ -890,14 +895,21 @@ intelMakeCurrent(__DRIcontext * driContextPriv,
}
if (driContextPriv) {
- struct gl_framebuffer *fb = driDrawPriv->driverPrivate;
- struct gl_framebuffer *readFb = driReadPriv->driverPrivate;
+ struct gl_framebuffer *fb, *readFb;
+
+ if (driDrawPriv == NULL && driReadPriv == NULL) {
+ fb = _mesa_get_incomplete_framebuffer();
+ readFb = _mesa_get_incomplete_framebuffer();
+ } else {
+ fb = driDrawPriv->driverPrivate;
+ readFb = driReadPriv->driverPrivate;
+ driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
+ driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
+ }
- driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
- driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
intel_prepare_render(intel);
_mesa_make_current(&intel->ctx, fb, readFb);
-
+
/* We do this in intel_prepare_render() too, but intel->ctx.DrawBuffer
* is NULL at that point. We can't call _mesa_makecurrent()
* first, since we need the buffer size for the initial
diff --git a/src/mesa/drivers/dri/intel/intel_extensions_es2.c b/src/mesa/drivers/dri/intel/intel_extensions_es2.c
index baf8e130010..de34bbb2aec 100644
--- a/src/mesa/drivers/dri/intel/intel_extensions_es2.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions_es2.c
@@ -28,7 +28,6 @@
#include "main/extensions.h"
#include "intel_extensions.h"
-#include "utils.h"
static const char *es2_extensions[] = {
/* Used by mesa internally (cf all_mesa_extensions in ../common/utils.c) */
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index 0e2fe893fed..02c0ffce31d 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -45,6 +45,7 @@
#include "main/attrib.h"
#include "main/enable.h"
#include "main/viewport.h"
+#include "main/context.h"
#include "swrast/swrast.h"
#include "intel_screen.h"
diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c
index fe4de189600..680d18ba299 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.c
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@@ -155,6 +155,9 @@ intel_region_alloc_internal(struct intel_context *intel,
}
region = calloc(sizeof(*region), 1);
+ if (region == NULL)
+ return region;
+
region->cpp = cpp;
region->width = width;
region->height = height;
@@ -189,6 +192,9 @@ intel_region_alloc(struct intel_context *intel,
region = intel_region_alloc_internal(intel, cpp, width, height,
aligned_pitch / cpp, buffer);
+ if (region == NULL)
+ return region;
+
region->tiling = tiling;
return region;
diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
index 224b506c05b..6efb2ddc553 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -102,7 +102,7 @@ do_copy_texsubimage(struct intel_context *intel,
GLcontext *ctx = &intel->ctx;
const struct intel_region *src = get_teximage_source(intel, internalFormat);
- if (!intelImage->mt || !src) {
+ if (!intelImage->mt || !src || !src->buffer) {
if (INTEL_DEBUG & DEBUG_FALLBACKS)
fprintf(stderr, "%s fail %p %p (0x%08x)\n",
__FUNCTION__, intelImage->mt, src, internalFormat);
diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c
index 5f813c0efa2..e03b203fb40 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_format.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_format.c
@@ -19,7 +19,6 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
GLenum format, GLenum type)
{
struct intel_context *intel = intel_context(ctx);
- const GLboolean do32bpt = (intel->ctx.Visual.rgbBits >= 24);
#if 0
printf("%s intFmt=0x%x format=0x%x type=0x%x\n",
@@ -30,39 +29,28 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
case 4:
case GL_RGBA:
case GL_COMPRESSED_RGBA:
- if (format == GL_BGRA) {
- if (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) {
- return MESA_FORMAT_ARGB8888;
- }
- else if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV) {
- return MESA_FORMAT_ARGB4444;
- }
- else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV) {
- return MESA_FORMAT_ARGB1555;
- }
- }
- return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
+ if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV)
+ return MESA_FORMAT_ARGB4444;
+ else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV)
+ return MESA_FORMAT_ARGB1555;
+ else
+ return MESA_FORMAT_ARGB8888;
case 3:
case GL_RGB:
case GL_COMPRESSED_RGB:
- if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) {
- return MESA_FORMAT_RGB565;
- }
- if (do32bpt) {
- if (intel->has_xrgb_textures)
- return MESA_FORMAT_XRGB8888;
- else
- return MESA_FORMAT_ARGB8888;
- } else {
+ if (type == GL_UNSIGNED_SHORT_5_6_5)
return MESA_FORMAT_RGB565;
- }
+ else if (intel->has_xrgb_textures)
+ return MESA_FORMAT_XRGB8888;
+ else
+ return MESA_FORMAT_ARGB8888;
case GL_RGBA8:
case GL_RGB10_A2:
case GL_RGBA12:
case GL_RGBA16:
- return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
+ return MESA_FORMAT_ARGB8888;
case GL_RGBA4:
case GL_RGBA2:
diff --git a/src/mesa/drivers/dri/intel/server/i830_dri.h b/src/mesa/drivers/dri/intel/server/i830_dri.h
deleted file mode 100644
index def049e7a6b..00000000000
--- a/src/mesa/drivers/dri/intel/server/i830_dri.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_dri.h,v 1.6 2003/09/28 20:15:59 alanh Exp $ */
-
-#ifndef _I830_DRI_H
-#define _I830_DRI_H
-
-#include "xf86drm.h"
-
-#define I830_MAX_DRAWABLES 256
-
-#define I830_MAJOR_VERSION 1
-#define I830_MINOR_VERSION 9
-#define I830_PATCHLEVEL 0
-
-#define I830_REG_SIZE 0x80000
-
-typedef struct _I830DRIRec {
- drm_handle_t regs;
- drmSize regsSize;
-
- drmSize unused1; /* backbufferSize */
- drm_handle_t unused2; /* backbuffer */
-
- drmSize unused3; /* depthbufferSize */
- drm_handle_t unused4; /* depthbuffer */
-
- drmSize unused5; /* rotatedSize */
- drm_handle_t unused6; /* rotatedbuffer */
-
- drm_handle_t unused7; /* textures */
- int unused8; /* textureSize */
-
- drm_handle_t unused9; /* agp_buffers */
- drmSize unused10; /* agp_buf_size */
-
- int deviceID;
- int width;
- int height;
- int mem;
- int cpp;
- int bitsPerPixel;
-
- int unused11[8]; /* was front/back/depth/rotated offset/pitch */
-
- int unused12; /* logTextureGranularity */
- int unused13; /* textureOffset */
-
- int irq;
- int sarea_priv_offset;
-} I830DRIRec, *I830DRIPtr;
-
-typedef struct {
- /* Nothing here yet */
- int dummy;
-} I830ConfigPrivRec, *I830ConfigPrivPtr;
-
-typedef struct {
- /* Nothing here yet */
- int dummy;
-} I830DRIContextRec, *I830DRIContextPtr;
-
-
-#endif
diff --git a/src/mesa/drivers/dri/intel/server/intel.h b/src/mesa/drivers/dri/intel/server/intel.h
deleted file mode 100644
index 6ea72499c1c..00000000000
--- a/src/mesa/drivers/dri/intel/server/intel.h
+++ /dev/null
@@ -1,331 +0,0 @@
-#ifndef _INTEL_H_
-#define _INTEL_H_
-
-#include "xf86drm.h" /* drm_handle_t, etc */
-
-/* Intel */
-#ifndef PCI_CHIP_I810
-#define PCI_CHIP_I810 0x7121
-#define PCI_CHIP_I810_DC100 0x7123
-#define PCI_CHIP_I810_E 0x7125
-#define PCI_CHIP_I815 0x1132
-#define PCI_CHIP_I810_BRIDGE 0x7120
-#define PCI_CHIP_I810_DC100_BRIDGE 0x7122
-#define PCI_CHIP_I810_E_BRIDGE 0x7124
-#define PCI_CHIP_I815_BRIDGE 0x1130
-#endif
-
-#define PCI_CHIP_845_G 0x2562
-#define PCI_CHIP_I830_M 0x3577
-
-#ifndef PCI_CHIP_I855_GM
-#define PCI_CHIP_I855_GM 0x3582
-#define PCI_CHIP_I855_GM_BRIDGE 0x3580
-#endif
-
-#ifndef PCI_CHIP_I865_G
-#define PCI_CHIP_I865_G 0x2572
-#define PCI_CHIP_I865_G_BRIDGE 0x2570
-#endif
-
-#ifndef PCI_CHIP_I915_G
-#define PCI_CHIP_I915_G 0x2582
-#define PCI_CHIP_I915_G_BRIDGE 0x2580
-#endif
-
-#ifndef PCI_CHIP_I915_GM
-#define PCI_CHIP_I915_GM 0x2592
-#define PCI_CHIP_I915_GM_BRIDGE 0x2590
-#endif
-
-#ifndef PCI_CHIP_E7221_G
-#define PCI_CHIP_E7221_G 0x258A
-/* Same as I915_G_BRIDGE */
-#define PCI_CHIP_E7221_G_BRIDGE 0x2580
-#endif
-
-#ifndef PCI_CHIP_I945_G
-#define PCI_CHIP_I945_G 0x2772
-#define PCI_CHIP_I945_G_BRIDGE 0x2770
-#endif
-
-#ifndef PCI_CHIP_I945_GM
-#define PCI_CHIP_I945_GM 0x27A2
-#define PCI_CHIP_I945_GM_BRIDGE 0x27A0
-#endif
-
-#define IS_I810(pI810) (pI810->Chipset == PCI_CHIP_I810 || \
- pI810->Chipset == PCI_CHIP_I810_DC100 || \
- pI810->Chipset == PCI_CHIP_I810_E)
-#define IS_I815(pI810) (pI810->Chipset == PCI_CHIP_I815)
-#define IS_I830(pI810) (pI810->Chipset == PCI_CHIP_I830_M)
-#define IS_845G(pI810) (pI810->Chipset == PCI_CHIP_845_G)
-#define IS_I85X(pI810) (pI810->Chipset == PCI_CHIP_I855_GM)
-#define IS_I852(pI810) (pI810->Chipset == PCI_CHIP_I855_GM && (pI810->variant == I852_GM || pI810->variant == I852_GME))
-#define IS_I855(pI810) (pI810->Chipset == PCI_CHIP_I855_GM && (pI810->variant == I855_GM || pI810->variant == I855_GME))
-#define IS_I865G(pI810) (pI810->Chipset == PCI_CHIP_I865_G)
-
-#define IS_I915G(pI810) (pI810->Chipset == PCI_CHIP_I915_G || pI810->Chipset == PCI_CHIP_E7221_G)
-#define IS_I915GM(pI810) (pI810->Chipset == PCI_CHIP_I915_GM)
-#define IS_I945G(pI810) (pI810->Chipset == PCI_CHIP_I945_G)
-#define IS_I945GM(pI810) (pI810->Chipset == PCI_CHIP_I945_GM)
-#define IS_I9XX(pI810) (IS_I915G(pI810) || IS_I915GM(pI810) || IS_I945G(pI810) || IS_I945GM(pI810))
-
-#define IS_MOBILE(pI810) (IS_I830(pI810) || IS_I85X(pI810) || IS_I915GM(pI810) || IS_I945GM(pI810))
-
-#define I830_GMCH_CTRL 0x52
-
-#define I830_GMCH_MEM_MASK 0x1
-#define I830_GMCH_MEM_64M 0x1
-#define I830_GMCH_MEM_128M 0
-
-#define I830_GMCH_GMS_MASK 0x70
-#define I830_GMCH_GMS_DISABLED 0x00
-#define I830_GMCH_GMS_LOCAL 0x10
-#define I830_GMCH_GMS_STOLEN_512 0x20
-#define I830_GMCH_GMS_STOLEN_1024 0x30
-#define I830_GMCH_GMS_STOLEN_8192 0x40
-
-#define I855_GMCH_GMS_MASK (0x7 << 4)
-#define I855_GMCH_GMS_DISABLED 0x00
-#define I855_GMCH_GMS_STOLEN_1M (0x1 << 4)
-#define I855_GMCH_GMS_STOLEN_4M (0x2 << 4)
-#define I855_GMCH_GMS_STOLEN_8M (0x3 << 4)
-#define I855_GMCH_GMS_STOLEN_16M (0x4 << 4)
-#define I855_GMCH_GMS_STOLEN_32M (0x5 << 4)
-#define I915G_GMCH_GMS_STOLEN_48M (0x6 << 4)
-#define I915G_GMCH_GMS_STOLEN_64M (0x7 << 4)
-
-typedef unsigned char Bool;
-#define TRUE 1
-#define FALSE 0
-
-#define PIPE_NONE 0<<0
-#define PIPE_CRT 1<<0
-#define PIPE_TV 1<<1
-#define PIPE_DFP 1<<2
-#define PIPE_LFP 1<<3
-#define PIPE_CRT2 1<<4
-#define PIPE_TV2 1<<5
-#define PIPE_DFP2 1<<6
-#define PIPE_LFP2 1<<7
-
-typedef struct _I830MemPool *I830MemPoolPtr;
-typedef struct _I830MemRange *I830MemRangePtr;
-typedef struct _I830MemRange {
- long Start;
- long End;
- long Size;
- unsigned long Physical;
- unsigned long Offset; /* Offset of AGP-allocated portion */
- unsigned long Alignment;
- drm_handle_t Key;
- unsigned long Pitch; // add pitch
- I830MemPoolPtr Pool;
-} I830MemRange;
-
-typedef struct _I830MemPool {
- I830MemRange Total;
- I830MemRange Free;
- I830MemRange Fixed;
- I830MemRange Allocated;
-} I830MemPool;
-
-typedef struct {
- int tail_mask;
- I830MemRange mem;
- unsigned char *virtual_start;
- int head;
- int tail;
- int space;
-} I830RingBuffer;
-
-typedef struct _I830Rec {
- unsigned char *MMIOBase;
- unsigned char *FbBase;
- int cpp;
- uint32_t aper_size;
- unsigned int bios_version;
-
- /* These are set in PreInit and never changed. */
- long FbMapSize;
- long TotalVideoRam;
- I830MemRange StolenMemory; /* pre-allocated memory */
- long BIOSMemorySize; /* min stolen pool size */
- int BIOSMemSizeLoc;
-
- /* These change according to what has been allocated. */
- long FreeMemory;
- I830MemRange MemoryAperture;
- I830MemPool StolenPool;
- long allocatedMemory;
-
- /* Regions allocated either from the above pools, or from agpgart. */
- /* for single and dual head configurations */
- I830MemRange FrontBuffer;
- I830MemRange FrontBuffer2;
- I830MemRange Scratch;
- I830MemRange Scratch2;
-
- I830RingBuffer *LpRing;
-
- I830MemRange BackBuffer;
- I830MemRange DepthBuffer;
- I830MemRange TexMem;
- int TexGranularity;
- I830MemRange ContextMem;
- int drmMinor;
- Bool have3DWindows;
-
- Bool NeedRingBufferLow;
- Bool allowPageFlip;
- Bool disableTiling;
-
- int Chipset;
- unsigned long LinearAddr;
- unsigned long MMIOAddr;
-
- drmSize registerSize; /**< \brief MMIO register map size */
- drm_handle_t registerHandle; /**< \brief MMIO register map handle */
- // IOADDRESS ioBase;
- int irq; /**< \brief IRQ number */
- int GttBound;
-
- drm_handle_t ring_map;
- unsigned int Fence[8];
-
-} I830Rec;
-
-/*
- * 12288 is set as the maximum, chosen because it is enough for
- * 1920x1440@32bpp with a 2048 pixel line pitch with some to spare.
- */
-#define I830_MAXIMUM_VBIOS_MEM 12288
-#define I830_DEFAULT_VIDEOMEM_2D (MB(32) / 1024)
-#define I830_DEFAULT_VIDEOMEM_3D (MB(64) / 1024)
-
-/* Flags for memory allocation function */
-#define FROM_ANYWHERE 0x00000000
-#define FROM_POOL_ONLY 0x00000001
-#define FROM_NEW_ONLY 0x00000002
-#define FROM_MASK 0x0000000f
-
-#define ALLOCATE_AT_TOP 0x00000010
-#define ALLOCATE_AT_BOTTOM 0x00000020
-#define FORCE_GAPS 0x00000040
-
-#define NEED_PHYSICAL_ADDR 0x00000100
-#define ALIGN_BOTH_ENDS 0x00000200
-#define FORCE_LOW 0x00000400
-
-#define ALLOC_NO_TILING 0x00001000
-#define ALLOC_INITIAL 0x00002000
-
-#define ALLOCATE_DRY_RUN 0x80000000
-
-/* Chipset registers for VIDEO BIOS memory RW access */
-#define _855_DRAM_RW_CONTROL 0x58
-#define _845_DRAM_RW_CONTROL 0x90
-#define DRAM_WRITE 0x33330000
-
-#define KB(x) ((x) * 1024)
-#define MB(x) ((x) * KB(1024))
-
-#define GTT_PAGE_SIZE KB(4)
-#define ROUND_TO(x, y) (((x) + (y) - 1) / (y) * (y))
-#define ROUND_DOWN_TO(x, y) ((x) / (y) * (y))
-#define ROUND_TO_PAGE(x) ROUND_TO((x), GTT_PAGE_SIZE)
-#define ROUND_TO_MB(x) ROUND_TO((x), MB(1))
-#define PRIMARY_RINGBUFFER_SIZE KB(128)
-
-
-/* Ring buffer registers, p277, overview p19
- */
-#define LP_RING 0x2030
-#define HP_RING 0x2040
-
-#define RING_TAIL 0x00
-#define TAIL_ADDR 0x000FFFF8
-#define I830_TAIL_MASK 0x001FFFF8
-
-#define RING_HEAD 0x04
-#define HEAD_WRAP_COUNT 0xFFE00000
-#define HEAD_WRAP_ONE 0x00200000
-#define HEAD_ADDR 0x001FFFFC
-#define I830_HEAD_MASK 0x001FFFFC
-
-#define RING_START 0x08
-#define START_ADDR 0x03FFFFF8
-#define I830_RING_START_MASK 0xFFFFF000
-
-#define RING_LEN 0x0C
-#define RING_NR_PAGES 0x001FF000
-#define I830_RING_NR_PAGES 0x001FF000
-#define RING_REPORT_MASK 0x00000006
-#define RING_REPORT_64K 0x00000002
-#define RING_REPORT_128K 0x00000004
-#define RING_NO_REPORT 0x00000000
-#define RING_VALID_MASK 0x00000001
-#define RING_VALID 0x00000001
-#define RING_INVALID 0x00000000
-
-
-/* Fence/Tiling ranges [0..7]
- */
-#define FENCE 0x2000
-#define FENCE_NR 8
-
-#define I915G_FENCE_START_MASK 0x0ff00000
-
-#define I830_FENCE_START_MASK 0x07f80000
-
-#define FENCE_START_MASK 0x03F80000
-#define FENCE_X_MAJOR 0x00000000
-#define FENCE_Y_MAJOR 0x00001000
-#define FENCE_SIZE_MASK 0x00000700
-#define FENCE_SIZE_512K 0x00000000
-#define FENCE_SIZE_1M 0x00000100
-#define FENCE_SIZE_2M 0x00000200
-#define FENCE_SIZE_4M 0x00000300
-#define FENCE_SIZE_8M 0x00000400
-#define FENCE_SIZE_16M 0x00000500
-#define FENCE_SIZE_32M 0x00000600
-#define FENCE_SIZE_64M 0x00000700
-#define I915G_FENCE_SIZE_1M 0x00000000
-#define I915G_FENCE_SIZE_2M 0x00000100
-#define I915G_FENCE_SIZE_4M 0x00000200
-#define I915G_FENCE_SIZE_8M 0x00000300
-#define I915G_FENCE_SIZE_16M 0x00000400
-#define I915G_FENCE_SIZE_32M 0x00000500
-#define I915G_FENCE_SIZE_64M 0x00000600
-#define I915G_FENCE_SIZE_128M 0x00000700
-#define FENCE_PITCH_1 0x00000000
-#define FENCE_PITCH_2 0x00000010
-#define FENCE_PITCH_4 0x00000020
-#define FENCE_PITCH_8 0x00000030
-#define FENCE_PITCH_16 0x00000040
-#define FENCE_PITCH_32 0x00000050
-#define FENCE_PITCH_64 0x00000060
-#define FENCE_VALID 0x00000001
-
-#include <mmio.h>
-
-# define MMIO_IN8(base, offset) \
- *(volatile unsigned char *)(((unsigned char*)(base)) + (offset))
-# define MMIO_IN32(base, offset) \
- read_MMIO_LE32(base, offset)
-# define MMIO_OUT8(base, offset, val) \
- *(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) = (val)
-# define MMIO_OUT32(base, offset, val) \
- *(volatile unsigned int *)(void *)(((unsigned char*)(base)) + (offset)) = CPU_TO_LE32(val)
-
-
- /* Memory mapped register access macros */
-#define INREG8(addr) MMIO_IN8(MMIO, addr)
-#define INREG(addr) MMIO_IN32(MMIO, addr)
-#define OUTREG8(addr, val) MMIO_OUT8(MMIO, addr, val)
-#define OUTREG(addr, val) MMIO_OUT32(MMIO, addr, val)
-
-#define DSPABASE 0x70184
-
-#endif
diff --git a/src/mesa/drivers/dri/mach64/mach64_ioctl.h b/src/mesa/drivers/dri/mach64/mach64_ioctl.h
index 1ffda1932f1..9145ee6e6cf 100644
--- a/src/mesa/drivers/dri/mach64/mach64_ioctl.h
+++ b/src/mesa/drivers/dri/mach64/mach64_ioctl.h
@@ -32,6 +32,9 @@
#ifndef __MACH64_IOCTL_H__
#define __MACH64_IOCTL_H__
+#include <stdio.h>
+#include <stdlib.h>
+
#include "mach64_dri.h"
#include "mach64_reg.h"
#include "mach64_lock.h"
diff --git a/src/mesa/drivers/dri/mga/mgarender.c b/src/mesa/drivers/dri/mga/mgarender.c
index 8b8fc485d31..cc0cea618d1 100644
--- a/src/mesa/drivers/dri/mga/mgarender.c
+++ b/src/mesa/drivers/dri/mga/mgarender.c
@@ -44,6 +44,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/imports.h"
#include "main/mtypes.h"
+#include "math/m_xform.h"
+
#include "tnl/t_context.h"
#include "mgacontext.h"
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
index 8be7edb150b..bd1273beea7 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
@@ -220,7 +220,7 @@ get_tex_format(struct gl_texture_image *ti)
case MESA_FORMAT_RGB565:
return GL_RGB5;
default:
- assert(0);
+ return GL_NONE;
}
}
@@ -231,7 +231,6 @@ nouveau_render_texture(GLcontext *ctx, struct gl_framebuffer *fb,
struct gl_renderbuffer *rb = att->Renderbuffer;
struct gl_texture_image *ti =
att->Texture->Image[att->CubeMapFace][att->TextureLevel];
- int ret;
/* Allocate a renderbuffer object for the texture if we
* haven't already done so. */
@@ -244,9 +243,7 @@ nouveau_render_texture(GLcontext *ctx, struct gl_framebuffer *fb,
}
/* Update the renderbuffer fields from the texture. */
- ret = set_renderbuffer_format(rb, get_tex_format(ti));
- assert(ret);
-
+ set_renderbuffer_format(rb, get_tex_format(ti));
rb->Width = ti->Width;
rb->Height = ti->Height;
nouveau_surface_ref(&to_nouveau_teximage(ti)->surface,
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_texture.c b/src/mesa/drivers/dri/nouveau/nouveau_texture.c
index dbf9a5cc613..442f4e899ee 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_texture.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_texture.c
@@ -38,6 +38,7 @@
#include "main/mipmap.h"
#include "main/texfetch.h"
#include "main/teximage.h"
+#include "drivers/common/meta.h"
static struct gl_texture_object *
nouveau_texture_new(GLcontext *ctx, GLuint name, GLenum target)
@@ -182,10 +183,10 @@ teximage_fits(struct gl_texture_object *t, int level)
struct nouveau_surface *s = &to_nouveau_texture(t)->surfaces[level];
struct gl_texture_image *ti = t->Image[0][level];
- return ti && (t->Target == GL_TEXTURE_RECTANGLE ||
- (s->bo && s->width == ti->Width &&
- s->height == ti->Height &&
- s->format == ti->TexFormat));
+ return ti && to_nouveau_teximage(ti)->surface.bo &&
+ (t->Target == GL_TEXTURE_RECTANGLE ||
+ (s->bo && s->format == ti->TexFormat &&
+ s->width == ti->Width && s->height == ti->Height));
}
static GLboolean
@@ -589,6 +590,53 @@ nouveau_texture_unmap(GLcontext *ctx, struct gl_texture_object *t)
}
}
+static void
+store_mipmap(GLcontext *ctx, GLenum target, int first, int last,
+ struct gl_texture_object *t)
+{
+ struct gl_pixelstore_attrib packing = {
+ .BufferObj = ctx->Shared->NullBufferObj,
+ .Alignment = 1
+ };
+ GLenum format = t->Image[0][first]->TexFormat;
+ unsigned base_format, type, comps;
+ int i;
+
+ base_format = _mesa_get_format_base_format(format);
+ _mesa_format_to_type_and_comps(format, &type, &comps);
+
+ for (i = first; i <= last; i++) {
+ struct gl_texture_image *ti = t->Image[0][i];
+ void *data = ti->Data;
+
+ nouveau_teximage(ctx, 3, target, i, ti->InternalFormat,
+ ti->Width, ti->Height, ti->Depth,
+ ti->Border, base_format, type, data,
+ &packing, t, ti);
+
+ _mesa_free_texmemory(data);
+ }
+}
+
+static void
+nouveau_generate_mipmap(GLcontext *ctx, GLenum target,
+ struct gl_texture_object *t)
+{
+ if (_mesa_meta_check_generate_mipmap_fallback(ctx, target, t)) {
+ struct gl_texture_image *base = t->Image[0][t->BaseLevel];
+
+ nouveau_teximage_map(ctx, base);
+ _mesa_generate_mipmap(ctx, target, t);
+ nouveau_teximage_unmap(ctx, base);
+
+ store_mipmap(ctx, target, t->BaseLevel + 1,
+ get_last_level(t), t);
+
+ } else {
+ _mesa_meta_GenerateMipmap(ctx, target, t);
+ }
+}
+
void
nouveau_texture_functions_init(struct dd_function_table *functions)
{
@@ -607,4 +655,5 @@ nouveau_texture_functions_init(struct dd_function_table *functions)
functions->BindTexture = nouveau_bind_texture;
functions->MapTexture = nouveau_texture_map;
functions->UnmapTexture = nouveau_texture_unmap;
+ functions->GenerateMipmap = nouveau_generate_mipmap;
}
diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_fb.c b/src/mesa/drivers/dri/nouveau/nv20_state_fb.c
index 21da4f7af16..95691cad047 100644
--- a/src/mesa/drivers/dri/nouveau/nv20_state_fb.c
+++ b/src/mesa/drivers/dri/nouveau/nv20_state_fb.c
@@ -72,7 +72,7 @@ nv20_emit_framebuffer(GLcontext *ctx, int emit)
fb->_ColorDrawBuffers[0])->surface;
rt_format |= get_rt_format(s->format);
- zeta_pitch = rt_pitch = s->pitch;
+ rt_pitch = s->pitch;
nouveau_bo_markl(bctx, kelvin, NV20TCL_COLOR_OFFSET,
s->bo, 0, bo_flags);
@@ -88,6 +88,9 @@ nv20_emit_framebuffer(GLcontext *ctx, int emit)
nouveau_bo_markl(bctx, kelvin, NV20TCL_ZETA_OFFSET,
s->bo, 0, bo_flags);
+ } else {
+ rt_format |= get_rt_format(MESA_FORMAT_Z24_S8);
+ zeta_pitch = rt_pitch;
}
BEGIN_RING(chan, kelvin, NV20TCL_RT_FORMAT, 2);
diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_tex.c b/src/mesa/drivers/dri/nouveau/nv20_state_tex.c
index e46118e4fce..2d45513bb4c 100644
--- a/src/mesa/drivers/dri/nouveau/nv20_state_tex.c
+++ b/src/mesa/drivers/dri/nouveau/nv20_state_tex.c
@@ -194,7 +194,8 @@ nv20_emit_tex_obj(GLcontext *ctx, int emit)
| nvgl_wrap_mode(t->WrapS) << 0;
tx_filter = nvgl_filter_mode(t->MagFilter) << 24
- | nvgl_filter_mode(t->MinFilter) << 16;
+ | nvgl_filter_mode(t->MinFilter) << 16
+ | 2 << 12;
tx_enable = NV20TCL_TX_ENABLE_ENABLE
| log2i(t->MaxAnisotropy) << 4;
diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c
index 262fe3cddee..dbf4ad477db 100644
--- a/src/mesa/drivers/dri/r200/r200_swtcl.c
+++ b/src/mesa/drivers/dri/r200/r200_swtcl.c
@@ -612,6 +612,8 @@ static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ radeon_prepare_render(&rmesa->radeon);
+
if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
/* need to disable perspective-correct texturing for point sprites */
if ((hwprim & 0xf) == R200_VF_PRIM_POINT_SPRITES && ctx->Point.PointSprite) {
diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c
index d43e14581e9..4ae0f304918 100644
--- a/src/mesa/drivers/dri/r200/r200_tcl.c
+++ b/src/mesa/drivers/dri/r200/r200_tcl.c
@@ -264,6 +264,8 @@ void r200TclPrimitive( GLcontext *ctx,
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint newprim = hw_prim | R200_VF_TCL_OUTPUT_VTX_ENABLE;
+ radeon_prepare_render(&rmesa->radeon);
+
if (newprim != rmesa->tcl.hw_primitive ||
!discrete_prim[hw_prim&0xf]) {
/* need to disable perspective-correct texturing for point sprites */
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index a326ee4c4fa..d2fa816894c 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -109,13 +109,13 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
debug_program_log(c, "before compilation");
if (c->Base.is_r500){
- r500_transform_unroll_loops(&c->Base, &loop_state);
- debug_program_log(c, "after r500 transform loops");
+ rc_unroll_loops(&c->Base, R500_PFS_MAX_INST);
+ debug_program_log(c, "after unroll loops");
}
else{
- rc_transform_unroll_loops(&c->Base, &loop_state);
+ rc_transform_loops(&c->Base, &loop_state, -1);
debug_program_log(c, "after transform loops");
-
+
rc_emulate_branches(&c->Base);
debug_program_log(c, "after emulate branches");
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index d347b4df9cd..666c9c2a7a9 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -32,6 +32,11 @@
#include "radeon_emulate_branches.h"
#include "radeon_emulate_loops.h"
+struct loop {
+ int BgnLoop;
+
+};
+
/*
* Take an already-setup and valid source then swizzle it appropriately to
* obtain a constant ZERO or ONE source.
@@ -332,11 +337,140 @@ static void ei_pow(struct r300_vertex_program_code *vp,
inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
}
+static void mark_write(void * userdata, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ unsigned int * writemasks = userdata;
+
+ if (file != RC_FILE_TEMPORARY)
+ return;
+
+ if (index >= R300_VS_MAX_TEMPS)
+ return;
+
+ writemasks[index] |= mask;
+}
+
+static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler)
+{
+ return PVS_SRC_OPERAND(compiler->PredicateIndex,
+ t_swizzle(RC_SWIZZLE_ZERO),
+ t_swizzle(RC_SWIZZLE_ZERO),
+ t_swizzle(RC_SWIZZLE_ZERO),
+ t_swizzle(RC_SWIZZLE_W),
+ t_src_class(RC_FILE_TEMPORARY),
+ 0);
+}
+
+static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler,
+ unsigned int hw_opcode, int is_math)
+{
+ return PVS_OP_DST_OPERAND(hw_opcode,
+ is_math,
+ 0,
+ compiler->PredicateIndex,
+ RC_MASK_W,
+ t_dst_class(RC_FILE_TEMPORARY));
+
+}
+
+static void ei_if(struct r300_vertex_program_compiler * compiler,
+ struct rc_instruction *rci,
+ unsigned int * inst,
+ unsigned int branch_depth)
+{
+ unsigned int predicate_opcode;
+ int is_math = 0;
+
+ if (!compiler->Base.is_r500) {
+ rc_error(&compiler->Base,"Opcode IF not supported\n");
+ return;
+ }
+
+ /* Reserve a temporary to use as our predicate stack counter, if we
+ * don't already have one. */
+ if (!compiler->PredicateMask) {
+ unsigned int writemasks[R300_VS_MAX_TEMPS];
+ memset(writemasks, 0, sizeof(writemasks));
+ struct rc_instruction * inst;
+ unsigned int i;
+ for(inst = compiler->Base.Program.Instructions.Next;
+ inst != &compiler->Base.Program.Instructions;
+ inst = inst->Next) {
+ rc_for_all_writes_mask(inst, mark_write, writemasks);
+ }
+ for(i = 0; i < R300_VS_MAX_TEMPS; i++) {
+ unsigned int mask = ~writemasks[i] & RC_MASK_XYZW;
+ /* Only the W component can be used for the predicate
+ * stack counter. */
+ if (mask & RC_MASK_W) {
+ compiler->PredicateMask = RC_MASK_W;
+ compiler->PredicateIndex = i;
+ break;
+ }
+ }
+ if (i == R300_VS_MAX_TEMPS) {
+ rc_error(&compiler->Base, "No free temporary to use for"
+ " predicate stack counter.\n");
+ return;
+ }
+ }
+ predicate_opcode =
+ branch_depth ? VE_PRED_SET_NEQ_PUSH : ME_PRED_SET_NEQ;
+
+ rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0));
+ if (branch_depth == 0) {
+ is_math = 1;
+ predicate_opcode = ME_PRED_SET_NEQ;
+ inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
+ inst[2] = 0;
+ } else {
+ predicate_opcode = VE_PRED_SET_NEQ_PUSH;
+ inst[1] = t_pred_src(compiler);
+ inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
+ }
+
+ inst[0] = t_pred_dst(compiler, predicate_opcode, is_math);
+ inst[3] = 0;
+
+}
+
+static void ei_else(struct r300_vertex_program_compiler * compiler,
+ unsigned int * inst)
+{
+ if (!compiler->Base.is_r500) {
+ rc_error(&compiler->Base,"Opcode ELSE not supported\n");
+ return;
+ }
+ inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1);
+ inst[1] = t_pred_src(compiler);
+ inst[2] = 0;
+ inst[3] = 0;
+}
+
+static void ei_endif(struct r300_vertex_program_compiler *compiler,
+ unsigned int * inst)
+{
+ if (!compiler->Base.is_r500) {
+ rc_error(&compiler->Base,"Opcode ENDIF not supported\n");
+ return;
+ }
+ inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1);
+ inst[1] = t_pred_src(compiler);
+ inst[2] = 0;
+ inst[3] = 0;
+}
static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
{
struct rc_instruction *rci;
+ struct loop * loops;
+ int current_loop_depth = 0;
+ int loops_reserved = 0;
+
+ unsigned int branch_depth = 0;
+
compiler->code->pos_end = 0; /* Not supported yet */
compiler->code->length = 0;
@@ -366,9 +500,12 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
+ case RC_OPCODE_ELSE: ei_else(compiler, inst); break;
+ case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break;
case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
+ case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break;
case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
@@ -385,11 +522,86 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break;
case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
+ case RC_OPCODE_BGNLOOP:
+ {
+ struct loop * l;
+
+ if ((!compiler->Base.is_r500
+ && loops_reserved >= R300_VS_MAX_LOOP_DEPTH)
+ || loops_reserved >= R500_VS_MAX_FC_DEPTH) {
+ rc_error(&compiler->Base,
+ "Loops are nested too deep.");
+ return;
+ }
+ memory_pool_array_reserve(&compiler->Base.Pool,
+ struct loop, loops, current_loop_depth,
+ loops_reserved, 1);
+ l = &loops[current_loop_depth++];
+ memset(l , 0, sizeof(struct loop));
+ l->BgnLoop = (compiler->code->length / 4);
+ continue;
+ }
+ case RC_OPCODE_ENDLOOP:
+ {
+ struct loop * l = &loops[current_loop_depth - 1];
+ unsigned int act_addr = l->BgnLoop - 1;
+ unsigned int last_addr = (compiler->code->length / 4) - 1;
+ unsigned int ret_addr = l->BgnLoop;
+
+ if (loops_reserved >= R300_VS_MAX_FC_OPS) {
+ rc_error(&compiler->Base,
+ "Too many flow control instructions.");
+ return;
+ }
+ if (compiler->Base.is_r500) {
+ compiler->code->fc_op_addrs.r500
+ [compiler->code->num_fc_ops].lw =
+ R500_PVS_FC_ACT_ADRS(act_addr)
+ | R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff)
+ ;
+ compiler->code->fc_op_addrs.r500
+ [compiler->code->num_fc_ops].uw =
+ R500_PVS_FC_LAST_INST(last_addr)
+ | R500_PVS_FC_RTN_INST(ret_addr)
+ ;
+ } else {
+ compiler->code->fc_op_addrs.r300
+ [compiler->code->num_fc_ops] =
+ R300_PVS_FC_ACT_ADRS(act_addr)
+ | R300_PVS_FC_LOOP_CNT_JMP_INST(0xff)
+ | R300_PVS_FC_LAST_INST(last_addr)
+ | R300_PVS_FC_RTN_INST(ret_addr)
+ ;
+ }
+ compiler->code->fc_loop_index[compiler->code->num_fc_ops] =
+ R300_PVS_FC_LOOP_INIT_VAL(0x0)
+ | R300_PVS_FC_LOOP_STEP_VAL(0x1)
+ ;
+ compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP(
+ compiler->code->num_fc_ops);
+ compiler->code->num_fc_ops++;
+ current_loop_depth--;
+ continue;
+ }
+
default:
rc_error(&compiler->Base, "Unknown opcode %s\n", rc_get_opcode_info(vpi->Opcode)->Name);
return;
}
+ /* Non-flow control instructions that are inside an if statement
+ * need to pay attention to the predicate bit. */
+ if (branch_depth
+ && vpi->Opcode != RC_OPCODE_IF
+ && vpi->Opcode != RC_OPCODE_ELSE
+ && vpi->Opcode != RC_OPCODE_ENDIF) {
+
+ inst[0] |= (PVS_DST_PRED_ENABLE_MASK
+ << PVS_DST_PRED_ENABLE_SHIFT);
+ inst[0] |= (PVS_DST_PRED_SENSE_MASK
+ << PVS_DST_PRED_SENSE_SHIFT);
+ }
+
compiler->code->length += 4;
if (compiler->Base.Error)
@@ -406,6 +618,7 @@ struct temporary_allocation {
static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
{
struct rc_instruction *inst;
+ struct rc_instruction *end_loop = NULL;
unsigned int num_orig_temps = 0;
char hwtemps[R300_VS_MAX_TEMPS];
struct temporary_allocation * ta;
@@ -440,10 +653,35 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
/* Pass 2: Determine original temporary lifetimes */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ /* Instructions inside of loops need to use the ENDLOOP
+ * instruction as their LastRead. */
+ if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ int endloops = 1;
+ struct rc_instruction * ptr;
+ for(ptr = inst->Next;
+ ptr != &compiler->Base.Program.Instructions;
+ ptr = ptr->Next){
+ if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ endloops++;
+ } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
+ endloops--;
+ if (endloops <= 0) {
+ end_loop = ptr;
+ break;
+ }
+ }
+ }
+ }
+
+ if (inst == end_loop) {
+ end_loop = NULL;
+ continue;
+ }
for (i = 0; i < opcode->NumSrcRegs; ++i) {
if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY)
- ta[inst->U.I.SrcReg[i].Index].LastRead = inst;
+ ta[inst->U.I.SrcReg[i].Index].LastRead =
+ end_loop ? end_loop : inst;
}
}
@@ -633,30 +871,24 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
{
struct emulate_loop_state loop_state;
-
+
compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
addArtificialOutputs(compiler);
debug_program_log(compiler, "before compilation");
- /* XXX Ideally this should be done only for r3xx, but since
- * we don't have branching support for r5xx, we use the emulation
- * on all chipsets. */
- rc_transform_unroll_loops(&compiler->Base, &loop_state);
-
- debug_program_log(compiler, "after transform loops");
-
- if (compiler->Base.is_r500){
- rc_emulate_loops(&loop_state, R500_VS_MAX_ALU);
- } else {
- rc_emulate_loops(&loop_state, R300_VS_MAX_ALU);
- }
- debug_program_log(compiler, "after emulate loops");
+ if (compiler->Base.is_r500)
+ rc_transform_loops(&compiler->Base, &loop_state, R500_VS_MAX_ALU);
+ else
+ rc_transform_loops(&compiler->Base, &loop_state, R300_VS_MAX_ALU);
- rc_emulate_branches(&compiler->Base);
+ debug_program_log(compiler, "after emulate loops");
- debug_program_log(compiler, "after emulate branches");
+ if (!compiler->Base.is_r500) {
+ rc_emulate_branches(&compiler->Base);
+ debug_program_log(compiler, "after emulate branches");
+ }
if (compiler->Base.is_r500) {
struct radeon_program_transformation transformations[] = {
@@ -718,6 +950,6 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
if (compiler->Base.Debug) {
fprintf(stderr, "Final vertex program code:\n");
- r300_vertex_program_dump(compiler->code);
+ r300_vertex_program_dump(compiler);
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
index 5800f1a78e1..e6009338e2e 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
@@ -20,7 +20,9 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
+#include "radeon_compiler.h"
#include "radeon_code.h"
+#include "../r300_reg.h"
#include <stdio.h>
@@ -133,6 +135,10 @@ static void r300_vs_op_dump(uint32_t op)
{
fprintf(stderr, " dst: %d%s op: ",
(op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]);
+ if ((op >> PVS_DST_PRED_ENABLE_SHIFT) & 0x1) {
+ fprintf(stderr, "PRED %u",
+ (op >> PVS_DST_PRED_SENSE_SHIFT) & 0x1);
+ }
if (op & 0x80) {
if (op & 0x1) {
fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n");
@@ -160,8 +166,9 @@ static void r300_vs_src_dump(uint32_t src)
r300_vs_swiz_debug[(src >> 22) & 0x7]);
}
-void r300_vertex_program_dump(struct r300_vertex_program_code * vs)
+void r300_vertex_program_dump(struct r300_vertex_program_compiler * c)
{
+ struct r300_vertex_program_code * vs = c->code;
unsigned instrcount = vs->length / 4;
unsigned i;
@@ -177,4 +184,21 @@ void r300_vertex_program_dump(struct r300_vertex_program_code * vs)
r300_vs_src_dump(vs->body.d[offset+1+src]);
}
}
+
+ fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops);
+ for(i = 0; i < vs->num_fc_ops; i++) {
+ switch((vs->fc_ops >> (i * 2)) & 0x3 ) {
+ case 0: fprintf(stderr, "NOP"); break;
+ case 1: fprintf(stderr, "JUMP"); break;
+ case 2: fprintf(stderr, "LOOP"); break;
+ case 3: fprintf(stderr, "JSR"); break;
+ }
+ if (c->Base.is_r500) {
+ fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n",
+ vs->fc_op_addrs.r500[i].uw,
+ vs->fc_op_addrs.r500[i].lw);
+ } else {
+ fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]);
+ }
+ }
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index e6b5522c5b9..80a120497e3 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -30,7 +30,6 @@
#include <stdio.h>
#include "../r300_reg.h"
-#include "radeon_emulate_loops.h"
/**
* Rewrite IF instructions to use the ALU result special register.
@@ -60,31 +59,6 @@ int r500_transform_IF(
return 1;
}
-/**
- * Rewrite loops to make them easier to emit. This is not a local
- * transformation, because it modifies and reorders an entire block of code.
- */
-void r500_transform_unroll_loops(struct radeon_compiler * c,
- struct emulate_loop_state *s)
-{
- int i;
-
- rc_transform_unroll_loops(c, s);
-
- for( i = s->LoopCount - 1; i >= 0; i-- ){
- struct rc_instruction * inst_continue;
- if(!s->Loops[i].EndLoop){
- continue;
- }
- /* Insert a continue instruction at the end of the loop. This
- * is required in order to emit loops correctly. */
- inst_continue = rc_insert_new_instruction(c,
- s->Loops[i].EndIf->Prev);
- inst_continue->U.I.Opcode = RC_OPCODE_CONTINUE;
- }
-
-}
-
static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
unsigned int relevant;
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
index 0d005a794ff..34173351f83 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
@@ -49,6 +49,4 @@ extern int r500_transform_IF(
struct rc_instruction * inst,
void* data);
-void r500_transform_unroll_loops(struct radeon_compiler * c,
- struct emulate_loop_state * s);
#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index 0bd8f0a239f..9b60e30f586 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -64,7 +64,16 @@ struct branch_info {
};
struct loop_info {
- int LoopStart;
+ int BgnLoop;
+
+ int BranchDepth;
+ int * Brks;
+ int BrkCount;
+ int BrkReserved;
+
+ int * Conts;
+ int ContCount;
+ int ContReserved;
};
struct emit_state {
@@ -368,6 +377,12 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
unsigned int newip = ++s->Code->inst_end;
+ /* Currently all loops use the same integer constant to initialize
+ * the loop variables. */
+ if(!s->Code->int_constants[0]) {
+ s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
+ s->Code->int_constant_count = 1;
+ }
s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
switch(inst->U.I.Opcode){
@@ -378,32 +393,77 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
loop = &s->Loops[s->CurrentLoopDepth++];
-
- /* We don't emit an instruction for BGNLOOP, so we need to
- * decrement the instruction counter, but first we need to
- * set LoopStart to the current value of inst_end, which
- * will end up being the first real instruction in the loop.*/
- loop->LoopStart = s->Code->inst_end--;
+ memset(loop, 0, sizeof(struct loop_info));
+ loop->BranchDepth = s->CurrentBranchDepth;
+ loop->BgnLoop = newip;
+
+ s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
+ | R500_FC_JUMP_FUNC(0x00)
+ | R500_FC_IGNORE_UNCOVERED
+ ;
break;
-
case RC_OPCODE_BRK:
- /* Don't emit an instruction for BRK */
- s->Code->inst_end--;
+ loop = &s->Loops[s->CurrentLoopDepth - 1];
+ memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
+ loop->BrkCount, loop->BrkReserved, 1);
+
+ loop->Brks[loop->BrkCount++] = newip;
+ s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
+ | R500_FC_JUMP_FUNC(0xff)
+ | R500_FC_B_OP1_DECR
+ | R500_FC_B_POP_CNT(
+ s->CurrentBranchDepth - loop->BranchDepth)
+ | R500_FC_IGNORE_UNCOVERED
+ ;
break;
- case RC_OPCODE_CONTINUE:
+ case RC_OPCODE_CONT:
loop = &s->Loops[s->CurrentLoopDepth - 1];
- s->Code->inst[newip].inst2 = R500_FC_OP_JUMP |
- R500_FC_JUMP_FUNC(0xff);
- s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->LoopStart);
+ memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
+ loop->ContCount, loop->ContReserved, 1);
+ loop->Conts[loop->ContCount++] = newip;
+ s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
+ | R500_FC_JUMP_FUNC(0xff)
+ | R500_FC_B_OP1_DECR
+ | R500_FC_B_POP_CNT(
+ s->CurrentBranchDepth - loop->BranchDepth)
+ | R500_FC_IGNORE_UNCOVERED
+ ;
break;
case RC_OPCODE_ENDLOOP:
- /* Don't emit an instruction for ENDLOOP */
- s->Code->inst_end--;
+ {
+ loop = &s->Loops[s->CurrentLoopDepth - 1];
+ /* Emit ENDLOOP */
+ s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
+ | R500_FC_JUMP_FUNC(0xff)
+ | R500_FC_JUMP_ANY
+ | R500_FC_IGNORE_UNCOVERED
+ ;
+ /* The constant integer at index 0 is used by all loops. */
+ s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
+ | R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
+ ;
+
+ /* Set jump address and int constant for BGNLOOP */
+ s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
+ | R500_FC_JUMP_ADDR(newip)
+ ;
+
+ /* Set jump address for the BRK instructions. */
+ while(loop->BrkCount--) {
+ s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
+ R500_FC_JUMP_ADDR(newip + 1);
+ }
+
+ /* Set jump address for CONT instructions. */
+ while(loop->ContCount--) {
+ s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
+ R500_FC_JUMP_ADDR(newip);
+ }
s->CurrentLoopDepth--;
break;
-
+ }
case RC_OPCODE_IF:
if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) {
rc_error(s->C, "Branch depth exceeds hardware limit");
@@ -442,24 +502,16 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
}
branch = &s->Branches[s->CurrentBranchDepth - 1];
-
- if(inst->Prev->U.I.Opcode == RC_OPCODE_BRK){
- branch->Endif = --s->Code->inst_end;
- s->Code->inst[branch->Endif].inst2 |=
- R500_FC_B_OP0_DECR;
- }
- else{
- branch->Endif = newip;
-
- s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
- | R500_FC_A_OP_NONE /* no address stack */
- | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
- | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
- | R500_FC_B_OP1_NONE /* no branch counter if stay */
- | R500_FC_B_POP_CNT(1)
+ branch->Endif = newip;
+
+ s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
+ | R500_FC_A_OP_NONE /* no address stack */
+ | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
+ | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
+ | R500_FC_B_OP1_NONE /* no branch counter if stay */
+ | R500_FC_B_POP_CNT(1)
;
- s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
- }
+ s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
| R500_FC_A_OP_NONE /* no address stack */
| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
@@ -544,11 +596,9 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
}
- /* Use FULL flow control mode if branches are nested deep enough.
- * We don not need to enable FULL flow control mode for loops, becasue
- * we aren't using the hardware loop instructions.
- */
- if (s.MaxBranchDepth >= 4) {
+ /* Enable full flow control mode if we are using loops or have if
+ * statements nested at least four deep. */
+ if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
if (code->max_temp_idx < 1)
code->max_temp_idx = 1;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index d03689763bc..896246d2035 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -221,6 +221,9 @@ struct r500_fragment_program_code {
int max_temp_idx;
uint32_t us_fc_ctrl;
+
+ uint32_t int_constants[32];
+ uint32_t int_constant_count;
};
struct rX00_fragment_program_code {
@@ -240,6 +243,12 @@ struct rX00_fragment_program_code {
#define R500_VS_MAX_ALU 1024
#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4)
#define R300_VS_MAX_TEMPS 32
+/* This is the max for all chipsets (r300-r500) */
+#define R300_VS_MAX_FC_OPS 16
+/* The r500 maximum depth is not just for loops, but any combination of loops
+ * and subroutine jumps. */
+#define R500_VS_MAX_FC_DEPTH 8
+#define R300_VS_MAX_LOOP_DEPTH 1
#define VSF_MAX_INPUTS 32
#define VSF_MAX_OUTPUTS 32
@@ -260,9 +269,18 @@ struct r300_vertex_program_code {
uint32_t InputsRead;
uint32_t OutputsWritten;
-};
-void r300_vertex_program_dump(struct r300_vertex_program_code * vs);
+ unsigned int num_fc_ops;
+ uint32_t fc_ops;
+ union {
+ uint32_t r300[R300_VS_MAX_FC_OPS];
+ struct {
+ uint32_t lw;
+ uint32_t uw;
+ } r500[R300_VS_MAX_FC_OPS];
+ } fc_op_addrs;
+ int32_t fc_loop_index[R300_VS_MAX_FC_OPS];
+};
#endif /* RADEON_CODE_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
index 1c8ba864a41..935dc9b0a80 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -307,3 +307,46 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig
}
}
+
+/**
+ * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
+ * Gallium and OpenGL define it the other way around.
+ *
+ * So let's just negate FACE at the beginning of the shader and rewrite the rest
+ * of the shader to read from the newly allocated temporary.
+ */
+void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
+{
+ unsigned tempregi = rc_find_free_temporary(c);
+ struct rc_instruction *inst_add;
+ struct rc_instruction *inst;
+
+ /* tempregi.x = 1 - FACE: invert the hardware face value */
+ inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
+ inst_add->U.I.Opcode = RC_OPCODE_ADD;
+
+ inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_add->U.I.DstReg.Index = tempregi;
+ inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
+
+ inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
+ inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
+
+ inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
+ inst_add->U.I.SrcReg[1].Index = face;
+ inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
+ inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
+
+ for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned i;
+
+ for(i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
+ inst->U.I.SrcReg[i].Index == face) {
+ inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[i].Index = tempregi;
+ }
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index f15905d79d4..7c42eb3ae57 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -81,6 +81,7 @@ void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_ou
void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output);
void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
int full_vtransform);
+void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face);
struct r300_fragment_program_compiler {
struct radeon_compiler Base;
@@ -110,8 +111,12 @@ struct r300_vertex_program_compiler {
void * UserData;
void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);
+
+ int PredicateIndex;
+ unsigned int PredicateMask;
};
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
+void r300_vertex_program_dump(struct r300_vertex_program_compiler * c);
#endif /* RADEON_COMPILER_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
index fbb4235c223..faf531b412e 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
@@ -43,6 +43,12 @@ struct instruction_state {
unsigned char SrcReg[3];
};
+struct loopinfo {
+ struct updatemask_state * Breaks;
+ unsigned int BreakCount;
+ unsigned int BreaksReserved;
+};
+
struct branchinfo {
unsigned int HaveElse:1;
@@ -59,6 +65,10 @@ struct deadcode_state {
struct branchinfo * BranchStack;
unsigned int BranchStackSize;
unsigned int BranchStackReserved;
+
+ struct loopinfo * LoopStack;
+ unsigned int LoopStackSize;
+ unsigned int LoopStackReserved;
};
@@ -78,6 +88,22 @@ static void or_updatemasks(
dst->Address = a->Address | b->Address;
}
+static void push_break(struct deadcode_state *s)
+{
+ struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1];
+ memory_pool_array_reserve(&s->C->Pool, struct updatemask_state,
+ loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1);
+
+ memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R));
+}
+
+static void push_loop(struct deadcode_state * s)
+{
+ memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,
+ s->LoopStackSize, s->LoopStackReserved, 1);
+ memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));
+}
+
static void push_branch(struct deadcode_state * s)
{
memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
@@ -233,11 +259,22 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f
}
}
}
+ push_loop(&s);
break;
}
- case RC_OPCODE_CONTINUE:
case RC_OPCODE_BRK:
+ push_break(&s);
+ break;
case RC_OPCODE_BGNLOOP:
+ {
+ unsigned int i;
+ struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1];
+ for(i = 0; i < loop->BreakCount; i++) {
+ or_updatemasks(&s.R, &s.R, &loop->Breaks[i]);
+ }
+ break;
+ }
+ case RC_OPCODE_CONT:
break;
case RC_OPCODE_ENDIF:
push_branch(&s);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
index 131e9e7436d..32d4b45dd6d 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
@@ -39,7 +39,6 @@
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
struct const_value {
-
struct radeon_compiler * C;
struct rc_src_register * Src;
float Value;
@@ -78,17 +77,17 @@ static int src_reg_is_immediate(struct rc_src_register * src,
c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE;
}
-static unsigned int loop_calc_iterations(struct emulate_loop_state *s,
- struct loop_info * loop, unsigned int max_instructions)
+static unsigned int loop_max_possible_iterations(struct radeon_compiler *c,
+ struct loop_info * loop, unsigned int prog_inst_limit)
{
- unsigned int total_i = rc_recompute_ips(s->C);
+ unsigned int total_i = rc_recompute_ips(c);
unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1;
/* +1 because the program already has one iteration of the loop. */
- return 1 + ((max_instructions - total_i) / (s->LoopCount * loop_i));
+ return 1 + ((prog_inst_limit - total_i) / loop_i);
}
-static void loop_unroll(struct emulate_loop_state * s,
- struct loop_info *loop, unsigned int iterations)
+static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
+ unsigned int iterations)
{
unsigned int i;
struct rc_instruction * ptr;
@@ -99,7 +98,7 @@ static void loop_unroll(struct emulate_loop_state * s,
rc_remove_instruction(loop->EndLoop);
for( i = 1; i < iterations; i++){
for(ptr = first; ptr != last->Next; ptr = ptr->Next){
- struct rc_instruction *new = rc_alloc_instruction(s->C);
+ struct rc_instruction *new = rc_alloc_instruction(c);
memcpy(new, ptr, sizeof(struct rc_instruction));
rc_insert_instruction(append_to, new);
append_to = new;
@@ -115,7 +114,7 @@ static void update_const_value(void * data, struct rc_instruction * inst,
if(value->Src->File != file ||
value->Src->Index != index ||
!(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){
- return;
+ return;
}
switch(inst->U.I.Opcode){
case RC_OPCODE_MOV:
@@ -140,7 +139,7 @@ static void get_incr_amount(void * data, struct rc_instruction * inst,
if(file != RC_FILE_TEMPORARY ||
count_inst->Index != index ||
(1 << GET_SWZ(count_inst->Swz,0) != mask)){
- return;
+ return;
}
/* Find the index of the counter register. */
opcode = rc_get_opcode_info(inst->U.I.Opcode);
@@ -185,13 +184,16 @@ static void get_incr_amount(void * data, struct rc_instruction * inst,
count_inst->Unknown = 1;
return;
}
-
}
-static int transform_const_loop(struct emulate_loop_state * s,
- struct loop_info * loop)
+/**
+ * If prog_inst_limit is -1, then all eligible loops will be unrolled regardless
+ * of how many iterations they have.
+ */
+static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
+ unsigned int prog_inst_limit)
{
- int end_loops = 1;
+ int end_loops;
int iterations;
struct count_inst count_inst;
float limit_value;
@@ -201,12 +203,12 @@ static int transform_const_loop(struct emulate_loop_state * s,
struct rc_instruction * inst;
/* Find the counter and the upper limit */
-
- if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], s->C)){
+
+ if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], c)){
limit = &loop->Cond->U.I.SrcReg[0];
counter = &loop->Cond->U.I.SrcReg[1];
}
- else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], s->C)){
+ else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], c)){
limit = &loop->Cond->U.I.SrcReg[1];
counter = &loop->Cond->U.I.SrcReg[0];
}
@@ -214,13 +216,13 @@ static int transform_const_loop(struct emulate_loop_state * s,
DBG("No constant limit.\n");
return 0;
}
-
+
/* Find the initial value of the counter */
counter_value.Src = counter;
counter_value.Value = 0.0f;
counter_value.HasValue = 0;
- counter_value.C = s->C;
- for(inst = s->C->Program.Instructions.Next; inst != loop->BeginLoop;
+ counter_value.C = c;
+ for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop;
inst = inst->Next){
rc_for_all_writes_mask(inst, update_const_value, &counter_value);
}
@@ -230,11 +232,12 @@ static int transform_const_loop(struct emulate_loop_state * s,
}
DBG("Initial counter value is %f\n", counter_value.Value);
/* Determine how the counter is modified each loop */
- count_inst.C = s->C;
+ count_inst.C = c;
count_inst.Index = counter->Index;
count_inst.Swz = counter->Swizzle;
count_inst.Amount = 0.0f;
count_inst.Unknown = 0;
+ end_loops = 1;
for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){
switch(inst->U.I.Opcode){
/* XXX In the future we might want to try to unroll nested
@@ -246,6 +249,16 @@ static int transform_const_loop(struct emulate_loop_state * s,
loop->EndLoop = inst;
end_loops--;
break;
+ case RC_OPCODE_BRK:
+ /* Don't unroll a loop if it has a BRK instruction
+ * other than the one used when testing the main
+ * conditional of the loop. */
+
+ /* Make sure we haven't entered a nested loop. */
+ if(inst != loop->Brk && end_loops == 1) {
+ return 0;
+ }
+ break;
/* XXX Check if the counter is modified within an if statement.
*/
case RC_OPCODE_IF:
@@ -266,17 +279,20 @@ static int transform_const_loop(struct emulate_loop_state * s,
/* Calculate the number of iterations of this loop. Keeping this
* simple, since we only support increment and decrement loops.
*/
- limit_value = get_constant_value(s->C, limit, 0);
+ limit_value = get_constant_value(c, limit, 0);
DBG("Limit is %f.\n", limit_value);
+ /* The iteration calculations are opposite of what you would expect.
+ * In a normal loop, if the condition is met, then the loop continues,
+ * but with our loops, if the condition is met, the loop is exited. */
switch(loop->Cond->U.I.Opcode){
- case RC_OPCODE_SGT:
- case RC_OPCODE_SLT:
+ case RC_OPCODE_SGE:
+ case RC_OPCODE_SLE:
iterations = (int) ceilf((limit_value - counter_value.Value) /
count_inst.Amount);
break;
- case RC_OPCODE_SLE:
- case RC_OPCODE_SGE:
+ case RC_OPCODE_SGT:
+ case RC_OPCODE_SLT:
iterations = (int) floorf((limit_value - counter_value.Value) /
count_inst.Amount) + 1;
break;
@@ -284,77 +300,85 @@ static int transform_const_loop(struct emulate_loop_state * s,
return 0;
}
+ if (prog_inst_limit > 0
+ && iterations > loop_max_possible_iterations(c, loop,
+ prog_inst_limit)) {
+ return 0;
+ }
+
DBG("Loop will have %d iterations.\n", iterations);
-
+
/* Prepare loop for unrolling */
rc_remove_instruction(loop->Cond);
rc_remove_instruction(loop->If);
rc_remove_instruction(loop->Brk);
rc_remove_instruction(loop->EndIf);
-
- loop_unroll(s, loop, iterations);
+
+ unroll_loop(c, loop, iterations);
loop->EndLoop = NULL;
return 1;
}
-/**
- * This function prepares a loop to be unrolled by converting it into an if
- * statement. Here is an outline of the conversion process:
- * BGNLOOP; -> BGNLOOP;
- * <Additional conditional code> -> <Additional conditional code>
- * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];
- * IF temp[0]; -> IF temp[0];
- * BRK; ->
- * ENDIF; -> <Loop Body>
- * <Loop Body> -> ENDIF;
- * ENDLOOP; -> ENDLOOP
- *
+/**
+ * @param c
+ * @param loop
* @param inst A pointer to a BGNLOOP instruction.
- * @return If the loop can be unrolled, a pointer to the first instruction of
- * the unrolled loop.
- * Otherwise, A pointer to the ENDLOOP instruction.
- * Null if there is an error.
+ * @return 1 if all of the members of loop were set.
+ * @return 0 if there was an error and some members of loop are still NULL.
*/
-static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
+static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop,
struct rc_instruction * inst)
{
- struct loop_info *loop;
struct rc_instruction * ptr;
- memory_pool_array_reserve(&s->C->Pool, struct loop_info,
- s->Loops, s->LoopCount, s->LoopReserved, 1);
-
- loop = &s->Loops[s->LoopCount++];
- memset(loop, 0, sizeof(struct loop_info));
if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){
- rc_error(s->C, "expected BGNLOOP\n", __FUNCTION__);
- return NULL;
+ rc_error(c, "%s: expected BGNLOOP", __FUNCTION__);
+ return 0;
}
+
+ memset(loop, 0, sizeof(struct loop_info));
+
loop->BeginLoop = inst;
- for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next){
+ for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) {
+
+ if (ptr == &c->Program.Instructions) {
+ rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n",
+ __FUNCTION__);
+ return 0;
+ }
+
switch(ptr->U.I.Opcode){
case RC_OPCODE_BGNLOOP:
- /* Nested loop */
- ptr = transform_loop(s, ptr);
- if(!ptr){
- return NULL;
+ {
+ /* Nested loop, skip ahead to the end. */
+ unsigned int loop_depth = 1;
+ for(ptr = ptr->Next; ptr != &c->Program.Instructions;
+ ptr = ptr->Next){
+ if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ loop_depth++;
+ } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
+ if (!--loop_depth) {
+ break;
+ }
+ }
+ }
+ if (ptr == &c->Program.Instructions) {
+ rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n",
+ __FUNCTION__);
+ return 0;
}
break;
+ }
case RC_OPCODE_BRK:
- loop->Brk = ptr;
- if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF){
- rc_error(s->C,
- "%s: expected ENDIF\n",__FUNCTION__);
- return NULL;
- }
- loop->EndIf = ptr->Next;
- if(ptr->Prev->U.I.Opcode != RC_OPCODE_IF){
- rc_error(s->C,
- "%s: expected IF\n", __FUNCTION__);
- return NULL;
+ if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF
+ || ptr->Prev->U.I.Opcode != RC_OPCODE_IF
+ || loop->Brk){
+ continue;
}
+ loop->Brk = ptr;
loop->If = ptr->Prev;
+ loop->EndIf = ptr->Next;
switch(loop->If->Prev->U.I.Opcode){
case RC_OPCODE_SLT:
case RC_OPCODE_SGE:
@@ -364,18 +388,58 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
case RC_OPCODE_SNE:
break;
default:
- rc_error(s->C, "%s expected conditional\n",
+ rc_error(c, "%s: expected conditional",
__FUNCTION__);
- return NULL;
+ return 0;
}
loop->Cond = loop->If->Prev;
- ptr = loop->EndIf;
break;
+
case RC_OPCODE_ENDLOOP:
loop->EndLoop = ptr;
break;
}
}
+
+ if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf
+ && loop->Cond && loop->EndLoop) {
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * This function prepares a loop to be unrolled by converting it into an if
+ * statement. Here is an outline of the conversion process:
+ * BGNLOOP; -> BGNLOOP;
+ * <Additional conditional code> -> <Additional conditional code>
+ * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];
+ * IF temp[0]; -> IF temp[0];
+ * BRK; ->
+ * ENDIF; -> <Loop Body>
+ * <Loop Body> -> ENDIF;
+ * ENDLOOP; -> ENDLOOP
+ *
+ * @param inst A pointer to a BGNLOOP instruction.
+ * @return 1 for success, 0 for failure
+ */
+static int transform_loop(struct emulate_loop_state * s,
+ struct rc_instruction * inst)
+{
+ struct loop_info * loop;
+
+ memory_pool_array_reserve(&s->C->Pool, struct loop_info,
+ s->Loops, s->LoopCount, s->LoopReserved, 1);
+
+ loop = &s->Loops[s->LoopCount++];
+
+ if (!build_loop_info(s->C, loop, inst))
+ return 0;
+
+ if(try_unroll_loop(s->C, loop, s->prog_inst_limit)){
+ return 1;
+ }
+
/* Reverse the conditional instruction */
switch(loop->Cond->U.I.Opcode){
case RC_OPCODE_SGE:
@@ -398,43 +462,51 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
break;
default:
rc_error(s->C, "loop->Cond is not a conditional.\n");
- return NULL;
- }
-
- /* Check if the number of loops is known at compile time. */
- if(transform_const_loop(s, loop)){
- return loop->BeginLoop->Next;
+ return 0;
}
- /* Prepare the loop to be unrolled */
+ /* Prepare the loop to be emulated */
rc_remove_instruction(loop->Brk);
rc_remove_instruction(loop->EndIf);
rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf);
- return loop->EndLoop;
+ return 1;
}
-void rc_transform_unroll_loops(struct radeon_compiler *c,
- struct emulate_loop_state * s)
+void rc_transform_loops(struct radeon_compiler *c,
+ struct emulate_loop_state * s, int prog_inst_limit)
{
struct rc_instruction * ptr;
-
+
memset(s, 0, sizeof(struct emulate_loop_state));
s->C = c;
- ptr = s->C->Program.Instructions.Next;
- while(ptr != &s->C->Program.Instructions) {
+ s->prog_inst_limit = prog_inst_limit;
+ for(ptr = s->C->Program.Instructions.Next;
+ ptr != &s->C->Program.Instructions; ptr = ptr->Next) {
if(ptr->Type == RC_INSTRUCTION_NORMAL &&
ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
- ptr = transform_loop(s, ptr);
- if(!ptr){
+ if (!transform_loop(s, ptr))
return;
+ }
+ }
+}
+
+void rc_unroll_loops(struct radeon_compiler *c, int prog_inst_limit)
+{
+ struct rc_instruction * inst;
+ struct loop_info loop;
+
+ for(inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions; inst = inst->Next) {
+
+ if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ if (build_loop_info(c, &loop, inst)) {
+ try_unroll_loop(c, &loop, prog_inst_limit);
}
}
- ptr = ptr->Next;
}
}
-void rc_emulate_loops(struct emulate_loop_state *s,
- unsigned int max_instructions)
+void rc_emulate_loops(struct emulate_loop_state *s, int prog_inst_limit)
{
int i;
/* Iterate backwards of the list of loops so that loops that nested
@@ -444,8 +516,8 @@ void rc_emulate_loops(struct emulate_loop_state *s,
if(!s->Loops[i].EndLoop){
continue;
}
- unsigned int iterations = loop_calc_iterations(s, &s->Loops[i],
- max_instructions);
- loop_unroll(s, &s->Loops[i], iterations);
+ unsigned int iterations = loop_max_possible_iterations(
+ s->C, &s->Loops[i], prog_inst_limit);
+ unroll_loop(s->C, &s->Loops[i], iterations);
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
index 7748813c4eb..bba1f68e308 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
@@ -21,12 +21,14 @@ struct emulate_loop_state {
struct loop_info * Loops;
unsigned int LoopCount;
unsigned int LoopReserved;
+ int prog_inst_limit;
};
-void rc_transform_unroll_loops(struct radeon_compiler *c,
- struct emulate_loop_state * s);
+void rc_transform_loops(struct radeon_compiler *c,
+ struct emulate_loop_state * s, int prog_inst_limit);
-void rc_emulate_loops(struct emulate_loop_state *s,
- unsigned int max_instructions);
+void rc_unroll_loops(struct radeon_compiler * c, int prog_inst_limit);
+
+void rc_emulate_loops(struct emulate_loop_state * s, int prog_inst_limit);
#endif /* RADEON_EMULATE_LOOPS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index 04f234f11d8..2ea830be7f9 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -386,8 +386,8 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.NumSrcRegs = 0,
},
{
- .Opcode = RC_OPCODE_CONTINUE,
- .Name = "CONTINUE",
+ .Opcode = RC_OPCODE_CONT,
+ .Name = "CONT",
.IsFlowControl = 1,
.NumSrcRegs = 0
},
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
index 8b9fa07dde2..6e18d6eb3f1 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -187,7 +187,7 @@ typedef enum {
RC_OPCODE_ENDLOOP,
- RC_OPCODE_CONTINUE,
+ RC_OPCODE_CONT,
/** special instruction, used in R300-R500 fragment program pair instructions
* indicates that the result of the alpha operation shall be replicated
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index eca06515367..7a3f35950a6 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -164,7 +164,8 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo
inst = inst->Next) {
/* XXX In the future we might be able to make the optimizer
* smart enough to handle loops. */
- if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP){
+ if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP
+ || inst->U.I.Opcode == RC_OPCODE_ENDLOOP){
return;
}
rc_for_all_reads_mask(inst, peephole_scan_read, &s);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
index 8a912da4613..ce72cd97ab2 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
@@ -65,6 +65,11 @@ struct regalloc_state {
struct hardware_register * HwTemporary;
unsigned int NumHwTemporaries;
+ /**
+ * If an instruction is inside of a loop, end_loop will be the
+ * IP of the ENDLOOP instruction, otherwise end_loop will be 0
+ */
+ int end_loop;
};
static void print_live_intervals(struct live_intervals * src)
@@ -178,10 +183,10 @@ static void scan_callback(void * data, struct rc_instruction * inst,
else
reg->Live.Start = inst->IP;
reg->Live.End = inst->IP;
- } else {
- if (inst->IP > reg->Live.End)
- reg->Live.End = inst->IP;
- }
+ } else if (s->end_loop)
+ reg->Live.End = s->end_loop;
+ else if (inst->IP > reg->Live.End)
+ reg->Live.End = inst->IP;
}
static void compute_live_intervals(struct regalloc_state * s)
@@ -191,6 +196,31 @@ static void compute_live_intervals(struct regalloc_state * s)
for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
inst != &s->C->Program.Instructions;
inst = inst->Next) {
+
+ /* For all instructions inside of a loop, the ENDLOOP
+ * instruction is used as the end of the live interval. */
+ if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && !s->end_loop) {
+ int loops = 1;
+ struct rc_instruction * tmp;
+ for(tmp = inst->Next;
+ tmp != &s->C->Program.Instructions;
+ tmp = tmp->Next) {
+ if (tmp->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ loops++;
+ break;
+ } else if (tmp->U.I.Opcode
+ == RC_OPCODE_ENDLOOP) {
+ if(!--loops) {
+ s->end_loop = tmp->IP;
+ break;
+ }
+ }
+ }
+ }
+
+ if (inst->IP == s->end_loop)
+ s->end_loop = 0;
+
rc_for_all_reads_mask(inst, scan_callback, s);
rc_for_all_writes_mask(inst, scan_callback, s);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index 3cc28972934..857aae55145 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -988,17 +988,22 @@ void radeonTransformKILP(struct radeon_compiler * c)
for (inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions; inst = inst->Next) {
- if (inst->U.I.Opcode != RC_OPCODE_KILP
- || inst->Prev->U.I.Opcode != RC_OPCODE_IF
- || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) {
+ if (inst->U.I.Opcode != RC_OPCODE_KILP)
continue;
- }
+
inst->U.I.Opcode = RC_OPCODE_KIL;
- inst->U.I.SrcReg[0] = negate(absolute(inst->Prev->U.I.SrcReg[0]));
- /* Remove IF */
- rc_remove_instruction(inst->Prev);
- /* Remove ENDIF */
- rc_remove_instruction(inst->Next);
+ if (inst->Prev->U.I.Opcode != RC_OPCODE_IF
+ || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) {
+ inst->U.I.SrcReg[0] = negate(builtin_one);
+ } else {
+
+ inst->U.I.SrcReg[0] =
+ negate(absolute(inst->Prev->U.I.SrcReg[0]));
+ /* Remove IF */
+ rc_remove_instruction(inst->Prev);
+ /* Remove ENDIF */
+ rc_remove_instruction(inst->Next);
+ }
}
}
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index e4b302bbad9..3d2f8928fa6 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -461,7 +461,7 @@ static void r300InitGLExtensions(GLcontext *ctx)
if (!r300->radeon.radeonScreen->drmSupportsOcclusionQueries) {
_mesa_disable_extension(ctx, "GL_ARB_occlusion_query");
}
- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV350)
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_R420)
_mesa_enable_extension(ctx, "GL_ARB_half_float_vertex");
if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
index f25264b6f2d..f7705b0f6fe 100644
--- a/src/mesa/drivers/dri/r300/r300_reg.h
+++ b/src/mesa/drivers/dri/r300/r300_reg.h
@@ -441,6 +441,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228
#define R300_VAP_GB_HORZ_DISC_ADJ 0x222c
+#define R300_VAP_PVS_FLOW_CNTL_ADDRS_0 0x2230
+#define R300_PVS_FC_ACT_ADRS(x) ((x) << 0)
+#define R300_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 8)
+#define R300_PVS_FC_LAST_INST(x) ((x) << 16)
+#define R300_PVS_FC_RTN_INST(x) ((x) << 24)
+
/* gap */
/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between
@@ -459,6 +465,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_2288_R300 0x00750000 /* -- nh */
# define R300_2288_RV350 0x0000FFFF /* -- Vladimir */
+#define R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0 0x2290
+#define R300_PVS_FC_LOOP_INIT_VAL(x) ((x) << 0)
+#define R300_PVS_FC_LOOP_STEP_VAL(x) ((x) << 8)
+
/* gap */
/* Addresses are relative to the vertex program instruction area of the
@@ -489,6 +499,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_VAP_PVS_CODE_CNTL_1 0x22D8
# define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0
#define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC
+#define R300_VAP_PVS_FC_OPC_JUMP(x) (1 << (2 * (x)))
+#define R300_VAP_PVS_FC_OPC_LOOP(x) (2 << (2 * (x)))
+#define R300_VAP_PVS_FC_OPC_JSR(x) (3 << (2 * (x)))
/* The entire range from 0x2300 to 0x2AC inclusive seems to be used for
* immediate vertices
@@ -505,6 +518,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* write 0 to indicate end of packet? */
#define R300_VAP_VTX_END_OF_PKT 0x24AC
+#define R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0 0x2500
+#define R500_PVS_FC_ACT_ADRS(x) ((x) << 0)
+#define R500_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 16)
+
+#define R500_VAP_PVS_FLOW_CNTL_ADDRS_UW_0 0x2504
+#define R500_PVS_FC_LAST_INST(x) ((x) << 0)
+#define R500_PVS_FC_RTN_INST(x) ((x) << 16)
+
/* gap */
/* These are values from r300_reg/r300_reg.h - they are known to be correct
diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c
index bb8f91491f5..cf89ab7ec3d 100644
--- a/src/mesa/drivers/dri/r300/r300_render.c
+++ b/src/mesa/drivers/dri/r300/r300_render.c
@@ -327,6 +327,8 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
BATCH_LOCALS(&rmesa->radeon);
int type, num_verts;
+ radeon_prepare_render(&rmesa->radeon);
+
type = r300PrimitiveType(rmesa, prim);
num_verts = r300NumVerts(rmesa, end - start, prim);
diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
index 4ba6740e3d9..94588698265 100644
--- a/src/mesa/drivers/dri/r300/r300_texstate.c
+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
@@ -152,8 +152,8 @@ int32_t r300TranslateTexFormat(gl_format mesaFormat)
case MESA_FORMAT_Z32:
return R300_EASY_TX_FORMAT(X, X, X, X, X32);
/* EXT_texture_sRGB */
- case MESA_FORMAT_SRGBA8:
- return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA;
+ case MESA_FORMAT_SARGB8:
+ return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA;
case MESA_FORMAT_SLA8:
return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8) | R300_TX_FORMAT_GAMMA;
case MESA_FORMAT_SL8:
diff --git a/src/mesa/drivers/dri/r600/r600_blit.c b/src/mesa/drivers/dri/r600/r600_blit.c
index 172f85eb264..27acff9c166 100644
--- a/src/mesa/drivers/dri/r600/r600_blit.c
+++ b/src/mesa/drivers/dri/r600/r600_blit.c
@@ -72,7 +72,7 @@ unsigned r600_check_blit(gl_format mesa_format)
case MESA_FORMAT_Z24_S8:
case MESA_FORMAT_Z16:
case MESA_FORMAT_Z32:
- case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
case MESA_FORMAT_SLA8:
case MESA_FORMAT_SL8:
break;
@@ -320,9 +320,9 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
break;
- case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
format = COLOR_8_8_8_8;
- comp_swap = SWAP_STD_REV;
+ comp_swap = SWAP_ALT;
SETbit(cb_color0_info, SOURCE_FORMAT_bit);
SETfield(cb_color0_info, NUMBER_SRGB, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
break;
@@ -390,13 +390,20 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
END_BATCH();
- BEGIN_BATCH_NO_AUTOSTATE(12);
+ BEGIN_BATCH_NO_AUTOSTATE(9);
R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), cb_color0_size);
R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), cb_color0_view);
- R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), cb_color0_info);
R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), 0);
END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), cb_color0_info);
+ R600_OUT_BATCH_RELOC(0,
+ bo,
+ 0,
+ 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
+ END_BATCH();
+
COMMIT_BATCH();
}
@@ -1043,17 +1050,17 @@ set_tex_resource(context_t * context,
SETfield(sq_tex_resource4, SQ_SEL_X,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
- case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
SETfield(sq_tex_resource1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- SETfield(sq_tex_resource4, SQ_SEL_W,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(sq_tex_resource4, SQ_SEL_Z,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(sq_tex_resource4, SQ_SEL_Y,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_W,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
SETbit(sq_tex_resource4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
break;
@@ -1477,7 +1484,6 @@ set_default_state(context_t *context)
(CLRCMP_SEL_SRC << CLRCMP_FCN_SEL_shift));
R600_OUT_BATCH_REGVAL(SQ_VTX_BASE_VTX_LOC, 0);
R600_OUT_BATCH_REGVAL(SQ_VTX_START_INST_LOC, 0);
- R600_OUT_BATCH_REGVAL(DB_DEPTH_INFO, 0);
R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, 0);
R600_OUT_BATCH_REGVAL(CB_SHADER_MASK, (OUTPUT0_ENABLE_mask));
R600_OUT_BATCH_REGVAL(CB_TARGET_MASK, (TARGET0_ENABLE_mask));
@@ -1526,6 +1532,7 @@ set_default_state(context_t *context)
R600_OUT_BATCH(0);
R600_OUT_BATCH_REGVAL(VGT_STRMOUT_BUFFER_EN, 0);
+ R600_OUT_BATCH_REGVAL(SX_ALPHA_TEST_CONTROL, 0);
END_BATCH();
COMMIT_BATCH();
@@ -1607,7 +1614,7 @@ unsigned r600_blit(GLcontext *ctx,
/* Flush is needed to make sure that source buffer has correct data */
radeonFlush(ctx);
- rcommonEnsureCmdBufSpace(&context->radeon, 304, __FUNCTION__);
+ rcommonEnsureCmdBufSpace(&context->radeon, 308, __FUNCTION__);
/* load shaders */
load_shaders(context->radeon.glCtx);
@@ -1632,7 +1639,7 @@ unsigned r600_blit(GLcontext *ctx,
set_tex_sampler(context);
/* dst */
- /* 27 */
+ /* 31 */
set_render_target(context, dst_bo, dst_mesaformat,
dst_pitch, dst_width, dst_height, dst_offset);
/* scissors */
diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
index 84d9d423124..389b0412baa 100644
--- a/src/mesa/drivers/dri/r600/r600_context.c
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -72,6 +72,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R600_ENABLE_GLSL_TEST 1
#define need_GL_VERSION_2_0
+#define need_GL_VERSION_2_1
+#define need_GL_ARB_draw_elements_base_vertex
#define need_GL_ARB_occlusion_query
#define need_GL_ARB_point_parameters
#define need_GL_ARB_vertex_program
@@ -140,6 +142,7 @@ static const struct dri_extension card_extensions[] = {
{"GL_NV_vertex_program", GL_NV_vertex_program_functions},
{"GL_SGIS_generate_mipmap", NULL},
{"GL_ARB_pixel_buffer_object", NULL},
+ {"GL_ARB_draw_elements_base_vertex", GL_ARB_draw_elements_base_vertex_functions },
{NULL, NULL}
/* *INDENT-ON* */
};
@@ -157,6 +160,7 @@ static const struct dri_extension mm_extensions[] = {
static const struct dri_extension gl_20_extension[] = {
#ifdef R600_ENABLE_GLSL_TEST
{"GL_ARB_shading_language_100", GL_VERSION_2_0_functions },
+ {"GL_ARB_shading_language_120", GL_VERSION_2_1_functions },
#else
{"GL_VERSION_2_0", GL_VERSION_2_0_functions },
#endif /* R600_ENABLE_GLSL_TEST */
diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c
index 41419f84601..512a52ede3e 100644
--- a/src/mesa/drivers/dri/r600/r600_tex.c
+++ b/src/mesa/drivers/dri/r600/r600_tex.c
@@ -431,7 +431,7 @@ unsigned r600IsFormatRenderable(gl_format mesa_format)
case MESA_FORMAT_Z24_S8:
case MESA_FORMAT_Z16:
case MESA_FORMAT_Z32:
- case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
case MESA_FORMAT_SLA8:
case MESA_FORMAT_SL8:
return 1;
diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c
index 1600033b9bd..ba3690b70ed 100644
--- a/src/mesa/drivers/dri/r600/r600_texstate.c
+++ b/src/mesa/drivers/dri/r600/r600_texstate.c
@@ -605,17 +605,17 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, gl_format mesa
}
break;
/* EXT_texture_sRGB */
- case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
break;
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index 99a33df4fcb..9c954cbf70c 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -275,7 +275,10 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
case 2:
format = FMT_8_8; break;
case 3:
- format = FMT_8_8_8; break;
+ /* for some (small/unaligned) strides using 4 comps works
+ * better, probably same as GL_SHORT below
+ * test piglit/draw-vertices */
+ format = FMT_8_8_8_8; break;
case 4:
format = FMT_8_8_8_8; break;
default:
@@ -2872,25 +2875,92 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
{
+ /*
+ * r600 - trunc to -PI..PI range
+ * r700 - normalize by dividing by 2PI
+ * see fdo bug 27901
+ */
+
int tmp;
checkop1(pAsm);
tmp = gethelpr(pAsm);
- pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
pAsm->D.dst.reg = tmp;
- pAsm->D.dst.writex = 1;
assemble_src(pAsm, 0, -1);
pAsm->S[1].src.rtype = SRC_REC_LITERAL;
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
+
pAsm->D2.dst2.literal_slots = 1;
pAsm->C[0].f = 1/(3.1415926535 * 2);
- pAsm->C[1].f = 0.0F;
- next_ins(pAsm);
+ pAsm->C[1].f = 0.5f;
+
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+
+ if(( GL_FALSE == next_ins(pAsm) ))
+ {
+ return GL_FALSE;
+ }
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+
+ pAsm->S[1].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
+
+ pAsm->D2.dst2.literal_slots = 1;
+
+ if (pAsm->bR6xx)
+ {
+ pAsm->C[0].f = 3.1415926535897f * 2.0f;
+ pAsm->C[1].f = -3.1415926535897f;
+ }
+ else
+ {
+ pAsm->C[0].f = 1.0f;
+ pAsm->C[1].f = -0.5f;
+ }
+
+ if(( GL_FALSE == next_ins(pAsm) ))
+ {
+ return GL_FALSE;
+ }
pAsm->D.dst.opcode = opcode;
pAsm->D.dst.math = 1;
@@ -4030,22 +4100,79 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
checkop1(pAsm);
tmp = gethelpr(pAsm);
- /* tmp.x = src /2*PI */
- pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
pAsm->D.dst.reg = tmp;
- pAsm->D.dst.writex = 1;
assemble_src(pAsm, 0, -1);
pAsm->S[1].src.rtype = SRC_REC_LITERAL;
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
+
pAsm->D2.dst2.literal_slots = 1;
pAsm->C[0].f = 1/(3.1415926535 * 2);
- pAsm->C[1].f = 0.0F;
+ pAsm->C[1].f = 0.5F;
- next_ins(pAsm);
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+
+ if(( GL_FALSE == next_ins(pAsm) ))
+ {
+ return GL_FALSE;
+ }
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+
+ pAsm->S[1].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
+
+ pAsm->D2.dst2.literal_slots = 1;
+
+ if(pAsm->bR6xx) {
+ pAsm->C[0].f = 3.1415926535897f * 2.0f;
+ pAsm->C[1].f = -3.1415926535897f;
+ } else {
+ pAsm->C[0].f = 1.0f;
+ pAsm->C[1].f = -0.5f;
+ }
+
+ if(( GL_FALSE == next_ins(pAsm) ))
+ {
+ return GL_FALSE;
+ }
// COS dst.x, a.x
pAsm->D.dst.opcode = SQ_OP2_INST_COS;
@@ -6473,7 +6600,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
* results are undefined anyway */
if(export_count == 0)
{
- Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE);
+ Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, pR700AsmCode->starting_export_register_number, GL_FALSE);
}
if(pR700AsmCode->cf_last_export_ptr != NULL)
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
index cefda3ac4ba..bf8063391a2 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.c
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -265,17 +265,6 @@ static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom)
if (context->radeon.tcl.aos_count == 0)
return;
- BEGIN_BATCH_NO_AUTOSTATE(6);
- R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
- R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
- R600_OUT_BATCH(0);
-
- R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
- R600_OUT_BATCH(mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX);
- R600_OUT_BATCH(0);
- END_BATCH();
- COMMIT_BATCH();
-
for(i=0; i<VERT_ATTRIB_MAX; i++) {
if(vp->mesa_program->Base.InputsRead & (1 << i))
{
@@ -523,9 +512,9 @@ static void r700SetRenderTarget(context_t *context, int id)
CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
CLEARbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
break;
- case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
format = COLOR_8_8_8_8;
- comp_swap = SWAP_STD_REV;
+ comp_swap = SWAP_ALT;
number_type = NUMBER_SRGB;
SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
break;
@@ -617,18 +606,25 @@ static void r700SendDepthTargetState(GLcontext *ctx, struct radeon_state_atom *a
r700SetDepthTarget(context);
- BEGIN_BATCH_NO_AUTOSTATE(8 + 2);
+ BEGIN_BATCH_NO_AUTOSTATE(7 + 2);
R600_OUT_BATCH_REGSEQ(DB_DEPTH_SIZE, 2);
R600_OUT_BATCH(r700->DB_DEPTH_SIZE.u32All);
R600_OUT_BATCH(r700->DB_DEPTH_VIEW.u32All);
- R600_OUT_BATCH_REGSEQ(DB_DEPTH_BASE, 2);
+ R600_OUT_BATCH_REGSEQ(DB_DEPTH_BASE, 1);
R600_OUT_BATCH(r700->DB_DEPTH_BASE.u32All);
- R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All);
R600_OUT_BATCH_RELOC(r700->DB_DEPTH_BASE.u32All,
rrb->bo,
r700->DB_DEPTH_BASE.u32All,
0, RADEON_GEM_DOMAIN_VRAM, 0);
END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGSEQ(DB_DEPTH_INFO, 1);
+ R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All);
+ R600_OUT_BATCH_RELOC(r700->DB_DEPTH_INFO.u32All,
+ rrb->bo,
+ r700->DB_DEPTH_INFO.u32All,
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
if ((context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) &&
(context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)) {
@@ -687,27 +683,35 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom *
BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
R600_OUT_BATCH_REGSEQ(CB_COLOR0_TILE + (4 * id), 1);
R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_TILE.u32All);
- R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All,
+ R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_TILE.u32All,
rrb->bo,
- r700->render_target[id].CB_COLOR0_BASE.u32All,
+ r700->render_target[id].CB_COLOR0_TILE.u32All,
0, RADEON_GEM_DOMAIN_VRAM, 0);
END_BATCH();
BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
R600_OUT_BATCH_REGSEQ(CB_COLOR0_FRAG + (4 * id), 1);
R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_FRAG.u32All);
- R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All,
+ R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_FRAG.u32All,
rrb->bo,
- r700->render_target[id].CB_COLOR0_BASE.u32All,
+ r700->render_target[id].CB_COLOR0_FRAG.u32All,
0, RADEON_GEM_DOMAIN_VRAM, 0);
END_BATCH();
- BEGIN_BATCH_NO_AUTOSTATE(12);
+ BEGIN_BATCH_NO_AUTOSTATE(9);
R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), r700->render_target[id].CB_COLOR0_SIZE.u32All);
R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), r700->render_target[id].CB_COLOR0_VIEW.u32All);
- R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), r700->render_target[id].CB_COLOR0_INFO.u32All);
R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), r700->render_target[id].CB_COLOR0_MASK.u32All);
END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), r700->render_target[id].CB_COLOR0_INFO.u32All);
+ R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_INFO.u32All,
+ rrb->bo,
+ r700->render_target[id].CB_COLOR0_INFO.u32All,
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
+
+ END_BATCH();
+
COMMIT_BATCH();
}
@@ -1465,9 +1469,6 @@ static int check_vtx(GLcontext *ctx, struct radeon_state_atom *atom)
context_t *context = R700_CONTEXT(ctx);
int count = context->radeon.tcl.aos_count * 18;
- if (count)
- count += 6;
-
radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
return count;
}
@@ -1567,7 +1568,7 @@ void r600InitAtoms(context_t *context)
ALLOC_STATE(sq, always, 34, r700SendSQConfig);
ALLOC_STATE(db, always, 17, r700SendDBState);
ALLOC_STATE(stencil, always, 4, r700SendStencilState);
- ALLOC_STATE(db_target, always, 12, r700SendDepthTargetState);
+ ALLOC_STATE(db_target, always, 16, r700SendDepthTargetState);
ALLOC_STATE(sc, always, 15, r700SendSCState);
ALLOC_STATE(scissor, always, 22, r700SendScissorState);
ALLOC_STATE(aa, always, 12, r700SendAAState);
@@ -1578,7 +1579,7 @@ void r600InitAtoms(context_t *context)
ALLOC_STATE(poly, always, 10, r700SendPolyState);
ALLOC_STATE(cb, cb, 18, r700SendCBState);
ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState);
- ALLOC_STATE(cb_target, always, 29, r700SendRenderTargetState);
+ ALLOC_STATE(cb_target, always, 31, r700SendRenderTargetState);
ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState);
ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState);
ALLOC_STATE(sx, always, 9, r700SendSXState);
@@ -1590,7 +1591,7 @@ void r600InitAtoms(context_t *context)
ALLOC_STATE(ps, always, 24, r700SendPSState);
ALLOC_STATE(vs_consts, vs_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendVSConsts);
ALLOC_STATE(ps_consts, ps_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendPSConsts);
- ALLOC_STATE(vtx, vtx, (6 + (VERT_ATTRIB_MAX * 18)), r700SendVTXState);
+ ALLOC_STATE(vtx, vtx, (VERT_ATTRIB_MAX * 18), r700SendVTXState);
ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState);
ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState);
ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState);
diff --git a/src/mesa/drivers/dri/r600/r700_clear.c b/src/mesa/drivers/dri/r600/r700_clear.c
index 09c48565b68..d1008f28b9b 100644
--- a/src/mesa/drivers/dri/r600/r700_clear.c
+++ b/src/mesa/drivers/dri/r600/r700_clear.c
@@ -48,6 +48,7 @@ static GLboolean r700ClearFast(context_t *context, GLbitfield mask)
void r700Clear(GLcontext * ctx, GLbitfield mask)
{
context_t *context = R700_CONTEXT(ctx);
+ radeonContextPtr radeon = &context->radeon;
__DRIdrawable *dPriv = radeon_get_drawable(&context->radeon);
const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask[0]);
GLbitfield swrast_mask = 0, tri_mask = 0;
@@ -60,6 +61,8 @@ void r700Clear(GLcontext * ctx, GLbitfield mask)
context->radeon.front_buffer_dirty = GL_TRUE;
}
+ radeon_prepare_render(radeon);
+
if( GL_TRUE == r700ClearFast(context, mask) )
{
return;
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index 1929b7cc129..c5771f9fd0b 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -244,7 +244,8 @@ static int r700NumVerts(int num_verts, int prim)
return num_verts - verts_off;
}
-static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
+static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end,
+ int prim, GLint basevertex)
{
context_t *context = R700_CONTEXT(ctx);
BATCH_LOCALS(&context->radeon);
@@ -282,6 +283,7 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim
total_emit = 3 /* VGT_PRIMITIVE_TYPE */
+ 2 /* VGT_INDEX_TYPE */
+ 2 /* NUM_INSTANCES */
+ + 4 /* VTX_BASE_VTX_LOC + VTX_START_INST_LOC */
+ 5 + 2; /* DRAW_INDEX */
BEGIN_BATCH_NO_AUTOSTATE(total_emit);
@@ -294,6 +296,11 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim
// num instances
R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
R600_OUT_BATCH(1);
+ /* offset */
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 2));
+ R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
+ R600_OUT_BATCH(basevertex); //VTX_BASE_VTX_LOC
+ R600_OUT_BATCH(0); //VTX_START_INST_LOC
// draw packet
R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3));
R600_OUT_BATCH(context->ind_buf.bo_offset);
@@ -364,6 +371,7 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end,
total_emit += 3 /* VGT_PRIMITIVE_TYPE */
+ 2 /* VGT_INDEX_TYPE */
+ 2 /* NUM_INSTANCES */
+ + 4 /* VTX_BASE_VTX_LOC + VTX_START_INST_LOC */
+ 3; /* DRAW */
BEGIN_BATCH_NO_AUTOSTATE(total_emit);
@@ -376,6 +384,11 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end,
// num instances
R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
R600_OUT_BATCH(1);
+ /* offset */
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 2));
+ R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
+ R600_OUT_BATCH(0); //VTX_BASE_VTX_LOC
+ R600_OUT_BATCH(0); //VTX_START_INST_LOC
// draw packet
if(start == 0)
{
@@ -433,16 +446,16 @@ static GLuint r700PredictRenderSize(GLcontext* ctx,
dwords = PRE_EMIT_STATE_BUFSZ;
if (ib)
- dwords += nr_prims * 14;
+ dwords += nr_prims * 18;
else {
for (i = 0; i < nr_prims; ++i)
{
if (prim[i].start == 0)
- dwords += 10;
+ dwords += 14;
else if (prim[i].count > 0xffff)
- dwords += prim[i].count + 10;
+ dwords += prim[i].count + 14;
else
- dwords += ((prim[i].count + 1) / 2) + 10;
+ dwords += ((prim[i].count + 1) / 2) + 14;
}
}
@@ -625,11 +638,11 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input
stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB;
- if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT ||
+ if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT
#if MESA_BIG_ENDIAN
- getTypeSize(input[i]->Type) != 4 ||
+ || getTypeSize(input[i]->Type) != 4
#endif
- stride < 4)
+ )
{
r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]);
}
@@ -637,19 +650,10 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input
{
if (input[i]->BufferObj->Name)
{
- if (stride % 4 != 0)
- {
- assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0);
- r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]);
- context->stream_desc[index].is_named_bo = GL_FALSE;
- }
- else
- {
- context->stream_desc[index].stride = input[i]->StrideB;
- context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr;
- context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo;
- context->stream_desc[index].is_named_bo = GL_TRUE;
- }
+ context->stream_desc[index].stride = input[i]->StrideB;
+ context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr;
+ context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo;
+ context->stream_desc[index].is_named_bo = GL_TRUE;
}
else
{
@@ -932,7 +936,8 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx,
r700RunRenderPrimitive(ctx,
prim[i].start,
prim[i].start + prim[i].count,
- prim[i].mode);
+ prim[i].mode,
+ prim[i].basevertex);
else
r700RunRenderPrimitiveImmediate(ctx,
prim[i].start,
@@ -977,18 +982,24 @@ static void r700DrawPrims(GLcontext *ctx,
{
GLboolean retval = GL_FALSE;
+ context_t *context = R700_CONTEXT(ctx);
+ radeonContextPtr radeon = &context->radeon;
+ radeon_prepare_render(radeon);
+
/* This check should get folded into just the places that
* min/max index are really needed.
*/
- if (!index_bounds_valid) {
- vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
- }
- if (min_index) {
+ if (!vbo_all_varyings_in_vbos(arrays)) {
+ if (!index_bounds_valid)
+ vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
+ /* Do we want to rebase here? Rebasing minimizes the
+ * amount of data that has to be uploaded. */
+ if (min_index) {
vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrims );
return;
+ }
}
-
/* Make an attempt at drawing */
retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
index 137f3007ced..6a2a09eaf1a 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.c
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -461,11 +461,11 @@ static void r700TranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const s
stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size
: input->StrideB;
- if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT ||
+ if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT
#if MESA_BIG_ENDIAN
- getTypeSize(input->Type) != 4 ||
+ || getTypeSize(input->Type) != 4
#endif
- stride < 4)
+ )
{
pStreamDesc->type = GL_FLOAT;
diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h
index b7ee9a134bf..7d54fabebbc 100644
--- a/src/mesa/drivers/dri/radeon/radeon_chipset.h
+++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h
@@ -414,9 +414,9 @@ enum {
CHIP_FAMILY_R350,
CHIP_FAMILY_RV350,
CHIP_FAMILY_RV380,
+ CHIP_FAMILY_RS400,
CHIP_FAMILY_R420,
CHIP_FAMILY_RV410,
- CHIP_FAMILY_RS400,
CHIP_FAMILY_RS600,
CHIP_FAMILY_RS690,
CHIP_FAMILY_RS740,
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c
index 13f1f0611b8..c1a660af3d0 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common.c
@@ -708,7 +708,6 @@ void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb)
if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
radeon->front_cliprects = GL_TRUE;
- radeon->front_buffer_dirty = GL_TRUE;
} else {
rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer);
radeon->front_cliprects = GL_FALSE;
@@ -1132,17 +1131,13 @@ flush_front:
if (screen->dri2.loader && (screen->dri2.loader->base.version >= 2)
&& (screen->dri2.loader->flushFrontBuffer != NULL)) {
__DRIdrawable * drawable = radeon_get_drawable(radeon);
- (*screen->dri2.loader->flushFrontBuffer)(drawable, drawable->loaderPrivate);
- /* Only clear the dirty bit if front-buffer rendering is no longer
- * enabled. This is done so that the dirty bit can only be set in
- * glDrawBuffer. Otherwise the dirty bit would have to be set at
- * each of N places that do rendering. This has worse performances,
- * but it is much easier to get correct.
+ /* We set the dirty bit in radeon_prepare_render() if we're
+ * front buffer rendering once we get there.
*/
- if (!radeon->is_front_buffer_rendering) {
- radeon->front_buffer_dirty = GL_FALSE;
- }
+ radeon->front_buffer_dirty = GL_FALSE;
+
+ (*screen->dri2.loader->flushFrontBuffer)(drawable, drawable->loaderPrivate);
}
}
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index 5a7d52c4d2f..92663bf66d7 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -493,6 +493,50 @@ radeon_bits_per_pixel(const struct radeon_renderbuffer *rb)
return _mesa_get_format_bytes(rb->base.Format) * 8;
}
+/*
+ * Check if drawable has been invalidated by dri2InvalidateDrawable().
+ * Update renderbuffers if so. This prevents a client from accessing
+ * a backbuffer that has a swap pending but not yet completed.
+ *
+ * Returns immediately when the screen has no DRI2 loader. Otherwise the
+ * context's draw drawable and then its read drawable are each compared
+ * against the stamp cached in the context (dri2.draw_stamp and
+ * dri2.read_stamp). On a mismatch, radeon_update_renderbuffers() is
+ * called if the drawable's lastStamp also differs from its dri2.stamp,
+ * and the cached stamp is then refreshed. Finally, the front buffer is
+ * marked dirty when front-buffer rendering is active.
+ *
+ * See intel_prepare_render for equivalent code in intel driver.
+ *
+ */
+void radeon_prepare_render(radeonContextPtr radeon)
+{
+ __DRIcontext *driContext = radeon->dri.context;
+ __DRIdrawable *drawable;
+ __DRIscreen *screen;
+
+ screen = driContext->driScreenPriv;
+ if (!screen->dri2.loader)
+ return;
+
+ drawable = driContext->driDrawablePriv;
+ if (drawable->dri2.stamp != driContext->dri2.draw_stamp) {
+ if (drawable->lastStamp != drawable->dri2.stamp)
+ radeon_update_renderbuffers(driContext, drawable, GL_FALSE);
+
+ /* Intel driver does the equivalent of this, no clue if it is needed:
+ * radeon_draw_buffer(radeon->glCtx, &(drawable->driverPrivate)->base);
+ */
+ driContext->dri2.draw_stamp = drawable->dri2.stamp;
+ }
+
+ drawable = driContext->driReadablePriv;
+ if (drawable->dri2.stamp != driContext->dri2.read_stamp) {
+ if (drawable->lastStamp != drawable->dri2.stamp)
+ radeon_update_renderbuffers(driContext, drawable, GL_FALSE);
+ driContext->dri2.read_stamp = drawable->dri2.stamp;
+ }
+
+ /* If we're currently rendering to the front buffer, the rendering
+ * that will happen next will probably dirty the front buffer. So
+ * mark it as dirty here.
+ */
+ if (radeon->is_front_buffer_rendering)
+ radeon->front_buffer_dirty = GL_TRUE;
+}
+
void
radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable,
GLboolean front_only)
@@ -514,6 +558,11 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable,
screen = context->driScreenPriv;
radeon = (radeonContextPtr) context->driverPrivate;
+ /* Set this up front, so that in case our buffers get invalidated
+ * while we're getting new buffers, we don't clobber the stamp and
+ * thus ignore the invalidate. */
+ drawable->lastStamp = drawable->dri2.stamp;
+
if (screen->dri2.loader
&& (screen->dri2.loader->base.version > 2)
&& (screen->dri2.loader->getBuffersWithFormat != NULL)) {
@@ -650,6 +699,13 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable,
rb->base.Height = drawable->h;
rb->has_surface = 0;
+ /* r6xx+ tiling */
+ rb->tile_config = radeon->radeonScreen->tile_config;
+ rb->group_bytes = radeon->radeonScreen->group_bytes;
+ rb->num_channels = radeon->radeonScreen->num_channels;
+ rb->num_banks = radeon->radeonScreen->num_banks;
+ rb->r7xx_bank_op = radeon->radeonScreen->r7xx_bank_op;
+
if (buffers[i].attachment == __DRI_BUFFER_STENCIL && depth_bo) {
if (RADEON_DEBUG & RADEON_DRI)
fprintf(stderr, "(reusing depth buffer as stencil)\n");
@@ -678,7 +734,7 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable,
bo->flags |= RADEON_BO_FLAGS_MACRO_TILE;
if (tiling_flags & RADEON_TILING_MICRO)
bo->flags |= RADEON_BO_FLAGS_MICRO_TILE;
-
+
}
if (buffers[i].attachment == __DRI_BUFFER_DEPTH) {
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h
index 5156c5d0d0a..f06e5fdf244 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h
@@ -93,6 +93,13 @@ struct radeon_renderbuffer
GLuint pf_pending; /**< sequence number of pending flip */
GLuint vbl_pending; /**< vblank sequence number of pending flip */
__DRIdrawable *dPriv;
+
+ /* r6xx+ tiling */
+ GLuint tile_config;
+ GLint group_bytes;
+ GLint num_channels;
+ GLint num_banks;
+ GLint r7xx_bank_op;
};
struct radeon_framebuffer
@@ -614,5 +621,6 @@ GLboolean radeonMakeCurrent(__DRIcontext * driContextPriv,
__DRIdrawable * driDrawPriv,
__DRIdrawable * driReadPriv);
extern void radeonDestroyContext(__DRIcontext * driContextPriv);
+void radeon_prepare_render(radeonContextPtr radeon);
#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
index c877e6c1765..c6e5f110ea3 100644
--- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
+++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
@@ -133,7 +133,7 @@ static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree
height = _mesa_next_pow_two_32(lvl->height);
lvl->rowstride = get_texture_image_row_stride(rmesa, mt->mesaFormat, lvl->width, mt->tilebits);
- lvl->size = get_texture_image_size(mt->mesaFormat, lvl->rowstride, lvl->height, lvl->depth, mt->tilebits);
+ lvl->size = get_texture_image_size(mt->mesaFormat, lvl->rowstride, height, lvl->depth, mt->tilebits);
assert(lvl->size > 0);
diff --git a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
index dadb8002c7d..fb741173ca8 100644
--- a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
+++ b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
@@ -179,6 +179,9 @@ radeonReadPixels(GLcontext * ctx,
GLenum format, GLenum type,
const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ radeon_prepare_render(radeon);
+
if (do_blit_readpixels(ctx, x, y, width, height, format, type, pack, pixels))
return;
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
index 82107cc6aeb..fa97a19302c 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -213,6 +213,10 @@ static const GLuint __driNConfigOptions = 17;
static int getSwapInfo( __DRIdrawable *dPriv, __DRIswapInfo * sInfo );
+#ifndef RADEON_INFO_TILE_CONFIG
+#define RADEON_INFO_TILE_CONFIG 0x6
+#endif
+
static int
radeonGetParam(__DRIscreen *sPriv, int param, void *value)
{
@@ -232,6 +236,9 @@ radeonGetParam(__DRIscreen *sPriv, int param, void *value)
case RADEON_PARAM_NUM_Z_PIPES:
info.request = RADEON_INFO_NUM_Z_PIPES;
break;
+ case RADEON_INFO_TILE_CONFIG:
+ info.request = RADEON_INFO_TILE_CONFIG;
+ break;
default:
return -EINVAL;
}
@@ -376,6 +383,21 @@ static const __DRItexBufferExtension r600TexBufferExtension = {
};
#endif
+static void
+radeonDRI2Flush(__DRIdrawable *drawable) /* flush callback listed in radeonFlushExtension: flushes the GL context bound to this drawable */
+{
+ radeonContextPtr rmesa;
+
+ rmesa = (radeonContextPtr) drawable->driContextPriv->driverPrivate; /* NOTE(review): driContextPriv is dereferenced without a NULL check - confirm callers always have a bound context */
+ radeonFlush(rmesa->glCtx);
+}
+
+static const struct __DRI2flushExtensionRec radeonFlushExtension = { /* DRI2 flush extension; its .base is appended to screen->extensions in radeonCreateScreen2 */
+ { __DRI2_FLUSH, __DRI2_FLUSH_VERSION }, /* base: extension name and version */
+ radeonDRI2Flush, /* driver-specific flush callback defined just above */
+ dri2InvalidateDrawable, /* shared invalidate callback (not defined in this file section) */
+};
+
static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
{
screen->device_id = device_id;
@@ -1305,6 +1327,56 @@ radeonCreateScreen2(__DRIscreen *sPriv)
else
screen->chip_flags |= RADEON_CLASS_R600;
+ /* r6xx+ tiling */
+ if (IS_R600_CLASS(screen) && (sPriv->drm_version.minor >= 6)) {
+ ret = radeonGetParam(sPriv, RADEON_INFO_TILE_CONFIG, &temp);
+ if (ret)
+ fprintf(stderr, "failed to get tiling info\n");
+ else {
+ screen->tile_config = temp;
+ screen->r7xx_bank_op = 0;
+ switch((screen->tile_config & 0xe) >> 1) {
+ case 0:
+ screen->num_channels = 1;
+ break;
+ case 1:
+ screen->num_channels = 2;
+ break;
+ case 2:
+ screen->num_channels = 4;
+ break;
+ case 3:
+ screen->num_channels = 8;
+ break;
+ default:
+ fprintf(stderr, "bad channels\n");
+ break;
+ }
+ switch((screen->tile_config & 0x30) >> 4) {
+ case 0:
+ screen->num_banks = 4;
+ break;
+ case 1:
+ screen->num_banks = 8;
+ break;
+ default:
+ fprintf(stderr, "bad banks\n");
+ break;
+ }
+ switch((screen->tile_config & 0xc0) >> 6) {
+ case 0:
+ screen->group_bytes = 256;
+ break;
+ case 1:
+ screen->group_bytes = 512;
+ break;
+ default:
+ fprintf(stderr, "bad group_bytes\n");
+ break;
+ }
+ }
+ }
+
if (IS_R300_CLASS(screen)) {
ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_GB_PIPES, &temp);
if (ret) {
@@ -1379,6 +1451,8 @@ radeonCreateScreen2(__DRIscreen *sPriv)
screen->extensions[i++] = &r600TexBufferExtension.base;
#endif
+ screen->extensions[i++] = &radeonFlushExtension.base;
+
screen->extensions[i++] = NULL;
sPriv->extensions = screen->extensions;
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h
index 0d7e335fa3a..2b33201a538 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.h
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.h
@@ -112,6 +112,13 @@ typedef struct radeon_screen {
int kernel_mm;
drm_radeon_sarea_t *sarea; /* Private SAREA data */
struct radeon_bo_manager *bom;
+
+ /* r6xx+ tiling */
+ GLuint tile_config;
+ GLint group_bytes;
+ GLint num_channels;
+ GLint num_banks;
+ GLint r7xx_bank_op;
} radeonScreenRec, *radeonScreenPtr;
#define IS_R100_CLASS(screen) \
diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c
index 1adb6096033..9dfe2dd2433 100644
--- a/src/mesa/drivers/dri/radeon/radeon_span.c
+++ b/src/mesa/drivers/dri/radeon/radeon_span.c
@@ -111,7 +111,6 @@ static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
* two main types:
* - 1D (akin to macro-linear/micro-tiled on older asics)
* - 2D (akin to macro-tiled/micro-tiled on older asics)
- * only 1D tiling is implemented below
*/
#if defined(RADEON_R600)
static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
@@ -208,12 +207,190 @@ static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
return offset;
}
+static inline GLint r600_log2(GLint n)
+{
+ GLint log2 = 0;
+
+ while (n >>= 1)
+ ++log2;
+ return log2;
+}
+
+static inline GLint r600_2d_tile_helper(const struct radeon_renderbuffer * rrb,
+ GLint x, GLint y, GLint is_depth, GLint is_stencil)
+{
+ GLint group_bytes = rrb->group_bytes;
+ GLint num_channels = rrb->num_channels;
+ GLint num_banks = rrb->num_banks;
+ GLint r7xx_bank_op = rrb->r7xx_bank_op;
+ /* */
+ GLint group_bits = r600_log2(group_bytes);
+ GLint channel_bits = r600_log2(num_channels);
+ GLint bank_bits = r600_log2(num_banks);
+ GLint element_bytes = rrb->cpp;
+ GLint num_samples = 1;
+ GLint tile_width = 8;
+ GLint tile_height = 8;
+ GLint tile_thickness = 1;
+ GLint macro_tile_width = num_banks;
+ GLint macro_tile_height = num_channels;
+ GLint pitch_elements = (rrb->pitch / element_bytes) / tile_width;
+ GLint height = rrb->base.Height / tile_height;
+ GLint z = 0;
+ GLint sample_number = 0;
+ /* */
+ GLint tile_bytes;
+ GLint macro_tile_bytes;
+ GLint macro_tiles_per_row;
+ GLint macro_tiles_per_slice;
+ GLint slice_offset;
+ GLint macro_tile_row_index;
+ GLint macro_tile_column_index;
+ GLint macro_tile_offset;
+ GLint pixel_number = 0;
+ GLint element_offset;
+ GLint bank = 0;
+ GLint channel = 0;
+ GLint total_offset;
+ GLint group_mask = (1 << group_bits) - 1;
+ GLint offset_low;
+ GLint offset_high;
+ GLint offset = 0;
+
+ switch (num_channels) {
+ case 2:
+ default:
+ // channel[0] = x[3] ^ y[3]
+ channel |= (((x >> 3) ^ (y >> 3)) & 1) << 0;
+ break;
+ case 4:
+ // channel[0] = x[4] ^ y[3]
+ channel |= (((x >> 4) ^ (y >> 3)) & 1) << 0;
+ // channel[1] = x[3] ^ y[4]
+ channel |= (((x >> 3) ^ (y >> 4)) & 1) << 1;
+ break;
+ case 8:
+ // channel[0] = x[5] ^ y[3]
+ channel |= (((x >> 5) ^ (y >> 3)) & 1) << 0;
+ // channel[1] = x[4] ^ x[5] ^ y[4]
+ channel |= (((x >> 4) ^ (x >> 5) ^ (y >> 4)) & 1) << 1;
+ // channel[2] = x[3] ^ y[5]
+ channel |= (((x >> 3) ^ (y >> 5)) & 1) << 2;
+ break;
+ }
+
+ switch (num_banks) {
+ case 4:
+ // bank[0] = x[3] ^ y[4 + log2(num_channels)]
+ bank |= (((x >> 3) ^ (y >> (4 + channel_bits))) & 1) << 0;
+ if (r7xx_bank_op)
+ // bank[1] = x[4] ^ y[3 + log2(num_channels)] ^ x[5]
+ bank |= (((x >> 4) ^ (y >> (3 + channel_bits)) ^ (x >> 5)) & 1) << 1;
+ else
+ // bank[1] = x[4] ^ y[3 + log2(num_channels)]
+ bank |= (((x >> 4) ^ (y >> (3 + channel_bits))) & 1) << 1;
+ break;
+ case 8:
+ // bank[0] = x[3] ^ y[5 + log2(num_channels)]
+ bank |= (((x >> 3) ^ (y >> (5 + channel_bits))) & 1) << 0;
+ // bank[1] = x[4] ^ y[4 + log2(num_channels)] ^ y[5 + log2(num_channels)]
+ bank |= (((x >> 4) ^ (y >> (4 + channel_bits)) ^ (y >> (5 + channel_bits))) & 1) << 1;
+ if (r7xx_bank_op)
+ // bank[2] = x[5] ^ y[3 + log2(num_channels)] ^ x[6]
+ bank |= (((x >> 5) ^ (y >> (3 + channel_bits)) ^ (x >> 6)) & 1) << 2;
+ else
+ // bank[2] = x[5] ^ y[3 + log2(num_channels)]
+ bank |= (((x >> 5) ^ (y >> (3 + channel_bits))) & 1) << 2;
+ break;
+ }
+
+ tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples;
+ macro_tile_bytes = macro_tile_width * macro_tile_height * tile_bytes;
+ macro_tiles_per_row = pitch_elements / macro_tile_width;
+ macro_tiles_per_slice = macro_tiles_per_row * (height / macro_tile_height);
+ slice_offset = (z / tile_thickness) * macro_tiles_per_slice * macro_tile_bytes;
+ macro_tile_row_index = (y / tile_height) / macro_tile_height;
+ macro_tile_column_index = (x / tile_width) / macro_tile_width;
+ macro_tile_offset = ((macro_tile_row_index * macro_tiles_per_row) + macro_tile_column_index) * macro_tile_bytes;
+
+ if (is_depth) {
+ GLint pixel_offset = 0;
+
+ pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+ pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0]
+ pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1]
+ pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
+ pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2]
+ pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+ switch (element_bytes) {
+ case 2:
+ pixel_offset = pixel_number * element_bytes * num_samples;
+ break;
+ case 4:
+ /* stencil and depth data are stored separately within a tile.
+ * stencil is stored in a contiguous tile before the depth tile.
+ * stencil element is 1 byte, depth element is 3 bytes.
+ * stencil tile is 64 bytes.
+ */
+ if (is_stencil)
+ pixel_offset = pixel_number * 1 * num_samples;
+ else
+ pixel_offset = (pixel_number * 3 * num_samples) + 64;
+ break;
+ }
+ element_offset = pixel_offset + (sample_number * element_bytes);
+ } else {
+ GLint sample_offset;
+
+ switch (element_bytes) {
+ case 1:
+ pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+ pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
+ pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
+ pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
+ pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0]
+ pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+ break;
+ case 2:
+ pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+ pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
+ pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
+ pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0]
+ pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
+ pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+ break;
+ case 4:
+ pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+ pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
+ pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0]
+ pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2]
+ pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
+ pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+ break;
+ }
+ sample_offset = sample_number * (tile_bytes / num_samples);
+ element_offset = sample_offset + (pixel_number * element_bytes);
+ }
+ total_offset = (slice_offset + macro_tile_offset) >> (channel_bits + bank_bits);
+ total_offset += element_offset;
+
+ offset_low = total_offset & group_mask;
+ offset_high = (total_offset & ~group_mask) << (channel_bits + bank_bits);
+ offset = (bank << (group_bits + channel_bits)) + (channel << group_bits) + offset_low + offset_high;
+
+ return offset;
+}
+
/* depth buffers */
static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb,
GLint x, GLint y)
{
GLubyte *ptr = rrb->bo->ptr;
- GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 0);
+ GLint offset;
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+ offset = r600_2d_tile_helper(rrb, x, y, 1, 0);
+ else
+ offset = r600_1d_tile_helper(rrb, x, y, 1, 0);
return &ptr[offset];
}
@@ -221,7 +398,11 @@ static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb,
GLint x, GLint y)
{
GLubyte *ptr = rrb->bo->ptr;
- GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1);
+ GLint offset;
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+ offset = r600_2d_tile_helper(rrb, x, y, 1, 1);
+ else
+ offset = r600_1d_tile_helper(rrb, x, y, 1, 1);
return &ptr[offset];
}
@@ -235,7 +416,10 @@ static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb,
if (rrb->has_surface || !(rrb->bo->flags & mask)) {
offset = x * rrb->cpp + y * rrb->pitch;
} else {
- offset = r600_1d_tile_helper(rrb, x, y, 0, 0);
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+ offset = r600_2d_tile_helper(rrb, x, y, 0, 0);
+ else
+ offset = r600_1d_tile_helper(rrb, x, y, 0, 0);
}
return &ptr[offset];
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
index f2fcb46688a..29defe73a70 100644
--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
@@ -40,7 +40,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/macros.h"
#include "main/simple_list.h"
+#include "math/m_xform.h"
+
#include "swrast_setup/swrast_setup.h"
+
#include "tnl/tnl.h"
#include "tnl/t_context.h"
#include "tnl/t_pipeline.h"
@@ -408,6 +411,8 @@ static GLboolean radeon_run_render( GLcontext *ctx,
!radeon_dma_validate_render( ctx, VB ))
return GL_TRUE;
+ radeon_prepare_render(&rmesa->radeon);
+
tnl->Driver.Render.Start( ctx );
for (i = 0 ; i < VB->PrimitiveCount ; i++)
diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c
index ea796e1a45f..5e1718f9dfc 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c
@@ -252,6 +252,8 @@ void radeonTclPrimitive( GLcontext *ctx,
GLuint se_cntl;
GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
+ radeon_prepare_render(&rmesa->radeon);
+
if (newprim != rmesa->tcl.hw_primitive ||
!discrete_prim[hw_prim&0xf]) {
RADEON_NEWPRIM( rmesa );
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
index 29fd31ac23f..4cb0bb60c85 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
@@ -153,6 +153,9 @@ radeonCopyTexImage2D(GLcontext *ctx, GLenum target, GLint level,
_mesa_select_tex_image(ctx, texObj, target, level);
int srcx, srcy, dstx, dsty;
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ radeon_prepare_render(radeon);
+
if (border)
goto fail;
@@ -202,6 +205,9 @@ radeonCopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
struct gl_texture_object *texObj = _mesa_select_tex_object(ctx, texUnit, target);
struct gl_texture_image *texImage = _mesa_select_tex_image(ctx, texObj, target, level);
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ radeon_prepare_render(radeon);
+
if (!do_copy_texsubimage(ctx, target, level,
radeon_tex_obj(texObj), (radeon_texture_image *)texImage,
xoffset, yoffset, x, y, width, height)) {
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c
index d2b190e42e0..8c6a50d2f0d 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.c
@@ -551,7 +551,7 @@ gl_format radeonChooseTextureFormat(GLcontext * ctx,
case GL_SRGB8_ALPHA8:
case GL_COMPRESSED_SRGB:
case GL_COMPRESSED_SRGB_ALPHA:
- return MESA_FORMAT_SRGBA8;
+ return MESA_FORMAT_SARGB8;
case GL_SLUMINANCE:
case GL_SLUMINANCE8:
diff --git a/src/mesa/drivers/dri/savage/savagerender.c b/src/mesa/drivers/dri/savage/savagerender.c
index c369bb124c2..2d9e80e29c4 100644
--- a/src/mesa/drivers/dri/savage/savagerender.c
+++ b/src/mesa/drivers/dri/savage/savagerender.c
@@ -33,6 +33,8 @@
#include "main/imports.h"
#include "main/mtypes.h"
+#include "math/m_xform.h"
+
#include "tnl/t_context.h"
#include "savagecontext.h"
diff --git a/src/mesa/drivers/dri/unichrome/via_render.c b/src/mesa/drivers/dri/unichrome/via_render.c
index 896c43db1b0..4351f119555 100644
--- a/src/mesa/drivers/dri/unichrome/via_render.c
+++ b/src/mesa/drivers/dri/unichrome/via_render.c
@@ -33,6 +33,8 @@
#include "main/macros.h"
#include "main/mtypes.h"
+#include "math/m_xform.h"
+
#include "tnl/t_context.h"
#include "via_context.h"
diff --git a/src/mesa/main/arbprogram.h b/src/mesa/main/arbprogram.h
index 787ffd62f4b..e2e535e911e 100644
--- a/src/mesa/main/arbprogram.h
+++ b/src/mesa/main/arbprogram.h
@@ -27,7 +27,6 @@
#define ARBPROGRAM_H
-#include "compiler.h"
#include "glheader.h"
diff --git a/src/mesa/main/arrayobj.h b/src/mesa/main/arrayobj.h
index 8999edc724f..fdf7e2bca46 100644
--- a/src/mesa/main/arrayobj.h
+++ b/src/mesa/main/arrayobj.h
@@ -28,7 +28,7 @@
#ifndef ARRAYOBJ_H
#define ARRAYOBJ_H
-#include "context.h"
+#include "mtypes.h"
/**
* \file arrayobj.h
diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c
index 002448fedb8..753949be503 100644
--- a/src/mesa/main/attrib.c
+++ b/src/mesa/main/attrib.c
@@ -41,6 +41,7 @@
#include "hint.h"
#include "light.h"
#include "lines.h"
+#include "macros.h"
#include "matrix.h"
#include "multisample.h"
#include "points.h"
diff --git a/src/mesa/main/bufferobj.h b/src/mesa/main/bufferobj.h
index 912529cfdf9..f234d06c6cc 100644
--- a/src/mesa/main/bufferobj.h
+++ b/src/mesa/main/bufferobj.h
@@ -29,7 +29,7 @@
#define BUFFEROBJ_H
-#include "context.h"
+#include "mtypes.h"
/*
diff --git a/src/mesa/main/clear.c b/src/mesa/main/clear.c
index e76ab5527b0..49d86b3b1f1 100644
--- a/src/mesa/main/clear.c
+++ b/src/mesa/main/clear.c
@@ -35,6 +35,7 @@
#include "context.h"
#include "colormac.h"
#include "enums.h"
+#include "macros.h"
#include "state.h"
diff --git a/src/mesa/main/clear.h b/src/mesa/main/clear.h
index 4c78eeda488..6657370c4b6 100644
--- a/src/mesa/main/clear.h
+++ b/src/mesa/main/clear.h
@@ -27,7 +27,7 @@
#define CLEAR_H
-#include "main/mtypes.h"
+#include "glheader.h"
extern void GLAPIENTRY
diff --git a/src/mesa/main/clip.h b/src/mesa/main/clip.h
index d53afb45bdf..ac472d66e08 100644
--- a/src/mesa/main/clip.h
+++ b/src/mesa/main/clip.h
@@ -31,7 +31,7 @@
#ifndef CLIP_H
#define CLIP_H
-#include "mtypes.h"
+#include "glheader.h"
extern void GLAPIENTRY
_mesa_ClipPlane( GLenum plane, const GLdouble *equation );
diff --git a/src/mesa/main/colormac.h b/src/mesa/main/colormac.h
index 905f4e22837..245fb658bb3 100644
--- a/src/mesa/main/colormac.h
+++ b/src/mesa/main/colormac.h
@@ -33,9 +33,9 @@
#define COLORMAC_H
-#include "imports.h"
#include "config.h"
#include "macros.h"
+#include "mtypes.h"
/** \def BYTE_TO_CHAN
diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h
index 32f7d969d8d..0f2d1a8f8da 100644
--- a/src/mesa/main/config.h
+++ b/src/mesa/main/config.h
@@ -177,7 +177,7 @@
/**
* Per-program constants (power of two)
*
- * \c MAX_PROGRAM_LOCAL_PARAMS and \c MAX_UNIFORMS are just the assmebly shader
+ * \c MAX_PROGRAM_LOCAL_PARAMS and \c MAX_UNIFORMS are just the assembly shader
* and GLSL shader names for the same thing. They should \b always have the
* same value. Each refers to the number of vec4 values supplied as
* per-program parameters.
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index a369532e99c..b01fed1781e 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -462,7 +462,7 @@ _mesa_init_current(GLcontext *ctx)
/**
- * Init vertex/fragment program limits.
+ * Init vertex/fragment/geometry program limits.
* Important: drivers should override these with actual limits.
*/
static void
@@ -477,16 +477,18 @@ init_program_limits(GLenum type, struct gl_program_constants *prog)
prog->MaxLocalParams = MAX_PROGRAM_LOCAL_PARAMS;
prog->MaxUniformComponents = 4 * MAX_UNIFORMS;
- if (type == GL_VERTEX_PROGRAM_ARB) {
+ switch (type) {
+ case GL_VERTEX_PROGRAM_ARB:
prog->MaxParameters = MAX_VERTEX_PROGRAM_PARAMS;
prog->MaxAttribs = MAX_NV_VERTEX_PROGRAM_INPUTS;
prog->MaxAddressRegs = MAX_VERTEX_PROGRAM_ADDRESS_REGS;
- }
- else if (type == GL_FRAGMENT_PROGRAM_ARB) {
+ break;
+ case GL_FRAGMENT_PROGRAM_ARB:
prog->MaxParameters = MAX_NV_FRAGMENT_PROGRAM_PARAMS;
prog->MaxAttribs = MAX_NV_FRAGMENT_PROGRAM_INPUTS;
prog->MaxAddressRegs = MAX_FRAGMENT_PROGRAM_ADDRESS_REGS;
- } else {
+ break;
+ case MESA_GEOMETRY_PROGRAM:
prog->MaxParameters = MAX_NV_VERTEX_PROGRAM_PARAMS;
prog->MaxAttribs = MAX_NV_VERTEX_PROGRAM_INPUTS;
prog->MaxAddressRegs = MAX_VERTEX_PROGRAM_ADDRESS_REGS;
@@ -497,6 +499,9 @@ init_program_limits(GLenum type, struct gl_program_constants *prog)
prog->MaxGeometryUniformComponents = MAX_GEOMETRY_UNIFORM_COMPONENTS;
prog->MaxGeometryOutputVertices = MAX_GEOMETRY_OUTPUT_VERTICES;
prog->MaxGeometryTotalOutputComponents = MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS;
+ break;
+ default:
+ assert(0 && "Bad program type in init_program_limits()");
}
/* Set the native limits to zero. This implies that there is no native
diff --git a/src/mesa/main/convolve.c b/src/mesa/main/convolve.c
index 15e8dffc230..f63bddc44d5 100644
--- a/src/mesa/main/convolve.c
+++ b/src/mesa/main/convolve.c
@@ -37,6 +37,7 @@
#include "convolve.h"
#include "context.h"
#include "image.h"
+#include "macros.h"
#include "mtypes.h"
#include "state.h"
#include "main/dispatch.h"
diff --git a/src/mesa/main/debug.h b/src/mesa/main/debug.h
index 0449cb1798a..b517cc8259f 100644
--- a/src/mesa/main/debug.h
+++ b/src/mesa/main/debug.h
@@ -36,6 +36,9 @@
#ifndef _DEBUG_H
#define _DEBUG_H
+#include "glheader.h"
+#include "mtypes.h"
+
#if _HAVE_FULL_GL
extern void _mesa_print_tri_caps( const char *name, GLuint flags );
diff --git a/src/mesa/main/depthstencil.h b/src/mesa/main/depthstencil.h
index 3dde081f5a5..afbac77f0e2 100644
--- a/src/mesa/main/depthstencil.h
+++ b/src/mesa/main/depthstencil.h
@@ -26,6 +26,7 @@
#ifndef DEPTHSTENCIL_H
#define DEPTHSTENCIL_H
+#include "mtypes.h"
extern struct gl_renderbuffer *
_mesa_new_z24_renderbuffer_wrapper(GLcontext *ctx,
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 8c86b392c7b..9a84e5a79cf 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -97,6 +97,11 @@ _mesa_init_fbobjects(GLcontext *ctx)
DummyRenderbuffer.Delete = delete_dummy_renderbuffer;
}
+struct gl_framebuffer *
+_mesa_get_incomplete_framebuffer(void)
+{
+ return &DummyFramebuffer;
+}
/**
* Helper routine for getting a gl_renderbuffer.
diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h
index ff946033a4d..9850ee9aa23 100644
--- a/src/mesa/main/fbobject.h
+++ b/src/mesa/main/fbobject.h
@@ -26,10 +26,14 @@
#ifndef FBOBJECT_H
#define FBOBJECT_H
+#include "mtypes.h"
extern void
_mesa_init_fbobjects(GLcontext *ctx);
+extern struct gl_framebuffer *
+_mesa_get_incomplete_framebuffer(void);
+
extern struct gl_renderbuffer *
_mesa_lookup_renderbuffer(GLcontext *ctx, GLuint id);
diff --git a/src/mesa/main/fog.c b/src/mesa/main/fog.c
index 269ff3f8b99..9f26c012d66 100644
--- a/src/mesa/main/fog.c
+++ b/src/mesa/main/fog.c
@@ -27,6 +27,7 @@
#include "colormac.h"
#include "context.h"
#include "fog.h"
+#include "macros.h"
#include "mtypes.h"
diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c
index 49463fcc3c2..90449cc04f0 100644
--- a/src/mesa/main/formats.c
+++ b/src/mesa/main/formats.c
@@ -26,7 +26,6 @@
#include "imports.h"
#include "formats.h"
-#include "config.h"
/**
diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h
index aa14185628f..ad176caaa0f 100644
--- a/src/mesa/main/formats.h
+++ b/src/mesa/main/formats.h
@@ -33,7 +33,7 @@
#define FORMATS_H
-#include "main/mtypes.h"
+#include <GL/gl.h>
diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c
index 56558cfcc1e..e0aac26f62b 100644
--- a/src/mesa/main/framebuffer.c
+++ b/src/mesa/main/framebuffer.c
@@ -75,7 +75,6 @@ compute_depth_max(struct gl_framebuffer *fb)
fb->_MRD = (GLfloat)1.0 / fb->_DepthMaxF;
}
-
/**
* Create and initialize a gl_framebuffer object.
* This is intended for creating _window_system_ framebuffers, not generic
diff --git a/src/mesa/main/framebuffer.h b/src/mesa/main/framebuffer.h
index 1b6e3b1f0cb..2e9844282f8 100644
--- a/src/mesa/main/framebuffer.h
+++ b/src/mesa/main/framebuffer.h
@@ -26,6 +26,7 @@
#ifndef FRAMEBUFFER_H
#define FRAMEBUFFER_H
+#include "mtypes.h"
extern struct gl_framebuffer *
_mesa_create_framebuffer(const GLvisual *visual);
diff --git a/src/mesa/main/get.h b/src/mesa/main/get.h
index 320492b4ce2..99a004b71dd 100644
--- a/src/mesa/main/get.h
+++ b/src/mesa/main/get.h
@@ -32,7 +32,7 @@
#define GET_H
-#include "mtypes.h"
+#include "glheader.h"
extern void GLAPIENTRY
diff --git a/src/mesa/main/histogram.c b/src/mesa/main/histogram.c
index 3a65bb19260..4e482bcd54b 100644
--- a/src/mesa/main/histogram.c
+++ b/src/mesa/main/histogram.c
@@ -29,6 +29,7 @@
#include "context.h"
#include "image.h"
#include "histogram.h"
+#include "macros.h"
#include "main/dispatch.h"
diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c
index 63c28342f26..86aa6d0d702 100644
--- a/src/mesa/main/image.c
+++ b/src/mesa/main/image.c
@@ -32,7 +32,6 @@
#include "glheader.h"
#include "colormac.h"
-#include "context.h"
#include "enums.h"
#include "image.h"
#include "imports.h"
diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c
index 25080db40c4..46e5c932d0f 100644
--- a/src/mesa/main/imports.c
+++ b/src/mesa/main/imports.c
@@ -756,7 +756,7 @@ _mesa_strdup( const char *s )
float
_mesa_strtof( const char *s, char **end )
{
-#ifdef _GNU_SOURCE
+#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__)
static locale_t loc = NULL;
if (!loc) {
loc = newlocale(LC_CTYPE_MASK, "C", NULL);
diff --git a/src/mesa/main/mm.c b/src/mesa/main/mm.c
index 3ef38e94be9..25a0293703c 100644
--- a/src/mesa/main/mm.c
+++ b/src/mesa/main/mm.c
@@ -22,6 +22,11 @@
*
*/
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "compiler.h"
#include "mm.h"
diff --git a/src/mesa/main/mm.h b/src/mesa/main/mm.h
index df340808ac9..228721ca2a5 100644
--- a/src/mesa/main/mm.h
+++ b/src/mesa/main/mm.h
@@ -32,9 +32,6 @@
#define MM_H
-#include "imports.h"
-
-
struct mem_block {
struct mem_block *next, *prev;
struct mem_block *next_free, *prev_free;
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index cbb9eb84f33..8d92892ad7d 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -36,7 +36,6 @@
#include "main/glheader.h"
#include "main/config.h"
-#include "main/compiler.h"
#include "main/mfeatures.h"
#include "glapi/glapi.h"
#include "math/m_matrix.h" /* GLmatrix */
@@ -1808,6 +1807,11 @@ struct gl_program
/** Which texture target is being sampled (TEXTURE_1D/2D/3D/etc_INDEX) */
gl_texture_index SamplerTargets[MAX_SAMPLERS];
+ /** Bitmask of which register files are read/written with indirect
+ * addressing. Mask of (1 << PROGRAM_x) bits.
+ */
+ GLbitfield IndirectRegisterFiles;
+
/** Logical counts */
/*@{*/
GLuint NumInstructions;
@@ -2477,29 +2481,29 @@ struct gl_framebuffer
/**
- * Limits for vertex and fragment programs.
+ * Limits for vertex and fragment programs/shaders.
*/
struct gl_program_constants
{
/* logical limits */
GLuint MaxInstructions;
- GLuint MaxAluInstructions; /* fragment programs only, for now */
- GLuint MaxTexInstructions; /* fragment programs only, for now */
- GLuint MaxTexIndirections; /* fragment programs only, for now */
+ GLuint MaxAluInstructions;
+ GLuint MaxTexInstructions;
+ GLuint MaxTexIndirections;
GLuint MaxAttribs;
GLuint MaxTemps;
- GLuint MaxAddressRegs; /* vertex program only, for now */
+ GLuint MaxAddressRegs;
GLuint MaxParameters;
GLuint MaxLocalParams;
GLuint MaxEnvParams;
/* native/hardware limits */
GLuint MaxNativeInstructions;
- GLuint MaxNativeAluInstructions; /* fragment programs only, for now */
- GLuint MaxNativeTexInstructions; /* fragment programs only, for now */
- GLuint MaxNativeTexIndirections; /* fragment programs only, for now */
+ GLuint MaxNativeAluInstructions;
+ GLuint MaxNativeTexInstructions;
+ GLuint MaxNativeTexIndirections;
GLuint MaxNativeAttribs;
GLuint MaxNativeTemps;
- GLuint MaxNativeAddressRegs; /* vertex program only, for now */
+ GLuint MaxNativeAddressRegs;
GLuint MaxNativeParameters;
/* For shaders */
GLuint MaxUniformComponents;
diff --git a/src/mesa/main/multisample.h b/src/mesa/main/multisample.h
index 4305900cc49..998488ef420 100644
--- a/src/mesa/main/multisample.h
+++ b/src/mesa/main/multisample.h
@@ -26,6 +26,7 @@
#ifndef MULTISAMPLE_H
#define MULTISAMPLE_H
+#include "mtypes.h"
extern void GLAPIENTRY
_mesa_SampleCoverageARB(GLclampf value, GLboolean invert);
diff --git a/src/mesa/main/nvprogram.h b/src/mesa/main/nvprogram.h
index 8ee59661bd0..260a25ba9e9 100644
--- a/src/mesa/main/nvprogram.h
+++ b/src/mesa/main/nvprogram.h
@@ -29,6 +29,8 @@
#ifndef NVPROGRAM_H
#define NVPROGRAM_H
+#include "glheader.h"
+#include "mtypes.h"
extern void GLAPIENTRY
_mesa_ExecuteProgramNV(GLenum target, GLuint id, const GLfloat *params);
diff --git a/src/mesa/main/pixelstore.h b/src/mesa/main/pixelstore.h
index ee963f9ba3c..47bff4276d1 100644
--- a/src/mesa/main/pixelstore.h
+++ b/src/mesa/main/pixelstore.h
@@ -33,6 +33,7 @@
#include "glheader.h"
+#include "mtypes.h"
extern void GLAPIENTRY
diff --git a/src/mesa/main/querymatrix.c b/src/mesa/main/querymatrix.c
index 6f62415ba8c..32aaa79f7fb 100644
--- a/src/mesa/main/querymatrix.c
+++ b/src/mesa/main/querymatrix.c
@@ -36,9 +36,9 @@
#define INT_TO_FIXED(x) ((GLfixed) ((x) << 16))
#define FLOAT_TO_FIXED(x) ((GLfixed) ((x) * 65536.0))
-#if defined(WIN32) || defined(_WIN32_WCE)
+#if defined(_MSC_VER)
/* Oddly, the fpclassify() function doesn't exist in such a form
- * on Windows. This is an implementation using slightly different
+ * on MSVC. This is an implementation using slightly different
* lower-level Windows functions.
*/
#include <float.h>
@@ -72,7 +72,7 @@ fpclassify(double x)
#elif defined(__APPLE__) || defined(__CYGWIN__) || defined(__FreeBSD__) || \
defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \
- (defined(__sun) && defined(__C99FEATURES__))
+ (defined(__sun) && defined(__C99FEATURES__)) || defined(__MINGW32__)
/* fpclassify is available. */
diff --git a/src/mesa/main/remap.h b/src/mesa/main/remap.h
index 7afdee36f5b..a2a55f615d5 100644
--- a/src/mesa/main/remap.h
+++ b/src/mesa/main/remap.h
@@ -28,6 +28,7 @@
#define REMAP_H
+#include "main/compiler.h"
#include "main/mfeatures.h"
struct gl_function_pool_remap {
diff --git a/src/mesa/main/renderbuffer.h b/src/mesa/main/renderbuffer.h
index 7c205e141c1..bc92b269821 100644
--- a/src/mesa/main/renderbuffer.h
+++ b/src/mesa/main/renderbuffer.h
@@ -26,6 +26,11 @@
#ifndef RENDERBUFFER_H
#define RENDERBUFFER_H
+#include "glheader.h"
+#include "mtypes.h"
+
+struct gl_framebuffer;
+struct gl_renderbuffer;
extern void
_mesa_init_renderbuffer(struct gl_renderbuffer *rb, GLuint name);
diff --git a/src/mesa/main/restart.h b/src/mesa/main/restart.h
index 931cd701281..25f58f24c35 100644
--- a/src/mesa/main/restart.h
+++ b/src/mesa/main/restart.h
@@ -28,6 +28,7 @@
#ifndef RESTART_H
#define RESTART_H
+#include "glheader.h"
extern void GLAPIENTRY
_mesa_PrimitiveRestart(void);
diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c
index f9d10f3bbea..cbe004518a0 100644
--- a/src/mesa/main/shared.c
+++ b/src/mesa/main/shared.c
@@ -32,7 +32,6 @@
#include "imports.h"
#include "mtypes.h"
#include "hash.h"
-#include "arrayobj.h"
#if FEATURE_ATI_fragment_shader
#include "atifragshader.h"
#endif
diff --git a/src/mesa/main/shared.h b/src/mesa/main/shared.h
index ef164a14590..5166a0ce51f 100644
--- a/src/mesa/main/shared.h
+++ b/src/mesa/main/shared.h
@@ -25,6 +25,7 @@
#ifndef SHARED_H
#define SHARED_H
+#include "mtypes.h"
struct gl_shared_state *
_mesa_alloc_shared_state(GLcontext *ctx);
diff --git a/src/mesa/main/syncobj.h b/src/mesa/main/syncobj.h
index f23fa281e20..c53511995b1 100644
--- a/src/mesa/main/syncobj.h
+++ b/src/mesa/main/syncobj.h
@@ -31,7 +31,10 @@
#ifndef SYNCOBJ_H
#define SYNCOBJ_H
-#include "context.h"
+#include "glheader.h"
+#include "mtypes.h"
+
+struct dd_function_table;
extern void
_mesa_init_sync_object_functions(struct dd_function_table *driver);
diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index f4b1119eb17..e911524cbc5 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -33,7 +33,6 @@
#include "glheader.h"
#include "imports.h"
#include "colormac.h"
-#include "context.h"
#include "formats.h"
#include "texcompress.h"
diff --git a/src/mesa/main/texcompress_fxt1.c b/src/mesa/main/texcompress_fxt1.c
index 04acf05e528..c8b45bd3a55 100644
--- a/src/mesa/main/texcompress_fxt1.c
+++ b/src/mesa/main/texcompress_fxt1.c
@@ -32,9 +32,9 @@
#include "glheader.h"
#include "imports.h"
#include "colormac.h"
-#include "context.h"
#include "convolve.h"
#include "image.h"
+#include "macros.h"
#include "mipmap.h"
#include "texcompress.h"
#include "texcompress_fxt1.h"
diff --git a/src/mesa/main/texcompress_fxt1.h b/src/mesa/main/texcompress_fxt1.h
index d63ca71e212..38048b26ccb 100644
--- a/src/mesa/main/texcompress_fxt1.h
+++ b/src/mesa/main/texcompress_fxt1.h
@@ -25,9 +25,11 @@
#ifndef TEXCOMPRESS_FXT1_H
#define TEXCOMPRESS_FXT1_H
-#include "main/mtypes.h"
+#include "glheader.h"
#include "texstore.h"
+struct gl_texture_image;
+
#if FEATURE_texture_fxt1
extern GLboolean
diff --git a/src/mesa/main/texcompress_s3tc.c b/src/mesa/main/texcompress_s3tc.c
index 85c394b051f..c70792cab61 100644
--- a/src/mesa/main/texcompress_s3tc.c
+++ b/src/mesa/main/texcompress_s3tc.c
@@ -36,10 +36,10 @@
#include "glheader.h"
#include "imports.h"
#include "colormac.h"
-#include "context.h"
#include "convolve.h"
#include "dlopen.h"
#include "image.h"
+#include "macros.h"
#include "texcompress.h"
#include "texcompress_s3tc.h"
#include "texstore.h"
diff --git a/src/mesa/main/texfetch.c b/src/mesa/main/texfetch.c
index fe002082cca..c03bc71cd7a 100644
--- a/src/mesa/main/texfetch.c
+++ b/src/mesa/main/texfetch.c
@@ -34,7 +34,7 @@
#include "colormac.h"
-#include "context.h"
+#include "macros.h"
#include "texcompress.h"
#include "texcompress_fxt1.h"
#include "texcompress_s3tc.h"
diff --git a/src/mesa/main/texgen.h b/src/mesa/main/texgen.h
index 397d89e630f..2224a937611 100644
--- a/src/mesa/main/texgen.h
+++ b/src/mesa/main/texgen.h
@@ -27,7 +27,10 @@
#define TEXGEN_H
-#include "main/mtypes.h"
+#include "compiler.h"
+#include "glheader.h"
+
+struct _glapi_table;
#if FEATURE_texgen
diff --git a/src/mesa/main/texgetimage.h b/src/mesa/main/texgetimage.h
index 088d27c7e17..866ab704945 100644
--- a/src/mesa/main/texgetimage.h
+++ b/src/mesa/main/texgetimage.h
@@ -27,6 +27,7 @@
#ifndef TEXGETIMAGE_H
#define TEXGETIMAGE_H
+#include "mtypes.h"
extern void
_mesa_get_teximage(GLcontext *ctx, GLenum target, GLint level,
diff --git a/src/mesa/main/texrender.c b/src/mesa/main/texrender.c
index d29af5a5b2f..c68105b3951 100644
--- a/src/mesa/main/texrender.c
+++ b/src/mesa/main/texrender.c
@@ -1,6 +1,7 @@
#include "context.h"
#include "colormac.h"
+#include "macros.h"
#include "texfetch.h"
#include "texrender.h"
#include "renderbuffer.h"
diff --git a/src/mesa/main/texrender.h b/src/mesa/main/texrender.h
index 7c3fb0871bd..1e87d594a28 100644
--- a/src/mesa/main/texrender.h
+++ b/src/mesa/main/texrender.h
@@ -1,6 +1,7 @@
#ifndef TEXRENDER_H
#define TEXRENDER_H
+#include "mtypes.h"
extern void
_mesa_render_texture(GLcontext *ctx,
diff --git a/src/mesa/main/texstate.h b/src/mesa/main/texstate.h
index 17ac68000c5..912cb677985 100644
--- a/src/mesa/main/texstate.h
+++ b/src/mesa/main/texstate.h
@@ -32,6 +32,7 @@
#define TEXSTATE_H
+#include "compiler.h"
#include "mtypes.h"
diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index 0f21395af39..2989fdb72ed 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -55,7 +55,6 @@
#include "glheader.h"
#include "bufferobj.h"
#include "colormac.h"
-#include "context.h"
#include "convolve.h"
#include "image.h"
#include "macros.h"
diff --git a/src/mesa/main/uniforms.h b/src/mesa/main/uniforms.h
index 29f77cb35a0..ef98fe16bb1 100644
--- a/src/mesa/main/uniforms.h
+++ b/src/mesa/main/uniforms.h
@@ -25,6 +25,10 @@
#ifndef UNIFORMS_H
#define UNIFORMS_H
+#include "glheader.h"
+
+struct gl_program;
+struct _glapi_table;
extern void GLAPIENTRY
_mesa_Uniform1fARB(GLint, GLfloat);
diff --git a/src/mesa/main/viewport.h b/src/mesa/main/viewport.h
index f08fef27978..ec054a7c597 100644
--- a/src/mesa/main/viewport.h
+++ b/src/mesa/main/viewport.h
@@ -27,6 +27,8 @@
#ifndef VIEWPORT_H
#define VIEWPORT_H
+#include "glheader.h"
+#include "mtypes.h"
extern void GLAPIENTRY
_mesa_Viewport(GLint x, GLint y, GLsizei width, GLsizei height);
diff --git a/src/mesa/main/vtxfmt.h b/src/mesa/main/vtxfmt.h
index fb6c23abe98..aad38b87c35 100644
--- a/src/mesa/main/vtxfmt.h
+++ b/src/mesa/main/vtxfmt.h
@@ -33,6 +33,9 @@
#ifndef _VTXFMT_H_
#define _VTXFMT_H_
+#include "compiler.h"
+#include "mtypes.h"
+
#if FEATURE_beginend
extern void _mesa_init_exec_vtxfmt( GLcontext *ctx );
diff --git a/src/mesa/math/m_matrix.h b/src/mesa/math/m_matrix.h
index 3bc5de6cd4d..a69afb8589a 100644
--- a/src/mesa/math/m_matrix.h
+++ b/src/mesa/math/m_matrix.h
@@ -32,6 +32,8 @@
#define _M_MATRIX_H
+#include "main/glheader.h"
+
/**
* \name Symbolic names to some of the entries in the matrix
diff --git a/src/mesa/math/m_translate.c b/src/mesa/math/m_translate.c
index b12b07957cb..51daf7bfd37 100644
--- a/src/mesa/math/m_translate.c
+++ b/src/mesa/math/m_translate.c
@@ -29,8 +29,8 @@
#include "main/glheader.h"
+#include "main/macros.h"
#include "main/mtypes.h" /* GLchan hack */
-#include "main/colormac.h"
#include "m_translate.h"
diff --git a/src/mesa/math/m_translate.h b/src/mesa/math/m_translate.h
index c677682d506..58041031163 100644
--- a/src/mesa/math/m_translate.h
+++ b/src/mesa/math/m_translate.h
@@ -26,7 +26,8 @@
#ifndef _M_TRANSLATE_H_
#define _M_TRANSLATE_H_
-#include "main/config.h"
+#include "main/compiler.h"
+#include "main/glheader.h"
#include "main/mtypes.h" /* hack for GLchan */
diff --git a/src/mesa/math/m_xform.h b/src/mesa/math/m_xform.h
index 33421ad1c0a..14ac956a7bc 100644
--- a/src/mesa/math/m_xform.h
+++ b/src/mesa/math/m_xform.h
@@ -27,10 +27,10 @@
#define _M_XFORM_H
+#include "main/compiler.h"
#include "main/glheader.h"
-#include "main/config.h"
-#include "math/m_vector.h"
#include "math/m_matrix.h"
+#include "math/m_vector.h"
#ifdef USE_X86_ASM
#define _XFORMAPI _ASMAPI
diff --git a/src/mesa/program/arbprogparse.c b/src/mesa/program/arbprogparse.c
index 6373529e4e8..f834aaf5686 100644
--- a/src/mesa/program/arbprogparse.c
+++ b/src/mesa/program/arbprogparse.c
@@ -109,6 +109,7 @@ _mesa_parse_arb_fragment_program(GLcontext* ctx, GLenum target,
program->Base.NumNativeTexIndirections = prog.NumTexIndirections;
program->Base.InputsRead = prog.InputsRead;
program->Base.OutputsWritten = prog.OutputsWritten;
+ program->Base.IndirectRegisterFiles = prog.IndirectRegisterFiles;
for (i = 0; i < MAX_TEXTURE_IMAGE_UNITS; i++) {
program->Base.TexturesUsed[i] = prog.TexturesUsed[i];
if (prog.TexturesUsed[i])
@@ -199,6 +200,7 @@ _mesa_parse_arb_vertex_program(GLcontext *ctx, GLenum target,
program->Base.NumNativeAddressRegs = prog.NumNativeAddressRegs;
program->Base.InputsRead = prog.InputsRead;
program->Base.OutputsWritten = prog.OutputsWritten;
+ program->Base.IndirectRegisterFiles = prog.IndirectRegisterFiles;
program->IsPositionInvariant = (state.option.PositionInvariant)
? GL_TRUE : GL_FALSE;
diff --git a/src/mesa/program/hash_table.h b/src/mesa/program/hash_table.h
index 7b302f5dbee..e750906f961 100644
--- a/src/mesa/program/hash_table.h
+++ b/src/mesa/program/hash_table.h
@@ -31,8 +31,6 @@
#ifndef HASH_TABLE_H
#define HASH_TABLE_H
-#include <string.h>
-
struct hash_table;
typedef unsigned (*hash_func_t)(const void *key);
diff --git a/src/mesa/program/nvfragparse.h b/src/mesa/program/nvfragparse.h
index 544ab80c56c..e28a6c49349 100644
--- a/src/mesa/program/nvfragparse.h
+++ b/src/mesa/program/nvfragparse.h
@@ -30,6 +30,7 @@
#ifndef NVFRAGPARSE_H
#define NVFRAGPARSE_H
+#include "main/mtypes.h"
extern void
_mesa_parse_nv_fragment_program(GLcontext *ctx, GLenum target,
diff --git a/src/mesa/program/nvvertparse.c b/src/mesa/program/nvvertparse.c
index e2afcfd4ce6..1ac83d0e59d 100644
--- a/src/mesa/program/nvvertparse.c
+++ b/src/mesa/program/nvvertparse.c
@@ -64,6 +64,7 @@ struct parse_state {
GLbitfield inputsRead;
GLbitfield outputsWritten;
GLboolean anyProgRegsWritten;
+ GLboolean indirectRegisterFiles;
GLuint numInst; /* number of instructions parsed */
};
@@ -410,6 +411,7 @@ Parse_ParamReg(struct parse_state *parseState, struct prog_src_register *srcReg)
srcReg->RelAddr = GL_TRUE;
srcReg->File = PROGRAM_ENV_PARAM;
+ parseState->indirectRegisterFiles |= (1 << srcReg->File);
/* Look for +/-N offset */
if (!Peek_Token(parseState, token))
RETURN_ERROR;
@@ -1308,6 +1310,7 @@ _mesa_parse_nv_vertex_program(GLcontext *ctx, GLenum dstTarget,
parseState.inputsRead = 0;
parseState.outputsWritten = 0;
parseState.anyProgRegsWritten = GL_FALSE;
+ parseState.indirectRegisterFiles = 0x0;
/* Reset error state */
_mesa_set_program_error(ctx, -1, NULL);
@@ -1408,6 +1411,8 @@ _mesa_parse_nv_vertex_program(GLcontext *ctx, GLenum dstTarget,
program->Base.Parameters = _mesa_new_parameter_list ();
program->Base.NumParameters = 0;
+ program->Base.IndirectRegisterFiles = parseState.indirectRegisterFiles;
+
state_tokens[0] = STATE_VERTEX_PROGRAM;
state_tokens[1] = STATE_ENV;
/* Add refs to all of the potential params, in order. If we want to not
diff --git a/src/mesa/program/nvvertparse.h b/src/mesa/program/nvvertparse.h
index 9919e22388d..91ef79e6c3c 100644
--- a/src/mesa/program/nvvertparse.h
+++ b/src/mesa/program/nvvertparse.h
@@ -29,6 +29,7 @@
#ifndef NVVERTPARSE_H
#define NVVERTPARSE_H
+#include "main/mtypes.h"
extern void
_mesa_parse_nv_vertex_program(GLcontext *ctx, GLenum target,
diff --git a/src/mesa/program/prog_cache.h b/src/mesa/program/prog_cache.h
index 4e1ccac03ff..bfe8f99d445 100644
--- a/src/mesa/program/prog_cache.h
+++ b/src/mesa/program/prog_cache.h
@@ -30,6 +30,9 @@
#define PROG_CACHE_H
+#include "main/mtypes.h"
+
+
/** Opaque type */
struct gl_program_cache;
diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c
index f85c6513f31..1670c91b6ad 100644
--- a/src/mesa/program/prog_execute.c
+++ b/src/mesa/program/prog_execute.c
@@ -37,7 +37,7 @@
#include "main/glheader.h"
#include "main/colormac.h"
-#include "main/context.h"
+#include "main/macros.h"
#include "prog_execute.h"
#include "prog_instruction.h"
#include "prog_parameter.h"
@@ -81,6 +81,22 @@ static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
/**
+ * Return TRUE for +0 and other positive values, FALSE otherwise.
+ * Used for RCC opcode.
+ */
+static INLINE GLboolean
+positive(float x)
+{
+ fi_type fi;
+ fi.f = x;
+ if (fi.i & 0x80000000)
+ return GL_FALSE;
+ return GL_TRUE;
+}
+
+
+
+/**
* Return a pointer to the 4-element float vector specified by the given
* source register.
*/
@@ -1340,6 +1356,44 @@ _mesa_execute_program(GLcontext * ctx,
store_vector4(inst, machine, result);
}
break;
+ case OPCODE_RCC: /* clamped reciprocal */
+ {
+ const float largest = 1.884467e+19, smallest = 5.42101e-20;
+ GLfloat a[4], r, result[4];
+ fetch_vector1(&inst->SrcReg[0], machine, a);
+ if (DEBUG_PROG) {
+ if (a[0] == 0)
+ printf("RCC(0)\n");
+ else if (IS_INF_OR_NAN(a[0]))
+ printf("RCC(inf)\n");
+ }
+ if (a[0] == 1.0F) {
+ r = 1.0F;
+ }
+ else {
+ r = 1.0F / a[0];
+ }
+ if (positive(r)) {
+ if (r > largest) {
+ r = largest;
+ }
+ else if (r < smallest) {
+ r = smallest;
+ }
+ }
+ else {
+ if (r < -largest) {
+ r = -largest;
+ }
+ else if (r > -smallest) {
+ r = -smallest;
+ }
+ }
+ result[0] = result[1] = result[2] = result[3] = r;
+ store_vector4(inst, machine, result);
+ }
+ break;
+
case OPCODE_RCP:
{
GLfloat a[4], result[4];
diff --git a/src/mesa/program/prog_execute.h b/src/mesa/program/prog_execute.h
index adefc5439de..f59b65176ff 100644
--- a/src/mesa/program/prog_execute.h
+++ b/src/mesa/program/prog_execute.h
@@ -26,6 +26,7 @@
#define PROG_EXECUTE_H
#include "main/config.h"
+#include "main/mtypes.h"
typedef void (*FetchTexelLodFunc)(GLcontext *ctx, const GLfloat texcoord[4],
diff --git a/src/mesa/program/prog_instruction.h b/src/mesa/program/prog_instruction.h
index dacbc33704b..098b366ab56 100644
--- a/src/mesa/program/prog_instruction.h
+++ b/src/mesa/program/prog_instruction.h
@@ -38,7 +38,7 @@
#define PROG_INSTRUCTION_H
-#include "main/mfeatures.h"
+#include "main/glheader.h"
/**
@@ -149,20 +149,20 @@ typedef enum prog_opcode {
OPCODE_ADD, /* X X X X X */
OPCODE_AND, /* */
OPCODE_ARA, /* 2 */
- OPCODE_ARL, /* X X */
+ OPCODE_ARL, /* X X X */
OPCODE_ARL_NV, /* 2 */
OPCODE_ARR, /* 2 */
OPCODE_BGNLOOP, /* opt */
OPCODE_BGNSUB, /* opt */
OPCODE_BRA, /* 2 X */
OPCODE_BRK, /* 2 opt */
- OPCODE_CAL, /* 2 2 */
- OPCODE_CMP, /* X */
+ OPCODE_CAL, /* 2 2 X */
+ OPCODE_CMP, /* X X */
OPCODE_CONT, /* opt */
OPCODE_COS, /* X 2 X X */
OPCODE_DDX, /* X X */
OPCODE_DDY, /* X X */
- OPCODE_DP2, /* 2 */
+ OPCODE_DP2, /* 2 X */
OPCODE_DP2A, /* 2 */
OPCODE_DP3, /* X X X X X */
OPCODE_DP4, /* X X X X X */
@@ -185,7 +185,7 @@ typedef enum prog_opcode {
OPCODE_LG2, /* X X 2 X X */
OPCODE_LIT, /* X X X X */
OPCODE_LOG, /* X X X */
- OPCODE_LRP, /* X X */
+ OPCODE_LRP, /* X X X */
OPCODE_MAD, /* X X X X X */
OPCODE_MAX, /* X X X X X */
OPCODE_MIN, /* X X X X X */
@@ -196,8 +196,8 @@ typedef enum prog_opcode {
OPCODE_NOISE3, /* X */
OPCODE_NOISE4, /* X */
OPCODE_NOT, /* */
- OPCODE_NRM3, /* */
- OPCODE_NRM4, /* */
+ OPCODE_NRM3, /* X */
+ OPCODE_NRM4, /* X */
OPCODE_OR, /* */
OPCODE_PK2H, /* X */
OPCODE_PK2US, /* X */
@@ -209,7 +209,7 @@ typedef enum prog_opcode {
OPCODE_PUSHA, /* 3 */
OPCODE_RCC, /* 1.1 */
OPCODE_RCP, /* X X X X X */
- OPCODE_RET, /* 2 2 */
+ OPCODE_RET, /* 2 2 X */
OPCODE_RFL, /* X X */
OPCODE_RSQ, /* X X X X X */
OPCODE_SCS, /* X */
diff --git a/src/mesa/program/prog_noise.h b/src/mesa/program/prog_noise.h
index c4779479f9b..dd7986efcdb 100644
--- a/src/mesa/program/prog_noise.h
+++ b/src/mesa/program/prog_noise.h
@@ -25,6 +25,8 @@
#ifndef PROG_NOISE
#define PROG_NOISE
+#include "main/glheader.h"
+
extern GLfloat _mesa_noise1(GLfloat);
extern GLfloat _mesa_noise2(GLfloat, GLfloat);
extern GLfloat _mesa_noise3(GLfloat, GLfloat, GLfloat);
diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c
index 2941a17da3f..c78187c983d 100644
--- a/src/mesa/program/prog_optimize.c
+++ b/src/mesa/program/prog_optimize.c
@@ -38,40 +38,117 @@
static GLboolean dbg = GL_FALSE;
-/* Returns the mask of channels read from the given srcreg in this instruction.
+#define NO_MASK 0xf
+
+/**
+ * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
+ * are read from the given src in this instruction. We also provide
+ * one optional mask which may mask other components in the dst
+ * register.
*/
static GLuint
-get_src_arg_mask(const struct prog_instruction *inst, int arg)
+get_src_arg_mask(const struct prog_instruction *inst,
+ GLuint arg, GLuint dst_mask)
{
- int writemask = inst->DstReg.WriteMask;
+ GLuint read_mask, channel_mask;
+ GLuint comp;
- if (inst->CondUpdate)
- writemask = WRITEMASK_XYZW;
+ ASSERT(arg < _mesa_num_inst_src_regs(inst->Opcode));
- switch (inst->Opcode) {
- case OPCODE_MOV:
- case OPCODE_ABS:
- case OPCODE_ADD:
- case OPCODE_MUL:
- case OPCODE_SUB:
- return writemask;
- case OPCODE_RCP:
- case OPCODE_SIN:
- case OPCODE_COS:
- case OPCODE_RSQ:
- case OPCODE_POW:
- case OPCODE_EX2:
- return WRITEMASK_X;
- case OPCODE_DP2:
- return WRITEMASK_XY;
- case OPCODE_DP3:
- case OPCODE_XPD:
- return WRITEMASK_XYZ;
- default:
- return WRITEMASK_XYZW;
+ /* From the dst register, find the written channels */
+ if (inst->CondUpdate) {
+ channel_mask = WRITEMASK_XYZW;
+ }
+ else {
+ switch (inst->Opcode) {
+ case OPCODE_MOV:
+ case OPCODE_MIN:
+ case OPCODE_MAX:
+ case OPCODE_ABS:
+ case OPCODE_ADD:
+ case OPCODE_MAD:
+ case OPCODE_MUL:
+ case OPCODE_SUB:
+ channel_mask = inst->DstReg.WriteMask & dst_mask;
+ break;
+ case OPCODE_RCP:
+ case OPCODE_SIN:
+ case OPCODE_COS:
+ case OPCODE_RSQ:
+ case OPCODE_POW:
+ case OPCODE_EX2:
+ case OPCODE_LOG:
+ channel_mask = WRITEMASK_X;
+ break;
+ case OPCODE_DP2:
+ channel_mask = WRITEMASK_XY;
+ break;
+ case OPCODE_DP3:
+ case OPCODE_XPD:
+ channel_mask = WRITEMASK_XYZ;
+ break;
+ default:
+ channel_mask = WRITEMASK_XYZW;
+ break;
+ }
}
+
+ /* Now, given the src swizzle and the written channels, find which
+ * components are actually read
+ */
+ read_mask = 0x0;
+ for (comp = 0; comp < 4; ++comp) {
+ const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp);
+ ASSERT(coord < 4);
+ if (channel_mask & (1 << comp) && coord <= SWIZZLE_W)
+ read_mask |= 1 << coord;
+ }
+
+ return read_mask;
+}
+
+
+/**
+ * For a MOV instruction, compute a write mask when src register also has
+ * a mask
+ */
+static GLuint
+get_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask)
+{
+ const GLuint mask = mov->DstReg.WriteMask;
+ GLuint comp;
+ GLuint updated_mask = 0x0;
+
+ ASSERT(mov->Opcode == OPCODE_MOV);
+
+ for (comp = 0; comp < 4; ++comp) {
+ GLuint src_comp;
+ if ((mask & (1 << comp)) == 0)
+ continue;
+ src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp);
+ if ((src_mask & (1 << src_comp)) == 0)
+ continue;
+ updated_mask |= 1 << comp;
+ }
+
+ return updated_mask;
+}
+
+
+/**
+ * Ensure that the swizzle is regular. That is, all of the swizzle
+ * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE.
+ */
+static GLboolean
+is_swizzle_regular(GLuint swz)
+{
+ return GET_SWZ(swz,0) <= SWIZZLE_W &&
+ GET_SWZ(swz,1) <= SWIZZLE_W &&
+ GET_SWZ(swz,2) <= SWIZZLE_W &&
+ GET_SWZ(swz,3) <= SWIZZLE_W;
}
+
/**
* In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
* \return number of instructions removed
@@ -148,82 +225,13 @@ replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
/**
- * Consolidate temporary registers to use low numbers. For example, if the
- * shader only uses temps 4, 5, 8, replace them with 0, 1, 2.
- */
-static void
-_mesa_consolidate_registers(struct gl_program *prog)
-{
- GLboolean tempUsed[MAX_PROGRAM_TEMPS];
- GLint tempMap[MAX_PROGRAM_TEMPS];
- GLuint tempMax = 0, i;
-
- if (dbg) {
- printf("Optimize: Begin register consolidation\n");
- }
-
- memset(tempUsed, 0, sizeof(tempUsed));
-
- for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
- tempMap[i] = -1;
- }
-
- /* set tempUsed[i] if temporary [i] is referenced */
- for (i = 0; i < prog->NumInstructions; i++) {
- const struct prog_instruction *inst = prog->Instructions + i;
- const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
- GLuint j;
- for (j = 0; j < numSrc; j++) {
- if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
- const GLuint index = inst->SrcReg[j].Index;
- ASSERT(index < MAX_PROGRAM_TEMPS);
- tempUsed[index] = GL_TRUE;
- tempMax = MAX2(tempMax, index);
- break;
- }
- }
- if (inst->DstReg.File == PROGRAM_TEMPORARY) {
- const GLuint index = inst->DstReg.Index;
- ASSERT(index < MAX_PROGRAM_TEMPS);
- tempUsed[index] = GL_TRUE;
- tempMax = MAX2(tempMax, index);
- }
- }
-
- /* allocate a new index for each temp that's used */
- {
- GLuint freeTemp = 0;
- for (i = 0; i <= tempMax; i++) {
- if (tempUsed[i]) {
- tempMap[i] = freeTemp++;
- /*printf("replace %u with %u\n", i, tempMap[i]);*/
- }
- }
- if (freeTemp == tempMax + 1) {
- /* no consolidation possible */
- return;
- }
- if (dbg) {
- printf("Replace regs 0..%u with 0..%u\n", tempMax, freeTemp-1);
- }
- }
-
- replace_regs(prog, PROGRAM_TEMPORARY, tempMap);
-
- if (dbg) {
- printf("Optimize: End register consolidation\n");
- }
-}
-
-
-/**
* Remove dead instructions from the given program.
* This is very primitive for now. Basically look for temp registers
* that are written to but never read. Remove any instructions that
* write to such registers. Be careful with condition code setters.
*/
-static void
-_mesa_remove_dead_code(struct gl_program *prog)
+static GLboolean
+_mesa_remove_dead_code_global(struct gl_program *prog)
{
GLboolean tempRead[MAX_PROGRAM_TEMPS][4];
GLboolean *removeInst; /* per-instruction removal flag */
@@ -251,7 +259,7 @@ _mesa_remove_dead_code(struct gl_program *prog)
const GLuint index = inst->SrcReg[j].Index;
GLuint read_mask;
ASSERT(index < MAX_PROGRAM_TEMPS);
- read_mask = get_src_arg_mask(inst, j);
+ read_mask = get_src_arg_mask(inst, j, NO_MASK);
if (inst->SrcReg[j].RelAddr) {
if (dbg)
@@ -260,25 +268,12 @@ _mesa_remove_dead_code(struct gl_program *prog)
}
for (comp = 0; comp < 4; comp++) {
- GLuint swz = (inst->SrcReg[j].Swizzle >> (3 * comp)) & 0x7;
-
- if ((read_mask & (1 << comp)) == 0)
+ const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
+ ASSERT(swz < 4);
+ if ((read_mask & (1 << swz)) == 0)
continue;
-
- switch (swz) {
- case SWIZZLE_X:
- tempRead[index][0] = GL_TRUE;
- break;
- case SWIZZLE_Y:
- tempRead[index][1] = GL_TRUE;
- break;
- case SWIZZLE_Z:
- tempRead[index][2] = GL_TRUE;
- break;
- case SWIZZLE_W:
- tempRead[index][3] = GL_TRUE;
- break;
- }
+ if (swz <= SWIZZLE_W)
+ tempRead[index][swz] = GL_TRUE;
}
}
}
@@ -348,10 +343,11 @@ _mesa_remove_dead_code(struct gl_program *prog)
done:
free(removeInst);
+ return rem != 0;
}
-enum temp_use
+enum inst_use
{
READ,
WRITE,
@@ -359,13 +355,19 @@ enum temp_use
END
};
+
/**
- * Scan forward in program from 'start' for the next occurance of TEMP[index].
+ * Scan forward in program from 'start' for the next occurrences of TEMP[index].
+ * We check whether an instruction reads the components given by the mask and
+ * whether they are overwritten.
* Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
* that we can't look further.
*/
-static enum temp_use
-find_next_temp_use(const struct gl_program *prog, GLuint start, GLuint index)
+static enum inst_use
+find_next_use(const struct gl_program *prog,
+ GLuint start,
+ GLuint index,
+ GLuint mask)
{
GLuint i;
@@ -373,30 +375,50 @@ find_next_temp_use(const struct gl_program *prog, GLuint start, GLuint index)
const struct prog_instruction *inst = prog->Instructions + i;
switch (inst->Opcode) {
case OPCODE_BGNLOOP:
- case OPCODE_ENDLOOP:
case OPCODE_BGNSUB:
+ case OPCODE_BRA:
+ case OPCODE_CAL:
+ case OPCODE_CONT:
+ case OPCODE_IF:
+ case OPCODE_ELSE:
+ case OPCODE_ENDIF:
+ case OPCODE_ENDLOOP:
case OPCODE_ENDSUB:
+ case OPCODE_RET:
return FLOW;
+ case OPCODE_END:
+ return END;
default:
{
const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
GLuint j;
for (j = 0; j < numSrc; j++) {
- if (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
- inst->SrcReg[j].Index == index)
+ if (inst->SrcReg[j].RelAddr ||
+ (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
+ inst->SrcReg[j].Index == index &&
+ (get_src_arg_mask(inst,j,NO_MASK) & mask)))
return READ;
}
- if (inst->DstReg.File == PROGRAM_TEMPORARY &&
- inst->DstReg.Index == index)
- return WRITE;
+ if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 &&
+ inst->DstReg.File == PROGRAM_TEMPORARY &&
+ inst->DstReg.Index == index) {
+ mask &= ~inst->DstReg.WriteMask;
+ if (mask == 0)
+ return WRITE;
+ }
}
}
}
-
return END;
}
-static GLboolean _mesa_is_flow_control_opcode(enum prog_opcode opcode)
+
+/**
+ * Is the given instruction opcode a flow-control opcode?
+ * XXX maybe move this into prog_instruction.[ch]
+ */
+static GLboolean
+_mesa_is_flow_control_opcode(enum prog_opcode opcode)
{
switch (opcode) {
case OPCODE_BGNLOOP:
@@ -417,6 +439,37 @@ static GLboolean _mesa_is_flow_control_opcode(enum prog_opcode opcode)
}
}
+
+/**
+ * Test if the given instruction is a simple MOV (no conditional updating,
+ * no relative addressing, no negation/abs, etc).
+ */
+static GLboolean
+can_downward_mov_be_modifed(const struct prog_instruction *mov)
+{
+ return
+ mov->Opcode == OPCODE_MOV &&
+ mov->CondUpdate == GL_FALSE &&
+ mov->SrcReg[0].RelAddr == 0 &&
+ mov->SrcReg[0].Negate == 0 &&
+ mov->SrcReg[0].Abs == 0 &&
+ mov->SrcReg[0].HasIndex2 == 0 &&
+ mov->SrcReg[0].RelAddr2 == 0 &&
+ mov->DstReg.RelAddr == 0 &&
+ mov->DstReg.CondMask == COND_TR &&
+ mov->SaturateMode == SATURATE_OFF;
+}
+
+
+static GLboolean
+can_upward_mov_be_modifed(const struct prog_instruction *mov)
+{
+ return
+ can_downward_mov_be_modifed(mov) &&
+ mov->DstReg.File == PROGRAM_TEMPORARY;
+}
+
+
/**
* Try to remove use of extraneous MOV instructions, to free them up for dead
* code removal.
@@ -444,14 +497,15 @@ _mesa_remove_extra_move_use(struct gl_program *prog)
for (i = 0; i + 1 < prog->NumInstructions; i++) {
const struct prog_instruction *mov = prog->Instructions + i;
+ GLuint dst_mask, src_mask;
+ if (can_upward_mov_be_modifed(mov) == GL_FALSE)
+ continue;
- if (mov->Opcode != OPCODE_MOV ||
- mov->DstReg.File != PROGRAM_TEMPORARY ||
- mov->DstReg.RelAddr ||
- mov->DstReg.CondMask != COND_TR ||
- mov->SaturateMode != SATURATE_OFF ||
- mov->SrcReg[0].RelAddr)
- continue;
+ /* Scanning the code, we maintain the components which are still active in
+ * these two masks
+ */
+ dst_mask = mov->DstReg.WriteMask;
+ src_mask = get_src_arg_mask(mov, 0, NO_MASK);
/* Walk through remaining instructions until the or src reg gets
* rewritten or we get into some flow-control, eliminating the use of
@@ -459,61 +513,60 @@ _mesa_remove_extra_move_use(struct gl_program *prog)
*/
for (j = i + 1; j < prog->NumInstructions; j++) {
struct prog_instruction *inst2 = prog->Instructions + j;
- GLuint arg;
+ GLuint arg;
if (_mesa_is_flow_control_opcode(inst2->Opcode))
break;
/* First rewrite this instruction's args if appropriate. */
for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
- int comp;
- int read_mask = get_src_arg_mask(inst2, arg);
+ GLuint comp, read_mask;
if (inst2->SrcReg[arg].File != mov->DstReg.File ||
inst2->SrcReg[arg].Index != mov->DstReg.Index ||
inst2->SrcReg[arg].RelAddr ||
inst2->SrcReg[arg].Abs)
continue;
+ read_mask = get_src_arg_mask(inst2, arg, NO_MASK);
- /* Check that all the sources for this arg of inst2 come from inst1
- * or constants.
- */
- for (comp = 0; comp < 4; comp++) {
- int src_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
-
- /* If the MOV didn't write that channel, can't use it. */
- if ((read_mask & (1 << comp)) &&
- src_swz <= SWIZZLE_W &&
- (mov->DstReg.WriteMask & (1 << src_swz)) == 0)
- break;
- }
- if (comp != 4)
- continue;
-
- /* Adjust the swizzles of inst2 to point at MOV's source */
- for (comp = 0; comp < 4; comp++) {
- int inst2_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
-
- if (inst2_swz <= SWIZZLE_W) {
- GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
- inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
- inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
- inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
- inst2_swz) & 0x1) << comp);
- }
- }
- inst2->SrcReg[arg].File = mov->SrcReg[0].File;
- inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
+ /* Adjust the swizzles of inst2 to point at MOV's source if ALL the
+ * components read still come from the mov instruction
+ */
+ if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
+ (read_mask & dst_mask) == read_mask) {
+ for (comp = 0; comp < 4; comp++) {
+ const GLuint inst2_swz =
+ GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
+ const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
+ inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
+ inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
+ inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
+ inst2_swz) & 0x1) << comp);
+ }
+ inst2->SrcReg[arg].File = mov->SrcReg[0].File;
+ inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
+ }
}
- /* If this instruction overwrote part of the move, our time is up. */
- if ((inst2->DstReg.File == mov->DstReg.File &&
- (inst2->DstReg.RelAddr ||
- inst2->DstReg.Index == mov->DstReg.Index)) ||
- (inst2->DstReg.File == mov->SrcReg[0].File &&
- (inst2->DstReg.RelAddr ||
- inst2->DstReg.Index == mov->SrcReg[0].Index)))
- break;
+ /* The destination of MOV is written. This potentially deactivates some
+ * components from the src and dst of the MOV instruction
+ */
+ if (inst2->DstReg.File == mov->DstReg.File &&
+ (inst2->DstReg.RelAddr ||
+ inst2->DstReg.Index == mov->DstReg.Index)) {
+ dst_mask &= ~inst2->DstReg.WriteMask;
+ src_mask = get_src_arg_mask(mov, 0, dst_mask);
+ }
+
+ /* Likewise when the source of the MOV is written */
+ if (inst2->DstReg.File == mov->SrcReg[0].File &&
+ (inst2->DstReg.RelAddr ||
+ inst2->DstReg.Index == mov->SrcReg[0].Index)) {
+ src_mask &= ~inst2->DstReg.WriteMask;
+ dst_mask &= get_dst_mask_for_mov(mov, src_mask);
+ }
+ if (dst_mask == 0)
+ break;
}
}
@@ -523,14 +576,151 @@ _mesa_remove_extra_move_use(struct gl_program *prog)
}
}
+
+/**
+ * Complements dead_code_global. Try to remove code within a block of code by
+ * carefully monitoring the swizzles. Both functions should be merged into one
+ * with a proper control flow graph.
+ */
+static GLboolean
+_mesa_remove_dead_code_local(struct gl_program *prog)
+{
+ GLboolean *removeInst;
+ GLuint i, arg, rem = 0;
+
+ removeInst = (GLboolean *)
+ calloc(1, prog->NumInstructions * sizeof(GLboolean));
+
+ for (i = 0; i < prog->NumInstructions; i++) {
+ const struct prog_instruction *inst = prog->Instructions + i;
+ const GLuint index = inst->DstReg.Index;
+ const GLuint mask = inst->DstReg.WriteMask;
+ enum inst_use use;
+
+ /* We must deactivate the pass as soon as some indirection is used */
+ if (inst->DstReg.RelAddr)
+ goto done;
+ for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++)
+ if (inst->SrcReg[arg].RelAddr)
+ goto done;
+
+ if (_mesa_is_flow_control_opcode(inst->Opcode) ||
+ _mesa_num_inst_dst_regs(inst->Opcode) == 0 ||
+ inst->DstReg.File != PROGRAM_TEMPORARY ||
+ inst->DstReg.RelAddr)
+ continue;
+
+ use = find_next_use(prog, i+1, index, mask);
+ if (use == WRITE || use == END)
+ removeInst[i] = GL_TRUE;
+ }
+
+ rem = remove_instructions(prog, removeInst);
+
+done:
+ free(removeInst);
+ return rem != 0;
+}
+
+
+/**
+ * Try to inject the destination of mov as the destination of inst and recompute
+ * the swizzle operators for the sources of inst if required. Return GL_TRUE
+ * if the substitution was possible, GL_FALSE otherwise
+ */
+static GLboolean
+_mesa_merge_mov_into_inst(struct prog_instruction *inst,
+ const struct prog_instruction *mov)
+{
+ /* Indirection table which associates destination and source components for
+ * the mov instruction
+ */
+ const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK);
+
+ /* Some components are not written by inst. We cannot remove the mov */
+ if (mask != (inst->DstReg.WriteMask & mask))
+ return GL_FALSE;
+
+ /* Depending on the instruction, we may need to recompute the swizzles.
+ * Also, some other instructions (like TEX) are not linear. We will only
+ * consider completely active sources and destinations
+ */
+ switch (inst->Opcode) {
+
+ /* Cartesian instructions: we compute the swizzle */
+ case OPCODE_MOV:
+ case OPCODE_MIN:
+ case OPCODE_MAX:
+ case OPCODE_ABS:
+ case OPCODE_ADD:
+ case OPCODE_MAD:
+ case OPCODE_MUL:
+ case OPCODE_SUB:
+ {
+ GLuint dst_to_src_comp[4] = {0,0,0,0};
+ GLuint dst_comp, arg;
+ for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
+ if (mov->DstReg.WriteMask & (1 << dst_comp)) {
+ const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp);
+ ASSERT(src_comp < 4);
+ dst_to_src_comp[dst_comp] = src_comp;
+ }
+ }
+
+ /* Patch each source of the instruction */
+ for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) {
+ const GLuint arg_swz = inst->SrcReg[arg].Swizzle;
+ inst->SrcReg[arg].Swizzle = 0;
+
+ /* Reset each active component of the swizzle */
+ for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
+ GLuint src_comp, arg_comp;
+ if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0)
+ continue;
+ src_comp = dst_to_src_comp[dst_comp];
+ ASSERT(src_comp < 4);
+ arg_comp = GET_SWZ(arg_swz, src_comp);
+ ASSERT(arg_comp < 4);
+ inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp);
+ }
+ }
+ inst->DstReg = mov->DstReg;
+ return GL_TRUE;
+ }
+
+ /* Dot products and scalar instructions: we only change the destination */
+ case OPCODE_RCP:
+ case OPCODE_SIN:
+ case OPCODE_COS:
+ case OPCODE_RSQ:
+ case OPCODE_POW:
+ case OPCODE_EX2:
+ case OPCODE_LOG:
+ case OPCODE_DP2:
+ case OPCODE_DP3:
+ case OPCODE_DP4:
+ inst->DstReg = mov->DstReg;
+ return GL_TRUE;
+
+ /* All other instructions require fully active components with no swizzle */
+ default:
+ if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW ||
+ inst->DstReg.WriteMask != WRITEMASK_XYZW)
+ return GL_FALSE;
+ inst->DstReg = mov->DstReg;
+ return GL_TRUE;
+ }
+}
+
+
/**
* Try to remove extraneous MOV instructions from the given program.
*/
-static void
+static GLboolean
_mesa_remove_extra_moves(struct gl_program *prog)
{
GLboolean *removeInst; /* per-instruction removal flag */
- GLuint i, rem, loopNesting = 0, subroutineNesting = 0;
+ GLuint i, rem = 0, nesting = 0;
if (dbg) {
printf("Optimize: Begin remove extra moves\n");
@@ -549,29 +739,24 @@ _mesa_remove_extra_moves(struct gl_program *prog)
*/
for (i = 0; i < prog->NumInstructions; i++) {
- const struct prog_instruction *inst = prog->Instructions + i;
+ const struct prog_instruction *mov = prog->Instructions + i;
- switch (inst->Opcode) {
+ switch (mov->Opcode) {
case OPCODE_BGNLOOP:
- loopNesting++;
- break;
- case OPCODE_ENDLOOP:
- loopNesting--;
- break;
case OPCODE_BGNSUB:
- subroutineNesting++;
+ case OPCODE_IF:
+ nesting++;
break;
+ case OPCODE_ENDLOOP:
case OPCODE_ENDSUB:
- subroutineNesting--;
+ case OPCODE_ENDIF:
+ nesting--;
break;
case OPCODE_MOV:
- if (i > 0 &&
- loopNesting == 0 &&
- subroutineNesting == 0 &&
- inst->SrcReg[0].File == PROGRAM_TEMPORARY &&
- inst->SrcReg[0].Swizzle == SWIZZLE_XYZW) {
+ if (i > 0 && can_downward_mov_be_modifed(mov) && nesting == 0) {
+
/* see if this MOV can be removed */
- const GLuint tempIndex = inst->SrcReg[0].Index;
+ const GLuint id = mov->SrcReg[0].Index;
struct prog_instruction *prevInst;
GLuint prevI;
@@ -582,11 +767,13 @@ _mesa_remove_extra_moves(struct gl_program *prog)
prevInst = prog->Instructions + prevI;
if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
- prevInst->DstReg.Index == tempIndex &&
- prevInst->DstReg.WriteMask == WRITEMASK_XYZW) {
+ prevInst->DstReg.Index == id &&
+ prevInst->DstReg.RelAddr == 0 &&
+ prevInst->DstReg.CondSrc == 0 &&
+ prevInst->DstReg.CondMask == COND_TR) {
- enum temp_use next_use =
- find_next_temp_use(prog, i + 1, tempIndex);
+ const GLuint dst_mask = prevInst->DstReg.WriteMask;
+ enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask);
if (next_use == WRITE || next_use == END) {
/* OK, we can safely remove this MOV instruction.
@@ -596,18 +783,13 @@ _mesa_remove_extra_moves(struct gl_program *prog)
* Into:
* prevI: FOO z, x, y;
*/
-
- /* patch up prev inst */
- prevInst->DstReg.File = inst->DstReg.File;
- prevInst->DstReg.Index = inst->DstReg.Index;
-
- /* flag this instruction for removal */
- removeInst[i] = GL_TRUE;
-
- if (dbg) {
- printf("Remove MOV at %u\n", i);
- printf("new prev inst %u: ", prevI);
- _mesa_print_instruction(prevInst);
+ if (_mesa_merge_mov_into_inst(prevInst, mov)) {
+ removeInst[i] = GL_TRUE;
+ if (dbg) {
+ printf("Remove MOV at %u\n", i);
+ printf("new prev inst %u: ", prevI);
+ _mesa_print_instruction(prevInst);
+ }
}
}
}
@@ -627,6 +809,8 @@ _mesa_remove_extra_moves(struct gl_program *prog)
printf("Optimize: End remove extra moves. %u instructions removed\n", rem);
/*_mesa_print_program(prog);*/
}
+
+ return rem != 0;
}
@@ -713,6 +897,7 @@ compare_start(const void *a, const void *b)
return 0;
}
+
/** sort the interval list according to interval starts */
static void
sort_interval_list_by_start(struct interval_list *list)
@@ -1013,6 +1198,17 @@ _mesa_reallocate_registers(struct gl_program *prog)
}
+#if 0
+static void
+print_it(GLcontext *ctx, struct gl_program *program, const char *txt) {
+ fprintf(stderr, "%s (%u inst):\n", txt, program->NumInstructions);
+ _mesa_print_program(program);
+ _mesa_print_program_parameters(ctx, program);
+ fprintf(stderr, "\n\n");
+}
+#endif
+
+
/**
* Apply optimizations to the given program to eliminate unnecessary
* instructions, temp regs, etc.
@@ -1020,16 +1216,19 @@ _mesa_reallocate_registers(struct gl_program *prog)
void
_mesa_optimize_program(GLcontext *ctx, struct gl_program *program)
{
- _mesa_remove_extra_move_use(program);
-
- if (1)
- _mesa_remove_dead_code(program);
-
- if (0) /* not tested much yet */
- _mesa_remove_extra_moves(program);
-
- if (0)
- _mesa_consolidate_registers(program);
- else
+ GLboolean any_change;
+
+ /* Stop when no modifications were output */
+ do {
+ any_change = GL_FALSE;
+ _mesa_remove_extra_move_use(program);
+ if (_mesa_remove_dead_code_global(program))
+ any_change = GL_TRUE;
+ if (_mesa_remove_extra_moves(program))
+ any_change = GL_TRUE;
+ if (_mesa_remove_dead_code_local(program))
+ any_change = GL_TRUE;
_mesa_reallocate_registers(program);
+ } while (any_change);
}
+
diff --git a/src/mesa/program/prog_optimize.h b/src/mesa/program/prog_optimize.h
index 43894a27237..06cd9cb2c20 100644
--- a/src/mesa/program/prog_optimize.h
+++ b/src/mesa/program/prog_optimize.h
@@ -27,6 +27,7 @@
#include "main/config.h"
+#include "main/mtypes.h"
struct gl_program;
diff --git a/src/mesa/program/prog_parameter_layout.c b/src/mesa/program/prog_parameter_layout.c
index a8885738321..d7dc97edbfb 100644
--- a/src/mesa/program/prog_parameter_layout.c
+++ b/src/mesa/program/prog_parameter_layout.c
@@ -28,6 +28,7 @@
* \author Ian Romanick <[email protected]>
*/
+#include "main/compiler.h"
#include "main/mtypes.h"
#include "prog_parameter.h"
#include "prog_parameter_layout.h"
diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c
index 6ab199aa02b..6056c459e4c 100644
--- a/src/mesa/program/prog_print.c
+++ b/src/mesa/program/prog_print.c
@@ -924,6 +924,8 @@ _mesa_fprint_program_parameters(FILE *f,
fprintf(f, "NumParameters=%d\n", prog->NumParameters);
fprintf(f, "NumAttributes=%d\n", prog->NumAttributes);
fprintf(f, "NumAddressRegs=%d\n", prog->NumAddressRegs);
+ fprintf(f, "IndirectRegisterFiles: 0x%x (0b%s)\n",
+ prog->IndirectRegisterFiles, binary(prog->IndirectRegisterFiles));
fprintf(f, "SamplersUsed: 0x%x (0b%s)\n",
prog->SamplersUsed, binary(prog->SamplersUsed));
fprintf(f, "Samplers=[ ");
diff --git a/src/mesa/program/prog_print.h b/src/mesa/program/prog_print.h
index 9ab74560169..4667373f379 100644
--- a/src/mesa/program/prog_print.h
+++ b/src/mesa/program/prog_print.h
@@ -26,6 +26,16 @@
#ifndef PROG_PRINT_H
#define PROG_PRINT_H
+#include <stdio.h>
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+
+struct gl_program;
+struct gl_program_parameter_list;
+struct gl_shader;
+struct prog_instruction;
+
/**
* The output style to use when printing programs.
diff --git a/src/mesa/program/prog_uniform.h b/src/mesa/program/prog_uniform.h
index a671d30bfe8..7988d534a7d 100644
--- a/src/mesa/program/prog_uniform.h
+++ b/src/mesa/program/prog_uniform.h
@@ -31,8 +31,7 @@
#ifndef PROG_UNIFORM_H
#define PROG_UNIFORM_H
-#include "main/mtypes.h"
-#include "prog_statevars.h"
+#include "main/glheader.h"
/**
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index cf46095ce84..3b6d6827446 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -55,13 +55,21 @@ _mesa_init_program(GLcontext *ctx)
/*
* If this assertion fails, we need to increase the field
- * size for register indexes.
+ * size for register indexes (see INST_INDEX_BITS).
*/
ASSERT(ctx->Const.VertexProgram.MaxUniformComponents / 4
<= (1 << INST_INDEX_BITS));
ASSERT(ctx->Const.FragmentProgram.MaxUniformComponents / 4
<= (1 << INST_INDEX_BITS));
+ ASSERT(ctx->Const.VertexProgram.MaxTemps <= (1 << INST_INDEX_BITS));
+ ASSERT(ctx->Const.VertexProgram.MaxLocalParams <= (1 << INST_INDEX_BITS));
+ ASSERT(ctx->Const.FragmentProgram.MaxTemps <= (1 << INST_INDEX_BITS));
+ ASSERT(ctx->Const.FragmentProgram.MaxLocalParams <= (1 << INST_INDEX_BITS));
+
+ ASSERT(ctx->Const.VertexProgram.MaxUniformComponents <= 4 * MAX_UNIFORMS);
+ ASSERT(ctx->Const.FragmentProgram.MaxUniformComponents <= 4 * MAX_UNIFORMS);
+
/* If this fails, increase prog_instruction::TexSrcUnit size */
ASSERT(MAX_TEXTURE_UNITS < (1 << 5));
@@ -512,6 +520,7 @@ _mesa_clone_program(GLcontext *ctx, const struct gl_program *prog)
if (prog->Attributes)
clone->Attributes = _mesa_clone_parameter_list(prog->Attributes);
memcpy(clone->LocalParams, prog->LocalParams, sizeof(clone->LocalParams));
+ clone->IndirectRegisterFiles = prog->IndirectRegisterFiles;
clone->NumInstructions = prog->NumInstructions;
clone->NumTemporaries = prog->NumTemporaries;
clone->NumParameters = prog->NumParameters;
diff --git a/src/mesa/program/program_parse.tab.c b/src/mesa/program/program_parse.tab.c
index 6421d1f58aa..31a609600b7 100644
--- a/src/mesa/program/program_parse.tab.c
+++ b/src/mesa/program/program_parse.tab.c
@@ -798,29 +798,29 @@ static const yytype_uint16 yyrline[] =
415, 459, 464, 474, 518, 524, 525, 526, 527, 528,
529, 530, 531, 532, 533, 534, 535, 538, 550, 558,
575, 582, 601, 612, 632, 657, 664, 697, 704, 719,
- 774, 817, 826, 847, 857, 861, 890, 909, 909, 911,
- 918, 930, 931, 932, 935, 949, 963, 983, 994, 1006,
- 1008, 1009, 1010, 1011, 1014, 1014, 1014, 1014, 1015, 1018,
- 1022, 1027, 1034, 1041, 1048, 1071, 1094, 1095, 1096, 1097,
- 1098, 1099, 1102, 1121, 1125, 1131, 1135, 1139, 1143, 1152,
- 1161, 1165, 1170, 1176, 1187, 1187, 1188, 1190, 1194, 1198,
- 1202, 1208, 1208, 1210, 1228, 1254, 1257, 1268, 1274, 1280,
- 1281, 1288, 1294, 1300, 1308, 1314, 1320, 1328, 1334, 1340,
- 1348, 1349, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359,
- 1360, 1361, 1362, 1365, 1374, 1378, 1382, 1388, 1397, 1401,
- 1405, 1414, 1418, 1424, 1430, 1437, 1442, 1450, 1460, 1462,
- 1470, 1476, 1480, 1484, 1490, 1501, 1510, 1514, 1519, 1523,
- 1527, 1531, 1537, 1544, 1548, 1554, 1562, 1573, 1580, 1584,
- 1590, 1600, 1611, 1615, 1633, 1642, 1645, 1651, 1655, 1659,
- 1665, 1676, 1681, 1686, 1691, 1696, 1701, 1709, 1712, 1717,
- 1730, 1738, 1749, 1757, 1757, 1759, 1759, 1761, 1771, 1776,
- 1783, 1793, 1802, 1807, 1814, 1824, 1834, 1846, 1846, 1847,
- 1847, 1849, 1859, 1867, 1877, 1885, 1893, 1902, 1913, 1917,
- 1923, 1924, 1925, 1928, 1928, 1931, 1966, 1970, 1970, 1973,
- 1980, 1989, 2003, 2012, 2021, 2025, 2034, 2043, 2054, 2061,
- 2066, 2075, 2087, 2090, 2099, 2110, 2111, 2112, 2115, 2116,
- 2117, 2120, 2121, 2124, 2125, 2128, 2129, 2132, 2143, 2154,
- 2165, 2191, 2192
+ 774, 817, 826, 848, 858, 862, 891, 910, 910, 912,
+ 919, 931, 932, 933, 936, 950, 964, 984, 995, 1007,
+ 1009, 1010, 1011, 1012, 1015, 1015, 1015, 1015, 1016, 1019,
+ 1023, 1028, 1035, 1042, 1049, 1072, 1095, 1096, 1097, 1098,
+ 1099, 1100, 1103, 1122, 1126, 1132, 1136, 1140, 1144, 1153,
+ 1162, 1166, 1171, 1177, 1188, 1188, 1189, 1191, 1195, 1199,
+ 1203, 1209, 1209, 1211, 1229, 1255, 1258, 1269, 1275, 1281,
+ 1282, 1289, 1295, 1301, 1309, 1315, 1321, 1329, 1335, 1341,
+ 1349, 1350, 1353, 1354, 1355, 1356, 1357, 1358, 1359, 1360,
+ 1361, 1362, 1363, 1366, 1375, 1379, 1383, 1389, 1398, 1402,
+ 1406, 1415, 1419, 1425, 1431, 1438, 1443, 1451, 1461, 1463,
+ 1471, 1477, 1481, 1485, 1491, 1502, 1511, 1515, 1520, 1524,
+ 1528, 1532, 1538, 1545, 1549, 1555, 1563, 1574, 1581, 1585,
+ 1591, 1601, 1612, 1616, 1634, 1643, 1646, 1652, 1656, 1660,
+ 1666, 1677, 1682, 1687, 1692, 1697, 1702, 1710, 1713, 1718,
+ 1731, 1739, 1750, 1758, 1758, 1760, 1760, 1762, 1772, 1777,
+ 1784, 1794, 1803, 1808, 1815, 1825, 1835, 1847, 1847, 1848,
+ 1848, 1850, 1860, 1868, 1878, 1886, 1894, 1903, 1914, 1918,
+ 1924, 1925, 1926, 1929, 1929, 1932, 1967, 1971, 1971, 1974,
+ 1981, 1990, 2004, 2013, 2022, 2026, 2035, 2044, 2055, 2062,
+ 2067, 2076, 2088, 2091, 2100, 2111, 2112, 2113, 2116, 2117,
+ 2118, 2121, 2122, 2125, 2126, 2129, 2130, 2133, 2144, 2155,
+ 2166, 2192, 2193
};
#endif
@@ -2844,6 +2844,7 @@ yyreduce:
(yyval.src_reg).Base.File = (yyvsp[(1) - (4)].sym)->param_binding_type;
if ((yyvsp[(3) - (4)].src_reg).Base.RelAddr) {
+ state->prog->IndirectRegisterFiles |= (1 << (yyval.src_reg).Base.File);
(yyvsp[(1) - (4)].sym)->param_accessed_indirectly = 1;
(yyval.src_reg).Base.RelAddr = 1;
@@ -2858,7 +2859,7 @@ yyreduce:
case 63:
/* Line 1455 of yacc.c */
-#line 848 "program_parse.y"
+#line 849 "program_parse.y"
{
gl_register_file file = ((yyvsp[(1) - (1)].temp_sym).name != NULL)
? (yyvsp[(1) - (1)].temp_sym).param_binding_type
@@ -2871,7 +2872,7 @@ yyreduce:
case 64:
/* Line 1455 of yacc.c */
-#line 858 "program_parse.y"
+#line 859 "program_parse.y"
{
set_dst_reg(& (yyval.dst_reg), PROGRAM_OUTPUT, (yyvsp[(1) - (1)].result));
;}
@@ -2880,7 +2881,7 @@ yyreduce:
case 65:
/* Line 1455 of yacc.c */
-#line 862 "program_parse.y"
+#line 863 "program_parse.y"
{
struct asm_symbol *const s = (struct asm_symbol *)
_mesa_symbol_table_find_symbol(state->st, 0, (yyvsp[(1) - (1)].string));
@@ -2912,7 +2913,7 @@ yyreduce:
case 66:
/* Line 1455 of yacc.c */
-#line 891 "program_parse.y"
+#line 892 "program_parse.y"
{
struct asm_symbol *const s = (struct asm_symbol *)
_mesa_symbol_table_find_symbol(state->st, 0, (yyvsp[(1) - (1)].string));
@@ -2934,7 +2935,7 @@ yyreduce:
case 69:
/* Line 1455 of yacc.c */
-#line 912 "program_parse.y"
+#line 913 "program_parse.y"
{
init_src_reg(& (yyval.src_reg));
(yyval.src_reg).Base.Index = (yyvsp[(1) - (1)].integer);
@@ -2944,7 +2945,7 @@ yyreduce:
case 70:
/* Line 1455 of yacc.c */
-#line 919 "program_parse.y"
+#line 920 "program_parse.y"
{
/* FINISHME: Add support for multiple address registers.
*/
@@ -2959,30 +2960,30 @@ yyreduce:
case 71:
/* Line 1455 of yacc.c */
-#line 930 "program_parse.y"
+#line 931 "program_parse.y"
{ (yyval.integer) = 0; ;}
break;
case 72:
/* Line 1455 of yacc.c */
-#line 931 "program_parse.y"
+#line 932 "program_parse.y"
{ (yyval.integer) = (yyvsp[(2) - (2)].integer); ;}
break;
case 73:
/* Line 1455 of yacc.c */
-#line 932 "program_parse.y"
+#line 933 "program_parse.y"
{ (yyval.integer) = -(yyvsp[(2) - (2)].integer); ;}
break;
case 74:
/* Line 1455 of yacc.c */
-#line 936 "program_parse.y"
+#line 937 "program_parse.y"
{
- if (((yyvsp[(1) - (1)].integer) < 0) || ((yyvsp[(1) - (1)].integer) > 63)) {
+ if (((yyvsp[(1) - (1)].integer) < 0) || ((yyvsp[(1) - (1)].integer) > 4095)) {
char s[100];
_mesa_snprintf(s, sizeof(s),
"relative address offset too large (%d)", (yyvsp[(1) - (1)].integer));
@@ -2997,9 +2998,9 @@ yyreduce:
case 75:
/* Line 1455 of yacc.c */
-#line 950 "program_parse.y"
+#line 951 "program_parse.y"
{
- if (((yyvsp[(1) - (1)].integer) < 0) || ((yyvsp[(1) - (1)].integer) > 64)) {
+ if (((yyvsp[(1) - (1)].integer) < 0) || ((yyvsp[(1) - (1)].integer) > 4096)) {
char s[100];
_mesa_snprintf(s, sizeof(s),
"relative address offset too large (%d)", (yyvsp[(1) - (1)].integer));
@@ -3014,7 +3015,7 @@ yyreduce:
case 76:
/* Line 1455 of yacc.c */
-#line 964 "program_parse.y"
+#line 965 "program_parse.y"
{
struct asm_symbol *const s = (struct asm_symbol *)
_mesa_symbol_table_find_symbol(state->st, 0, (yyvsp[(1) - (1)].string));
@@ -3037,7 +3038,7 @@ yyreduce:
case 77:
/* Line 1455 of yacc.c */
-#line 984 "program_parse.y"
+#line 985 "program_parse.y"
{
if ((yyvsp[(1) - (1)].swiz_mask).mask != WRITEMASK_X) {
yyerror(& (yylsp[(1) - (1)]), state, "invalid address component selector");
@@ -3051,7 +3052,7 @@ yyreduce:
case 78:
/* Line 1455 of yacc.c */
-#line 995 "program_parse.y"
+#line 996 "program_parse.y"
{
if ((yyvsp[(1) - (1)].swiz_mask).mask != WRITEMASK_X) {
yyerror(& (yylsp[(1) - (1)]), state,
@@ -3066,21 +3067,21 @@ yyreduce:
case 83:
/* Line 1455 of yacc.c */
-#line 1011 "program_parse.y"
+#line 1012 "program_parse.y"
{ (yyval.swiz_mask).swizzle = SWIZZLE_NOOP; (yyval.swiz_mask).mask = WRITEMASK_XYZW; ;}
break;
case 88:
/* Line 1455 of yacc.c */
-#line 1015 "program_parse.y"
+#line 1016 "program_parse.y"
{ (yyval.swiz_mask).swizzle = SWIZZLE_NOOP; (yyval.swiz_mask).mask = WRITEMASK_XYZW; ;}
break;
case 89:
/* Line 1455 of yacc.c */
-#line 1019 "program_parse.y"
+#line 1020 "program_parse.y"
{
(yyval.dst_reg) = (yyvsp[(2) - (3)].dst_reg);
;}
@@ -3089,7 +3090,7 @@ yyreduce:
case 90:
/* Line 1455 of yacc.c */
-#line 1023 "program_parse.y"
+#line 1024 "program_parse.y"
{
(yyval.dst_reg) = (yyvsp[(2) - (3)].dst_reg);
;}
@@ -3098,7 +3099,7 @@ yyreduce:
case 91:
/* Line 1455 of yacc.c */
-#line 1027 "program_parse.y"
+#line 1028 "program_parse.y"
{
(yyval.dst_reg).CondMask = COND_TR;
(yyval.dst_reg).CondSwizzle = SWIZZLE_NOOP;
@@ -3109,7 +3110,7 @@ yyreduce:
case 92:
/* Line 1455 of yacc.c */
-#line 1035 "program_parse.y"
+#line 1036 "program_parse.y"
{
(yyval.dst_reg) = (yyvsp[(1) - (2)].dst_reg);
(yyval.dst_reg).CondSwizzle = (yyvsp[(2) - (2)].swiz_mask).swizzle;
@@ -3119,7 +3120,7 @@ yyreduce:
case 93:
/* Line 1455 of yacc.c */
-#line 1042 "program_parse.y"
+#line 1043 "program_parse.y"
{
(yyval.dst_reg) = (yyvsp[(1) - (2)].dst_reg);
(yyval.dst_reg).CondSwizzle = (yyvsp[(2) - (2)].swiz_mask).swizzle;
@@ -3129,7 +3130,7 @@ yyreduce:
case 94:
/* Line 1455 of yacc.c */
-#line 1049 "program_parse.y"
+#line 1050 "program_parse.y"
{
const int cond = _mesa_parse_cc((yyvsp[(1) - (1)].string));
if ((cond == 0) || ((yyvsp[(1) - (1)].string)[2] != '\0')) {
@@ -3155,7 +3156,7 @@ yyreduce:
case 95:
/* Line 1455 of yacc.c */
-#line 1072 "program_parse.y"
+#line 1073 "program_parse.y"
{
const int cond = _mesa_parse_cc((yyvsp[(1) - (1)].string));
if ((cond == 0) || ((yyvsp[(1) - (1)].string)[2] != '\0')) {
@@ -3181,7 +3182,7 @@ yyreduce:
case 102:
/* Line 1455 of yacc.c */
-#line 1103 "program_parse.y"
+#line 1104 "program_parse.y"
{
struct asm_symbol *const s =
declare_variable(state, (yyvsp[(2) - (4)].string), at_attrib, & (yylsp[(2) - (4)]));
@@ -3203,7 +3204,7 @@ yyreduce:
case 103:
/* Line 1455 of yacc.c */
-#line 1122 "program_parse.y"
+#line 1123 "program_parse.y"
{
(yyval.attrib) = (yyvsp[(2) - (2)].attrib);
;}
@@ -3212,7 +3213,7 @@ yyreduce:
case 104:
/* Line 1455 of yacc.c */
-#line 1126 "program_parse.y"
+#line 1127 "program_parse.y"
{
(yyval.attrib) = (yyvsp[(2) - (2)].attrib);
;}
@@ -3221,7 +3222,7 @@ yyreduce:
case 105:
/* Line 1455 of yacc.c */
-#line 1132 "program_parse.y"
+#line 1133 "program_parse.y"
{
(yyval.attrib) = VERT_ATTRIB_POS;
;}
@@ -3230,7 +3231,7 @@ yyreduce:
case 106:
/* Line 1455 of yacc.c */
-#line 1136 "program_parse.y"
+#line 1137 "program_parse.y"
{
(yyval.attrib) = VERT_ATTRIB_WEIGHT;
;}
@@ -3239,7 +3240,7 @@ yyreduce:
case 107:
/* Line 1455 of yacc.c */
-#line 1140 "program_parse.y"
+#line 1141 "program_parse.y"
{
(yyval.attrib) = VERT_ATTRIB_NORMAL;
;}
@@ -3248,7 +3249,7 @@ yyreduce:
case 108:
/* Line 1455 of yacc.c */
-#line 1144 "program_parse.y"
+#line 1145 "program_parse.y"
{
if (!state->ctx->Extensions.EXT_secondary_color) {
yyerror(& (yylsp[(2) - (2)]), state, "GL_EXT_secondary_color not supported");
@@ -3262,7 +3263,7 @@ yyreduce:
case 109:
/* Line 1455 of yacc.c */
-#line 1153 "program_parse.y"
+#line 1154 "program_parse.y"
{
if (!state->ctx->Extensions.EXT_fog_coord) {
yyerror(& (yylsp[(1) - (1)]), state, "GL_EXT_fog_coord not supported");
@@ -3276,7 +3277,7 @@ yyreduce:
case 110:
/* Line 1455 of yacc.c */
-#line 1162 "program_parse.y"
+#line 1163 "program_parse.y"
{
(yyval.attrib) = VERT_ATTRIB_TEX0 + (yyvsp[(2) - (2)].integer);
;}
@@ -3285,7 +3286,7 @@ yyreduce:
case 111:
/* Line 1455 of yacc.c */
-#line 1166 "program_parse.y"
+#line 1167 "program_parse.y"
{
yyerror(& (yylsp[(1) - (4)]), state, "GL_ARB_matrix_palette not supported");
YYERROR;
@@ -3295,7 +3296,7 @@ yyreduce:
case 112:
/* Line 1455 of yacc.c */
-#line 1171 "program_parse.y"
+#line 1172 "program_parse.y"
{
(yyval.attrib) = VERT_ATTRIB_GENERIC0 + (yyvsp[(3) - (4)].integer);
;}
@@ -3304,7 +3305,7 @@ yyreduce:
case 113:
/* Line 1455 of yacc.c */
-#line 1177 "program_parse.y"
+#line 1178 "program_parse.y"
{
if ((unsigned) (yyvsp[(1) - (1)].integer) >= state->limits->MaxAttribs) {
yyerror(& (yylsp[(1) - (1)]), state, "invalid vertex attribute reference");
@@ -3318,7 +3319,7 @@ yyreduce:
case 117:
/* Line 1455 of yacc.c */
-#line 1191 "program_parse.y"
+#line 1192 "program_parse.y"
{
(yyval.attrib) = FRAG_ATTRIB_WPOS;
;}
@@ -3327,7 +3328,7 @@ yyreduce:
case 118:
/* Line 1455 of yacc.c */
-#line 1195 "program_parse.y"
+#line 1196 "program_parse.y"
{
(yyval.attrib) = FRAG_ATTRIB_COL0 + (yyvsp[(2) - (2)].integer);
;}
@@ -3336,7 +3337,7 @@ yyreduce:
case 119:
/* Line 1455 of yacc.c */
-#line 1199 "program_parse.y"
+#line 1200 "program_parse.y"
{
(yyval.attrib) = FRAG_ATTRIB_FOGC;
;}
@@ -3345,7 +3346,7 @@ yyreduce:
case 120:
/* Line 1455 of yacc.c */
-#line 1203 "program_parse.y"
+#line 1204 "program_parse.y"
{
(yyval.attrib) = FRAG_ATTRIB_TEX0 + (yyvsp[(2) - (2)].integer);
;}
@@ -3354,7 +3355,7 @@ yyreduce:
case 123:
/* Line 1455 of yacc.c */
-#line 1211 "program_parse.y"
+#line 1212 "program_parse.y"
{
struct asm_symbol *const s =
declare_variable(state, (yyvsp[(2) - (3)].string), at_param, & (yylsp[(2) - (3)]));
@@ -3375,7 +3376,7 @@ yyreduce:
case 124:
/* Line 1455 of yacc.c */
-#line 1229 "program_parse.y"
+#line 1230 "program_parse.y"
{
if (((yyvsp[(4) - (6)].integer) != 0) && ((unsigned) (yyvsp[(4) - (6)].integer) != (yyvsp[(6) - (6)].temp_sym).param_binding_length)) {
free((yyvsp[(2) - (6)].string));
@@ -3403,7 +3404,7 @@ yyreduce:
case 125:
/* Line 1455 of yacc.c */
-#line 1254 "program_parse.y"
+#line 1255 "program_parse.y"
{
(yyval.integer) = 0;
;}
@@ -3412,7 +3413,7 @@ yyreduce:
case 126:
/* Line 1455 of yacc.c */
-#line 1258 "program_parse.y"
+#line 1259 "program_parse.y"
{
if (((yyvsp[(1) - (1)].integer) < 1) || ((unsigned) (yyvsp[(1) - (1)].integer) > state->limits->MaxParameters)) {
yyerror(& (yylsp[(1) - (1)]), state, "invalid parameter array size");
@@ -3426,7 +3427,7 @@ yyreduce:
case 127:
/* Line 1455 of yacc.c */
-#line 1269 "program_parse.y"
+#line 1270 "program_parse.y"
{
(yyval.temp_sym) = (yyvsp[(2) - (2)].temp_sym);
;}
@@ -3435,7 +3436,7 @@ yyreduce:
case 128:
/* Line 1455 of yacc.c */
-#line 1275 "program_parse.y"
+#line 1276 "program_parse.y"
{
(yyval.temp_sym) = (yyvsp[(3) - (4)].temp_sym);
;}
@@ -3444,7 +3445,7 @@ yyreduce:
case 130:
/* Line 1455 of yacc.c */
-#line 1282 "program_parse.y"
+#line 1283 "program_parse.y"
{
(yyvsp[(1) - (3)].temp_sym).param_binding_length += (yyvsp[(3) - (3)].temp_sym).param_binding_length;
(yyval.temp_sym) = (yyvsp[(1) - (3)].temp_sym);
@@ -3454,7 +3455,7 @@ yyreduce:
case 131:
/* Line 1455 of yacc.c */
-#line 1289 "program_parse.y"
+#line 1290 "program_parse.y"
{
memset(& (yyval.temp_sym), 0, sizeof((yyval.temp_sym)));
(yyval.temp_sym).param_binding_begin = ~0;
@@ -3465,7 +3466,7 @@ yyreduce:
case 132:
/* Line 1455 of yacc.c */
-#line 1295 "program_parse.y"
+#line 1296 "program_parse.y"
{
memset(& (yyval.temp_sym), 0, sizeof((yyval.temp_sym)));
(yyval.temp_sym).param_binding_begin = ~0;
@@ -3476,7 +3477,7 @@ yyreduce:
case 133:
/* Line 1455 of yacc.c */
-#line 1301 "program_parse.y"
+#line 1302 "program_parse.y"
{
memset(& (yyval.temp_sym), 0, sizeof((yyval.temp_sym)));
(yyval.temp_sym).param_binding_begin = ~0;
@@ -3487,7 +3488,7 @@ yyreduce:
case 134:
/* Line 1455 of yacc.c */
-#line 1309 "program_parse.y"
+#line 1310 "program_parse.y"
{
memset(& (yyval.temp_sym), 0, sizeof((yyval.temp_sym)));
(yyval.temp_sym).param_binding_begin = ~0;
@@ -3498,7 +3499,7 @@ yyreduce:
case 135:
/* Line 1455 of yacc.c */
-#line 1315 "program_parse.y"
+#line 1316 "program_parse.y"
{
memset(& (yyval.temp_sym), 0, sizeof((yyval.temp_sym)));
(yyval.temp_sym).param_binding_begin = ~0;
@@ -3509,7 +3510,7 @@ yyreduce:
case 136:
/* Line 1455 of yacc.c */
-#line 1321 "program_parse.y"
+#line 1322 "program_parse.y"
{
memset(& (yyval.temp_sym), 0, sizeof((yyval.temp_sym)));
(yyval.temp_sym).param_binding_begin = ~0;
@@ -3520,7 +3521,7 @@ yyreduce:
case 137:
/* Line 1455 of yacc.c */
-#line 1329 "program_parse.y"
+#line 1330 "program_parse.y"
{
memset(& (yyval.temp_sym), 0, sizeof((yyval.temp_sym)));
(yyval.temp_sym).param_binding_begin = ~0;
@@ -3531,7 +3532,7 @@ yyreduce:
case 138:
/* Line 1455 of yacc.c */
-#line 1335 "program_parse.y"
+#line 1336 "program_parse.y"
{
memset(& (yyval.temp_sym), 0, sizeof((yyval.temp_sym)));
(yyval.temp_sym).param_binding_begin = ~0;
@@ -3542,7 +3543,7 @@ yyreduce:
case 139:
/* Line 1455 of yacc.c */
-#line 1341 "program_parse.y"
+#line 1342 "program_parse.y"
{
memset(& (yyval.temp_sym), 0, sizeof((yyval.temp_sym)));
(yyval.temp_sym).param_binding_begin = ~0;
@@ -3553,98 +3554,98 @@ yyreduce:
case 140:
/* Line 1455 of yacc.c */
-#line 1348 "program_parse.y"
+#line 1349 "program_parse.y"
{ memcpy((yyval.state), (yyvsp[(1) - (1)].state), sizeof((yyval.state))); ;}
break;
case 141:
/* Line 1455 of yacc.c */
-#line 1349 "program_parse.y"
+#line 1350 "program_parse.y"
{ memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;}
break;
case 142:
/* Line 1455 of yacc.c */
-#line 1352 "program_parse.y"
+#line 1353 "program_parse.y"
{ memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;}
break;
case 143:
/* Line 1455 of yacc.c */
-#line 1353 "program_parse.y"
+#line 1354 "program_parse.y"
{ memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;}
break;
case 144:
/* Line 1455 of yacc.c */
-#line 1354 "program_parse.y"
+#line 1355 "program_parse.y"
{ memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;}
break;
case 145:
/* Line 1455 of yacc.c */
-#line 1355 "program_parse.y"
+#line 1356 "program_parse.y"
{ memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;}
break;
case 146:
/* Line 1455 of yacc.c */
-#line 1356 "program_parse.y"
+#line 1357 "program_parse.y"
{ memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;}
break;
case 147:
/* Line 1455 of yacc.c */
-#line 1357 "program_parse.y"
+#line 1358 "program_parse.y"
{ memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;}
break;
case 148:
/* Line 1455 of yacc.c */
-#line 1358 "program_parse.y"
+#line 1359 "program_parse.y"
{ memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;}
break;
case 149:
/* Line 1455 of yacc.c */
-#line 1359 "program_parse.y"
+#line 1360 "program_parse.y"
{ memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;}
break;
case 150:
/* Line 1455 of yacc.c */
-#line 1360 "program_parse.y"
+#line 1361 "program_parse.y"
{ memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;}
break;
case 151:
/* Line 1455 of yacc.c */
-#line 1361 "program_parse.y"
+#line 1362 "program_parse.y"
{ memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;}
break;
case 152:
/* Line 1455 of yacc.c */
-#line 1362 "program_parse.y"
+#line 1363 "program_parse.y"
{ memcpy((yyval.state), (yyvsp[(2) - (2)].state), sizeof((yyval.state))); ;}
break;
case 153:
/* Line 1455 of yacc.c */
-#line 1366 "program_parse.y"
+#line 1367 "program_parse.y"
{
memset((yyval.state), 0, sizeof((yyval.state)));
(yyval.state)[0] = STATE_MATERIAL;
@@ -3656,7 +3657,7 @@ yyreduce:
case 154:
/* Line 1455 of yacc.c */
-#line 1375 "program_parse.y"
+#line 1376 "program_parse.y"
{
(yyval.integer) = (yyvsp[(1) - (1)].integer);
;}
@@ -3665,7 +3666,7 @@ yyreduce:
case 155:
/* Line 1455 of yacc.c */
-#line 1379 "program_parse.y"
+#line 1380 "program_parse.y"
{
(yyval.integer) = STATE_EMISSION;
;}
@@ -3674,7 +3675,7 @@ yyreduce:
case 156:
/* Line 1455 of yacc.c */
-#line 1383 "program_parse.y"
+#line 1384 "program_parse.y"
{
(yyval.integer) = STATE_SHININESS;
;}
@@ -3683,7 +3684,7 @@ yyreduce:
case 157:
/* Line 1455 of yacc.c */
-#line 1389 "program_parse.y"
+#line 1390 "program_parse.y"
{
memset((yyval.state), 0, sizeof((yyval.state)));
(yyval.state)[0] = STATE_LIGHT;
@@ -3695,7 +3696,7 @@ yyreduce:
case 158:
/* Line 1455 of yacc.c */
-#line 1398 "program_parse.y"
+#line 1399 "program_parse.y"
{
(yyval.integer) = (yyvsp[(1) - (1)].integer);
;}
@@ -3704,7 +3705,7 @@ yyreduce:
case 159:
/* Line 1455 of yacc.c */
-#line 1402 "program_parse.y"
+#line 1403 "program_parse.y"
{
(yyval.integer) = STATE_POSITION;
;}
@@ -3713,7 +3714,7 @@ yyreduce:
case 160:
/* Line 1455 of yacc.c */
-#line 1406 "program_parse.y"
+#line 1407 "program_parse.y"
{
if (!state->ctx->Extensions.EXT_point_parameters) {
yyerror(& (yylsp[(1) - (1)]), state, "GL_ARB_point_parameters not supported");
@@ -3727,7 +3728,7 @@ yyreduce:
case 161:
/* Line 1455 of yacc.c */
-#line 1415 "program_parse.y"
+#line 1416 "program_parse.y"
{
(yyval.integer) = (yyvsp[(2) - (2)].integer);
;}
@@ -3736,7 +3737,7 @@ yyreduce:
case 162:
/* Line 1455 of yacc.c */
-#line 1419 "program_parse.y"
+#line 1420 "program_parse.y"
{
(yyval.integer) = STATE_HALF_VECTOR;
;}
@@ -3745,7 +3746,7 @@ yyreduce:
case 163:
/* Line 1455 of yacc.c */
-#line 1425 "program_parse.y"
+#line 1426 "program_parse.y"
{
(yyval.integer) = STATE_SPOT_DIRECTION;
;}
@@ -3754,7 +3755,7 @@ yyreduce:
case 164:
/* Line 1455 of yacc.c */
-#line 1431 "program_parse.y"
+#line 1432 "program_parse.y"
{
(yyval.state)[0] = (yyvsp[(2) - (2)].state)[0];
(yyval.state)[1] = (yyvsp[(2) - (2)].state)[1];
@@ -3764,7 +3765,7 @@ yyreduce:
case 165:
/* Line 1455 of yacc.c */
-#line 1438 "program_parse.y"
+#line 1439 "program_parse.y"
{
memset((yyval.state), 0, sizeof((yyval.state)));
(yyval.state)[0] = STATE_LIGHTMODEL_AMBIENT;
@@ -3774,7 +3775,7 @@ yyreduce:
case 166:
/* Line 1455 of yacc.c */
-#line 1443 "program_parse.y"
+#line 1444 "program_parse.y"
{
memset((yyval.state), 0, sizeof((yyval.state)));
(yyval.state)[0] = STATE_LIGHTMODEL_SCENECOLOR;
@@ -3785,7 +3786,7 @@ yyreduce:
case 167:
/* Line 1455 of yacc.c */
-#line 1451 "program_parse.y"
+#line 1452 "program_parse.y"
{
memset((yyval.state), 0, sizeof((yyval.state)));
(yyval.state)[0] = STATE_LIGHTPROD;
@@ -3798,7 +3799,7 @@ yyreduce:
case 169:
/* Line 1455 of yacc.c */
-#line 1463 "program_parse.y"
+#line 1464 "program_parse.y"
{
memset((yyval.state), 0, sizeof((yyval.state)));
(yyval.state)[0] = (yyvsp[(3) - (3)].integer);
@@ -3809,7 +3810,7 @@ yyreduce:
case 170:
/* Line 1455 of yacc.c */
-#line 1471 "program_parse.y"
+#line 1472 "program_parse.y"
{
(yyval.integer) = STATE_TEXENV_COLOR;
;}
@@ -3818,7 +3819,7 @@ yyreduce:
case 171:
/* Line 1455 of yacc.c */
-#line 1477 "program_parse.y"
+#line 1478 "program_parse.y"
{
(yyval.integer) = STATE_AMBIENT;
;}
@@ -3827,7 +3828,7 @@ yyreduce:
case 172:
/* Line 1455 of yacc.c */
-#line 1481 "program_parse.y"
+#line 1482 "program_parse.y"
{
(yyval.integer) = STATE_DIFFUSE;
;}
@@ -3836,7 +3837,7 @@ yyreduce:
case 173:
/* Line 1455 of yacc.c */
-#line 1485 "program_parse.y"
+#line 1486 "program_parse.y"
{
(yyval.integer) = STATE_SPECULAR;
;}
@@ -3845,7 +3846,7 @@ yyreduce:
case 174:
/* Line 1455 of yacc.c */
-#line 1491 "program_parse.y"
+#line 1492 "program_parse.y"
{
if ((unsigned) (yyvsp[(1) - (1)].integer) >= state->MaxLights) {
yyerror(& (yylsp[(1) - (1)]), state, "invalid light selector");
@@ -3859,7 +3860,7 @@ yyreduce:
case 175:
/* Line 1455 of yacc.c */
-#line 1502 "program_parse.y"
+#line 1503 "program_parse.y"
{
memset((yyval.state), 0, sizeof((yyval.state)));
(yyval.state)[0] = STATE_TEXGEN;
@@ -3871,7 +3872,7 @@ yyreduce:
case 176:
/* Line 1455 of yacc.c */
-#line 1511 "program_parse.y"
+#line 1512 "program_parse.y"
{
(yyval.integer) = STATE_TEXGEN_EYE_S;
;}
@@ -3880,7 +3881,7 @@ yyreduce:
case 177:
/* Line 1455 of yacc.c */
-#line 1515 "program_parse.y"
+#line 1516 "program_parse.y"
{
(yyval.integer) = STATE_TEXGEN_OBJECT_S;
;}
@@ -3889,7 +3890,7 @@ yyreduce:
case 178:
/* Line 1455 of yacc.c */
-#line 1520 "program_parse.y"
+#line 1521 "program_parse.y"
{
(yyval.integer) = STATE_TEXGEN_EYE_S - STATE_TEXGEN_EYE_S;
;}
@@ -3898,7 +3899,7 @@ yyreduce:
case 179:
/* Line 1455 of yacc.c */
-#line 1524 "program_parse.y"
+#line 1525 "program_parse.y"
{
(yyval.integer) = STATE_TEXGEN_EYE_T - STATE_TEXGEN_EYE_S;
;}
@@ -3907,7 +3908,7 @@ yyreduce:
case 180:
/* Line 1455 of yacc.c */
-#line 1528 "program_parse.y"
+#line 1529 "program_parse.y"
{
(yyval.integer) = STATE_TEXGEN_EYE_R - STATE_TEXGEN_EYE_S;
;}
@@ -3916,7 +3917,7 @@ yyreduce:
case 181:
/* Line 1455 of yacc.c */
-#line 1532 "program_parse.y"
+#line 1533 "program_parse.y"
{
(yyval.integer) = STATE_TEXGEN_EYE_Q - STATE_TEXGEN_EYE_S;
;}
@@ -3925,7 +3926,7 @@ yyreduce:
case 182:
/* Line 1455 of yacc.c */
-#line 1538 "program_parse.y"
+#line 1539 "program_parse.y"
{
memset((yyval.state), 0, sizeof((yyval.state)));
(yyval.state)[0] = (yyvsp[(2) - (2)].integer);
@@ -3935,7 +3936,7 @@ yyreduce:
case 183:
/* Line 1455 of yacc.c */
-#line 1545 "program_parse.y"
+#line 1546 "program_parse.y"
{
(yyval.integer) = STATE_FOG_COLOR;
;}
@@ -3944,7 +3945,7 @@ yyreduce:
case 184:
/* Line 1455 of yacc.c */
-#line 1549 "program_parse.y"
+#line 1550 "program_parse.y"
{
(yyval.integer) = STATE_FOG_PARAMS;
;}
@@ -3953,7 +3954,7 @@ yyreduce:
case 185:
/* Line 1455 of yacc.c */
-#line 1555 "program_parse.y"
+#line 1556 "program_parse.y"
{
memset((yyval.state), 0, sizeof((yyval.state)));
(yyval.state)[0] = STATE_CLIPPLANE;
@@ -3964,7 +3965,7 @@ yyreduce:
case 186:
/* Line 1455 of yacc.c */
-#line 1563 "program_parse.y"
+#line 1564 "program_parse.y"
{
if ((unsigned) (yyvsp[(1) - (1)].integer) >= state->MaxClipPlanes) {
yyerror(& (yylsp[(1) - (1)]), state, "invalid clip plane selector");
@@ -3978,7 +3979,7 @@ yyreduce:
case 187:
/* Line 1455 of yacc.c */
-#line 1574 "program_parse.y"
+#line 1575 "program_parse.y"
{
memset((yyval.state), 0, sizeof((yyval.state)));
(yyval.state)[0] = (yyvsp[(2) - (2)].integer);
@@ -3988,7 +3989,7 @@ yyreduce:
case 188:
/* Line 1455 of yacc.c */
-#line 1581 "program_parse.y"
+#line 1582 "program_parse.y"
{
(yyval.integer) = STATE_POINT_SIZE;
;}
@@ -3997,7 +3998,7 @@ yyreduce:
case 189:
/* Line 1455 of yacc.c */
-#line 1585 "program_parse.y"
+#line 1586 "program_parse.y"
{
(yyval.integer) = STATE_POINT_ATTENUATION;
;}
@@ -4006,7 +4007,7 @@ yyreduce:
case 190:
/* Line 1455 of yacc.c */
-#line 1591 "program_parse.y"
+#line 1592 "program_parse.y"
{
(yyval.state)[0] = (yyvsp[(1) - (5)].state)[0];
(yyval.state)[1] = (yyvsp[(1) - (5)].state)[1];
@@ -4019,7 +4020,7 @@ yyreduce:
case 191:
/* Line 1455 of yacc.c */
-#line 1601 "program_parse.y"
+#line 1602 "program_parse.y"
{
(yyval.state)[0] = (yyvsp[(1) - (2)].state)[0];
(yyval.state)[1] = (yyvsp[(1) - (2)].state)[1];
@@ -4032,7 +4033,7 @@ yyreduce:
case 192:
/* Line 1455 of yacc.c */
-#line 1611 "program_parse.y"
+#line 1612 "program_parse.y"
{
(yyval.state)[2] = 0;
(yyval.state)[3] = 3;
@@ -4042,7 +4043,7 @@ yyreduce:
case 193:
/* Line 1455 of yacc.c */
-#line 1616 "program_parse.y"
+#line 1617 "program_parse.y"
{
/* It seems logical that the matrix row range specifier would have
* to specify a range or more than one row (i.e., $5 > $3).
@@ -4063,7 +4064,7 @@ yyreduce:
case 194:
/* Line 1455 of yacc.c */
-#line 1634 "program_parse.y"
+#line 1635 "program_parse.y"
{
(yyval.state)[0] = (yyvsp[(2) - (3)].state)[0];
(yyval.state)[1] = (yyvsp[(2) - (3)].state)[1];
@@ -4074,7 +4075,7 @@ yyreduce:
case 195:
/* Line 1455 of yacc.c */
-#line 1642 "program_parse.y"
+#line 1643 "program_parse.y"
{
(yyval.integer) = 0;
;}
@@ -4083,7 +4084,7 @@ yyreduce:
case 196:
/* Line 1455 of yacc.c */
-#line 1646 "program_parse.y"
+#line 1647 "program_parse.y"
{
(yyval.integer) = (yyvsp[(1) - (1)].integer);
;}
@@ -4092,7 +4093,7 @@ yyreduce:
case 197:
/* Line 1455 of yacc.c */
-#line 1652 "program_parse.y"
+#line 1653 "program_parse.y"
{
(yyval.integer) = STATE_MATRIX_INVERSE;
;}
@@ -4101,7 +4102,7 @@ yyreduce:
case 198:
/* Line 1455 of yacc.c */
-#line 1656 "program_parse.y"
+#line 1657 "program_parse.y"
{
(yyval.integer) = STATE_MATRIX_TRANSPOSE;
;}
@@ -4110,7 +4111,7 @@ yyreduce:
case 199:
/* Line 1455 of yacc.c */
-#line 1660 "program_parse.y"
+#line 1661 "program_parse.y"
{
(yyval.integer) = STATE_MATRIX_INVTRANS;
;}
@@ -4119,7 +4120,7 @@ yyreduce:
case 200:
/* Line 1455 of yacc.c */
-#line 1666 "program_parse.y"
+#line 1667 "program_parse.y"
{
if ((yyvsp[(1) - (1)].integer) > 3) {
yyerror(& (yylsp[(1) - (1)]), state, "invalid matrix row reference");
@@ -4133,7 +4134,7 @@ yyreduce:
case 201:
/* Line 1455 of yacc.c */
-#line 1677 "program_parse.y"
+#line 1678 "program_parse.y"
{
(yyval.state)[0] = STATE_MODELVIEW_MATRIX;
(yyval.state)[1] = (yyvsp[(2) - (2)].integer);
@@ -4143,7 +4144,7 @@ yyreduce:
case 202:
/* Line 1455 of yacc.c */
-#line 1682 "program_parse.y"
+#line 1683 "program_parse.y"
{
(yyval.state)[0] = STATE_PROJECTION_MATRIX;
(yyval.state)[1] = 0;
@@ -4153,7 +4154,7 @@ yyreduce:
case 203:
/* Line 1455 of yacc.c */
-#line 1687 "program_parse.y"
+#line 1688 "program_parse.y"
{
(yyval.state)[0] = STATE_MVP_MATRIX;
(yyval.state)[1] = 0;
@@ -4163,7 +4164,7 @@ yyreduce:
case 204:
/* Line 1455 of yacc.c */
-#line 1692 "program_parse.y"
+#line 1693 "program_parse.y"
{
(yyval.state)[0] = STATE_TEXTURE_MATRIX;
(yyval.state)[1] = (yyvsp[(2) - (2)].integer);
@@ -4173,7 +4174,7 @@ yyreduce:
case 205:
/* Line 1455 of yacc.c */
-#line 1697 "program_parse.y"
+#line 1698 "program_parse.y"
{
yyerror(& (yylsp[(1) - (4)]), state, "GL_ARB_matrix_palette not supported");
YYERROR;
@@ -4183,7 +4184,7 @@ yyreduce:
case 206:
/* Line 1455 of yacc.c */
-#line 1702 "program_parse.y"
+#line 1703 "program_parse.y"
{
(yyval.state)[0] = STATE_PROGRAM_MATRIX;
(yyval.state)[1] = (yyvsp[(3) - (4)].integer);
@@ -4193,7 +4194,7 @@ yyreduce:
case 207:
/* Line 1455 of yacc.c */
-#line 1709 "program_parse.y"
+#line 1710 "program_parse.y"
{
(yyval.integer) = 0;
;}
@@ -4202,7 +4203,7 @@ yyreduce:
case 208:
/* Line 1455 of yacc.c */
-#line 1713 "program_parse.y"
+#line 1714 "program_parse.y"
{
(yyval.integer) = (yyvsp[(2) - (3)].integer);
;}
@@ -4211,7 +4212,7 @@ yyreduce:
case 209:
/* Line 1455 of yacc.c */
-#line 1718 "program_parse.y"
+#line 1719 "program_parse.y"
{
/* Since GL_ARB_vertex_blend isn't supported, only modelview matrix
* zero is valid.
@@ -4228,7 +4229,7 @@ yyreduce:
case 210:
/* Line 1455 of yacc.c */
-#line 1731 "program_parse.y"
+#line 1732 "program_parse.y"
{
/* Since GL_ARB_matrix_palette isn't supported, just let any value
* through here. The error will be generated later.
@@ -4240,7 +4241,7 @@ yyreduce:
case 211:
/* Line 1455 of yacc.c */
-#line 1739 "program_parse.y"
+#line 1740 "program_parse.y"
{
if ((unsigned) (yyvsp[(1) - (1)].integer) >= state->MaxProgramMatrices) {
yyerror(& (yylsp[(1) - (1)]), state, "invalid program matrix selector");
@@ -4254,7 +4255,7 @@ yyreduce:
case 212:
/* Line 1455 of yacc.c */
-#line 1750 "program_parse.y"
+#line 1751 "program_parse.y"
{
memset((yyval.state), 0, sizeof((yyval.state)));
(yyval.state)[0] = STATE_DEPTH_RANGE;
@@ -4264,7 +4265,7 @@ yyreduce:
case 217:
/* Line 1455 of yacc.c */
-#line 1762 "program_parse.y"
+#line 1763 "program_parse.y"
{
memset((yyval.state), 0, sizeof((yyval.state)));
(yyval.state)[0] = state->state_param_enum;
@@ -4277,7 +4278,7 @@ yyreduce:
case 218:
/* Line 1455 of yacc.c */
-#line 1772 "program_parse.y"
+#line 1773 "program_parse.y"
{
(yyval.state)[0] = (yyvsp[(1) - (1)].integer);
(yyval.state)[1] = (yyvsp[(1) - (1)].integer);
@@ -4287,7 +4288,7 @@ yyreduce:
case 219:
/* Line 1455 of yacc.c */
-#line 1777 "program_parse.y"
+#line 1778 "program_parse.y"
{
(yyval.state)[0] = (yyvsp[(1) - (3)].integer);
(yyval.state)[1] = (yyvsp[(3) - (3)].integer);
@@ -4297,7 +4298,7 @@ yyreduce:
case 220:
/* Line 1455 of yacc.c */
-#line 1784 "program_parse.y"
+#line 1785 "program_parse.y"
{
memset((yyval.state), 0, sizeof((yyval.state)));
(yyval.state)[0] = state->state_param_enum;
@@ -4310,7 +4311,7 @@ yyreduce:
case 221:
/* Line 1455 of yacc.c */
-#line 1794 "program_parse.y"
+#line 1795 "program_parse.y"
{
memset((yyval.state), 0, sizeof((yyval.state)));
(yyval.state)[0] = state->state_param_enum;
@@ -4323,7 +4324,7 @@ yyreduce:
case 222:
/* Line 1455 of yacc.c */
-#line 1803 "program_parse.y"
+#line 1804 "program_parse.y"
{
(yyval.state)[0] = (yyvsp[(1) - (1)].integer);
(yyval.state)[1] = (yyvsp[(1) - (1)].integer);
@@ -4333,7 +4334,7 @@ yyreduce:
case 223:
/* Line 1455 of yacc.c */
-#line 1808 "program_parse.y"
+#line 1809 "program_parse.y"
{
(yyval.state)[0] = (yyvsp[(1) - (3)].integer);
(yyval.state)[1] = (yyvsp[(3) - (3)].integer);
@@ -4343,7 +4344,7 @@ yyreduce:
case 224:
/* Line 1455 of yacc.c */
-#line 1815 "program_parse.y"
+#line 1816 "program_parse.y"
{
memset((yyval.state), 0, sizeof((yyval.state)));
(yyval.state)[0] = state->state_param_enum;
@@ -4356,7 +4357,7 @@ yyreduce:
case 225:
/* Line 1455 of yacc.c */
-#line 1825 "program_parse.y"
+#line 1826 "program_parse.y"
{
if ((unsigned) (yyvsp[(1) - (1)].integer) >= state->limits->MaxEnvParams) {
yyerror(& (yylsp[(1) - (1)]), state, "invalid environment parameter reference");
@@ -4369,7 +4370,7 @@ yyreduce:
case 226:
/* Line 1455 of yacc.c */
-#line 1835 "program_parse.y"
+#line 1836 "program_parse.y"
{
if ((unsigned) (yyvsp[(1) - (1)].integer) >= state->limits->MaxLocalParams) {
yyerror(& (yylsp[(1) - (1)]), state, "invalid local parameter reference");
@@ -4382,7 +4383,7 @@ yyreduce:
case 231:
/* Line 1455 of yacc.c */
-#line 1850 "program_parse.y"
+#line 1851 "program_parse.y"
{
(yyval.vector).count = 4;
(yyval.vector).data[0] = (yyvsp[(1) - (1)].real);
@@ -4395,7 +4396,7 @@ yyreduce:
case 232:
/* Line 1455 of yacc.c */
-#line 1860 "program_parse.y"
+#line 1861 "program_parse.y"
{
(yyval.vector).count = 1;
(yyval.vector).data[0] = (yyvsp[(1) - (1)].real);
@@ -4408,7 +4409,7 @@ yyreduce:
case 233:
/* Line 1455 of yacc.c */
-#line 1868 "program_parse.y"
+#line 1869 "program_parse.y"
{
(yyval.vector).count = 1;
(yyval.vector).data[0] = (float) (yyvsp[(1) - (1)].integer);
@@ -4421,7 +4422,7 @@ yyreduce:
case 234:
/* Line 1455 of yacc.c */
-#line 1878 "program_parse.y"
+#line 1879 "program_parse.y"
{
(yyval.vector).count = 4;
(yyval.vector).data[0] = (yyvsp[(2) - (3)].real);
@@ -4434,7 +4435,7 @@ yyreduce:
case 235:
/* Line 1455 of yacc.c */
-#line 1886 "program_parse.y"
+#line 1887 "program_parse.y"
{
(yyval.vector).count = 4;
(yyval.vector).data[0] = (yyvsp[(2) - (5)].real);
@@ -4447,7 +4448,7 @@ yyreduce:
case 236:
/* Line 1455 of yacc.c */
-#line 1895 "program_parse.y"
+#line 1896 "program_parse.y"
{
(yyval.vector).count = 4;
(yyval.vector).data[0] = (yyvsp[(2) - (7)].real);
@@ -4460,7 +4461,7 @@ yyreduce:
case 237:
/* Line 1455 of yacc.c */
-#line 1904 "program_parse.y"
+#line 1905 "program_parse.y"
{
(yyval.vector).count = 4;
(yyval.vector).data[0] = (yyvsp[(2) - (9)].real);
@@ -4473,7 +4474,7 @@ yyreduce:
case 238:
/* Line 1455 of yacc.c */
-#line 1914 "program_parse.y"
+#line 1915 "program_parse.y"
{
(yyval.real) = ((yyvsp[(1) - (2)].negate)) ? -(yyvsp[(2) - (2)].real) : (yyvsp[(2) - (2)].real);
;}
@@ -4482,7 +4483,7 @@ yyreduce:
case 239:
/* Line 1455 of yacc.c */
-#line 1918 "program_parse.y"
+#line 1919 "program_parse.y"
{
(yyval.real) = (float)(((yyvsp[(1) - (2)].negate)) ? -(yyvsp[(2) - (2)].integer) : (yyvsp[(2) - (2)].integer));
;}
@@ -4491,35 +4492,35 @@ yyreduce:
case 240:
/* Line 1455 of yacc.c */
-#line 1923 "program_parse.y"
+#line 1924 "program_parse.y"
{ (yyval.negate) = FALSE; ;}
break;
case 241:
/* Line 1455 of yacc.c */
-#line 1924 "program_parse.y"
+#line 1925 "program_parse.y"
{ (yyval.negate) = TRUE; ;}
break;
case 242:
/* Line 1455 of yacc.c */
-#line 1925 "program_parse.y"
+#line 1926 "program_parse.y"
{ (yyval.negate) = FALSE; ;}
break;
case 243:
/* Line 1455 of yacc.c */
-#line 1928 "program_parse.y"
+#line 1929 "program_parse.y"
{ (yyval.integer) = (yyvsp[(2) - (2)].integer); ;}
break;
case 245:
/* Line 1455 of yacc.c */
-#line 1932 "program_parse.y"
+#line 1933 "program_parse.y"
{
/* NV_fragment_program_option defines the size qualifiers in a
* fairly broken way. "SHORT" or "LONG" can optionally be used
@@ -4558,7 +4559,7 @@ yyreduce:
case 246:
/* Line 1455 of yacc.c */
-#line 1966 "program_parse.y"
+#line 1967 "program_parse.y"
{
;}
break;
@@ -4566,14 +4567,14 @@ yyreduce:
case 247:
/* Line 1455 of yacc.c */
-#line 1970 "program_parse.y"
+#line 1971 "program_parse.y"
{ (yyval.integer) = (yyvsp[(1) - (1)].integer); ;}
break;
case 249:
/* Line 1455 of yacc.c */
-#line 1974 "program_parse.y"
+#line 1975 "program_parse.y"
{
if (!declare_variable(state, (yyvsp[(3) - (3)].string), (yyvsp[(0) - (3)].integer), & (yylsp[(3) - (3)]))) {
free((yyvsp[(3) - (3)].string));
@@ -4585,7 +4586,7 @@ yyreduce:
case 250:
/* Line 1455 of yacc.c */
-#line 1981 "program_parse.y"
+#line 1982 "program_parse.y"
{
if (!declare_variable(state, (yyvsp[(1) - (1)].string), (yyvsp[(0) - (1)].integer), & (yylsp[(1) - (1)]))) {
free((yyvsp[(1) - (1)].string));
@@ -4597,7 +4598,7 @@ yyreduce:
case 251:
/* Line 1455 of yacc.c */
-#line 1990 "program_parse.y"
+#line 1991 "program_parse.y"
{
struct asm_symbol *const s =
declare_variable(state, (yyvsp[(3) - (5)].string), at_output, & (yylsp[(3) - (5)]));
@@ -4614,7 +4615,7 @@ yyreduce:
case 252:
/* Line 1455 of yacc.c */
-#line 2004 "program_parse.y"
+#line 2005 "program_parse.y"
{
if (state->mode == ARB_vertex) {
(yyval.result) = VERT_RESULT_HPOS;
@@ -4628,7 +4629,7 @@ yyreduce:
case 253:
/* Line 1455 of yacc.c */
-#line 2013 "program_parse.y"
+#line 2014 "program_parse.y"
{
if (state->mode == ARB_vertex) {
(yyval.result) = VERT_RESULT_FOGC;
@@ -4642,7 +4643,7 @@ yyreduce:
case 254:
/* Line 1455 of yacc.c */
-#line 2022 "program_parse.y"
+#line 2023 "program_parse.y"
{
(yyval.result) = (yyvsp[(2) - (2)].result);
;}
@@ -4651,7 +4652,7 @@ yyreduce:
case 255:
/* Line 1455 of yacc.c */
-#line 2026 "program_parse.y"
+#line 2027 "program_parse.y"
{
if (state->mode == ARB_vertex) {
(yyval.result) = VERT_RESULT_PSIZ;
@@ -4665,7 +4666,7 @@ yyreduce:
case 256:
/* Line 1455 of yacc.c */
-#line 2035 "program_parse.y"
+#line 2036 "program_parse.y"
{
if (state->mode == ARB_vertex) {
(yyval.result) = VERT_RESULT_TEX0 + (yyvsp[(3) - (3)].integer);
@@ -4679,7 +4680,7 @@ yyreduce:
case 257:
/* Line 1455 of yacc.c */
-#line 2044 "program_parse.y"
+#line 2045 "program_parse.y"
{
if (state->mode == ARB_fragment) {
(yyval.result) = FRAG_RESULT_DEPTH;
@@ -4693,7 +4694,7 @@ yyreduce:
case 258:
/* Line 1455 of yacc.c */
-#line 2055 "program_parse.y"
+#line 2056 "program_parse.y"
{
(yyval.result) = (yyvsp[(2) - (3)].integer) + (yyvsp[(3) - (3)].integer);
;}
@@ -4702,7 +4703,7 @@ yyreduce:
case 259:
/* Line 1455 of yacc.c */
-#line 2061 "program_parse.y"
+#line 2062 "program_parse.y"
{
(yyval.integer) = (state->mode == ARB_vertex)
? VERT_RESULT_COL0
@@ -4713,7 +4714,7 @@ yyreduce:
case 260:
/* Line 1455 of yacc.c */
-#line 2067 "program_parse.y"
+#line 2068 "program_parse.y"
{
if (state->mode == ARB_vertex) {
(yyval.integer) = VERT_RESULT_COL0;
@@ -4727,7 +4728,7 @@ yyreduce:
case 261:
/* Line 1455 of yacc.c */
-#line 2076 "program_parse.y"
+#line 2077 "program_parse.y"
{
if (state->mode == ARB_vertex) {
(yyval.integer) = VERT_RESULT_BFC0;
@@ -4741,7 +4742,7 @@ yyreduce:
case 262:
/* Line 1455 of yacc.c */
-#line 2087 "program_parse.y"
+#line 2088 "program_parse.y"
{
(yyval.integer) = 0;
;}
@@ -4750,7 +4751,7 @@ yyreduce:
case 263:
/* Line 1455 of yacc.c */
-#line 2091 "program_parse.y"
+#line 2092 "program_parse.y"
{
if (state->mode == ARB_vertex) {
(yyval.integer) = 0;
@@ -4764,7 +4765,7 @@ yyreduce:
case 264:
/* Line 1455 of yacc.c */
-#line 2100 "program_parse.y"
+#line 2101 "program_parse.y"
{
if (state->mode == ARB_vertex) {
(yyval.integer) = 1;
@@ -4778,91 +4779,91 @@ yyreduce:
case 265:
/* Line 1455 of yacc.c */
-#line 2110 "program_parse.y"
+#line 2111 "program_parse.y"
{ (yyval.integer) = 0; ;}
break;
case 266:
/* Line 1455 of yacc.c */
-#line 2111 "program_parse.y"
+#line 2112 "program_parse.y"
{ (yyval.integer) = 0; ;}
break;
case 267:
/* Line 1455 of yacc.c */
-#line 2112 "program_parse.y"
+#line 2113 "program_parse.y"
{ (yyval.integer) = 1; ;}
break;
case 268:
/* Line 1455 of yacc.c */
-#line 2115 "program_parse.y"
+#line 2116 "program_parse.y"
{ (yyval.integer) = 0; ;}
break;
case 269:
/* Line 1455 of yacc.c */
-#line 2116 "program_parse.y"
+#line 2117 "program_parse.y"
{ (yyval.integer) = 0; ;}
break;
case 270:
/* Line 1455 of yacc.c */
-#line 2117 "program_parse.y"
+#line 2118 "program_parse.y"
{ (yyval.integer) = 1; ;}
break;
case 271:
/* Line 1455 of yacc.c */
-#line 2120 "program_parse.y"
+#line 2121 "program_parse.y"
{ (yyval.integer) = 0; ;}
break;
case 272:
/* Line 1455 of yacc.c */
-#line 2121 "program_parse.y"
+#line 2122 "program_parse.y"
{ (yyval.integer) = (yyvsp[(2) - (3)].integer); ;}
break;
case 273:
/* Line 1455 of yacc.c */
-#line 2124 "program_parse.y"
+#line 2125 "program_parse.y"
{ (yyval.integer) = 0; ;}
break;
case 274:
/* Line 1455 of yacc.c */
-#line 2125 "program_parse.y"
+#line 2126 "program_parse.y"
{ (yyval.integer) = (yyvsp[(2) - (3)].integer); ;}
break;
case 275:
/* Line 1455 of yacc.c */
-#line 2128 "program_parse.y"
+#line 2129 "program_parse.y"
{ (yyval.integer) = 0; ;}
break;
case 276:
/* Line 1455 of yacc.c */
-#line 2129 "program_parse.y"
+#line 2130 "program_parse.y"
{ (yyval.integer) = (yyvsp[(2) - (3)].integer); ;}
break;
case 277:
/* Line 1455 of yacc.c */
-#line 2133 "program_parse.y"
+#line 2134 "program_parse.y"
{
if ((unsigned) (yyvsp[(1) - (1)].integer) >= state->MaxTextureCoordUnits) {
yyerror(& (yylsp[(1) - (1)]), state, "invalid texture coordinate unit selector");
@@ -4876,7 +4877,7 @@ yyreduce:
case 278:
/* Line 1455 of yacc.c */
-#line 2144 "program_parse.y"
+#line 2145 "program_parse.y"
{
if ((unsigned) (yyvsp[(1) - (1)].integer) >= state->MaxTextureImageUnits) {
yyerror(& (yylsp[(1) - (1)]), state, "invalid texture image unit selector");
@@ -4890,7 +4891,7 @@ yyreduce:
case 279:
/* Line 1455 of yacc.c */
-#line 2155 "program_parse.y"
+#line 2156 "program_parse.y"
{
if ((unsigned) (yyvsp[(1) - (1)].integer) >= state->MaxTextureUnits) {
yyerror(& (yylsp[(1) - (1)]), state, "invalid texture unit selector");
@@ -4904,7 +4905,7 @@ yyreduce:
case 280:
/* Line 1455 of yacc.c */
-#line 2166 "program_parse.y"
+#line 2167 "program_parse.y"
{
struct asm_symbol *exist = (struct asm_symbol *)
_mesa_symbol_table_find_symbol(state->st, 0, (yyvsp[(2) - (4)].string));
@@ -4933,7 +4934,7 @@ yyreduce:
/* Line 1455 of yacc.c */
-#line 4937 "program_parse.tab.c"
+#line 4938 "program_parse.tab.c"
default: break;
}
YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
@@ -5152,7 +5153,7 @@ yyreturn:
/* Line 1675 of yacc.c */
-#line 2195 "program_parse.y"
+#line 2196 "program_parse.y"
void
diff --git a/src/mesa/program/program_parse.y b/src/mesa/program/program_parse.y
index 861927c744c..fb6ef85a9fc 100644
--- a/src/mesa/program/program_parse.y
+++ b/src/mesa/program/program_parse.y
@@ -835,6 +835,7 @@ srcReg: USED_IDENTIFIER /* temporaryReg | progParamSingle */
$$.Base.File = $1->param_binding_type;
if ($3.Base.RelAddr) {
+ state->prog->IndirectRegisterFiles |= (1 << $$.Base.File);
$1->param_accessed_indirectly = 1;
$$.Base.RelAddr = 1;
@@ -934,7 +935,7 @@ addrRegRelOffset: { $$ = 0; }
addrRegPosOffset: INTEGER
{
- if (($1 < 0) || ($1 > 63)) {
+ if (($1 < 0) || ($1 > 4095)) {
char s[100];
_mesa_snprintf(s, sizeof(s),
"relative address offset too large (%d)", $1);
@@ -948,7 +949,7 @@ addrRegPosOffset: INTEGER
addrRegNegOffset: INTEGER
{
- if (($1 < 0) || ($1 > 64)) {
+ if (($1 < 0) || ($1 > 4096)) {
char s[100];
_mesa_snprintf(s, sizeof(s),
"relative address offset too large (%d)", $1);
diff --git a/src/mesa/program/programopt.h b/src/mesa/program/programopt.h
index 21fac07849a..4af6357f976 100644
--- a/src/mesa/program/programopt.h
+++ b/src/mesa/program/programopt.h
@@ -26,6 +26,7 @@
#ifndef PROGRAMOPT_H
#define PROGRAMOPT_H 1
+#include "main/mtypes.h"
extern void
_mesa_insert_mvp_code(GLcontext *ctx, struct gl_vertex_program *vprog);
diff --git a/src/mesa/slang/library/slang_common_builtin.gc b/src/mesa/slang/library/slang_common_builtin.gc
index d75354deffe..1f5ddbc1ee2 100644
--- a/src/mesa/slang/library/slang_common_builtin.gc
+++ b/src/mesa/slang/library/slang_common_builtin.gc
@@ -411,7 +411,7 @@ float atan(const float y, const float x)
if (abs(x) > 1.0e-4) {
r = atan(y / x);
if (x < 0.0) {
- r = r + sign(y) * 3.141593;
+ r = r + 3.141593 - 6.283186 * float(y < 0.0);
}
}
else {
diff --git a/src/mesa/slang/slang_builtin.h b/src/mesa/slang/slang_builtin.h
index ed9ae80b3c3..dc92f83f8ef 100644
--- a/src/mesa/slang/slang_builtin.h
+++ b/src/mesa/slang/slang_builtin.h
@@ -26,8 +26,8 @@
#ifndef SLANG_BUILTIN_H
#define SLANG_BUILTIN_H
-#include "program/prog_parameter.h"
-#include "slang_utility.h"
+#include "main/glheader.h"
+#include "main/mtypes.h"
#include "slang_ir.h"
diff --git a/src/mesa/slang/slang_codegen.h b/src/mesa/slang/slang_codegen.h
index 461633fe346..ff0279bbfed 100644
--- a/src/mesa/slang/slang_codegen.h
+++ b/src/mesa/slang/slang_codegen.h
@@ -27,9 +27,13 @@
#define SLANG_CODEGEN_H
-#include "main/imports.h"
+#include "main/glheader.h"
#include "slang_compile.h"
+#include "slang_compile_variable.h"
+#include "slang_typeinfo.h"
+#include "slang_utility.h"
+struct slang_function_;
#define MAX_LOOP_DEPTH 30
diff --git a/src/mesa/slang/slang_compile.c b/src/mesa/slang/slang_compile.c
index 12ab4666aed..de1bb56cd9a 100644
--- a/src/mesa/slang/slang_compile.c
+++ b/src/mesa/slang/slang_compile.c
@@ -36,6 +36,7 @@
#include "program/prog_print.h"
#include "program/prog_parameter.h"
#include "../../glsl/pp/sl_pp_public.h"
+#include "../../glsl/pp/sl_pp_purify.h"
#include "../../glsl/cl/sl_cl_parse.h"
#include "slang_codegen.h"
#include "slang_compile.h"
diff --git a/src/mesa/slang/slang_compile.h b/src/mesa/slang/slang_compile.h
index 71fcaa39931..6061f878e75 100644
--- a/src/mesa/slang/slang_compile.h
+++ b/src/mesa/slang/slang_compile.h
@@ -25,13 +25,14 @@
#if !defined SLANG_COMPILE_H
#define SLANG_COMPILE_H
-#include "main/imports.h"
+#include "main/glheader.h"
#include "main/mtypes.h"
-#include "slang_typeinfo.h"
-#include "slang_compile_variable.h"
-#include "slang_compile_struct.h"
-#include "slang_compile_operation.h"
#include "slang_compile_function.h"
+#include "slang_compile_struct.h"
+#include "slang_compile_variable.h"
+#include "slang_utility.h"
+
+struct slang_code_object_;
#if defined __cplusplus
extern "C" {
diff --git a/src/mesa/slang/slang_compile_function.h b/src/mesa/slang/slang_compile_function.h
index a5445ec2537..0eced3ca1a1 100644
--- a/src/mesa/slang/slang_compile_function.h
+++ b/src/mesa/slang/slang_compile_function.h
@@ -25,6 +25,14 @@
#ifndef SLANG_COMPILE_FUNCTION_H
#define SLANG_COMPILE_FUNCTION_H
+#include "main/glheader.h"
+#include "slang_compile_operation.h"
+#include "slang_compile_variable.h"
+#include "slang_log.h"
+#include "slang_utility.h"
+
+struct slang_name_space_;
+struct slang_operation_;
/**
* Types of functions.
diff --git a/src/mesa/slang/slang_compile_operation.h b/src/mesa/slang/slang_compile_operation.h
index 1f15c198963..b8c5f214cf0 100644
--- a/src/mesa/slang/slang_compile_operation.h
+++ b/src/mesa/slang/slang_compile_operation.h
@@ -26,6 +26,10 @@
#define SLANG_COMPILE_OPERATION_H
+#include "main/glheader.h"
+#include "slang_compile_variable.h"
+#include "slang_utility.h"
+
/**
* Types of slang operations.
* These are the types of the AST (abstract syntax tree) nodes.
diff --git a/src/mesa/slang/slang_compile_struct.h b/src/mesa/slang/slang_compile_struct.h
index 90c5512f4d3..7be6f204e11 100644
--- a/src/mesa/slang/slang_compile_struct.h
+++ b/src/mesa/slang/slang_compile_struct.h
@@ -29,6 +29,9 @@
extern "C" {
#endif
+#include "main/glheader.h"
+#include "slang_utility.h"
+
struct slang_function_;
typedef struct slang_struct_scope_
diff --git a/src/mesa/slang/slang_compile_variable.h b/src/mesa/slang/slang_compile_variable.h
index 5c9d248b354..48dc6efca4b 100644
--- a/src/mesa/slang/slang_compile_variable.h
+++ b/src/mesa/slang/slang_compile_variable.h
@@ -26,7 +26,9 @@
#define SLANG_COMPILE_VARIABLE_H
-struct slang_ir_storage_;
+#include "main/glheader.h"
+#include "slang_typeinfo.h"
+#include "slang_utility.h"
/**
diff --git a/src/mesa/slang/slang_emit.h b/src/mesa/slang/slang_emit.h
index ab4c202d673..f93d6b00d69 100644
--- a/src/mesa/slang/slang_emit.h
+++ b/src/mesa/slang/slang_emit.h
@@ -25,11 +25,9 @@
#ifndef SLANG_EMIT_H
#define SLANG_EMIT_H
-
-#include "main/imports.h"
-#include "slang_compile.h"
+#include "main/glheader.h"
#include "slang_ir.h"
-#include "main/mtypes.h"
+#include "slang_vartable.h"
extern GLuint
diff --git a/src/mesa/slang/slang_ir.h b/src/mesa/slang/slang_ir.h
index b7a373746b4..ce9a6c5a483 100644
--- a/src/mesa/slang/slang_ir.h
+++ b/src/mesa/slang/slang_ir.h
@@ -37,6 +37,7 @@
#include "slang_compile.h"
#include "slang_label.h"
#include "main/mtypes.h"
+#include "program/prog_instruction.h"
/**
diff --git a/src/mesa/slang/slang_label.c b/src/mesa/slang/slang_label.c
index 8e3a8ebc1aa..24881d5b6e6 100644
--- a/src/mesa/slang/slang_label.c
+++ b/src/mesa/slang/slang_label.c
@@ -7,6 +7,8 @@
*/
+#include "main/mtypes.h"
+#include "program/prog_instruction.h"
#include "slang_label.h"
#include "slang_mem.h"
diff --git a/src/mesa/slang/slang_label.h b/src/mesa/slang/slang_label.h
index 4d04df18d25..b0cff3a8e89 100644
--- a/src/mesa/slang/slang_label.h
+++ b/src/mesa/slang/slang_label.h
@@ -1,10 +1,9 @@
#ifndef SLANG_LABEL_H
#define SLANG_LABEL_H 1
-#include "main/imports.h"
-#include "main/mtypes.h"
-#include "program/prog_instruction.h"
+#include "main/glheader.h"
+struct gl_program;
struct slang_label_
{
diff --git a/src/mesa/slang/slang_link.c b/src/mesa/slang/slang_link.c
index 00c2c13cc67..c21f67256a5 100644
--- a/src/mesa/slang/slang_link.c
+++ b/src/mesa/slang/slang_link.c
@@ -756,6 +756,8 @@ _slang_update_inputs_outputs(struct gl_program *prog)
prog->InputsRead = 0x0;
prog->OutputsWritten = 0x0;
+ prog->IndirectRegisterFiles = 0x0;
+
for (i = 0; i < prog->NumInstructions; i++) {
const struct prog_instruction *inst = prog->Instructions + i;
const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
@@ -774,6 +776,9 @@ _slang_update_inputs_outputs(struct gl_program *prog)
else if (inst->SrcReg[j].File == PROGRAM_ADDRESS) {
maxAddrReg = MAX2(maxAddrReg, (GLuint) (inst->SrcReg[j].Index + 1));
}
+
+ if (inst->SrcReg[j].RelAddr)
+ prog->IndirectRegisterFiles |= (1 << inst->SrcReg[j].File);
}
if (inst->DstReg.File == PROGRAM_OUTPUT) {
@@ -784,6 +789,8 @@ _slang_update_inputs_outputs(struct gl_program *prog)
else if (inst->DstReg.File == PROGRAM_ADDRESS) {
maxAddrReg = MAX2(maxAddrReg, inst->DstReg.Index + 1);
}
+ if (inst->DstReg.RelAddr)
+ prog->IndirectRegisterFiles |= (1 << inst->DstReg.File);
}
prog->NumAddressRegs = maxAddrReg;
}
@@ -1199,11 +1206,11 @@ _slang_link(GLcontext *ctx,
vertNotify = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
&shProg->FragmentProgram->Base);
if (ctx->Shader.Flags & GLSL_DUMP) {
- printf("Mesa pre-link fragment program:\n");
+ fprintf(stderr, "Mesa pre-link fragment program:\n");
_mesa_print_program(&fragProg->Base);
_mesa_print_program_parameters(ctx, &fragProg->Base);
- printf("Mesa post-link fragment program:\n");
+ fprintf(stderr, "Mesa post-link fragment program:\n");
_mesa_print_program(&shProg->FragmentProgram->Base);
_mesa_print_program_parameters(ctx, &shProg->FragmentProgram->Base);
}
@@ -1222,11 +1229,11 @@ _slang_link(GLcontext *ctx,
geomNotify = ctx->Driver.ProgramStringNotify(ctx, MESA_GEOMETRY_PROGRAM,
&shProg->GeometryProgram->Base);
if (ctx->Shader.Flags & GLSL_DUMP) {
- printf("Mesa pre-link geometry program:\n");
+ fprintf(stderr, "Mesa pre-link geometry program:\n");
_mesa_print_program(&geomProg->Base);
_mesa_print_program_parameters(ctx, &geomProg->Base);
- printf("Mesa post-link geometry program:\n");
+ fprintf(stderr, "Mesa post-link geometry program:\n");
_mesa_print_program(&shProg->GeometryProgram->Base);
_mesa_print_program_parameters(ctx, &shProg->GeometryProgram->Base);
}
@@ -1240,11 +1247,11 @@ _slang_link(GLcontext *ctx,
fragNotify = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
&shProg->VertexProgram->Base);
if (ctx->Shader.Flags & GLSL_DUMP) {
- printf("Mesa pre-link vertex program:\n");
+ fprintf(stderr, "Mesa pre-link vertex program:\n");
_mesa_print_program(&vertProg->Base);
_mesa_print_program_parameters(ctx, &vertProg->Base);
- printf("Mesa post-link vertex program:\n");
+ fprintf(stderr, "Mesa post-link vertex program:\n");
_mesa_print_program(&shProg->VertexProgram->Base);
_mesa_print_program_parameters(ctx, &shProg->VertexProgram->Base);
}
@@ -1259,10 +1266,10 @@ _slang_link(GLcontext *ctx,
}
if (ctx->Shader.Flags & GLSL_DUMP) {
- printf("Varying vars:\n");
+ fprintf(stderr, "Varying vars:\n");
_mesa_print_parameter_list(shProg->Varying);
if (shProg->InfoLog) {
- printf("Info Log: %s\n", shProg->InfoLog);
+ fprintf(stderr, "Info Log: %s\n", shProg->InfoLog);
}
}
diff --git a/src/mesa/slang/slang_link.h b/src/mesa/slang/slang_link.h
index 2b44d20787a..3e9fa2d743d 100644
--- a/src/mesa/slang/slang_link.h
+++ b/src/mesa/slang/slang_link.h
@@ -25,7 +25,7 @@
#ifndef SLANG_LINK_H
#define SLANG_LINK_H 1
-#include "slang_compile.h"
+#include "main/mtypes.h"
extern void
diff --git a/src/mesa/slang/slang_log.h b/src/mesa/slang/slang_log.h
index dcaba0285a7..544a26654e7 100644
--- a/src/mesa/slang/slang_log.h
+++ b/src/mesa/slang/slang_log.h
@@ -27,6 +27,8 @@
#define SLANG_LOG_H
+#include "main/glheader.h"
+
typedef struct slang_info_log_
{
char *text;
diff --git a/src/mesa/slang/slang_print.h b/src/mesa/slang/slang_print.h
index 46605c80610..99da3041437 100644
--- a/src/mesa/slang/slang_print.h
+++ b/src/mesa/slang/slang_print.h
@@ -3,6 +3,12 @@
#ifndef SLANG_PRINT
#define SLANG_PRINT
+#include "main/glheader.h"
+#include "slang_compile_function.h"
+#include "slang_compile_operation.h"
+#include "slang_compile_variable.h"
+#include "slang_typeinfo.h"
+
extern void
slang_print_function(const slang_function *f, GLboolean body);
diff --git a/src/mesa/slang/slang_simplify.h b/src/mesa/slang/slang_simplify.h
index 8689c23b1a0..37fb938d4fb 100644
--- a/src/mesa/slang/slang_simplify.h
+++ b/src/mesa/slang/slang_simplify.h
@@ -26,6 +26,13 @@
#define SLANG_SIMPLIFY_H
+#include "main/glheader.h"
+#include "slang_compile.h"
+#include "slang_compile_function.h"
+#include "slang_compile_operation.h"
+#include "slang_log.h"
+#include "slang_utility.h"
+
extern GLint
_slang_lookup_constant(const char *name);
diff --git a/src/mesa/slang/slang_utility.h b/src/mesa/slang/slang_utility.h
index 2c0d0bcbb2a..cb9b6d2aaaa 100644
--- a/src/mesa/slang/slang_utility.h
+++ b/src/mesa/slang/slang_utility.h
@@ -26,6 +26,8 @@
#define SLANG_UTILITY_H
+#include "main/glheader.h"
+
/* Compile-time assertions. If the expression is zero, try to declare an
* array of size [-1] to cause compilation error.
*/
diff --git a/src/mesa/slang/slang_vartable.h b/src/mesa/slang/slang_vartable.h
index 94bcd63f45a..97945b89d03 100644
--- a/src/mesa/slang/slang_vartable.h
+++ b/src/mesa/slang/slang_vartable.h
@@ -2,6 +2,9 @@
#ifndef SLANG_VARTABLE_H
#define SLANG_VARTABLE_H
+#include "main/glheader.h"
+#include "slang_utility.h"
+
struct slang_ir_storage_;
typedef struct slang_var_table_ slang_var_table;
diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h
index 1f0fef63df5..c7a04951bff 100644
--- a/src/mesa/state_tracker/st_atom.h
+++ b/src/mesa/state_tracker/st_atom.h
@@ -34,6 +34,8 @@
#ifndef ST_ATOM_H
#define ST_ATOM_H
+#include "main/glheader.h"
+
struct st_context;
struct st_tracked_state;
diff --git a/src/mesa/state_tracker/st_atom_constbuf.h b/src/mesa/state_tracker/st_atom_constbuf.h
index f707534e2cf..97b076629ee 100644
--- a/src/mesa/state_tracker/st_atom_constbuf.h
+++ b/src/mesa/state_tracker/st_atom_constbuf.h
@@ -29,6 +29,9 @@
#ifndef ST_ATOM_CONSTBUF_H
#define ST_ATOM_CONSTBUF_H
+struct gl_program_parameter_list;
+struct st_context;
+
void st_upload_constants( struct st_context *st,
struct gl_program_parameter_list *params,
diff --git a/src/mesa/state_tracker/st_atom_depth.c b/src/mesa/state_tracker/st_atom_depth.c
index 3c07afba9aa..1616e945fea 100644
--- a/src/mesa/state_tracker/st_atom_depth.c
+++ b/src/mesa/state_tracker/st_atom_depth.c
@@ -33,6 +33,8 @@
*/
+#include <assert.h>
+
#include "st_context.h"
#include "st_atom.h"
#include "pipe/p_context.h"
diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c
index b88c74fa03a..8a8d17599ec 100644
--- a/src/mesa/state_tracker/st_atom_pixeltransfer.c
+++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c
@@ -37,6 +37,7 @@
#include "main/image.h"
#include "main/macros.h"
#include "program/program.h"
+#include "program/prog_cache.h"
#include "program/prog_instruction.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c
index cebaad5f000..05442ef91b5 100644
--- a/src/mesa/state_tracker/st_atom_shader.c
+++ b/src/mesa/state_tracker/st_atom_shader.c
@@ -40,7 +40,6 @@
#include "program/program.h"
#include "pipe/p_context.h"
-#include "pipe/p_shader_tokens.h"
#include "util/u_simple_shaders.h"
diff --git a/src/mesa/state_tracker/st_atom_shader.h b/src/mesa/state_tracker/st_atom_shader.h
index 8403bc66c92..56d4c68f4f7 100644
--- a/src/mesa/state_tracker/st_atom_shader.h
+++ b/src/mesa/state_tracker/st_atom_shader.h
@@ -30,6 +30,9 @@
#define ST_ATOM_SHADER_H
+struct st_context;
+struct translated_vertex_program;
+
extern void
st_free_translated_vertex_programs(struct st_context *st,
struct translated_vertex_program *xvp);
diff --git a/src/mesa/state_tracker/st_atom_stipple.c b/src/mesa/state_tracker/st_atom_stipple.c
index 31e124b3293..ecdd9f06f6a 100644
--- a/src/mesa/state_tracker/st_atom_stipple.c
+++ b/src/mesa/state_tracker/st_atom_stipple.c
@@ -33,6 +33,8 @@
*/
+#include <assert.h>
+
#include "st_context.h"
#include "st_atom.h"
#include "pipe/p_context.h"
diff --git a/src/mesa/state_tracker/st_cache.h b/src/mesa/state_tracker/st_cache.h
index b81de316ec9..6d5de7b13ad 100644
--- a/src/mesa/state_tracker/st_cache.h
+++ b/src/mesa/state_tracker/st_cache.h
@@ -33,10 +33,11 @@
#ifndef ST_CACHE_H
#define ST_CACHE_H
-#include "cso_cache/cso_cache.h"
-
struct pipe_blend_state;
+struct pipe_depth_stencil_alpha_state;
+struct pipe_rasterizer_state;
struct pipe_sampler_state;
+struct pipe_shader_state;
struct st_context;
diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index ba600ccef6d..0b8ecd27cb9 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -46,6 +46,7 @@
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
#include "util/u_inlines.h"
#include "util/u_draw_quad.h"
#include "util/u_simple_shaders.h"
diff --git a/src/mesa/state_tracker/st_cb_bitmap.h b/src/mesa/state_tracker/st_cb_bitmap.h
index 8af975b74fc..d04b2b67795 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.h
+++ b/src/mesa/state_tracker/st_cb_bitmap.h
@@ -30,7 +30,10 @@
#define ST_CB_BITMAP_H
-#include "main/mtypes.h"
+#include "main/compiler.h"
+
+struct dd_function_table;
+struct st_context;
#if FEATURE_drawpix
diff --git a/src/mesa/state_tracker/st_cb_blit.h b/src/mesa/state_tracker/st_cb_blit.h
index 7ab9a54df90..c230652cefc 100644
--- a/src/mesa/state_tracker/st_cb_blit.h
+++ b/src/mesa/state_tracker/st_cb_blit.h
@@ -29,8 +29,10 @@
#define ST_CB_BLIT_H
-#include "main/mtypes.h"
-#include "st_context.h"
+#include "main/compiler.h"
+
+struct dd_function_table;
+struct st_context;
extern void
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.h b/src/mesa/state_tracker/st_cb_bufferobjects.h
index a27daac2bf0..1c991d20837 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.h
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.h
@@ -28,9 +28,12 @@
#ifndef ST_CB_BUFFEROBJECTS_H
#define ST_CB_BUFFEROBJECTS_H
-struct st_context;
-struct gl_buffer_object;
+#include "main/compiler.h"
+#include "main/mtypes.h"
+
+struct dd_function_table;
struct pipe_resource;
+struct st_context;
/**
* State_tracker vertex/pixel buffer object, derived from Mesa's
diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c
index ea2414c4a00..246ab2e9579 100644
--- a/src/mesa/state_tracker/st_cb_clear.c
+++ b/src/mesa/state_tracker/st_cb_clear.c
@@ -45,6 +45,7 @@
#include "st_program.h"
#include "pipe/p_context.h"
+#include "pipe/p_shader_tokens.h"
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "util/u_format.h"
diff --git a/src/mesa/state_tracker/st_cb_clear.h b/src/mesa/state_tracker/st_cb_clear.h
index bc035ac25ca..b27c09d10e4 100644
--- a/src/mesa/state_tracker/st_cb_clear.h
+++ b/src/mesa/state_tracker/st_cb_clear.h
@@ -30,6 +30,9 @@
#define ST_CB_CLEAR_H
+struct dd_function_table;
+struct st_context;
+
extern void
st_init_clear(struct st_context *st);
diff --git a/src/mesa/state_tracker/st_cb_condrender.h b/src/mesa/state_tracker/st_cb_condrender.h
index 891f1cbcd8c..79d0db8d08a 100644
--- a/src/mesa/state_tracker/st_cb_condrender.h
+++ b/src/mesa/state_tracker/st_cb_condrender.h
@@ -29,6 +29,8 @@
#define ST_CB_CONDRENDER_H
+struct dd_function_table;
+
extern void st_init_cond_render_functions(struct dd_function_table *functions);
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.h b/src/mesa/state_tracker/st_cb_drawpixels.h
index 7d5e901ccc5..575f169e08e 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.h
+++ b/src/mesa/state_tracker/st_cb_drawpixels.h
@@ -30,7 +30,10 @@
#define ST_CB_DRAWPIXELS_H
-#include "main/mtypes.h"
+#include "main/compiler.h"
+
+struct dd_function_table;
+struct st_context;
#if FEATURE_drawpix
diff --git a/src/mesa/state_tracker/st_cb_drawtex.c b/src/mesa/state_tracker/st_cb_drawtex.c
index b191a7f8902..c99a8d792ed 100644
--- a/src/mesa/state_tracker/st_cb_drawtex.c
+++ b/src/mesa/state_tracker/st_cb_drawtex.c
@@ -14,7 +14,6 @@
#include "main/imports.h"
#include "main/image.h"
-#include "main/bufferobj.h"
#include "main/macros.h"
#include "program/program.h"
#include "program/prog_print.h"
diff --git a/src/mesa/state_tracker/st_cb_drawtex.h b/src/mesa/state_tracker/st_cb_drawtex.h
index a3f54a349cc..d21262f8977 100644
--- a/src/mesa/state_tracker/st_cb_drawtex.h
+++ b/src/mesa/state_tracker/st_cb_drawtex.h
@@ -10,7 +10,10 @@
#define ST_CB_DRAWTEX_H
-#include "main/mtypes.h"
+#include "main/compiler.h"
+
+struct dd_function_table;
+struct st_context;
#if FEATURE_OES_draw_texture
diff --git a/src/mesa/state_tracker/st_cb_eglimage.c b/src/mesa/state_tracker/st_cb_eglimage.c
index 4aaf91d5a19..037e576fabe 100644
--- a/src/mesa/state_tracker/st_cb_eglimage.c
+++ b/src/mesa/state_tracker/st_cb_eglimage.c
@@ -33,6 +33,7 @@
#include "util/u_format.h"
#include "st_cb_eglimage.h"
#include "st_cb_fbo.h"
+#include "st_context.h"
#include "st_texture.h"
#include "st_format.h"
#include "st_manager.h"
diff --git a/src/mesa/state_tracker/st_cb_eglimage.h b/src/mesa/state_tracker/st_cb_eglimage.h
index d6953e99f69..b6e44d5aff5 100644
--- a/src/mesa/state_tracker/st_cb_eglimage.h
+++ b/src/mesa/state_tracker/st_cb_eglimage.h
@@ -29,8 +29,9 @@
#ifndef ST_CB_EGLIMAGE_H
#define ST_CB_EGLIMAGE_H
-#include "main/mtypes.h"
-#include "main/dd.h"
+#include "main/compiler.h"
+
+struct dd_function_table;
#if FEATURE_OES_EGL_image
diff --git a/src/mesa/state_tracker/st_cb_fbo.h b/src/mesa/state_tracker/st_cb_fbo.h
index 43b6c1e75f4..62a9bbcb25f 100644
--- a/src/mesa/state_tracker/st_cb_fbo.h
+++ b/src/mesa/state_tracker/st_cb_fbo.h
@@ -29,6 +29,15 @@
#ifndef ST_CB_FBO_H
#define ST_CB_FBO_H
+#include "main/compiler.h"
+#include "main/glheader.h"
+#include "main/mtypes.h"
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+
+struct dd_function_table;
+struct pipe_context;
/**
* Derived renderbuffer class. Just need to add a pointer to the
diff --git a/src/mesa/state_tracker/st_cb_feedback.h b/src/mesa/state_tracker/st_cb_feedback.h
index 706d84960f7..f2342f58238 100644
--- a/src/mesa/state_tracker/st_cb_feedback.h
+++ b/src/mesa/state_tracker/st_cb_feedback.h
@@ -30,7 +30,9 @@
#define ST_CB_FEEDBACK_H
-#include "main/mtypes.h"
+#include "main/compiler.h"
+
+struct dd_function_table;
#if FEATURE_feedback
diff --git a/src/mesa/state_tracker/st_cb_flush.h b/src/mesa/state_tracker/st_cb_flush.h
index 7fca0176a30..7672b4cf1da 100644
--- a/src/mesa/state_tracker/st_cb_flush.h
+++ b/src/mesa/state_tracker/st_cb_flush.h
@@ -30,6 +30,12 @@
#define ST_CB_FLUSH_H
+#include "pipe/p_compiler.h"
+
+struct dd_function_table;
+struct pipe_fence_handle;
+struct st_context;
+
extern void
st_init_flush_functions(struct dd_function_table *functions);
diff --git a/src/mesa/state_tracker/st_cb_program.h b/src/mesa/state_tracker/st_cb_program.h
index 0de96f2fd22..0fd179ef3df 100644
--- a/src/mesa/state_tracker/st_cb_program.h
+++ b/src/mesa/state_tracker/st_cb_program.h
@@ -29,6 +29,10 @@
#define ST_CB_PROGRAM_H
+#include "main/mtypes.h"
+
+struct dd_function_table;
+
extern void
st_init_program_functions(struct dd_function_table *functions);
diff --git a/src/mesa/state_tracker/st_cb_rasterpos.h b/src/mesa/state_tracker/st_cb_rasterpos.h
index d2ed7297f15..2dc109bb184 100644
--- a/src/mesa/state_tracker/st_cb_rasterpos.h
+++ b/src/mesa/state_tracker/st_cb_rasterpos.h
@@ -29,7 +29,9 @@
#define ST_CB_RASTERPOS_H
-#include "main/mtypes.h"
+#include "main/compiler.h"
+
+struct dd_function_table;
#if FEATURE_rastpos
diff --git a/src/mesa/state_tracker/st_cb_readpixels.h b/src/mesa/state_tracker/st_cb_readpixels.h
index c90ef029062..9e1f7b4925e 100644
--- a/src/mesa/state_tracker/st_cb_readpixels.h
+++ b/src/mesa/state_tracker/st_cb_readpixels.h
@@ -29,6 +29,10 @@
#ifndef ST_CB_READPIXELS_H
#define ST_CB_READPIXELS_H
+#include "main/mtypes.h"
+
+struct dd_function_table;
+
extern struct st_renderbuffer *
st_get_color_read_renderbuffer(GLcontext *ctx);
diff --git a/src/mesa/state_tracker/st_cb_strings.h b/src/mesa/state_tracker/st_cb_strings.h
index 3b765aaa592..92d5d2d9ba7 100644
--- a/src/mesa/state_tracker/st_cb_strings.h
+++ b/src/mesa/state_tracker/st_cb_strings.h
@@ -30,6 +30,8 @@
#define ST_CB_STRINGS_H
+struct dd_function_table;
+
extern void
st_init_string_functions(struct dd_function_table *functions);
diff --git a/src/mesa/state_tracker/st_cb_texture.h b/src/mesa/state_tracker/st_cb_texture.h
index 1cd9fc3a50f..6942478e815 100644
--- a/src/mesa/state_tracker/st_cb_texture.h
+++ b/src/mesa/state_tracker/st_cb_texture.h
@@ -30,6 +30,13 @@
#define ST_CB_TEXTURE_H
+#include "main/glheader.h"
+#include "main/mtypes.h"
+
+struct dd_function_table;
+struct pipe_context;
+struct st_context;
+
extern GLboolean
st_finalize_texture(GLcontext *ctx,
struct pipe_context *pipe,
diff --git a/src/mesa/state_tracker/st_cb_viewport.h b/src/mesa/state_tracker/st_cb_viewport.h
index db7dd6eab82..bcfd7cb68af 100644
--- a/src/mesa/state_tracker/st_cb_viewport.h
+++ b/src/mesa/state_tracker/st_cb_viewport.h
@@ -25,5 +25,12 @@
*
**************************************************************************/
+#ifndef ST_CB_VIEWPORT_H
+#define ST_CB_VIEWPORT_H
+
+struct dd_function_table;
+
extern void
st_init_viewport_functions(struct dd_function_table *functions);
+
+#endif /* ST_CB_VIEWPORT_H */
diff --git a/src/mesa/state_tracker/st_cb_xformfb.h b/src/mesa/state_tracker/st_cb_xformfb.h
index 50efcb9293f..574cf481e18 100644
--- a/src/mesa/state_tracker/st_cb_xformfb.h
+++ b/src/mesa/state_tracker/st_cb_xformfb.h
@@ -29,6 +29,10 @@
#define ST_CB_XFORMFB_H
+#include "main/compiler.h"
+
+struct dd_function_table;
+
#if FEATURE_EXT_transform_feedback
extern void
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index 7eb5f32611d..2ce5f087536 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -28,6 +28,7 @@
#include "main/imports.h"
#include "main/context.h"
#include "main/shaderobj.h"
+#include "program/prog_cache.h"
#include "vbo/vbo.h"
#include "glapi/glapi.h"
#include "st_context.h"
@@ -62,6 +63,9 @@
#include "cso_cache/cso_context.h"
+DEBUG_GET_ONCE_BOOL_OPTION(mesa_mvp_dp4, "MESA_MVP_DP4", FALSE)
+
+
/**
* Called via ctx->Driver.UpdateState()
*/
@@ -169,7 +173,7 @@ struct st_context *st_create_context(gl_api api, struct pipe_context *pipe,
/* XXX: need a capability bit in gallium to query if the pipe
* driver prefers DP4 or MUL/MAD for vertex transformation.
*/
- if (debug_get_bool_option("MESA_MVP_DP4", FALSE))
+ if (debug_get_option_mesa_mvp_dp4())
_mesa_set_mvp_with_dp4( ctx, GL_TRUE );
return st_create_context_priv(ctx, pipe);
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index a147a021176..60c25fb8f00 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -1,3 +1,4 @@
+//struct dd_function_table;
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
@@ -29,21 +30,17 @@
#define ST_CONTEXT_H
#include "main/mtypes.h"
-#include "program/prog_cache.h"
#include "pipe/p_state.h"
#include "state_tracker/st_api.h"
-
-struct st_context;
-struct st_texture_object;
-struct st_fragment_program;
+struct bitmap_cache;
+struct blit_state;
+struct dd_function_table;
struct draw_context;
struct draw_stage;
-struct cso_cache;
-struct cso_blend;
struct gen_mipmap_state;
-struct blit_state;
-struct bitmap_cache;
+struct st_context;
+struct st_fragment_program;
#define ST_NEW_MESA 0x1 /* Mesa state has changed */
diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c
index ebf6ec6e7e2..df32491d044 100644
--- a/src/mesa/state_tracker/st_debug.c
+++ b/src/mesa/state_tracker/st_debug.c
@@ -55,6 +55,8 @@ static const struct debug_named_value st_debug_flags[] = {
{ "query", DEBUG_QUERY, NULL },
DEBUG_NAMED_VALUE_END
};
+
+DEBUG_GET_ONCE_FLAGS_OPTION(st_debug, "ST_DEBUG", st_debug_flags, 0)
#endif
@@ -62,7 +64,7 @@ void
st_debug_init(void)
{
#ifdef DEBUG
- ST_DEBUG = debug_get_flags_option("ST_DEBUG", st_debug_flags, 0 );
+ ST_DEBUG = debug_get_option_st_debug();
#endif
}
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 5821da4889d..5b054892702 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -58,6 +58,7 @@
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "util/u_prim.h"
+#include "util/u_draw_quad.h"
#include "draw/draw_context.h"
#include "cso_cache/cso_context.h"
@@ -494,6 +495,49 @@ setup_non_interleaved_attribs(GLcontext *ctx,
}
+static void
+setup_index_buffer(GLcontext *ctx,
+ const struct _mesa_index_buffer *ib,
+ struct pipe_index_buffer *ibuffer)
+{
+ struct st_context *st = st_context(ctx);
+ struct pipe_context *pipe = st->pipe;
+
+ memset(ibuffer, 0, sizeof(*ibuffer));
+ if (ib) {
+ struct gl_buffer_object *bufobj = ib->obj;
+
+ switch (ib->type) {
+ case GL_UNSIGNED_INT:
+ ibuffer->index_size = 4;
+ break;
+ case GL_UNSIGNED_SHORT:
+ ibuffer->index_size = 2;
+ break;
+ case GL_UNSIGNED_BYTE:
+ ibuffer->index_size = 1;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ /* get/create the index buffer object */
+ if (bufobj && bufobj->Name) {
+ /* elements/indexes are in a real VBO */
+ struct st_buffer_object *stobj = st_buffer_object(bufobj);
+ pipe_resource_reference(&ibuffer->buffer, stobj->buffer);
+ ibuffer->offset = pointer_to_offset(ib->ptr);
+ }
+ else {
+ /* elements/indices are in user-space memory */
+ ibuffer->buffer =
+ pipe_user_buffer_create(pipe->screen, (void *) ib->ptr,
+ ib->count * ibuffer->index_size,
+ PIPE_BIND_INDEX_BUFFER);
+ }
+ }
+}
/**
* Prior to drawing, check that any uniforms referenced by the
@@ -568,8 +612,11 @@ st_draw_vbo(GLcontext *ctx,
GLuint attr;
struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
unsigned num_vbuffers, num_velements;
+ struct pipe_index_buffer ibuffer;
GLboolean userSpace = GL_FALSE;
GLboolean vertDataEdgeFlags;
+ struct pipe_draw_info info;
+ unsigned i;
/* Mesa core state should have been validated already */
assert(ctx->NewState == 0x0);
@@ -647,113 +694,35 @@ st_draw_vbo(GLcontext *ctx,
if (num_vbuffers == 0 || num_velements == 0)
return;
- /* do actual drawing */
- if (ib) {
- /* indexed primitive */
- struct gl_buffer_object *bufobj = ib->obj;
- struct pipe_resource *indexBuf = NULL;
- unsigned indexSize, indexOffset, i;
+ setup_index_buffer(ctx, ib, &ibuffer);
+ pipe->set_index_buffer(pipe, &ibuffer);
- switch (ib->type) {
- case GL_UNSIGNED_INT:
- indexSize = 4;
- break;
- case GL_UNSIGNED_SHORT:
- indexSize = 2;
- break;
- case GL_UNSIGNED_BYTE:
- indexSize = 1;
- break;
- default:
- assert(0);
- return;
- }
-
- /* get/create the index buffer object */
- if (bufobj && bufobj->Name) {
- /* elements/indexes are in a real VBO */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
- pipe_resource_reference(&indexBuf, stobj->buffer);
- indexOffset = pointer_to_offset(ib->ptr) / indexSize;
- }
- else {
- /* element/indicies are in user space memory */
- indexBuf = pipe_user_buffer_create(pipe->screen, (void *) ib->ptr,
- ib->count * indexSize,
- PIPE_BIND_INDEX_BUFFER);
- indexOffset = 0;
+ util_draw_init_info(&info);
+ if (ib) {
+ info.indexed = TRUE;
+ if (min_index != ~0 && max_index != ~0) {
+ info.min_index = min_index;
+ info.max_index = max_index;
}
+ }
- /* draw */
- if (pipe->draw_range_elements && min_index != ~0 && max_index != ~0) {
- /* XXX: exercise temporary path to pass min/max directly
- * through to driver & draw module. These interfaces still
- * need a bit of work...
- */
- for (i = 0; i < nr_prims; i++) {
- unsigned vcount = prims[i].count;
- unsigned prim = translate_prim(ctx, prims[i].mode);
-
- if (u_trim_pipe_prim(prims[i].mode, &vcount)) {
- pipe->draw_range_elements(pipe, indexBuf, indexSize,
- prims[i].basevertex,
- min_index, max_index, prim,
- prims[i].start + indexOffset, vcount);
- }
- }
- }
- else {
- for (i = 0; i < nr_prims; i++) {
- unsigned vcount = prims[i].count;
- unsigned prim = translate_prim(ctx, prims[i].mode);
-
- if (u_trim_pipe_prim(prims[i].mode, &vcount)) {
- if (prims[i].num_instances == 1) {
- pipe->draw_elements(pipe, indexBuf,
- indexSize,
- prims[i].basevertex,
- prim,
- prims[i].start + indexOffset,
- vcount);
- }
- else {
- pipe->draw_elements_instanced(pipe, indexBuf,
- indexSize,
- prims[i].basevertex,
- prim,
- prims[i].start + indexOffset,
- vcount,
- 0, /* startInstance */
- prims[i].num_instances);
- }
- }
- }
+ /* do actual drawing */
+ for (i = 0; i < nr_prims; i++) {
+ info.mode = translate_prim( ctx, prims[i].mode );
+ info.start = prims[i].start;
+ info.count = prims[i].count;
+ info.instance_count = prims[i].num_instances;
+ info.index_bias = prims[i].basevertex;
+ if (!ib) {
+ info.min_index = info.start;
+ info.max_index = info.start + info.count - 1;
}
- pipe_resource_reference(&indexBuf, NULL);
+ if (u_trim_pipe_prim(info.mode, &info.count))
+ pipe->draw_vbo(pipe, &info);
}
- else {
- /* non-indexed */
- GLuint i;
-
- for (i = 0; i < nr_prims; i++) {
- unsigned vcount = prims[i].count;
- unsigned prim = translate_prim(ctx, prims[i].mode);
- if (u_trim_pipe_prim(prims[i].mode, &vcount)) {
- if (prims[i].num_instances == 1) {
- pipe->draw_arrays(pipe, prim, prims[i].start, vcount);
- }
- else {
- pipe->draw_arrays_instanced(pipe, prim,
- prims[i].start,
- vcount,
- 0, /* startInstance */
- prims[i].num_instances);
- }
- }
- }
- }
+ pipe_resource_reference(&ibuffer.buffer, NULL);
/* unreference buffers (frees wrapped user-space buffer objects) */
for (attr = 0; attr < num_vbuffers; attr++) {
diff --git a/src/mesa/state_tracker/st_draw.h b/src/mesa/state_tracker/st_draw.h
index 3e0face656b..f36184487a6 100644
--- a/src/mesa/state_tracker/st_draw.h
+++ b/src/mesa/state_tracker/st_draw.h
@@ -34,8 +34,13 @@
#ifndef ST_DRAW_H
#define ST_DRAW_H
-struct _mesa_prim;
+#include "main/compiler.h"
+#include "main/glheader.h"
+#include "main/mtypes.h"
+
struct _mesa_index_buffer;
+struct _mesa_prim;
+struct st_context;
void st_init_draw( struct st_context *st );
diff --git a/src/mesa/state_tracker/st_extensions.h b/src/mesa/state_tracker/st_extensions.h
index 2994f16dd33..aa9b2b2b914 100644
--- a/src/mesa/state_tracker/st_extensions.h
+++ b/src/mesa/state_tracker/st_extensions.h
@@ -30,6 +30,8 @@
#define ST_EXTENSIONS_H
+struct st_context;
+
extern void st_init_limits(struct st_context *st);
extern void st_init_extensions(struct st_context *st);
diff --git a/src/mesa/state_tracker/st_format.h b/src/mesa/state_tracker/st_format.h
index 29768f296d6..841c58cadc8 100644
--- a/src/mesa/state_tracker/st_format.h
+++ b/src/mesa/state_tracker/st_format.h
@@ -31,7 +31,12 @@
#define ST_FORMAT_H
#include "main/formats.h"
+#include "main/mtypes.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_format.h"
+
+struct pipe_screen;
extern GLenum
st_format_datatype(enum pipe_format format);
diff --git a/src/mesa/state_tracker/st_gen_mipmap.h b/src/mesa/state_tracker/st_gen_mipmap.h
index 00fbae93026..016bf3f4bba 100644
--- a/src/mesa/state_tracker/st_gen_mipmap.h
+++ b/src/mesa/state_tracker/st_gen_mipmap.h
@@ -30,6 +30,10 @@
#define ST_GEN_MIPMAP_H
+#include "main/mtypes.h"
+
+struct st_context;
+
extern void
st_init_generate_mipmap(struct st_context *st);
diff --git a/src/mesa/state_tracker/st_gl_api.h b/src/mesa/state_tracker/st_gl_api.h
index fe1aec207ea..57c6d9f24d2 100644
--- a/src/mesa/state_tracker/st_gl_api.h
+++ b/src/mesa/state_tracker/st_gl_api.h
@@ -2,8 +2,6 @@
#ifndef ST_GL_API_H
#define ST_GL_API_H
-#include "state_tracker/st_api.h"
-
struct st_api *st_gl_api_create(void);
struct st_api *st_gl_api_create_es1(void);
struct st_api *st_gl_api_create_es2(void);
diff --git a/src/mesa/state_tracker/st_manager.h b/src/mesa/state_tracker/st_manager.h
index cd2887b1e0f..48a9d4d99a6 100644
--- a/src/mesa/state_tracker/st_manager.h
+++ b/src/mesa/state_tracker/st_manager.h
@@ -29,8 +29,11 @@
#ifndef ST_MANAGER_H
#define ST_MANAGER_H
-#include "state_tracker/st_api.h"
-#include "st_context.h"
+#include "main/mtypes.h"
+
+#include "pipe/p_compiler.h"
+
+struct st_context;
struct pipe_surface *
st_manager_get_egl_image_surface(struct st_context *st,
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index bacd091853b..a19dcc92534 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -44,6 +44,15 @@
#include "util/u_math.h"
#include "util/u_memory.h"
+
+#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \
+ (1 << PROGRAM_ENV_PARAM) | \
+ (1 << PROGRAM_STATE_VAR) | \
+ (1 << PROGRAM_NAMED_PARAM) | \
+ (1 << PROGRAM_CONSTANT) | \
+ (1 << PROGRAM_UNIFORM))
+
+
struct label {
unsigned branch_target;
unsigned token;
@@ -205,7 +214,7 @@ src_register( struct st_translate *t,
return ureg_src_undef();
case PROGRAM_TEMPORARY:
- ASSERT(index >= 0);
+ assert(index >= 0);
if (ureg_dst_is_undef(t->temps[index]))
t->temps[index] = ureg_DECL_temporary( t->ureg );
assert(index < Elements(t->temps));
@@ -215,7 +224,7 @@ src_register( struct st_translate *t,
case PROGRAM_ENV_PARAM:
case PROGRAM_LOCAL_PARAM:
case PROGRAM_UNIFORM:
- ASSERT(index >= 0);
+ assert(index >= 0);
return t->constants[index];
case PROGRAM_STATE_VAR:
case PROGRAM_CONSTANT: /* ie, immediate */
@@ -738,9 +747,11 @@ emit_adjusted_wpos( struct st_translate *t,
struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
- ureg_ADD(ureg,
- ureg_writemask(wpos_temp, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y),
- wpos_input, ureg_imm1f(ureg, value));
+ /* Note that we bias X and Y and pass Z and W through unchanged.
+ * The shader might also use gl_FragCoord.w and .z.
+ */
+ ureg_ADD(ureg, wpos_temp, wpos_input,
+ ureg_imm4f(ureg, value, value, 0.0f, 0.0f));
t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
}
@@ -1057,6 +1068,16 @@ st_translate_mesa_program(
t->address[0] = ureg_DECL_address( ureg );
}
+ if (program->IndirectRegisterFiles & (1 << PROGRAM_TEMPORARY)) {
+ /* If temps are accessed with indirect addressing, declare temporaries
+ * in sequential order. Else, we declare them on demand elsewhere.
+ */
+ for (i = 0; i < program->NumTemporaries; i++) {
+ /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
+ t->temps[i] = ureg_DECL_temporary( t->ureg );
+ }
+ }
+
/* Emit constants and immediates. Mesa uses a single index space
* for these, so we put all the translated regs in t->constants.
*/
@@ -1067,7 +1088,7 @@ st_translate_mesa_program(
ret = PIPE_ERROR_OUT_OF_MEMORY;
goto out;
}
-
+
for (i = 0; i < program->Parameters->NumParameters; i++) {
switch (program->Parameters->Parameters[i].Type) {
case PROGRAM_ENV_PARAM:
@@ -1078,13 +1099,14 @@ st_translate_mesa_program(
t->constants[i] = ureg_DECL_constant( ureg, i );
break;
- /* Emit immediates only when there is no address register
- * in use. FIXME: Be smarter and recognize param arrays:
+ /* Emit immediates only when there's no indirect addressing of
+ * the const buffer.
+ * FIXME: Be smarter and recognize param arrays:
* indirect addressing is only valid within the referenced
* array.
*/
case PROGRAM_CONSTANT:
- if (program->NumAddressRegs > 0)
+ if (program->IndirectRegisterFiles & PROGRAM_ANY_CONST)
t->constants[i] = ureg_DECL_constant( ureg, i );
else
t->constants[i] =
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h
index e3c5bd1d94d..ca076ce3622 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.h
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h
@@ -30,8 +30,10 @@
#define ST_MESA_TO_TGSI_H
#include "main/mtypes.h"
-#include "tgsi/tgsi_ureg.h"
+#include "pipe/p_compiler.h"
+
+struct ureg_program;
#if defined __cplusplus
extern "C" {
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index 6f3ecdbce11..91528c227b2 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -41,6 +41,7 @@
#include "pipe/p_shader_tokens.h"
#include "draw/draw_context.h"
#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_ureg.h"
#include "st_debug.h"
#include "st_context.h"
diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
index d779d5a6dde..3805b9a725e 100644
--- a/src/mesa/state_tracker/st_program.h
+++ b/src/mesa/state_tracker/st_program.h
@@ -36,11 +36,8 @@
#include "main/mtypes.h"
#include "program/program.h"
-#include "pipe/p_shader_tokens.h"
-
-
-struct cso_fragment_shader;
-struct cso_vertex_shader;
+#include "pipe/p_state.h"
+#include "st_context.h"
/**
diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c
index dbdf1ea1ad0..add6e949dfb 100644
--- a/src/mesa/state_tracker/st_texture.c
+++ b/src/mesa/state_tracker/st_texture.c
@@ -25,14 +25,14 @@
*
**************************************************************************/
+#include <stdio.h>
+
#include "st_context.h"
#include "st_format.h"
#include "st_texture.h"
#include "st_cb_fbo.h"
#include "main/enums.h"
-#undef Elements /* fix re-defined macro warning */
-
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
diff --git a/src/mesa/swrast/s_aaline.h b/src/mesa/swrast/s_aaline.h
index f1d708ec801..922eb230e51 100644
--- a/src/mesa/swrast/s_aaline.h
+++ b/src/mesa/swrast/s_aaline.h
@@ -28,7 +28,7 @@
#define S_AALINE_H
-#include "swrast.h"
+#include "main/mtypes.h"
extern void
diff --git a/src/mesa/swrast/s_aatriangle.h b/src/mesa/swrast/s_aatriangle.h
index 4b57fa73a27..9aed41a1915 100644
--- a/src/mesa/swrast/s_aatriangle.h
+++ b/src/mesa/swrast/s_aatriangle.h
@@ -28,7 +28,7 @@
#define S_AATRIANGLE_H
-#include "swrast.h"
+#include "main/mtypes.h"
extern void
diff --git a/src/mesa/swrast/s_alpha.h b/src/mesa/swrast/s_alpha.h
index 7a5b72e650a..239484a9743 100644
--- a/src/mesa/swrast/s_alpha.h
+++ b/src/mesa/swrast/s_alpha.h
@@ -28,7 +28,8 @@
#define S_ALPHA_H
-#include "s_context.h"
+#include "main/mtypes.h"
+#include "s_span.h"
extern GLint
diff --git a/src/mesa/swrast/s_atifragshader.c b/src/mesa/swrast/s_atifragshader.c
index fa280e72e40..1338b6802d4 100644
--- a/src/mesa/swrast/s_atifragshader.c
+++ b/src/mesa/swrast/s_atifragshader.c
@@ -21,10 +21,10 @@
#include "main/glheader.h"
#include "main/colormac.h"
-#include "main/context.h"
#include "main/macros.h"
#include "main/atifragshader.h"
#include "swrast/s_atifragshader.h"
+#include "swrast/s_context.h"
/**
diff --git a/src/mesa/swrast/s_atifragshader.h b/src/mesa/swrast/s_atifragshader.h
index 871a0c04559..cce455a0465 100644
--- a/src/mesa/swrast/s_atifragshader.h
+++ b/src/mesa/swrast/s_atifragshader.h
@@ -27,7 +27,8 @@
#define S_ATIFRAGSHADER_H
-#include "s_context.h"
+#include "main/mtypes.h"
+#include "s_span.h"
extern void
diff --git a/src/mesa/swrast/s_blend.h b/src/mesa/swrast/s_blend.h
index 8d5a81635d5..9cedde3bf20 100644
--- a/src/mesa/swrast/s_blend.h
+++ b/src/mesa/swrast/s_blend.h
@@ -27,7 +27,8 @@
#define S_BLEND_H
-#include "s_context.h"
+#include "main/mtypes.h"
+#include "s_span.h"
extern void
diff --git a/src/mesa/swrast/s_context.c b/src/mesa/swrast/s_context.c
index 6d2d17c61d9..d8d8a80b7d7 100644
--- a/src/mesa/swrast/s_context.c
+++ b/src/mesa/swrast/s_context.c
@@ -28,7 +28,6 @@
#include "main/imports.h"
#include "main/bufferobj.h"
-#include "main/context.h"
#include "main/colormac.h"
#include "main/mtypes.h"
#include "main/teximage.h"
diff --git a/src/mesa/swrast/s_context.h b/src/mesa/swrast/s_context.h
index c9755e6da18..6d81f74768f 100644
--- a/src/mesa/swrast/s_context.h
+++ b/src/mesa/swrast/s_context.h
@@ -43,6 +43,7 @@
#ifndef S_CONTEXT_H
#define S_CONTEXT_H
+#include "main/compiler.h"
#include "main/mtypes.h"
#include "program/prog_execute.h"
#include "swrast.h"
diff --git a/src/mesa/swrast/s_depth.c b/src/mesa/swrast/s_depth.c
index ed637cac124..f952fd6baa7 100644
--- a/src/mesa/swrast/s_depth.c
+++ b/src/mesa/swrast/s_depth.c
@@ -30,7 +30,6 @@
#include "main/imports.h"
#include "s_depth.h"
-#include "s_context.h"
#include "s_span.h"
diff --git a/src/mesa/swrast/s_depth.h b/src/mesa/swrast/s_depth.h
index 7eae3667428..878d242f5e5 100644
--- a/src/mesa/swrast/s_depth.h
+++ b/src/mesa/swrast/s_depth.h
@@ -27,7 +27,8 @@
#define S_DEPTH_H
-#include "s_context.h"
+#include "main/mtypes.h"
+#include "s_span.h"
extern GLuint
diff --git a/src/mesa/swrast/s_feedback.c b/src/mesa/swrast/s_feedback.c
index 373b1416e28..6ac8ac73b0b 100644
--- a/src/mesa/swrast/s_feedback.c
+++ b/src/mesa/swrast/s_feedback.c
@@ -24,7 +24,6 @@
#include "main/glheader.h"
#include "main/colormac.h"
-#include "main/context.h"
#include "main/feedback.h"
#include "main/macros.h"
diff --git a/src/mesa/swrast/s_fog.c b/src/mesa/swrast/s_fog.c
index 3fc84392133..689500a613a 100644
--- a/src/mesa/swrast/s_fog.c
+++ b/src/mesa/swrast/s_fog.c
@@ -25,7 +25,6 @@
#include "main/glheader.h"
#include "main/colormac.h"
-#include "main/context.h"
#include "main/macros.h"
#include "s_context.h"
diff --git a/src/mesa/swrast/s_fog.h b/src/mesa/swrast/s_fog.h
index 06107de3f9d..a496746d106 100644
--- a/src/mesa/swrast/s_fog.h
+++ b/src/mesa/swrast/s_fog.h
@@ -28,7 +28,8 @@
#define S_FOG_H
-#include "swrast.h"
+#include "main/mtypes.h"
+#include "s_span.h"
extern GLfloat
diff --git a/src/mesa/swrast/s_fragprog.c b/src/mesa/swrast/s_fragprog.c
index 413f136cd59..9facb44d9bf 100644
--- a/src/mesa/swrast/s_fragprog.c
+++ b/src/mesa/swrast/s_fragprog.c
@@ -24,9 +24,9 @@
#include "main/glheader.h"
#include "main/colormac.h"
-#include "main/context.h"
#include "program/prog_instruction.h"
+#include "s_context.h"
#include "s_fragprog.h"
#include "s_span.h"
diff --git a/src/mesa/swrast/s_fragprog.h b/src/mesa/swrast/s_fragprog.h
index e1b7e679185..92b9d01e173 100644
--- a/src/mesa/swrast/s_fragprog.h
+++ b/src/mesa/swrast/s_fragprog.h
@@ -27,7 +27,8 @@
#define S_FRAGPROG_H
-#include "s_context.h"
+#include "main/mtypes.h"
+#include "s_span.h"
extern void
diff --git a/src/mesa/swrast/s_logic.h b/src/mesa/swrast/s_logic.h
index e8cfae33f23..d609513348d 100644
--- a/src/mesa/swrast/s_logic.h
+++ b/src/mesa/swrast/s_logic.h
@@ -27,7 +27,8 @@
#define S_LOGIC_H
-#include "swrast.h"
+#include "main/mtypes.h"
+#include "s_span.h"
extern void
_swrast_logicop_rgba_span(GLcontext *ctx, struct gl_renderbuffer *rb,
diff --git a/src/mesa/swrast/s_masking.h b/src/mesa/swrast/s_masking.h
index 3ba4f8356cb..cb000da0fd8 100644
--- a/src/mesa/swrast/s_masking.h
+++ b/src/mesa/swrast/s_masking.h
@@ -27,7 +27,8 @@
#define S_MASKING_H
-#include "swrast.h"
+#include "main/mtypes.h"
+#include "s_span.h"
extern void
diff --git a/src/mesa/swrast/s_points.c b/src/mesa/swrast/s_points.c
index 1663ece8294..12431662c47 100644
--- a/src/mesa/swrast/s_points.c
+++ b/src/mesa/swrast/s_points.c
@@ -25,7 +25,6 @@
#include "main/glheader.h"
#include "main/colormac.h"
-#include "main/context.h"
#include "main/macros.h"
#include "s_context.h"
#include "s_feedback.h"
diff --git a/src/mesa/swrast/s_readpix.c b/src/mesa/swrast/s_readpix.c
index 6ad9aceec77..553fd9a76d8 100644
--- a/src/mesa/swrast/s_readpix.c
+++ b/src/mesa/swrast/s_readpix.c
@@ -27,7 +27,6 @@
#include "main/bufferobj.h"
#include "main/colormac.h"
#include "main/convolve.h"
-#include "main/context.h"
#include "main/feedback.h"
#include "main/formats.h"
#include "main/image.h"
diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c
index 687c8eb0bf8..8931cdec1bc 100644
--- a/src/mesa/swrast/s_span.c
+++ b/src/mesa/swrast/s_span.c
@@ -33,7 +33,6 @@
#include "main/glheader.h"
#include "main/colormac.h"
-#include "main/context.h"
#include "main/macros.h"
#include "main/imports.h"
#include "main/image.h"
@@ -971,6 +970,10 @@ shade_texture_span(GLcontext *ctx, SWspan *span)
if (span->primitive == GL_BITMAP && span->array->ChanType != GL_FLOAT) {
convert_color_type(span, GL_FLOAT, 0);
}
+ else {
+ span->array->rgba = (void *) span->array->attribs[FRAG_ATTRIB_COL0];
+ }
+
if (span->primitive != GL_POINT ||
(span->interpMask & SPAN_RGBA) ||
ctx->Point.PointSprite) {
@@ -1222,9 +1225,22 @@ _swrast_write_rgba_span( GLcontext *ctx, SWspan *span)
GLchan rgbaSave[MAX_WIDTH][4];
const GLuint fragOutput = multiFragOutputs ? buf : 0;
+ /* set span->array->rgba to colors for render buffer's datatype */
if (rb->DataType != span->array->ChanType || fragOutput > 0) {
convert_color_type(span, rb->DataType, fragOutput);
}
+ else {
+ if (rb->DataType == GL_UNSIGNED_BYTE) {
+ span->array->rgba = span->array->rgba8;
+ }
+ else if (rb->DataType == GL_UNSIGNED_SHORT) {
+ span->array->rgba = (void *) span->array->rgba16;
+ }
+ else {
+ span->array->rgba = (void *)
+ span->array->attribs[FRAG_ATTRIB_COL0];
+ }
+ }
if (!multiFragOutputs && numBuffers > 1) {
/* save colors for second, third renderbuffer writes */
diff --git a/src/mesa/swrast/s_stencil.h b/src/mesa/swrast/s_stencil.h
index cd6cbc57b0b..c076ebbe2a1 100644
--- a/src/mesa/swrast/s_stencil.h
+++ b/src/mesa/swrast/s_stencil.h
@@ -27,7 +27,8 @@
#define S_STENCIL_H
-#include "swrast.h"
+#include "main/mtypes.h"
+#include "s_span.h"
diff --git a/src/mesa/swrast/s_texcombine.h b/src/mesa/swrast/s_texcombine.h
index 9ed96efb879..4f5dfbe1afe 100644
--- a/src/mesa/swrast/s_texcombine.h
+++ b/src/mesa/swrast/s_texcombine.h
@@ -27,7 +27,8 @@
#define S_TEXCOMBINE_H
-#include "swrast.h"
+#include "main/mtypes.h"
+#include "s_span.h"
extern void
_swrast_texture_span( GLcontext *ctx, SWspan *span );
diff --git a/src/mesa/swrast/s_texfilter.h b/src/mesa/swrast/s_texfilter.h
index 2e265d685c5..eceab59658e 100644
--- a/src/mesa/swrast/s_texfilter.h
+++ b/src/mesa/swrast/s_texfilter.h
@@ -27,7 +27,8 @@
#define S_TEXFILTER_H
-#include "swrast.h"
+#include "main/mtypes.h"
+#include "s_context.h"
extern texture_sample_func
diff --git a/src/mesa/swrast/s_zoom.h b/src/mesa/swrast/s_zoom.h
index 43917be65fc..09f624efad5 100644
--- a/src/mesa/swrast/s_zoom.h
+++ b/src/mesa/swrast/s_zoom.h
@@ -25,7 +25,8 @@
#ifndef S_ZOOM_H
#define S_ZOOM_H
-#include "swrast.h"
+#include "main/mtypes.h"
+#include "s_span.h"
extern void
diff --git a/src/mesa/swrast_setup/ss_context.h b/src/mesa/swrast_setup/ss_context.h
index 1ec293fade1..56551ab273c 100644
--- a/src/mesa/swrast_setup/ss_context.h
+++ b/src/mesa/swrast_setup/ss_context.h
@@ -28,9 +28,8 @@
#ifndef SS_CONTEXT_H
#define SS_CONTEXT_H
-#include "main/mtypes.h"
+#include "main/glheader.h"
#include "swrast/swrast.h"
-#include "swrast_setup.h"
#include "tnl/t_context.h"
typedef struct {
diff --git a/src/mesa/swrast_setup/ss_triangle.h b/src/mesa/swrast_setup/ss_triangle.h
index 007fa2e9141..ac553cbd018 100644
--- a/src/mesa/swrast_setup/ss_triangle.h
+++ b/src/mesa/swrast_setup/ss_triangle.h
@@ -29,7 +29,7 @@
#ifndef SS_TRIANGLE_H
#define SS_TRIANGLE_H
-#include "ss_context.h"
+#include "main/mtypes.h"
void _swsetup_trifuncs_init( GLcontext *ctx );
diff --git a/src/mesa/swrast_setup/ss_vb.h b/src/mesa/swrast_setup/ss_vb.h
index 2ad1f56f396..944a3b78d8c 100644
--- a/src/mesa/swrast_setup/ss_vb.h
+++ b/src/mesa/swrast_setup/ss_vb.h
@@ -30,7 +30,6 @@
#define SS_VB_H
#include "main/mtypes.h"
-#include "swrast_setup.h"
void _swsetup_vb_init( GLcontext *ctx );
void _swsetup_choose_rastersetup_func( GLcontext *ctx );
diff --git a/src/mesa/tnl/t_context.h b/src/mesa/tnl/t_context.h
index ebaae6335b9..258906f7956 100644
--- a/src/mesa/tnl/t_context.h
+++ b/src/mesa/tnl/t_context.h
@@ -53,9 +53,7 @@
#include "main/bitset.h"
#include "main/mtypes.h"
-#include "math/m_matrix.h"
#include "math/m_vector.h"
-#include "math/m_xform.h"
#include "vbo/vbo.h"
diff --git a/src/mesa/tnl/t_rasterpos.c b/src/mesa/tnl/t_rasterpos.c
index 3596d162b23..d82d5b50736 100644
--- a/src/mesa/tnl/t_rasterpos.c
+++ b/src/mesa/tnl/t_rasterpos.c
@@ -25,7 +25,6 @@
#include "main/glheader.h"
#include "main/colormac.h"
-#include "main/context.h"
#include "main/feedback.h"
#include "main/light.h"
#include "main/macros.h"
diff --git a/src/mesa/tnl/t_vb_cull.c b/src/mesa/tnl/t_vb_cull.c
index 712901acf30..22df7166735 100644
--- a/src/mesa/tnl/t_vb_cull.c
+++ b/src/mesa/tnl/t_vb_cull.c
@@ -28,7 +28,6 @@
#include "main/glheader.h"
#include "main/colormac.h"
-#include "main/context.h"
#include "main/macros.h"
#include "main/imports.h"
#include "main/mtypes.h"
diff --git a/src/mesa/tnl/t_vb_fog.c b/src/mesa/tnl/t_vb_fog.c
index 4a0e6ad4f99..9faae24ec6d 100644
--- a/src/mesa/tnl/t_vb_fog.c
+++ b/src/mesa/tnl/t_vb_fog.c
@@ -28,7 +28,6 @@
#include "main/glheader.h"
#include "main/colormac.h"
-#include "main/context.h"
#include "main/macros.h"
#include "main/imports.h"
#include "main/mtypes.h"
diff --git a/src/mesa/tnl/t_vb_normals.c b/src/mesa/tnl/t_vb_normals.c
index 61ac4095733..c2aa655674c 100644
--- a/src/mesa/tnl/t_vb_normals.c
+++ b/src/mesa/tnl/t_vb_normals.c
@@ -28,7 +28,6 @@
#include "main/glheader.h"
#include "main/colormac.h"
-#include "main/context.h"
#include "main/macros.h"
#include "main/imports.h"
#include "main/mtypes.h"
diff --git a/src/mesa/tnl/t_vb_program.c b/src/mesa/tnl/t_vb_program.c
index 614c67d05eb..f3a338ef1ed 100644
--- a/src/mesa/tnl/t_vb_program.c
+++ b/src/mesa/tnl/t_vb_program.c
@@ -33,9 +33,9 @@
#include "main/glheader.h"
#include "main/colormac.h"
-#include "main/context.h"
#include "main/macros.h"
#include "main/imports.h"
+#include "math/m_xform.h"
#include "program/prog_instruction.h"
#include "program/prog_statevars.h"
#include "program/prog_execute.h"
diff --git a/src/mesa/tnl/t_vb_render.c b/src/mesa/tnl/t_vb_render.c
index c1bebc99423..7d991009a14 100644
--- a/src/mesa/tnl/t_vb_render.c
+++ b/src/mesa/tnl/t_vb_render.c
@@ -44,6 +44,7 @@
#include "main/macros.h"
#include "main/imports.h"
#include "main/mtypes.h"
+#include "math/m_xform.h"
#include "t_pipeline.h"
diff --git a/src/mesa/tnl/t_vb_texgen.c b/src/mesa/tnl/t_vb_texgen.c
index 9ef13bc96d8..950e0f54e9f 100644
--- a/src/mesa/tnl/t_vb_texgen.c
+++ b/src/mesa/tnl/t_vb_texgen.c
@@ -37,7 +37,6 @@
#include "main/glheader.h"
#include "main/colormac.h"
-#include "main/context.h"
#include "main/macros.h"
#include "main/imports.h"
#include "main/mtypes.h"
diff --git a/src/mesa/tnl/t_vb_texmat.c b/src/mesa/tnl/t_vb_texmat.c
index 83688290e59..985d137e5cc 100644
--- a/src/mesa/tnl/t_vb_texmat.c
+++ b/src/mesa/tnl/t_vb_texmat.c
@@ -28,7 +28,6 @@
#include "main/glheader.h"
#include "main/colormac.h"
-#include "main/context.h"
#include "main/macros.h"
#include "main/imports.h"
#include "main/mtypes.h"
diff --git a/src/mesa/tnl/t_vb_vertex.c b/src/mesa/tnl/t_vb_vertex.c
index a2753425633..453479227b7 100644
--- a/src/mesa/tnl/t_vb_vertex.c
+++ b/src/mesa/tnl/t_vb_vertex.c
@@ -28,7 +28,6 @@
#include "main/glheader.h"
#include "main/colormac.h"
-#include "main/context.h"
#include "main/macros.h"
#include "main/imports.h"
#include "main/mtypes.h"
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 045af46da8d..84ae1b87f93 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -27,7 +27,7 @@
#include "main/glheader.h"
#include "main/bufferobj.h"
-#include "main/context.h"
+#include "main/compiler.h"
#include "main/enums.h"
#include "main/state.h"
diff --git a/src/mesa/vf/vf.h b/src/mesa/vf/vf.h
index 83d7547619c..5fe392bbe51 100644
--- a/src/mesa/vf/vf.h
+++ b/src/mesa/vf/vf.h
@@ -28,7 +28,7 @@
#ifndef VF_VERTEX_H
#define VF_VERTEX_H
-#include "main/mtypes.h"
+#include "main/glheader.h"
#include "math/m_vector.h"
enum {
diff --git a/src/mesa/vf/vf_generic.c b/src/mesa/vf/vf_generic.c
index 0af8893c302..95a317e99db 100644
--- a/src/mesa/vf/vf_generic.c
+++ b/src/mesa/vf/vf_generic.c
@@ -29,6 +29,7 @@
#include "main/glheader.h"
#include "main/context.h"
#include "main/colormac.h"
+#include "main/macros.h"
#include "main/simple_list.h"
#include "vf/vf.h"