summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/i965/Makefile104
-rw-r--r--src/gallium/drivers/i965/brw_cc.c297
-rw-r--r--src/gallium/drivers/i965/brw_clip.c273
-rw-r--r--src/gallium/drivers/i965/brw_clip.h179
-rw-r--r--src/gallium/drivers/i965/brw_clip_line.c276
-rw-r--r--src/gallium/drivers/i965/brw_clip_point.c56
-rw-r--r--src/gallium/drivers/i965/brw_clip_state.c184
-rw-r--r--src/gallium/drivers/i965/brw_clip_tri.c603
-rw-r--r--src/gallium/drivers/i965/brw_clip_unfilled.c505
-rw-r--r--src/gallium/drivers/i965/brw_clip_util.c396
-rw-r--r--src/gallium/drivers/i965/brw_context.c173
-rw-r--r--src/gallium/drivers/i965/brw_context.h767
-rw-r--r--src/gallium/drivers/i965/brw_curbe.c376
-rw-r--r--src/gallium/drivers/i965/brw_defines.h851
-rw-r--r--src/gallium/drivers/i965/brw_disasm.c903
-rw-r--r--src/gallium/drivers/i965/brw_draw.c493
-rw-r--r--src/gallium/drivers/i965/brw_draw.h54
-rw-r--r--src/gallium/drivers/i965/brw_draw_upload.c742
-rw-r--r--src/gallium/drivers/i965/brw_eu.c254
-rw-r--r--src/gallium/drivers/i965/brw_eu.h968
-rw-r--r--src/gallium/drivers/i965/brw_eu_debug.c95
-rw-r--r--src/gallium/drivers/i965/brw_eu_emit.c1425
-rw-r--r--src/gallium/drivers/i965/brw_eu_util.c126
-rw-r--r--src/gallium/drivers/i965/brw_gs.c201
-rw-r--r--src/gallium/drivers/i965/brw_gs.h76
-rw-r--r--src/gallium/drivers/i965/brw_gs_emit.c186
-rw-r--r--src/gallium/drivers/i965/brw_gs_state.c149
-rw-r--r--src/gallium/drivers/i965/brw_misc_state.c545
-rw-r--r--src/gallium/drivers/i965/brw_program.c166
-rw-r--r--src/gallium/drivers/i965/brw_queryobj.c254
-rw-r--r--src/gallium/drivers/i965/brw_sf.c200
-rw-r--r--src/gallium/drivers/i965/brw_sf.h113
-rw-r--r--src/gallium/drivers/i965/brw_sf_emit.c739
-rw-r--r--src/gallium/drivers/i965/brw_sf_state.c365
-rw-r--r--src/gallium/drivers/i965/brw_state.h173
-rw-r--r--src/gallium/drivers/i965/brw_state_batch.c99
-rw-r--r--src/gallium/drivers/i965/brw_state_cache.c597
-rw-r--r--src/gallium/drivers/i965/brw_state_dump.c224
-rw-r--r--src/gallium/drivers/i965/brw_state_upload.c416
-rw-r--r--src/gallium/drivers/i965/brw_structs.h1575
-rw-r--r--src/gallium/drivers/i965/brw_tex.c59
-rw-r--r--src/gallium/drivers/i965/brw_tex_layout.c222
-rw-r--r--src/gallium/drivers/i965/brw_urb.c250
-rw-r--r--src/gallium/drivers/i965/brw_util.c104
-rw-r--r--src/gallium/drivers/i965/brw_util.h45
-rw-r--r--src/gallium/drivers/i965/brw_vs.c124
-rw-r--r--src/gallium/drivers/i965/brw_vs.h88
-rw-r--r--src/gallium/drivers/i965/brw_vs_emit.c1667
-rw-r--r--src/gallium/drivers/i965/brw_vs_state.c185
-rw-r--r--src/gallium/drivers/i965/brw_vs_surface_state.c226
-rw-r--r--src/gallium/drivers/i965/brw_wm.c375
-rw-r--r--src/gallium/drivers/i965/brw_wm.h309
-rw-r--r--src/gallium/drivers/i965/brw_wm_debug.c174
-rw-r--r--src/gallium/drivers/i965/brw_wm_emit.c1509
-rw-r--r--src/gallium/drivers/i965/brw_wm_fp.c1177
-rw-r--r--src/gallium/drivers/i965/brw_wm_glsl.c3046
-rw-r--r--src/gallium/drivers/i965/brw_wm_iz.c157
-rw-r--r--src/gallium/drivers/i965/brw_wm_pass0.c442
-rw-r--r--src/gallium/drivers/i965/brw_wm_pass1.c291
-rw-r--r--src/gallium/drivers/i965/brw_wm_pass2.c343
-rw-r--r--src/gallium/drivers/i965/brw_wm_sampler_state.c369
-rw-r--r--src/gallium/drivers/i965/brw_wm_state.c317
-rw-r--r--src/gallium/drivers/i965/brw_wm_surface_state.c752
-rw-r--r--src/gallium/drivers/i965/intel_batchbuffer.h184
-rw-r--r--src/gallium/drivers/i965/intel_chipset.h118
-rw-r--r--src/gallium/drivers/i965/intel_structs.h132
-rw-r--r--src/gallium/drivers/i965/intel_tex_format.c225
-rw-r--r--src/gallium/drivers/i965/intel_tex_layout.c140
68 files changed, 29208 insertions, 0 deletions
diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
new file mode 100644
index 00000000000..7a55333e896
--- /dev/null
+++ b/src/gallium/drivers/i965/Makefile
@@ -0,0 +1,104 @@
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = i965_dri.so
+
+DRIVER_SOURCES = \
+ intel_batchbuffer.c \
+ intel_blit.c \
+ intel_buffer_objects.c \
+ intel_buffers.c \
+ intel_clear.c \
+ intel_context.c \
+ intel_decode.c \
+ intel_extensions.c \
+ intel_fbo.c \
+ intel_mipmap_tree.c \
+ intel_regions.c \
+ intel_screen.c \
+ intel_span.c \
+ intel_pixel.c \
+ intel_pixel_bitmap.c \
+ intel_pixel_copy.c \
+ intel_pixel_draw.c \
+ intel_pixel_read.c \
+ intel_state.c \
+ intel_swapbuffers.c \
+ intel_syncobj.c \
+ intel_tex.c \
+ intel_tex_copy.c \
+ intel_tex_format.c \
+ intel_tex_image.c \
+ intel_tex_layout.c \
+ intel_tex_subimage.c \
+ intel_tex_validate.c \
+ brw_cc.c \
+ brw_clip.c \
+ brw_clip_line.c \
+ brw_clip_point.c \
+ brw_clip_state.c \
+ brw_clip_tri.c \
+ brw_clip_unfilled.c \
+ brw_clip_util.c \
+ brw_context.c \
+ brw_curbe.c \
+ brw_disasm.c \
+ brw_draw.c \
+ brw_draw_upload.c \
+ brw_eu.c \
+ brw_eu_debug.c \
+ brw_eu_emit.c \
+ brw_eu_util.c \
+ brw_fallback.c \
+ brw_gs.c \
+ brw_gs_emit.c \
+ brw_gs_state.c \
+ brw_misc_state.c \
+ brw_program.c \
+ brw_queryobj.c \
+ brw_sf.c \
+ brw_sf_emit.c \
+ brw_sf_state.c \
+ brw_state_batch.c \
+ brw_state_cache.c \
+ brw_state_dump.c \
+ brw_state_upload.c \
+ brw_tex.c \
+ brw_tex_layout.c \
+ brw_urb.c \
+ brw_util.c \
+ brw_vs.c \
+ brw_vs_constval.c \
+ brw_vs_emit.c \
+ brw_vs_state.c \
+ brw_vs_surface_state.c \
+ brw_vtbl.c \
+ brw_wm.c \
+ brw_wm_debug.c \
+ brw_wm_emit.c \
+ brw_wm_fp.c \
+ brw_wm_iz.c \
+ brw_wm_glsl.c \
+ brw_wm_pass0.c \
+ brw_wm_pass1.c \
+ brw_wm_pass2.c \
+ brw_wm_sampler_state.c \
+ brw_wm_state.c \
+ brw_wm_surface_state.c
+
+C_SOURCES = \
+ $(COMMON_SOURCES) \
+ $(MINIGLX_SOURCES) \
+ $(DRIVER_SOURCES)
+
+ASM_SOURCES =
+
+DRIVER_DEFINES = -I../intel -I../intel/server
+
+DRI_LIB_DEPS += -ldrm_intel
+
+include ../Makefile.template
+
+intel_decode.o: ../intel/intel_decode.c
+intel_tex_layout.o: ../intel/intel_tex_layout.c
diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
new file mode 100644
index 00000000000..1088a7a6070
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -0,0 +1,297 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+static void prepare_cc_vp( struct brw_context *brw )
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ struct brw_cc_viewport ccv;
+
+ memset(&ccv, 0, sizeof(ccv));
+
+ /* _NEW_VIEWPORT */
+ ccv.min_depth = ctx->Viewport.Near;
+ ccv.max_depth = ctx->Viewport.Far;
+
+ dri_bo_unreference(brw->cc.vp_bo);
+ brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 );
+}
+
+const struct brw_tracked_state brw_cc_vp = {
+ .dirty = {
+ .mesa = _NEW_VIEWPORT,
+ .brw = BRW_NEW_CONTEXT,
+ .cache = 0
+ },
+ .prepare = prepare_cc_vp
+};
+
+struct brw_cc_unit_key {
+ GLboolean stencil, stencil_two_side, color_blend, alpha_enabled;
+
+ GLenum stencil_func[2], stencil_fail_op[2];
+ GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2];
+ GLubyte stencil_ref[2], stencil_write_mask[2], stencil_test_mask[2];
+ GLenum logic_op;
+
+ GLenum blend_eq_rgb, blend_eq_a;
+ GLenum blend_src_rgb, blend_src_a;
+ GLenum blend_dst_rgb, blend_dst_a;
+
+ GLenum alpha_func;
+ GLclampf alpha_ref;
+
+ GLboolean dither;
+
+ GLboolean depth_test, depth_write;
+ GLenum depth_func;
+};
+
+static void
+cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ const unsigned back = ctx->Stencil._BackFace;
+
+ memset(key, 0, sizeof(*key));
+
+ key->stencil = ctx->Stencil._Enabled;
+ key->stencil_two_side = ctx->Stencil._TestTwoSide;
+
+ if (key->stencil) {
+ key->stencil_func[0] = ctx->Stencil.Function[0];
+ key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0];
+ key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0];
+ key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0];
+ key->stencil_ref[0] = ctx->Stencil.Ref[0];
+ key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0];
+ key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0];
+ }
+ if (key->stencil_two_side) {
+ key->stencil_func[1] = ctx->Stencil.Function[back];
+ key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back];
+ key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back];
+ key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back];
+ key->stencil_ref[1] = ctx->Stencil.Ref[back];
+ key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back];
+ key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back];
+ }
+
+ if (ctx->Color._LogicOpEnabled)
+ key->logic_op = ctx->Color.LogicOp;
+ else
+ key->logic_op = GL_COPY;
+
+ key->color_blend = ctx->Color.BlendEnabled;
+ if (key->color_blend) {
+ key->blend_eq_rgb = ctx->Color.BlendEquationRGB;
+ key->blend_eq_a = ctx->Color.BlendEquationA;
+ key->blend_src_rgb = ctx->Color.BlendSrcRGB;
+ key->blend_dst_rgb = ctx->Color.BlendDstRGB;
+ key->blend_src_a = ctx->Color.BlendSrcA;
+ key->blend_dst_a = ctx->Color.BlendDstA;
+ }
+
+ key->alpha_enabled = ctx->Color.AlphaEnabled;
+ if (key->alpha_enabled) {
+ key->alpha_func = ctx->Color.AlphaFunc;
+ key->alpha_ref = ctx->Color.AlphaRef;
+ }
+
+ key->dither = ctx->Color.DitherFlag;
+
+ key->depth_test = ctx->Depth.Test;
+ if (key->depth_test) {
+ key->depth_func = ctx->Depth.Func;
+ key->depth_write = ctx->Depth.Mask;
+ }
+}
+
+/**
+ * Creates the state cache entry for the given CC unit key.
+ */
+static dri_bo *
+cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
+{
+ struct brw_cc_unit_state cc;
+ dri_bo *bo;
+
+ memset(&cc, 0, sizeof(cc));
+
+ /* _NEW_STENCIL */
+ if (key->stencil) {
+ cc.cc0.stencil_enable = 1;
+ cc.cc0.stencil_func =
+ intel_translate_compare_func(key->stencil_func[0]);
+ cc.cc0.stencil_fail_op =
+ intel_translate_stencil_op(key->stencil_fail_op[0]);
+ cc.cc0.stencil_pass_depth_fail_op =
+ intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]);
+ cc.cc0.stencil_pass_depth_pass_op =
+ intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]);
+ cc.cc1.stencil_ref = key->stencil_ref[0];
+ cc.cc1.stencil_write_mask = key->stencil_write_mask[0];
+ cc.cc1.stencil_test_mask = key->stencil_test_mask[0];
+
+ if (key->stencil_two_side) {
+ cc.cc0.bf_stencil_enable = 1;
+ cc.cc0.bf_stencil_func =
+ intel_translate_compare_func(key->stencil_func[1]);
+ cc.cc0.bf_stencil_fail_op =
+ intel_translate_stencil_op(key->stencil_fail_op[1]);
+ cc.cc0.bf_stencil_pass_depth_fail_op =
+ intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]);
+ cc.cc0.bf_stencil_pass_depth_pass_op =
+ intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]);
+ cc.cc1.bf_stencil_ref = key->stencil_ref[1];
+ cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1];
+ cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1];
+ }
+
+ /* Not really sure about this:
+ */
+ if (key->stencil_write_mask[0] ||
+ (key->stencil_two_side && key->stencil_write_mask[1]))
+ cc.cc0.stencil_write_enable = 1;
+ }
+
+ /* _NEW_COLOR */
+ if (key->logic_op != GL_COPY) {
+ cc.cc2.logicop_enable = 1;
+ cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op);
+ } else if (key->color_blend) {
+ GLenum eqRGB = key->blend_eq_rgb;
+ GLenum eqA = key->blend_eq_a;
+ GLenum srcRGB = key->blend_src_rgb;
+ GLenum dstRGB = key->blend_dst_rgb;
+ GLenum srcA = key->blend_src_a;
+ GLenum dstA = key->blend_dst_a;
+
+ if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
+ srcRGB = dstRGB = GL_ONE;
+ }
+
+ if (eqA == GL_MIN || eqA == GL_MAX) {
+ srcA = dstA = GL_ONE;
+ }
+
+ cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB);
+ cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB);
+ cc.cc6.blend_function = brw_translate_blend_equation(eqRGB);
+
+ cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
+ cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA);
+ cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA);
+
+ cc.cc3.blend_enable = 1;
+ cc.cc3.ia_blend_enable = (srcA != srcRGB ||
+ dstA != dstRGB ||
+ eqA != eqRGB);
+ }
+
+ if (key->alpha_enabled) {
+ cc.cc3.alpha_test = 1;
+ cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func);
+ cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
+
+ UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref);
+ }
+
+ if (key->dither) {
+ cc.cc5.dither_enable = 1;
+ cc.cc6.y_dither_offset = 0;
+ cc.cc6.x_dither_offset = 0;
+ }
+
+ /* _NEW_DEPTH */
+ if (key->depth_test) {
+ cc.cc2.depth_test = 1;
+ cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func);
+ cc.cc2.depth_write_enable = key->depth_write;
+ }
+
+ /* CACHE_NEW_CC_VP */
+ cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */
+
+ if (INTEL_DEBUG & DEBUG_STATS)
+ cc.cc5.statistics_enable = 1;
+
+ bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT,
+ key, sizeof(*key),
+ &brw->cc.vp_bo, 1,
+ &cc, sizeof(cc),
+ NULL, NULL);
+
+ /* Emit CC viewport relocation */
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ 0,
+ 0,
+ offsetof(struct brw_cc_unit_state, cc4),
+ brw->cc.vp_bo);
+
+ return bo;
+}
+
+static void prepare_cc_unit( struct brw_context *brw )
+{
+ struct brw_cc_unit_key key;
+
+ cc_unit_populate_key(brw, &key);
+
+ dri_bo_unreference(brw->cc.state_bo);
+ brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_CC_UNIT,
+ &key, sizeof(key),
+ &brw->cc.vp_bo, 1,
+ NULL);
+
+ if (brw->cc.state_bo == NULL)
+ brw->cc.state_bo = cc_unit_create_from_key(brw, &key);
+}
+
+const struct brw_tracked_state brw_cc_unit = {
+ .dirty = {
+ .mesa = _NEW_STENCIL | _NEW_COLOR | _NEW_DEPTH,
+ .brw = 0,
+ .cache = CACHE_NEW_CC_VP
+ },
+ .prepare = prepare_cc_unit,
+};
+
+
+
diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c
new file mode 100644
index 00000000000..20a927cf386
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip.c
@@ -0,0 +1,273 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_clip.h"
+
+
+#define FRONT_UNFILLED_BIT 0x1
+#define BACK_UNFILLED_BIT 0x2
+
+
+static void compile_clip_prog( struct brw_context *brw,
+ struct brw_clip_prog_key *key )
+{
+ struct brw_clip_compile c;
+ const GLuint *program;
+ GLuint program_size;
+ GLuint delta;
+ GLuint i;
+
+ memset(&c, 0, sizeof(c));
+
+ /* Begin the compilation:
+ */
+ brw_init_compile(brw, &c.func);
+
+ c.func.single_program_flow = 1;
+
+ c.key = *key;
+ c.need_ff_sync = BRW_IS_IGDNG(brw);
+
+ /* Need to locate the two positions present in vertex + header.
+ * These are currently hardcoded:
+ */
+ c.header_position_offset = ATTR_SIZE;
+
+ if (BRW_IS_IGDNG(brw))
+ delta = 3 * REG_SIZE;
+ else
+ delta = REG_SIZE;
+
+ for (i = 0; i < VERT_RESULT_MAX; i++)
+ if (c.key.attrs & (1<<i)) {
+ c.offset[i] = delta;
+ delta += ATTR_SIZE;
+ }
+
+ c.nr_attrs = brw_count_bits(c.key.attrs);
+
+ if (BRW_IS_IGDNG(brw))
+ c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
+ else
+ c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
+
+ c.nr_bytes = c.nr_regs * REG_SIZE;
+
+ c.prog_data.clip_mode = c.key.clip_mode; /* XXX */
+
+ /* For some reason the thread is spawned with only 4 channels
+ * unmasked.
+ */
+ brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
+
+
+ /* Would ideally have the option of producing a program which could
+ * do all three:
+ */
+ switch (key->primitive) {
+ case GL_TRIANGLES:
+ if (key->do_unfilled)
+ brw_emit_unfilled_clip( &c );
+ else
+ brw_emit_tri_clip( &c );
+ break;
+ case GL_LINES:
+ brw_emit_line_clip( &c );
+ break;
+ case GL_POINTS:
+ brw_emit_point_clip( &c );
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+
+
+ /* get the program
+ */
+ program = brw_get_program(&c.func, &program_size);
+
+ /* Upload
+ */
+ dri_bo_unreference(brw->clip.prog_bo);
+ brw->clip.prog_bo = brw_upload_cache( &brw->cache,
+ BRW_CLIP_PROG,
+ &c.key, sizeof(c.key),
+ NULL, 0,
+ program, program_size,
+ &c.prog_data,
+ &brw->clip.prog_data );
+}
+
+/* Calculate interpolants for triangle and line rasterization.
+ */
+static void upload_clip_prog(struct brw_context *brw)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ struct brw_clip_prog_key key;
+
+ memset(&key, 0, sizeof(key));
+
+ /* Populate the key:
+ */
+ /* BRW_NEW_REDUCED_PRIMITIVE */
+ key.primitive = brw->intel.reduced_primitive;
+ /* CACHE_NEW_VS_PROG */
+ key.attrs = brw->vs.prog_data->outputs_written;
+ /* _NEW_LIGHT */
+ key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
+ /* _NEW_TRANSFORM */
+ key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
+
+ if (BRW_IS_IGDNG(brw))
+ key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP;
+ else
+ key.clip_mode = BRW_CLIPMODE_NORMAL;
+
+ /* _NEW_POLYGON */
+ if (key.primitive == GL_TRIANGLES) {
+ if (ctx->Polygon.CullFlag &&
+ ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
+ key.clip_mode = BRW_CLIPMODE_REJECT_ALL;
+ else {
+ GLuint fill_front = CLIP_CULL;
+ GLuint fill_back = CLIP_CULL;
+ GLuint offset_front = 0;
+ GLuint offset_back = 0;
+
+ if (!ctx->Polygon.CullFlag ||
+ ctx->Polygon.CullFaceMode != GL_FRONT) {
+ switch (ctx->Polygon.FrontMode) {
+ case GL_FILL:
+ fill_front = CLIP_FILL;
+ offset_front = 0;
+ break;
+ case GL_LINE:
+ fill_front = CLIP_LINE;
+ offset_front = ctx->Polygon.OffsetLine;
+ break;
+ case GL_POINT:
+ fill_front = CLIP_POINT;
+ offset_front = ctx->Polygon.OffsetPoint;
+ break;
+ }
+ }
+
+ if (!ctx->Polygon.CullFlag ||
+ ctx->Polygon.CullFaceMode != GL_BACK) {
+ switch (ctx->Polygon.BackMode) {
+ case GL_FILL:
+ fill_back = CLIP_FILL;
+ offset_back = 0;
+ break;
+ case GL_LINE:
+ fill_back = CLIP_LINE;
+ offset_back = ctx->Polygon.OffsetLine;
+ break;
+ case GL_POINT:
+ fill_back = CLIP_POINT;
+ offset_back = ctx->Polygon.OffsetPoint;
+ break;
+ }
+ }
+
+ if (ctx->Polygon.BackMode != GL_FILL ||
+ ctx->Polygon.FrontMode != GL_FILL) {
+ key.do_unfilled = 1;
+
+ /* Most cases the fixed function units will handle. Cases where
+ * one or more polygon faces are unfilled will require help:
+ */
+ key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED;
+
+ if (offset_back || offset_front) {
+ /* _NEW_POLYGON, _NEW_BUFFERS */
+ key.offset_units = ctx->Polygon.OffsetUnits * brw->intel.polygon_offset_scale;
+ key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD;
+ }
+
+ switch (ctx->Polygon.FrontFace) {
+ case GL_CCW:
+ key.fill_ccw = fill_front;
+ key.fill_cw = fill_back;
+ key.offset_ccw = offset_front;
+ key.offset_cw = offset_back;
+ if (ctx->Light.Model.TwoSide &&
+ key.fill_cw != CLIP_CULL)
+ key.copy_bfc_cw = 1;
+ break;
+ case GL_CW:
+ key.fill_cw = fill_front;
+ key.fill_ccw = fill_back;
+ key.offset_cw = offset_front;
+ key.offset_ccw = offset_back;
+ if (ctx->Light.Model.TwoSide &&
+ key.fill_ccw != CLIP_CULL)
+ key.copy_bfc_ccw = 1;
+ break;
+ }
+ }
+ }
+ }
+
+ dri_bo_unreference(brw->clip.prog_bo);
+ brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG,
+ &key, sizeof(key),
+ NULL, 0,
+ &brw->clip.prog_data);
+ if (brw->clip.prog_bo == NULL)
+ compile_clip_prog( brw, &key );
+}
+
+
+const struct brw_tracked_state brw_clip_prog = {
+ .dirty = {
+ .mesa = (_NEW_LIGHT |
+ _NEW_TRANSFORM |
+ _NEW_POLYGON |
+ _NEW_BUFFERS),
+ .brw = (BRW_NEW_REDUCED_PRIMITIVE),
+ .cache = CACHE_NEW_VS_PROG
+ },
+ .prepare = upload_clip_prog
+};
diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h
new file mode 100644
index 00000000000..957df441ab0
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip.h
@@ -0,0 +1,179 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+#ifndef BRW_CLIP_H
+#define BRW_CLIP_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+#define MAX_VERTS (3+6+6)
+
+/* Note that if unfilled primitives are being emitted, we have to fix
+ * up polygon offset and flatshading at this point:
+ */
+struct brw_clip_prog_key {
+ GLuint attrs:32;
+ GLuint primitive:4;
+ GLuint nr_userclip:3;
+ GLuint do_flat_shading:1;
+ GLuint do_unfilled:1;
+ GLuint fill_cw:2; /* includes cull information */
+ GLuint fill_ccw:2; /* includes cull information */
+ GLuint offset_cw:1;
+ GLuint offset_ccw:1;
+ GLuint pad0:17;
+
+ GLuint copy_bfc_cw:1;
+ GLuint copy_bfc_ccw:1;
+ GLuint clip_mode:3;
+ GLuint pad1:27;
+
+ GLfloat offset_factor;
+ GLfloat offset_units;
+};
+
+
+#define CLIP_LINE 0
+#define CLIP_POINT 1
+#define CLIP_FILL 2
+#define CLIP_CULL 3
+
+
+#define PRIM_MASK (0x1f)
+
+struct brw_clip_compile {
+ struct brw_compile func;
+ struct brw_clip_prog_key key;
+ struct brw_clip_prog_data prog_data;
+
+ struct {
+ struct brw_reg R0;
+ struct brw_reg vertex[MAX_VERTS];
+
+ struct brw_reg t;
+ struct brw_reg t0, t1;
+ struct brw_reg dp0, dp1;
+
+ struct brw_reg dpPrev;
+ struct brw_reg dp;
+ struct brw_reg loopcount;
+ struct brw_reg nr_verts;
+ struct brw_reg planemask;
+
+ struct brw_reg inlist;
+ struct brw_reg outlist;
+ struct brw_reg freelist;
+
+ struct brw_reg dir;
+ struct brw_reg tmp0, tmp1;
+ struct brw_reg offset;
+
+ struct brw_reg fixed_planes;
+ struct brw_reg plane_equation;
+
+ struct brw_reg ff_sync;
+ } reg;
+
+ /* 3 different ways of expressing vertex size:
+ */
+ GLuint nr_attrs;
+ GLuint nr_regs;
+ GLuint nr_bytes;
+
+ GLuint first_tmp;
+ GLuint last_tmp;
+
+ GLboolean need_direction;
+
+ GLuint last_mrf;
+
+ GLuint header_position_offset;
+ GLuint offset[VERT_ATTRIB_MAX];
+ GLboolean need_ff_sync;
+};
+
+#define ATTR_SIZE (4*4)
+
+/* Points are only culled, so no need for a clip routine, however it
+ * works out easier to have a dummy one.
+ */
+void brw_emit_unfilled_clip( struct brw_clip_compile *c );
+void brw_emit_tri_clip( struct brw_clip_compile *c );
+void brw_emit_line_clip( struct brw_clip_compile *c );
+void brw_emit_point_clip( struct brw_clip_compile *c );
+
+/* brw_clip_tri.c, for use by the unfilled clip routine:
+ */
+void brw_clip_tri_init_vertices( struct brw_clip_compile *c );
+void brw_clip_tri_flat_shade( struct brw_clip_compile *c );
+void brw_clip_tri( struct brw_clip_compile *c );
+void brw_clip_tri_emit_polygon( struct brw_clip_compile *c );
+void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
+ GLuint nr_verts );
+
+
+/* Utils:
+ */
+
+void brw_clip_interp_vertex( struct brw_clip_compile *c,
+ struct brw_indirect dest_ptr,
+ struct brw_indirect v0_ptr, /* from */
+ struct brw_indirect v1_ptr, /* to */
+ struct brw_reg t0,
+ GLboolean force_edgeflag );
+
+void brw_clip_init_planes( struct brw_clip_compile *c );
+
+void brw_clip_emit_vue(struct brw_clip_compile *c,
+ struct brw_indirect vert,
+ GLboolean allocate,
+ GLboolean eot,
+ GLuint header);
+
+void brw_clip_kill_thread(struct brw_clip_compile *c);
+
+struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c );
+struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c );
+
+void brw_clip_copy_colors( struct brw_clip_compile *c,
+ GLuint to, GLuint from );
+
+void brw_clip_init_clipmask( struct brw_clip_compile *c );
+
+struct brw_reg get_tmp( struct brw_clip_compile *c );
+
+void brw_clip_project_position(struct brw_clip_compile *c,
+ struct brw_reg pos );
+void brw_clip_ff_sync(struct brw_clip_compile *c);
+void brw_clip_init_ff_sync(struct brw_clip_compile *c);
+#endif
diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c
new file mode 100644
index 00000000000..048ca620fab
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_line.c
@@ -0,0 +1,276 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
+{
+ GLuint i = 0,j;
+
+ /* Register usage is static, precompute here:
+ */
+ c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+ if (c->key.nr_userclip) {
+ c->reg.fixed_planes = brw_vec4_grf(i, 0);
+ i += (6 + c->key.nr_userclip + 1) / 2;
+
+ c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
+ }
+ else
+ c->prog_data.curb_read_length = 0;
+
+
+ /* Payload vertices plus space for more generated vertices:
+ */
+ for (j = 0; j < 4; j++) {
+ c->reg.vertex[j] = brw_vec4_grf(i, 0);
+ i += c->nr_regs;
+ }
+
+ c->reg.t = brw_vec1_grf(i, 0);
+ c->reg.t0 = brw_vec1_grf(i, 1);
+ c->reg.t1 = brw_vec1_grf(i, 2);
+ c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
+ c->reg.plane_equation = brw_vec4_grf(i, 4);
+ i++;
+
+ c->reg.dp0 = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
+ c->reg.dp1 = brw_vec1_grf(i, 4);
+ i++;
+
+ if (!c->key.nr_userclip) {
+ c->reg.fixed_planes = brw_vec8_grf(i, 0);
+ i++;
+ }
+
+ if (c->need_ff_sync) {
+ c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+ i++;
+ }
+
+ c->first_tmp = i;
+ c->last_tmp = i;
+
+ c->prog_data.urb_read_length = c->nr_regs; /* ? */
+ c->prog_data.total_grf = i;
+}
+
+
+
+/* Line clipping, more or less following the following algorithm:
+ *
+ * for (p=0;p<MAX_PLANES;p++) {
+ * if (clipmask & (1 << p)) {
+ * GLfloat dp0 = DOTPROD( vtx0, plane[p] );
+ * GLfloat dp1 = DOTPROD( vtx1, plane[p] );
+ *
+ * if (IS_NEGATIVE(dp1)) {
+ * GLfloat t = dp1 / (dp1 - dp0);
+ * if (t > t1) t1 = t;
+ * } else {
+ * GLfloat t = dp0 / (dp0 - dp1);
+ * if (t > t0) t0 = t;
+ * }
+ *
+ * if (t0 + t1 >= 1.0)
+ * return;
+ * }
+ * }
+ *
+ * interp( ctx, newvtx0, vtx0, vtx1, t0 );
+ * interp( ctx, newvtx1, vtx1, vtx0, t1 );
+ *
+ */
+static void clip_and_emit_line( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_indirect vtx0 = brw_indirect(0, 0);
+ struct brw_indirect vtx1 = brw_indirect(1, 0);
+ struct brw_indirect newvtx0 = brw_indirect(2, 0);
+ struct brw_indirect newvtx1 = brw_indirect(3, 0);
+ struct brw_indirect plane_ptr = brw_indirect(4, 0);
+ struct brw_instruction *plane_loop;
+ struct brw_instruction *plane_active;
+ struct brw_instruction *is_negative;
+ struct brw_instruction *is_neg2 = NULL;
+ struct brw_instruction *not_culled;
+ struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);
+
+ brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0]));
+ brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1]));
+ brw_MOV(p, get_addr_reg(newvtx0), brw_address(c->reg.vertex[2]));
+ brw_MOV(p, get_addr_reg(newvtx1), brw_address(c->reg.vertex[3]));
+ brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
+
+ /* Note: init t0, t1 together:
+ */
+ brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0));
+
+ brw_clip_init_planes(c);
+ brw_clip_init_clipmask(c);
+
+ /* -ve rhw workaround */
+ if (BRW_IS_965(p->brw)) {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
+ brw_imm_ud(1<<20));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
+ }
+
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ plane_loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ /* if (planemask & 1)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1));
+
+ plane_active = brw_IF(p, BRW_EXECUTE_1);
+ {
+ if (c->key.nr_userclip)
+ brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
+ else
+ brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
+
+ /* dp = DP4(vtx->position, plane)
+ */
+ brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+
+ /* if (IS_NEGATIVE(dp1))
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+ is_negative = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /*
+ * Both can be negative on GM965/G965 due to RHW workaround
+ * if so, this object should be rejected.
+ */
+ if (BRW_IS_965(p->brw)) {
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
+ is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, is_neg2);
+ }
+
+ brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1);
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 );
+ brw_MOV(p, c->reg.t1, c->reg.t);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+ is_negative = brw_ELSE(p, is_negative);
+ {
+ /* Coming back in. We know that both cannot be negative
+ * because the line would have been culled in that case.
+ */
+
+ /* If both are positive, do nothing */
+ /* Only on GM965/G965 */
+ if (BRW_IS_965(p->brw)) {
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
+ is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+ }
+
+ {
+ brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0);
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 );
+ brw_MOV(p, c->reg.t0, c->reg.t);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+
+ if (BRW_IS_965(p->brw)) {
+ brw_ENDIF(p, is_neg2);
+ }
+ }
+ brw_ENDIF(p, is_negative);
+ }
+ brw_ENDIF(p, plane_active);
+
+ /* plane_ptr++;
+ */
+ brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+ /* while (planemask>>=1) != 0
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+ }
+ brw_WHILE(p, plane_loop);
+
+ brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1);
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
+ not_culled = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, GL_FALSE);
+ brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, GL_FALSE);
+
+ brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
+ brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
+ }
+ brw_ENDIF(p, not_culled);
+ brw_clip_kill_thread(c);
+}
+
+
+
+void brw_emit_line_clip( struct brw_clip_compile *c )
+{
+ brw_clip_line_alloc_regs(c);
+ brw_clip_init_ff_sync(c);
+
+ if (c->key.do_flat_shading)
+ brw_clip_copy_colors(c, 0, 1);
+
+ clip_and_emit_line(c);
+}
diff --git a/src/gallium/drivers/i965/brw_clip_point.c b/src/gallium/drivers/i965/brw_clip_point.c
new file mode 100644
index 00000000000..8458f61c5a0
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_point.c
@@ -0,0 +1,56 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+/* Point clipping, nothing to do?
+ */
+void brw_emit_point_clip( struct brw_clip_compile *c )
+{
+ /* Send an empty message to kill the thread:
+ */
+ brw_clip_tri_alloc_regs(c, 0);
+ brw_clip_init_ff_sync(c);
+
+ brw_clip_kill_thread(c);
+}
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
new file mode 100644
index 00000000000..234b3744bfc
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -0,0 +1,184 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "main/macros.h"
+
+struct brw_clip_unit_key {
+ unsigned int total_grf;
+ unsigned int urb_entry_read_length;
+ unsigned int curb_entry_read_length;
+ unsigned int clip_mode;
+
+ unsigned int curbe_offset;
+
+ unsigned int nr_urb_entries, urb_size;
+
+ GLboolean depth_clamp;
+};
+
+static void
+clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ memset(key, 0, sizeof(*key));
+
+ /* CACHE_NEW_CLIP_PROG */
+ key->total_grf = brw->clip.prog_data->total_grf;
+ key->urb_entry_read_length = brw->clip.prog_data->urb_read_length;
+ key->curb_entry_read_length = brw->clip.prog_data->curb_read_length;
+ key->clip_mode = brw->clip.prog_data->clip_mode;
+
+ /* BRW_NEW_CURBE_OFFSETS */
+ key->curbe_offset = brw->curbe.clip_start;
+
+ /* BRW_NEW_URB_FENCE */
+ key->nr_urb_entries = brw->urb.nr_clip_entries;
+ key->urb_size = brw->urb.vsize;
+
+ /* _NEW_TRANSOFORM */
+ key->depth_clamp = ctx->Transform.DepthClamp;
+}
+
+static dri_bo *
+clip_unit_create_from_key(struct brw_context *brw,
+ struct brw_clip_unit_key *key)
+{
+ struct brw_clip_unit_state clip;
+ dri_bo *bo;
+
+ memset(&clip, 0, sizeof(clip));
+
+ clip.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+ /* reloc */
+ clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;
+
+ clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ clip.thread1.single_program_flow = 1;
+
+ clip.thread3.urb_entry_read_length = key->urb_entry_read_length;
+ clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
+ clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
+ clip.thread3.dispatch_grf_start_reg = 1;
+ clip.thread3.urb_entry_read_offset = 0;
+
+ clip.thread4.nr_urb_entries = key->nr_urb_entries;
+ clip.thread4.urb_entry_allocation_size = key->urb_size - 1;
+ /* If we have enough clip URB entries to run two threads, do so.
+ */
+ if (key->nr_urb_entries >= 10) {
+ /* Half of the URB entries go to each thread, and it has to be an
+ * even number.
+ */
+ assert(key->nr_urb_entries % 2 == 0);
+
+ /* Although up to 16 concurrent Clip threads are allowed on IGDNG,
+ * only 2 threads can output VUEs at a time.
+ */
+ if (BRW_IS_IGDNG(brw))
+ clip.thread4.max_threads = 16 - 1;
+ else
+ clip.thread4.max_threads = 2 - 1;
+ } else {
+ assert(key->nr_urb_entries >= 5);
+ clip.thread4.max_threads = 1 - 1;
+ }
+
+ if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+ clip.thread4.max_threads = 0;
+
+ if (INTEL_DEBUG & DEBUG_STATS)
+ clip.thread4.stats_enable = 1;
+
+ clip.clip5.userclip_enable_flags = 0x7f;
+ clip.clip5.userclip_must_clip = 1;
+ clip.clip5.guard_band_enable = 0;
+ if (!key->depth_clamp)
+ clip.clip5.viewport_z_clip_enable = 1;
+ clip.clip5.viewport_xy_clip_enable = 1;
+ clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
+ clip.clip5.api_mode = BRW_CLIP_API_OGL;
+ clip.clip5.clip_mode = key->clip_mode;
+
+ if (BRW_IS_G4X(brw))
+ clip.clip5.negative_w_clip_test = 1;
+
+ clip.clip6.clipper_viewport_state_ptr = 0;
+ clip.viewport_xmin = -1;
+ clip.viewport_xmax = 1;
+ clip.viewport_ymin = -1;
+ clip.viewport_ymax = 1;
+
+ bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
+ key, sizeof(*key),
+ &brw->clip.prog_bo, 1,
+ &clip, sizeof(clip),
+ NULL, NULL);
+
+ /* Emit clip program relocation */
+ assert(brw->clip.prog_bo);
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ 0,
+ clip.thread0.grf_reg_count << 1,
+ offsetof(struct brw_clip_unit_state, thread0),
+ brw->clip.prog_bo);
+
+ return bo;
+}
+
+static void upload_clip_unit( struct brw_context *brw )
+{
+ struct brw_clip_unit_key key;
+
+ clip_unit_populate_key(brw, &key);
+
+ dri_bo_unreference(brw->clip.state_bo);
+ brw->clip.state_bo = brw_search_cache(&brw->cache, BRW_CLIP_UNIT,
+ &key, sizeof(key),
+ &brw->clip.prog_bo, 1,
+ NULL);
+ if (brw->clip.state_bo == NULL) {
+ brw->clip.state_bo = clip_unit_create_from_key(brw, &key);
+ }
+}
+
+const struct brw_tracked_state brw_clip_unit = {
+ .dirty = {
+ .mesa = _NEW_TRANSFORM,
+ .brw = (BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_URB_FENCE),
+ .cache = CACHE_NEW_CLIP_PROG
+ },
+ .prepare = upload_clip_unit,
+};
diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c
new file mode 100644
index 00000000000..0efd77225e2
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_tri.c
@@ -0,0 +1,603 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+static void release_tmps( struct brw_clip_compile *c )
+{
+ c->last_tmp = c->first_tmp;
+}
+
+
+void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
+ GLuint nr_verts )
+{
+ GLuint i = 0,j;
+
+ /* Register usage is static, precompute here:
+ */
+ c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+ if (c->key.nr_userclip) {
+ c->reg.fixed_planes = brw_vec4_grf(i, 0);
+ i += (6 + c->key.nr_userclip + 1) / 2;
+
+ c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
+ }
+ else
+ c->prog_data.curb_read_length = 0;
+
+
+ /* Payload vertices plus space for more generated vertices:
+ */
+ for (j = 0; j < nr_verts; j++) {
+ c->reg.vertex[j] = brw_vec4_grf(i, 0);
+ i += c->nr_regs;
+ }
+
+ if (c->nr_attrs & 1) {
+ for (j = 0; j < 3; j++) {
+ GLuint delta = c->nr_attrs*16 + 32;
+
+ if (BRW_IS_IGDNG(c->func.brw))
+ delta = c->nr_attrs * 16 + 32 * 3;
+
+ brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
+ }
+ }
+
+ c->reg.t = brw_vec1_grf(i, 0);
+ c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
+ c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
+ c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
+ c->reg.plane_equation = brw_vec4_grf(i, 4);
+ i++;
+
+ c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
+ c->reg.dp = brw_vec1_grf(i, 4);
+ i++;
+
+ c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+ i++;
+
+ c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+ i++;
+
+ c->reg.freelist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+ i++;
+
+ if (!c->key.nr_userclip) {
+ c->reg.fixed_planes = brw_vec8_grf(i, 0);
+ i++;
+ }
+
+ if (c->key.do_unfilled) {
+ c->reg.dir = brw_vec4_grf(i, 0);
+ c->reg.offset = brw_vec4_grf(i, 4);
+ i++;
+ c->reg.tmp0 = brw_vec4_grf(i, 0);
+ c->reg.tmp1 = brw_vec4_grf(i, 4);
+ i++;
+ }
+
+ if (c->need_ff_sync) {
+ c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+ i++;
+ }
+
+ c->first_tmp = i;
+ c->last_tmp = i;
+
+ c->prog_data.urb_read_length = c->nr_regs; /* ? */
+ c->prog_data.total_grf = i;
+}
+
+
+
+void brw_clip_tri_init_vertices( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+ struct brw_instruction *is_rev;
+
+ /* Initial list of indices for incoming vertexes:
+ */
+ brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_EQ,
+ tmp0,
+ brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
+
+ /* XXX: Is there an easier way to do this? Need to reverse every
+ * second tristrip element: Can ignore sometimes?
+ */
+ is_rev = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[1]) );
+ brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[0]) );
+ if (c->need_direction)
+ brw_MOV(p, c->reg.dir, brw_imm_f(-1));
+ }
+ is_rev = brw_ELSE(p, is_rev);
+ {
+ brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[0]) );
+ brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[1]) );
+ if (c->need_direction)
+ brw_MOV(p, c->reg.dir, brw_imm_f(1));
+ }
+ brw_ENDIF(p, is_rev);
+
+ brw_MOV(p, get_element(c->reg.inlist, 2), brw_address(c->reg.vertex[2]) );
+ brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0));
+ brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3));
+}
+
+
+
+void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *is_poly;
+ struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+
+ brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_EQ,
+ tmp0,
+ brw_imm_ud(_3DPRIM_POLYGON));
+
+ is_poly = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_copy_colors(c, 1, 0);
+ brw_clip_copy_colors(c, 2, 0);
+ }
+ is_poly = brw_ELSE(p, is_poly);
+ {
+ brw_clip_copy_colors(c, 0, 2);
+ brw_clip_copy_colors(c, 1, 2);
+ }
+ brw_ENDIF(p, is_poly);
+}
+
+
+
+/* Use mesa's clipping algorithms, translated to GEN4 assembly.
+ */
+void brw_clip_tri( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_indirect vtx = brw_indirect(0, 0);
+ struct brw_indirect vtxPrev = brw_indirect(1, 0);
+ struct brw_indirect vtxOut = brw_indirect(2, 0);
+ struct brw_indirect plane_ptr = brw_indirect(3, 0);
+ struct brw_indirect inlist_ptr = brw_indirect(4, 0);
+ struct brw_indirect outlist_ptr = brw_indirect(5, 0);
+ struct brw_indirect freelist_ptr = brw_indirect(6, 0);
+ struct brw_instruction *plane_loop;
+ struct brw_instruction *plane_active;
+ struct brw_instruction *vertex_loop;
+ struct brw_instruction *next_test;
+ struct brw_instruction *prev_test;
+
+ brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) );
+ brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
+ brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
+ brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+
+ brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) );
+
+ plane_loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ /* if (planemask & 1)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1));
+
+ plane_active = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /* vtxOut = freelist_ptr++
+ */
+ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) );
+ brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE));
+
+ if (c->key.nr_userclip)
+ brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
+ else
+ brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
+
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0));
+
+ vertex_loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ /* vtx = *input_ptr;
+ */
+ brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0));
+
+ /* IS_NEGATIVE(prev) */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+ prev_test = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /* IS_POSITIVE(next)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_GE);
+ brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+ next_test = brw_IF(p, BRW_EXECUTE_1);
+ {
+
+ /* Coming back in.
+ */
+ brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev);
+
+ /* If (vtxOut == 0) vtxOut = vtxPrev
+ */
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) );
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, GL_FALSE);
+
+ /* *outlist_ptr++ = vtxOut;
+ * nr_verts++;
+ * vtxOut = 0;
+ */
+ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+ brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+ brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ }
+ brw_ENDIF(p, next_test);
+
+ }
+ prev_test = brw_ELSE(p, prev_test);
+ {
+ /* *outlist_ptr++ = vtxPrev;
+ * nr_verts++;
+ */
+ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev));
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+ brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+
+ /* IS_NEGATIVE(next)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+ next_test = brw_IF(p, BRW_EXECUTE_1);
+ {
+ /* Going out of bounds. Avoid division by zero as we
+ * know dp != dpPrev from DIFFERENT_SIGNS, above.
+ */
+ brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev));
+ brw_math_invert(p, c->reg.t, c->reg.t);
+ brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp);
+
+ /* If (vtxOut == 0) vtxOut = vtx
+ */
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) );
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, GL_TRUE);
+
+ /* *outlist_ptr++ = vtxOut;
+ * nr_verts++;
+ * vtxOut = 0;
+ */
+ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+ brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+ brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+ }
+ brw_ENDIF(p, next_test);
+ }
+ brw_ENDIF(p, prev_test);
+
+ /* vtxPrev = vtx;
+ * inlist_ptr++;
+ */
+ brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx));
+ brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short)));
+
+ /* while (--loopcount != 0)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, vertex_loop);
+
+ /* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1]
+ * inlist = outlist
+ * inlist_ptr = &inlist[0]
+ * outlist_ptr = &outlist[0]
+ */
+ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2));
+ brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0));
+ brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0));
+ brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
+ brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+ }
+ brw_ENDIF(p, plane_active);
+
+ /* plane_ptr++;
+ */
+ brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+ /* nr_verts >= 3
+ */
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_GE,
+ c->reg.nr_verts,
+ brw_imm_ud(3));
+
+ /* && (planemask>>=1) != 0
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+ }
+ brw_WHILE(p, plane_loop);
+}
+
+
+
+void brw_clip_tri_emit_polygon(struct brw_clip_compile *c)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *loop, *if_insn;
+
+ /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--)
+ */
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
+ brw_ADD(p,
+ c->reg.loopcount,
+ c->reg.nr_verts,
+ brw_imm_d(-2));
+
+ if_insn = brw_IF(p, BRW_EXECUTE_1);
+ {
+ struct brw_indirect v0 = brw_indirect(0, 0);
+ struct brw_indirect vptr = brw_indirect(1, 0);
+
+ brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist));
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+ brw_clip_emit_vue(c, v0, 1, 0, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_START));
+
+ brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << 2));
+
+ brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+
+ brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END));
+ }
+ brw_ENDIF(p, if_insn);
+}
+
+static void do_clip_tri( struct brw_clip_compile *c )
+{
+ brw_clip_init_planes(c);
+
+ brw_clip_tri(c);
+}
+
+
+static void maybe_do_clip_tri( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *do_clip;
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
+ do_clip = brw_IF(p, BRW_EXECUTE_1);
+ {
+ do_clip_tri(c);
+ }
+ brw_ENDIF(p, do_clip);
+}
+
+static void brw_clip_test( struct brw_clip_compile *c )
+{
+ struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+
+ struct brw_reg v0 = get_tmp(c);
+ struct brw_reg v1 = get_tmp(c);
+ struct brw_reg v2 = get_tmp(c);
+
+ struct brw_indirect vt0 = brw_indirect(0, 0);
+ struct brw_indirect vt1 = brw_indirect(1, 0);
+ struct brw_indirect vt2 = brw_indirect(2, 0);
+
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *is_outside;
+ struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+
+ brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
+ brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
+ brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2]));
+ brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS]));
+ brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS]));
+ brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS]));
+ brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f));
+
+ /* test nearz, xmin, ymin plane */
+ /* clip.xyz < -clip.w */
+ brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* All vertices are outside of a plane, rejected */
+ brw_AND(p, t, t1, t2);
+ brw_AND(p, t, t, t3);
+ brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
+ brw_OR(p, tmp0, tmp0, get_element(t, 2));
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
+ is_outside = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, is_outside);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* some vertices are inside a plane, some are outside,need to clip */
+ brw_XOR(p, t, t1, t2);
+ brw_XOR(p, t1, t2, t3);
+ brw_OR(p, t, t, t1);
+ brw_AND(p, t, t, brw_imm_ud(0x1));
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 0), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 1), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 2), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* test farz, xmax, ymax plane */
+ /* clip.xyz > clip.w */
+ brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* All vertices are outside of a plane, rejected */
+ brw_AND(p, t, t1, t2);
+ brw_AND(p, t, t, t3);
+ brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
+ brw_OR(p, tmp0, tmp0, get_element(t, 2));
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
+ is_outside = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, is_outside);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* some vertices are inside a plane, some are outside,need to clip */
+ brw_XOR(p, t, t1, t2);
+ brw_XOR(p, t1, t2, t3);
+ brw_OR(p, t, t, t1);
+ brw_AND(p, t, t, brw_imm_ud(0x1));
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 0), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 1), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+ get_element(t, 2), brw_imm_ud(0));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ release_tmps(c);
+}
+
+
+void brw_emit_tri_clip( struct brw_clip_compile *c )
+{
+ struct brw_instruction *neg_rhw;
+ struct brw_compile *p = &c->func;
+ brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
+ brw_clip_tri_init_vertices(c);
+ brw_clip_init_clipmask(c);
+ brw_clip_init_ff_sync(c);
+
+ /* if -ve rhw workaround bit is set,
+ do cliptest */
+ if (BRW_IS_965(p->brw)) {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
+ brw_imm_ud(1<<20));
+ neg_rhw = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_test(c);
+ }
+ brw_ENDIF(p, neg_rhw);
+ }
+ /* Can't push into do_clip_tri because with polygon (or quad)
+ * flatshading, need to apply the flatshade here because we don't
+ * respect the PV when converting to trifan for emit:
+ */
+ if (c->key.do_flat_shading)
+ brw_clip_tri_flat_shade(c);
+
+ if ((c->key.clip_mode == BRW_CLIPMODE_NORMAL) ||
+ (c->key.clip_mode == BRW_CLIPMODE_KERNEL_CLIP))
+ do_clip_tri(c);
+ else
+ maybe_do_clip_tri(c);
+
+ brw_clip_tri_emit_polygon(c);
+
+ /* Send an empty message to kill the thread:
+ */
+ brw_clip_kill_thread(c);
+}
diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c
new file mode 100644
index 00000000000..ad1bfa435fb
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_unfilled.c
@@ -0,0 +1,505 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+/* This is performed against the original triangles, so no indirection
+ * required:
+BZZZT!
+ */
+static void compute_tri_direction( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg e = c->reg.tmp0;
+ struct brw_reg f = c->reg.tmp1;
+ struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]);
+ struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]);
+ struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]);
+
+
+ struct brw_reg v0n = get_tmp(c);
+ struct brw_reg v1n = get_tmp(c);
+ struct brw_reg v2n = get_tmp(c);
+
+ /* Convert to NDC.
+ * NOTE: We can't modify the original vertex coordinates,
+ * as it may impact further operations.
+ * So, we have to keep normalized coordinates in temp registers.
+ *
+ * TBD-KC
+ * Try to optimize unnecessary MOV's.
+ */
+ brw_MOV(p, v0n, v0);
+ brw_MOV(p, v1n, v1);
+ brw_MOV(p, v2n, v2);
+
+ brw_clip_project_position(c, v0n);
+ brw_clip_project_position(c, v1n);
+ brw_clip_project_position(c, v2n);
+
+ /* Calculate the vectors of two edges of the triangle:
+ */
+ brw_ADD(p, e, v0n, negate(v2n));
+ brw_ADD(p, f, v1n, negate(v2n));
+
+ /* Take their crossproduct:
+ */
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3), brw_swizzle(f,2,0,1,3));
+ brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+
+ brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e));
+}
+
+
+static void cull_direction( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *ccw;
+ GLuint conditional;
+
+ assert (!(c->key.fill_ccw == CLIP_CULL &&
+ c->key.fill_cw == CLIP_CULL));
+
+ if (c->key.fill_ccw == CLIP_CULL)
+ conditional = BRW_CONDITIONAL_GE;
+ else
+ conditional = BRW_CONDITIONAL_L;
+
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ conditional,
+ get_element(c->reg.dir, 2),
+ brw_imm_f(0));
+
+ ccw = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, ccw);
+}
+
+
+
+static void copy_bfc( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *ccw;
+ GLuint conditional;
+
+ /* Do we have any colors to copy?
+ */
+ if (!(c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) &&
+ !(c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1]))
+ return;
+
+ /* In some wierd degnerate cases we can end up testing the
+ * direction twice, once for culling and once for bfc copying. Oh
+ * well, that's what you get for setting wierd GL state.
+ */
+ if (c->key.copy_bfc_ccw)
+ conditional = BRW_CONDITIONAL_GE;
+ else
+ conditional = BRW_CONDITIONAL_L;
+
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ conditional,
+ get_element(c->reg.dir, 2),
+ brw_imm_f(0));
+
+ ccw = brw_IF(p, BRW_EXECUTE_1);
+ {
+ GLuint i;
+
+ for (i = 0; i < 3; i++) {
+ if (c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0])
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL0]),
+ byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC0]));
+
+ if (c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1])
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL1]),
+ byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC1]));
+ }
+ }
+ brw_ENDIF(p, ccw);
+}
+
+
+
+
+/*
+ GLfloat iz = 1.0 / dir.z;
+ GLfloat ac = dir.x * iz;
+ GLfloat bc = dir.y * iz;
+ offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE;
+ offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor;
+ offset *= MRD;
+*/
+static void compute_offset( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg off = c->reg.offset;
+ struct brw_reg dir = c->reg.dir;
+
+ brw_math_invert(p, get_element(off, 2), get_element(dir, 2));
+ brw_MUL(p, vec2(off), dir, get_element(off, 2));
+
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_GE,
+ brw_abs(get_element(off, 0)),
+ brw_abs(get_element(off, 1)));
+
+ brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1)));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor));
+ brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units));
+}
+
+
+static void merge_edgeflags( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *is_poly;
+ struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0);
+
+ brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_EQ,
+ tmp0,
+ brw_imm_ud(_3DPRIM_POLYGON));
+
+ /* Get away with using reg.vertex because we know that this is not
+ * a _3DPRIM_TRISTRIP_REVERSE:
+ */
+ is_poly = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
+ brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8));
+ brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
+ brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9));
+ brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+ brw_ENDIF(p, is_poly);
+}
+
+
+
+static void apply_one_offset( struct brw_clip_compile *c,
+ struct brw_indirect vert )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg z = deref_1f(vert, c->header_position_offset +
+ 2 * type_sz(BRW_REGISTER_TYPE_F));
+
+ brw_ADD(p, z, z, vec1(c->reg.offset));
+}
+
+
+
+/***********************************************************************
+ * Output clipped polygon as an unfilled primitive:
+ */
+static void emit_lines(struct brw_clip_compile *c,
+ GLboolean do_offset)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *loop;
+ struct brw_instruction *draw_edge;
+ struct brw_indirect v0 = brw_indirect(0, 0);
+ struct brw_indirect v1 = brw_indirect(1, 0);
+ struct brw_indirect v0ptr = brw_indirect(2, 0);
+ struct brw_indirect v1ptr = brw_indirect(3, 0);
+
+ /* Need a seperate loop for offset:
+ */
+ if (do_offset) {
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+ brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+ apply_one_offset(c, v0);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+ }
+
+ /* v1ptr = &inlist[nr_verts]
+ * *v1ptr = v0
+ */
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+ brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
+ brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
+ brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+ brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2));
+ brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+ /* draw edge if edgeflag != 0 */
+ brw_CMP(p,
+ vec1(brw_null_reg()), BRW_CONDITIONAL_NZ,
+ deref_1f(v0, c->offset[VERT_RESULT_EDGE]),
+ brw_imm_f(0));
+ draw_edge = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
+ brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
+ }
+ brw_ENDIF(p, draw_edge);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+}
+
+
+
+static void emit_points(struct brw_clip_compile *c,
+ GLboolean do_offset )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *loop;
+ struct brw_instruction *draw_point;
+
+ struct brw_indirect v0 = brw_indirect(0, 0);
+ struct brw_indirect v0ptr = brw_indirect(2, 0);
+
+ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+ brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+
+ loop = brw_DO(p, BRW_EXECUTE_1);
+ {
+ brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+ brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+ /* draw if edgeflag != 0
+ */
+ brw_CMP(p,
+ vec1(brw_null_reg()), BRW_CONDITIONAL_NZ,
+ deref_1f(v0, c->offset[VERT_RESULT_EDGE]),
+ brw_imm_f(0));
+ draw_point = brw_IF(p, BRW_EXECUTE_1);
+ {
+ if (do_offset)
+ apply_one_offset(c, v0);
+
+ brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END);
+ }
+ brw_ENDIF(p, draw_point);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+ }
+ brw_WHILE(p, loop);
+}
+
+
+
+
+
+
+
+static void emit_primitives( struct brw_clip_compile *c,
+ GLuint mode,
+ GLboolean do_offset )
+{
+ switch (mode) {
+ case CLIP_FILL:
+ brw_clip_tri_emit_polygon(c);
+ break;
+
+ case CLIP_LINE:
+ emit_lines(c, do_offset);
+ break;
+
+ case CLIP_POINT:
+ emit_points(c, do_offset);
+ break;
+
+ case CLIP_CULL:
+ assert(0);
+ break;
+ }
+}
+
+
+
+static void emit_unfilled_primitives( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *ccw;
+
+ /* Direction culling has already been done.
+ */
+ if (c->key.fill_ccw != c->key.fill_cw &&
+ c->key.fill_ccw != CLIP_CULL &&
+ c->key.fill_cw != CLIP_CULL)
+ {
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_GE,
+ get_element(c->reg.dir, 2),
+ brw_imm_f(0));
+
+ ccw = brw_IF(p, BRW_EXECUTE_1);
+ {
+ emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
+ }
+ ccw = brw_ELSE(p, ccw);
+ {
+ emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
+ }
+ brw_ENDIF(p, ccw);
+ }
+ else if (c->key.fill_cw != CLIP_CULL) {
+ emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
+ }
+ else if (c->key.fill_ccw != CLIP_CULL) {
+ emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
+ }
+}
+
+
+
+
+static void check_nr_verts( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *if_insn;
+
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3));
+ if_insn = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_kill_thread(c);
+ }
+ brw_ENDIF(p, if_insn);
+}
+
+
+void brw_emit_unfilled_clip( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *do_clip;
+
+
+ c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) ||
+ (c->key.fill_ccw != c->key.fill_cw) ||
+ c->key.fill_ccw == CLIP_CULL ||
+ c->key.fill_cw == CLIP_CULL ||
+ c->key.copy_bfc_cw ||
+ c->key.copy_bfc_ccw);
+
+ brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
+ brw_clip_tri_init_vertices(c);
+ brw_clip_init_ff_sync(c);
+
+ assert(c->offset[VERT_RESULT_EDGE]);
+
+ if (c->key.fill_ccw == CLIP_CULL &&
+ c->key.fill_cw == CLIP_CULL) {
+ brw_clip_kill_thread(c);
+ return;
+ }
+
+ merge_edgeflags(c);
+
+ /* Need to use the inlist indirection here:
+ */
+ if (c->need_direction)
+ compute_tri_direction(c);
+
+ if (c->key.fill_ccw == CLIP_CULL ||
+ c->key.fill_cw == CLIP_CULL)
+ cull_direction(c);
+
+ if (c->key.offset_ccw ||
+ c->key.offset_cw)
+ compute_offset(c);
+
+ if (c->key.copy_bfc_ccw ||
+ c->key.copy_bfc_cw)
+ copy_bfc(c);
+
+ /* Need to do this whether we clip or not:
+ */
+ if (c->key.do_flat_shading)
+ brw_clip_tri_flat_shade(c);
+
+ brw_clip_init_clipmask(c);
+ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
+ do_clip = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_init_planes(c);
+ brw_clip_tri(c);
+ check_nr_verts(c);
+ }
+ brw_ENDIF(p, do_clip);
+
+ emit_unfilled_primitives(c);
+ brw_clip_kill_thread(c);
+}
+
+
+
diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c
new file mode 100644
index 00000000000..5a73abdfee9
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_util.c
@@ -0,0 +1,396 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+
+struct brw_reg get_tmp( struct brw_clip_compile *c )
+{
+ struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0);
+
+ if (++c->last_tmp > c->prog_data.total_grf)
+ c->prog_data.total_grf = c->last_tmp;
+
+ return tmp;
+}
+
+static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp )
+{
+ if (tmp.nr == c->last_tmp-1)
+ c->last_tmp--;
+}
+
+
+static struct brw_reg make_plane_ud(GLuint x, GLuint y, GLuint z, GLuint w)
+{
+ return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x);
+}
+
+
+void brw_clip_init_planes( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+
+ if (!c->key.nr_userclip) {
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0, 0, 0xff, 1));
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0, 0, 1, 1));
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff, 0, 1));
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0, 1, 0, 1));
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff, 0, 0, 1));
+ brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1, 0, 0, 1));
+ }
+}
+
+
+
+#define W 3
+
+/* Project 'pos' to screen space (or back again), overwrite with results:
+ */
+void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos )
+{
+ struct brw_compile *p = &c->func;
+
+ /* calc rhw
+ */
+ brw_math_invert(p, get_element(pos, W), get_element(pos, W));
+
+ /* value.xyz *= value.rhw
+ */
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_MUL(p, brw_writemask(pos, WRITEMASK_XYZ), pos, brw_swizzle1(pos, W));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+}
+
+
+static void brw_clip_project_vertex( struct brw_clip_compile *c,
+ struct brw_indirect vert_addr )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = get_tmp(c);
+
+ /* Fixup position. Extract from the original vertex and re-project
+ * to screen space:
+ */
+ brw_MOV(p, tmp, deref_4f(vert_addr, c->offset[VERT_RESULT_HPOS]));
+ brw_clip_project_position(c, tmp);
+ brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp);
+
+ release_tmp(c, tmp);
+}
+
+
+
+
+/* Interpolate between two vertices and put the result into a0.0.
+ * Increment a0.0 accordingly.
+ */
+void brw_clip_interp_vertex( struct brw_clip_compile *c,
+ struct brw_indirect dest_ptr,
+ struct brw_indirect v0_ptr, /* from */
+ struct brw_indirect v1_ptr, /* to */
+ struct brw_reg t0,
+ GLboolean force_edgeflag)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = get_tmp(c);
+ GLuint i;
+
+ /* Just copy the vertex header:
+ */
+ /*
+ * After CLIP stage, only first 256 bits of the VUE are read
+ * back on IGDNG, so needn't change it
+ */
+ brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
+
+ /* Iterate over each attribute (could be done in pairs?)
+ */
+ for (i = 0; i < c->nr_attrs; i++) {
+ GLuint delta = i*16 + 32;
+
+ if (BRW_IS_IGDNG(p->brw))
+ delta = i * 16 + 32 * 3;
+
+ if (delta == c->offset[VERT_RESULT_EDGE]) {
+ if (force_edgeflag)
+ brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
+ else
+ brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta));
+ }
+ else {
+ /* Interpolate:
+ *
+ * New = attr0 + t*attr1 - t*attr0
+ */
+ brw_MUL(p,
+ vec4(brw_null_reg()),
+ deref_4f(v1_ptr, delta),
+ t0);
+
+ brw_MAC(p,
+ tmp,
+ negate(deref_4f(v0_ptr, delta)),
+ t0);
+
+ brw_ADD(p,
+ deref_4f(dest_ptr, delta),
+ deref_4f(v0_ptr, delta),
+ tmp);
+ }
+ }
+
+ if (i & 1) {
+ GLuint delta = i*16 + 32;
+
+ if (BRW_IS_IGDNG(p->brw))
+ delta = i * 16 + 32 * 3;
+
+ brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
+ }
+
+ release_tmp(c, tmp);
+
+ /* Recreate the projected (NDC) coordinate in the new vertex
+ * header:
+ */
+ brw_clip_project_vertex(c, dest_ptr );
+}
+
+
+
+
+#define MAX_MRF 16
+
+void brw_clip_emit_vue(struct brw_clip_compile *c,
+ struct brw_indirect vert,
+ GLboolean allocate,
+ GLboolean eot,
+ GLuint header)
+{
+ struct brw_compile *p = &c->func;
+ GLuint start = c->last_mrf;
+
+ brw_clip_ff_sync(c);
+
+ assert(!(allocate && eot));
+
+ /* Cycle through mrf regs - probably futile as we have to wait for
+ * the allocation response anyway. Also, the order this function
+ * is invoked doesn't correspond to the order the instructions will
+ * be executed, so it won't have any effect in many cases.
+ */
+#if 0
+ if (start + c->nr_regs + 1 >= MAX_MRF)
+ start = 0;
+
+ c->last_mrf = start + c->nr_regs + 1;
+#endif
+
+ /* Copy the vertex from vertn into m1..mN+1:
+ */
+ brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs);
+
+ /* Overwrite PrimType and PrimStart in the message header, for
+ * each vertex in turn:
+ */
+ brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
+
+
+ /* Send each vertex as a seperate write to the urb. This
+ * is different to the concept in brw_sf_emit.c, where
+ * subsequent writes are used to build up a single urb
+ * entry. Each of these writes instantiates a seperate
+ * urb entry - (I think... what about 'allocate'?)
+ */
+ brw_urb_WRITE(p,
+ allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+ start,
+ c->reg.R0,
+ allocate,
+ 1, /* used */
+ c->nr_regs + 1, /* msg length */
+ allocate ? 1 : 0, /* response_length */
+ eot, /* eot */
+ 1, /* writes_complete */
+ 0, /* urb offset */
+ BRW_URB_SWIZZLE_NONE);
+}
+
+
+
+void brw_clip_kill_thread(struct brw_clip_compile *c)
+{
+ struct brw_compile *p = &c->func;
+
+ brw_clip_ff_sync(c);
+ /* Send an empty message to kill the thread and release any
+ * allocated urb entry:
+ */
+ brw_urb_WRITE(p,
+ retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+ 0,
+ c->reg.R0,
+ 0, /* allocate */
+ 0, /* used */
+ 1, /* msg len */
+ 0, /* response len */
+ 1, /* eot */
+ 1, /* writes complete */
+ 0,
+ BRW_URB_SWIZZLE_NONE);
+}
+
+
+
+
+struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c )
+{
+ return brw_address(c->reg.fixed_planes);
+}
+
+
+struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c )
+{
+ if (c->key.nr_userclip) {
+ return brw_imm_uw(16);
+ }
+ else {
+ return brw_imm_uw(4);
+ }
+}
+
+
+/* If flatshading, distribute color from provoking vertex prior to
+ * clipping.
+ */
+void brw_clip_copy_colors( struct brw_clip_compile *c,
+ GLuint to, GLuint from )
+{
+ struct brw_compile *p = &c->func;
+
+ if (c->offset[VERT_RESULT_COL0])
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL0]),
+ byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL0]));
+
+ if (c->offset[VERT_RESULT_COL1])
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL1]),
+ byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL1]));
+
+ if (c->offset[VERT_RESULT_BFC0])
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC0]),
+ byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC0]));
+
+ if (c->offset[VERT_RESULT_BFC1])
+ brw_MOV(p,
+ byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC1]),
+ byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC1]));
+}
+
+
+
+void brw_clip_init_clipmask( struct brw_clip_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg incoming = get_element_ud(c->reg.R0, 2);
+
+ /* Shift so that lowest outcode bit is rightmost:
+ */
+ brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26));
+
+ if (c->key.nr_userclip) {
+ struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD);
+
+ /* Rearrange userclip outcodes so that they come directly after
+ * the fixed plane bits.
+ */
+ brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14));
+ brw_SHR(p, tmp, tmp, brw_imm_ud(8));
+ brw_OR(p, c->reg.planemask, c->reg.planemask, tmp);
+
+ release_tmp(c, tmp);
+ }
+}
+
+void brw_clip_ff_sync(struct brw_clip_compile *c)
+{
+ if (c->need_ff_sync) {
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *need_ff_sync;
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
+ need_ff_sync = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
+ brw_ff_sync(p,
+ c->reg.R0,
+ 0,
+ c->reg.R0,
+ 1,
+ 1, /* used */
+ 1, /* msg length */
+ 1, /* response length */
+ 0, /* eot */
+ 1, /* write compelete */
+ 0, /* urb offset */
+ BRW_URB_SWIZZLE_NONE);
+ }
+ brw_ENDIF(p, need_ff_sync);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+}
+
+void brw_clip_init_ff_sync(struct brw_clip_compile *c)
+{
+ if (c->need_ff_sync) {
+ struct brw_compile *p = &c->func;
+
+ brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0));
+ }
+}
diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
new file mode 100644
index 00000000000..c300c33adce
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -0,0 +1,173 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "main/imports.h"
+#include "main/api_noop.h"
+#include "main/macros.h"
+#include "main/vtxfmt.h"
+#include "main/simple_list.h"
+#include "shader/shader_api.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_draw.h"
+#include "brw_state.h"
+#include "brw_vs.h"
+#include "intel_tex.h"
+#include "intel_blit.h"
+#include "intel_batchbuffer.h"
+#include "intel_pixel.h"
+#include "intel_span.h"
+#include "tnl/t_pipeline.h"
+
+#include "utils.h"
+
+
+/***************************************
+ * Mesa's Driver Functions
+ ***************************************/
+
+static void brwUseProgram(GLcontext *ctx, GLuint program)
+{
+ _mesa_use_program(ctx, program);
+}
+
+static void brwInitProgFuncs( struct dd_function_table *functions )
+{
+ functions->UseProgram = brwUseProgram;
+}
+static void brwInitDriverFunctions( struct dd_function_table *functions )
+{
+ intelInitDriverFunctions( functions );
+
+ brwInitFragProgFuncs( functions );
+ brwInitProgFuncs( functions );
+ brw_init_queryobj_functions(functions);
+
+ functions->Viewport = intel_viewport;
+}
+
+GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
+ __DRIcontextPrivate *driContextPriv,
+ void *sharedContextPrivate)
+{
+ struct dd_function_table functions;
+ struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context);
+ struct intel_context *intel = &brw->intel;
+ GLcontext *ctx = &intel->ctx;
+
+ if (!brw) {
+ _mesa_printf("%s: failed to alloc context\n", __FUNCTION__);
+ return GL_FALSE;
+ }
+
+ brwInitVtbl( brw );
+ brwInitDriverFunctions( &functions );
+
+ if (!intelInitContext( intel, mesaVis, driContextPriv,
+ sharedContextPrivate, &functions )) {
+ _mesa_printf("%s: failed to init intel context\n", __FUNCTION__);
+ FREE(brw);
+ return GL_FALSE;
+ }
+
+ /* Initialize swrast, tnl driver tables: */
+ intelInitSpanFuncs(ctx);
+
+ TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
+
+ ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
+ ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
+ ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits,
+ ctx->Const.MaxTextureImageUnits);
+ ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */
+
+ /* Mesa limits textures to 4kx4k; it would be nice to fix that someday
+ */
+ ctx->Const.MaxTextureLevels = 13;
+ ctx->Const.Max3DTextureLevels = 9;
+ ctx->Const.MaxCubeTextureLevels = 12;
+ ctx->Const.MaxTextureRectSize = (1<<12);
+
+ ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+
+ /* if conformance mode is set, swrast can handle any size AA point */
+ ctx->Const.MaxPointSizeAA = 255.0;
+
+ /* We want the GLSL compiler to emit code that uses condition codes */
+ ctx->Shader.EmitCondCodes = GL_TRUE;
+ ctx->Shader.EmitNVTempInitialization = GL_TRUE;
+
+ ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024);
+ ctx->Const.VertexProgram.MaxAluInstructions = 0;
+ ctx->Const.VertexProgram.MaxTexInstructions = 0;
+ ctx->Const.VertexProgram.MaxTexIndirections = 0;
+ ctx->Const.VertexProgram.MaxNativeAluInstructions = 0;
+ ctx->Const.VertexProgram.MaxNativeTexInstructions = 0;
+ ctx->Const.VertexProgram.MaxNativeTexIndirections = 0;
+ ctx->Const.VertexProgram.MaxNativeAttribs = 16;
+ ctx->Const.VertexProgram.MaxNativeTemps = 256;
+ ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
+ ctx->Const.VertexProgram.MaxNativeParameters = 1024;
+ ctx->Const.VertexProgram.MaxEnvParams =
+ MIN2(ctx->Const.VertexProgram.MaxNativeParameters,
+ ctx->Const.VertexProgram.MaxEnvParams);
+
+ ctx->Const.FragmentProgram.MaxNativeInstructions = (16 * 1024);
+ ctx->Const.FragmentProgram.MaxNativeAluInstructions = (16 * 1024);
+ ctx->Const.FragmentProgram.MaxNativeTexInstructions = (16 * 1024);
+ ctx->Const.FragmentProgram.MaxNativeTexIndirections = (16 * 1024);
+ ctx->Const.FragmentProgram.MaxNativeAttribs = 12;
+ ctx->Const.FragmentProgram.MaxNativeTemps = 256;
+ ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
+ ctx->Const.FragmentProgram.MaxNativeParameters = 1024;
+ ctx->Const.FragmentProgram.MaxEnvParams =
+ MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
+ ctx->Const.FragmentProgram.MaxEnvParams);
+
+ brw_init_state( brw );
+
+ brw->state.dirty.mesa = ~0;
+ brw->state.dirty.brw = ~0;
+
+ brw->emit_state_always = 0;
+
+ ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
+ ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
+
+ make_empty_list(&brw->query.active_head);
+
+ brw_draw_init( brw );
+
+ return GL_TRUE;
+}
+
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
new file mode 100644
index 00000000000..fa3e32c7ff1
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -0,0 +1,767 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#ifndef BRWCONTEXT_INC
+#define BRWCONTEXT_INC
+
+#include "intel_context.h"
+#include "brw_structs.h"
+#include "main/imports.h"
+
+
+/* Glossary:
+ *
+ * URB - uniform resource buffer. A mid-sized buffer which is
+ * partitioned between the fixed function units and used for passing
+ * values (vertices, primitives, constants) between them.
+ *
+ * CURBE - constant URB entry. An urb region (entry) used to hold
+ * constant values which the fixed function units can be instructed to
+ * preload into the GRF when spawning a thread.
+ *
+ * VUE - vertex URB entry. An urb entry holding a vertex and usually
+ * a vertex header. The header contains control information and
+ * things like primitive type, Begin/end flags and clip codes.
+ *
+ * PUE - primitive URB entry. An urb entry produced by the setup (SF)
+ * unit holding rasterization and interpolation parameters.
+ *
+ * GRF - general register file. One of several register files
+ * addressable by programmed threads. The inputs (r0, payload, curbe,
+ * urb) of the thread are preloaded to this area before the thread is
+ * spawned. The registers are individually 8 dwords wide and suitable
+ * for general usage. Registers holding thread input values are not
+ * special and may be overwritten.
+ *
+ * MRF - message register file. Threads communicate (and terminate)
+ * by sending messages. Message parameters are placed in contiguous
+ * MRF registers. All program output is via these messages. URB
+ * entries are populated by sending a message to the shared URB
+ * function containing the new data, together with a control word,
+ * often an unmodified copy of R0.
+ *
+ * R0 - GRF register 0. Typically holds control information used when
+ * sending messages to other threads.
+ *
+ * EU or GEN4 EU: The name of the programmable subsystem of the
+ * i965 hardware. Threads are executed by the EU, the registers
+ * described above are part of the EU architecture.
+ *
+ * Fixed function units:
+ *
+ * CS - Command streamer. Notional first unit, little software
+ * interaction. Holds the URB entries used for constant data, ie the
+ * CURBEs.
+ *
+ * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of
+ * this unit is responsible for pulling vertices out of vertex buffers
+ * in vram and injecting them into the processing pipe as VUEs. If
+ * enabled, it first passes them to a VS thread which is a good place
+ * for the driver to implement any active vertex shader.
+ *
+ * GS - Geometry Shader. This corresponds to a new DX10 concept. If
+ * enabled, incoming strips etc are passed to GS threads in individual
+ * line/triangle/point units. The GS thread may perform arbitary
+ * computation and emit whatever primtives with whatever vertices it
+ * chooses. This makes GS an excellent place to implement GL's
+ * unfilled polygon modes, though of course it is capable of much
+ * more. Additionally, GS is used to translate away primitives not
+ * handled by latter units, including Quads and Lineloops.
+ *
+ * CS - Clipper. Mesa's clipping algorithms are imported to run on
+ * this unit. The fixed function part performs cliptesting against
+ * the 6 fixed clipplanes and makes descisions on whether or not the
+ * incoming primitive needs to be passed to a thread for clipping.
+ * User clip planes are handled via cooperation with the VS thread.
+ *
+ * SF - Strips Fans or Setup: Triangles are prepared for
+ * rasterization. Interpolation coefficients are calculated.
+ * Flatshading and two-side lighting usually performed here.
+ *
+ * WM - Windower. Interpolation of vertex attributes performed here.
+ * Fragment shader implemented here. SIMD aspects of EU taken full
+ * advantage of, as pixels are processed in blocks of 16.
+ *
+ * CC - Color Calculator. No EU threads associated with this unit.
+ * Handles blending and (presumably) depth and stencil testing.
+ */
+
+#define BRW_FALLBACK_TEXTURE 0x1
+#define BRW_MAX_CURBE (32*16)
+
+struct brw_context;
+
+#define BRW_NEW_URB_FENCE 0x1
+#define BRW_NEW_FRAGMENT_PROGRAM 0x2
+#define BRW_NEW_VERTEX_PROGRAM 0x4
+#define BRW_NEW_INPUT_DIMENSIONS 0x8
+#define BRW_NEW_CURBE_OFFSETS 0x10
+#define BRW_NEW_REDUCED_PRIMITIVE 0x20
+#define BRW_NEW_PRIMITIVE 0x40
+#define BRW_NEW_CONTEXT 0x80
+#define BRW_NEW_WM_INPUT_DIMENSIONS 0x100
+#define BRW_NEW_PSP 0x800
+#define BRW_NEW_WM_SURFACES 0x1000
+#define BRW_NEW_FENCE 0x2000
+#define BRW_NEW_INDICES 0x4000
+#define BRW_NEW_VERTICES 0x8000
+/**
+ * Used for any batch entry with a relocated pointer that will be used
+ * by any 3D rendering.
+ */
+#define BRW_NEW_BATCH 0x10000
+/** brw->depth_region updated */
+#define BRW_NEW_DEPTH_BUFFER 0x20000
+#define BRW_NEW_NR_WM_SURFACES 0x40000
+#define BRW_NEW_NR_VS_SURFACES 0x80000
+#define BRW_NEW_INDEX_BUFFER 0x100000
+
+struct brw_state_flags {
+ /** State update flags signalled by mesa internals */
+ GLuint mesa;
+ /**
+ * State update flags signalled as the result of brw_tracked_state updates
+ */
+ GLuint brw;
+ /** State update flags signalled by brw_state_cache.c searches */
+ GLuint cache;
+};
+
+
+/** Subclass of Mesa vertex program */
+struct brw_vertex_program {
+ struct gl_vertex_program program;
+ GLuint id;
+ dri_bo *const_buffer; /** Program constant buffer/surface */
+ GLboolean use_const_buffer;
+};
+
+
+/** Subclass of Mesa fragment program */
+struct brw_fragment_program {
+ struct gl_fragment_program program;
+ GLuint id; /**< serial no. to identify frag progs, never re-used */
+ GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */
+
+ dri_bo *const_buffer; /** Program constant buffer/surface */
+ GLboolean use_const_buffer;
+
+ /** for debugging, which texture units are referenced */
+ GLbitfield tex_units_used;
+};
+
+
+/* Data about a particular attempt to compile a program. Note that
+ * there can be many of these, each in a different GL state
+ * corresponding to a different brw_wm_prog_key struct, with different
+ * compiled programs:
+ */
+struct brw_wm_prog_data {
+ GLuint curb_read_length;
+ GLuint urb_read_length;
+
+ GLuint first_curbe_grf;
+ GLuint total_grf;
+ GLuint total_scratch;
+
+ GLuint nr_params; /**< number of float params/constants */
+ GLboolean error;
+
+ /* Pointer to tracked values (only valid once
+ * _mesa_load_state_parameters has been called at runtime).
+ */
+ const GLfloat *param[BRW_MAX_CURBE];
+};
+
+struct brw_sf_prog_data {
+ GLuint urb_read_length;
+ GLuint total_grf;
+
+ /* Each vertex may have upto 12 attributes, 4 components each,
+ * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11
+ * rows.
+ *
+ * Actually we use 4 for each, so call it 12 rows.
+ */
+ GLuint urb_entry_size;
+};
+
+struct brw_clip_prog_data {
+ GLuint curb_read_length; /* user planes? */
+ GLuint clip_mode;
+ GLuint urb_read_length;
+ GLuint total_grf;
+};
+
+struct brw_gs_prog_data {
+ GLuint urb_read_length;
+ GLuint total_grf;
+};
+
+struct brw_vs_prog_data {
+ GLuint curb_read_length;
+ GLuint urb_read_length;
+ GLuint total_grf;
+ GLuint outputs_written;
+ GLuint nr_params; /**< number of float params/constants */
+
+ GLuint inputs_read;
+
+ /* Used for calculating urb partitions:
+ */
+ GLuint urb_entry_size;
+};
+
+
+/* Size == 0 if output either not written, or always [0,0,0,1]
+ */
+struct brw_vs_ouput_sizes {
+ GLubyte output_size[VERT_RESULT_MAX];
+};
+
+
+/** Number of texture sampler units */
+#define BRW_MAX_TEX_UNIT 16
+
+/**
+ * Size of our surface binding table for the WM.
+ * This contains pointers to the drawing surfaces and current texture
+ * objects and shader constant buffers (+2).
+ */
+#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
+
+/**
+ * Helpers to convert drawing buffers, textures and constant buffers
+ * to surface binding table indexes, for WM.
+ */
+#define SURF_INDEX_DRAW(d) (d)
+#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS)
+#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 1 + (t))
+
+/**
+ * Size of surface binding table for the VS.
+ * Only one constant buffer for now.
+ */
+#define BRW_VS_MAX_SURF 1
+
+/**
+ * Only a VS constant buffer
+ */
+#define SURF_INDEX_VERT_CONST_BUFFER 0
+
+
+enum brw_cache_id {
+ BRW_CC_VP,
+ BRW_CC_UNIT,
+ BRW_WM_PROG,
+ BRW_SAMPLER_DEFAULT_COLOR,
+ BRW_SAMPLER,
+ BRW_WM_UNIT,
+ BRW_SF_PROG,
+ BRW_SF_VP,
+ BRW_SF_UNIT,
+ BRW_VS_UNIT,
+ BRW_VS_PROG,
+ BRW_GS_UNIT,
+ BRW_GS_PROG,
+ BRW_CLIP_VP,
+ BRW_CLIP_UNIT,
+ BRW_CLIP_PROG,
+ BRW_SS_SURFACE,
+ BRW_SS_SURF_BIND,
+
+ BRW_MAX_CACHE
+};
+
+struct brw_cache_item {
+ /**
+ * Effectively part of the key, cache_id identifies what kind of state
+ * buffer is involved, and also which brw->state.dirty.cache flag should
+ * be set when this cache item is chosen.
+ */
+ enum brw_cache_id cache_id;
+ /** 32-bit hash of the key data */
+ GLuint hash;
+ GLuint key_size; /* for variable-sized keys */
+ const void *key;
+ dri_bo **reloc_bufs;
+ GLuint nr_reloc_bufs;
+
+ dri_bo *bo;
+ GLuint data_size;
+
+ struct brw_cache_item *next;
+};
+
+
+
+struct brw_cache {
+ struct brw_context *brw;
+
+ struct brw_cache_item **items;
+ GLuint size, n_items;
+
+ GLuint key_size[BRW_MAX_CACHE]; /* for fixed-size keys */
+ GLuint aux_size[BRW_MAX_CACHE];
+ char *name[BRW_MAX_CACHE];
+
+ /* Record of the last BOs chosen for each cache_id. Used to set
+ * brw->state.dirty.cache when a new cache item is chosen.
+ */
+ dri_bo *last_bo[BRW_MAX_CACHE];
+};
+
+
+/* Considered adding a member to this struct to document which flags
+ * an update might raise so that ordering of the state atoms can be
+ * checked or derived at runtime. Dropped the idea in favor of having
+ * a debug mode where the state is monitored for flags which are
+ * raised that have already been tested against.
+ */
+struct brw_tracked_state {
+ struct brw_state_flags dirty;
+ void (*prepare)( struct brw_context *brw );
+ void (*emit)( struct brw_context *brw );
+};
+
+/* Flags for brw->state.cache.
+ */
+#define CACHE_NEW_CC_VP (1<<BRW_CC_VP)
+#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT)
+#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG)
+#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR)
+#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER)
+#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT)
+#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG)
+#define CACHE_NEW_SF_VP (1<<BRW_SF_VP)
+#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT)
+#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT)
+#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG)
+#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT)
+#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG)
+#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP)
+#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT)
+#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG)
+#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE)
+#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND)
+
+struct brw_cached_batch_item {
+ struct header *header;
+ GLuint sz;
+ struct brw_cached_batch_item *next;
+};
+
+
+
+/* Protect against a future where VERT_ATTRIB_MAX > 32. Wouldn't life
+ * be easier if C allowed arrays of packed elements?
+ */
+#define ATTRIB_BIT_DWORDS ((VERT_ATTRIB_MAX+31)/32)
+
+struct brw_vertex_element {
+ const struct gl_client_array *glarray;
+
+ /** The corresponding Mesa vertex attribute */
+ gl_vert_attrib attrib;
+ /** Size of a complete element */
+ GLuint element_size;
+ /** Number of uploaded elements for this input. */
+ GLuint count;
+ /** Byte stride between elements in the uploaded array */
+ GLuint stride;
+ /** Offset of the first element within the buffer object */
+ unsigned int offset;
+ /** Buffer object containing the uploaded vertex data */
+ dri_bo *bo;
+};
+
+
+
+struct brw_vertex_info {
+ GLuint sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */
+};
+
+
+
+
+/* Cache for TNL programs.
+ */
+struct brw_tnl_cache_item {
+ GLuint hash;
+ void *key;
+ void *data;
+ struct brw_tnl_cache_item *next;
+};
+
+struct brw_tnl_cache {
+ struct brw_tnl_cache_item **items;
+ GLuint size, n_items;
+};
+
+struct brw_query_object {
+ struct gl_query_object Base;
+
+ /** Doubly linked list of active query objects in the context. */
+ struct brw_query_object *prev, *next;
+
+ /** Last query BO associated with this query. */
+ dri_bo *bo;
+ /** First index in bo with query data for this object. */
+ int first_index;
+ /** Last index in bo with query data for this object. */
+ int last_index;
+
+ /* Total count of pixels from previous BOs */
+ unsigned int count;
+};
+
+
+/**
+ * brw_context is derived from intel_context.
+ */
+struct brw_context
+{
+ struct intel_context intel; /**< base class, must be first field */
+ GLuint primitive;
+
+ GLboolean emit_state_always;
+ GLboolean tmp_fallback;
+ GLboolean no_batch_wrap;
+
+ struct {
+ struct brw_state_flags dirty;
+
+ GLuint nr_color_regions;
+ struct intel_region *color_regions[MAX_DRAW_BUFFERS];
+ struct intel_region *depth_region;
+
+ /**
+ * List of buffers accumulated in brw_validate_state to receive
+ * dri_bo_check_aperture treatment before exec, so we can know if we
+ * should flush the batch and try again before emitting primitives.
+ *
+ * This can be a fixed number as we only have a limited number of
+ * objects referenced from the batchbuffer in a primitive emit,
+ * consisting of the vertex buffers, pipelined state pointers,
+ * the CURBE, the depth buffer, and a query BO.
+ */
+ dri_bo *validated_bos[VERT_ATTRIB_MAX + 16];
+ int validated_bo_count;
+ } state;
+
+ struct brw_cache cache; /** non-surface items */
+ struct brw_cache surface_cache; /* surface items */
+ struct brw_cached_batch_item *cached_batch_items;
+
+ struct {
+ struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
+
+ struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
+ GLuint nr_enabled;
+
+#define BRW_NR_UPLOAD_BUFS 17
+#define BRW_UPLOAD_INIT_SIZE (128*1024)
+
+ struct {
+ dri_bo *bo;
+ GLuint offset;
+ } upload;
+
+ /* Summary of size and varying of active arrays, so we can check
+ * for changes to this state:
+ */
+ struct brw_vertex_info info;
+ unsigned int min_index, max_index;
+ } vb;
+
+ struct {
+ /**
+ * Index buffer for this draw_prims call.
+ *
+ * Updates are signaled by BRW_NEW_INDICES.
+ */
+ const struct _mesa_index_buffer *ib;
+
+ /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */
+ dri_bo *bo;
+ unsigned int offset;
+ unsigned int size;
+ /* Offset to index buffer index to use in CMD_3D_PRIM so that we can
+ * avoid re-uploading the IB packet over and over if we're actually
+ * referencing the same index buffer.
+ */
+ unsigned int start_vertex_offset;
+ } ib;
+
+ /* Active vertex program:
+ */
+ const struct gl_vertex_program *vertex_program;
+ const struct gl_fragment_program *fragment_program;
+
+
+ /* For populating the gtt:
+ */
+ GLuint next_free_page;
+
+
+ /* BRW_NEW_URB_ALLOCATIONS:
+ */
+ struct {
+ GLuint vsize; /* vertex size plus header in urb registers */
+ GLuint csize; /* constant buffer size in urb registers */
+ GLuint sfsize; /* setup data size in urb registers */
+
+ GLboolean constrained;
+
+ GLuint nr_vs_entries;
+ GLuint nr_gs_entries;
+ GLuint nr_clip_entries;
+ GLuint nr_sf_entries;
+ GLuint nr_cs_entries;
+
+/* GLuint vs_size; */
+/* GLuint gs_size; */
+/* GLuint clip_size; */
+/* GLuint sf_size; */
+/* GLuint cs_size; */
+
+ GLuint vs_start;
+ GLuint gs_start;
+ GLuint clip_start;
+ GLuint sf_start;
+ GLuint cs_start;
+ } urb;
+
+
+ /* BRW_NEW_CURBE_OFFSETS:
+ */
+ struct {
+ GLuint wm_start; /**< pos of first wm const in CURBE buffer */
+ GLuint wm_size; /**< number of float[4] consts, multiple of 16 */
+ GLuint clip_start;
+ GLuint clip_size;
+ GLuint vs_start;
+ GLuint vs_size;
+ GLuint total_size;
+
+ dri_bo *curbe_bo;
+ /** Offset within curbe_bo of space for current curbe entry */
+ GLuint curbe_offset;
+ /** Offset within curbe_bo of space for next curbe entry */
+ GLuint curbe_next_offset;
+
+ GLfloat *last_buf;
+ GLuint last_bufsz;
+ /**
+ * Whether we should create a new bo instead of reusing the old one
+ * (if we just dispatch the batch pointing at the old one.
+ */
+ GLboolean need_new_bo;
+ } curbe;
+
+ struct {
+ struct brw_vs_prog_data *prog_data;
+
+ dri_bo *prog_bo;
+ dri_bo *state_bo;
+
+ /** Binding table of pointers to surf_bo entries */
+ dri_bo *bind_bo;
+ dri_bo *surf_bo[BRW_VS_MAX_SURF];
+ GLuint nr_surfaces;
+ } vs;
+
+ struct {
+ struct brw_gs_prog_data *prog_data;
+
+ GLboolean prog_active;
+ dri_bo *prog_bo;
+ dri_bo *state_bo;
+ } gs;
+
+ struct {
+ struct brw_clip_prog_data *prog_data;
+
+ dri_bo *prog_bo;
+ dri_bo *state_bo;
+ dri_bo *vp_bo;
+ } clip;
+
+
+ struct {
+ struct brw_sf_prog_data *prog_data;
+
+ dri_bo *prog_bo;
+ dri_bo *state_bo;
+ dri_bo *vp_bo;
+ } sf;
+
+ struct {
+ struct brw_wm_prog_data *prog_data;
+ struct brw_wm_compile *compile_data;
+
+ /** Input sizes, calculated from active vertex program.
+ * One bit per fragment program input attribute.
+ */
+ GLbitfield input_size_masks[4];
+
+ /** Array of surface default colors (texture border color) */
+ dri_bo *sdc_bo[BRW_MAX_TEX_UNIT];
+
+ GLuint render_surf;
+ GLuint nr_surfaces;
+
+ GLuint max_threads;
+ dri_bo *scratch_bo;
+
+ GLuint sampler_count;
+ dri_bo *sampler_bo;
+
+ /** Binding table of pointers to surf_bo entries */
+ dri_bo *bind_bo;
+ dri_bo *surf_bo[BRW_WM_MAX_SURF];
+
+ dri_bo *prog_bo;
+ dri_bo *state_bo;
+ } wm;
+
+
+ struct {
+ dri_bo *prog_bo;
+ dri_bo *state_bo;
+ dri_bo *vp_bo;
+ } cc;
+
+ struct {
+ struct brw_query_object active_head;
+ dri_bo *bo;
+ int index;
+ GLboolean active;
+ } query;
+ /* Used to give every program string a unique id
+ */
+ GLuint program_id;
+};
+
+
+#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a)
+
+
+
+/*======================================================================
+ * brw_vtbl.c
+ */
+void brwInitVtbl( struct brw_context *brw );
+
+/*======================================================================
+ * brw_context.c
+ */
+GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
+ __DRIcontextPrivate *driContextPriv,
+ void *sharedContextPrivate);
+
+/*======================================================================
+ * brw_queryobj.c
+ */
+void brw_init_queryobj_functions(struct dd_function_table *functions);
+void brw_prepare_query_begin(struct brw_context *brw);
+void brw_emit_query_begin(struct brw_context *brw);
+void brw_emit_query_end(struct brw_context *brw);
+
+/*======================================================================
+ * brw_state_dump.c
+ */
+void brw_debug_batch(struct intel_context *intel);
+
+/*======================================================================
+ * brw_tex.c
+ */
+void brw_validate_textures( struct brw_context *brw );
+
+
+/*======================================================================
+ * brw_program.c
+ */
+void brwInitFragProgFuncs( struct dd_function_table *functions );
+
+
+/* brw_urb.c
+ */
+void brw_upload_urb_fence(struct brw_context *brw);
+
+/* brw_curbe.c
+ */
+void brw_upload_cs_urb_state(struct brw_context *brw);
+
+/* brw_disasm.c */
+int brw_disasm (FILE *file, struct brw_instruction *inst);
+
+/*======================================================================
+ * Inline conversion functions. These are better-typed than the
+ * macros used previously:
+ */
+static INLINE struct brw_context *
+brw_context( GLcontext *ctx )
+{
+ return (struct brw_context *)ctx;
+}
+
+static INLINE struct brw_vertex_program *
+brw_vertex_program(struct gl_vertex_program *p)
+{
+ return (struct brw_vertex_program *) p;
+}
+
+static INLINE const struct brw_vertex_program *
+brw_vertex_program_const(const struct gl_vertex_program *p)
+{
+ return (const struct brw_vertex_program *) p;
+}
+
+static INLINE struct brw_fragment_program *
+brw_fragment_program(struct gl_fragment_program *p)
+{
+ return (struct brw_fragment_program *) p;
+}
+
+static INLINE const struct brw_fragment_program *
+brw_fragment_program_const(const struct gl_fragment_program *p)
+{
+ return (const struct brw_fragment_program *) p;
+}
+
+
+
+#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1)
+
+#endif
+
diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
new file mode 100644
index 00000000000..4be6c77aa1e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -0,0 +1,376 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/prog_statevars.h"
+#include "intel_batchbuffer.h"
+#include "intel_regions.h"
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+#include "brw_util.h"
+
+
+/**
+ * Partition the CURBE between the various users of constant values:
+ * Note that vertex and fragment shaders can now fetch constants out
+ * of constant buffers. We no longer allocatea block of the GRF for
+ * constants. That greatly reduces the demand for space in the CURBE.
+ * Some of the comments within are dated...
+ */
+static void calculate_curbe_offsets( struct brw_context *brw )
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ /* CACHE_NEW_WM_PROG */
+ const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
+
+ /* BRW_NEW_VERTEX_PROGRAM */
+ const GLuint nr_vp_regs = (brw->vs.prog_data->nr_params + 15) / 16;
+ GLuint nr_clip_regs = 0;
+ GLuint total_regs;
+
+ /* _NEW_TRANSFORM */
+ if (ctx->Transform.ClipPlanesEnabled) {
+ GLuint nr_planes = 6 + brw_count_bits(ctx->Transform.ClipPlanesEnabled);
+ nr_clip_regs = (nr_planes * 4 + 15) / 16;
+ }
+
+
+ total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;
+
+ /* This can happen - what to do? Probably rather than falling
+ * back, the best thing to do is emit programs which code the
+ * constants as immediate values. Could do this either as a static
+ * cap on WM and VS, or adaptively.
+ *
+ * Unfortunately, this is currently dependent on the results of the
+ * program generation process (in the case of wm), so this would
+ * introduce the need to re-generate programs in the event of a
+ * curbe allocation failure.
+ */
+ /* Max size is 32 - just large enough to
+ * hold the 128 parameters allowed by
+ * the fragment and vertex program
+ * api's. It's not clear what happens
+ * when both VP and FP want to use 128
+ * parameters, though.
+ */
+ assert(total_regs <= 32);
+
+ /* Lazy resize:
+ */
+ if (nr_fp_regs > brw->curbe.wm_size ||
+ nr_vp_regs > brw->curbe.vs_size ||
+ nr_clip_regs != brw->curbe.clip_size ||
+ (total_regs < brw->curbe.total_size / 4 &&
+ brw->curbe.total_size > 16)) {
+
+ GLuint reg = 0;
+
+ /* Calculate a new layout:
+ */
+ reg = 0;
+ brw->curbe.wm_start = reg;
+ brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs;
+ brw->curbe.clip_start = reg;
+ brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs;
+ brw->curbe.vs_start = reg;
+ brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;
+ brw->curbe.total_size = reg;
+
+ if (0)
+ _mesa_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n",
+ brw->curbe.wm_start,
+ brw->curbe.wm_size,
+ brw->curbe.clip_start,
+ brw->curbe.clip_size,
+ brw->curbe.vs_start,
+ brw->curbe.vs_size );
+
+ brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
+ }
+}
+
+
+const struct brw_tracked_state brw_curbe_offsets = {
+ .dirty = {
+ .mesa = _NEW_TRANSFORM,
+ .brw = BRW_NEW_VERTEX_PROGRAM,
+ .cache = CACHE_NEW_WM_PROG
+ },
+ .prepare = calculate_curbe_offsets
+};
+
+
+
+
+/* Define the number of curbes within CS's urb allocation. Multiple
+ * urb entries -> multiple curbes. These will be used by
+ * fixed-function hardware in a double-buffering scheme to avoid a
+ * pipeline stall each time the contents of the curbe is changed.
+ */
+void brw_upload_cs_urb_state(struct brw_context *brw)
+{
+ struct brw_cs_urb_state cs_urb;
+ memset(&cs_urb, 0, sizeof(cs_urb));
+
+ /* It appears that this is the state packet for the CS unit, ie. the
+ * urb entries detailed here are housed in the CS range from the
+ * URB_FENCE command.
+ */
+ cs_urb.header.opcode = CMD_CS_URB_STATE;
+ cs_urb.header.length = sizeof(cs_urb)/4 - 2;
+
+ /* BRW_NEW_URB_FENCE */
+ cs_urb.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
+ cs_urb.bits0.urb_entry_size = brw->urb.csize - 1;
+
+ assert(brw->urb.nr_cs_entries);
+ BRW_CACHED_BATCH_STRUCT(brw, &cs_urb);
+}
+
+static GLfloat fixed_plane[6][4] = {
+ { 0, 0, -1, 1 },
+ { 0, 0, 1, 1 },
+ { 0, -1, 0, 1 },
+ { 0, 1, 0, 1 },
+ {-1, 0, 0, 1 },
+ { 1, 0, 0, 1 }
+};
+
+/* Upload a new set of constants. Too much variability to go into the
+ * cache mechanism, but maybe would benefit from a comparison against
+ * the current uploaded set of constants.
+ */
+static void prepare_constant_buffer(struct brw_context *brw)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ const struct brw_vertex_program *vp =
+ brw_vertex_program_const(brw->vertex_program);
+ const struct brw_fragment_program *fp =
+ brw_fragment_program_const(brw->fragment_program);
+ const GLuint sz = brw->curbe.total_size;
+ const GLuint bufsz = sz * 16 * sizeof(GLfloat);
+ GLfloat *buf;
+ GLuint i;
+
+ if (sz == 0) {
+ if (brw->curbe.last_buf) {
+ free(brw->curbe.last_buf);
+ brw->curbe.last_buf = NULL;
+ brw->curbe.last_bufsz = 0;
+ }
+ return;
+ }
+
+ buf = (GLfloat *) _mesa_calloc(bufsz);
+
+ /* fragment shader constants */
+ if (brw->curbe.wm_size) {
+ GLuint offset = brw->curbe.wm_start * 16;
+
+ _mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
+
+ /* copy float constants */
+ for (i = 0; i < brw->wm.prog_data->nr_params; i++)
+ buf[offset + i] = *brw->wm.prog_data->param[i];
+ }
+
+
+ /* The clipplanes are actually delivered to both CLIP and VS units.
+ * VS uses them to calculate the outcode bitmasks.
+ */
+ if (brw->curbe.clip_size) {
+ GLuint offset = brw->curbe.clip_start * 16;
+ GLuint j;
+
+ /* If any planes are going this way, send them all this way:
+ */
+ for (i = 0; i < 6; i++) {
+ buf[offset + i * 4 + 0] = fixed_plane[i][0];
+ buf[offset + i * 4 + 1] = fixed_plane[i][1];
+ buf[offset + i * 4 + 2] = fixed_plane[i][2];
+ buf[offset + i * 4 + 3] = fixed_plane[i][3];
+ }
+
+ /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
+ * clip-space:
+ */
+ assert(MAX_CLIP_PLANES == 6);
+ for (j = 0; j < MAX_CLIP_PLANES; j++) {
+ if (ctx->Transform.ClipPlanesEnabled & (1<<j)) {
+ buf[offset + i * 4 + 0] = ctx->Transform._ClipUserPlane[j][0];
+ buf[offset + i * 4 + 1] = ctx->Transform._ClipUserPlane[j][1];
+ buf[offset + i * 4 + 2] = ctx->Transform._ClipUserPlane[j][2];
+ buf[offset + i * 4 + 3] = ctx->Transform._ClipUserPlane[j][3];
+ i++;
+ }
+ }
+ }
+
+ /* vertex shader constants */
+ if (brw->curbe.vs_size) {
+ GLuint offset = brw->curbe.vs_start * 16;
+ GLuint nr = brw->vs.prog_data->nr_params / 4;
+
+ if (brw->vertex_program->IsNVProgram)
+ _mesa_load_tracked_matrices(ctx);
+
+ /* Updates the ParamaterValues[i] pointers for all parameters of the
+ * basic type of PROGRAM_STATE_VAR.
+ */
+ _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
+
+ /* XXX just use a memcpy here */
+ for (i = 0; i < nr; i++) {
+ const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i];
+ buf[offset + i * 4 + 0] = value[0];
+ buf[offset + i * 4 + 1] = value[1];
+ buf[offset + i * 4 + 2] = value[2];
+ buf[offset + i * 4 + 3] = value[3];
+ }
+ }
+
+ if (0) {
+ for (i = 0; i < sz*16; i+=4)
+ _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
+ buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
+
+ _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
+ brw->curbe.last_buf, buf,
+ bufsz, brw->curbe.last_bufsz,
+ brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
+ }
+
+ if (brw->curbe.curbe_bo != NULL &&
+ brw->curbe.last_buf &&
+ bufsz == brw->curbe.last_bufsz &&
+ memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
+ /* constants have not changed */
+ _mesa_free(buf);
+ }
+ else {
+ /* constants have changed */
+ if (brw->curbe.last_buf)
+ _mesa_free(brw->curbe.last_buf);
+
+ brw->curbe.last_buf = buf;
+ brw->curbe.last_bufsz = bufsz;
+
+ if (brw->curbe.curbe_bo != NULL &&
+ (brw->curbe.need_new_bo ||
+ brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size))
+ {
+ dri_bo_unreference(brw->curbe.curbe_bo);
+ brw->curbe.curbe_bo = NULL;
+ }
+
+ if (brw->curbe.curbe_bo == NULL) {
+ /* Allocate a single page for CURBE entries for this batchbuffer.
+ * They're generally around 64b.
+ */
+ brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE",
+ 4096, 1 << 6);
+ brw->curbe.curbe_next_offset = 0;
+ }
+
+ brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
+ brw->curbe.curbe_next_offset += bufsz;
+ brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64);
+
+ /* Copy data to the buffer:
+ */
+ dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf);
+ }
+
+ brw_add_validated_bo(brw, brw->curbe.curbe_bo);
+
+ /* Because this provokes an action (ie copy the constants into the
+ * URB), it shouldn't be shortcircuited if identical to the
+ * previous time - because eg. the urb destination may have
+ * changed, or the urb contents different to last time.
+ *
+ * Note that the data referred to is actually copied internally,
+ * not just used in place according to passed pointer.
+ *
+ * It appears that the CS unit takes care of using each available
+ * URB entry (Const URB Entry == CURBE) in turn, and issuing
+ * flushes as necessary when doublebuffering of CURBEs isn't
+ * possible.
+ */
+}
+
+static void emit_constant_buffer(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ GLuint sz = brw->curbe.total_size;
+
+ BEGIN_BATCH(2, IGNORE_CLIPRECTS);
+ if (sz == 0) {
+ OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
+ OUT_BATCH(0);
+ } else {
+ OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
+ OUT_RELOC(brw->curbe.curbe_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ (sz - 1) + brw->curbe.curbe_offset);
+ }
+ ADVANCE_BATCH();
+}
+
+/* This tracked state is unique in that the state it monitors varies
+ * dynamically depending on the parameters tracked by the fragment and
+ * vertex programs. This is the template used as a starting point,
+ * each context will maintain a copy of this internally and update as
+ * required.
+ */
+const struct brw_tracked_state brw_constant_buffer = {
+ .dirty = {
+ .mesa = _NEW_PROGRAM_CONSTANTS,
+ .brw = (BRW_NEW_FRAGMENT_PROGRAM |
+ BRW_NEW_VERTEX_PROGRAM |
+ BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
+ BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
+ BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_BATCH),
+ .cache = (CACHE_NEW_WM_PROG)
+ },
+ .prepare = prepare_constant_buffer,
+ .emit = emit_constant_buffer,
+};
+
diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h
new file mode 100644
index 00000000000..78d457ad2be
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_defines.h
@@ -0,0 +1,851 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#ifndef BRW_DEFINES_H
+#define BRW_DEFINES_H
+
+/* 3D state:
+ */
+#define _3DOP_3DSTATE_PIPELINED 0x0
+#define _3DOP_3DSTATE_NONPIPELINED 0x1
+#define _3DOP_3DCONTROL 0x2
+#define _3DOP_3DPRIMITIVE 0x3
+
+#define _3DSTATE_PIPELINED_POINTERS 0x00
+#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
+#define _3DSTATE_VERTEX_BUFFERS 0x08
+#define _3DSTATE_VERTEX_ELEMENTS 0x09
+#define _3DSTATE_INDEX_BUFFER 0x0A
+#define _3DSTATE_VF_STATISTICS 0x0B
+#define _3DSTATE_DRAWING_RECTANGLE 0x00
+#define _3DSTATE_CONSTANT_COLOR 0x01
+#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
+#define _3DSTATE_CHROMA_KEY 0x04
+#define _3DSTATE_DEPTH_BUFFER 0x05
+#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
+#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
+#define _3DSTATE_LINE_STIPPLE 0x08
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
+#define _3DCONTROL 0x00
+
+#define PIPE_CONTROL_NOWRITE 0x00
+#define PIPE_CONTROL_WRITEIMMEDIATE 0x01
+#define PIPE_CONTROL_WRITEDEPTH 0x02
+#define PIPE_CONTROL_WRITETIMESTAMP 0x03
+
+#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00
+#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01
+
+#define _3DPRIM_POINTLIST 0x01
+#define _3DPRIM_LINELIST 0x02
+#define _3DPRIM_LINESTRIP 0x03
+#define _3DPRIM_TRILIST 0x04
+#define _3DPRIM_TRISTRIP 0x05
+#define _3DPRIM_TRIFAN 0x06
+#define _3DPRIM_QUADLIST 0x07
+#define _3DPRIM_QUADSTRIP 0x08
+#define _3DPRIM_LINELIST_ADJ 0x09
+#define _3DPRIM_LINESTRIP_ADJ 0x0A
+#define _3DPRIM_TRILIST_ADJ 0x0B
+#define _3DPRIM_TRISTRIP_ADJ 0x0C
+#define _3DPRIM_TRISTRIP_REVERSE 0x0D
+#define _3DPRIM_POLYGON 0x0E
+#define _3DPRIM_RECTLIST 0x0F
+#define _3DPRIM_LINELOOP 0x10
+#define _3DPRIM_POINTLIST_BF 0x11
+#define _3DPRIM_LINESTRIP_CONT 0x12
+#define _3DPRIM_LINESTRIP_BF 0x13
+#define _3DPRIM_LINESTRIP_CONT_BF 0x14
+#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
+
+#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
+#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
+
+#define BRW_ANISORATIO_2 0
+#define BRW_ANISORATIO_4 1
+#define BRW_ANISORATIO_6 2
+#define BRW_ANISORATIO_8 3
+#define BRW_ANISORATIO_10 4
+#define BRW_ANISORATIO_12 5
+#define BRW_ANISORATIO_14 6
+#define BRW_ANISORATIO_16 7
+
+#define BRW_BLENDFACTOR_ONE 0x1
+#define BRW_BLENDFACTOR_SRC_COLOR 0x2
+#define BRW_BLENDFACTOR_SRC_ALPHA 0x3
+#define BRW_BLENDFACTOR_DST_ALPHA 0x4
+#define BRW_BLENDFACTOR_DST_COLOR 0x5
+#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
+#define BRW_BLENDFACTOR_CONST_COLOR 0x7
+#define BRW_BLENDFACTOR_CONST_ALPHA 0x8
+#define BRW_BLENDFACTOR_SRC1_COLOR 0x9
+#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A
+#define BRW_BLENDFACTOR_ZERO 0x11
+#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12
+#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13
+#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14
+#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15
+#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17
+#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18
+#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19
+#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
+
+#define BRW_BLENDFUNCTION_ADD 0
+#define BRW_BLENDFUNCTION_SUBTRACT 1
+#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2
+#define BRW_BLENDFUNCTION_MIN 3
+#define BRW_BLENDFUNCTION_MAX 4
+
+#define BRW_ALPHATEST_FORMAT_UNORM8 0
+#define BRW_ALPHATEST_FORMAT_FLOAT32 1
+
+#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0
+#define BRW_CHROMAKEY_REPLACE_BLACK 1
+
+#define BRW_CLIP_API_OGL 0
+#define BRW_CLIP_API_DX 1
+
+#define BRW_CLIPMODE_NORMAL 0
+#define BRW_CLIPMODE_CLIP_ALL 1
+#define BRW_CLIPMODE_CLIP_NON_REJECTED 2
+#define BRW_CLIPMODE_REJECT_ALL 3
+#define BRW_CLIPMODE_ACCEPT_ALL 4
+#define BRW_CLIPMODE_KERNEL_CLIP 5
+
+#define BRW_CLIP_NDCSPACE 0
+#define BRW_CLIP_SCREENSPACE 1
+
+#define BRW_COMPAREFUNCTION_ALWAYS 0
+#define BRW_COMPAREFUNCTION_NEVER 1
+#define BRW_COMPAREFUNCTION_LESS 2
+#define BRW_COMPAREFUNCTION_EQUAL 3
+#define BRW_COMPAREFUNCTION_LEQUAL 4
+#define BRW_COMPAREFUNCTION_GREATER 5
+#define BRW_COMPAREFUNCTION_NOTEQUAL 6
+#define BRW_COMPAREFUNCTION_GEQUAL 7
+
+#define BRW_COVERAGE_PIXELS_HALF 0
+#define BRW_COVERAGE_PIXELS_1 1
+#define BRW_COVERAGE_PIXELS_2 2
+#define BRW_COVERAGE_PIXELS_4 3
+
+#define BRW_CULLMODE_BOTH 0
+#define BRW_CULLMODE_NONE 1
+#define BRW_CULLMODE_FRONT 2
+#define BRW_CULLMODE_BACK 3
+
+#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0
+#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
+
+#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
+#define BRW_DEPTHFORMAT_D32_FLOAT 1
+#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2
+#define BRW_DEPTHFORMAT_D16_UNORM 5
+
+#define BRW_FLOATING_POINT_IEEE_754 0
+#define BRW_FLOATING_POINT_NON_IEEE_754 1
+
+#define BRW_FRONTWINDING_CW 0
+#define BRW_FRONTWINDING_CCW 1
+
+#define BRW_SPRITE_POINT_ENABLE 16
+
+#define BRW_INDEX_BYTE 0
+#define BRW_INDEX_WORD 1
+#define BRW_INDEX_DWORD 2
+
+#define BRW_LOGICOPFUNCTION_CLEAR 0
+#define BRW_LOGICOPFUNCTION_NOR 1
+#define BRW_LOGICOPFUNCTION_AND_INVERTED 2
+#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3
+#define BRW_LOGICOPFUNCTION_AND_REVERSE 4
+#define BRW_LOGICOPFUNCTION_INVERT 5
+#define BRW_LOGICOPFUNCTION_XOR 6
+#define BRW_LOGICOPFUNCTION_NAND 7
+#define BRW_LOGICOPFUNCTION_AND 8
+#define BRW_LOGICOPFUNCTION_EQUIV 9
+#define BRW_LOGICOPFUNCTION_NOOP 10
+#define BRW_LOGICOPFUNCTION_OR_INVERTED 11
+#define BRW_LOGICOPFUNCTION_COPY 12
+#define BRW_LOGICOPFUNCTION_OR_REVERSE 13
+#define BRW_LOGICOPFUNCTION_OR 14
+#define BRW_LOGICOPFUNCTION_SET 15
+
+#define BRW_MAPFILTER_NEAREST 0x0
+#define BRW_MAPFILTER_LINEAR 0x1
+#define BRW_MAPFILTER_ANISOTROPIC 0x2
+
+#define BRW_MIPFILTER_NONE 0
+#define BRW_MIPFILTER_NEAREST 1
+#define BRW_MIPFILTER_LINEAR 3
+
+#define BRW_POLYGON_FRONT_FACING 0
+#define BRW_POLYGON_BACK_FACING 1
+
+#define BRW_PREFILTER_ALWAYS 0x0
+#define BRW_PREFILTER_NEVER 0x1
+#define BRW_PREFILTER_LESS 0x2
+#define BRW_PREFILTER_EQUAL 0x3
+#define BRW_PREFILTER_LEQUAL 0x4
+#define BRW_PREFILTER_GREATER 0x5
+#define BRW_PREFILTER_NOTEQUAL 0x6
+#define BRW_PREFILTER_GEQUAL 0x7
+
+#define BRW_PROVOKING_VERTEX_0 0
+#define BRW_PROVOKING_VERTEX_1 1
+#define BRW_PROVOKING_VERTEX_2 2
+
+#define BRW_RASTRULE_UPPER_LEFT 0
+#define BRW_RASTRULE_UPPER_RIGHT 1
+/* These are listed as "Reserved, but not seen as useful"
+ * in Intel documentation (page 212, "Point Rasterization Rule",
+ * section 7.4 "SF Pipeline State Summary", of document
+ * "Intel® 965 Express Chipset Family and Intel® G35 Express
+ * Chipset Graphics Controller Programmer's Reference Manual,
+ * Volume 2: 3D/Media", Revision 1.0b as of January 2008,
+ * available at
+ * http://intellinuxgraphics.org/documentation.html
+ * at the time of this writing).
+ *
+ * These appear to be supported on at least some
+ * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT
+ * is useful when using OpenGL to render to a FBO
+ * (which has the pixel coordinate Y orientation inverted
+ * with respect to the normal OpenGL pixel coordinate system).
+ */
+#define BRW_RASTRULE_LOWER_LEFT 2
+#define BRW_RASTRULE_LOWER_RIGHT 3
+
+#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0
+#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1
+#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2
+
+#define BRW_STENCILOP_KEEP 0
+#define BRW_STENCILOP_ZERO 1
+#define BRW_STENCILOP_REPLACE 2
+#define BRW_STENCILOP_INCRSAT 3
+#define BRW_STENCILOP_DECRSAT 4
+#define BRW_STENCILOP_INCR 5
+#define BRW_STENCILOP_DECR 6
+#define BRW_STENCILOP_INVERT 7
+
+#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0
+#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1
+
+#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
+#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001
+#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002
+#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
+#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
+#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005
+#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
+#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
+#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
+#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040
+#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041
+#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042
+#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043
+#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044
+#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045
+#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046
+#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
+#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
+#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082
+#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083
+#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
+#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085
+#define BRW_SURFACEFORMAT_R32G32_SINT 0x086
+#define BRW_SURFACEFORMAT_R32G32_UINT 0x087
+#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
+#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
+#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A
+#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B
+#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C
+#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D
+#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
+#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
+#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090
+#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091
+#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092
+#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
+#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
+#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095
+#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
+#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
+#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
+#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
+#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
+#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
+#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC
+#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD
+#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE
+#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF
+#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
+#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
+#define BRW_SURFACEFORMAT_R32_SINT 0x0D6
+#define BRW_SURFACEFORMAT_R32_UINT 0x0D7
+#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8
+#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
+#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
+#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF
+#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0
+#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1
+#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2
+#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3
+#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4
+#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
+#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
+#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
+#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0
+#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1
+#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2
+#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
+#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
+#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
+#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6
+#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7
+#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8
+#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
+#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106
+#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107
+#define BRW_SURFACEFORMAT_R8G8_SINT 0x108
+#define BRW_SURFACEFORMAT_R8G8_UINT 0x109
+#define BRW_SURFACEFORMAT_R16_UNORM 0x10A
+#define BRW_SURFACEFORMAT_R16_SNORM 0x10B
+#define BRW_SURFACEFORMAT_R16_SINT 0x10C
+#define BRW_SURFACEFORMAT_R16_UINT 0x10D
+#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E
+#define BRW_SURFACEFORMAT_I16_UNORM 0x111
+#define BRW_SURFACEFORMAT_L16_UNORM 0x112
+#define BRW_SURFACEFORMAT_A16_UNORM 0x113
+#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114
+#define BRW_SURFACEFORMAT_I16_FLOAT 0x115
+#define BRW_SURFACEFORMAT_L16_FLOAT 0x116
+#define BRW_SURFACEFORMAT_A16_FLOAT 0x117
+#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB 0x118
+#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
+#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C
+#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D
+#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E
+#define BRW_SURFACEFORMAT_R16_USCALED 0x11F
+#define BRW_SURFACEFORMAT_R8_UNORM 0x140
+#define BRW_SURFACEFORMAT_R8_SNORM 0x141
+#define BRW_SURFACEFORMAT_R8_SINT 0x142
+#define BRW_SURFACEFORMAT_R8_UINT 0x143
+#define BRW_SURFACEFORMAT_A8_UNORM 0x144
+#define BRW_SURFACEFORMAT_I8_UNORM 0x145
+#define BRW_SURFACEFORMAT_L8_UNORM 0x146
+#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147
+#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148
+#define BRW_SURFACEFORMAT_R8_SSCALED 0x149
+#define BRW_SURFACEFORMAT_R8_USCALED 0x14A
+#define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C
+#define BRW_SURFACEFORMAT_R1_UINT 0x181
+#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
+#define BRW_SURFACEFORMAT_BC1_UNORM 0x186
+#define BRW_SURFACEFORMAT_BC2_UNORM 0x187
+#define BRW_SURFACEFORMAT_BC3_UNORM 0x188
+#define BRW_SURFACEFORMAT_BC4_UNORM 0x189
+#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A
+#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
+#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
+#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
+#define BRW_SURFACEFORMAT_MONO8 0x18E
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
+#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190
+#define BRW_SURFACEFORMAT_DXT1_RGB 0x191
+#define BRW_SURFACEFORMAT_FXT1 0x192
+#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193
+#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194
+#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195
+#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196
+#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
+#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198
+#define BRW_SURFACEFORMAT_BC4_SNORM 0x199
+#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A
+#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C
+#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D
+#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
+#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F
+
+#define BRW_SURFACERETURNFORMAT_FLOAT32 0
+#define BRW_SURFACERETURNFORMAT_S1 1
+
+#define BRW_SURFACE_1D 0
+#define BRW_SURFACE_2D 1
+#define BRW_SURFACE_3D 2
+#define BRW_SURFACE_CUBE 3
+#define BRW_SURFACE_BUFFER 4
+#define BRW_SURFACE_NULL 7
+
+#define BRW_TEXCOORDMODE_WRAP 0
+#define BRW_TEXCOORDMODE_MIRROR 1
+#define BRW_TEXCOORDMODE_CLAMP 2
+#define BRW_TEXCOORDMODE_CUBE 3
+#define BRW_TEXCOORDMODE_CLAMP_BORDER 4
+#define BRW_TEXCOORDMODE_MIRROR_ONCE 5
+
+#define BRW_THREAD_PRIORITY_NORMAL 0
+#define BRW_THREAD_PRIORITY_HIGH 1
+
+#define BRW_TILEWALK_XMAJOR 0
+#define BRW_TILEWALK_YMAJOR 1
+
+#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0
+#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1
+
+/* Execution Unit (EU) defines
+ */
+
+#define BRW_ALIGN_1 0
+#define BRW_ALIGN_16 1
+
+#define BRW_ADDRESS_DIRECT 0
+#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1
+
+#define BRW_CHANNEL_X 0
+#define BRW_CHANNEL_Y 1
+#define BRW_CHANNEL_Z 2
+#define BRW_CHANNEL_W 3
+
+#define BRW_COMPRESSION_NONE 0
+#define BRW_COMPRESSION_2NDHALF 1
+#define BRW_COMPRESSION_COMPRESSED 2
+
+#define BRW_CONDITIONAL_NONE 0
+#define BRW_CONDITIONAL_Z 1
+#define BRW_CONDITIONAL_NZ 2
+#define BRW_CONDITIONAL_EQ 1 /* Z */
+#define BRW_CONDITIONAL_NEQ 2 /* NZ */
+#define BRW_CONDITIONAL_G 3
+#define BRW_CONDITIONAL_GE 4
+#define BRW_CONDITIONAL_L 5
+#define BRW_CONDITIONAL_LE 6
+#define BRW_CONDITIONAL_R 7
+#define BRW_CONDITIONAL_O 8
+#define BRW_CONDITIONAL_U 9
+
+#define BRW_DEBUG_NONE 0
+#define BRW_DEBUG_BREAKPOINT 1
+
+#define BRW_DEPENDENCY_NORMAL 0
+#define BRW_DEPENDENCY_NOTCLEARED 1
+#define BRW_DEPENDENCY_NOTCHECKED 2
+#define BRW_DEPENDENCY_DISABLE 3
+
+#define BRW_EXECUTE_1 0
+#define BRW_EXECUTE_2 1
+#define BRW_EXECUTE_4 2
+#define BRW_EXECUTE_8 3
+#define BRW_EXECUTE_16 4
+#define BRW_EXECUTE_32 5
+
+#define BRW_HORIZONTAL_STRIDE_0 0
+#define BRW_HORIZONTAL_STRIDE_1 1
+#define BRW_HORIZONTAL_STRIDE_2 2
+#define BRW_HORIZONTAL_STRIDE_4 3
+
+#define BRW_INSTRUCTION_NORMAL 0
+#define BRW_INSTRUCTION_SATURATE 1
+
+#define BRW_MASK_ENABLE 0
+#define BRW_MASK_DISABLE 1
+
+#define BRW_OPCODE_MOV 1
+#define BRW_OPCODE_SEL 2
+#define BRW_OPCODE_NOT 4
+#define BRW_OPCODE_AND 5
+#define BRW_OPCODE_OR 6
+#define BRW_OPCODE_XOR 7
+#define BRW_OPCODE_SHR 8
+#define BRW_OPCODE_SHL 9
+#define BRW_OPCODE_RSR 10
+#define BRW_OPCODE_RSL 11
+#define BRW_OPCODE_ASR 12
+#define BRW_OPCODE_CMP 16
+#define BRW_OPCODE_CMPN 17
+#define BRW_OPCODE_JMPI 32
+#define BRW_OPCODE_IF 34
+#define BRW_OPCODE_IFF 35
+#define BRW_OPCODE_ELSE 36
+#define BRW_OPCODE_ENDIF 37
+#define BRW_OPCODE_DO 38
+#define BRW_OPCODE_WHILE 39
+#define BRW_OPCODE_BREAK 40
+#define BRW_OPCODE_CONTINUE 41
+#define BRW_OPCODE_HALT 42
+#define BRW_OPCODE_MSAVE 44
+#define BRW_OPCODE_MRESTORE 45
+#define BRW_OPCODE_PUSH 46
+#define BRW_OPCODE_POP 47
+#define BRW_OPCODE_WAIT 48
+#define BRW_OPCODE_SEND 49
+#define BRW_OPCODE_ADD 64
+#define BRW_OPCODE_MUL 65
+#define BRW_OPCODE_AVG 66
+#define BRW_OPCODE_FRC 67
+#define BRW_OPCODE_RNDU 68
+#define BRW_OPCODE_RNDD 69
+#define BRW_OPCODE_RNDE 70
+#define BRW_OPCODE_RNDZ 71
+#define BRW_OPCODE_MAC 72
+#define BRW_OPCODE_MACH 73
+#define BRW_OPCODE_LZD 74
+#define BRW_OPCODE_SAD2 80
+#define BRW_OPCODE_SADA2 81
+#define BRW_OPCODE_DP4 84
+#define BRW_OPCODE_DPH 85
+#define BRW_OPCODE_DP3 86
+#define BRW_OPCODE_DP2 87
+#define BRW_OPCODE_DPA2 88
+#define BRW_OPCODE_LINE 89
+#define BRW_OPCODE_NOP 126
+
+#define BRW_PREDICATE_NONE 0
+#define BRW_PREDICATE_NORMAL 1
+#define BRW_PREDICATE_ALIGN1_ANYV 2
+#define BRW_PREDICATE_ALIGN1_ALLV 3
+#define BRW_PREDICATE_ALIGN1_ANY2H 4
+#define BRW_PREDICATE_ALIGN1_ALL2H 5
+#define BRW_PREDICATE_ALIGN1_ANY4H 6
+#define BRW_PREDICATE_ALIGN1_ALL4H 7
+#define BRW_PREDICATE_ALIGN1_ANY8H 8
+#define BRW_PREDICATE_ALIGN1_ALL8H 9
+#define BRW_PREDICATE_ALIGN1_ANY16H 10
+#define BRW_PREDICATE_ALIGN1_ALL16H 11
+#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4
+#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5
+#define BRW_PREDICATE_ALIGN16_ANY4H 6
+#define BRW_PREDICATE_ALIGN16_ALL4H 7
+
+#define BRW_ARCHITECTURE_REGISTER_FILE 0
+#define BRW_GENERAL_REGISTER_FILE 1
+#define BRW_MESSAGE_REGISTER_FILE 2
+#define BRW_IMMEDIATE_VALUE 3
+
+#define BRW_REGISTER_TYPE_UD 0
+#define BRW_REGISTER_TYPE_D 1
+#define BRW_REGISTER_TYPE_UW 2
+#define BRW_REGISTER_TYPE_W 3
+#define BRW_REGISTER_TYPE_UB 4
+#define BRW_REGISTER_TYPE_B 5
+#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
+#define BRW_REGISTER_TYPE_HF 6
+#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
+#define BRW_REGISTER_TYPE_F 7
+
+#define BRW_ARF_NULL 0x00
+#define BRW_ARF_ADDRESS 0x10
+#define BRW_ARF_ACCUMULATOR 0x20
+#define BRW_ARF_FLAG 0x30
+#define BRW_ARF_MASK 0x40
+#define BRW_ARF_MASK_STACK 0x50
+#define BRW_ARF_MASK_STACK_DEPTH 0x60
+#define BRW_ARF_STATE 0x70
+#define BRW_ARF_CONTROL 0x80
+#define BRW_ARF_NOTIFICATION_COUNT 0x90
+#define BRW_ARF_IP 0xA0
+
+#define BRW_AMASK 0
+#define BRW_IMASK 1
+#define BRW_LMASK 2
+#define BRW_CMASK 3
+
+
+
+#define BRW_THREAD_NORMAL 0
+#define BRW_THREAD_ATOMIC 1
+#define BRW_THREAD_SWITCH 2
+
+#define BRW_VERTICAL_STRIDE_0 0
+#define BRW_VERTICAL_STRIDE_1 1
+#define BRW_VERTICAL_STRIDE_2 2
+#define BRW_VERTICAL_STRIDE_4 3
+#define BRW_VERTICAL_STRIDE_8 4
+#define BRW_VERTICAL_STRIDE_16 5
+#define BRW_VERTICAL_STRIDE_32 6
+#define BRW_VERTICAL_STRIDE_64 7
+#define BRW_VERTICAL_STRIDE_128 8
+#define BRW_VERTICAL_STRIDE_256 9
+#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
+
+#define BRW_WIDTH_1 0
+#define BRW_WIDTH_2 1
+#define BRW_WIDTH_4 2
+#define BRW_WIDTH_8 3
+#define BRW_WIDTH_16 4
+
+#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0
+#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1
+#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2
+#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3
+#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4
+#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5
+#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6
+#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7
+#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8
+#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9
+#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10
+#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11
+
+#define BRW_POLYGON_FACING_FRONT 0
+#define BRW_POLYGON_FACING_BACK 1
+
+#define BRW_MESSAGE_TARGET_NULL 0
+#define BRW_MESSAGE_TARGET_MATH 1
+#define BRW_MESSAGE_TARGET_SAMPLER 2
+#define BRW_MESSAGE_TARGET_GATEWAY 3
+#define BRW_MESSAGE_TARGET_DATAPORT_READ 4
+#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5
+#define BRW_MESSAGE_TARGET_URB 6
+#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7
+
+#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0
+#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2
+#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3
+
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
+#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3
+#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
+#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3
+
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3
+
+/* for IGDNG only */
+#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
+#define BRW_SAMPLER_SIMD_MODE_SIMD8 1
+#define BRW_SAMPLER_SIMD_MODE_SIMD16 2
+#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3
+
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
+#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
+#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3
+#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4
+
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
+
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
+
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
+
+#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0
+#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1
+#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
+
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
+
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
+#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
+#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
+
+#define BRW_MATH_FUNCTION_INV 1
+#define BRW_MATH_FUNCTION_LOG 2
+#define BRW_MATH_FUNCTION_EXP 3
+#define BRW_MATH_FUNCTION_SQRT 4
+#define BRW_MATH_FUNCTION_RSQ 5
+#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */
+#define BRW_MATH_FUNCTION_COS 7 /* was 8 */
+#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */
+#define BRW_MATH_FUNCTION_TAN 9
+#define BRW_MATH_FUNCTION_POW 10
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12
+#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13
+
+#define BRW_MATH_INTEGER_UNSIGNED 0
+#define BRW_MATH_INTEGER_SIGNED 1
+
+#define BRW_MATH_PRECISION_FULL 0
+#define BRW_MATH_PRECISION_PARTIAL 1
+
+#define BRW_MATH_SATURATE_NONE 0
+#define BRW_MATH_SATURATE_SATURATE 1
+
+#define BRW_MATH_DATA_VECTOR 0
+#define BRW_MATH_DATA_SCALAR 1
+
+#define BRW_URB_OPCODE_WRITE 0
+
+#define BRW_URB_SWIZZLE_NONE 0
+#define BRW_URB_SWIZZLE_INTERLEAVE 1
+#define BRW_URB_SWIZZLE_TRANSPOSE 2
+
+#define BRW_SCRATCH_SPACE_SIZE_1K 0
+#define BRW_SCRATCH_SPACE_SIZE_2K 1
+#define BRW_SCRATCH_SPACE_SIZE_4K 2
+#define BRW_SCRATCH_SPACE_SIZE_8K 3
+#define BRW_SCRATCH_SPACE_SIZE_16K 4
+#define BRW_SCRATCH_SPACE_SIZE_32K 5
+#define BRW_SCRATCH_SPACE_SIZE_64K 6
+#define BRW_SCRATCH_SPACE_SIZE_128K 7
+#define BRW_SCRATCH_SPACE_SIZE_256K 8
+#define BRW_SCRATCH_SPACE_SIZE_512K 9
+#define BRW_SCRATCH_SPACE_SIZE_1M 10
+#define BRW_SCRATCH_SPACE_SIZE_2M 11
+
+
+
+
+#define CMD_URB_FENCE 0x6000
+#define CMD_CS_URB_STATE 0x6001
+#define CMD_CONST_BUFFER 0x6002
+
+#define CMD_STATE_BASE_ADDRESS 0x6101
+#define CMD_STATE_INSN_POINTER 0x6102
+#define CMD_PIPELINE_SELECT_965 0x6104
+#define CMD_PIPELINE_SELECT_GM45 0x6904
+
+#define CMD_PIPELINED_STATE_POINTERS 0x7800
+#define CMD_BINDING_TABLE_PTRS 0x7801
+
+#define CMD_VERTEX_BUFFER 0x7808
+# define BRW_VB0_INDEX_SHIFT 27
+# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26)
+# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26)
+# define BRW_VB0_PITCH_SHIFT 0
+
+#define CMD_VERTEX_ELEMENT 0x7809
+# define BRW_VE0_INDEX_SHIFT 27
+# define BRW_VE0_FORMAT_SHIFT 16
+# define BRW_VE0_VALID (1 << 26)
+# define BRW_VE0_SRC_OFFSET_SHIFT 0
+# define BRW_VE1_COMPONENT_NOSTORE 0
+# define BRW_VE1_COMPONENT_STORE_SRC 1
+# define BRW_VE1_COMPONENT_STORE_0 2
+# define BRW_VE1_COMPONENT_STORE_1_FLT 3
+# define BRW_VE1_COMPONENT_STORE_1_INT 4
+# define BRW_VE1_COMPONENT_STORE_VID 5
+# define BRW_VE1_COMPONENT_STORE_IID 6
+# define BRW_VE1_COMPONENT_STORE_PID 7
+# define BRW_VE1_COMPONENT_0_SHIFT 28
+# define BRW_VE1_COMPONENT_1_SHIFT 24
+# define BRW_VE1_COMPONENT_2_SHIFT 20
+# define BRW_VE1_COMPONENT_3_SHIFT 16
+# define BRW_VE1_DST_OFFSET_SHIFT 0
+
+#define CMD_INDEX_BUFFER 0x780a
+#define CMD_VF_STATISTICS_965 0x780b
+#define CMD_VF_STATISTICS_GM45 0x680b
+
+#define CMD_DRAW_RECT 0x7900
+#define CMD_BLEND_CONSTANT_COLOR 0x7901
+#define CMD_CHROMA_KEY 0x7904
+#define CMD_DEPTH_BUFFER 0x7905
+#define CMD_POLY_STIPPLE_OFFSET 0x7906
+#define CMD_POLY_STIPPLE_PATTERN 0x7907
+#define CMD_LINE_STIPPLE_PATTERN 0x7908
+#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
+#define CMD_AA_LINE_PARAMETERS 0x790a
+
+#define CMD_PIPE_CONTROL 0x7a00
+
+#define CMD_3D_PRIM 0x7b00
+
+#define CMD_MI_FLUSH 0x0200
+
+
+/* Various values from the R0 vertex header:
+ */
+#define R02_PRIM_END 0x1
+#define R02_PRIM_START 0x2
+
+#include "intel_chipset.h"
+
+#define BRW_IS_G4X(brw) (IS_G4X((brw)->intel.intelScreen->deviceID))
+#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->intel.intelScreen->deviceID))
+#define BRW_IS_965(brw) (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)))
+#define CMD_PIPELINE_SELECT(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965)
+#define CMD_VF_STATISTICS(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965)
+#define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \
+ (BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c
new file mode 100644
index 00000000000..9fef230507f
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_disasm.c
@@ -0,0 +1,903 @@
+/*
+ * Copyright © 2008 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. The copyright holders make no representations
+ * about the suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <stdarg.h>
+
+#include "main/mtypes.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+
+struct {
+ char *name;
+ int nsrc;
+ int ndst;
+} opcode[128] = {
+ [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
+
+ [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 },
+ [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
+};
+
+char *conditional_modifier[16] = {
+ [BRW_CONDITIONAL_NONE] = "",
+ [BRW_CONDITIONAL_Z] = ".e",
+ [BRW_CONDITIONAL_NZ] = ".ne",
+ [BRW_CONDITIONAL_G] = ".g",
+ [BRW_CONDITIONAL_GE] = ".ge",
+ [BRW_CONDITIONAL_L] = ".l",
+ [BRW_CONDITIONAL_LE] = ".le",
+ [BRW_CONDITIONAL_R] = ".r",
+ [BRW_CONDITIONAL_O] = ".o",
+ [BRW_CONDITIONAL_U] = ".u",
+};
+
+char *negate[2] = {
+ [0] = "",
+ [1] = "-",
+};
+
+char *_abs[2] = {
+ [0] = "",
+ [1] = "(abs)",
+};
+
+char *vert_stride[16] = {
+ [0] = "0",
+ [1] = "1",
+ [2] = "2",
+ [3] = "4",
+ [4] = "8",
+ [5] = "16",
+ [6] = "32",
+ [15] = "VxH",
+};
+
+char *width[8] = {
+ [0] = "1",
+ [1] = "2",
+ [2] = "4",
+ [3] = "8",
+ [4] = "16",
+};
+
+char *horiz_stride[4] = {
+ [0] = "0",
+ [1] = "1",
+ [2] = "2",
+ [3] = "4"
+};
+
+char *chan_sel[4] = {
+ [0] = "x",
+ [1] = "y",
+ [2] = "z",
+ [3] = "w",
+};
+
+char *dest_condmod[16] = {
+};
+
+char *debug_ctrl[2] = {
+ [0] = "",
+ [1] = ".breakpoint"
+};
+
+char *saturate[2] = {
+ [0] = "",
+ [1] = ".sat"
+};
+
+char *exec_size[8] = {
+ [0] = "1",
+ [1] = "2",
+ [2] = "4",
+ [3] = "8",
+ [4] = "16",
+ [5] = "32"
+};
+
+char *pred_inv[2] = {
+ [0] = "+",
+ [1] = "-"
+};
+
+char *pred_ctrl_align16[16] = {
+ [1] = "",
+ [2] = ".x",
+ [3] = ".y",
+ [4] = ".z",
+ [5] = ".w",
+ [6] = ".any4h",
+ [7] = ".all4h",
+};
+
+char *pred_ctrl_align1[16] = {
+ [1] = "",
+ [2] = ".anyv",
+ [3] = ".allv",
+ [4] = ".any2h",
+ [5] = ".all2h",
+ [6] = ".any4h",
+ [7] = ".all4h",
+ [8] = ".any8h",
+ [9] = ".all8h",
+ [10] = ".any16h",
+ [11] = ".all16h",
+};
+
+char *thread_ctrl[4] = {
+ [0] = "",
+ [2] = "switch"
+};
+
+char *compr_ctrl[4] = {
+ [0] = "",
+ [1] = "sechalf",
+ [2] = "compr",
+};
+
+char *dep_ctrl[4] = {
+ [0] = "",
+ [1] = "NoDDClr",
+ [2] = "NoDDChk",
+ [3] = "NoDDClr,NoDDChk",
+};
+
+char *mask_ctrl[4] = {
+ [0] = "",
+ [1] = "nomask",
+};
+
+char *access_mode[2] = {
+ [0] = "align1",
+ [1] = "align16",
+};
+
+char *reg_encoding[8] = {
+ [0] = "UD",
+ [1] = "D",
+ [2] = "UW",
+ [3] = "W",
+ [4] = "UB",
+ [5] = "B",
+ [7] = "F"
+};
+
+char *imm_encoding[8] = {
+ [0] = "UD",
+ [1] = "D",
+ [2] = "UW",
+ [3] = "W",
+ [5] = "VF",
+ [5] = "V",
+ [7] = "F"
+};
+
+char *reg_file[4] = {
+ [0] = "A",
+ [1] = "g",
+ [2] = "m",
+ [3] = "imm",
+};
+
+char *writemask[16] = {
+ [0x0] = ".",
+ [0x1] = ".x",
+ [0x2] = ".y",
+ [0x3] = ".xy",
+ [0x4] = ".z",
+ [0x5] = ".xz",
+ [0x6] = ".yz",
+ [0x7] = ".xyz",
+ [0x8] = ".w",
+ [0x9] = ".xw",
+ [0xa] = ".yw",
+ [0xb] = ".xyw",
+ [0xc] = ".zw",
+ [0xd] = ".xzw",
+ [0xe] = ".yzw",
+ [0xf] = "",
+};
+
+char *end_of_thread[2] = {
+ [0] = "",
+ [1] = "EOT"
+};
+
+char *target_function[16] = {
+ [BRW_MESSAGE_TARGET_NULL] = "null",
+ [BRW_MESSAGE_TARGET_MATH] = "math",
+ [BRW_MESSAGE_TARGET_SAMPLER] = "sampler",
+ [BRW_MESSAGE_TARGET_GATEWAY] = "gateway",
+ [BRW_MESSAGE_TARGET_DATAPORT_READ] = "read",
+ [BRW_MESSAGE_TARGET_DATAPORT_WRITE] = "write",
+ [BRW_MESSAGE_TARGET_URB] = "urb",
+ [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner"
+};
+
+char *math_function[16] = {
+ [BRW_MATH_FUNCTION_INV] = "inv",
+ [BRW_MATH_FUNCTION_LOG] = "log",
+ [BRW_MATH_FUNCTION_EXP] = "exp",
+ [BRW_MATH_FUNCTION_SQRT] = "sqrt",
+ [BRW_MATH_FUNCTION_RSQ] = "rsq",
+ [BRW_MATH_FUNCTION_SIN] = "sin",
+ [BRW_MATH_FUNCTION_COS] = "cos",
+ [BRW_MATH_FUNCTION_SINCOS] = "sincos",
+ [BRW_MATH_FUNCTION_TAN] = "tan",
+ [BRW_MATH_FUNCTION_POW] = "pow",
+ [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod",
+ [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intmod",
+ [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intdiv",
+};
+
+char *math_saturate[2] = {
+ [0] = "",
+ [1] = "sat"
+};
+
+char *math_signed[2] = {
+ [0] = "",
+ [1] = "signed"
+};
+
+char *math_scalar[2] = {
+ [0] = "",
+ [1] = "scalar"
+};
+
+char *math_precision[2] = {
+ [0] = "",
+ [1] = "partial_precision"
+};
+
+char *urb_swizzle[4] = {
+ [BRW_URB_SWIZZLE_NONE] = "",
+ [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
+ [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose",
+};
+
+char *urb_allocate[2] = {
+ [0] = "",
+ [1] = "allocate"
+};
+
+char *urb_used[2] = {
+ [0] = "",
+ [1] = "used"
+};
+
+char *urb_complete[2] = {
+ [0] = "",
+ [1] = "complete"
+};
+
+char *sampler_target_format[4] = {
+ [0] = "F",
+ [2] = "UD",
+ [3] = "D"
+};
+
+
+static int column;
+
+static int string (FILE *file, char *string)
+{
+ fputs (string, file);
+ column += strlen (string);
+ return 0;
+}
+
+static int format (FILE *f, char *format, ...)
+{
+ char buf[1024];
+ va_list args;
+ va_start (args, format);
+
+ vsnprintf (buf, sizeof (buf) - 1, format, args);
+ string (f, buf);
+ return 0;
+}
+
+static int newline (FILE *f)
+{
+ putc ('\n', f);
+ column = 0;
+ return 0;
+}
+
+static int pad (FILE *f, int c)
+{
+ do
+ string (f, " ");
+ while (column < c);
+ return 0;
+}
+
+static int control (FILE *file, char *name, char *ctrl[], GLuint id, int *space)
+{
+ if (!ctrl[id]) {
+ fprintf (file, "*** invalid %s value %d ",
+ name, id);
+ return 1;
+ }
+ if (ctrl[id][0])
+ {
+ if (space && *space)
+ string (file, " ");
+ string (file, ctrl[id]);
+ if (space)
+ *space = 1;
+ }
+ return 0;
+}
+
+static int print_opcode (FILE *file, int id)
+{
+ if (!opcode[id].name) {
+ format (file, "*** invalid opcode value %d ", id);
+ return 1;
+ }
+ string (file, opcode[id].name);
+ return 0;
+}
+
+static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
+{
+ int err = 0;
+ if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) {
+ switch (_reg_nr & 0xf0) {
+ case BRW_ARF_NULL:
+ string (file, "null");
+ return -1;
+ case BRW_ARF_ADDRESS:
+ format (file, "a%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_ACCUMULATOR:
+ format (file, "acc%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_MASK:
+ format (file, "mask%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_MASK_STACK:
+ format (file, "msd%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_STATE:
+ format (file, "sr%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_CONTROL:
+ format (file, "cr%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_NOTIFICATION_COUNT:
+ format (file, "n%d", _reg_nr & 0x0f);
+ break;
+ case BRW_ARF_IP:
+ string (file, "ip");
+ return -1;
+ break;
+ default:
+ format (file, "ARF%d", _reg_nr);
+ break;
+ }
+ } else {
+ err |= control (file, "src reg file", reg_file, _reg_file, NULL);
+ format (file, "%d", _reg_nr);
+ }
+ return err;
+}
+
+static int dest (FILE *file, struct brw_instruction *inst)
+{
+ int err = 0;
+
+ if (inst->header.access_mode == BRW_ALIGN_1)
+ {
+ if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr);
+ if (err == -1)
+ return 0;
+ if (inst->bits1.da1.dest_subreg_nr)
+ format (file, ".%d", inst->bits1.da1.dest_subreg_nr);
+ format (file, "<%d>", inst->bits1.da1.dest_horiz_stride);
+ err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
+ }
+ else
+ {
+ string (file, "g[a0");
+ if (inst->bits1.ia1.dest_subreg_nr)
+ format (file, ".%d", inst->bits1.ia1.dest_subreg_nr);
+ if (inst->bits1.ia1.dest_indirect_offset)
+ format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
+ string (file, "]");
+ format (file, "<%d>", inst->bits1.ia1.dest_horiz_stride);
+ err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL);
+ }
+ }
+ else
+ {
+ if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr);
+ if (err == -1)
+ return 0;
+ if (inst->bits1.da16.dest_subreg_nr)
+ format (file, ".%d", inst->bits1.da16.dest_subreg_nr);
+ string (file, "<1>");
+ err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
+ err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
+ }
+ else
+ {
+ err = 1;
+ string (file, "Indirect align16 address mode not supported");
+ }
+ }
+
+ return 0;
+}
+
+static int src_align1_region (FILE *file,
+ GLuint _vert_stride, GLuint _width, GLuint _horiz_stride)
+{
+ int err = 0;
+ string (file, "<");
+ err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
+ string (file, ",");
+ err |= control (file, "width", width, _width, NULL);
+ string (file, ",");
+ err |= control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL);
+ string (file, ">");
+ return err;
+}
+
+static int src_da1 (FILE *file, GLuint type, GLuint _reg_file,
+ GLuint _vert_stride, GLuint _width, GLuint _horiz_stride,
+ GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate)
+{
+ int err = 0;
+ err |= control (file, "negate", negate, _negate, NULL);
+ err |= control (file, "abs", _abs, __abs, NULL);
+
+ err |= reg (file, _reg_file, reg_num);
+ if (err == -1)
+ return 0;
+ if (sub_reg_num)
+ format (file, ".%d", sub_reg_num);
+ src_align1_region (file, _vert_stride, _width, _horiz_stride);
+ err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+ return err;
+}
+
+static int src_ia1 (FILE *file,
+ GLuint type,
+ GLuint _reg_file,
+ GLint _addr_imm,
+ GLuint _addr_subreg_nr,
+ GLuint _negate,
+ GLuint __abs,
+ GLuint _addr_mode,
+ GLuint _horiz_stride,
+ GLuint _width,
+ GLuint _vert_stride)
+{
+ int err = 0;
+ err |= control (file, "negate", negate, _negate, NULL);
+ err |= control (file, "abs", _abs, __abs, NULL);
+
+ string (file, "g[a0");
+ if (_addr_subreg_nr)
+ format (file, ".%d", _addr_subreg_nr);
+ if (_addr_imm)
+ format (file, " %d", _addr_imm);
+ string (file, "]");
+ src_align1_region (file, _vert_stride, _width, _horiz_stride);
+ err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+ return err;
+}
+
+static int src_da16 (FILE *file,
+ GLuint _reg_type,
+ GLuint _reg_file,
+ GLuint _vert_stride,
+ GLuint _reg_nr,
+ GLuint _subreg_nr,
+ GLuint __abs,
+ GLuint _negate,
+ GLuint swz_x,
+ GLuint swz_y,
+ GLuint swz_z,
+ GLuint swz_w)
+{
+ int err = 0;
+ err |= control (file, "negate", negate, _negate, NULL);
+ err |= control (file, "abs", _abs, __abs, NULL);
+
+ err |= reg (file, _reg_file, _reg_nr);
+ if (err == -1)
+ return 0;
+ if (_subreg_nr)
+ format (file, ".%d", _subreg_nr);
+ string (file, "<");
+ err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
+ string (file, ",1,1>");
+ err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
+ /*
+ * Three kinds of swizzle display:
+ * identity - nothing printed
+ * 1->all - print the single channel
+ * 1->1 - print the mapping
+ */
+ if (swz_x == BRW_CHANNEL_X &&
+ swz_y == BRW_CHANNEL_Y &&
+ swz_z == BRW_CHANNEL_Z &&
+ swz_w == BRW_CHANNEL_W)
+ {
+ ;
+ }
+ else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ }
+ else
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ err |= control (file, "channel select", chan_sel, swz_y, NULL);
+ err |= control (file, "channel select", chan_sel, swz_z, NULL);
+ err |= control (file, "channel select", chan_sel, swz_w, NULL);
+ }
+ return err;
+}
+
+
+static int imm (FILE *file, GLuint type, struct brw_instruction *inst) {
+ switch (type) {
+ case BRW_REGISTER_TYPE_UD:
+ format (file, "0x%08xUD", inst->bits3.ud);
+ break;
+ case BRW_REGISTER_TYPE_D:
+ format (file, "%dD", inst->bits3.d);
+ break;
+ case BRW_REGISTER_TYPE_UW:
+ format (file, "0x%04xUW", (uint16_t) inst->bits3.ud);
+ break;
+ case BRW_REGISTER_TYPE_W:
+ format (file, "%dW", (int16_t) inst->bits3.d);
+ break;
+ case BRW_REGISTER_TYPE_UB:
+ format (file, "0x%02xUB", (int8_t) inst->bits3.ud);
+ break;
+ case BRW_REGISTER_TYPE_VF:
+ format (file, "Vector Float");
+ break;
+ case BRW_REGISTER_TYPE_V:
+ format (file, "0x%08xV", inst->bits3.ud);
+ break;
+ case BRW_REGISTER_TYPE_F:
+ format (file, "%-gF", inst->bits3.f);
+ }
+ return 0;
+}
+
+static int src0 (FILE *file, struct brw_instruction *inst)
+{
+ if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE)
+ return imm (file, inst->bits1.da1.src0_reg_type,
+ inst);
+ else if (inst->header.access_mode == BRW_ALIGN_1)
+ {
+ if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ return src_da1 (file,
+ inst->bits1.da1.src0_reg_type,
+ inst->bits1.da1.src0_reg_file,
+ inst->bits2.da1.src0_vert_stride,
+ inst->bits2.da1.src0_width,
+ inst->bits2.da1.src0_horiz_stride,
+ inst->bits2.da1.src0_reg_nr,
+ inst->bits2.da1.src0_subreg_nr,
+ inst->bits2.da1.src0_abs,
+ inst->bits2.da1.src0_negate);
+ }
+ else
+ {
+ return src_ia1 (file,
+ inst->bits1.ia1.src0_reg_type,
+ inst->bits1.ia1.src0_reg_file,
+ inst->bits2.ia1.src0_indirect_offset,
+ inst->bits2.ia1.src0_subreg_nr,
+ inst->bits2.ia1.src0_negate,
+ inst->bits2.ia1.src0_abs,
+ inst->bits2.ia1.src0_address_mode,
+ inst->bits2.ia1.src0_horiz_stride,
+ inst->bits2.ia1.src0_width,
+ inst->bits2.ia1.src0_vert_stride);
+ }
+ }
+ else
+ {
+ if (inst->bits2.da16.src0_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ return src_da16 (file,
+ inst->bits1.da16.src0_reg_type,
+ inst->bits1.da16.src0_reg_file,
+ inst->bits2.da16.src0_vert_stride,
+ inst->bits2.da16.src0_reg_nr,
+ inst->bits2.da16.src0_subreg_nr,
+ inst->bits2.da16.src0_abs,
+ inst->bits2.da16.src0_negate,
+ inst->bits2.da16.src0_swz_x,
+ inst->bits2.da16.src0_swz_y,
+ inst->bits2.da16.src0_swz_z,
+ inst->bits2.da16.src0_swz_w);
+ }
+ else
+ {
+ string (file, "Indirect align16 address mode not supported");
+ return 1;
+ }
+ }
+}
+
+static int src1 (FILE *file, struct brw_instruction *inst)
+{
+ if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
+ return imm (file, inst->bits1.da1.src1_reg_type,
+ inst);
+ else if (inst->header.access_mode == BRW_ALIGN_1)
+ {
+ if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ return src_da1 (file,
+ inst->bits1.da1.src1_reg_type,
+ inst->bits1.da1.src1_reg_file,
+ inst->bits3.da1.src1_vert_stride,
+ inst->bits3.da1.src1_width,
+ inst->bits3.da1.src1_horiz_stride,
+ inst->bits3.da1.src1_reg_nr,
+ inst->bits3.da1.src1_subreg_nr,
+ inst->bits3.da1.src1_abs,
+ inst->bits3.da1.src1_negate);
+ }
+ else
+ {
+ return src_ia1 (file,
+ inst->bits1.ia1.src1_reg_type,
+ inst->bits1.ia1.src1_reg_file,
+ inst->bits3.ia1.src1_indirect_offset,
+ inst->bits3.ia1.src1_subreg_nr,
+ inst->bits3.ia1.src1_negate,
+ inst->bits3.ia1.src1_abs,
+ inst->bits3.ia1.src1_address_mode,
+ inst->bits3.ia1.src1_horiz_stride,
+ inst->bits3.ia1.src1_width,
+ inst->bits3.ia1.src1_vert_stride);
+ }
+ }
+ else
+ {
+ if (inst->bits3.da16.src1_address_mode == BRW_ADDRESS_DIRECT)
+ {
+ return src_da16 (file,
+ inst->bits1.da16.src1_reg_type,
+ inst->bits1.da16.src1_reg_file,
+ inst->bits3.da16.src1_vert_stride,
+ inst->bits3.da16.src1_reg_nr,
+ inst->bits3.da16.src1_subreg_nr,
+ inst->bits3.da16.src1_abs,
+ inst->bits3.da16.src1_negate,
+ inst->bits3.da16.src1_swz_x,
+ inst->bits3.da16.src1_swz_y,
+ inst->bits3.da16.src1_swz_z,
+ inst->bits3.da16.src1_swz_w);
+ }
+ else
+ {
+ string (file, "Indirect align16 address mode not supported");
+ return 1;
+ }
+ }
+}
+
+int brw_disasm (FILE *file, struct brw_instruction *inst)
+{
+ int err = 0;
+ int space = 0;
+
+ if (inst->header.predicate_control) {
+ string (file, "(");
+ err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL);
+ string (file, "f0");
+ if (inst->bits2.da1.flag_reg_nr)
+ format (file, ".%d", inst->bits2.da1.flag_reg_nr);
+ if (inst->header.access_mode == BRW_ALIGN_1)
+ err |= control (file, "predicate control align1", pred_ctrl_align1,
+ inst->header.predicate_control, NULL);
+ else
+ err |= control (file, "predicate control align16", pred_ctrl_align16,
+ inst->header.predicate_control, NULL);
+ string (file, ") ");
+ }
+
+ err |= print_opcode (file, inst->header.opcode);
+ err |= control (file, "saturate", saturate, inst->header.saturate, NULL);
+ err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL);
+
+ if (inst->header.opcode != BRW_OPCODE_SEND)
+ err |= control (file, "conditional modifier", conditional_modifier,
+ inst->header.destreg__conditionalmod, NULL);
+
+ if (inst->header.opcode != BRW_OPCODE_NOP) {
+ string (file, "(");
+ err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL);
+ string (file, ")");
+ }
+
+ if (inst->header.opcode == BRW_OPCODE_SEND)
+ format (file, " %d", inst->header.destreg__conditionalmod);
+
+ if (opcode[inst->header.opcode].ndst > 0) {
+ pad (file, 16);
+ err |= dest (file, inst);
+ }
+ if (opcode[inst->header.opcode].nsrc > 0) {
+ pad (file, 32);
+ err |= src0 (file, inst);
+ }
+ if (opcode[inst->header.opcode].nsrc > 1) {
+ pad (file, 48);
+ err |= src1 (file, inst);
+ }
+
+ if (inst->header.opcode == BRW_OPCODE_SEND) {
+ newline (file);
+ pad (file, 16);
+ space = 0;
+ err |= control (file, "target function", target_function,
+ inst->bits3.generic.msg_target, &space);
+ switch (inst->bits3.generic.msg_target) {
+ case BRW_MESSAGE_TARGET_MATH:
+ err |= control (file, "math function", math_function,
+ inst->bits3.math.function, &space);
+ err |= control (file, "math saturate", math_saturate,
+ inst->bits3.math.saturate, &space);
+ err |= control (file, "math signed", math_signed,
+ inst->bits3.math.int_type, &space);
+ err |= control (file, "math scalar", math_scalar,
+ inst->bits3.math.data_type, &space);
+ err |= control (file, "math precision", math_precision,
+ inst->bits3.math.precision, &space);
+ break;
+ case BRW_MESSAGE_TARGET_SAMPLER:
+ format (file, " (%d, %d, ",
+ inst->bits3.sampler.binding_table_index,
+ inst->bits3.sampler.sampler);
+ err |= control (file, "sampler target format", sampler_target_format,
+ inst->bits3.sampler.return_format, NULL);
+ string (file, ")");
+ break;
+ case BRW_MESSAGE_TARGET_DATAPORT_WRITE:
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.dp_write.binding_table_index,
+ (inst->bits3.dp_write.pixel_scoreboard_clear << 3) |
+ inst->bits3.dp_write.msg_control,
+ inst->bits3.dp_write.msg_type,
+ inst->bits3.dp_write.send_commit_msg);
+ break;
+ case BRW_MESSAGE_TARGET_URB:
+ format (file, " %d", inst->bits3.urb.offset);
+ space = 1;
+ err |= control (file, "urb swizzle", urb_swizzle,
+ inst->bits3.urb.swizzle_control, &space);
+ err |= control (file, "urb allocate", urb_allocate,
+ inst->bits3.urb.allocate, &space);
+ err |= control (file, "urb used", urb_used,
+ inst->bits3.urb.used, &space);
+ err |= control (file, "urb complete", urb_complete,
+ inst->bits3.urb.complete, &space);
+ break;
+ case BRW_MESSAGE_TARGET_THREAD_SPAWNER:
+ break;
+ default:
+ format (file, "unsupported target %d", inst->bits3.generic.msg_target);
+ break;
+ }
+ if (space)
+ string (file, " ");
+ format (file, "mlen %d",
+ inst->bits3.generic.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic.response_length);
+ }
+ pad (file, 64);
+ if (inst->header.opcode != BRW_OPCODE_NOP) {
+ string (file, "{");
+ space = 1;
+ err |= control(file, "access mode", access_mode, inst->header.access_mode, &space);
+ err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
+ err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
+ err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space);
+ err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
+ if (inst->header.opcode == BRW_OPCODE_SEND)
+ err |= control (file, "end of thread", end_of_thread,
+ inst->bits3.generic.end_of_thread, &space);
+ if (space)
+ string (file, " ");
+ string (file, "}");
+ }
+ string (file, ";");
+ newline (file);
+ return err;
+}
diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c
new file mode 100644
index 00000000000..44bb7bd5882
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_draw.c
@@ -0,0 +1,493 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/state.h"
+#include "main/enums.h"
+#include "tnl/tnl.h"
+#include "vbo/vbo_context.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+
+#include "brw_draw.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_fallback.h"
+
+#include "intel_batchbuffer.h"
+#include "intel_buffer_objects.h"
+
+#define FILE_DEBUG_FLAG DEBUG_BATCH
+
+static GLuint prim_to_hw_prim[GL_POLYGON+1] = {
+ _3DPRIM_POINTLIST,
+ _3DPRIM_LINELIST,
+ _3DPRIM_LINELOOP,
+ _3DPRIM_LINESTRIP,
+ _3DPRIM_TRILIST,
+ _3DPRIM_TRISTRIP,
+ _3DPRIM_TRIFAN,
+ _3DPRIM_QUADLIST,
+ _3DPRIM_QUADSTRIP,
+ _3DPRIM_POLYGON
+};
+
+
+static const GLenum reduced_prim[GL_POLYGON+1] = {
+ GL_POINTS,
+ GL_LINES,
+ GL_LINES,
+ GL_LINES,
+ GL_TRIANGLES,
+ GL_TRIANGLES,
+ GL_TRIANGLES,
+ GL_TRIANGLES,
+ GL_TRIANGLES,
+ GL_TRIANGLES
+};
+
+
+/* When the primitive changes, set a state bit and re-validate. Not
+ * the nicest and would rather deal with this by having all the
+ * programs be immune to the active primitive (ie. cope with all
+ * possibilities). That may not be realistic however.
+ */
+static GLuint brw_set_prim(struct brw_context *brw, GLenum prim)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+
+ if (INTEL_DEBUG & DEBUG_PRIMS)
+ _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim));
+
+ /* Slight optimization to avoid the GS program when not needed:
+ */
+ if (prim == GL_QUAD_STRIP &&
+ ctx->Light.ShadeModel != GL_FLAT &&
+ ctx->Polygon.FrontMode == GL_FILL &&
+ ctx->Polygon.BackMode == GL_FILL)
+ prim = GL_TRIANGLE_STRIP;
+
+ if (prim != brw->primitive) {
+ brw->primitive = prim;
+ brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
+
+ if (reduced_prim[prim] != brw->intel.reduced_primitive) {
+ brw->intel.reduced_primitive = reduced_prim[prim];
+ brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
+ }
+ }
+
+ return prim_to_hw_prim[prim];
+}
+
+
+static GLuint trim(GLenum prim, GLuint length)
+{
+ if (prim == GL_QUAD_STRIP)
+ return length > 3 ? (length - length % 2) : 0;
+ else if (prim == GL_QUADS)
+ return length - length % 4;
+ else
+ return length;
+}
+
+
+static void brw_emit_prim(struct brw_context *brw,
+ const struct _mesa_prim *prim,
+ uint32_t hw_prim)
+{
+ struct brw_3d_primitive prim_packet;
+ struct intel_context *intel = &brw->intel;
+
+ if (INTEL_DEBUG & DEBUG_PRIMS)
+ _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
+ prim->start, prim->count);
+
+ prim_packet.header.opcode = CMD_3D_PRIM;
+ prim_packet.header.length = sizeof(prim_packet)/4 - 2;
+ prim_packet.header.pad = 0;
+ prim_packet.header.topology = hw_prim;
+ prim_packet.header.indexed = prim->indexed;
+
+ prim_packet.verts_per_instance = trim(prim->mode, prim->count);
+ prim_packet.start_vert_location = prim->start;
+ if (prim->indexed)
+ prim_packet.start_vert_location += brw->ib.start_vertex_offset;
+ prim_packet.instance_count = 1;
+ prim_packet.start_instance_location = 0;
+ prim_packet.base_vert_location = prim->basevertex;
+
+ /* Can't wrap here, since we rely on the validated state. */
+ brw->no_batch_wrap = GL_TRUE;
+
+ /* If we're set to always flush, do it before and after the primitive emit.
+ * We want to catch both missed flushes that hurt instruction/state cache
+ * and missed flushes of the render cache as it heads to other parts of
+ * the besides the draw code.
+ */
+ if (intel->always_flush_cache) {
+ BEGIN_BATCH(1, IGNORE_CLIPRECTS);
+ OUT_BATCH(intel->vtbl.flush_cmd());
+ ADVANCE_BATCH();
+ }
+ if (prim_packet.verts_per_instance) {
+ intel_batchbuffer_data( brw->intel.batch, &prim_packet,
+ sizeof(prim_packet), LOOP_CLIPRECTS);
+ }
+ if (intel->always_flush_cache) {
+ BEGIN_BATCH(1, IGNORE_CLIPRECTS);
+ OUT_BATCH(intel->vtbl.flush_cmd());
+ ADVANCE_BATCH();
+ }
+
+ brw->no_batch_wrap = GL_FALSE;
+}
+
+static void brw_merge_inputs( struct brw_context *brw,
+ const struct gl_client_array *arrays[])
+{
+ struct brw_vertex_info old = brw->vb.info;
+ GLuint i;
+
+ for (i = 0; i < VERT_ATTRIB_MAX; i++)
+ dri_bo_unreference(brw->vb.inputs[i].bo);
+
+ memset(&brw->vb.inputs, 0, sizeof(brw->vb.inputs));
+ memset(&brw->vb.info, 0, sizeof(brw->vb.info));
+
+ for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+ brw->vb.inputs[i].glarray = arrays[i];
+ brw->vb.inputs[i].attrib = (gl_vert_attrib) i;
+
+ if (arrays[i]->StrideB != 0)
+ brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) <<
+ ((i%16) * 2);
+ }
+
+ /* Raise statechanges if input sizes have changed. */
+ if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0)
+ brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS;
+}
+
+/* XXX: could split the primitive list to fallback only on the
+ * non-conformant primitives.
+ */
+static GLboolean check_fallbacks( struct brw_context *brw,
+ const struct _mesa_prim *prim,
+ GLuint nr_prims )
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ GLuint i;
+
+ /* If we don't require strict OpenGL conformance, never
+ * use fallbacks. If we're forcing fallbacks, always
+ * use fallfacks.
+ */
+ if (brw->intel.conformance_mode == 0)
+ return GL_FALSE;
+
+ if (brw->intel.conformance_mode == 2)
+ return GL_TRUE;
+
+ if (ctx->Polygon.SmoothFlag) {
+ for (i = 0; i < nr_prims; i++)
+ if (reduced_prim[prim[i].mode] == GL_TRIANGLES)
+ return GL_TRUE;
+ }
+
+ /* BRW hardware will do AA lines, but they are non-conformant it
+ * seems. TBD whether we keep this fallback:
+ */
+ if (ctx->Line.SmoothFlag) {
+ for (i = 0; i < nr_prims; i++)
+ if (reduced_prim[prim[i].mode] == GL_LINES)
+ return GL_TRUE;
+ }
+
+ /* Stipple -- these fallbacks could be resolved with a little
+ * bit of work?
+ */
+ if (ctx->Line.StippleFlag) {
+ for (i = 0; i < nr_prims; i++) {
+ /* GS doesn't get enough information to know when to reset
+ * the stipple counter?!?
+ */
+ if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP)
+ return GL_TRUE;
+
+ if (prim[i].mode == GL_POLYGON &&
+ (ctx->Polygon.FrontMode == GL_LINE ||
+ ctx->Polygon.BackMode == GL_LINE))
+ return GL_TRUE;
+ }
+ }
+
+ if (ctx->Point.SmoothFlag) {
+ for (i = 0; i < nr_prims; i++)
+ if (prim[i].mode == GL_POINTS)
+ return GL_TRUE;
+ }
+
+ /* BRW hardware doesn't handle GL_CLAMP texturing correctly;
+ * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP
+ * as GL_CLAMP_TO_EDGE instead. If we're using GL_CLAMP, and
+ * we want strict conformance, force the fallback.
+ * Right now, we only do this for 2D textures.
+ */
+ {
+ int u;
+ for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
+ struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
+ if (texUnit->Enabled) {
+ if (texUnit->Enabled & TEXTURE_1D_BIT) {
+ if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) {
+ return GL_TRUE;
+ }
+ }
+ if (texUnit->Enabled & TEXTURE_2D_BIT) {
+ if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP ||
+ texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) {
+ return GL_TRUE;
+ }
+ }
+ if (texUnit->Enabled & TEXTURE_3D_BIT) {
+ if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP ||
+ texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP ||
+ texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) {
+ return GL_TRUE;
+ }
+ }
+ }
+ }
+ }
+
+ /* Nothing stopping us from the fast path now */
+ return GL_FALSE;
+}
+
+/* May fail if out of video memory for texture or vbo upload, or on
+ * fallback conditions.
+ */
+static GLboolean brw_try_draw_prims( GLcontext *ctx,
+ const struct gl_client_array *arrays[],
+ const struct _mesa_prim *prim,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLuint min_index,
+ GLuint max_index )
+{
+ struct intel_context *intel = intel_context(ctx);
+ struct brw_context *brw = brw_context(ctx);
+ GLboolean retval = GL_FALSE;
+ GLboolean warn = GL_FALSE;
+ GLboolean first_time = GL_TRUE;
+ GLuint i;
+
+ if (ctx->NewState)
+ _mesa_update_state( ctx );
+
+ /* We have to validate the textures *before* checking for fallbacks;
+ * otherwise, the software fallback won't be able to rely on the
+ * texture state, the firstLevel and lastLevel fields won't be
+ * set in the intel texture object (they'll both be 0), and the
+ * software fallback will segfault if it attempts to access any
+ * texture level other than level 0.
+ */
+ brw_validate_textures( brw );
+
+ if (check_fallbacks(brw, prim, nr_prims))
+ return GL_FALSE;
+
+ /* Bind all inputs, derive varying and size information:
+ */
+ brw_merge_inputs( brw, arrays );
+
+ brw->ib.ib = ib;
+ brw->state.dirty.brw |= BRW_NEW_INDICES;
+
+ brw->vb.min_index = min_index;
+ brw->vb.max_index = max_index;
+ brw->state.dirty.brw |= BRW_NEW_VERTICES;
+
+ /* Have to validate state quite late. Will rebuild tnl_program,
+ * which depends on varying information.
+ *
+ * Note this is where brw->vs->prog_data.inputs_read is calculated,
+ * so can't access it earlier.
+ */
+
+ LOCK_HARDWARE(intel);
+
+ if (!intel->constant_cliprect && intel->driDrawable->numClipRects == 0) {
+ UNLOCK_HARDWARE(intel);
+ return GL_TRUE;
+ }
+
+ for (i = 0; i < nr_prims; i++) {
+ uint32_t hw_prim;
+
+ /* Flush the batch if it's approaching full, so that we don't wrap while
+ * we've got validated state that needs to be in the same batch as the
+ * primitives. This fraction is just a guess (minimal full state plus
+ * a primitive is around 512 bytes), and would be better if we had
+ * an upper bound of how much we might emit in a single
+ * brw_try_draw_prims().
+ */
+ intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4,
+ LOOP_CLIPRECTS);
+
+ hw_prim = brw_set_prim(brw, prim[i].mode);
+
+ if (first_time || (brw->state.dirty.brw & BRW_NEW_PRIMITIVE)) {
+ first_time = GL_FALSE;
+
+ brw_validate_state(brw);
+
+ /* Various fallback checks: */
+ if (brw->intel.Fallback)
+ goto out;
+
+ /* Check that we can fit our state in with our existing batchbuffer, or
+ * flush otherwise.
+ */
+ if (dri_bufmgr_check_aperture_space(brw->state.validated_bos,
+ brw->state.validated_bo_count)) {
+ static GLboolean warned;
+ intel_batchbuffer_flush(intel->batch);
+
+ /* Validate the state after we flushed the batch (which would have
+ * changed the set of dirty state). If we still fail to
+ * check_aperture, warn of what's happening, but attempt to continue
+ * on since it may succeed anyway, and the user would probably rather
+ * see a failure and a warning than a fallback.
+ */
+ brw_validate_state(brw);
+ if (!warned &&
+ dri_bufmgr_check_aperture_space(brw->state.validated_bos,
+ brw->state.validated_bo_count)) {
+ warn = GL_TRUE;
+ warned = GL_TRUE;
+ }
+ }
+
+ brw_upload_state(brw);
+ }
+
+ brw_emit_prim(brw, &prim[i], hw_prim);
+
+ retval = GL_TRUE;
+ }
+
+ if (intel->always_flush_batch)
+ intel_batchbuffer_flush(intel->batch);
+ out:
+ UNLOCK_HARDWARE(intel);
+
+ brw_state_cache_check_size(brw);
+
+ if (warn)
+ fprintf(stderr, "i965: Single primitive emit potentially exceeded "
+ "available aperture space\n");
+
+ if (!retval)
+ DBG("%s failed\n", __FUNCTION__);
+
+ return retval;
+}
+
+void brw_draw_prims( GLcontext *ctx,
+ const struct gl_client_array *arrays[],
+ const struct _mesa_prim *prim,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
+ GLuint min_index,
+ GLuint max_index )
+{
+ GLboolean retval;
+
+ if (!vbo_all_varyings_in_vbos(arrays)) {
+ if (!index_bounds_valid)
+ vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
+
+ /* Decide if we want to rebase. If so we end up recursing once
+ * only into this function.
+ */
+ if (min_index != 0) {
+ vbo_rebase_prims(ctx, arrays,
+ prim, nr_prims,
+ ib, min_index, max_index,
+ brw_draw_prims );
+ return;
+ }
+ }
+
+ /* Make a first attempt at drawing:
+ */
+ retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+
+ /* Otherwise, we really are out of memory. Pass the drawing
+ * command to the software tnl module and which will in turn call
+ * swrast to do the drawing.
+ */
+ if (!retval) {
+ _swsetup_Wakeup(ctx);
+ _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+ }
+
+}
+
+void brw_draw_init( struct brw_context *brw )
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ struct vbo_context *vbo = vbo_context(ctx);
+
+ /* Register our drawing function:
+ */
+ vbo->draw_prims = brw_draw_prims;
+}
+
+void brw_draw_destroy( struct brw_context *brw )
+{
+ int i;
+
+ if (brw->vb.upload.bo != NULL) {
+ dri_bo_unreference(brw->vb.upload.bo);
+ brw->vb.upload.bo = NULL;
+ }
+
+ for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+ dri_bo_unreference(brw->vb.inputs[i].bo);
+ brw->vb.inputs[i].bo = NULL;
+ }
+
+ dri_bo_unreference(brw->ib.bo);
+ brw->ib.bo = NULL;
+}
diff --git a/src/gallium/drivers/i965/brw_draw.h b/src/gallium/drivers/i965/brw_draw.h
new file mode 100644
index 00000000000..2a14db217fc
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_draw.h
@@ -0,0 +1,54 @@
+ /**************************************************************************
+ *
+ * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef BRW_DRAW_H
+#define BRW_DRAW_H
+
+#include "main/mtypes.h" /* for GLcontext... */
+#include "vbo/vbo.h"
+
+struct brw_context;
+
+
+void brw_draw_prims( GLcontext *ctx,
+ const struct gl_client_array *arrays[],
+ const struct _mesa_prim *prims,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
+ GLuint min_index,
+ GLuint max_index );
+
+void brw_draw_init( struct brw_context *brw );
+void brw_draw_destroy( struct brw_context *brw );
+
+/* brw_draw_current.c
+ */
+void brw_init_current_values(GLcontext *ctx,
+ struct gl_client_array *arrays);
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
new file mode 100644
index 00000000000..a3ff6c58d89
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -0,0 +1,742 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "main/glheader.h"
+#include "main/bufferobj.h"
+#include "main/context.h"
+#include "main/state.h"
+#include "main/api_validate.h"
+#include "main/enums.h"
+
+#include "brw_draw.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_fallback.h"
+
+#include "intel_batchbuffer.h"
+#include "intel_buffer_objects.h"
+#include "intel_tex.h"
+
+static GLuint double_types[5] = {
+ 0,
+ BRW_SURFACEFORMAT_R64_FLOAT,
+ BRW_SURFACEFORMAT_R64G64_FLOAT,
+ BRW_SURFACEFORMAT_R64G64B64_FLOAT,
+ BRW_SURFACEFORMAT_R64G64B64A64_FLOAT
+};
+
+static GLuint float_types[5] = {
+ 0,
+ BRW_SURFACEFORMAT_R32_FLOAT,
+ BRW_SURFACEFORMAT_R32G32_FLOAT,
+ BRW_SURFACEFORMAT_R32G32B32_FLOAT,
+ BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
+};
+
+static GLuint uint_types_norm[5] = {
+ 0,
+ BRW_SURFACEFORMAT_R32_UNORM,
+ BRW_SURFACEFORMAT_R32G32_UNORM,
+ BRW_SURFACEFORMAT_R32G32B32_UNORM,
+ BRW_SURFACEFORMAT_R32G32B32A32_UNORM
+};
+
+static GLuint uint_types_scale[5] = {
+ 0,
+ BRW_SURFACEFORMAT_R32_USCALED,
+ BRW_SURFACEFORMAT_R32G32_USCALED,
+ BRW_SURFACEFORMAT_R32G32B32_USCALED,
+ BRW_SURFACEFORMAT_R32G32B32A32_USCALED
+};
+
+static GLuint int_types_norm[5] = {
+ 0,
+ BRW_SURFACEFORMAT_R32_SNORM,
+ BRW_SURFACEFORMAT_R32G32_SNORM,
+ BRW_SURFACEFORMAT_R32G32B32_SNORM,
+ BRW_SURFACEFORMAT_R32G32B32A32_SNORM
+};
+
+static GLuint int_types_scale[5] = {
+ 0,
+ BRW_SURFACEFORMAT_R32_SSCALED,
+ BRW_SURFACEFORMAT_R32G32_SSCALED,
+ BRW_SURFACEFORMAT_R32G32B32_SSCALED,
+ BRW_SURFACEFORMAT_R32G32B32A32_SSCALED
+};
+
+static GLuint ushort_types_norm[5] = {
+ 0,
+ BRW_SURFACEFORMAT_R16_UNORM,
+ BRW_SURFACEFORMAT_R16G16_UNORM,
+ BRW_SURFACEFORMAT_R16G16B16_UNORM,
+ BRW_SURFACEFORMAT_R16G16B16A16_UNORM
+};
+
+static GLuint ushort_types_scale[5] = {
+ 0,
+ BRW_SURFACEFORMAT_R16_USCALED,
+ BRW_SURFACEFORMAT_R16G16_USCALED,
+ BRW_SURFACEFORMAT_R16G16B16_USCALED,
+ BRW_SURFACEFORMAT_R16G16B16A16_USCALED
+};
+
+static GLuint short_types_norm[5] = {
+ 0,
+ BRW_SURFACEFORMAT_R16_SNORM,
+ BRW_SURFACEFORMAT_R16G16_SNORM,
+ BRW_SURFACEFORMAT_R16G16B16_SNORM,
+ BRW_SURFACEFORMAT_R16G16B16A16_SNORM
+};
+
+static GLuint short_types_scale[5] = {
+ 0,
+ BRW_SURFACEFORMAT_R16_SSCALED,
+ BRW_SURFACEFORMAT_R16G16_SSCALED,
+ BRW_SURFACEFORMAT_R16G16B16_SSCALED,
+ BRW_SURFACEFORMAT_R16G16B16A16_SSCALED
+};
+
+static GLuint ubyte_types_norm[5] = {
+ 0,
+ BRW_SURFACEFORMAT_R8_UNORM,
+ BRW_SURFACEFORMAT_R8G8_UNORM,
+ BRW_SURFACEFORMAT_R8G8B8_UNORM,
+ BRW_SURFACEFORMAT_R8G8B8A8_UNORM
+};
+
+static GLuint ubyte_types_scale[5] = {
+ 0,
+ BRW_SURFACEFORMAT_R8_USCALED,
+ BRW_SURFACEFORMAT_R8G8_USCALED,
+ BRW_SURFACEFORMAT_R8G8B8_USCALED,
+ BRW_SURFACEFORMAT_R8G8B8A8_USCALED
+};
+
+static GLuint byte_types_norm[5] = {
+ 0,
+ BRW_SURFACEFORMAT_R8_SNORM,
+ BRW_SURFACEFORMAT_R8G8_SNORM,
+ BRW_SURFACEFORMAT_R8G8B8_SNORM,
+ BRW_SURFACEFORMAT_R8G8B8A8_SNORM
+};
+
+static GLuint byte_types_scale[5] = {
+ 0,
+ BRW_SURFACEFORMAT_R8_SSCALED,
+ BRW_SURFACEFORMAT_R8G8_SSCALED,
+ BRW_SURFACEFORMAT_R8G8B8_SSCALED,
+ BRW_SURFACEFORMAT_R8G8B8A8_SSCALED
+};
+
+
+/**
+ * Given vertex array type/size/format/normalized info, return
+ * the appopriate hardware surface type.
+ * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays.
+ */
+static GLuint get_surface_type( GLenum type, GLuint size,
+ GLenum format, GLboolean normalized )
+{
+ if (INTEL_DEBUG & DEBUG_VERTS)
+ _mesa_printf("type %s size %d normalized %d\n",
+ _mesa_lookup_enum_by_nr(type), size, normalized);
+
+ if (normalized) {
+ switch (type) {
+ case GL_DOUBLE: return double_types[size];
+ case GL_FLOAT: return float_types[size];
+ case GL_INT: return int_types_norm[size];
+ case GL_SHORT: return short_types_norm[size];
+ case GL_BYTE: return byte_types_norm[size];
+ case GL_UNSIGNED_INT: return uint_types_norm[size];
+ case GL_UNSIGNED_SHORT: return ushort_types_norm[size];
+ case GL_UNSIGNED_BYTE:
+ if (format == GL_BGRA) {
+ /* See GL_EXT_vertex_array_bgra */
+ assert(size == 4);
+ return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ }
+ else {
+ return ubyte_types_norm[size];
+ }
+ default: assert(0); return 0;
+ }
+ }
+ else {
+ assert(format == GL_RGBA); /* sanity check */
+ switch (type) {
+ case GL_DOUBLE: return double_types[size];
+ case GL_FLOAT: return float_types[size];
+ case GL_INT: return int_types_scale[size];
+ case GL_SHORT: return short_types_scale[size];
+ case GL_BYTE: return byte_types_scale[size];
+ case GL_UNSIGNED_INT: return uint_types_scale[size];
+ case GL_UNSIGNED_SHORT: return ushort_types_scale[size];
+ case GL_UNSIGNED_BYTE: return ubyte_types_scale[size];
+ default: assert(0); return 0;
+ }
+ }
+}
+
+
+static GLuint get_size( GLenum type )
+{
+ switch (type) {
+ case GL_DOUBLE: return sizeof(GLdouble);
+ case GL_FLOAT: return sizeof(GLfloat);
+ case GL_INT: return sizeof(GLint);
+ case GL_SHORT: return sizeof(GLshort);
+ case GL_BYTE: return sizeof(GLbyte);
+ case GL_UNSIGNED_INT: return sizeof(GLuint);
+ case GL_UNSIGNED_SHORT: return sizeof(GLushort);
+ case GL_UNSIGNED_BYTE: return sizeof(GLubyte);
+ default: return 0;
+ }
+}
+
+static GLuint get_index_type(GLenum type)
+{
+ switch (type) {
+ case GL_UNSIGNED_BYTE: return BRW_INDEX_BYTE;
+ case GL_UNSIGNED_SHORT: return BRW_INDEX_WORD;
+ case GL_UNSIGNED_INT: return BRW_INDEX_DWORD;
+ default: assert(0); return 0;
+ }
+}
+
+static void wrap_buffers( struct brw_context *brw,
+ GLuint size )
+{
+ if (size < BRW_UPLOAD_INIT_SIZE)
+ size = BRW_UPLOAD_INIT_SIZE;
+
+ brw->vb.upload.offset = 0;
+
+ if (brw->vb.upload.bo != NULL)
+ dri_bo_unreference(brw->vb.upload.bo);
+ brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO",
+ size, 1);
+
+ /* Set the internal VBO\ to no-backing-store. We only use them as a
+ * temporary within a brw_try_draw_prims while the lock is held.
+ */
+ /* DON'T DO THIS AS IF WE HAVE TO RE-ORG MEMORY WE NEED SOMEWHERE WITH
+ FAKE TO PUSH THIS STUFF */
+// if (!brw->intel.ttm)
+// dri_bo_fake_disable_backing_store(brw->vb.upload.bo, NULL, NULL);
+}
+
+static void get_space( struct brw_context *brw,
+ GLuint size,
+ dri_bo **bo_return,
+ GLuint *offset_return )
+{
+ size = ALIGN(size, 64);
+
+ if (brw->vb.upload.bo == NULL ||
+ brw->vb.upload.offset + size > brw->vb.upload.bo->size) {
+ wrap_buffers(brw, size);
+ }
+
+ assert(*bo_return == NULL);
+ dri_bo_reference(brw->vb.upload.bo);
+ *bo_return = brw->vb.upload.bo;
+ *offset_return = brw->vb.upload.offset;
+ brw->vb.upload.offset += size;
+}
+
+static void
+copy_array_to_vbo_array( struct brw_context *brw,
+ struct brw_vertex_element *element,
+ GLuint dst_stride)
+{
+ struct intel_context *intel = &brw->intel;
+ GLuint size = element->count * dst_stride;
+
+ get_space(brw, size, &element->bo, &element->offset);
+
+ if (element->glarray->StrideB == 0) {
+ assert(element->count == 1);
+ element->stride = 0;
+ } else {
+ element->stride = dst_stride;
+ }
+
+ if (dst_stride == element->glarray->StrideB) {
+ if (intel->intelScreen->kernel_exec_fencing) {
+ drm_intel_gem_bo_map_gtt(element->bo);
+ memcpy((char *)element->bo->virtual + element->offset,
+ element->glarray->Ptr, size);
+ drm_intel_gem_bo_unmap_gtt(element->bo);
+ } else {
+ dri_bo_subdata(element->bo,
+ element->offset,
+ size,
+ element->glarray->Ptr);
+ }
+ } else {
+ char *dest;
+ const unsigned char *src = element->glarray->Ptr;
+ int i;
+
+ if (intel->intelScreen->kernel_exec_fencing) {
+ drm_intel_gem_bo_map_gtt(element->bo);
+ dest = element->bo->virtual;
+ dest += element->offset;
+
+ for (i = 0; i < element->count; i++) {
+ memcpy(dest, src, dst_stride);
+ src += element->glarray->StrideB;
+ dest += dst_stride;
+ }
+
+ drm_intel_gem_bo_unmap_gtt(element->bo);
+ } else {
+ void *data;
+
+ data = _mesa_malloc(dst_stride * element->count);
+ dest = data;
+ for (i = 0; i < element->count; i++) {
+ memcpy(dest, src, dst_stride);
+ src += element->glarray->StrideB;
+ dest += dst_stride;
+ }
+
+ dri_bo_subdata(element->bo,
+ element->offset,
+ size,
+ data);
+
+ _mesa_free(data);
+ }
+ }
+}
+
+static void brw_prepare_vertices(struct brw_context *brw)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ struct intel_context *intel = intel_context(ctx);
+ GLbitfield vs_inputs = brw->vs.prog_data->inputs_read;
+ GLuint i;
+ const unsigned char *ptr = NULL;
+ GLuint interleave = 0;
+ unsigned int min_index = brw->vb.min_index;
+ unsigned int max_index = brw->vb.max_index;
+
+ struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
+ GLuint nr_uploads = 0;
+
+ /* First build an array of pointers to ve's in vb.inputs_read
+ */
+ if (0)
+ _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
+
+ /* Accumulate the list of enabled arrays. */
+ brw->vb.nr_enabled = 0;
+ while (vs_inputs) {
+ GLuint i = _mesa_ffsll(vs_inputs) - 1;
+ struct brw_vertex_element *input = &brw->vb.inputs[i];
+
+ vs_inputs &= ~(1 << i);
+ brw->vb.enabled[brw->vb.nr_enabled++] = input;
+ }
+
+ /* XXX: In the rare cases where this happens we fallback all
+ * the way to software rasterization, although a tnl fallback
+ * would be sufficient. I don't know of *any* real world
+ * cases with > 17 vertex attributes enabled, so it probably
+ * isn't an issue at this point.
+ */
+ if (brw->vb.nr_enabled >= BRW_VEP_MAX) {
+ intel->Fallback = 1;
+ return;
+ }
+
+ for (i = 0; i < brw->vb.nr_enabled; i++) {
+ struct brw_vertex_element *input = brw->vb.enabled[i];
+
+ input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
+
+ if (_mesa_is_bufferobj(input->glarray->BufferObj)) {
+ struct intel_buffer_object *intel_buffer =
+ intel_buffer_object(input->glarray->BufferObj);
+
+ /* Named buffer object: Just reference its contents directly. */
+ dri_bo_unreference(input->bo);
+ input->bo = intel_bufferobj_buffer(intel, intel_buffer,
+ INTEL_READ);
+ dri_bo_reference(input->bo);
+ input->offset = (unsigned long)input->glarray->Ptr;
+ input->stride = input->glarray->StrideB;
+ input->count = input->glarray->_MaxElement;
+
+ /* This is a common place to reach if the user mistakenly supplies
+ * a pointer in place of a VBO offset. If we just let it go through,
+ * we may end up dereferencing a pointer beyond the bounds of the
+ * GTT. We would hope that the VBO's max_index would save us, but
+ * Mesa appears to hand us min/max values not clipped to the
+ * array object's _MaxElement, and _MaxElement frequently appears
+ * to be wrong anyway.
+ *
+ * The VBO spec allows application termination in this case, and it's
+ * probably a service to the poor programmer to do so rather than
+ * trying to just not render.
+ */
+ assert(input->offset < input->bo->size);
+ } else {
+ input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1;
+ if (input->bo != NULL) {
+ /* Already-uploaded vertex data is present from a previous
+ * prepare_vertices, but we had to re-validate state due to
+ * check_aperture failing and a new batch being produced.
+ */
+ continue;
+ }
+
+ /* Queue the buffer object up to be uploaded in the next pass,
+ * when we've decided if we're doing interleaved or not.
+ */
+ if (input->attrib == VERT_ATTRIB_POS) {
+ /* Position array not properly enabled:
+ */
+ if (input->glarray->StrideB == 0) {
+ intel->Fallback = 1;
+ return;
+ }
+
+ interleave = input->glarray->StrideB;
+ ptr = input->glarray->Ptr;
+ }
+ else if (interleave != input->glarray->StrideB ||
+ (const unsigned char *)input->glarray->Ptr - ptr < 0 ||
+ (const unsigned char *)input->glarray->Ptr - ptr > interleave)
+ {
+ interleave = 0;
+ }
+
+ upload[nr_uploads++] = input;
+
+ /* We rebase drawing to start at element zero only when
+ * varyings are not in vbos, which means we can end up
+ * uploading non-varying arrays (stride != 0) when min_index
+ * is zero. This doesn't matter as the amount to upload is
+ * the same for these arrays whether the draw call is rebased
+ * or not - we just have to upload the one element.
+ */
+ assert(min_index == 0 || input->glarray->StrideB == 0);
+ }
+ }
+
+ /* Handle any arrays to be uploaded. */
+ if (nr_uploads > 1 && interleave && interleave <= 256) {
+ /* All uploads are interleaved, so upload the arrays together as
+ * interleaved. First, upload the contents and set up upload[0].
+ */
+ copy_array_to_vbo_array(brw, upload[0], interleave);
+
+ for (i = 1; i < nr_uploads; i++) {
+ /* Then, just point upload[i] at upload[0]'s buffer. */
+ upload[i]->stride = interleave;
+ upload[i]->offset = upload[0]->offset +
+ ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
+ upload[i]->bo = upload[0]->bo;
+ dri_bo_reference(upload[i]->bo);
+ }
+ }
+ else {
+ /* Upload non-interleaved arrays */
+ for (i = 0; i < nr_uploads; i++) {
+ copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size);
+ }
+ }
+
+ brw_prepare_query_begin(brw);
+
+ for (i = 0; i < brw->vb.nr_enabled; i++) {
+ struct brw_vertex_element *input = brw->vb.enabled[i];
+
+ brw_add_validated_bo(brw, input->bo);
+ }
+}
+
+static void brw_emit_vertices(struct brw_context *brw)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ struct intel_context *intel = intel_context(ctx);
+ GLuint i;
+
+ brw_emit_query_begin(brw);
+
+ /* If the VS doesn't read any inputs (calculating vertex position from
+ * a state variable for some reason, for example), emit a single pad
+ * VERTEX_ELEMENT struct and bail.
+ *
+ * The stale VB state stays in place, but they don't do anything unless
+ * a VE loads from them.
+ */
+ if (brw->vb.nr_enabled == 0) {
+ BEGIN_BATCH(3, IGNORE_CLIPRECTS);
+ OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
+ OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
+ BRW_VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
+ (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+ OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
+ (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+ (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+ (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
+ ADVANCE_BATCH();
+ return;
+ }
+
+ /* Now emit VB and VEP state packets.
+ *
+ * This still defines a hardware VB for each input, even if they
+ * are interleaved or from the same VBO. TBD if this makes a
+ * performance difference.
+ */
+ BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS);
+ OUT_BATCH((CMD_VERTEX_BUFFER << 16) |
+ ((1 + brw->vb.nr_enabled * 4) - 2));
+
+ for (i = 0; i < brw->vb.nr_enabled; i++) {
+ struct brw_vertex_element *input = brw->vb.enabled[i];
+
+ OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) |
+ BRW_VB0_ACCESS_VERTEXDATA |
+ (input->stride << BRW_VB0_PITCH_SHIFT));
+ OUT_RELOC(input->bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ input->offset);
+ if (BRW_IS_IGDNG(brw)) {
+ if (input->stride) {
+ OUT_RELOC(input->bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ input->offset + input->stride * input->count - 1);
+ } else {
+ assert(input->count == 1);
+ OUT_RELOC(input->bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ input->offset + input->element_size - 1);
+ }
+ } else
+ OUT_BATCH(input->stride ? input->count : 0);
+ OUT_BATCH(0); /* Instance data step rate */
+ }
+ ADVANCE_BATCH();
+
+ BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS);
+ OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2));
+ for (i = 0; i < brw->vb.nr_enabled; i++) {
+ struct brw_vertex_element *input = brw->vb.enabled[i];
+ uint32_t format = get_surface_type(input->glarray->Type,
+ input->glarray->Size,
+ input->glarray->Format,
+ input->glarray->Normalized);
+ uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
+ uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
+ uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
+ uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
+
+ switch (input->glarray->Size) {
+ case 0: comp0 = BRW_VE1_COMPONENT_STORE_0;
+ case 1: comp1 = BRW_VE1_COMPONENT_STORE_0;
+ case 2: comp2 = BRW_VE1_COMPONENT_STORE_0;
+ case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT;
+ break;
+ }
+
+ OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) |
+ BRW_VE0_VALID |
+ (format << BRW_VE0_FORMAT_SHIFT) |
+ (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+
+ if (BRW_IS_IGDNG(brw))
+ OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
+ (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
+ (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
+ (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
+ else
+ OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
+ (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
+ (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
+ (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
+ ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
+ }
+ ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state brw_vertices = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH | BRW_NEW_VERTICES,
+ .cache = 0,
+ },
+ .prepare = brw_prepare_vertices,
+ .emit = brw_emit_vertices,
+};
+
+static void brw_prepare_indices(struct brw_context *brw)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ struct intel_context *intel = &brw->intel;
+ const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
+ GLuint ib_size;
+ dri_bo *bo = NULL;
+ struct gl_buffer_object *bufferobj;
+ GLuint offset;
+ GLuint ib_type_size;
+
+ if (index_buffer == NULL)
+ return;
+
+ ib_type_size = get_size(index_buffer->type);
+ ib_size = ib_type_size * index_buffer->count;
+ bufferobj = index_buffer->obj;;
+
+ /* Turn into a proper VBO:
+ */
+ if (!_mesa_is_bufferobj(bufferobj)) {
+ brw->ib.start_vertex_offset = 0;
+
+ /* Get new bufferobj, offset:
+ */
+ get_space(brw, ib_size, &bo, &offset);
+
+ /* Straight upload
+ */
+ if (intel->intelScreen->kernel_exec_fencing) {
+ drm_intel_gem_bo_map_gtt(bo);
+ memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size);
+ drm_intel_gem_bo_unmap_gtt(bo);
+ } else {
+ dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
+ }
+ } else {
+ offset = (GLuint) (unsigned long) index_buffer->ptr;
+ brw->ib.start_vertex_offset = 0;
+
+ /* If the index buffer isn't aligned to its element size, we have to
+ * rebase it into a temporary.
+ */
+ if ((get_size(index_buffer->type) - 1) & offset) {
+ GLubyte *map = ctx->Driver.MapBuffer(ctx,
+ GL_ELEMENT_ARRAY_BUFFER_ARB,
+ GL_DYNAMIC_DRAW_ARB,
+ bufferobj);
+ map += offset;
+
+ get_space(brw, ib_size, &bo, &offset);
+
+ dri_bo_subdata(bo, offset, ib_size, map);
+
+ ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj);
+ } else {
+ bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj),
+ INTEL_READ);
+ dri_bo_reference(bo);
+
+ /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
+ * the index buffer state when we're just moving the start index
+ * of our drawing.
+ */
+ brw->ib.start_vertex_offset = offset / ib_type_size;
+ offset = 0;
+ ib_size = bo->size;
+ }
+ }
+
+ if (brw->ib.bo != bo ||
+ brw->ib.offset != offset ||
+ brw->ib.size != ib_size)
+ {
+ drm_intel_bo_unreference(brw->ib.bo);
+ brw->ib.bo = bo;
+ brw->ib.offset = offset;
+ brw->ib.size = ib_size;
+
+ brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
+ } else {
+ drm_intel_bo_unreference(bo);
+ }
+
+ brw_add_validated_bo(brw, brw->ib.bo);
+}
+
+const struct brw_tracked_state brw_indices = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_INDICES,
+ .cache = 0,
+ },
+ .prepare = brw_prepare_indices,
+};
+
+static void brw_emit_index_buffer(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
+
+ if (index_buffer == NULL)
+ return;
+
+ /* Emit the indexbuffer packet:
+ */
+ {
+ struct brw_indexbuffer ib;
+
+ memset(&ib, 0, sizeof(ib));
+
+ ib.header.bits.opcode = CMD_INDEX_BUFFER;
+ ib.header.bits.length = sizeof(ib)/4 - 2;
+ ib.header.bits.index_format = get_index_type(index_buffer->type);
+ ib.header.bits.cut_index_enable = 0;
+
+ BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+ OUT_BATCH( ib.header.dword );
+ OUT_RELOC(brw->ib.bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ brw->ib.offset);
+ OUT_RELOC(brw->ib.bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ brw->ib.offset + brw->ib.size - 1);
+ OUT_BATCH( 0 );
+ ADVANCE_BATCH();
+ }
+}
+
+const struct brw_tracked_state brw_index_buffer = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH | BRW_NEW_INDEX_BUFFER,
+ .cache = 0,
+ },
+ .emit = brw_emit_index_buffer,
+};
diff --git a/src/gallium/drivers/i965/brw_eu.c b/src/gallium/drivers/i965/brw_eu.c
new file mode 100644
index 00000000000..1df561386e6
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu.c
@@ -0,0 +1,254 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+
+/* How does predicate control work when execution_size != 8? Do I
+ * need to test/set for 0xffff when execution_size is 16?
+ */
+void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value )
+{
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+ if (value != 0xff) {
+ if (value != p->flag_value) {
+ brw_push_insn_state(p);
+ brw_MOV(p, brw_flag_reg(), brw_imm_uw(value));
+ p->flag_value = value;
+ brw_pop_insn_state(p);
+ }
+
+ p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+ }
+}
+
+void brw_set_predicate_control( struct brw_compile *p, GLuint pc )
+{
+ p->current->header.predicate_control = pc;
+}
+
+void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional )
+{
+ p->current->header.destreg__conditionalmod = conditional;
+}
+
+void brw_set_access_mode( struct brw_compile *p, GLuint access_mode )
+{
+ p->current->header.access_mode = access_mode;
+}
+
+void brw_set_compression_control( struct brw_compile *p, GLboolean compression_control )
+{
+ p->current->header.compression_control = compression_control;
+}
+
+void brw_set_mask_control( struct brw_compile *p, GLuint value )
+{
+ p->current->header.mask_control = value;
+}
+
+void brw_set_saturate( struct brw_compile *p, GLuint value )
+{
+ p->current->header.saturate = value;
+}
+
+void brw_push_insn_state( struct brw_compile *p )
+{
+ assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
+ memcpy(p->current+1, p->current, sizeof(struct brw_instruction));
+ p->current++;
+}
+
+void brw_pop_insn_state( struct brw_compile *p )
+{
+ assert(p->current != p->stack);
+ p->current--;
+}
+
+
+/***********************************************************************
+ */
+void brw_init_compile( struct brw_context *brw, struct brw_compile *p )
+{
+ p->brw = brw;
+ p->nr_insn = 0;
+ p->current = p->stack;
+ memset(p->current, 0, sizeof(p->current[0]));
+
+ /* Some defaults?
+ */
+ brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
+ brw_set_saturate(p, 0);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_predicate_control_flag_value(p, 0xff);
+}
+
+
+const GLuint *brw_get_program( struct brw_compile *p,
+ GLuint *sz )
+{
+ GLuint i;
+
+ for (i = 0; i < 8; i++)
+ brw_NOP(p);
+
+ *sz = p->nr_insn * sizeof(struct brw_instruction);
+ return (const GLuint *)p->store;
+}
+
+
+
+/**
+ * Subroutine calls require special attention.
+ * Mesa instructions may be expanded into multiple hardware instructions
+ * so the prog_instruction::BranchTarget field can't be used as an index
+ * into the hardware instructions.
+ *
+ * The BranchTarget field isn't needed, however. Mesa's GLSL compiler
+ * emits CAL and BGNSUB instructions with labels that can be used to map
+ * subroutine calls to actual subroutine code blocks.
+ *
+ * The structures and function here implement patching of CAL instructions
+ * so they jump to the right subroutine code...
+ */
+
+
+/**
+ * For each OPCODE_BGNSUB we create one of these.
+ */
+struct brw_glsl_label
+{
+ const char *name; /**< the label string */
+ GLuint position; /**< the position of the brw instruction for this label */
+ struct brw_glsl_label *next; /**< next in linked list */
+};
+
+
+/**
+ * For each OPCODE_CAL we create one of these.
+ */
+struct brw_glsl_call
+{
+ GLuint call_inst_pos; /**< location of the CAL instruction */
+ const char *sub_name; /**< name of subroutine to call */
+ struct brw_glsl_call *next; /**< next in linked list */
+};
+
+
+/**
+ * Called for each OPCODE_BGNSUB.
+ */
+void
+brw_save_label(struct brw_compile *c, const char *name, GLuint position)
+{
+ struct brw_glsl_label *label = CALLOC_STRUCT(brw_glsl_label);
+ label->name = name;
+ label->position = position;
+ label->next = c->first_label;
+ c->first_label = label;
+}
+
+
+/**
+ * Called for each OPCODE_CAL.
+ */
+void
+brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos)
+{
+ struct brw_glsl_call *call = CALLOC_STRUCT(brw_glsl_call);
+ call->call_inst_pos = call_pos;
+ call->sub_name = name;
+ call->next = c->first_call;
+ c->first_call = call;
+}
+
+
+/**
+ * Lookup a label, return label's position/offset.
+ */
+static GLuint
+brw_lookup_label(struct brw_compile *c, const char *name)
+{
+ const struct brw_glsl_label *label;
+ for (label = c->first_label; label; label = label->next) {
+ if (strcmp(name, label->name) == 0) {
+ return label->position;
+ }
+ }
+ abort(); /* should never happen */
+ return ~0;
+}
+
+
+/**
+ * When we're done generating code, this function is called to resolve
+ * subroutine calls.
+ */
+void
+brw_resolve_cals(struct brw_compile *c)
+{
+ const struct brw_glsl_call *call;
+
+ for (call = c->first_call; call; call = call->next) {
+ const GLuint sub_loc = brw_lookup_label(c, call->sub_name);
+ struct brw_instruction *brw_call_inst = &c->store[call->call_inst_pos];
+ struct brw_instruction *brw_sub_inst = &c->store[sub_loc];
+ GLint offset = brw_sub_inst - brw_call_inst;
+
+ /* patch brw_inst1 to point to brw_inst2 */
+ brw_set_src1(brw_call_inst, brw_imm_d(offset * 16));
+ }
+
+ /* free linked list of calls */
+ {
+ struct brw_glsl_call *call, *next;
+ for (call = c->first_call; call; call = next) {
+ next = call->next;
+ _mesa_free(call);
+ }
+ c->first_call = NULL;
+ }
+
+ /* free linked list of labels */
+ {
+ struct brw_glsl_label *label, *next;
+ for (label = c->first_label; label; label = next) {
+ next = label->next;
+ _mesa_free(label);
+ }
+ c->first_label = NULL;
+ }
+}
diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h
new file mode 100644
index 00000000000..30603bdd0e6
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu.h
@@ -0,0 +1,968 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#ifndef BRW_EU_H
+#define BRW_EU_H
+
+#include "brw_structs.h"
+#include "brw_defines.h"
+#include "shader/prog_instruction.h"
+
+#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
+#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
+
+#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
+#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
+
+
+#define REG_SIZE (8*4)
+
+
+/* These aren't hardware structs, just something useful for us to pass around:
+ *
+ * Align1 operation has a lot of control over input ranges. Used in
+ * WM programs to implement shaders decomposed into "channel serial"
+ * or "structure of array" form:
+ */
+struct brw_reg
+{
+ GLuint type:4;
+ GLuint file:2;
+ GLuint nr:8;
+ GLuint subnr:5; /* :1 in align16 */
+ GLuint negate:1; /* source only */
+ GLuint abs:1; /* source only */
+ GLuint vstride:4; /* source only */
+ GLuint width:3; /* src only, align1 only */
+ GLuint hstride:2; /* align1 only */
+ GLuint address_mode:1; /* relative addressing, hopefully! */
+ GLuint pad0:1;
+
+ union {
+ struct {
+ GLuint swizzle:8; /* src only, align16 only */
+ GLuint writemask:4; /* dest only, align16 only */
+ GLint indirect_offset:10; /* relative addressing offset */
+ GLuint pad1:10; /* two dwords total */
+ } bits;
+
+ GLfloat f;
+ GLint d;
+ GLuint ud;
+ } dw1;
+};
+
+
+struct brw_indirect {
+ GLuint addr_subnr:4;
+ GLint addr_offset:10;
+ GLuint pad:18;
+};
+
+
+struct brw_glsl_label;
+struct brw_glsl_call;
+
+
+
+#define BRW_EU_MAX_INSN_STACK 5
+#define BRW_EU_MAX_INSN 10000
+
+struct brw_compile {
+ struct brw_instruction store[BRW_EU_MAX_INSN];
+ GLuint nr_insn;
+
+ /* Allow clients to push/pop instruction state:
+ */
+ struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
+ struct brw_instruction *current;
+
+ GLuint flag_value;
+ GLboolean single_program_flow;
+ struct brw_context *brw;
+
+ struct brw_glsl_label *first_label; /**< linked list of labels */
+ struct brw_glsl_call *first_call; /**< linked list of CALs */
+};
+
+
+void
+brw_save_label(struct brw_compile *c, const char *name, GLuint position);
+
+void
+brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos);
+
+void
+brw_resolve_cals(struct brw_compile *c);
+
+
+
+static INLINE int type_sz( GLuint type )
+{
+ switch( type ) {
+ case BRW_REGISTER_TYPE_UD:
+ case BRW_REGISTER_TYPE_D:
+ case BRW_REGISTER_TYPE_F:
+ return 4;
+ case BRW_REGISTER_TYPE_HF:
+ case BRW_REGISTER_TYPE_UW:
+ case BRW_REGISTER_TYPE_W:
+ return 2;
+ case BRW_REGISTER_TYPE_UB:
+ case BRW_REGISTER_TYPE_B:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/**
+ * Construct a brw_reg.
+ * \param file one of the BRW_x_REGISTER_FILE values
+ * \param nr register number/index
+ * \param subnr register sub number
+ * \param type one of BRW_REGISTER_TYPE_x
+ * \param vstride one of BRW_VERTICAL_STRIDE_x
+ * \param width one of BRW_WIDTH_x
+ * \param hstride one of BRW_HORIZONTAL_STRIDE_x
+ * \param swizzle one of BRW_SWIZZLE_x
+ * \param writemask WRITEMASK_X/Y/Z/W bitfield
+ */
+static INLINE struct brw_reg brw_reg( GLuint file,
+ GLuint nr,
+ GLuint subnr,
+ GLuint type,
+ GLuint vstride,
+ GLuint width,
+ GLuint hstride,
+ GLuint swizzle,
+ GLuint writemask )
+{
+ struct brw_reg reg;
+ if (type == BRW_GENERAL_REGISTER_FILE)
+ assert(nr < BRW_MAX_GRF);
+ else if (type == BRW_MESSAGE_REGISTER_FILE)
+ assert(nr < BRW_MAX_MRF);
+ else if (type == BRW_ARCHITECTURE_REGISTER_FILE)
+ assert(nr <= BRW_ARF_IP);
+
+ reg.type = type;
+ reg.file = file;
+ reg.nr = nr;
+ reg.subnr = subnr * type_sz(type);
+ reg.negate = 0;
+ reg.abs = 0;
+ reg.vstride = vstride;
+ reg.width = width;
+ reg.hstride = hstride;
+ reg.address_mode = BRW_ADDRESS_DIRECT;
+ reg.pad0 = 0;
+
+ /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
+ * set swizzle and writemask to W, as the lower bits of subnr will
+ * be lost when converted to align16. This is probably too much to
+ * keep track of as you'd want it adjusted by suboffset(), etc.
+ * Perhaps fix up when converting to align16?
+ */
+ reg.dw1.bits.swizzle = swizzle;
+ reg.dw1.bits.writemask = writemask;
+ reg.dw1.bits.indirect_offset = 0;
+ reg.dw1.bits.pad1 = 0;
+ return reg;
+}
+
+/** Construct float[16] register */
+static INLINE struct brw_reg brw_vec16_reg( GLuint file,
+ GLuint nr,
+ GLuint subnr )
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_16,
+ BRW_WIDTH_16,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYZW,
+ WRITEMASK_XYZW);
+}
+
+/** Construct float[8] register */
+static INLINE struct brw_reg brw_vec8_reg( GLuint file,
+ GLuint nr,
+ GLuint subnr )
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_8,
+ BRW_WIDTH_8,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYZW,
+ WRITEMASK_XYZW);
+}
+
+/** Construct float[4] register */
+static INLINE struct brw_reg brw_vec4_reg( GLuint file,
+ GLuint nr,
+ GLuint subnr )
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYZW,
+ WRITEMASK_XYZW);
+}
+
+/** Construct float[2] register */
+static INLINE struct brw_reg brw_vec2_reg( GLuint file,
+ GLuint nr,
+ GLuint subnr )
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYXY,
+ WRITEMASK_XY);
+}
+
+/** Construct float[1] register */
+static INLINE struct brw_reg brw_vec1_reg( GLuint file,
+ GLuint nr,
+ GLuint subnr )
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_0,
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XXXX,
+ WRITEMASK_X);
+}
+
+
+static INLINE struct brw_reg retype( struct brw_reg reg,
+ GLuint type )
+{
+ reg.type = type;
+ return reg;
+}
+
+static INLINE struct brw_reg suboffset( struct brw_reg reg,
+ GLuint delta )
+{
+ reg.subnr += delta * type_sz(reg.type);
+ return reg;
+}
+
+
+static INLINE struct brw_reg offset( struct brw_reg reg,
+ GLuint delta )
+{
+ reg.nr += delta;
+ return reg;
+}
+
+
+static INLINE struct brw_reg byte_offset( struct brw_reg reg,
+ GLuint bytes )
+{
+ GLuint newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
+ reg.nr = newoffset / REG_SIZE;
+ reg.subnr = newoffset % REG_SIZE;
+ return reg;
+}
+
+
+/** Construct unsigned word[16] register */
+static INLINE struct brw_reg brw_uw16_reg( GLuint file,
+ GLuint nr,
+ GLuint subnr )
+{
+ return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
+}
+
+/** Construct unsigned word[8] register */
+static INLINE struct brw_reg brw_uw8_reg( GLuint file,
+ GLuint nr,
+ GLuint subnr )
+{
+ return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
+}
+
+/** Construct unsigned word[1] register */
+static INLINE struct brw_reg brw_uw1_reg( GLuint file,
+ GLuint nr,
+ GLuint subnr )
+{
+ return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
+}
+
+static INLINE struct brw_reg brw_imm_reg( GLuint type )
+{
+ return brw_reg( BRW_IMMEDIATE_VALUE,
+ 0,
+ 0,
+ type,
+ BRW_VERTICAL_STRIDE_0,
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ 0,
+ 0);
+}
+
+/** Construct float immediate register */
+static INLINE struct brw_reg brw_imm_f( GLfloat f )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
+ imm.dw1.f = f;
+ return imm;
+}
+
+/** Construct integer immediate register */
+static INLINE struct brw_reg brw_imm_d( GLint d )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
+ imm.dw1.d = d;
+ return imm;
+}
+
+/** Construct uint immediate register */
+static INLINE struct brw_reg brw_imm_ud( GLuint ud )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
+ imm.dw1.ud = ud;
+ return imm;
+}
+
+/** Construct ushort immediate register */
+static INLINE struct brw_reg brw_imm_uw( GLushort uw )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
+ imm.dw1.ud = uw | (uw << 16);
+ return imm;
+}
+
+/** Construct short immediate register */
+static INLINE struct brw_reg brw_imm_w( GLshort w )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
+ imm.dw1.d = w | (w << 16);
+ return imm;
+}
+
+/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
+ * numbers alias with _V and _VF below:
+ */
+
+/** Construct vector of eight signed half-byte values */
+static INLINE struct brw_reg brw_imm_v( GLuint v )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
+ imm.vstride = BRW_VERTICAL_STRIDE_0;
+ imm.width = BRW_WIDTH_8;
+ imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+ imm.dw1.ud = v;
+ return imm;
+}
+
+/** Construct vector of four 8-bit float values */
+static INLINE struct brw_reg brw_imm_vf( GLuint v )
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+ imm.vstride = BRW_VERTICAL_STRIDE_0;
+ imm.width = BRW_WIDTH_4;
+ imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+ imm.dw1.ud = v;
+ return imm;
+}
+
+#define VF_ZERO 0x0
+#define VF_ONE 0x30
+#define VF_NEG (1<<7)
+
+static INLINE struct brw_reg brw_imm_vf4( GLuint v0,
+ GLuint v1,
+ GLuint v2,
+ GLuint v3)
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+ imm.vstride = BRW_VERTICAL_STRIDE_0;
+ imm.width = BRW_WIDTH_4;
+ imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+ imm.dw1.ud = ((v0 << 0) |
+ (v1 << 8) |
+ (v2 << 16) |
+ (v3 << 24));
+ return imm;
+}
+
+
+static INLINE struct brw_reg brw_address( struct brw_reg reg )
+{
+ return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
+}
+
+/** Construct float[1] general-purpose register */
+static INLINE struct brw_reg brw_vec1_grf( GLuint nr, GLuint subnr )
+{
+ return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+/** Construct float[2] general-purpose register */
+static INLINE struct brw_reg brw_vec2_grf( GLuint nr, GLuint subnr )
+{
+ return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+/** Construct float[4] general-purpose register */
+static INLINE struct brw_reg brw_vec4_grf( GLuint nr, GLuint subnr )
+{
+ return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+/** Construct float[8] general-purpose register */
+static INLINE struct brw_reg brw_vec8_grf( GLuint nr, GLuint subnr )
+{
+ return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+
+static INLINE struct brw_reg brw_uw8_grf( GLuint nr, GLuint subnr )
+{
+ return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+static INLINE struct brw_reg brw_uw16_grf( GLuint nr, GLuint subnr )
+{
+ return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+
+/** Construct null register (usually used for setting condition codes) */
+static INLINE struct brw_reg brw_null_reg( void )
+{
+ return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_NULL,
+ 0);
+}
+
+static INLINE struct brw_reg brw_address_reg( GLuint subnr )
+{
+ return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_ADDRESS,
+ subnr);
+}
+
+/* If/else instructions break in align16 mode if writemask & swizzle
+ * aren't xyzw. This goes against the convention for other scalar
+ * regs:
+ */
+static INLINE struct brw_reg brw_ip_reg( void )
+{
+ return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_IP,
+ 0,
+ BRW_REGISTER_TYPE_UD,
+ BRW_VERTICAL_STRIDE_4, /* ? */
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, /* NOTE! */
+ WRITEMASK_XYZW); /* NOTE! */
+}
+
+static INLINE struct brw_reg brw_acc_reg( void )
+{
+ return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_ACCUMULATOR,
+ 0);
+}
+
+
+static INLINE struct brw_reg brw_flag_reg( void )
+{
+ return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_FLAG,
+ 0);
+}
+
+
+static INLINE struct brw_reg brw_mask_reg( GLuint subnr )
+{
+ return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_MASK,
+ subnr);
+}
+
+static INLINE struct brw_reg brw_message_reg( GLuint nr )
+{
+ assert(nr < BRW_MAX_MRF);
+ return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE,
+ nr,
+ 0);
+}
+
+
+
+
+/* This is almost always called with a numeric constant argument, so
+ * make things easy to evaluate at compile time:
+ */
+static INLINE GLuint cvt( GLuint val )
+{
+ switch (val) {
+ case 0: return 0;
+ case 1: return 1;
+ case 2: return 2;
+ case 4: return 3;
+ case 8: return 4;
+ case 16: return 5;
+ case 32: return 6;
+ }
+ return 0;
+}
+
+static INLINE struct brw_reg stride( struct brw_reg reg,
+ GLuint vstride,
+ GLuint width,
+ GLuint hstride )
+{
+ reg.vstride = cvt(vstride);
+ reg.width = cvt(width) - 1;
+ reg.hstride = cvt(hstride);
+ return reg;
+}
+
+
+static INLINE struct brw_reg vec16( struct brw_reg reg )
+{
+ return stride(reg, 16,16,1);
+}
+
+static INLINE struct brw_reg vec8( struct brw_reg reg )
+{
+ return stride(reg, 8,8,1);
+}
+
+static INLINE struct brw_reg vec4( struct brw_reg reg )
+{
+ return stride(reg, 4,4,1);
+}
+
+static INLINE struct brw_reg vec2( struct brw_reg reg )
+{
+ return stride(reg, 2,2,1);
+}
+
+static INLINE struct brw_reg vec1( struct brw_reg reg )
+{
+ return stride(reg, 0,1,0);
+}
+
+
+static INLINE struct brw_reg get_element( struct brw_reg reg, GLuint elt )
+{
+ return vec1(suboffset(reg, elt));
+}
+
+static INLINE struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt )
+{
+ return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt));
+}
+
+
+static INLINE struct brw_reg brw_swizzle( struct brw_reg reg,
+ GLuint x,
+ GLuint y,
+ GLuint z,
+ GLuint w)
+{
+ reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x),
+ BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
+ BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
+ BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
+ return reg;
+}
+
+
+static INLINE struct brw_reg brw_swizzle1( struct brw_reg reg,
+ GLuint x )
+{
+ return brw_swizzle(reg, x, x, x, x);
+}
+
+static INLINE struct brw_reg brw_writemask( struct brw_reg reg,
+ GLuint mask )
+{
+ reg.dw1.bits.writemask &= mask;
+ return reg;
+}
+
+static INLINE struct brw_reg brw_set_writemask( struct brw_reg reg,
+ GLuint mask )
+{
+ reg.dw1.bits.writemask = mask;
+ return reg;
+}
+
+static INLINE struct brw_reg negate( struct brw_reg reg )
+{
+ reg.negate ^= 1;
+ return reg;
+}
+
+static INLINE struct brw_reg brw_abs( struct brw_reg reg )
+{
+ reg.abs = 1;
+ return reg;
+}
+
+/***********************************************************************
+ */
+static INLINE struct brw_reg brw_vec4_indirect( GLuint subnr,
+ GLint offset )
+{
+ struct brw_reg reg = brw_vec4_grf(0, 0);
+ reg.subnr = subnr;
+ reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+ reg.dw1.bits.indirect_offset = offset;
+ return reg;
+}
+
+static INLINE struct brw_reg brw_vec1_indirect( GLuint subnr,
+ GLint offset )
+{
+ struct brw_reg reg = brw_vec1_grf(0, 0);
+ reg.subnr = subnr;
+ reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+ reg.dw1.bits.indirect_offset = offset;
+ return reg;
+}
+
+static INLINE struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset)
+{
+ return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
+}
+
+static INLINE struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset)
+{
+ return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
+}
+
+static INLINE struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset)
+{
+ return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
+}
+
+static INLINE struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset)
+{
+ return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
+}
+
+static INLINE struct brw_reg deref_1d(struct brw_indirect ptr, GLint offset)
+{
+ return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);
+}
+
+static INLINE struct brw_reg deref_1ud(struct brw_indirect ptr, GLint offset)
+{
+ return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);
+}
+
+static INLINE struct brw_reg get_addr_reg(struct brw_indirect ptr)
+{
+ return brw_address_reg(ptr.addr_subnr);
+}
+
+static INLINE struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset )
+{
+ ptr.addr_offset += offset;
+ return ptr;
+}
+
+static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset )
+{
+ struct brw_indirect ptr;
+ ptr.addr_subnr = addr_subnr;
+ ptr.addr_offset = offset;
+ ptr.pad = 0;
+ return ptr;
+}
+
+/** Do two brw_regs refer to the same register? */
+static INLINE GLboolean
+brw_same_reg(struct brw_reg r1, struct brw_reg r2)
+{
+ return r1.file == r2.file && r1.nr == r2.nr;
+}
+
+static INLINE struct brw_instruction *current_insn( struct brw_compile *p)
+{
+ return &p->store[p->nr_insn];
+}
+
+void brw_pop_insn_state( struct brw_compile *p );
+void brw_push_insn_state( struct brw_compile *p );
+void brw_set_mask_control( struct brw_compile *p, GLuint value );
+void brw_set_saturate( struct brw_compile *p, GLuint value );
+void brw_set_access_mode( struct brw_compile *p, GLuint access_mode );
+void brw_set_compression_control( struct brw_compile *p, GLboolean control );
+void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value );
+void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
+void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
+
+void brw_init_compile( struct brw_context *, struct brw_compile *p );
+const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz );
+
+
+/* Helpers for regular instructions:
+ */
+#define ALU1(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0);
+
+#define ALU2(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1);
+
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(JMPI)
+ALU2(ADD)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU1(RNDZ)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+
+#undef ALU1
+#undef ALU2
+
+
+
+/* Helpers for SEND instruction:
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot,
+ GLboolean writes_complete,
+ GLuint offset,
+ GLuint swizzle);
+
+void brw_ff_sync(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot,
+ GLboolean writes_complete,
+ GLuint offset,
+ GLuint swizzle);
+
+void brw_fb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint binding_table_index,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot);
+
+void brw_SAMPLE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint binding_table_index,
+ GLuint sampler,
+ GLuint writemask,
+ GLuint msg_type,
+ GLuint response_length,
+ GLuint msg_length,
+ GLboolean eot,
+ GLuint header_present,
+ GLuint simd_mode);
+
+void brw_math_16( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint function,
+ GLuint saturate,
+ GLuint msg_reg_nr,
+ struct brw_reg src,
+ GLuint precision );
+
+void brw_math( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint function,
+ GLuint saturate,
+ GLuint msg_reg_nr,
+ struct brw_reg src,
+ GLuint data_type,
+ GLuint precision );
+
+void brw_dp_READ_16( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint scratch_offset );
+
+void brw_dp_READ_4( struct brw_compile *p,
+ struct brw_reg dest,
+ GLboolean relAddr,
+ GLuint location,
+ GLuint bind_table_index );
+
+void brw_dp_READ_4_vs( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint oword,
+ GLboolean relAddr,
+ struct brw_reg addrReg,
+ GLuint location,
+ GLuint bind_table_index );
+
+void brw_dp_WRITE_16( struct brw_compile *p,
+ struct brw_reg src,
+ GLuint scratch_offset );
+
+/* If/else/endif. Works by manipulating the execution flags on each
+ * channel.
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p,
+ GLuint execute_size);
+
+struct brw_instruction *brw_ELSE(struct brw_compile *p,
+ struct brw_instruction *if_insn);
+
+void brw_ENDIF(struct brw_compile *p,
+ struct brw_instruction *if_or_else_insn);
+
+
+/* DO/WHILE loops:
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p,
+ GLuint execute_size);
+
+struct brw_instruction *brw_WHILE(struct brw_compile *p,
+ struct brw_instruction *patch_insn);
+
+struct brw_instruction *brw_BREAK(struct brw_compile *p);
+struct brw_instruction *brw_CONT(struct brw_compile *p);
+/* Forward jumps:
+ */
+void brw_land_fwd_jump(struct brw_compile *p,
+ struct brw_instruction *jmp_insn);
+
+
+
+void brw_NOP(struct brw_compile *p);
+
+/* Special case: there is never a destination, execution size will be
+ * taken from src0:
+ */
+void brw_CMP(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint conditional,
+ struct brw_reg src0,
+ struct brw_reg src1);
+
+void brw_print_reg( struct brw_reg reg );
+
+
+/***********************************************************************
+ * brw_eu_util.c:
+ */
+
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+ struct brw_indirect dst_ptr,
+ struct brw_indirect src_ptr,
+ GLuint count);
+
+void brw_copy_from_indirect(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_indirect ptr,
+ GLuint count);
+
+void brw_copy4(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ GLuint count);
+
+void brw_copy8(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ GLuint count);
+
+void brw_math_invert( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src);
+
+void brw_set_src1( struct brw_instruction *insn,
+ struct brw_reg reg );
+#endif
diff --git a/src/gallium/drivers/i965/brw_eu_debug.c b/src/gallium/drivers/i965/brw_eu_debug.c
new file mode 100644
index 00000000000..29f3f6d02fa
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu_debug.c
@@ -0,0 +1,95 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "main/mtypes.h"
+#include "main/imports.h"
+#include "brw_eu.h"
+
+void brw_print_reg( struct brw_reg hwreg )
+{
+ static const char *file[] = {
+ "arf",
+ "grf",
+ "msg",
+ "imm"
+ };
+
+ static const char *type[] = {
+ "ud",
+ "d",
+ "uw",
+ "w",
+ "ub",
+ "vf",
+ "hf",
+ "f"
+ };
+
+ _mesa_printf("%s%s",
+ hwreg.abs ? "abs/" : "",
+ hwreg.negate ? "-" : "");
+
+ if (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
+ hwreg.nr % 2 == 0 &&
+ hwreg.subnr == 0 &&
+ hwreg.vstride == BRW_VERTICAL_STRIDE_8 &&
+ hwreg.width == BRW_WIDTH_8 &&
+ hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+ hwreg.type == BRW_REGISTER_TYPE_F) {
+ /* vector register */
+ _mesa_printf("vec%d", hwreg.nr);
+ }
+ else if (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
+ hwreg.vstride == BRW_VERTICAL_STRIDE_0 &&
+ hwreg.width == BRW_WIDTH_1 &&
+ hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 &&
+ hwreg.type == BRW_REGISTER_TYPE_F) {
+ /* "scalar" register */
+ _mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
+ }
+ else if (hwreg.file == BRW_IMMEDIATE_VALUE) {
+ _mesa_printf("imm %f", hwreg.dw1.f);
+ }
+ else {
+ _mesa_printf("%s%d.%d<%d;%d,%d>:%s",
+ file[hwreg.file],
+ hwreg.nr,
+ hwreg.subnr / type_sz(hwreg.type),
+ hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0,
+ 1<<hwreg.width,
+ hwreg.hstride ? (1<<(hwreg.hstride-1)) : 0,
+ type[hwreg.type]);
+ }
+}
+
+
+
diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c
new file mode 100644
index 00000000000..241cdc33f86
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu_emit.c
@@ -0,0 +1,1425 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+
+
+/***********************************************************************
+ * Internal helper for constructing instructions
+ */
+
+static void guess_execution_size( struct brw_instruction *insn,
+ struct brw_reg reg )
+{
+ if (reg.width == BRW_WIDTH_8 &&
+ insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
+ insn->header.execution_size = BRW_EXECUTE_16;
+ else
+ insn->header.execution_size = reg.width; /* note - definitions are compatible */
+}
+
+
+static void brw_set_dest( struct brw_instruction *insn,
+ struct brw_reg dest )
+{
+ if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE)
+ assert(dest.nr < 128);
+
+ insn->bits1.da1.dest_reg_file = dest.file;
+ insn->bits1.da1.dest_reg_type = dest.type;
+ insn->bits1.da1.dest_address_mode = dest.address_mode;
+
+ if (dest.address_mode == BRW_ADDRESS_DIRECT) {
+ insn->bits1.da1.dest_reg_nr = dest.nr;
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits1.da1.dest_subreg_nr = dest.subnr;
+ if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+ dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+ insn->bits1.da1.dest_horiz_stride = dest.hstride;
+ }
+ else {
+ insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
+ insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
+ }
+ }
+ else {
+ insn->bits1.ia1.dest_subreg_nr = dest.subnr;
+
+ /* These are different sizes in align1 vs align16:
+ */
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+ if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+ dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+ insn->bits1.ia1.dest_horiz_stride = dest.hstride;
+ }
+ else {
+ insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+ }
+ }
+
+ /* NEW: Set the execution size based on dest.width and
+ * insn->compression_control:
+ */
+ guess_execution_size(insn, dest);
+}
+
+static void brw_set_src0( struct brw_instruction *insn,
+ struct brw_reg reg )
+{
+ assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+
+ if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
+ assert(reg.nr < 128);
+
+ insn->bits1.da1.src0_reg_file = reg.file;
+ insn->bits1.da1.src0_reg_type = reg.type;
+ insn->bits2.da1.src0_abs = reg.abs;
+ insn->bits2.da1.src0_negate = reg.negate;
+ insn->bits2.da1.src0_address_mode = reg.address_mode;
+
+ if (reg.file == BRW_IMMEDIATE_VALUE) {
+ insn->bits3.ud = reg.dw1.ud;
+
+ /* Required to set some fields in src1 as well:
+ */
+ insn->bits1.da1.src1_reg_file = 0; /* arf */
+ insn->bits1.da1.src1_reg_type = reg.type;
+ }
+ else
+ {
+ if (reg.address_mode == BRW_ADDRESS_DIRECT) {
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits2.da1.src0_subreg_nr = reg.subnr;
+ insn->bits2.da1.src0_reg_nr = reg.nr;
+ }
+ else {
+ insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
+ insn->bits2.da16.src0_reg_nr = reg.nr;
+ }
+ }
+ else {
+ insn->bits2.ia1.src0_subreg_nr = reg.subnr;
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
+ }
+ else {
+ insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
+ }
+ }
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ if (reg.width == BRW_WIDTH_1 &&
+ insn->header.execution_size == BRW_EXECUTE_1) {
+ insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+ insn->bits2.da1.src0_width = BRW_WIDTH_1;
+ insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
+ }
+ else {
+ insn->bits2.da1.src0_horiz_stride = reg.hstride;
+ insn->bits2.da1.src0_width = reg.width;
+ insn->bits2.da1.src0_vert_stride = reg.vstride;
+ }
+ }
+ else {
+ insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+ insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+ insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+ insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+ /* This is an oddity of the fact we're using the same
+ * descriptions for registers in align_16 as align_1:
+ */
+ if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+ insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
+ else
+ insn->bits2.da16.src0_vert_stride = reg.vstride;
+ }
+ }
+}
+
+
+void brw_set_src1( struct brw_instruction *insn,
+ struct brw_reg reg )
+{
+ assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+
+ assert(reg.nr < 128);
+
+ insn->bits1.da1.src1_reg_file = reg.file;
+ insn->bits1.da1.src1_reg_type = reg.type;
+ insn->bits3.da1.src1_abs = reg.abs;
+ insn->bits3.da1.src1_negate = reg.negate;
+
+ /* Only src1 can be immediate in two-argument instructions.
+ */
+ assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
+
+ if (reg.file == BRW_IMMEDIATE_VALUE) {
+ insn->bits3.ud = reg.dw1.ud;
+ }
+ else {
+ /* This is a hardware restriction, which may or may not be lifted
+ * in the future:
+ */
+ assert (reg.address_mode == BRW_ADDRESS_DIRECT);
+ //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ insn->bits3.da1.src1_subreg_nr = reg.subnr;
+ insn->bits3.da1.src1_reg_nr = reg.nr;
+ }
+ else {
+ insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
+ insn->bits3.da16.src1_reg_nr = reg.nr;
+ }
+
+ if (insn->header.access_mode == BRW_ALIGN_1) {
+ if (reg.width == BRW_WIDTH_1 &&
+ insn->header.execution_size == BRW_EXECUTE_1) {
+ insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+ insn->bits3.da1.src1_width = BRW_WIDTH_1;
+ insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
+ }
+ else {
+ insn->bits3.da1.src1_horiz_stride = reg.hstride;
+ insn->bits3.da1.src1_width = reg.width;
+ insn->bits3.da1.src1_vert_stride = reg.vstride;
+ }
+ }
+ else {
+ insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+ insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+ insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+ insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+ /* This is an oddity of the fact we're using the same
+ * descriptions for registers in align_16 as align_1:
+ */
+ if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+ insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
+ else
+ insn->bits3.da16.src1_vert_stride = reg.vstride;
+ }
+ }
+}
+
+
+
+static void brw_set_math_message( struct brw_context *brw,
+ struct brw_instruction *insn,
+ GLuint msg_length,
+ GLuint response_length,
+ GLuint function,
+ GLuint integer_type,
+ GLboolean low_precision,
+ GLboolean saturate,
+ GLuint dataType )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.math_igdng.function = function;
+ insn->bits3.math_igdng.int_type = integer_type;
+ insn->bits3.math_igdng.precision = low_precision;
+ insn->bits3.math_igdng.saturate = saturate;
+ insn->bits3.math_igdng.data_type = dataType;
+ insn->bits3.math_igdng.snapshot = 0;
+ insn->bits3.math_igdng.header_present = 0;
+ insn->bits3.math_igdng.response_length = response_length;
+ insn->bits3.math_igdng.msg_length = msg_length;
+ insn->bits3.math_igdng.end_of_thread = 0;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH;
+ insn->bits2.send_igdng.end_of_thread = 0;
+ } else {
+ insn->bits3.math.function = function;
+ insn->bits3.math.int_type = integer_type;
+ insn->bits3.math.precision = low_precision;
+ insn->bits3.math.saturate = saturate;
+ insn->bits3.math.data_type = dataType;
+ insn->bits3.math.response_length = response_length;
+ insn->bits3.math.msg_length = msg_length;
+ insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
+ insn->bits3.math.end_of_thread = 0;
+ }
+}
+
+
+static void brw_set_ff_sync_message( struct brw_context *brw,
+ struct brw_instruction *insn,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean end_of_thread,
+ GLboolean complete,
+ GLuint offset,
+ GLuint swizzle_control )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->bits3.urb_igdng.opcode = 1;
+ insn->bits3.urb_igdng.offset = offset;
+ insn->bits3.urb_igdng.swizzle_control = swizzle_control;
+ insn->bits3.urb_igdng.allocate = allocate;
+ insn->bits3.urb_igdng.used = used;
+ insn->bits3.urb_igdng.complete = complete;
+ insn->bits3.urb_igdng.header_present = 1;
+ insn->bits3.urb_igdng.response_length = response_length;
+ insn->bits3.urb_igdng.msg_length = msg_length;
+ insn->bits3.urb_igdng.end_of_thread = end_of_thread;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
+ insn->bits2.send_igdng.end_of_thread = end_of_thread;
+}
+
+static void brw_set_urb_message( struct brw_context *brw,
+ struct brw_instruction *insn,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean end_of_thread,
+ GLboolean complete,
+ GLuint offset,
+ GLuint swizzle_control )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.urb_igdng.opcode = 0; /* ? */
+ insn->bits3.urb_igdng.offset = offset;
+ insn->bits3.urb_igdng.swizzle_control = swizzle_control;
+ insn->bits3.urb_igdng.allocate = allocate;
+ insn->bits3.urb_igdng.used = used; /* ? */
+ insn->bits3.urb_igdng.complete = complete;
+ insn->bits3.urb_igdng.header_present = 1;
+ insn->bits3.urb_igdng.response_length = response_length;
+ insn->bits3.urb_igdng.msg_length = msg_length;
+ insn->bits3.urb_igdng.end_of_thread = end_of_thread;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
+ insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ } else {
+ insn->bits3.urb.opcode = 0; /* ? */
+ insn->bits3.urb.offset = offset;
+ insn->bits3.urb.swizzle_control = swizzle_control;
+ insn->bits3.urb.allocate = allocate;
+ insn->bits3.urb.used = used; /* ? */
+ insn->bits3.urb.complete = complete;
+ insn->bits3.urb.response_length = response_length;
+ insn->bits3.urb.msg_length = msg_length;
+ insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
+ insn->bits3.urb.end_of_thread = end_of_thread;
+ }
+}
+
+static void brw_set_dp_write_message( struct brw_context *brw,
+ struct brw_instruction *insn,
+ GLuint binding_table_index,
+ GLuint msg_control,
+ GLuint msg_type,
+ GLuint msg_length,
+ GLuint pixel_scoreboard_clear,
+ GLuint response_length,
+ GLuint end_of_thread )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.dp_write_igdng.binding_table_index = binding_table_index;
+ insn->bits3.dp_write_igdng.msg_control = msg_control;
+ insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear;
+ insn->bits3.dp_write_igdng.msg_type = msg_type;
+ insn->bits3.dp_write_igdng.send_commit_msg = 0;
+ insn->bits3.dp_write_igdng.header_present = 1;
+ insn->bits3.dp_write_igdng.response_length = response_length;
+ insn->bits3.dp_write_igdng.msg_length = msg_length;
+ insn->bits3.dp_write_igdng.end_of_thread = end_of_thread;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+ insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ } else {
+ insn->bits3.dp_write.binding_table_index = binding_table_index;
+ insn->bits3.dp_write.msg_control = msg_control;
+ insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
+ insn->bits3.dp_write.msg_type = msg_type;
+ insn->bits3.dp_write.send_commit_msg = 0;
+ insn->bits3.dp_write.response_length = response_length;
+ insn->bits3.dp_write.msg_length = msg_length;
+ insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+ insn->bits3.dp_write.end_of_thread = end_of_thread;
+ }
+}
+
+static void brw_set_dp_read_message( struct brw_context *brw,
+ struct brw_instruction *insn,
+ GLuint binding_table_index,
+ GLuint msg_control,
+ GLuint msg_type,
+ GLuint target_cache,
+ GLuint msg_length,
+ GLuint response_length,
+ GLuint end_of_thread )
+{
+ brw_set_src1(insn, brw_imm_d(0));
+
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.dp_read_igdng.binding_table_index = binding_table_index;
+ insn->bits3.dp_read_igdng.msg_control = msg_control;
+ insn->bits3.dp_read_igdng.msg_type = msg_type;
+ insn->bits3.dp_read_igdng.target_cache = target_cache;
+ insn->bits3.dp_read_igdng.header_present = 1;
+ insn->bits3.dp_read_igdng.response_length = response_length;
+ insn->bits3.dp_read_igdng.msg_length = msg_length;
+ insn->bits3.dp_read_igdng.pad1 = 0;
+ insn->bits3.dp_read_igdng.end_of_thread = end_of_thread;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
+ insn->bits2.send_igdng.end_of_thread = end_of_thread;
+ } else {
+ insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
+ insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
+ insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
+ insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
+ insn->bits3.dp_read.response_length = response_length; /*16:19*/
+ insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
+ insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
+ insn->bits3.dp_read.pad1 = 0; /*28:30*/
+ insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
+ }
+}
+
+static void brw_set_sampler_message(struct brw_context *brw,
+ struct brw_instruction *insn,
+ GLuint binding_table_index,
+ GLuint sampler,
+ GLuint msg_type,
+ GLuint response_length,
+ GLuint msg_length,
+ GLboolean eot,
+ GLuint header_present,
+ GLuint simd_mode)
+{
+ assert(eot == 0);
+ brw_set_src1(insn, brw_imm_d(0));
+
+ if (BRW_IS_IGDNG(brw)) {
+ insn->bits3.sampler_igdng.binding_table_index = binding_table_index;
+ insn->bits3.sampler_igdng.sampler = sampler;
+ insn->bits3.sampler_igdng.msg_type = msg_type;
+ insn->bits3.sampler_igdng.simd_mode = simd_mode;
+ insn->bits3.sampler_igdng.header_present = header_present;
+ insn->bits3.sampler_igdng.response_length = response_length;
+ insn->bits3.sampler_igdng.msg_length = msg_length;
+ insn->bits3.sampler_igdng.end_of_thread = eot;
+ insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER;
+ insn->bits2.send_igdng.end_of_thread = eot;
+ } else if (BRW_IS_G4X(brw)) {
+ insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
+ insn->bits3.sampler_g4x.sampler = sampler;
+ insn->bits3.sampler_g4x.msg_type = msg_type;
+ insn->bits3.sampler_g4x.response_length = response_length;
+ insn->bits3.sampler_g4x.msg_length = msg_length;
+ insn->bits3.sampler_g4x.end_of_thread = eot;
+ insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
+ } else {
+ insn->bits3.sampler.binding_table_index = binding_table_index;
+ insn->bits3.sampler.sampler = sampler;
+ insn->bits3.sampler.msg_type = msg_type;
+ insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
+ insn->bits3.sampler.response_length = response_length;
+ insn->bits3.sampler.msg_length = msg_length;
+ insn->bits3.sampler.end_of_thread = eot;
+ insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
+ }
+}
+
+
+
+static struct brw_instruction *next_insn( struct brw_compile *p,
+ GLuint opcode )
+{
+ struct brw_instruction *insn;
+
+ assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
+
+ insn = &p->store[p->nr_insn++];
+ memcpy(insn, p->current, sizeof(*insn));
+
+ /* Reset this one-shot flag:
+ */
+
+ if (p->current->header.destreg__conditionalmod) {
+ p->current->header.destreg__conditionalmod = 0;
+ p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+ }
+
+ insn->header.opcode = opcode;
+ return insn;
+}
+
+
+static struct brw_instruction *brw_alu1( struct brw_compile *p,
+ GLuint opcode,
+ struct brw_reg dest,
+ struct brw_reg src )
+{
+ struct brw_instruction *insn = next_insn(p, opcode);
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+ return insn;
+}
+
+static struct brw_instruction *brw_alu2(struct brw_compile *p,
+ GLuint opcode,
+ struct brw_reg dest,
+ struct brw_reg src0,
+ struct brw_reg src1 )
+{
+ struct brw_instruction *insn = next_insn(p, opcode);
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_src1(insn, src1);
+ return insn;
+}
+
+
+/***********************************************************************
+ * Convenience routines.
+ */
+#define ALU1(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0) \
+{ \
+ return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
+}
+
+#define ALU2(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1) \
+{ \
+ return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
+}
+
+
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(ADD)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU1(RNDZ)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+
+
+
+
+void brw_NOP(struct brw_compile *p)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
+ brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src1(insn, brw_imm_ud(0x0));
+}
+
+
+
+
+
+/***********************************************************************
+ * Comparisons, if/else/endif
+ */
+
+struct brw_instruction *brw_JMPI(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg src0,
+ struct brw_reg src1)
+{
+ struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
+
+ insn->header.execution_size = 1;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.mask_control = BRW_MASK_DISABLE;
+
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+ return insn;
+}
+
+/* EU takes the value from the flag register and pushes it onto some
+ * sort of a stack (presumably merging with any flag value already on
+ * the stack). Within an if block, the flags at the top of the stack
+ * control execution on each channel of the unit, eg. on each of the
+ * 16 pixel values in our wm programs.
+ *
+ * When the matching 'else' instruction is reached (presumably by
+ * countdown of the instruction count patched in by our ELSE/ENDIF
+ * functions), the relevent flags are inverted.
+ *
+ * When the matching 'endif' instruction is reached, the flags are
+ * popped off. If the stack is now empty, normal execution resumes.
+ *
+ * No attempt is made to deal with stack overflow (14 elements?).
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
+{
+ struct brw_instruction *insn;
+
+ if (p->single_program_flow) {
+ assert(execute_size == BRW_EXECUTE_1);
+
+ insn = next_insn(p, BRW_OPCODE_ADD);
+ insn->header.predicate_inverse = 1;
+ } else {
+ insn = next_insn(p, BRW_OPCODE_IF);
+ }
+
+ /* Override the defaults for this instruction:
+ */
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->header.execution_size = execute_size;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.predicate_control = BRW_PREDICATE_NORMAL;
+ insn->header.mask_control = BRW_MASK_ENABLE;
+ if (!p->single_program_flow)
+ insn->header.thread_control = BRW_THREAD_SWITCH;
+
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+ return insn;
+}
+
+
+struct brw_instruction *brw_ELSE(struct brw_compile *p,
+ struct brw_instruction *if_insn)
+{
+ struct brw_instruction *insn;
+ GLuint br = 1;
+
+ if (BRW_IS_IGDNG(p->brw))
+ br = 2;
+
+ if (p->single_program_flow) {
+ insn = next_insn(p, BRW_OPCODE_ADD);
+ } else {
+ insn = next_insn(p, BRW_OPCODE_ELSE);
+ }
+
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = if_insn->header.execution_size;
+ insn->header.mask_control = BRW_MASK_ENABLE;
+ if (!p->single_program_flow)
+ insn->header.thread_control = BRW_THREAD_SWITCH;
+
+ /* Patch the if instruction to point at this instruction.
+ */
+ if (p->single_program_flow) {
+ assert(if_insn->header.opcode == BRW_OPCODE_ADD);
+
+ if_insn->bits3.ud = (insn - if_insn + 1) * 16;
+ } else {
+ assert(if_insn->header.opcode == BRW_OPCODE_IF);
+
+ if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
+ if_insn->bits3.if_else.pop_count = 0;
+ if_insn->bits3.if_else.pad0 = 0;
+ }
+
+ return insn;
+}
+
+void brw_ENDIF(struct brw_compile *p,
+ struct brw_instruction *patch_insn)
+{
+ GLuint br = 1;
+
+ if (BRW_IS_IGDNG(p->brw))
+ br = 2;
+
+ if (p->single_program_flow) {
+ /* In single program flow mode, there's no need to execute an ENDIF,
+ * since we don't need to do any stack operations, and if we're executing
+ * currently, we want to just continue executing.
+ */
+ struct brw_instruction *next = &p->store[p->nr_insn];
+
+ assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
+
+ patch_insn->bits3.ud = (next - patch_insn) * 16;
+ } else {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
+
+ brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = patch_insn->header.execution_size;
+ insn->header.mask_control = BRW_MASK_ENABLE;
+ insn->header.thread_control = BRW_THREAD_SWITCH;
+
+ assert(patch_insn->bits3.if_else.jump_count == 0);
+
+ /* Patch the if or else instructions to point at this or the next
+ * instruction respectively.
+ */
+ if (patch_insn->header.opcode == BRW_OPCODE_IF) {
+ /* Automagically turn it into an IFF:
+ */
+ patch_insn->header.opcode = BRW_OPCODE_IFF;
+ patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
+ patch_insn->bits3.if_else.pop_count = 0;
+ patch_insn->bits3.if_else.pad0 = 0;
+ } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
+ patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
+ patch_insn->bits3.if_else.pop_count = 1;
+ patch_insn->bits3.if_else.pad0 = 0;
+ } else {
+ assert(0);
+ }
+
+ /* Also pop item off the stack in the endif instruction:
+ */
+ insn->bits3.if_else.jump_count = 0;
+ insn->bits3.if_else.pop_count = 1;
+ insn->bits3.if_else.pad0 = 0;
+ }
+}
+
+struct brw_instruction *brw_BREAK(struct brw_compile *p)
+{
+ struct brw_instruction *insn;
+ insn = next_insn(p, BRW_OPCODE_BREAK);
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = BRW_EXECUTE_8;
+ /* insn->header.mask_control = BRW_MASK_DISABLE; */
+ insn->bits3.if_else.pad0 = 0;
+ return insn;
+}
+
+struct brw_instruction *brw_CONT(struct brw_compile *p)
+{
+ struct brw_instruction *insn;
+ insn = next_insn(p, BRW_OPCODE_CONTINUE);
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = BRW_EXECUTE_8;
+ /* insn->header.mask_control = BRW_MASK_DISABLE; */
+ insn->bits3.if_else.pad0 = 0;
+ return insn;
+}
+
+/* DO/WHILE loop:
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
+{
+ if (p->single_program_flow) {
+ return &p->store[p->nr_insn];
+ } else {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
+
+ /* Override the defaults for this instruction:
+ */
+ brw_set_dest(insn, brw_null_reg());
+ brw_set_src0(insn, brw_null_reg());
+ brw_set_src1(insn, brw_null_reg());
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = execute_size;
+ insn->header.predicate_control = BRW_PREDICATE_NONE;
+ /* insn->header.mask_control = BRW_MASK_ENABLE; */
+ /* insn->header.mask_control = BRW_MASK_DISABLE; */
+
+ return insn;
+ }
+}
+
+
+
+struct brw_instruction *brw_WHILE(struct brw_compile *p,
+ struct brw_instruction *do_insn)
+{
+ struct brw_instruction *insn;
+ GLuint br = 1;
+
+ if (BRW_IS_IGDNG(p->brw))
+ br = 2;
+
+ if (p->single_program_flow)
+ insn = next_insn(p, BRW_OPCODE_ADD);
+ else
+ insn = next_insn(p, BRW_OPCODE_WHILE);
+
+ brw_set_dest(insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+
+ if (p->single_program_flow) {
+ insn->header.execution_size = BRW_EXECUTE_1;
+
+ insn->bits3.d = (do_insn - insn) * 16;
+ } else {
+ insn->header.execution_size = do_insn->header.execution_size;
+
+ assert(do_insn->header.opcode == BRW_OPCODE_DO);
+ insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
+ insn->bits3.if_else.pop_count = 0;
+ insn->bits3.if_else.pad0 = 0;
+ }
+
+/* insn->header.mask_control = BRW_MASK_ENABLE; */
+
+ /* insn->header.mask_control = BRW_MASK_DISABLE; */
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
+ return insn;
+}
+
+
+/* FORWARD JUMPS:
+ */
+void brw_land_fwd_jump(struct brw_compile *p,
+ struct brw_instruction *jmp_insn)
+{
+ struct brw_instruction *landing = &p->store[p->nr_insn];
+ GLuint jmpi = 1;
+
+ if (BRW_IS_IGDNG(p->brw))
+ jmpi = 2;
+
+ assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
+ assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
+
+ jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
+}
+
+
+
+/* To integrate with the above, it makes sense that the comparison
+ * instruction should populate the flag register. It might be simpler
+ * just to use the flag reg for most WM tasks?
+ */
+void brw_CMP(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint conditional,
+ struct brw_reg src0,
+ struct brw_reg src1)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
+
+ insn->header.destreg__conditionalmod = conditional;
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_src1(insn, src1);
+
+/* guess_execution_size(insn, src0); */
+
+
+ /* Make it so that future instructions will use the computed flag
+ * value until brw_set_predicate_control_flag_value() is called
+ * again.
+ */
+ if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+ dest.nr == 0) {
+ p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+ p->flag_value = 0xff;
+ }
+}
+
+
+
+/***********************************************************************
+ * Helpers for the various SEND message types:
+ */
+
+/** Extended math function, float[8].
+ */
+void brw_math( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint function,
+ GLuint saturate,
+ GLuint msg_reg_nr,
+ struct brw_reg src,
+ GLuint data_type,
+ GLuint precision )
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+ GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
+ GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
+
+ /* Example code doesn't set predicate_control for send
+ * instructions.
+ */
+ insn->header.predicate_control = 0;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+ brw_set_math_message(p->brw,
+ insn,
+ msg_length, response_length,
+ function,
+ BRW_MATH_INTEGER_UNSIGNED,
+ precision,
+ saturate,
+ data_type);
+}
+
+/**
+ * Extended math function, float[16].
+ * Use 2 send instructions.
+ */
+void brw_math_16( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint function,
+ GLuint saturate,
+ GLuint msg_reg_nr,
+ struct brw_reg src,
+ GLuint precision )
+{
+ struct brw_instruction *insn;
+ GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
+ GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
+
+ /* First instruction:
+ */
+ brw_push_insn_state(p);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ insn = next_insn(p, BRW_OPCODE_SEND);
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+ brw_set_math_message(p->brw,
+ insn,
+ msg_length, response_length,
+ function,
+ BRW_MATH_INTEGER_UNSIGNED,
+ precision,
+ saturate,
+ BRW_MATH_DATA_VECTOR);
+
+ /* Second instruction:
+ */
+ insn = next_insn(p, BRW_OPCODE_SEND);
+ insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
+ insn->header.destreg__conditionalmod = msg_reg_nr+1;
+
+ brw_set_dest(insn, offset(dest,1));
+ brw_set_src0(insn, src);
+ brw_set_math_message(p->brw,
+ insn,
+ msg_length, response_length,
+ function,
+ BRW_MATH_INTEGER_UNSIGNED,
+ precision,
+ saturate,
+ BRW_MATH_DATA_VECTOR);
+
+ brw_pop_insn_state(p);
+}
+
+
+/**
+ * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
+ * Scratch offset should be a multiple of 64.
+ * Used for register spilling.
+ */
+void brw_dp_WRITE_16( struct brw_compile *p,
+ struct brw_reg src,
+ GLuint scratch_offset )
+{
+ GLuint msg_reg_nr = 1;
+ {
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ /* set message header global offset field (reg 0, element 2) */
+ brw_MOV(p,
+ retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
+ brw_imm_d(scratch_offset));
+
+ brw_pop_insn_state(p);
+ }
+
+ {
+ GLuint msg_length = 3;
+ struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+
+ brw_set_dp_write_message(p->brw,
+ insn,
+ 255, /* binding table index (255=stateless) */
+ BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
+ BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
+ msg_length,
+ 0, /* pixel scoreboard */
+ 0, /* response_length */
+ 0); /* eot */
+ }
+}
+
+
+/**
+ * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
+ * Scratch offset should be a multiple of 64.
+ * Used for register spilling.
+ */
+void brw_dp_READ_16( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint scratch_offset )
+{
+ GLuint msg_reg_nr = 1;
+ {
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+ /* set message header global offset field (reg 0, element 2) */
+ brw_MOV(p,
+ retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
+ brw_imm_d(scratch_offset));
+
+ brw_pop_insn_state(p);
+ }
+
+ {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest); /* UW? */
+ brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
+
+ brw_set_dp_read_message(p->brw,
+ insn,
+ 255, /* binding table index (255=stateless) */
+ 3, /* msg_control (3 means 4 Owords) */
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+ 1, /* target cache (render/scratch) */
+ 1, /* msg_length */
+ 2, /* response_length */
+ 0); /* eot */
+ }
+}
+
+
+/**
+ * Read a float[4] vector from the data port Data Cache (const buffer).
+ * Location (in buffer) should be a multiple of 16.
+ * Used for fetching shader constants.
+ * If relAddr is true, we'll do an indirect fetch using the address register.
+ */
+void brw_dp_READ_4( struct brw_compile *p,
+ struct brw_reg dest,
+ GLboolean relAddr,
+ GLuint location,
+ GLuint bind_table_index )
+{
+ /* XXX: relAddr not implemented */
+ GLuint msg_reg_nr = 1;
+ {
+ struct brw_reg b;
+ brw_push_insn_state(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+ /* Setup MRF[1] with location/offset into const buffer */
+ b = brw_message_reg(msg_reg_nr);
+ b = retype(b, BRW_REGISTER_TYPE_UD);
+ /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
+ * when the docs say only dword[2] should be set. Hmmm. But it works.
+ */
+ brw_MOV(p, b, brw_imm_ud(location));
+ brw_pop_insn_state(p);
+ }
+
+ {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = BRW_PREDICATE_NONE;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+ insn->header.mask_control = BRW_MASK_DISABLE;
+
+ /* cast dest to a uword[8] vector */
+ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, brw_null_reg());
+
+ brw_set_dp_read_message(p->brw,
+ insn,
+ bind_table_index,
+ 0, /* msg_control (0 means 1 Oword) */
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+ 0, /* source cache = data cache */
+ 1, /* msg_length */
+ 1, /* response_length (1 Oword) */
+ 0); /* eot */
+ }
+}
+
+
+/**
+ * Read float[4] constant(s) from VS constant buffer.
+ * For relative addressing, two float[4] constants will be read into 'dest'.
+ * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
+ */
+void brw_dp_READ_4_vs(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint oword,
+ GLboolean relAddr,
+ struct brw_reg addrReg,
+ GLuint location,
+ GLuint bind_table_index)
+{
+ GLuint msg_reg_nr = 1;
+
+ assert(oword < 2);
+ /*
+ printf("vs const read msg, location %u, msg_reg_nr %d\n",
+ location, msg_reg_nr);
+ */
+
+ /* Setup MRF[1] with location/offset into const buffer */
+ {
+ struct brw_reg b;
+
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ /*brw_set_access_mode(p, BRW_ALIGN_16);*/
+
+ /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
+ * when the docs say only dword[2] should be set. Hmmm. But it works.
+ */
+ b = brw_message_reg(msg_reg_nr);
+ b = retype(b, BRW_REGISTER_TYPE_UD);
+ /*b = get_element_ud(b, 2);*/
+ if (relAddr) {
+ brw_ADD(p, b, addrReg, brw_imm_ud(location));
+ }
+ else {
+ brw_MOV(p, b, brw_imm_ud(location));
+ }
+
+ brw_pop_insn_state(p);
+ }
+
+ {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = BRW_PREDICATE_NONE;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+ insn->header.mask_control = BRW_MASK_DISABLE;
+ /*insn->header.access_mode = BRW_ALIGN_16;*/
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, brw_null_reg());
+
+ brw_set_dp_read_message(p->brw,
+ insn,
+ bind_table_index,
+ oword, /* 0 = lower Oword, 1 = upper Oword */
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+ 0, /* source cache = data cache */
+ 1, /* msg_length */
+ 1, /* response_length (1 Oword) */
+ 0); /* eot */
+ }
+}
+
+
+
+void brw_fb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint binding_table_index,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_dp_write_message(p->brw,
+ insn,
+ binding_table_index,
+ BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
+ BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
+ msg_length,
+ 1, /* pixel scoreboard */
+ response_length,
+ eot);
+}
+
+
+/**
+ * Texture sample instruction.
+ * Note: the msg_type plus msg_length values determine exactly what kind
+ * of sampling operation is performed. See volume 4, page 161 of docs.
+ */
+void brw_SAMPLE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint binding_table_index,
+ GLuint sampler,
+ GLuint writemask,
+ GLuint msg_type,
+ GLuint response_length,
+ GLuint msg_length,
+ GLboolean eot,
+ GLuint header_present,
+ GLuint simd_mode)
+{
+ GLboolean need_stall = 0;
+
+ if (writemask == 0) {
+ /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
+ return;
+ }
+
+ /* Hardware doesn't do destination dependency checking on send
+ * instructions properly. Add a workaround which generates the
+ * dependency by other means. In practice it seems like this bug
+ * only crops up for texture samples, and only where registers are
+ * written by the send and then written again later without being
+ * read in between. Luckily for us, we already track that
+ * information and use it to modify the writemask for the
+ * instruction, so that is a guide for whether a workaround is
+ * needed.
+ */
+ if (writemask != WRITEMASK_XYZW) {
+ GLuint dst_offset = 0;
+ GLuint i, newmask = 0, len = 0;
+
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<<i))
+ break;
+ dst_offset += 2;
+ }
+ for (; i < 4; i++) {
+ if (!(writemask & (1<<i)))
+ break;
+ newmask |= 1<<i;
+ len++;
+ }
+
+ if (newmask != writemask) {
+ need_stall = 1;
+ /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
+ }
+ else {
+ struct brw_reg m1 = brw_message_reg(msg_reg_nr);
+
+ newmask = ~newmask & WRITEMASK_XYZW;
+
+ brw_push_insn_state(p);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+ brw_MOV(p, m1, brw_vec8_grf(0,0));
+ brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
+
+ brw_pop_insn_state(p);
+
+ src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+ dest = offset(dest, dst_offset);
+ response_length = len * 2;
+ }
+ }
+
+ {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = 0; /* XXX */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_sampler_message(p->brw, insn,
+ binding_table_index,
+ sampler,
+ msg_type,
+ response_length,
+ msg_length,
+ eot,
+ header_present,
+ simd_mode);
+ }
+
+ if (need_stall) {
+ struct brw_reg reg = vec8(offset(dest, response_length-1));
+
+ /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
+ */
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, reg, reg);
+ brw_pop_insn_state(p);
+ }
+
+}
+
+/* All these variables are pretty confusing - we might be better off
+ * using bitmasks and macros for this, in the old style. Or perhaps
+ * just having the caller instantiate the fields in dword3 itself.
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot,
+ GLboolean writes_complete,
+ GLuint offset,
+ GLuint swizzle)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ assert(msg_length < BRW_MAX_MRF);
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_urb_message(p->brw,
+ insn,
+ allocate,
+ used,
+ msg_length,
+ response_length,
+ eot,
+ writes_complete,
+ offset,
+ swizzle);
+}
+
+void brw_ff_sync(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLboolean allocate,
+ GLboolean used,
+ GLuint msg_length,
+ GLuint response_length,
+ GLboolean eot,
+ GLboolean writes_complete,
+ GLuint offset,
+ GLuint swizzle)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ assert(msg_length < 16);
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src0);
+ brw_set_src1(insn, brw_imm_d(0));
+
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ brw_set_ff_sync_message(p->brw,
+ insn,
+ allocate,
+ used,
+ msg_length,
+ response_length,
+ eot,
+ writes_complete,
+ offset,
+ swizzle);
+}
diff --git a/src/gallium/drivers/i965/brw_eu_util.c b/src/gallium/drivers/i965/brw_eu_util.c
new file mode 100644
index 00000000000..5405cf17a4e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu_util.c
@@ -0,0 +1,126 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+void brw_math_invert( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ brw_math( p,
+ dst,
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 0,
+ src,
+ BRW_MATH_PRECISION_FULL,
+ BRW_MATH_DATA_VECTOR );
+}
+
+
+
+void brw_copy4(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ GLuint count)
+{
+ GLuint i;
+
+ dst = vec4(dst);
+ src = vec4(src);
+
+ for (i = 0; i < count; i++)
+ {
+ GLuint delta = i*32;
+ brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta));
+ brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16));
+ }
+}
+
+
+void brw_copy8(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ GLuint count)
+{
+ GLuint i;
+
+ dst = vec8(dst);
+ src = vec8(src);
+
+ for (i = 0; i < count; i++)
+ {
+ GLuint delta = i*32;
+ brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta));
+ }
+}
+
+
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+ struct brw_indirect dst_ptr,
+ struct brw_indirect src_ptr,
+ GLuint count)
+{
+ GLuint i;
+
+ for (i = 0; i < count; i++)
+ {
+ GLuint delta = i*32;
+ brw_MOV(p, deref_4f(dst_ptr, delta), deref_4f(src_ptr, delta));
+ brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16));
+ }
+}
+
+
+void brw_copy_from_indirect(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_indirect ptr,
+ GLuint count)
+{
+ GLuint i;
+
+ dst = vec4(dst);
+
+ for (i = 0; i < count; i++)
+ {
+ GLuint delta = i*32;
+ brw_MOV(p, byte_offset(dst, delta), deref_4f(ptr, delta));
+ brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16));
+ }
+}
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c
new file mode 100644
index 00000000000..48c2b9a41ce
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs.c
@@ -0,0 +1,201 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_gs.h"
+
+
+
+static void compile_gs_prog( struct brw_context *brw,
+ struct brw_gs_prog_key *key )
+{
+ struct brw_gs_compile c;
+ const GLuint *program;
+ GLuint program_size;
+
+ memset(&c, 0, sizeof(c));
+
+ c.key = *key;
+ c.need_ff_sync = BRW_IS_IGDNG(brw);
+ /* Need to locate the two positions present in vertex + header.
+ * These are currently hardcoded:
+ */
+ c.nr_attrs = brw_count_bits(c.key.attrs);
+
+ if (BRW_IS_IGDNG(brw))
+ c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
+ else
+ c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
+
+ c.nr_bytes = c.nr_regs * REG_SIZE;
+
+
+ /* Begin the compilation:
+ */
+ brw_init_compile(brw, &c.func);
+
+ c.func.single_program_flow = 1;
+
+ /* For some reason the thread is spawned with only 4 channels
+ * unmasked.
+ */
+ brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
+
+
+ /* Note that primitives which don't require a GS program have
+ * already been weeded out by this stage:
+ */
+ switch (key->primitive) {
+ case GL_QUADS:
+ brw_gs_quads( &c );
+ break;
+ case GL_QUAD_STRIP:
+ brw_gs_quad_strip( &c );
+ break;
+ case GL_LINE_LOOP:
+ brw_gs_lines( &c );
+ break;
+ case GL_LINES:
+ if (key->hint_gs_always)
+ brw_gs_lines( &c );
+ else {
+ return;
+ }
+ break;
+ case GL_TRIANGLES:
+ if (key->hint_gs_always)
+ brw_gs_tris( &c );
+ else {
+ return;
+ }
+ break;
+ case GL_POINTS:
+ if (key->hint_gs_always)
+ brw_gs_points( &c );
+ else {
+ return;
+ }
+ break;
+ default:
+ return;
+ }
+
+ /* get the program
+ */
+ program = brw_get_program(&c.func, &program_size);
+
+ /* Upload
+ */
+ dri_bo_unreference(brw->gs.prog_bo);
+ brw->gs.prog_bo = brw_upload_cache( &brw->cache, BRW_GS_PROG,
+ &c.key, sizeof(c.key),
+ NULL, 0,
+ program, program_size,
+ &c.prog_data,
+ &brw->gs.prog_data );
+}
+
+static const GLenum gs_prim[GL_POLYGON+1] = {
+ GL_POINTS,
+ GL_LINES,
+ GL_LINE_LOOP,
+ GL_LINES,
+ GL_TRIANGLES,
+ GL_TRIANGLES,
+ GL_TRIANGLES,
+ GL_QUADS,
+ GL_QUAD_STRIP,
+ GL_TRIANGLES
+};
+
+static void populate_key( struct brw_context *brw,
+ struct brw_gs_prog_key *key )
+{
+ memset(key, 0, sizeof(*key));
+
+ /* CACHE_NEW_VS_PROG */
+ key->attrs = brw->vs.prog_data->outputs_written;
+
+ /* BRW_NEW_PRIMITIVE */
+ key->primitive = gs_prim[brw->primitive];
+
+ key->hint_gs_always = 0; /* debug code? */
+
+ key->need_gs_prog = (key->hint_gs_always ||
+ brw->primitive == GL_QUADS ||
+ brw->primitive == GL_QUAD_STRIP ||
+ brw->primitive == GL_LINE_LOOP);
+}
+
+/* Calculate interpolants for triangle and line rasterization.
+ */
+static void prepare_gs_prog(struct brw_context *brw)
+{
+ struct brw_gs_prog_key key;
+ /* Populate the key:
+ */
+ populate_key(brw, &key);
+
+ if (brw->gs.prog_active != key.need_gs_prog) {
+ brw->state.dirty.cache |= CACHE_NEW_GS_PROG;
+ brw->gs.prog_active = key.need_gs_prog;
+ }
+
+ if (brw->gs.prog_active) {
+ dri_bo_unreference(brw->gs.prog_bo);
+ brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG,
+ &key, sizeof(key),
+ NULL, 0,
+ &brw->gs.prog_data);
+ if (brw->gs.prog_bo == NULL)
+ compile_gs_prog( brw, &key );
+ }
+}
+
+
+const struct brw_tracked_state brw_gs_prog = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_PRIMITIVE,
+ .cache = CACHE_NEW_VS_PROG
+ },
+ .prepare = prepare_gs_prog
+};
diff --git a/src/gallium/drivers/i965/brw_gs.h b/src/gallium/drivers/i965/brw_gs.h
new file mode 100644
index 00000000000..bbb991ea2e5
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs.h
@@ -0,0 +1,76 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#ifndef BRW_GS_H
+#define BRW_GS_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+#define MAX_GS_VERTS (4)
+
+struct brw_gs_prog_key {
+ GLuint attrs:32;
+ GLuint primitive:4;
+ GLuint hint_gs_always:1;
+ GLuint need_gs_prog:1;
+ GLuint pad:26;
+};
+
+struct brw_gs_compile {
+ struct brw_compile func;
+ struct brw_gs_prog_key key;
+ struct brw_gs_prog_data prog_data;
+
+ struct {
+ struct brw_reg R0;
+ struct brw_reg vertex[MAX_GS_VERTS];
+ } reg;
+
+ /* 3 different ways of expressing vertex size:
+ */
+ GLuint nr_attrs;
+ GLuint nr_regs;
+ GLuint nr_bytes;
+ GLboolean need_ff_sync;
+};
+
+#define ATTR_SIZE (4*4)
+
+void brw_gs_quads( struct brw_gs_compile *c );
+void brw_gs_quad_strip( struct brw_gs_compile *c );
+void brw_gs_tris( struct brw_gs_compile *c );
+void brw_gs_lines( struct brw_gs_compile *c );
+void brw_gs_points( struct brw_gs_compile *c );
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_gs_emit.c b/src/gallium/drivers/i965/brw_gs_emit.c
new file mode 100644
index 00000000000..a9b2aa2eace
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs_emit.c
@@ -0,0 +1,186 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "shader/program.h"
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_gs.h"
+
+static void brw_gs_alloc_regs( struct brw_gs_compile *c,
+ GLuint nr_verts )
+{
+ GLuint i = 0,j;
+
+ /* Register usage is static, precompute here:
+ */
+ c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+ /* Payload vertices plus space for more generated vertices:
+ */
+ for (j = 0; j < nr_verts; j++) {
+ c->reg.vertex[j] = brw_vec4_grf(i, 0);
+ i += c->nr_regs;
+ }
+
+ c->prog_data.urb_read_length = c->nr_regs;
+ c->prog_data.total_grf = i;
+}
+
+
+static void brw_gs_emit_vue(struct brw_gs_compile *c,
+ struct brw_reg vert,
+ GLboolean last,
+ GLuint header)
+{
+ struct brw_compile *p = &c->func;
+ GLboolean allocate = !last;
+
+ /* Overwrite PrimType and PrimStart in the message header, for
+ * each vertex in turn:
+ */
+ brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
+
+ /* Copy the vertex from vertn into m1..mN+1:
+ */
+ brw_copy8(p, brw_message_reg(1), vert, c->nr_regs);
+
+ /* Send each vertex as a seperate write to the urb. This is
+ * different to the concept in brw_sf_emit.c, where subsequent
+ * writes are used to build up a single urb entry. Each of these
+ * writes instantiates a seperate urb entry, and a new one must be
+ * allocated each time.
+ */
+ brw_urb_WRITE(p,
+ allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+ 0,
+ c->reg.R0,
+ allocate,
+ 1, /* used */
+ c->nr_regs + 1, /* msg length */
+ allocate ? 1 : 0, /* response length */
+ allocate ? 0 : 1, /* eot */
+ 1, /* writes_complete */
+ 0, /* urb offset */
+ BRW_URB_SWIZZLE_NONE);
+}
+
+static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
+{
+ struct brw_compile *p = &c->func;
+ brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim));
+ brw_ff_sync(p,
+ c->reg.R0,
+ 0,
+ c->reg.R0,
+ 1,
+ 1, /* used */
+ 1, /* msg length */
+ 1, /* response length */
+ 0, /* eot */
+ 1, /* write compelete */
+ 0, /* urb offset */
+ BRW_URB_SWIZZLE_NONE);
+}
+
+
+void brw_gs_quads( struct brw_gs_compile *c )
+{
+ brw_gs_alloc_regs(c, 4);
+
+ /* Use polygons for correct edgeflag behaviour. Note that vertex 3
+ * is the PV for quads, but vertex 0 for polygons:
+ */
+ if (c->need_ff_sync)
+ brw_gs_ff_sync(c, 1);
+ brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+}
+
+void brw_gs_quad_strip( struct brw_gs_compile *c )
+{
+ brw_gs_alloc_regs(c, 4);
+
+ if (c->need_ff_sync)
+ brw_gs_ff_sync(c, 1);
+ brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+}
+
+void brw_gs_tris( struct brw_gs_compile *c )
+{
+ brw_gs_alloc_regs(c, 3);
+
+ if (c->need_ff_sync)
+ brw_gs_ff_sync(c, 1);
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2));
+ brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END));
+}
+
+void brw_gs_lines( struct brw_gs_compile *c )
+{
+ brw_gs_alloc_regs(c, 2);
+
+ if (c->need_ff_sync)
+ brw_gs_ff_sync(c, 1);
+ brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START));
+ brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END));
+}
+
+void brw_gs_points( struct brw_gs_compile *c )
+{
+ brw_gs_alloc_regs(c, 1);
+
+ if (c->need_ff_sync)
+ brw_gs_ff_sync(c, 1);
+ brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END));
+}
+
+
+
+
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c
new file mode 100644
index 00000000000..ed9d2ffe605
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs_state.c
@@ -0,0 +1,149 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "main/macros.h"
+
+struct brw_gs_unit_key {
+ unsigned int total_grf;
+ unsigned int urb_entry_read_length;
+
+ unsigned int curbe_offset;
+
+ unsigned int nr_urb_entries, urb_size;
+ GLboolean prog_active;
+};
+
+static void
+gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key)
+{
+ memset(key, 0, sizeof(*key));
+
+ /* CACHE_NEW_GS_PROG */
+ key->prog_active = brw->gs.prog_active;
+ if (key->prog_active) {
+ key->total_grf = brw->gs.prog_data->total_grf;
+ key->urb_entry_read_length = brw->gs.prog_data->urb_read_length;
+ } else {
+ key->total_grf = 1;
+ key->urb_entry_read_length = 1;
+ }
+
+ /* BRW_NEW_CURBE_OFFSETS */
+ key->curbe_offset = brw->curbe.clip_start;
+
+ /* BRW_NEW_URB_FENCE */
+ key->nr_urb_entries = brw->urb.nr_gs_entries;
+ key->urb_size = brw->urb.vsize;
+}
+
+static dri_bo *
+gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
+{
+ struct brw_gs_unit_state gs;
+ dri_bo *bo;
+
+ memset(&gs, 0, sizeof(gs));
+
+ gs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+ if (key->prog_active) /* reloc */
+ gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6;
+
+ gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ gs.thread1.single_program_flow = 1;
+
+ gs.thread3.dispatch_grf_start_reg = 1;
+ gs.thread3.const_urb_entry_read_offset = 0;
+ gs.thread3.const_urb_entry_read_length = 0;
+ gs.thread3.urb_entry_read_offset = 0;
+ gs.thread3.urb_entry_read_length = key->urb_entry_read_length;
+
+ gs.thread4.nr_urb_entries = key->nr_urb_entries;
+ gs.thread4.urb_entry_allocation_size = key->urb_size - 1;
+
+ if (key->nr_urb_entries >= 8)
+ gs.thread4.max_threads = 1;
+ else
+ gs.thread4.max_threads = 0;
+
+ if (BRW_IS_IGDNG(brw))
+ gs.thread4.rendering_enable = 1;
+
+ if (INTEL_DEBUG & DEBUG_STATS)
+ gs.thread4.stats_enable = 1;
+
+ bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT,
+ key, sizeof(*key),
+ &brw->gs.prog_bo, 1,
+ &gs, sizeof(gs),
+ NULL, NULL);
+
+ if (key->prog_active) {
+ /* Emit GS program relocation */
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ gs.thread0.grf_reg_count << 1,
+ offsetof(struct brw_gs_unit_state, thread0),
+ brw->gs.prog_bo);
+ }
+
+ return bo;
+}
+
+static void prepare_gs_unit(struct brw_context *brw)
+{
+ struct brw_gs_unit_key key;
+
+ gs_unit_populate_key(brw, &key);
+
+ dri_bo_unreference(brw->gs.state_bo);
+ brw->gs.state_bo = brw_search_cache(&brw->cache, BRW_GS_UNIT,
+ &key, sizeof(key),
+ &brw->gs.prog_bo, 1,
+ NULL);
+ if (brw->gs.state_bo == NULL) {
+ brw->gs.state_bo = gs_unit_create_from_key(brw, &key);
+ }
+}
+
+const struct brw_tracked_state brw_gs_unit = {
+ .dirty = {
+ .mesa = 0,
+ .brw = (BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_URB_FENCE),
+ .cache = CACHE_NEW_GS_PROG
+ },
+ .prepare = prepare_gs_unit,
+};
diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
new file mode 100644
index 00000000000..ea718575484
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -0,0 +1,545 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+
+#include "intel_batchbuffer.h"
+#include "intel_regions.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+
+
+
+
+/***********************************************************************
+ * Blend color
+ */
+
+static void upload_blend_constant_color(struct brw_context *brw)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ struct brw_blend_constant_color bcc;
+
+ memset(&bcc, 0, sizeof(bcc));
+ bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR;
+ bcc.header.length = sizeof(bcc)/4-2;
+ bcc.blend_constant_color[0] = ctx->Color.BlendColor[0];
+ bcc.blend_constant_color[1] = ctx->Color.BlendColor[1];
+ bcc.blend_constant_color[2] = ctx->Color.BlendColor[2];
+ bcc.blend_constant_color[3] = ctx->Color.BlendColor[3];
+
+ BRW_CACHED_BATCH_STRUCT(brw, &bcc);
+}
+
+
+const struct brw_tracked_state brw_blend_constant_color = {
+ .dirty = {
+ .mesa = _NEW_COLOR,
+ .brw = 0,
+ .cache = 0
+ },
+ .emit = upload_blend_constant_color
+};
+
+/* Constant single cliprect for framebuffer object or DRI2 drawing */
+static void upload_drawing_rect(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ GLcontext *ctx = &intel->ctx;
+
+ if (!intel->constant_cliprect)
+ return;
+
+ BEGIN_BATCH(4, NO_LOOP_CLIPRECTS);
+ OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965);
+ OUT_BATCH(0); /* xmin, ymin */
+ OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
+ ((ctx->DrawBuffer->Height - 1) << 16));
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state brw_drawing_rect = {
+ .dirty = {
+ .mesa = _NEW_BUFFERS,
+ .brw = 0,
+ .cache = 0
+ },
+ .emit = upload_drawing_rect
+};
+
+static void prepare_binding_table_pointers(struct brw_context *brw)
+{
+ brw_add_validated_bo(brw, brw->vs.bind_bo);
+ brw_add_validated_bo(brw, brw->wm.bind_bo);
+}
+
+/**
+ * Upload the binding table pointers, which point each stage's array of surface
+ * state pointers.
+ *
+ * The binding table pointers are relative to the surface state base address,
+ * which is 0.
+ */
+static void upload_binding_table_pointers(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+
+ BEGIN_BATCH(6, IGNORE_CLIPRECTS);
+ OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
+ if (brw->vs.bind_bo != NULL)
+ OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
+ else
+ OUT_BATCH(0);
+ OUT_BATCH(0); /* gs */
+ OUT_BATCH(0); /* clip */
+ OUT_BATCH(0); /* sf */
+ OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */
+ ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state brw_binding_table_pointers = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH,
+ .cache = CACHE_NEW_SURF_BIND,
+ },
+ .prepare = prepare_binding_table_pointers,
+ .emit = upload_binding_table_pointers,
+};
+
+
+/**
+ * Upload pointers to the per-stage state.
+ *
+ * The state pointers in this packet are all relative to the general state
+ * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
+ */
+static void upload_pipelined_state_pointers(struct brw_context *brw )
+{
+ struct intel_context *intel = &brw->intel;
+
+ BEGIN_BATCH(7, IGNORE_CLIPRECTS);
+ OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2));
+ OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ if (brw->gs.prog_active)
+ OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+ else
+ OUT_BATCH(0);
+ OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+ OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ ADVANCE_BATCH();
+
+ brw->state.dirty.brw |= BRW_NEW_PSP;
+}
+
+
+static void prepare_psp_urb_cbs(struct brw_context *brw)
+{
+ brw_add_validated_bo(brw, brw->vs.state_bo);
+ brw_add_validated_bo(brw, brw->gs.state_bo);
+ brw_add_validated_bo(brw, brw->clip.state_bo);
+ brw_add_validated_bo(brw, brw->sf.state_bo);
+ brw_add_validated_bo(brw, brw->wm.state_bo);
+ brw_add_validated_bo(brw, brw->cc.state_bo);
+}
+
+static void upload_psp_urb_cbs(struct brw_context *brw )
+{
+ upload_pipelined_state_pointers(brw);
+ brw_upload_urb_fence(brw);
+ brw_upload_cs_urb_state(brw);
+}
+
+const struct brw_tracked_state brw_psp_urb_cbs = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_URB_FENCE | BRW_NEW_BATCH,
+ .cache = (CACHE_NEW_VS_UNIT |
+ CACHE_NEW_GS_UNIT |
+ CACHE_NEW_GS_PROG |
+ CACHE_NEW_CLIP_UNIT |
+ CACHE_NEW_SF_UNIT |
+ CACHE_NEW_WM_UNIT |
+ CACHE_NEW_CC_UNIT)
+ },
+ .prepare = prepare_psp_urb_cbs,
+ .emit = upload_psp_urb_cbs,
+};
+
+static void prepare_depthbuffer(struct brw_context *brw)
+{
+ struct intel_region *region = brw->state.depth_region;
+
+ if (region != NULL)
+ brw_add_validated_bo(brw, region->buffer);
+}
+
+static void emit_depthbuffer(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ struct intel_region *region = brw->state.depth_region;
+ unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5;
+
+ if (region == NULL) {
+ BEGIN_BATCH(len, IGNORE_CLIPRECTS);
+ OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
+ OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
+ (BRW_SURFACE_NULL << 29));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+
+ if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+ OUT_BATCH(0);
+
+ ADVANCE_BATCH();
+ } else {
+ unsigned int format;
+
+ switch (region->cpp) {
+ case 2:
+ format = BRW_DEPTHFORMAT_D16_UNORM;
+ break;
+ case 4:
+ if (intel->depth_buffer_is_float)
+ format = BRW_DEPTHFORMAT_D32_FLOAT;
+ else
+ format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ assert(region->tiling != I915_TILING_X);
+
+ BEGIN_BATCH(len, IGNORE_CLIPRECTS);
+ OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
+ OUT_BATCH(((region->pitch * region->cpp) - 1) |
+ (format << 18) |
+ (BRW_TILEWALK_YMAJOR << 26) |
+ ((region->tiling != I915_TILING_NONE) << 27) |
+ (BRW_SURFACE_2D << 29));
+ OUT_RELOC(region->buffer,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0);
+ OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
+ ((region->pitch - 1) << 6) |
+ ((region->height - 1) << 19));
+ OUT_BATCH(0);
+
+ if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+ OUT_BATCH(0);
+
+ ADVANCE_BATCH();
+ }
+}
+
+const struct brw_tracked_state brw_depthbuffer = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_DEPTH_BUFFER | BRW_NEW_BATCH,
+ .cache = 0,
+ },
+ .prepare = prepare_depthbuffer,
+ .emit = emit_depthbuffer,
+};
+
+
+
+/***********************************************************************
+ * Polygon stipple packet
+ */
+
+static void upload_polygon_stipple(struct brw_context *brw)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ struct brw_polygon_stipple bps;
+ GLuint i;
+
+ memset(&bps, 0, sizeof(bps));
+ bps.header.opcode = CMD_POLY_STIPPLE_PATTERN;
+ bps.header.length = sizeof(bps)/4-2;
+
+ /* Polygon stipple is provided in OpenGL order, i.e. bottom
+ * row first. If we're rendering to a window (i.e. the
+ * default frame buffer object, 0), then we need to invert
+ * it to match our pixel layout. But if we're rendering
+ * to a FBO (i.e. any named frame buffer object), we *don't*
+ * need to invert - we already match the layout.
+ */
+ if (ctx->DrawBuffer->Name == 0) {
+ for (i = 0; i < 32; i++)
+ bps.stipple[i] = ctx->PolygonStipple[31 - i]; /* invert */
+ }
+ else {
+ for (i = 0; i < 32; i++)
+ bps.stipple[i] = ctx->PolygonStipple[i]; /* don't invert */
+ }
+
+ BRW_CACHED_BATCH_STRUCT(brw, &bps);
+}
+
+const struct brw_tracked_state brw_polygon_stipple = {
+ .dirty = {
+ .mesa = _NEW_POLYGONSTIPPLE,
+ .brw = 0,
+ .cache = 0
+ },
+ .emit = upload_polygon_stipple
+};
+
+
+/***********************************************************************
+ * Polygon stipple offset packet
+ */
+
+static void upload_polygon_stipple_offset(struct brw_context *brw)
+{
+ __DRIdrawablePrivate *dPriv = brw->intel.driDrawable;
+ struct brw_polygon_stipple_offset bpso;
+
+ memset(&bpso, 0, sizeof(bpso));
+ bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
+ bpso.header.length = sizeof(bpso)/4-2;
+
+ /* If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
+ * we have to invert the Y axis in order to match the OpenGL
+ * pixel coordinate system, and our offset must be matched
+ * to the window position. If we're drawing to a FBO
+ * (ctx->DrawBuffer->Name != 0), then our native pixel coordinate
+ * system works just fine, and there's no window system to
+ * worry about.
+ */
+ if (brw->intel.ctx.DrawBuffer->Name == 0) {
+ bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31;
+ bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31;
+ }
+ else {
+ bpso.bits0.y_offset = 0;
+ bpso.bits0.x_offset = 0;
+ }
+
+ BRW_CACHED_BATCH_STRUCT(brw, &bpso);
+}
+
+#define _NEW_WINDOW_POS 0x40000000
+
+const struct brw_tracked_state brw_polygon_stipple_offset = {
+ .dirty = {
+ .mesa = _NEW_WINDOW_POS,
+ .brw = 0,
+ .cache = 0
+ },
+ .emit = upload_polygon_stipple_offset
+};
+
+/**********************************************************************
+ * AA Line parameters
+ */
+static void upload_aa_line_parameters(struct brw_context *brw)
+{
+ struct brw_aa_line_parameters balp;
+
+ if (BRW_IS_965(brw))
+ return;
+
+ /* use legacy aa line coverage computation */
+ memset(&balp, 0, sizeof(balp));
+ balp.header.opcode = CMD_AA_LINE_PARAMETERS;
+ balp.header.length = sizeof(balp) / 4 - 2;
+
+ BRW_CACHED_BATCH_STRUCT(brw, &balp);
+}
+
+const struct brw_tracked_state brw_aa_line_parameters = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_CONTEXT,
+ .cache = 0
+ },
+ .emit = upload_aa_line_parameters
+};
+
+/***********************************************************************
+ * Line stipple packet
+ */
+
+static void upload_line_stipple(struct brw_context *brw)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ struct brw_line_stipple bls;
+ GLfloat tmp;
+ GLint tmpi;
+
+ memset(&bls, 0, sizeof(bls));
+ bls.header.opcode = CMD_LINE_STIPPLE_PATTERN;
+ bls.header.length = sizeof(bls)/4 - 2;
+
+ bls.bits0.pattern = ctx->Line.StipplePattern;
+ bls.bits1.repeat_count = ctx->Line.StippleFactor;
+
+ tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
+ tmpi = tmp * (1<<13);
+
+
+ bls.bits1.inverse_repeat_count = tmpi;
+
+ BRW_CACHED_BATCH_STRUCT(brw, &bls);
+}
+
+const struct brw_tracked_state brw_line_stipple = {
+ .dirty = {
+ .mesa = _NEW_LINE,
+ .brw = 0,
+ .cache = 0
+ },
+ .emit = upload_line_stipple
+};
+
+
+/***********************************************************************
+ * Misc invarient state packets
+ */
+
+static void upload_invarient_state( struct brw_context *brw )
+{
+ {
+ /* 0x61040000 Pipeline Select */
+ /* PipelineSelect : 0 */
+ struct brw_pipeline_select ps;
+
+ memset(&ps, 0, sizeof(ps));
+ ps.header.opcode = CMD_PIPELINE_SELECT(brw);
+ ps.header.pipeline_select = 0;
+ BRW_BATCH_STRUCT(brw, &ps);
+ }
+
+ {
+ struct brw_global_depth_offset_clamp gdo;
+ memset(&gdo, 0, sizeof(gdo));
+
+ /* Disable depth offset clamping.
+ */
+ gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP;
+ gdo.header.length = sizeof(gdo)/4 - 2;
+ gdo.depth_offset_clamp = 0.0;
+
+ BRW_BATCH_STRUCT(brw, &gdo);
+ }
+
+
+ /* 0x61020000 State Instruction Pointer */
+ {
+ struct brw_system_instruction_pointer sip;
+ memset(&sip, 0, sizeof(sip));
+
+ sip.header.opcode = CMD_STATE_INSN_POINTER;
+ sip.header.length = 0;
+ sip.bits0.pad = 0;
+ sip.bits0.system_instruction_pointer = 0;
+ BRW_BATCH_STRUCT(brw, &sip);
+ }
+
+
+ {
+ struct brw_vf_statistics vfs;
+ memset(&vfs, 0, sizeof(vfs));
+
+ vfs.opcode = CMD_VF_STATISTICS(brw);
+ if (INTEL_DEBUG & DEBUG_STATS)
+ vfs.statistics_enable = 1;
+
+ BRW_BATCH_STRUCT(brw, &vfs);
+ }
+}
+
+const struct brw_tracked_state brw_invarient_state = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_CONTEXT,
+ .cache = 0
+ },
+ .emit = upload_invarient_state
+};
+
+/**
+ * Define the base addresses which some state is referenced from.
+ *
+ * This allows us to avoid having to emit relocations in many places for
+ * cached state, and instead emit pointers inside of large, mostly-static
+ * state pools. This comes at the expense of memory, and more expensive cache
+ * misses.
+ */
+static void upload_state_base_address( struct brw_context *brw )
+{
+ struct intel_context *intel = &brw->intel;
+
+ /* Output the structure (brw_state_base_address) directly to the
+ * batchbuffer, so we can emit relocations inline.
+ */
+ if (BRW_IS_IGDNG(brw)) {
+ BEGIN_BATCH(8, IGNORE_CLIPRECTS);
+ OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
+ OUT_BATCH(1); /* General state base address */
+ OUT_BATCH(1); /* Surface state base address */
+ OUT_BATCH(1); /* Indirect object base address */
+ OUT_BATCH(1); /* Instruction base address */
+ OUT_BATCH(1); /* General state upper bound */
+ OUT_BATCH(1); /* Indirect object upper bound */
+ OUT_BATCH(1); /* Instruction access upper bound */
+ ADVANCE_BATCH();
+ } else {
+ BEGIN_BATCH(6, IGNORE_CLIPRECTS);
+ OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
+ OUT_BATCH(1); /* General state base address */
+ OUT_BATCH(1); /* Surface state base address */
+ OUT_BATCH(1); /* Indirect object base address */
+ OUT_BATCH(1); /* General state upper bound */
+ OUT_BATCH(1); /* Indirect object upper bound */
+ ADVANCE_BATCH();
+ }
+}
+
+const struct brw_tracked_state brw_state_base_address = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_CONTEXT,
+ .cache = 0,
+ },
+ .emit = upload_state_base_address
+};
diff --git a/src/gallium/drivers/i965/brw_program.c b/src/gallium/drivers/i965/brw_program.c
new file mode 100644
index 00000000000..bac69187c19
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_program.c
@@ -0,0 +1,166 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+#include "main/imports.h"
+#include "main/enums.h"
+#include "shader/prog_parameter.h"
+#include "shader/program.h"
+#include "shader/programopt.h"
+#include "tnl/tnl.h"
+
+#include "brw_context.h"
+#include "brw_util.h"
+#include "brw_wm.h"
+
+static void brwBindProgram( GLcontext *ctx,
+ GLenum target,
+ struct gl_program *prog )
+{
+ struct brw_context *brw = brw_context(ctx);
+
+ switch (target) {
+ case GL_VERTEX_PROGRAM_ARB:
+ brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+ break;
+ case GL_FRAGMENT_PROGRAM_ARB:
+ brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
+ break;
+ }
+}
+
+static struct gl_program *brwNewProgram( GLcontext *ctx,
+ GLenum target,
+ GLuint id )
+{
+ struct brw_context *brw = brw_context(ctx);
+
+ switch (target) {
+ case GL_VERTEX_PROGRAM_ARB: {
+ struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program);
+ if (prog) {
+ prog->id = brw->program_id++;
+
+ return _mesa_init_vertex_program( ctx, &prog->program,
+ target, id );
+ }
+ else
+ return NULL;
+ }
+
+ case GL_FRAGMENT_PROGRAM_ARB: {
+ struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program);
+ if (prog) {
+ prog->id = brw->program_id++;
+
+ return _mesa_init_fragment_program( ctx, &prog->program,
+ target, id );
+ }
+ else
+ return NULL;
+ }
+
+ default:
+ return _mesa_new_program(ctx, target, id);
+ }
+}
+
+static void brwDeleteProgram( GLcontext *ctx,
+ struct gl_program *prog )
+{
+ if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+ struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
+ struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog);
+ dri_bo_unreference(brw_fprog->const_buffer);
+ }
+
+ _mesa_delete_program( ctx, prog );
+}
+
+
+static GLboolean brwIsProgramNative( GLcontext *ctx,
+ GLenum target,
+ struct gl_program *prog )
+{
+ return GL_TRUE;
+}
+
+static void brwProgramStringNotify( GLcontext *ctx,
+ GLenum target,
+ struct gl_program *prog )
+{
+ struct brw_context *brw = brw_context(ctx);
+
+ if (target == GL_FRAGMENT_PROGRAM_ARB) {
+ struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
+ struct brw_fragment_program *newFP = brw_fragment_program(fprog);
+ const struct brw_fragment_program *curFP =
+ brw_fragment_program_const(brw->fragment_program);
+
+ if (fprog->FogOption) {
+ _mesa_append_fog_code(ctx, fprog);
+ fprog->FogOption = GL_NONE;
+ }
+
+ if (newFP == curFP)
+ brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
+ newFP->id = brw->program_id++;
+ newFP->isGLSL = brw_wm_is_glsl(fprog);
+ }
+ else if (target == GL_VERTEX_PROGRAM_ARB) {
+ struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
+ struct brw_vertex_program *newVP = brw_vertex_program(vprog);
+ const struct brw_vertex_program *curVP =
+ brw_vertex_program_const(brw->vertex_program);
+
+ if (newVP == curVP)
+ brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+ if (newVP->program.IsPositionInvariant) {
+ _mesa_insert_mvp_code(ctx, &newVP->program);
+ }
+ newVP->id = brw->program_id++;
+
+ /* Also tell tnl about it:
+ */
+ _tnl_program_string(ctx, target, prog);
+ }
+}
+
+void brwInitFragProgFuncs( struct dd_function_table *functions )
+{
+ assert(functions->ProgramStringNotify == _tnl_program_string);
+
+ functions->BindProgram = brwBindProgram;
+ functions->NewProgram = brwNewProgram;
+ functions->DeleteProgram = brwDeleteProgram;
+ functions->IsProgramNative = brwIsProgramNative;
+ functions->ProgramStringNotify = brwProgramStringNotify;
+}
+
diff --git a/src/gallium/drivers/i965/brw_queryobj.c b/src/gallium/drivers/i965/brw_queryobj.c
new file mode 100644
index 00000000000..a195bc32b07
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_queryobj.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <[email protected]>
+ *
+ */
+
+/** @file support for ARB_query_object
+ *
+ * ARB_query_object is implemented by using the PIPE_CONTROL command to stall
+ * execution on the completion of previous depth tests, and write the
+ * current PS_DEPTH_COUNT to a buffer object.
+ *
+ * We use before and after counts when drawing during a query so that
+ * we don't pick up other clients' query data in ours. To reduce overhead,
+ * a single BO is used to record the query data for all active queries at
+ * once. This also gives us a simple bound on how much batchbuffer space is
+ * required for handling queries, so that we can be sure that we won't
+ * have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT.
+ */
+#include "main/simple_list.h"
+#include "main/imports.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+#include "intel_reg.h"
+
+/** Waits on the query object's BO and totals the results for this query */
+static void
+brw_queryobj_get_results(struct brw_query_object *query)
+{
+ int i;
+ uint64_t *results;
+
+ if (query->bo == NULL)
+ return;
+
+ /* Map and count the pixels from the current query BO */
+ dri_bo_map(query->bo, GL_FALSE);
+ results = query->bo->virtual;
+ for (i = query->first_index; i <= query->last_index; i++) {
+ query->Base.Result += results[i * 2 + 1] - results[i * 2];
+ }
+ dri_bo_unmap(query->bo);
+
+ dri_bo_unreference(query->bo);
+ query->bo = NULL;
+}
+
+static struct gl_query_object *
+brw_new_query_object(GLcontext *ctx, GLuint id)
+{
+ struct brw_query_object *query;
+
+ query = _mesa_calloc(sizeof(struct brw_query_object));
+
+ query->Base.Id = id;
+ query->Base.Result = 0;
+ query->Base.Active = GL_FALSE;
+ query->Base.Ready = GL_TRUE;
+
+ return &query->Base;
+}
+
+static void
+brw_delete_query(GLcontext *ctx, struct gl_query_object *q)
+{
+ struct brw_query_object *query = (struct brw_query_object *)q;
+
+ dri_bo_unreference(query->bo);
+ _mesa_free(query);
+}
+
+static void
+brw_begin_query(GLcontext *ctx, struct gl_query_object *q)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct intel_context *intel = intel_context(ctx);
+ struct brw_query_object *query = (struct brw_query_object *)q;
+
+ /* Reset our driver's tracking of query state. */
+ dri_bo_unreference(query->bo);
+ query->bo = NULL;
+ query->first_index = -1;
+ query->last_index = -1;
+
+ insert_at_head(&brw->query.active_head, query);
+ intel->stats_wm++;
+}
+
+/**
+ * Begin the ARB_occlusion_query query on a query object.
+ */
+static void
+brw_end_query(GLcontext *ctx, struct gl_query_object *q)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct intel_context *intel = intel_context(ctx);
+ struct brw_query_object *query = (struct brw_query_object *)q;
+
+ /* Flush the batchbuffer in case it has writes to our query BO.
+ * Have later queries write to a new query BO so that further rendering
+ * doesn't delay the collection of our results.
+ */
+ if (query->bo) {
+ brw_emit_query_end(brw);
+ intel_batchbuffer_flush(intel->batch);
+
+ dri_bo_unreference(brw->query.bo);
+ brw->query.bo = NULL;
+ }
+
+ remove_from_list(query);
+
+ intel->stats_wm--;
+}
+
+static void brw_wait_query(GLcontext *ctx, struct gl_query_object *q)
+{
+ struct brw_query_object *query = (struct brw_query_object *)q;
+
+ brw_queryobj_get_results(query);
+ query->Base.Ready = GL_TRUE;
+}
+
+static void brw_check_query(GLcontext *ctx, struct gl_query_object *q)
+{
+ struct brw_query_object *query = (struct brw_query_object *)q;
+
+ if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) {
+ brw_queryobj_get_results(query);
+ query->Base.Ready = GL_TRUE;
+ }
+}
+
+/** Called to set up the query BO and account for its aperture space */
+void
+brw_prepare_query_begin(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+
+ /* Skip if we're not doing any queries. */
+ if (is_empty_list(&brw->query.active_head))
+ return;
+
+ /* Get a new query BO if we're going to need it. */
+ if (brw->query.bo == NULL ||
+ brw->query.index * 2 + 1 >= 4096 / sizeof(uint64_t)) {
+ dri_bo_unreference(brw->query.bo);
+ brw->query.bo = NULL;
+
+ brw->query.bo = dri_bo_alloc(intel->bufmgr, "query", 4096, 1);
+ brw->query.index = 0;
+ }
+
+ brw_add_validated_bo(brw, brw->query.bo);
+}
+
+/** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. */
+void
+brw_emit_query_begin(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ struct brw_query_object *query;
+
+ /* Skip if we're not doing any queries, or we've emitted the start. */
+ if (brw->query.active || is_empty_list(&brw->query.active_head))
+ return;
+
+ BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+ PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_WRITE_DEPTH_COUNT);
+ /* This object could be mapped cacheable, but we don't have an exposed
+ * mechanism to support that. Since it's going uncached, tell GEM that
+ * we're writing to it. The usual clflush should be all that's required
+ * to pick up the results.
+ */
+ OUT_RELOC(brw->query.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ PIPE_CONTROL_GLOBAL_GTT_WRITE |
+ ((brw->query.index * 2) * sizeof(uint64_t)));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+
+ foreach(query, &brw->query.active_head) {
+ if (query->bo != brw->query.bo) {
+ if (query->bo != NULL)
+ brw_queryobj_get_results(query);
+ dri_bo_reference(brw->query.bo);
+ query->bo = brw->query.bo;
+ query->first_index = brw->query.index;
+ }
+ query->last_index = brw->query.index;
+ }
+ brw->query.active = GL_TRUE;
+}
+
+/** Called at batchbuffer flush to get an ending PS_DEPTH_COUNT */
+void
+brw_emit_query_end(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+
+ if (!brw->query.active)
+ return;
+
+ BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+ PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_WRITE_DEPTH_COUNT);
+ OUT_RELOC(brw->query.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ PIPE_CONTROL_GLOBAL_GTT_WRITE |
+ ((brw->query.index * 2 + 1) * sizeof(uint64_t)));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+
+ brw->query.active = GL_FALSE;
+ brw->query.index++;
+}
+
+void brw_init_queryobj_functions(struct dd_function_table *functions)
+{
+ functions->NewQueryObject = brw_new_query_object;
+ functions->DeleteQuery = brw_delete_query;
+ functions->BeginQuery = brw_begin_query;
+ functions->EndQuery = brw_end_query;
+ functions->CheckQuery = brw_check_query;
+ functions->WaitQuery = brw_wait_query;
+}
diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
new file mode 100644
index 00000000000..e1c2c7777b5
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -0,0 +1,200 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+#include "brw_state.h"
+
+static void compile_sf_prog( struct brw_context *brw,
+ struct brw_sf_prog_key *key )
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ struct brw_sf_compile c;
+ const GLuint *program;
+ GLuint program_size;
+ GLuint i, idx;
+
+ memset(&c, 0, sizeof(c));
+
+ /* Begin the compilation:
+ */
+ brw_init_compile(brw, &c.func);
+
+ c.key = *key;
+ c.nr_attrs = brw_count_bits(c.key.attrs);
+ c.nr_attr_regs = (c.nr_attrs+1)/2;
+ c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS);
+ c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
+
+ c.prog_data.urb_read_length = c.nr_attr_regs;
+ c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
+
+ /* Construct map from attribute number to position in the vertex.
+ */
+ for (i = idx = 0; i < VERT_RESULT_MAX; i++)
+ if (c.key.attrs & (1<<i)) {
+ c.attr_to_idx[i] = idx;
+ c.idx_to_attr[idx] = i;
+ if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) {
+ c.point_attrs[i].CoordReplace =
+ ctx->Point.CoordReplace[i - VERT_RESULT_TEX0];
+ }
+ else {
+ c.point_attrs[i].CoordReplace = GL_FALSE;
+ }
+ idx++;
+ }
+
+ /* Which primitive? Or all three?
+ */
+ switch (key->primitive) {
+ case SF_TRIANGLES:
+ c.nr_verts = 3;
+ brw_emit_tri_setup( &c, GL_TRUE );
+ break;
+ case SF_LINES:
+ c.nr_verts = 2;
+ brw_emit_line_setup( &c, GL_TRUE );
+ break;
+ case SF_POINTS:
+ c.nr_verts = 1;
+ if (key->do_point_sprite)
+ brw_emit_point_sprite_setup( &c, GL_TRUE );
+ else
+ brw_emit_point_setup( &c, GL_TRUE );
+ break;
+ case SF_UNFILLED_TRIS:
+ c.nr_verts = 3;
+ brw_emit_anyprim_setup( &c );
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ /* get the program
+ */
+ program = brw_get_program(&c.func, &program_size);
+
+ /* Upload
+ */
+ dri_bo_unreference(brw->sf.prog_bo);
+ brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG,
+ &c.key, sizeof(c.key),
+ NULL, 0,
+ program, program_size,
+ &c.prog_data,
+ &brw->sf.prog_data );
+}
+
+/* Calculate interpolants for triangle and line rasterization.
+ */
+static void upload_sf_prog(struct brw_context *brw)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ struct brw_sf_prog_key key;
+
+ memset(&key, 0, sizeof(key));
+
+ /* Populate the key, noting state dependencies:
+ */
+ /* CACHE_NEW_VS_PROG */
+ key.attrs = brw->vs.prog_data->outputs_written;
+
+ /* BRW_NEW_REDUCED_PRIMITIVE */
+ switch (brw->intel.reduced_primitive) {
+ case GL_TRIANGLES:
+ /* NOTE: We just use the edgeflag attribute as an indicator that
+ * unfilled triangles are active. We don't actually do the
+ * edgeflag testing here, it is already done in the clip
+ * program.
+ */
+ if (key.attrs & (1<<VERT_RESULT_EDGE))
+ key.primitive = SF_UNFILLED_TRIS;
+ else
+ key.primitive = SF_TRIANGLES;
+ break;
+ case GL_LINES:
+ key.primitive = SF_LINES;
+ break;
+ case GL_POINTS:
+ key.primitive = SF_POINTS;
+ break;
+ }
+
+ key.do_point_sprite = ctx->Point.PointSprite;
+ key.SpriteOrigin = ctx->Point.SpriteOrigin;
+ /* _NEW_LIGHT */
+ key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
+ key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
+
+ /* _NEW_HINT */
+ key.linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
+
+ /* _NEW_POLYGON */
+ if (key.do_twoside_color) {
+ /* If we're rendering to a FBO, we have to invert the polygon
+ * face orientation, just as we invert the viewport in
+ * sf_unit_create_from_key(). ctx->DrawBuffer->Name will be
+ * nonzero if we're rendering to such an FBO.
+ */
+ key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) ^ (ctx->DrawBuffer->Name != 0);
+ }
+
+ dri_bo_unreference(brw->sf.prog_bo);
+ brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG,
+ &key, sizeof(key),
+ NULL, 0,
+ &brw->sf.prog_data);
+ if (brw->sf.prog_bo == NULL)
+ compile_sf_prog( brw, &key );
+}
+
+
+const struct brw_tracked_state brw_sf_prog = {
+ .dirty = {
+ .mesa = (_NEW_HINT | _NEW_LIGHT | _NEW_POLYGON | _NEW_POINT),
+ .brw = (BRW_NEW_REDUCED_PRIMITIVE),
+ .cache = CACHE_NEW_VS_PROG
+ },
+ .prepare = upload_sf_prog
+};
+
diff --git a/src/gallium/drivers/i965/brw_sf.h b/src/gallium/drivers/i965/brw_sf.h
new file mode 100644
index 00000000000..6426b6df9ff
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf.h
@@ -0,0 +1,113 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#ifndef BRW_SF_H
+#define BRW_SF_H
+
+
+#include "shader/program.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+
+
+#define SF_POINTS 0
+#define SF_LINES 1
+#define SF_TRIANGLES 2
+#define SF_UNFILLED_TRIS 3
+
+struct brw_sf_prog_key {
+ GLuint attrs:32;
+ GLuint primitive:2;
+ GLuint do_twoside_color:1;
+ GLuint do_flat_shading:1;
+ GLuint frontface_ccw:1;
+ GLuint do_point_sprite:1;
+ GLuint linear_color:1; /**< linear interp vs. perspective interp */
+ GLuint pad:25;
+ GLenum SpriteOrigin;
+};
+
+struct brw_sf_point_tex {
+ GLboolean CoordReplace;
+};
+
+struct brw_sf_compile {
+ struct brw_compile func;
+ struct brw_sf_prog_key key;
+ struct brw_sf_prog_data prog_data;
+
+ struct brw_reg pv;
+ struct brw_reg det;
+ struct brw_reg dx0;
+ struct brw_reg dx2;
+ struct brw_reg dy0;
+ struct brw_reg dy2;
+
+ /* z and 1/w passed in seperately:
+ */
+ struct brw_reg z[3];
+ struct brw_reg inv_w[3];
+
+ /* The vertices:
+ */
+ struct brw_reg vert[3];
+
+ /* Temporaries, allocated after last vertex reg.
+ */
+ struct brw_reg inv_det;
+ struct brw_reg a1_sub_a0;
+ struct brw_reg a2_sub_a0;
+ struct brw_reg tmp;
+
+ struct brw_reg m1Cx;
+ struct brw_reg m2Cy;
+ struct brw_reg m3C0;
+
+ GLuint nr_verts;
+ GLuint nr_attrs;
+ GLuint nr_attr_regs;
+ GLuint nr_setup_attrs;
+ GLuint nr_setup_regs;
+
+ GLubyte attr_to_idx[VERT_RESULT_MAX];
+ GLubyte idx_to_attr[VERT_RESULT_MAX];
+ struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX];
+};
+
+
+void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_anyprim_setup( struct brw_sf_compile *c );
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c
new file mode 100644
index 00000000000..ca8f97f9f9e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf_emit.c
@@ -0,0 +1,739 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+
+
+static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
+ struct brw_reg vert,
+ GLuint attr)
+{
+ GLuint off = c->attr_to_idx[attr] / 2;
+ GLuint sub = c->attr_to_idx[attr] % 2;
+
+ return brw_vec4_grf(vert.nr + off, sub * 4);
+}
+
+static GLboolean have_attr(struct brw_sf_compile *c,
+ GLuint attr)
+{
+ return (c->key.attrs & (1<<attr)) ? 1 : 0;
+}
+
+/***********************************************************************
+ * Twoside lighting
+ */
+static void copy_bfc( struct brw_sf_compile *c,
+ struct brw_reg vert )
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+ for (i = 0; i < 2; i++) {
+ if (have_attr(c, VERT_RESULT_COL0+i) &&
+ have_attr(c, VERT_RESULT_BFC0+i))
+ brw_MOV(p,
+ get_vert_attr(c, vert, VERT_RESULT_COL0+i),
+ get_vert_attr(c, vert, VERT_RESULT_BFC0+i));
+ }
+}
+
+
+static void do_twoside_color( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *if_insn;
+ GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
+
+ /* Already done in clip program:
+ */
+ if (c->key.primitive == SF_UNFILLED_TRIS)
+ return;
+
+ /* XXX: What happens if BFC isn't present? This could only happen
+ * for user-supplied vertex programs, as t_vp_build.c always does
+ * the right thing.
+ */
+ if (!(have_attr(c, VERT_RESULT_COL0) && have_attr(c, VERT_RESULT_BFC0)) &&
+ !(have_attr(c, VERT_RESULT_COL1) && have_attr(c, VERT_RESULT_BFC1)))
+ return;
+
+ /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
+ * to get all channels active inside the IF. In the clipping code
+ * we run with NoMask, so it's not an option and we can use
+ * BRW_EXECUTE_1 for all comparisions.
+ */
+ brw_push_insn_state(p);
+ brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
+ if_insn = brw_IF(p, BRW_EXECUTE_4);
+ {
+ switch (c->nr_verts) {
+ case 3: copy_bfc(c, c->vert[2]);
+ case 2: copy_bfc(c, c->vert[1]);
+ case 1: copy_bfc(c, c->vert[0]);
+ }
+ }
+ brw_ENDIF(p, if_insn);
+ brw_pop_insn_state(p);
+}
+
+
+
+/***********************************************************************
+ * Flat shading
+ */
+
+#define VERT_RESULT_COLOR_BITS ((1<<VERT_RESULT_COL0) | \
+ (1<<VERT_RESULT_COL1))
+
+static void copy_colors( struct brw_sf_compile *c,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+ for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) {
+ if (have_attr(c,i))
+ brw_MOV(p,
+ get_vert_attr(c, dst, i),
+ get_vert_attr(c, src, i));
+ }
+}
+
+
+
+/* Need to use a computed jump to copy flatshaded attributes as the
+ * vertices are ordered according to y-coordinate before reaching this
+ * point, so the PV could be anywhere.
+ */
+static void do_flatshade_triangle( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg ip = brw_ip_reg();
+ GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
+ GLuint jmpi = 1;
+
+ if (!nr)
+ return;
+
+ /* Already done in clip program:
+ */
+ if (c->key.primitive == SF_UNFILLED_TRIS)
+ return;
+
+ if (BRW_IS_IGDNG(p->brw))
+ jmpi = 2;
+
+ brw_push_insn_state(p);
+
+ brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
+ brw_JMPI(p, ip, ip, c->pv);
+
+ copy_colors(c, c->vert[1], c->vert[0]);
+ copy_colors(c, c->vert[2], c->vert[0]);
+ brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1)));
+
+ copy_colors(c, c->vert[0], c->vert[1]);
+ copy_colors(c, c->vert[2], c->vert[1]);
+ brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2));
+
+ copy_colors(c, c->vert[0], c->vert[2]);
+ copy_colors(c, c->vert[1], c->vert[2]);
+
+ brw_pop_insn_state(p);
+}
+
+
+static void do_flatshade_line( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg ip = brw_ip_reg();
+ GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
+ GLuint jmpi = 1;
+
+ if (!nr)
+ return;
+
+ /* Already done in clip program:
+ */
+ if (c->key.primitive == SF_UNFILLED_TRIS)
+ return;
+
+ if (BRW_IS_IGDNG(p->brw))
+ jmpi = 2;
+
+ brw_push_insn_state(p);
+
+ brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
+ brw_JMPI(p, ip, ip, c->pv);
+ copy_colors(c, c->vert[1], c->vert[0]);
+
+ brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr));
+ copy_colors(c, c->vert[0], c->vert[1]);
+
+ brw_pop_insn_state(p);
+}
+
+
+
+/***********************************************************************
+ * Triangle setup.
+ */
+
+
+static void alloc_regs( struct brw_sf_compile *c )
+{
+ GLuint reg, i;
+
+ /* Values computed by fixed function unit:
+ */
+ c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
+ c->det = brw_vec1_grf(1, 2);
+ c->dx0 = brw_vec1_grf(1, 3);
+ c->dx2 = brw_vec1_grf(1, 4);
+ c->dy0 = brw_vec1_grf(1, 5);
+ c->dy2 = brw_vec1_grf(1, 6);
+
+ /* z and 1/w passed in seperately:
+ */
+ c->z[0] = brw_vec1_grf(2, 0);
+ c->inv_w[0] = brw_vec1_grf(2, 1);
+ c->z[1] = brw_vec1_grf(2, 2);
+ c->inv_w[1] = brw_vec1_grf(2, 3);
+ c->z[2] = brw_vec1_grf(2, 4);
+ c->inv_w[2] = brw_vec1_grf(2, 5);
+
+ /* The vertices:
+ */
+ reg = 3;
+ for (i = 0; i < c->nr_verts; i++) {
+ c->vert[i] = brw_vec8_grf(reg, 0);
+ reg += c->nr_attr_regs;
+ }
+
+ /* Temporaries, allocated after last vertex reg.
+ */
+ c->inv_det = brw_vec1_grf(reg, 0); reg++;
+ c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++;
+ c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++;
+ c->tmp = brw_vec8_grf(reg, 0); reg++;
+
+ /* Note grf allocation:
+ */
+ c->prog_data.total_grf = reg;
+
+
+ /* Outputs of this program - interpolation coefficients for
+ * rasterization:
+ */
+ c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
+ c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
+ c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
+}
+
+
+static void copy_z_inv_w( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+ brw_push_insn_state(p);
+
+ /* Copy both scalars with a single MOV:
+ */
+ for (i = 0; i < c->nr_verts; i++)
+ brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
+
+ brw_pop_insn_state(p);
+}
+
+
+static void invert_det( struct brw_sf_compile *c)
+{
+ /* Looks like we invert all 8 elements just to get 1/det in
+ * position 2 !?!
+ */
+ brw_math(&c->func,
+ c->inv_det,
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 0,
+ c->det,
+ BRW_MATH_DATA_SCALAR,
+ BRW_MATH_PRECISION_FULL);
+
+}
+
+
+static GLboolean calculate_masks( struct brw_sf_compile *c,
+ GLuint reg,
+ GLushort *pc,
+ GLushort *pc_persp,
+ GLushort *pc_linear)
+{
+ GLboolean is_last_attr = (reg == c->nr_setup_regs - 1);
+ GLuint persp_mask;
+ GLuint linear_mask;
+
+ if (c->key.do_flat_shading || c->key.linear_color)
+ persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS |
+ FRAG_BIT_COL0 |
+ FRAG_BIT_COL1);
+ else
+ persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS);
+
+ if (c->key.do_flat_shading)
+ linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1);
+ else
+ linear_mask = c->key.attrs;
+
+ *pc_persp = 0;
+ *pc_linear = 0;
+ *pc = 0xf;
+
+ if (persp_mask & (1 << c->idx_to_attr[reg*2]))
+ *pc_persp = 0xf;
+
+ if (linear_mask & (1 << c->idx_to_attr[reg*2]))
+ *pc_linear = 0xf;
+
+ /* Maybe only processs one attribute on the final round:
+ */
+ if (reg*2+1 < c->nr_setup_attrs) {
+ *pc |= 0xf0;
+
+ if (persp_mask & (1 << c->idx_to_attr[reg*2+1]))
+ *pc_persp |= 0xf0;
+
+ if (linear_mask & (1 << c->idx_to_attr[reg*2+1]))
+ *pc_linear |= 0xf0;
+ }
+
+ return is_last_attr;
+}
+
+
+
+void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+ c->nr_verts = 3;
+
+ if (allocate)
+ alloc_regs(c);
+
+ invert_det(c);
+ copy_z_inv_w(c);
+
+ if (c->key.do_twoside_color)
+ do_twoside_color(c);
+
+ if (c->key.do_flat_shading)
+ do_flatshade_triangle(c);
+
+
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ /* Pair of incoming attributes:
+ */
+ struct brw_reg a0 = offset(c->vert[0], i);
+ struct brw_reg a1 = offset(c->vert[1], i);
+ struct brw_reg a2 = offset(c->vert[2], i);
+ GLushort pc, pc_persp, pc_linear;
+ GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ if (pc_persp)
+ {
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ brw_MUL(p, a1, a1, c->inv_w[1]);
+ brw_MUL(p, a2, a2, c->inv_w[2]);
+ }
+
+
+ /* Calculate coefficients for interpolated values:
+ */
+ if (pc_linear)
+ {
+ brw_set_predicate_control_flag_value(p, pc_linear);
+
+ brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
+ brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
+
+ /* calculate dA/dx
+ */
+ brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
+ brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
+ brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
+
+ /* calculate dA/dy
+ */
+ brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
+ brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
+ brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
+ }
+
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+ /* start point for interpolation
+ */
+ brw_MOV(p, c->m3C0, a0);
+
+ /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in
+ * the send instruction:
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* offset */
+ BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
+ }
+ }
+}
+
+
+
+void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+
+ c->nr_verts = 2;
+
+ if (allocate)
+ alloc_regs(c);
+
+ invert_det(c);
+ copy_z_inv_w(c);
+
+ if (c->key.do_flat_shading)
+ do_flatshade_line(c);
+
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ /* Pair of incoming attributes:
+ */
+ struct brw_reg a0 = offset(c->vert[0], i);
+ struct brw_reg a1 = offset(c->vert[1], i);
+ GLushort pc, pc_persp, pc_linear;
+ GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ if (pc_persp)
+ {
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ brw_MUL(p, a1, a1, c->inv_w[1]);
+ }
+
+ /* Calculate coefficients for position, color:
+ */
+ if (pc_linear) {
+ brw_set_predicate_control_flag_value(p, pc_linear);
+
+ brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
+
+ brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
+ brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
+
+ brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
+ brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
+ }
+
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+
+ /* start point for interpolation
+ */
+ brw_MOV(p, c->m3C0, a0);
+
+ /* Copy m0..m3 to URB.
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0),
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* urb destination offset */
+ BRW_URB_SWIZZLE_TRANSPOSE);
+ }
+ }
+}
+
+void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+ c->nr_verts = 1;
+
+ if (allocate)
+ alloc_regs(c);
+
+ copy_z_inv_w(c);
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]];
+ struct brw_reg a0 = offset(c->vert[0], i);
+ GLushort pc, pc_persp, pc_linear;
+ GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ if (pc_persp)
+ {
+ if (!tex->CoordReplace) {
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ }
+ }
+
+ if (tex->CoordReplace) {
+ /* Caculate 1.0/PointWidth */
+ brw_math(&c->func,
+ c->tmp,
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 0,
+ c->dx0,
+ BRW_MATH_DATA_SCALAR,
+ BRW_MATH_PRECISION_FULL);
+
+ if (c->key.SpriteOrigin == GL_LOWER_LEFT) {
+ brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
+ brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
+ brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0]));
+ brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
+ } else {
+ brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
+ brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
+ brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]);
+ brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
+ }
+ } else {
+ brw_MOV(p, c->m1Cx, brw_imm_ud(0));
+ brw_MOV(p, c->m2Cy, brw_imm_ud(0));
+ }
+
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+ if (tex->CoordReplace) {
+ if (c->key.SpriteOrigin == GL_LOWER_LEFT) {
+ brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0));
+ brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0));
+ }
+ else
+ brw_MOV(p, c->m3C0, brw_imm_f(0.0));
+ } else {
+ brw_MOV(p, c->m3C0, a0); /* constant value */
+ }
+
+ /* Copy m0..m3 to URB.
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0),
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* urb destination offset */
+ BRW_URB_SWIZZLE_TRANSPOSE);
+ }
+ }
+}
+
+/* Points setup - several simplifications as all attributes are
+ * constant across the face of the point (point sprites excluded!)
+ */
+void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+ c->nr_verts = 1;
+
+ if (allocate)
+ alloc_regs(c);
+
+ copy_z_inv_w(c);
+
+ brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
+ brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
+
+ for (i = 0; i < c->nr_setup_regs; i++)
+ {
+ struct brw_reg a0 = offset(c->vert[0], i);
+ GLushort pc, pc_persp, pc_linear;
+ GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+ if (pc_persp)
+ {
+ /* This seems odd as the values are all constant, but the
+ * fragment shader will be expecting it:
+ */
+ brw_set_predicate_control_flag_value(p, pc_persp);
+ brw_MUL(p, a0, a0, c->inv_w[0]);
+ }
+
+
+ /* The delta values are always zero, just send the starting
+ * coordinate. Again, this is to fit in with the interpolation
+ * code in the fragment shader.
+ */
+ {
+ brw_set_predicate_control_flag_value(p, pc);
+
+ brw_MOV(p, c->m3C0, a0); /* constant value */
+
+ /* Copy m0..m3 to URB.
+ */
+ brw_urb_WRITE(p,
+ brw_null_reg(),
+ 0,
+ brw_vec8_grf(0, 0),
+ 0, /* allocate */
+ 1, /* used */
+ 4, /* msg len */
+ 0, /* response len */
+ last, /* eot */
+ last, /* writes complete */
+ i*4, /* urb destination offset */
+ BRW_URB_SWIZZLE_TRANSPOSE);
+ }
+ }
+}
+
+void brw_emit_anyprim_setup( struct brw_sf_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg ip = brw_ip_reg();
+ struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
+ struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
+ struct brw_reg primmask;
+ struct brw_instruction *jmp;
+ struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
+
+ GLuint saveflag;
+
+ c->nr_verts = 3;
+ alloc_regs(c);
+
+ primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
+
+ brw_MOV(p, primmask, brw_imm_ud(1));
+ brw_SHL(p, primmask, primmask, payload_prim);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
+ (1<<_3DPRIM_TRISTRIP) |
+ (1<<_3DPRIM_TRIFAN) |
+ (1<<_3DPRIM_TRISTRIP_REVERSE) |
+ (1<<_3DPRIM_POLYGON) |
+ (1<<_3DPRIM_RECTLIST) |
+ (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+ {
+ saveflag = p->flag_value;
+ brw_push_insn_state(p);
+ brw_emit_tri_setup( c, GL_FALSE );
+ brw_pop_insn_state(p);
+ p->flag_value = saveflag;
+ /* note - thread killed in subroutine, so must
+ * restore the flag which is changed when building
+ * the subroutine. fix #13240
+ */
+ }
+ brw_land_fwd_jump(p, jmp);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
+ (1<<_3DPRIM_LINESTRIP) |
+ (1<<_3DPRIM_LINELOOP) |
+ (1<<_3DPRIM_LINESTRIP_CONT) |
+ (1<<_3DPRIM_LINESTRIP_BF) |
+ (1<<_3DPRIM_LINESTRIP_CONT_BF)));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+ {
+ saveflag = p->flag_value;
+ brw_push_insn_state(p);
+ brw_emit_line_setup( c, GL_FALSE );
+ brw_pop_insn_state(p);
+ p->flag_value = saveflag;
+ /* note - thread killed in subroutine */
+ }
+ brw_land_fwd_jump(p, jmp);
+
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+ {
+ saveflag = p->flag_value;
+ brw_push_insn_state(p);
+ brw_emit_point_sprite_setup( c, GL_FALSE );
+ brw_pop_insn_state(p);
+ p->flag_value = saveflag;
+ }
+ brw_land_fwd_jump(p, jmp);
+
+ brw_emit_point_setup( c, GL_FALSE );
+}
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
new file mode 100644
index 00000000000..bc0f0760738
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -0,0 +1,365 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "main/macros.h"
+#include "intel_fbo.h"
+
+static void upload_sf_vp(struct brw_context *brw)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+ struct brw_sf_viewport sfv;
+ GLfloat y_scale, y_bias;
+ const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
+ const GLfloat *v = ctx->Viewport._WindowMap.m;
+
+ memset(&sfv, 0, sizeof(sfv));
+
+ if (render_to_fbo) {
+ y_scale = 1.0;
+ y_bias = 0;
+ }
+ else {
+ y_scale = -1.0;
+ y_bias = ctx->DrawBuffer->Height;
+ }
+
+ /* _NEW_VIEWPORT */
+
+ sfv.viewport.m00 = v[MAT_SX];
+ sfv.viewport.m11 = v[MAT_SY] * y_scale;
+ sfv.viewport.m22 = v[MAT_SZ] * depth_scale;
+ sfv.viewport.m30 = v[MAT_TX];
+ sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias;
+ sfv.viewport.m32 = v[MAT_TZ] * depth_scale;
+
+ /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT
+ * for DrawBuffer->_[XY]{min,max}
+ */
+
+ /* The scissor only needs to handle the intersection of drawable and
+ * scissor rect. Clipping to the boundaries of static shared buffers
+ * for front/back/depth is covered by looping over cliprects in brw_draw.c.
+ *
+ * Note that the hardware's coordinates are inclusive, while Mesa's min is
+ * inclusive but max is exclusive.
+ */
+ if (render_to_fbo) {
+ /* texmemory: Y=0=bottom */
+ sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
+ sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+ sfv.scissor.ymin = ctx->DrawBuffer->_Ymin;
+ sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
+ }
+ else {
+ /* memory: Y=0=top */
+ sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
+ sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+ sfv.scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
+ sfv.scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
+ }
+
+ dri_bo_unreference(brw->sf.vp_bo);
+ brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 );
+}
+
+const struct brw_tracked_state brw_sf_vp = {
+ .dirty = {
+ .mesa = (_NEW_VIEWPORT |
+ _NEW_SCISSOR |
+ _NEW_BUFFERS),
+ .brw = 0,
+ .cache = 0
+ },
+ .prepare = upload_sf_vp
+};
+
+struct brw_sf_unit_key {
+ unsigned int total_grf;
+ unsigned int urb_entry_read_length;
+
+ unsigned int nr_urb_entries, urb_size, sfsize;
+
+ GLenum front_face, cull_face, provoking_vertex;
+ unsigned scissor:1;
+ unsigned line_smooth:1;
+ unsigned point_sprite:1;
+ unsigned point_attenuated:1;
+ unsigned render_to_fbo:1;
+ float line_width;
+ float point_size;
+};
+
+static void
+sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ memset(key, 0, sizeof(*key));
+
+ /* CACHE_NEW_SF_PROG */
+ key->total_grf = brw->sf.prog_data->total_grf;
+ key->urb_entry_read_length = brw->sf.prog_data->urb_read_length;
+
+ /* BRW_NEW_URB_FENCE */
+ key->nr_urb_entries = brw->urb.nr_sf_entries;
+ key->urb_size = brw->urb.vsize;
+ key->sfsize = brw->urb.sfsize;
+
+ key->scissor = ctx->Scissor.Enabled;
+ key->front_face = ctx->Polygon.FrontFace;
+
+ if (ctx->Polygon.CullFlag)
+ key->cull_face = ctx->Polygon.CullFaceMode;
+ else
+ key->cull_face = GL_NONE;
+
+ key->line_width = ctx->Line.Width;
+ key->line_smooth = ctx->Line.SmoothFlag;
+
+ key->point_sprite = ctx->Point.PointSprite;
+ key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
+ key->point_attenuated = ctx->Point._Attenuated;
+
+ /* _NEW_LIGHT */
+ key->provoking_vertex = ctx->Light.ProvokingVertex;
+
+ key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+}
+
+static dri_bo *
+sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
+ dri_bo **reloc_bufs)
+{
+ struct brw_sf_unit_state sf;
+ dri_bo *bo;
+ int chipset_max_threads;
+ memset(&sf, 0, sizeof(sf));
+
+ sf.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+ sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */
+
+ sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+
+ sf.thread3.dispatch_grf_start_reg = 3;
+
+ if (BRW_IS_IGDNG(brw))
+ sf.thread3.urb_entry_read_offset = 3;
+ else
+ sf.thread3.urb_entry_read_offset = 1;
+
+ sf.thread3.urb_entry_read_length = key->urb_entry_read_length;
+
+ sf.thread4.nr_urb_entries = key->nr_urb_entries;
+ sf.thread4.urb_entry_allocation_size = key->sfsize - 1;
+
+ /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or
+ * 48(IGDNG) threads
+ */
+ if (BRW_IS_IGDNG(brw))
+ chipset_max_threads = 48;
+ else
+ chipset_max_threads = 24;
+
+ sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1;
+
+ if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+ sf.thread4.max_threads = 0;
+
+ if (INTEL_DEBUG & DEBUG_STATS)
+ sf.thread4.stats_enable = 1;
+
+ /* CACHE_NEW_SF_VP */
+ sf.sf5.sf_viewport_state_offset = brw->sf.vp_bo->offset >> 5; /* reloc */
+
+ sf.sf5.viewport_transform = 1;
+
+ /* _NEW_SCISSOR */
+ if (key->scissor)
+ sf.sf6.scissor = 1;
+
+ /* _NEW_POLYGON */
+ if (key->front_face == GL_CCW)
+ sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
+ else
+ sf.sf5.front_winding = BRW_FRONTWINDING_CW;
+
+ /* The viewport is inverted for rendering to a FBO, and that inverts
+ * polygon front/back orientation.
+ */
+ sf.sf5.front_winding ^= key->render_to_fbo;
+
+ switch (key->cull_face) {
+ case GL_FRONT:
+ sf.sf6.cull_mode = BRW_CULLMODE_FRONT;
+ break;
+ case GL_BACK:
+ sf.sf6.cull_mode = BRW_CULLMODE_BACK;
+ break;
+ case GL_FRONT_AND_BACK:
+ sf.sf6.cull_mode = BRW_CULLMODE_BOTH;
+ break;
+ case GL_NONE:
+ sf.sf6.cull_mode = BRW_CULLMODE_NONE;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ /* _NEW_LINE */
+ /* XXX use ctx->Const.Min/MaxLineWidth here */
+ sf.sf6.line_width = CLAMP(key->line_width, 1.0, 5.0) * (1<<1);
+
+ sf.sf6.line_endcap_aa_region_width = 1;
+ if (key->line_smooth)
+ sf.sf6.aa_enable = 1;
+ else if (sf.sf6.line_width <= 0x2)
+ sf.sf6.line_width = 0;
+
+ /* _NEW_BUFFERS */
+ key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+ if (!key->render_to_fbo) {
+ /* Rendering to an OpenGL window */
+ sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;
+ }
+ else {
+ /* If rendering to an FBO, the pixel coordinate system is
+ * inverted with respect to the normal OpenGL coordinate
+ * system, so BRW_RASTRULE_LOWER_RIGHT is correct.
+ * But this value is listed as "Reserved, but not seen as useful"
+ * in Intel documentation (page 212, "Point Rasterization Rule",
+ * section 7.4 "SF Pipeline State Summary", of document
+ * "Intel® 965 Express Chipset Family and Intel® G35 Express
+ * Chipset Graphics Controller Programmer's Reference Manual,
+ * Volume 2: 3D/Media", Revision 1.0b as of January 2008,
+ * available at
+ * http://intellinuxgraphics.org/documentation.html
+ * at the time of this writing).
+ *
+ * It does work on at least some devices, if not all;
+ * if devices that don't support it can be identified,
+ * the likely failure case is that points are rasterized
+ * incorrectly, which is no worse than occurs without
+ * the value, so we're using it here.
+ */
+ sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT;
+ }
+ /* XXX clamp max depends on AA vs. non-AA */
+
+ /* _NEW_POINT */
+ sf.sf7.sprite_point = key->point_sprite;
+ sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3);
+ sf.sf7.use_point_size_state = !key->point_attenuated;
+ sf.sf7.aa_line_distance_mode = 0;
+
+ /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
+ */
+ if (key->provoking_vertex == GL_LAST_VERTEX_CONVENTION) {
+ sf.sf7.trifan_pv = 2;
+ sf.sf7.linestrip_pv = 1;
+ sf.sf7.tristrip_pv = 2;
+ } else {
+ sf.sf7.trifan_pv = 1;
+ sf.sf7.linestrip_pv = 0;
+ sf.sf7.tristrip_pv = 0;
+ }
+ sf.sf7.line_last_pixel_enable = 0;
+
+ /* Set bias for OpenGL rasterization rules:
+ */
+ sf.sf6.dest_org_vbias = 0x8;
+ sf.sf6.dest_org_hbias = 0x8;
+
+ bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT,
+ key, sizeof(*key),
+ reloc_bufs, 2,
+ &sf, sizeof(sf),
+ NULL, NULL);
+
+ /* STATE_PREFETCH command description describes this state as being
+ * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
+ */
+ /* Emit SF program relocation */
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ sf.thread0.grf_reg_count << 1,
+ offsetof(struct brw_sf_unit_state, thread0),
+ brw->sf.prog_bo);
+
+ /* Emit SF viewport relocation */
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ sf.sf5.front_winding | (sf.sf5.viewport_transform << 1),
+ offsetof(struct brw_sf_unit_state, sf5),
+ brw->sf.vp_bo);
+
+ return bo;
+}
+
+static void upload_sf_unit( struct brw_context *brw )
+{
+ struct brw_sf_unit_key key;
+ dri_bo *reloc_bufs[2];
+
+ sf_unit_populate_key(brw, &key);
+
+ reloc_bufs[0] = brw->sf.prog_bo;
+ reloc_bufs[1] = brw->sf.vp_bo;
+
+ dri_bo_unreference(brw->sf.state_bo);
+ brw->sf.state_bo = brw_search_cache(&brw->cache, BRW_SF_UNIT,
+ &key, sizeof(key),
+ reloc_bufs, 2,
+ NULL);
+ if (brw->sf.state_bo == NULL) {
+ brw->sf.state_bo = sf_unit_create_from_key(brw, &key, reloc_bufs);
+ }
+}
+
+const struct brw_tracked_state brw_sf_unit = {
+ .dirty = {
+ .mesa = (_NEW_POLYGON |
+ _NEW_LIGHT |
+ _NEW_LINE |
+ _NEW_POINT |
+ _NEW_SCISSOR |
+ _NEW_BUFFERS),
+ .brw = BRW_NEW_URB_FENCE,
+ .cache = (CACHE_NEW_SF_VP |
+ CACHE_NEW_SF_PROG)
+ },
+ .prepare = upload_sf_unit,
+};
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
new file mode 100644
index 00000000000..d639656b9d4
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -0,0 +1,173 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#ifndef BRW_STATE_H
+#define BRW_STATE_H
+
+#include "brw_context.h"
+
+static inline void
+brw_add_validated_bo(struct brw_context *brw, dri_bo *bo)
+{
+ assert(brw->state.validated_bo_count < ARRAY_SIZE(brw->state.validated_bos));
+
+ if (bo != NULL) {
+ dri_bo_reference(bo);
+ brw->state.validated_bos[brw->state.validated_bo_count++] = bo;
+ }
+};
+
+const struct brw_tracked_state brw_blend_constant_color;
+const struct brw_tracked_state brw_cc_unit;
+const struct brw_tracked_state brw_cc_vp;
+const struct brw_tracked_state brw_check_fallback;
+const struct brw_tracked_state brw_clip_prog;
+const struct brw_tracked_state brw_clip_unit;
+const struct brw_tracked_state brw_constant_buffer;
+const struct brw_tracked_state brw_curbe_offsets;
+const struct brw_tracked_state brw_invarient_state;
+const struct brw_tracked_state brw_gs_prog;
+const struct brw_tracked_state brw_gs_unit;
+const struct brw_tracked_state brw_line_stipple;
+const struct brw_tracked_state brw_aa_line_parameters;
+const struct brw_tracked_state brw_pipelined_state_pointers;
+const struct brw_tracked_state brw_binding_table_pointers;
+const struct brw_tracked_state brw_depthbuffer;
+const struct brw_tracked_state brw_polygon_stipple_offset;
+const struct brw_tracked_state brw_polygon_stipple;
+const struct brw_tracked_state brw_program_parameters;
+const struct brw_tracked_state brw_recalculate_urb_fence;
+const struct brw_tracked_state brw_sf_prog;
+const struct brw_tracked_state brw_sf_unit;
+const struct brw_tracked_state brw_sf_vp;
+const struct brw_tracked_state brw_state_base_address;
+const struct brw_tracked_state brw_urb_fence;
+const struct brw_tracked_state brw_vertex_state;
+const struct brw_tracked_state brw_vs_surfaces;
+const struct brw_tracked_state brw_vs_prog;
+const struct brw_tracked_state brw_vs_unit;
+const struct brw_tracked_state brw_wm_input_sizes;
+const struct brw_tracked_state brw_wm_prog;
+const struct brw_tracked_state brw_wm_samplers;
+const struct brw_tracked_state brw_wm_constant_surface;
+const struct brw_tracked_state brw_wm_surfaces;
+const struct brw_tracked_state brw_wm_unit;
+
+const struct brw_tracked_state brw_psp_urb_cbs;
+
+const struct brw_tracked_state brw_pipe_control;
+
+const struct brw_tracked_state brw_drawing_rect;
+const struct brw_tracked_state brw_indices;
+const struct brw_tracked_state brw_vertices;
+const struct brw_tracked_state brw_index_buffer;
+
+/**
+ * Use same key for WM and VS surfaces.
+ */
+struct brw_surface_key {
+ GLenum target, depthmode;
+ dri_bo *bo;
+ GLint format, internal_format;
+ GLint first_level, last_level;
+ GLint width, height, depth;
+ GLint pitch, cpp;
+ uint32_t tiling;
+ GLuint offset;
+};
+
+/***********************************************************************
+ * brw_state.c
+ */
+void brw_validate_state(struct brw_context *brw);
+void brw_upload_state(struct brw_context *brw);
+void brw_init_state(struct brw_context *brw);
+void brw_destroy_state(struct brw_context *brw);
+
+/***********************************************************************
+ * brw_state_cache.c
+ */
+dri_bo *brw_cache_data(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *data,
+ dri_bo **reloc_bufs,
+ GLuint nr_reloc_bufs);
+
+dri_bo *brw_cache_data_sz(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *data,
+ GLuint data_size,
+ dri_bo **reloc_bufs,
+ GLuint nr_reloc_bufs);
+
+dri_bo *brw_upload_cache( struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_sz,
+ dri_bo **reloc_bufs,
+ GLuint nr_reloc_bufs,
+ const void *data,
+ GLuint data_sz,
+ const void *aux,
+ void *aux_return );
+
+dri_bo *brw_search_cache( struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_size,
+ dri_bo **reloc_bufs,
+ GLuint nr_reloc_bufs,
+ void *aux_return);
+void brw_state_cache_check_size( struct brw_context *brw );
+
+void brw_init_caches( struct brw_context *brw );
+void brw_destroy_caches( struct brw_context *brw );
+void brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo);
+
+/***********************************************************************
+ * brw_state_batch.c
+ */
+#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS)
+#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) )
+
+GLboolean brw_cached_batch_struct( struct brw_context *brw,
+ const void *data,
+ GLuint sz );
+void brw_destroy_batch_cache( struct brw_context *brw );
+void brw_clear_batch_cache( struct brw_context *brw );
+
+/* brw_wm_surface_state.c */
+dri_bo *
+brw_create_constant_surface( struct brw_context *brw,
+ struct brw_surface_key *key );
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c
new file mode 100644
index 00000000000..7821898cf9b
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_batch.c
@@ -0,0 +1,99 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+#include "main/imports.h"
+
+
+
+/* A facility similar to the data caching code above, which aims to
+ * prevent identical commands being issued repeatedly.
+ */
+GLboolean brw_cached_batch_struct( struct brw_context *brw,
+ const void *data,
+ GLuint sz )
+{
+ struct brw_cached_batch_item *item = brw->cached_batch_items;
+ struct header *newheader = (struct header *)data;
+
+ if (brw->emit_state_always) {
+ intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
+ return GL_TRUE;
+ }
+
+ while (item) {
+ if (item->header->opcode == newheader->opcode) {
+ if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
+ return GL_FALSE;
+ if (item->sz != sz) {
+ _mesa_free(item->header);
+ item->header = _mesa_malloc(sz);
+ item->sz = sz;
+ }
+ goto emit;
+ }
+ item = item->next;
+ }
+
+ assert(!item);
+ item = CALLOC_STRUCT(brw_cached_batch_item);
+ item->header = _mesa_malloc(sz);
+ item->sz = sz;
+ item->next = brw->cached_batch_items;
+ brw->cached_batch_items = item;
+
+ emit:
+ memcpy(item->header, newheader, sz);
+ intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
+ return GL_TRUE;
+}
+
+void brw_clear_batch_cache( struct brw_context *brw )
+{
+ struct brw_cached_batch_item *item = brw->cached_batch_items;
+
+ while (item) {
+ struct brw_cached_batch_item *next = item->next;
+ free((void *)item->header);
+ free(item);
+ item = next;
+ }
+
+ brw->cached_batch_items = NULL;
+}
+
+void brw_destroy_batch_cache( struct brw_context *brw )
+{
+ brw_clear_batch_cache(brw);
+}
diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c
new file mode 100644
index 00000000000..c262e1db8b9
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_cache.c
@@ -0,0 +1,597 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+/** @file brw_state_cache.c
+ *
+ * This file implements a simple static state cache for 965. The consumers
+ * can query the hash table of state using a cache_id, opaque key data,
+ * and list of buffers that will be used in relocations, and receive the
+ * corresponding state buffer object of state (plus associated auxiliary
+ * data) in return.
+ *
+ * The inner workings are a simple hash table based on a CRC of the key data.
+ * The cache_id and relocation target buffers associated with the state
+ * buffer are included as auxiliary key data, but are not part of the hash
+ * value (this should be fixed, but will likely be fixed instead by making
+ * consumers use structured keys).
+ *
+ * Replacement is not implemented. Instead, when the cache gets too big, at
+ * a safe point (unlock) we throw out all of the cache data and let it
+ * regenerate for the next rendering operation.
+ *
+ * The reloc_buf pointers need to be included as key data, otherwise the
+ * non-unique values stuffed in the offset in key data through
+ * brw_cache_data() may result in successful probe for state buffers
+ * even when the buffer being referenced doesn't match. The result would be
+ * that the same state cache entry is used twice for different buffers,
+ * only one of the two buffers referenced gets put into the offset, and the
+ * incorrect program is run for the other instance.
+ */
+
+#include "main/imports.h"
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+
+/* XXX: Fixme - have to include these to get the sizes of the prog_key
+ * structs:
+ */
+#include "brw_wm.h"
+#include "brw_vs.h"
+#include "brw_clip.h"
+#include "brw_sf.h"
+#include "brw_gs.h"
+
+
+static GLuint
+hash_key(const void *key, GLuint key_size,
+ dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
+{
+ GLuint *ikey = (GLuint *)key;
+ GLuint hash = 0, i;
+
+ assert(key_size % 4 == 0);
+
+ /* I'm sure this can be improved on:
+ */
+ for (i = 0; i < key_size/4; i++) {
+ hash ^= ikey[i];
+ hash = (hash << 5) | (hash >> 27);
+ }
+
+ /* Include the BO pointers as key data as well */
+ ikey = (GLuint *)reloc_bufs;
+ key_size = nr_reloc_bufs * sizeof(dri_bo *);
+ for (i = 0; i < key_size/4; i++) {
+ hash ^= ikey[i];
+ hash = (hash << 5) | (hash >> 27);
+ }
+
+ return hash;
+}
+
+
+/**
+ * Marks a new buffer as being chosen for the given cache id.
+ */
+static void
+update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
+ dri_bo *bo)
+{
+ if (bo == cache->last_bo[cache_id])
+ return; /* no change */
+
+ dri_bo_unreference(cache->last_bo[cache_id]);
+ cache->last_bo[cache_id] = bo;
+ dri_bo_reference(cache->last_bo[cache_id]);
+ cache->brw->state.dirty.cache |= 1 << cache_id;
+}
+
+
+static struct brw_cache_item *
+search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
+ GLuint hash, const void *key, GLuint key_size,
+ dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
+{
+ struct brw_cache_item *c;
+
+#if 0
+ int bucketcount = 0;
+
+ for (c = cache->items[hash % cache->size]; c; c = c->next)
+ bucketcount++;
+
+ fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size,
+ cache->size, bucketcount, cache->n_items);
+#endif
+
+ for (c = cache->items[hash % cache->size]; c; c = c->next) {
+ if (c->cache_id == cache_id &&
+ c->hash == hash &&
+ c->key_size == key_size &&
+ memcmp(c->key, key, key_size) == 0 &&
+ c->nr_reloc_bufs == nr_reloc_bufs &&
+ memcmp(c->reloc_bufs, reloc_bufs,
+ nr_reloc_bufs * sizeof(dri_bo *)) == 0)
+ return c;
+ }
+
+ return NULL;
+}
+
+
+static void
+rehash(struct brw_cache *cache)
+{
+ struct brw_cache_item **items;
+ struct brw_cache_item *c, *next;
+ GLuint size, i;
+
+ size = cache->size * 3;
+ items = (struct brw_cache_item**) _mesa_calloc(size * sizeof(*items));
+
+ for (i = 0; i < cache->size; i++)
+ for (c = cache->items[i]; c; c = next) {
+ next = c->next;
+ c->next = items[c->hash % size];
+ items[c->hash % size] = c;
+ }
+
+ FREE(cache->items);
+ cache->items = items;
+ cache->size = size;
+}
+
+
+/**
+ * Returns the buffer object matching cache_id and key, or NULL.
+ */
+dri_bo *
+brw_search_cache(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_size,
+ dri_bo **reloc_bufs, GLuint nr_reloc_bufs,
+ void *aux_return)
+{
+ struct brw_cache_item *item;
+ GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
+
+ item = search_cache(cache, cache_id, hash, key, key_size,
+ reloc_bufs, nr_reloc_bufs);
+
+ if (item == NULL)
+ return NULL;
+
+ if (aux_return)
+ *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+
+ update_cache_last(cache, cache_id, item->bo);
+
+ dri_bo_reference(item->bo);
+ return item->bo;
+}
+
+
+dri_bo *
+brw_upload_cache( struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_size,
+ dri_bo **reloc_bufs,
+ GLuint nr_reloc_bufs,
+ const void *data,
+ GLuint data_size,
+ const void *aux,
+ void *aux_return )
+{
+ struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
+ GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
+ GLuint relocs_size = nr_reloc_bufs * sizeof(dri_bo *);
+ GLuint aux_size = cache->aux_size[cache_id];
+ void *tmp;
+ dri_bo *bo;
+ int i;
+
+ /* Create the buffer object to contain the data */
+ bo = dri_bo_alloc(cache->brw->intel.bufmgr,
+ cache->name[cache_id], data_size, 1 << 6);
+
+
+ /* Set up the memory containing the key, aux_data, and reloc_bufs */
+ tmp = _mesa_malloc(key_size + aux_size + relocs_size);
+
+ memcpy(tmp, key, key_size);
+ memcpy(tmp + key_size, aux, cache->aux_size[cache_id]);
+ memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size);
+ for (i = 0; i < nr_reloc_bufs; i++) {
+ if (reloc_bufs[i] != NULL)
+ dri_bo_reference(reloc_bufs[i]);
+ }
+
+ item->cache_id = cache_id;
+ item->key = tmp;
+ item->hash = hash;
+ item->key_size = key_size;
+ item->reloc_bufs = tmp + key_size + aux_size;
+ item->nr_reloc_bufs = nr_reloc_bufs;
+
+ item->bo = bo;
+ dri_bo_reference(bo);
+ item->data_size = data_size;
+
+ if (cache->n_items > cache->size * 1.5)
+ rehash(cache);
+
+ hash %= cache->size;
+ item->next = cache->items[hash];
+ cache->items[hash] = item;
+ cache->n_items++;
+
+ if (aux_return) {
+ assert(cache->aux_size[cache_id]);
+ *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+ }
+
+ if (INTEL_DEBUG & DEBUG_STATE)
+ _mesa_printf("upload %s: %d bytes to cache id %d\n",
+ cache->name[cache_id],
+ data_size, cache_id);
+
+ /* Copy data to the buffer */
+ dri_bo_subdata(bo, 0, data_size, data);
+
+ update_cache_last(cache, cache_id, bo);
+
+ return bo;
+}
+
+
+/**
+ * This doesn't really work with aux data. Use search/upload instead
+ */
+dri_bo *
+brw_cache_data_sz(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *data,
+ GLuint data_size,
+ dri_bo **reloc_bufs,
+ GLuint nr_reloc_bufs)
+{
+ dri_bo *bo;
+ struct brw_cache_item *item;
+ GLuint hash = hash_key(data, data_size, reloc_bufs, nr_reloc_bufs);
+
+ item = search_cache(cache, cache_id, hash, data, data_size,
+ reloc_bufs, nr_reloc_bufs);
+ if (item) {
+ update_cache_last(cache, cache_id, item->bo);
+ dri_bo_reference(item->bo);
+ return item->bo;
+ }
+
+ bo = brw_upload_cache(cache, cache_id,
+ data, data_size,
+ reloc_bufs, nr_reloc_bufs,
+ data, data_size,
+ NULL, NULL);
+
+ return bo;
+}
+
+
+/**
+ * Wrapper around brw_cache_data_sz using the cache_id's canonical key size.
+ *
+ * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be
+ * better to use, as the potentially changing offsets in the data-used-as-key
+ * will result in excessive cache misses.
+ */
+dri_bo *
+brw_cache_data(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *data,
+ dri_bo **reloc_bufs,
+ GLuint nr_reloc_bufs)
+{
+ return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id],
+ reloc_bufs, nr_reloc_bufs);
+}
+
+enum pool_type {
+ DW_SURFACE_STATE,
+ DW_GENERAL_STATE
+};
+
+
+static void
+brw_init_cache_id(struct brw_cache *cache,
+ const char *name,
+ enum brw_cache_id id,
+ GLuint key_size,
+ GLuint aux_size)
+{
+ cache->name[id] = strdup(name);
+ cache->key_size[id] = key_size;
+ cache->aux_size[id] = aux_size;
+}
+
+
+static void
+brw_init_non_surface_cache(struct brw_context *brw)
+{
+ struct brw_cache *cache = &brw->cache;
+
+ cache->brw = brw;
+
+ cache->size = 7;
+ cache->n_items = 0;
+ cache->items = (struct brw_cache_item **)
+ _mesa_calloc(cache->size * sizeof(struct brw_cache_item));
+
+ brw_init_cache_id(cache,
+ "CC_VP",
+ BRW_CC_VP,
+ sizeof(struct brw_cc_viewport),
+ 0);
+
+ brw_init_cache_id(cache,
+ "CC_UNIT",
+ BRW_CC_UNIT,
+ sizeof(struct brw_cc_unit_state),
+ 0);
+
+ brw_init_cache_id(cache,
+ "WM_PROG",
+ BRW_WM_PROG,
+ sizeof(struct brw_wm_prog_key),
+ sizeof(struct brw_wm_prog_data));
+
+ brw_init_cache_id(cache,
+ "SAMPLER_DEFAULT_COLOR",
+ BRW_SAMPLER_DEFAULT_COLOR,
+ sizeof(struct brw_sampler_default_color),
+ 0);
+
+ brw_init_cache_id(cache,
+ "SAMPLER",
+ BRW_SAMPLER,
+ 0, /* variable key/data size */
+ 0);
+
+ brw_init_cache_id(cache,
+ "WM_UNIT",
+ BRW_WM_UNIT,
+ sizeof(struct brw_wm_unit_state),
+ 0);
+
+ brw_init_cache_id(cache,
+ "SF_PROG",
+ BRW_SF_PROG,
+ sizeof(struct brw_sf_prog_key),
+ sizeof(struct brw_sf_prog_data));
+
+ brw_init_cache_id(cache,
+ "SF_VP",
+ BRW_SF_VP,
+ sizeof(struct brw_sf_viewport),
+ 0);
+
+ brw_init_cache_id(cache,
+ "SF_UNIT",
+ BRW_SF_UNIT,
+ sizeof(struct brw_sf_unit_state),
+ 0);
+
+ brw_init_cache_id(cache,
+ "VS_UNIT",
+ BRW_VS_UNIT,
+ sizeof(struct brw_vs_unit_state),
+ 0);
+
+ brw_init_cache_id(cache,
+ "VS_PROG",
+ BRW_VS_PROG,
+ sizeof(struct brw_vs_prog_key),
+ sizeof(struct brw_vs_prog_data));
+
+ brw_init_cache_id(cache,
+ "CLIP_UNIT",
+ BRW_CLIP_UNIT,
+ sizeof(struct brw_clip_unit_state),
+ 0);
+
+ brw_init_cache_id(cache,
+ "CLIP_PROG",
+ BRW_CLIP_PROG,
+ sizeof(struct brw_clip_prog_key),
+ sizeof(struct brw_clip_prog_data));
+
+ brw_init_cache_id(cache,
+ "GS_UNIT",
+ BRW_GS_UNIT,
+ sizeof(struct brw_gs_unit_state),
+ 0);
+
+ brw_init_cache_id(cache,
+ "GS_PROG",
+ BRW_GS_PROG,
+ sizeof(struct brw_gs_prog_key),
+ sizeof(struct brw_gs_prog_data));
+}
+
+
+static void
+brw_init_surface_cache(struct brw_context *brw)
+{
+ struct brw_cache *cache = &brw->surface_cache;
+
+ cache->brw = brw;
+
+ cache->size = 7;
+ cache->n_items = 0;
+ cache->items = (struct brw_cache_item **)
+ _mesa_calloc(cache->size * sizeof(struct brw_cache_item));
+
+ brw_init_cache_id(cache,
+ "SS_SURFACE",
+ BRW_SS_SURFACE,
+ sizeof(struct brw_surface_state),
+ 0);
+
+ brw_init_cache_id(cache,
+ "SS_SURF_BIND",
+ BRW_SS_SURF_BIND,
+ 0,
+ 0);
+}
+
+
+void
+brw_init_caches(struct brw_context *brw)
+{
+ brw_init_non_surface_cache(brw);
+ brw_init_surface_cache(brw);
+}
+
+
+static void
+brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
+{
+ struct brw_cache_item *c, *next;
+ GLuint i;
+
+ if (INTEL_DEBUG & DEBUG_STATE)
+ _mesa_printf("%s\n", __FUNCTION__);
+
+ for (i = 0; i < cache->size; i++) {
+ for (c = cache->items[i]; c; c = next) {
+ int j;
+
+ next = c->next;
+ for (j = 0; j < c->nr_reloc_bufs; j++)
+ dri_bo_unreference(c->reloc_bufs[j]);
+ dri_bo_unreference(c->bo);
+ free((void *)c->key);
+ free(c);
+ }
+ cache->items[i] = NULL;
+ }
+
+ cache->n_items = 0;
+
+ if (brw->curbe.last_buf) {
+ _mesa_free(brw->curbe.last_buf);
+ brw->curbe.last_buf = NULL;
+ }
+
+ brw->state.dirty.mesa |= ~0;
+ brw->state.dirty.brw |= ~0;
+ brw->state.dirty.cache |= ~0;
+}
+
+/* Clear all entries from the cache that point to the given bo.
+ *
+ * This lets us release memory for reuse earlier for known-dead buffers,
+ * at the cost of walking the entire hash table.
+ */
+void
+brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo)
+{
+ struct brw_cache_item **prev;
+ GLuint i;
+
+ if (INTEL_DEBUG & DEBUG_STATE)
+ _mesa_printf("%s\n", __FUNCTION__);
+
+ for (i = 0; i < cache->size; i++) {
+ for (prev = &cache->items[i]; *prev;) {
+ struct brw_cache_item *c = *prev;
+
+ if (drm_intel_bo_references(c->bo, bo)) {
+ int j;
+
+ *prev = c->next;
+
+ for (j = 0; j < c->nr_reloc_bufs; j++)
+ dri_bo_unreference(c->reloc_bufs[j]);
+ dri_bo_unreference(c->bo);
+ free((void *)c->key);
+ free(c);
+ cache->n_items--;
+ } else {
+ prev = &c->next;
+ }
+ }
+ }
+}
+
+void
+brw_state_cache_check_size(struct brw_context *brw)
+{
+ if (INTEL_DEBUG & DEBUG_STATE)
+ _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items);
+
+ /* un-tuned guess. We've got around 20 state objects for a total of around
+ * 32k, so 1000 of them is around 1.5MB.
+ */
+ if (brw->cache.n_items > 1000)
+ brw_clear_cache(brw, &brw->cache);
+
+ if (brw->surface_cache.n_items > 1000)
+ brw_clear_cache(brw, &brw->surface_cache);
+}
+
+
+static void
+brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
+{
+ GLuint i;
+
+ if (INTEL_DEBUG & DEBUG_STATE)
+ _mesa_printf("%s\n", __FUNCTION__);
+
+ brw_clear_cache(brw, cache);
+ for (i = 0; i < BRW_MAX_CACHE; i++) {
+ dri_bo_unreference(cache->last_bo[i]);
+ free(cache->name[i]);
+ }
+ free(cache->items);
+ cache->items = NULL;
+ cache->size = 0;
+}
+
+
+void
+brw_destroy_caches(struct brw_context *brw)
+{
+ brw_destroy_cache(brw, &brw->cache);
+ brw_destroy_cache(brw, &brw->surface_cache);
+}
diff --git a/src/gallium/drivers/i965/brw_state_dump.c b/src/gallium/drivers/i965/brw_state_dump.c
new file mode 100644
index 00000000000..e94fa7d2b4c
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_dump.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <[email protected]>
+ *
+ */
+
+#include "main/mtypes.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+/**
+ * Prints out a header, the contents, and the message associated with
+ * the hardware state data given.
+ *
+ * \param name Name of the state object
+ * \param data Pointer to the base of the state object
+ * \param hw_offset Hardware offset of the base of the state data.
+ * \param index Index of the DWORD being output.
+ */
+static void
+state_out(const char *name, void *data, uint32_t hw_offset, int index,
+ char *fmt, ...)
+{
+ va_list va;
+
+ fprintf(stderr, "%8s: 0x%08x: 0x%08x: ",
+ name, hw_offset + index * 4, ((uint32_t *)data)[index]);
+ va_start(va, fmt);
+ vfprintf(stderr, fmt, va);
+ va_end(va);
+}
+
+/** Generic, undecoded state buffer debug printout */
+static void
+state_struct_out(const char *name, dri_bo *buffer, unsigned int state_size)
+{
+ int i;
+
+ if (buffer == NULL)
+ return;
+
+ dri_bo_map(buffer, GL_FALSE);
+ for (i = 0; i < state_size / 4; i++) {
+ state_out(name, buffer->virtual, buffer->offset, i,
+ "dword %d\n", i);
+ }
+ dri_bo_unmap(buffer);
+}
+
+static const char *
+get_965_surfacetype(unsigned int surfacetype)
+{
+ switch (surfacetype) {
+ case 0: return "1D";
+ case 1: return "2D";
+ case 2: return "3D";
+ case 3: return "CUBE";
+ case 4: return "BUFFER";
+ case 7: return "NULL";
+ default: return "unknown";
+ }
+}
+
+static const char *
+get_965_surface_format(unsigned int surface_format)
+{
+ switch (surface_format) {
+ case 0x000: return "r32g32b32a32_float";
+ case 0x0c1: return "b8g8r8a8_unorm";
+ case 0x100: return "b5g6r5_unorm";
+ case 0x102: return "b5g5r5a1_unorm";
+ case 0x104: return "b4g4r4a4_unorm";
+ default: return "unknown";
+ }
+}
+
+static void dump_wm_surface_state(struct brw_context *brw)
+{
+ int i;
+
+ for (i = 0; i < brw->wm.nr_surfaces; i++) {
+ dri_bo *surf_bo = brw->wm.surf_bo[i];
+ unsigned int surfoff;
+ struct brw_surface_state *surf;
+ char name[20];
+
+ if (surf_bo == NULL) {
+ fprintf(stderr, " WM SS%d: NULL\n", i);
+ continue;
+ }
+ dri_bo_map(surf_bo, GL_FALSE);
+ surfoff = surf_bo->offset;
+ surf = (struct brw_surface_state *)(surf_bo->virtual);
+
+ sprintf(name, "WM SS%d", i);
+ state_out(name, surf, surfoff, 0, "%s %s\n",
+ get_965_surfacetype(surf->ss0.surface_type),
+ get_965_surface_format(surf->ss0.surface_format));
+ state_out(name, surf, surfoff, 1, "offset\n");
+ state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n",
+ surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count);
+ state_out(name, surf, surfoff, 3, "pitch %d, %stiled\n",
+ surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not ");
+ state_out(name, surf, surfoff, 4, "mip base %d\n",
+ surf->ss4.min_lod);
+ state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n",
+ surf->ss5.x_offset, surf->ss5.y_offset);
+
+ dri_bo_unmap(surf_bo);
+ }
+}
+
+static void dump_sf_viewport_state(struct brw_context *brw)
+{
+ const char *name = "SF VP";
+ struct brw_sf_viewport *vp;
+ uint32_t vp_off;
+
+ if (brw->sf.vp_bo == NULL)
+ return;
+
+ dri_bo_map(brw->sf.vp_bo, GL_FALSE);
+
+ vp = brw->sf.vp_bo->virtual;
+ vp_off = brw->sf.vp_bo->offset;
+
+ state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00);
+ state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11);
+ state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22);
+ state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30);
+ state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31);
+ state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32);
+
+ state_out(name, vp, vp_off, 6, "top left = %d,%d\n",
+ vp->scissor.xmin, vp->scissor.ymin);
+ state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n",
+ vp->scissor.xmax, vp->scissor.ymax);
+
+ dri_bo_unmap(brw->sf.vp_bo);
+}
+
+static void brw_debug_prog(const char *name, dri_bo *prog)
+{
+ unsigned int i;
+ uint32_t *data;
+
+ if (prog == NULL)
+ return;
+
+ dri_bo_map(prog, GL_FALSE);
+
+ data = prog->virtual;
+
+ for (i = 0; i < prog->size / 4 / 4; i++) {
+ fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+ name, (unsigned int)prog->offset + i * 4 * 4,
+ data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]);
+ /* Stop at the end of the program. It'd be nice to keep track of the actual
+ * intended program size instead of guessing like this.
+ */
+ if (data[i * 4 + 0] == 0 &&
+ data[i * 4 + 1] == 0 &&
+ data[i * 4 + 2] == 0 &&
+ data[i * 4 + 3] == 0)
+ break;
+ }
+
+ dri_bo_unmap(prog);
+}
+
+
+/**
+ * Print additional debug information associated with the batchbuffer
+ * when DEBUG_BATCH is set.
+ *
+ * For 965, this means mapping the state buffers that would have been referenced
+ * by the batchbuffer and dumping them.
+ *
+ * The buffer offsets printed rely on the buffer containing the last offset
+ * it was validated at.
+ */
+void brw_debug_batch(struct intel_context *intel)
+{
+ struct brw_context *brw = brw_context(&intel->ctx);
+
+ state_struct_out("WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces);
+ dump_wm_surface_state(brw);
+
+ state_struct_out("VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state));
+ brw_debug_prog("VS prog", brw->vs.prog_bo);
+
+ state_struct_out("GS", brw->gs.state_bo, sizeof(struct brw_gs_unit_state));
+ brw_debug_prog("GS prog", brw->gs.prog_bo);
+
+ state_struct_out("SF", brw->sf.state_bo, sizeof(struct brw_sf_unit_state));
+ dump_sf_viewport_state(brw);
+ brw_debug_prog("SF prog", brw->sf.prog_bo);
+
+ state_struct_out("WM", brw->wm.state_bo, sizeof(struct brw_wm_unit_state));
+ brw_debug_prog("WM prog", brw->wm.prog_bo);
+}
diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c
new file mode 100644
index 00000000000..b817b741e77
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_upload.c
@@ -0,0 +1,416 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+
+/* This is used to initialize brw->state.atoms[]. We could use this
+ * list directly except for a single atom, brw_constant_buffer, which
+ * has a .dirty value which changes according to the parameters of the
+ * current fragment and vertex programs, and so cannot be a static
+ * value.
+ */
+const struct brw_tracked_state *atoms[] =
+{
+ &brw_check_fallback,
+
+ &brw_wm_input_sizes,
+ &brw_vs_prog,
+ &brw_gs_prog,
+ &brw_clip_prog,
+ &brw_sf_prog,
+ &brw_wm_prog,
+
+ /* Once all the programs are done, we know how large urb entry
+ * sizes need to be and can decide if we need to change the urb
+ * layout.
+ */
+ &brw_curbe_offsets,
+ &brw_recalculate_urb_fence,
+
+ &brw_cc_vp,
+ &brw_cc_unit,
+
+ &brw_vs_surfaces, /* must do before unit */
+ &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */
+ &brw_wm_surfaces, /* must do before samplers and unit */
+ &brw_wm_samplers,
+
+ &brw_wm_unit,
+ &brw_sf_vp,
+ &brw_sf_unit,
+ &brw_vs_unit, /* always required, enabled or not */
+ &brw_clip_unit,
+ &brw_gs_unit,
+
+ /* Command packets:
+ */
+ &brw_invarient_state,
+ &brw_state_base_address,
+
+ &brw_binding_table_pointers,
+ &brw_blend_constant_color,
+
+ &brw_depthbuffer,
+
+ &brw_polygon_stipple,
+ &brw_polygon_stipple_offset,
+
+ &brw_line_stipple,
+ &brw_aa_line_parameters,
+
+ &brw_psp_urb_cbs,
+
+ &brw_drawing_rect,
+ &brw_indices,
+ &brw_index_buffer,
+ &brw_vertices,
+
+ &brw_constant_buffer
+};
+
+
+void brw_init_state( struct brw_context *brw )
+{
+ brw_init_caches(brw);
+}
+
+
+void brw_destroy_state( struct brw_context *brw )
+{
+ brw_destroy_caches(brw);
+ brw_destroy_batch_cache(brw);
+}
+
+/***********************************************************************
+ */
+
+static GLboolean check_state( const struct brw_state_flags *a,
+ const struct brw_state_flags *b )
+{
+ return ((a->mesa & b->mesa) ||
+ (a->brw & b->brw) ||
+ (a->cache & b->cache));
+}
+
+static void accumulate_state( struct brw_state_flags *a,
+ const struct brw_state_flags *b )
+{
+ a->mesa |= b->mesa;
+ a->brw |= b->brw;
+ a->cache |= b->cache;
+}
+
+
+static void xor_states( struct brw_state_flags *result,
+ const struct brw_state_flags *a,
+ const struct brw_state_flags *b )
+{
+ result->mesa = a->mesa ^ b->mesa;
+ result->brw = a->brw ^ b->brw;
+ result->cache = a->cache ^ b->cache;
+}
+
+static void
+brw_clear_validated_bos(struct brw_context *brw)
+{
+ int i;
+
+ /* Clear the last round of validated bos */
+ for (i = 0; i < brw->state.validated_bo_count; i++) {
+ dri_bo_unreference(brw->state.validated_bos[i]);
+ brw->state.validated_bos[i] = NULL;
+ }
+ brw->state.validated_bo_count = 0;
+}
+
+struct dirty_bit_map {
+ uint32_t bit;
+ char *name;
+ uint32_t count;
+};
+
+#define DEFINE_BIT(name) {name, #name, 0}
+
+static struct dirty_bit_map mesa_bits[] = {
+ DEFINE_BIT(_NEW_MODELVIEW),
+ DEFINE_BIT(_NEW_PROJECTION),
+ DEFINE_BIT(_NEW_TEXTURE_MATRIX),
+ DEFINE_BIT(_NEW_COLOR_MATRIX),
+ DEFINE_BIT(_NEW_ACCUM),
+ DEFINE_BIT(_NEW_COLOR),
+ DEFINE_BIT(_NEW_DEPTH),
+ DEFINE_BIT(_NEW_EVAL),
+ DEFINE_BIT(_NEW_FOG),
+ DEFINE_BIT(_NEW_HINT),
+ DEFINE_BIT(_NEW_LIGHT),
+ DEFINE_BIT(_NEW_LINE),
+ DEFINE_BIT(_NEW_PIXEL),
+ DEFINE_BIT(_NEW_POINT),
+ DEFINE_BIT(_NEW_POLYGON),
+ DEFINE_BIT(_NEW_POLYGONSTIPPLE),
+ DEFINE_BIT(_NEW_SCISSOR),
+ DEFINE_BIT(_NEW_STENCIL),
+ DEFINE_BIT(_NEW_TEXTURE),
+ DEFINE_BIT(_NEW_TRANSFORM),
+ DEFINE_BIT(_NEW_VIEWPORT),
+ DEFINE_BIT(_NEW_PACKUNPACK),
+ DEFINE_BIT(_NEW_ARRAY),
+ DEFINE_BIT(_NEW_RENDERMODE),
+ DEFINE_BIT(_NEW_BUFFERS),
+ DEFINE_BIT(_NEW_MULTISAMPLE),
+ DEFINE_BIT(_NEW_TRACK_MATRIX),
+ DEFINE_BIT(_NEW_PROGRAM),
+ DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
+ {0, 0, 0}
+};
+
+static struct dirty_bit_map brw_bits[] = {
+ DEFINE_BIT(BRW_NEW_URB_FENCE),
+ DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
+ DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
+ DEFINE_BIT(BRW_NEW_INPUT_DIMENSIONS),
+ DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
+ DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
+ DEFINE_BIT(BRW_NEW_PRIMITIVE),
+ DEFINE_BIT(BRW_NEW_CONTEXT),
+ DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
+ DEFINE_BIT(BRW_NEW_PSP),
+ DEFINE_BIT(BRW_NEW_FENCE),
+ DEFINE_BIT(BRW_NEW_INDICES),
+ DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
+ DEFINE_BIT(BRW_NEW_VERTICES),
+ DEFINE_BIT(BRW_NEW_BATCH),
+ DEFINE_BIT(BRW_NEW_DEPTH_BUFFER),
+ {0, 0, 0}
+};
+
+static struct dirty_bit_map cache_bits[] = {
+ DEFINE_BIT(CACHE_NEW_CC_VP),
+ DEFINE_BIT(CACHE_NEW_CC_UNIT),
+ DEFINE_BIT(CACHE_NEW_WM_PROG),
+ DEFINE_BIT(CACHE_NEW_SAMPLER_DEFAULT_COLOR),
+ DEFINE_BIT(CACHE_NEW_SAMPLER),
+ DEFINE_BIT(CACHE_NEW_WM_UNIT),
+ DEFINE_BIT(CACHE_NEW_SF_PROG),
+ DEFINE_BIT(CACHE_NEW_SF_VP),
+ DEFINE_BIT(CACHE_NEW_SF_UNIT),
+ DEFINE_BIT(CACHE_NEW_VS_UNIT),
+ DEFINE_BIT(CACHE_NEW_VS_PROG),
+ DEFINE_BIT(CACHE_NEW_GS_UNIT),
+ DEFINE_BIT(CACHE_NEW_GS_PROG),
+ DEFINE_BIT(CACHE_NEW_CLIP_VP),
+ DEFINE_BIT(CACHE_NEW_CLIP_UNIT),
+ DEFINE_BIT(CACHE_NEW_CLIP_PROG),
+ DEFINE_BIT(CACHE_NEW_SURFACE),
+ DEFINE_BIT(CACHE_NEW_SURF_BIND),
+ {0, 0, 0}
+};
+
+
+static void
+brw_update_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
+{
+ int i;
+
+ for (i = 0; i < 32; i++) {
+ if (bit_map[i].bit == 0)
+ return;
+
+ if (bit_map[i].bit & bits)
+ bit_map[i].count++;
+ }
+}
+
+static void
+brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
+{
+ int i;
+
+ for (i = 0; i < 32; i++) {
+ if (bit_map[i].bit == 0)
+ return;
+
+ fprintf(stderr, "0x%08x: %12d (%s)\n",
+ bit_map[i].bit, bit_map[i].count, bit_map[i].name);
+ }
+}
+
+/***********************************************************************
+ * Emit all state:
+ */
+void brw_validate_state( struct brw_context *brw )
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ struct intel_context *intel = &brw->intel;
+ struct brw_state_flags *state = &brw->state.dirty;
+ GLuint i;
+
+ brw_clear_validated_bos(brw);
+
+ state->mesa |= brw->intel.NewGLState;
+ brw->intel.NewGLState = 0;
+
+ brw_add_validated_bo(brw, intel->batch->buf);
+
+ if (brw->emit_state_always) {
+ state->mesa |= ~0;
+ state->brw |= ~0;
+ state->cache |= ~0;
+ }
+
+ if (brw->fragment_program != ctx->FragmentProgram._Current) {
+ brw->fragment_program = ctx->FragmentProgram._Current;
+ brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
+ }
+
+ if (brw->vertex_program != ctx->VertexProgram._Current) {
+ brw->vertex_program = ctx->VertexProgram._Current;
+ brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+ }
+
+ if (state->mesa == 0 &&
+ state->cache == 0 &&
+ state->brw == 0)
+ return;
+
+ if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
+ brw_clear_batch_cache(brw);
+
+ brw->intel.Fallback = 0;
+
+ /* do prepare stage for all atoms */
+ for (i = 0; i < Elements(atoms); i++) {
+ const struct brw_tracked_state *atom = atoms[i];
+
+ if (brw->intel.Fallback)
+ break;
+
+ if (check_state(state, &atom->dirty)) {
+ if (atom->prepare) {
+ atom->prepare(brw);
+ }
+ }
+ }
+
+ /* Make sure that the textures which are referenced by the current
+ * brw fragment program are actually present/valid.
+ * If this fails, we can experience GPU lock-ups.
+ */
+ {
+ const struct brw_fragment_program *fp;
+ fp = brw_fragment_program_const(brw->fragment_program);
+ if (fp) {
+ assert((fp->tex_units_used & ctx->Texture._EnabledUnits)
+ == fp->tex_units_used);
+ }
+ }
+}
+
+
+void brw_upload_state(struct brw_context *brw)
+{
+ struct brw_state_flags *state = &brw->state.dirty;
+ int i;
+ static int dirty_count = 0;
+
+ brw_clear_validated_bos(brw);
+
+ if (INTEL_DEBUG) {
+ /* Debug version which enforces various sanity checks on the
+ * state flags which are generated and checked to help ensure
+ * state atoms are ordered correctly in the list.
+ */
+ struct brw_state_flags examined, prev;
+ _mesa_memset(&examined, 0, sizeof(examined));
+ prev = *state;
+
+ for (i = 0; i < Elements(atoms); i++) {
+ const struct brw_tracked_state *atom = atoms[i];
+ struct brw_state_flags generated;
+
+ assert(atom->dirty.mesa ||
+ atom->dirty.brw ||
+ atom->dirty.cache);
+
+ if (brw->intel.Fallback)
+ break;
+
+ if (check_state(state, &atom->dirty)) {
+ if (atom->emit) {
+ atom->emit( brw );
+ }
+ }
+
+ accumulate_state(&examined, &atom->dirty);
+
+ /* generated = (prev ^ state)
+ * if (examined & generated)
+ * fail;
+ */
+ xor_states(&generated, &prev, state);
+ assert(!check_state(&examined, &generated));
+ prev = *state;
+ }
+ }
+ else {
+ for (i = 0; i < Elements(atoms); i++) {
+ const struct brw_tracked_state *atom = atoms[i];
+
+ if (brw->intel.Fallback)
+ break;
+
+ if (check_state(state, &atom->dirty)) {
+ if (atom->emit) {
+ atom->emit( brw );
+ }
+ }
+ }
+ }
+
+ if (INTEL_DEBUG & DEBUG_STATE) {
+ brw_update_dirty_count(mesa_bits, state->mesa);
+ brw_update_dirty_count(brw_bits, state->brw);
+ brw_update_dirty_count(cache_bits, state->cache);
+ if (dirty_count++ % 1000 == 0) {
+ brw_print_dirty_count(mesa_bits, state->mesa);
+ brw_print_dirty_count(brw_bits, state->brw);
+ brw_print_dirty_count(cache_bits, state->cache);
+ fprintf(stderr, "\n");
+ }
+ }
+
+ if (!brw->intel.Fallback)
+ memset(state, 0, sizeof(*state));
+}
diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h
new file mode 100644
index 00000000000..66d4127271a
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_structs.h
@@ -0,0 +1,1575 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#ifndef BRW_STRUCTS_H
+#define BRW_STRUCTS_H
+
+
+/** Number of general purpose registers (VS, WM, etc) */
+#define BRW_MAX_GRF 128
+
+/** Number of message register file registers */
+#define BRW_MAX_MRF 16
+
+
+/* Command packets:
+ */
+struct header
+{
+ GLuint length:16;
+ GLuint opcode:16;
+};
+
+
+union header_union
+{
+ struct header bits;
+ GLuint dword;
+};
+
+struct brw_3d_control
+{
+ struct
+ {
+ GLuint length:8;
+ GLuint notify_enable:1;
+ GLuint pad:3;
+ GLuint wc_flush_enable:1;
+ GLuint depth_stall_enable:1;
+ GLuint operation:2;
+ GLuint opcode:16;
+ } header;
+
+ struct
+ {
+ GLuint pad:2;
+ GLuint dest_addr_type:1;
+ GLuint dest_addr:29;
+ } dest;
+
+ GLuint dword2;
+ GLuint dword3;
+};
+
+
+struct brw_3d_primitive
+{
+ struct
+ {
+ GLuint length:8;
+ GLuint pad:2;
+ GLuint topology:5;
+ GLuint indexed:1;
+ GLuint opcode:16;
+ } header;
+
+ GLuint verts_per_instance;
+ GLuint start_vert_location;
+ GLuint instance_count;
+ GLuint start_instance_location;
+ GLuint base_vert_location;
+};
+
+/* These seem to be passed around as function args, so it works out
+ * better to keep them as #defines:
+ */
+#define BRW_FLUSH_READ_CACHE 0x1
+#define BRW_FLUSH_STATE_CACHE 0x2
+#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4
+#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8
+
+struct brw_mi_flush
+{
+ GLuint flags:4;
+ GLuint pad:12;
+ GLuint opcode:16;
+};
+
+struct brw_vf_statistics
+{
+ GLuint statistics_enable:1;
+ GLuint pad:15;
+ GLuint opcode:16;
+};
+
+
+
+struct brw_binding_table_pointers
+{
+ struct header header;
+ GLuint vs;
+ GLuint gs;
+ GLuint clp;
+ GLuint sf;
+ GLuint wm;
+};
+
+
+struct brw_blend_constant_color
+{
+ struct header header;
+ GLfloat blend_constant_color[4];
+};
+
+
+struct brw_depthbuffer
+{
+ union header_union header;
+
+ union {
+ struct {
+ GLuint pitch:18;
+ GLuint format:3;
+ GLuint pad:2;
+ GLuint software_tiled_rendering_mode:2;
+ GLuint depth_offset_disable:1;
+ GLuint tile_walk:1;
+ GLuint tiled_surface:1;
+ GLuint pad2:1;
+ GLuint surface_type:3;
+ } bits;
+ GLuint dword;
+ } dword1;
+
+ GLuint dword2_base_addr;
+
+ union {
+ struct {
+ GLuint pad:1;
+ GLuint mipmap_layout:1;
+ GLuint lod:4;
+ GLuint width:13;
+ GLuint height:13;
+ } bits;
+ GLuint dword;
+ } dword3;
+
+ union {
+ struct {
+ GLuint pad:10;
+ GLuint min_array_element:11;
+ GLuint depth:11;
+ } bits;
+ GLuint dword;
+ } dword4;
+};
+
+struct brw_depthbuffer_g4x
+{
+ union header_union header;
+
+ union {
+ struct {
+ GLuint pitch:18;
+ GLuint format:3;
+ GLuint pad:2;
+ GLuint software_tiled_rendering_mode:2;
+ GLuint depth_offset_disable:1;
+ GLuint tile_walk:1;
+ GLuint tiled_surface:1;
+ GLuint pad2:1;
+ GLuint surface_type:3;
+ } bits;
+ GLuint dword;
+ } dword1;
+
+ GLuint dword2_base_addr;
+
+ union {
+ struct {
+ GLuint pad:1;
+ GLuint mipmap_layout:1;
+ GLuint lod:4;
+ GLuint width:13;
+ GLuint height:13;
+ } bits;
+ GLuint dword;
+ } dword3;
+
+ union {
+ struct {
+ GLuint pad:10;
+ GLuint min_array_element:11;
+ GLuint depth:11;
+ } bits;
+ GLuint dword;
+ } dword4;
+
+ union {
+ struct {
+ GLuint xoffset:16;
+ GLuint yoffset:16;
+ } bits;
+ GLuint dword;
+ } dword5; /* NEW in Integrated Graphics Device */
+};
+
+struct brw_drawrect
+{
+ struct header header;
+ GLuint xmin:16;
+ GLuint ymin:16;
+ GLuint xmax:16;
+ GLuint ymax:16;
+ GLuint xorg:16;
+ GLuint yorg:16;
+};
+
+
+
+
+struct brw_global_depth_offset_clamp
+{
+ struct header header;
+ GLfloat depth_offset_clamp;
+};
+
+struct brw_indexbuffer
+{
+ union {
+ struct
+ {
+ GLuint length:8;
+ GLuint index_format:2;
+ GLuint cut_index_enable:1;
+ GLuint pad:5;
+ GLuint opcode:16;
+ } bits;
+ GLuint dword;
+
+ } header;
+
+ GLuint buffer_start;
+ GLuint buffer_end;
+};
+
+/* NEW in Integrated Graphics Device */
+struct brw_aa_line_parameters
+{
+ struct header header;
+
+ struct {
+ GLuint aa_coverage_scope:8;
+ GLuint pad0:8;
+ GLuint aa_coverage_bias:8;
+ GLuint pad1:8;
+ } bits0;
+
+ struct {
+ GLuint aa_coverage_endcap_slope:8;
+ GLuint pad0:8;
+ GLuint aa_coverage_endcap_bias:8;
+ GLuint pad1:8;
+ } bits1;
+};
+
+struct brw_line_stipple
+{
+ struct header header;
+
+ struct
+ {
+ GLuint pattern:16;
+ GLuint pad:16;
+ } bits0;
+
+ struct
+ {
+ GLuint repeat_count:9;
+ GLuint pad:7;
+ GLuint inverse_repeat_count:16;
+ } bits1;
+};
+
+
+struct brw_pipelined_state_pointers
+{
+ struct header header;
+
+ struct {
+ GLuint pad:5;
+ GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+ } vs;
+
+ struct
+ {
+ GLuint enable:1;
+ GLuint pad:4;
+ GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+ } gs;
+
+ struct
+ {
+ GLuint enable:1;
+ GLuint pad:4;
+ GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+ } clp;
+
+ struct
+ {
+ GLuint pad:5;
+ GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+ } sf;
+
+ struct
+ {
+ GLuint pad:5;
+ GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+ } wm;
+
+ struct
+ {
+ GLuint pad:5;
+ GLuint offset:27; /* Offset from GENERAL_STATE_BASE. KW: check me! */
+ } cc;
+};
+
+
+struct brw_polygon_stipple_offset
+{
+ struct header header;
+
+ struct {
+ GLuint y_offset:5;
+ GLuint pad:3;
+ GLuint x_offset:5;
+ GLuint pad0:19;
+ } bits0;
+};
+
+
+
+struct brw_polygon_stipple
+{
+ struct header header;
+ GLuint stipple[32];
+};
+
+
+
+struct brw_pipeline_select
+{
+ struct
+ {
+ GLuint pipeline_select:1;
+ GLuint pad:15;
+ GLuint opcode:16;
+ } header;
+};
+
+
+struct brw_pipe_control
+{
+ struct
+ {
+ GLuint length:8;
+ GLuint notify_enable:1;
+ GLuint texture_cache_flush_enable:1;
+ GLuint indirect_state_pointers_disable:1;
+ GLuint instruction_state_cache_flush_enable:1;
+ GLuint write_cache_flush_enable:1;
+ GLuint depth_stall_enable:1;
+ GLuint post_sync_operation:2;
+
+ GLuint opcode:16;
+ } header;
+
+ struct
+ {
+ GLuint pad:2;
+ GLuint dest_addr_type:1;
+ GLuint dest_addr:29;
+ } bits1;
+
+ GLuint data0;
+ GLuint data1;
+};
+
+
+struct brw_urb_fence
+{
+ struct
+ {
+ GLuint length:8;
+ GLuint vs_realloc:1;
+ GLuint gs_realloc:1;
+ GLuint clp_realloc:1;
+ GLuint sf_realloc:1;
+ GLuint vfe_realloc:1;
+ GLuint cs_realloc:1;
+ GLuint pad:2;
+ GLuint opcode:16;
+ } header;
+
+ struct
+ {
+ GLuint vs_fence:10;
+ GLuint gs_fence:10;
+ GLuint clp_fence:10;
+ GLuint pad:2;
+ } bits0;
+
+ struct
+ {
+ GLuint sf_fence:10;
+ GLuint vf_fence:10;
+ GLuint cs_fence:11;
+ GLuint pad:1;
+ } bits1;
+};
+
+struct brw_cs_urb_state
+{
+ struct header header;
+
+ struct
+ {
+ GLuint nr_urb_entries:3;
+ GLuint pad:1;
+ GLuint urb_entry_size:5;
+ GLuint pad0:23;
+ } bits0;
+};
+
+struct brw_constant_buffer
+{
+ struct
+ {
+ GLuint length:8;
+ GLuint valid:1;
+ GLuint pad:7;
+ GLuint opcode:16;
+ } header;
+
+ struct
+ {
+ GLuint buffer_length:6;
+ GLuint buffer_address:26;
+ } bits0;
+};
+
+struct brw_state_base_address
+{
+ struct header header;
+
+ struct
+ {
+ GLuint modify_enable:1;
+ GLuint pad:4;
+ GLuint general_state_address:27;
+ } bits0;
+
+ struct
+ {
+ GLuint modify_enable:1;
+ GLuint pad:4;
+ GLuint surface_state_address:27;
+ } bits1;
+
+ struct
+ {
+ GLuint modify_enable:1;
+ GLuint pad:4;
+ GLuint indirect_object_state_address:27;
+ } bits2;
+
+ struct
+ {
+ GLuint modify_enable:1;
+ GLuint pad:11;
+ GLuint general_state_upper_bound:20;
+ } bits3;
+
+ struct
+ {
+ GLuint modify_enable:1;
+ GLuint pad:11;
+ GLuint indirect_object_state_upper_bound:20;
+ } bits4;
+};
+
+struct brw_state_prefetch
+{
+ struct header header;
+
+ struct
+ {
+ GLuint prefetch_count:3;
+ GLuint pad:3;
+ GLuint prefetch_pointer:26;
+ } bits0;
+};
+
+struct brw_system_instruction_pointer
+{
+ struct header header;
+
+ struct
+ {
+ GLuint pad:4;
+ GLuint system_instruction_pointer:28;
+ } bits0;
+};
+
+
+
+
+/* State structs for the various fixed function units:
+ */
+
+
+struct thread0
+{
+ GLuint pad0:1;
+ GLuint grf_reg_count:3;
+ GLuint pad1:2;
+ GLuint kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */
+};
+
+struct thread1
+{
+ GLuint ext_halt_exception_enable:1;
+ GLuint sw_exception_enable:1;
+ GLuint mask_stack_exception_enable:1;
+ GLuint timeout_exception_enable:1;
+ GLuint illegal_op_exception_enable:1;
+ GLuint pad0:3;
+ GLuint depth_coef_urb_read_offset:6; /* WM only */
+ GLuint pad1:2;
+ GLuint floating_point_mode:1;
+ GLuint thread_priority:1;
+ GLuint binding_table_entry_count:8;
+ GLuint pad3:5;
+ GLuint single_program_flow:1;
+};
+
+struct thread2
+{
+ GLuint per_thread_scratch_space:4;
+ GLuint pad0:6;
+ GLuint scratch_space_base_pointer:22;
+};
+
+
+struct thread3
+{
+ GLuint dispatch_grf_start_reg:4;
+ GLuint urb_entry_read_offset:6;
+ GLuint pad0:1;
+ GLuint urb_entry_read_length:6;
+ GLuint pad1:1;
+ GLuint const_urb_entry_read_offset:6;
+ GLuint pad2:1;
+ GLuint const_urb_entry_read_length:6;
+ GLuint pad3:1;
+};
+
+
+
+struct brw_clip_unit_state
+{
+ struct thread0 thread0;
+ struct
+ {
+ GLuint pad0:7;
+ GLuint sw_exception_enable:1;
+ GLuint pad1:3;
+ GLuint mask_stack_exception_enable:1;
+ GLuint pad2:1;
+ GLuint illegal_op_exception_enable:1;
+ GLuint pad3:2;
+ GLuint floating_point_mode:1;
+ GLuint thread_priority:1;
+ GLuint binding_table_entry_count:8;
+ GLuint pad4:5;
+ GLuint single_program_flow:1;
+ } thread1;
+
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ GLuint pad0:9;
+ GLuint gs_output_stats:1; /* not always */
+ GLuint stats_enable:1;
+ GLuint nr_urb_entries:7;
+ GLuint pad1:1;
+ GLuint urb_entry_allocation_size:5;
+ GLuint pad2:1;
+ GLuint max_threads:5; /* may be less */
+ GLuint pad3:2;
+ } thread4;
+
+ struct
+ {
+ GLuint pad0:13;
+ GLuint clip_mode:3;
+ GLuint userclip_enable_flags:8;
+ GLuint userclip_must_clip:1;
+ GLuint negative_w_clip_test:1;
+ GLuint guard_band_enable:1;
+ GLuint viewport_z_clip_enable:1;
+ GLuint viewport_xy_clip_enable:1;
+ GLuint vertex_position_space:1;
+ GLuint api_mode:1;
+ GLuint pad2:1;
+ } clip5;
+
+ struct
+ {
+ GLuint pad0:5;
+ GLuint clipper_viewport_state_ptr:27;
+ } clip6;
+
+
+ GLfloat viewport_xmin;
+ GLfloat viewport_xmax;
+ GLfloat viewport_ymin;
+ GLfloat viewport_ymax;
+};
+
+
+
+struct brw_cc_unit_state
+{
+ struct
+ {
+ GLuint pad0:3;
+ GLuint bf_stencil_pass_depth_pass_op:3;
+ GLuint bf_stencil_pass_depth_fail_op:3;
+ GLuint bf_stencil_fail_op:3;
+ GLuint bf_stencil_func:3;
+ GLuint bf_stencil_enable:1;
+ GLuint pad1:2;
+ GLuint stencil_write_enable:1;
+ GLuint stencil_pass_depth_pass_op:3;
+ GLuint stencil_pass_depth_fail_op:3;
+ GLuint stencil_fail_op:3;
+ GLuint stencil_func:3;
+ GLuint stencil_enable:1;
+ } cc0;
+
+
+ struct
+ {
+ GLuint bf_stencil_ref:8;
+ GLuint stencil_write_mask:8;
+ GLuint stencil_test_mask:8;
+ GLuint stencil_ref:8;
+ } cc1;
+
+
+ struct
+ {
+ GLuint logicop_enable:1;
+ GLuint pad0:10;
+ GLuint depth_write_enable:1;
+ GLuint depth_test_function:3;
+ GLuint depth_test:1;
+ GLuint bf_stencil_write_mask:8;
+ GLuint bf_stencil_test_mask:8;
+ } cc2;
+
+
+ struct
+ {
+ GLuint pad0:8;
+ GLuint alpha_test_func:3;
+ GLuint alpha_test:1;
+ GLuint blend_enable:1;
+ GLuint ia_blend_enable:1;
+ GLuint pad1:1;
+ GLuint alpha_test_format:1;
+ GLuint pad2:16;
+ } cc3;
+
+ struct
+ {
+ GLuint pad0:5;
+ GLuint cc_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
+ } cc4;
+
+ struct
+ {
+ GLuint pad0:2;
+ GLuint ia_dest_blend_factor:5;
+ GLuint ia_src_blend_factor:5;
+ GLuint ia_blend_function:3;
+ GLuint statistics_enable:1;
+ GLuint logicop_func:4;
+ GLuint pad1:11;
+ GLuint dither_enable:1;
+ } cc5;
+
+ struct
+ {
+ GLuint clamp_post_alpha_blend:1;
+ GLuint clamp_pre_alpha_blend:1;
+ GLuint clamp_range:2;
+ GLuint pad0:11;
+ GLuint y_dither_offset:2;
+ GLuint x_dither_offset:2;
+ GLuint dest_blend_factor:5;
+ GLuint src_blend_factor:5;
+ GLuint blend_function:3;
+ } cc6;
+
+ struct {
+ union {
+ GLfloat f;
+ GLubyte ub[4];
+ } alpha_ref;
+ } cc7;
+};
+
+
+
+struct brw_sf_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ GLuint pad0:10;
+ GLuint stats_enable:1;
+ GLuint nr_urb_entries:7;
+ GLuint pad1:1;
+ GLuint urb_entry_allocation_size:5;
+ GLuint pad2:1;
+ GLuint max_threads:6;
+ GLuint pad3:1;
+ } thread4;
+
+ struct
+ {
+ GLuint front_winding:1;
+ GLuint viewport_transform:1;
+ GLuint pad0:3;
+ GLuint sf_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
+ } sf5;
+
+ struct
+ {
+ GLuint pad0:9;
+ GLuint dest_org_vbias:4;
+ GLuint dest_org_hbias:4;
+ GLuint scissor:1;
+ GLuint disable_2x2_trifilter:1;
+ GLuint disable_zero_pix_trifilter:1;
+ GLuint point_rast_rule:2;
+ GLuint line_endcap_aa_region_width:2;
+ GLuint line_width:4;
+ GLuint fast_scissor_disable:1;
+ GLuint cull_mode:2;
+ GLuint aa_enable:1;
+ } sf6;
+
+ struct
+ {
+ GLuint point_size:11;
+ GLuint use_point_size_state:1;
+ GLuint subpixel_precision:1;
+ GLuint sprite_point:1;
+ GLuint pad0:10;
+ GLuint aa_line_distance_mode:1;
+ GLuint trifan_pv:2;
+ GLuint linestrip_pv:2;
+ GLuint tristrip_pv:2;
+ GLuint line_last_pixel_enable:1;
+ } sf7;
+
+};
+
+
+struct brw_gs_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ GLuint pad0:8;
+ GLuint rendering_enable:1; /* for IGDNG */
+ GLuint pad4:1;
+ GLuint stats_enable:1;
+ GLuint nr_urb_entries:7;
+ GLuint pad1:1;
+ GLuint urb_entry_allocation_size:5;
+ GLuint pad2:1;
+ GLuint max_threads:5;
+ GLuint pad3:2;
+ } thread4;
+
+ struct
+ {
+ GLuint sampler_count:3;
+ GLuint pad0:2;
+ GLuint sampler_state_pointer:27;
+ } gs5;
+
+
+ struct
+ {
+ GLuint max_vp_index:4;
+ GLuint pad0:12;
+ GLuint svbi_post_inc_value:10;
+ GLuint pad1:1;
+ GLuint svbi_post_inc_enable:1;
+ GLuint svbi_payload:1;
+ GLuint discard_adjaceny:1;
+ GLuint reorder_enable:1;
+ GLuint pad2:1;
+ } gs6;
+};
+
+
+struct brw_vs_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ GLuint pad0:10;
+ GLuint stats_enable:1;
+ GLuint nr_urb_entries:7;
+ GLuint pad1:1;
+ GLuint urb_entry_allocation_size:5;
+ GLuint pad2:1;
+ GLuint max_threads:6;
+ GLuint pad3:1;
+ } thread4;
+
+ struct
+ {
+ GLuint sampler_count:3;
+ GLuint pad0:2;
+ GLuint sampler_state_pointer:27;
+ } vs5;
+
+ struct
+ {
+ GLuint vs_enable:1;
+ GLuint vert_cache_disable:1;
+ GLuint pad0:30;
+ } vs6;
+};
+
+
+struct brw_wm_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct {
+ GLuint stats_enable:1;
+ GLuint depth_buffer_clear:1;
+ GLuint sampler_count:3;
+ GLuint sampler_state_pointer:27;
+ } wm4;
+
+ struct
+ {
+ GLuint enable_8_pix:1;
+ GLuint enable_16_pix:1;
+ GLuint enable_32_pix:1;
+ GLuint enable_con_32_pix:1;
+ GLuint enable_con_64_pix:1;
+ GLuint pad0:5;
+ GLuint legacy_global_depth_bias:1;
+ GLuint line_stipple:1;
+ GLuint depth_offset:1;
+ GLuint polygon_stipple:1;
+ GLuint line_aa_region_width:2;
+ GLuint line_endcap_aa_region_width:2;
+ GLuint early_depth_test:1;
+ GLuint thread_dispatch_enable:1;
+ GLuint program_uses_depth:1;
+ GLuint program_computes_depth:1;
+ GLuint program_uses_killpixel:1;
+ GLuint legacy_line_rast: 1;
+ GLuint transposed_urb_read_enable:1;
+ GLuint max_threads:7;
+ } wm5;
+
+ GLfloat global_depth_offset_constant;
+ GLfloat global_depth_offset_scale;
+
+ /* for IGDNG only */
+ struct {
+ GLuint pad0:1;
+ GLuint grf_reg_count_1:3;
+ GLuint pad1:2;
+ GLuint kernel_start_pointer_1:26;
+ } wm8;
+
+ struct {
+ GLuint pad0:1;
+ GLuint grf_reg_count_2:3;
+ GLuint pad1:2;
+ GLuint kernel_start_pointer_2:26;
+ } wm9;
+
+ struct {
+ GLuint pad0:1;
+ GLuint grf_reg_count_3:3;
+ GLuint pad1:2;
+ GLuint kernel_start_pointer_3:26;
+ } wm10;
+};
+
+struct brw_sampler_default_color {
+ GLfloat color[4];
+};
+
+struct brw_sampler_state
+{
+
+ struct
+ {
+ GLuint shadow_function:3;
+ GLuint lod_bias:11;
+ GLuint min_filter:3;
+ GLuint mag_filter:3;
+ GLuint mip_filter:2;
+ GLuint base_level:5;
+ GLuint pad:1;
+ GLuint lod_preclamp:1;
+ GLuint default_color_mode:1;
+ GLuint pad0:1;
+ GLuint disable:1;
+ } ss0;
+
+ struct
+ {
+ GLuint r_wrap_mode:3;
+ GLuint t_wrap_mode:3;
+ GLuint s_wrap_mode:3;
+ GLuint pad:3;
+ GLuint max_lod:10;
+ GLuint min_lod:10;
+ } ss1;
+
+
+ struct
+ {
+ GLuint pad:5;
+ GLuint default_color_pointer:27;
+ } ss2;
+
+ struct
+ {
+ GLuint pad:19;
+ GLuint max_aniso:3;
+ GLuint chroma_key_mode:1;
+ GLuint chroma_key_index:2;
+ GLuint chroma_key_enable:1;
+ GLuint monochrome_filter_width:3;
+ GLuint monochrome_filter_height:3;
+ } ss3;
+};
+
+
+struct brw_clipper_viewport
+{
+ GLfloat xmin;
+ GLfloat xmax;
+ GLfloat ymin;
+ GLfloat ymax;
+};
+
+struct brw_cc_viewport
+{
+ GLfloat min_depth;
+ GLfloat max_depth;
+};
+
+struct brw_sf_viewport
+{
+ struct {
+ GLfloat m00;
+ GLfloat m11;
+ GLfloat m22;
+ GLfloat m30;
+ GLfloat m31;
+ GLfloat m32;
+ } viewport;
+
+ /* scissor coordinates are inclusive */
+ struct {
+ GLshort xmin;
+ GLshort ymin;
+ GLshort xmax;
+ GLshort ymax;
+ } scissor;
+};
+
+/* Documented in the subsystem/shared-functions/sampler chapter...
+ */
+struct brw_surface_state
+{
+ struct {
+ GLuint cube_pos_z:1;
+ GLuint cube_neg_z:1;
+ GLuint cube_pos_y:1;
+ GLuint cube_neg_y:1;
+ GLuint cube_pos_x:1;
+ GLuint cube_neg_x:1;
+ GLuint pad:4;
+ GLuint mipmap_layout_mode:1;
+ GLuint vert_line_stride_ofs:1;
+ GLuint vert_line_stride:1;
+ GLuint color_blend:1;
+ GLuint writedisable_blue:1;
+ GLuint writedisable_green:1;
+ GLuint writedisable_red:1;
+ GLuint writedisable_alpha:1;
+ GLuint surface_format:9; /**< BRW_SURFACEFORMAT_x */
+ GLuint data_return_format:1;
+ GLuint pad0:1;
+ GLuint surface_type:3; /**< BRW_SURFACE_1D/2D/3D/CUBE */
+ } ss0;
+
+ struct {
+ GLuint base_addr;
+ } ss1;
+
+ struct {
+ GLuint pad:2;
+ GLuint mip_count:4;
+ GLuint width:13;
+ GLuint height:13;
+ } ss2;
+
+ struct {
+ GLuint tile_walk:1;
+ GLuint tiled_surface:1;
+ GLuint pad:1;
+ GLuint pitch:18;
+ GLuint depth:11;
+ } ss3;
+
+ struct {
+ GLuint multisample_position_palette_index:3;
+ GLuint pad1:1;
+ GLuint num_multisamples:3;
+ GLuint pad0:1;
+ GLuint render_target_view_extent:9;
+ GLuint min_array_elt:11;
+ GLuint min_lod:4;
+ } ss4;
+
+ struct {
+ GLuint pad1:16;
+ GLuint llc_mapping:1;
+ GLuint mlc_mapping:1;
+ GLuint gfdt:1;
+ GLuint gfdt_src:1;
+ GLuint y_offset:4;
+ GLuint pad0:1;
+ GLuint x_offset:7;
+ } ss5; /* New in G4X */
+
+};
+
+
+
+struct brw_vertex_buffer_state
+{
+ struct {
+ GLuint pitch:11;
+ GLuint pad:15;
+ GLuint access_type:1;
+ GLuint vb_index:5;
+ } vb0;
+
+ GLuint start_addr;
+ GLuint max_index;
+#if 1
+ GLuint instance_data_step_rate; /* not included for sequential/random vertices? */
+#endif
+};
+
+#define BRW_VBP_MAX 17
+
+struct brw_vb_array_state {
+ struct header header;
+ struct brw_vertex_buffer_state vb[BRW_VBP_MAX];
+};
+
+
+struct brw_vertex_element_state
+{
+ struct
+ {
+ GLuint src_offset:11;
+ GLuint pad:5;
+ GLuint src_format:9;
+ GLuint pad0:1;
+ GLuint valid:1;
+ GLuint vertex_buffer_index:5;
+ } ve0;
+
+ struct
+ {
+ GLuint dst_offset:8;
+ GLuint pad:8;
+ GLuint vfcomponent3:4;
+ GLuint vfcomponent2:4;
+ GLuint vfcomponent1:4;
+ GLuint vfcomponent0:4;
+ } ve1;
+};
+
+#define BRW_VEP_MAX 18
+
+struct brw_vertex_element_packet {
+ struct header header;
+ struct brw_vertex_element_state ve[BRW_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */
+};
+
+
+struct brw_urb_immediate {
+ GLuint opcode:4;
+ GLuint offset:6;
+ GLuint swizzle_control:2;
+ GLuint pad:1;
+ GLuint allocate:1;
+ GLuint used:1;
+ GLuint complete:1;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+};
+
+/* Instruction format for the execution units:
+ */
+
+struct brw_instruction
+{
+ struct
+ {
+ GLuint opcode:7;
+ GLuint pad:1;
+ GLuint access_mode:1;
+ GLuint mask_control:1;
+ GLuint dependency_control:2;
+ GLuint compression_control:2;
+ GLuint thread_control:2;
+ GLuint predicate_control:4;
+ GLuint predicate_inverse:1;
+ GLuint execution_size:3;
+ GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */
+ GLuint pad0:2;
+ GLuint debug_control:1;
+ GLuint saturate:1;
+ } header;
+
+ union {
+ struct
+ {
+ GLuint dest_reg_file:2;
+ GLuint dest_reg_type:3;
+ GLuint src0_reg_file:2;
+ GLuint src0_reg_type:3;
+ GLuint src1_reg_file:2;
+ GLuint src1_reg_type:3;
+ GLuint pad:1;
+ GLuint dest_subreg_nr:5;
+ GLuint dest_reg_nr:8;
+ GLuint dest_horiz_stride:2;
+ GLuint dest_address_mode:1;
+ } da1;
+
+ struct
+ {
+ GLuint dest_reg_file:2;
+ GLuint dest_reg_type:3;
+ GLuint src0_reg_file:2;
+ GLuint src0_reg_type:3;
+ GLuint src1_reg_file:2; /* 0x00000c00 */
+ GLuint src1_reg_type:3; /* 0x00007000 */
+ GLuint pad:1;
+ GLint dest_indirect_offset:10; /* offset against the deref'd address reg */
+ GLuint dest_subreg_nr:3; /* subnr for the address reg a0.x */
+ GLuint dest_horiz_stride:2;
+ GLuint dest_address_mode:1;
+ } ia1;
+
+ struct
+ {
+ GLuint dest_reg_file:2;
+ GLuint dest_reg_type:3;
+ GLuint src0_reg_file:2;
+ GLuint src0_reg_type:3;
+ GLuint src1_reg_file:2;
+ GLuint src1_reg_type:3;
+ GLuint pad:1;
+ GLuint dest_writemask:4;
+ GLuint dest_subreg_nr:1;
+ GLuint dest_reg_nr:8;
+ GLuint pad1:2;
+ GLuint dest_address_mode:1;
+ } da16;
+
+ struct
+ {
+ GLuint dest_reg_file:2;
+ GLuint dest_reg_type:3;
+ GLuint src0_reg_file:2;
+ GLuint src0_reg_type:3;
+ GLuint pad0:6;
+ GLuint dest_writemask:4;
+ GLint dest_indirect_offset:6;
+ GLuint dest_subreg_nr:3;
+ GLuint pad1:2;
+ GLuint dest_address_mode:1;
+ } ia16;
+ } bits1;
+
+
+ union {
+ struct
+ {
+ GLuint src0_subreg_nr:5;
+ GLuint src0_reg_nr:8;
+ GLuint src0_abs:1;
+ GLuint src0_negate:1;
+ GLuint src0_address_mode:1;
+ GLuint src0_horiz_stride:2;
+ GLuint src0_width:3;
+ GLuint src0_vert_stride:4;
+ GLuint flag_reg_nr:1;
+ GLuint pad:6;
+ } da1;
+
+ struct
+ {
+ GLint src0_indirect_offset:10;
+ GLuint src0_subreg_nr:3;
+ GLuint src0_abs:1;
+ GLuint src0_negate:1;
+ GLuint src0_address_mode:1;
+ GLuint src0_horiz_stride:2;
+ GLuint src0_width:3;
+ GLuint src0_vert_stride:4;
+ GLuint flag_reg_nr:1;
+ GLuint pad:6;
+ } ia1;
+
+ struct
+ {
+ GLuint src0_swz_x:2;
+ GLuint src0_swz_y:2;
+ GLuint src0_subreg_nr:1;
+ GLuint src0_reg_nr:8;
+ GLuint src0_abs:1;
+ GLuint src0_negate:1;
+ GLuint src0_address_mode:1;
+ GLuint src0_swz_z:2;
+ GLuint src0_swz_w:2;
+ GLuint pad0:1;
+ GLuint src0_vert_stride:4;
+ GLuint flag_reg_nr:1;
+ GLuint pad1:6;
+ } da16;
+
+ struct
+ {
+ GLuint src0_swz_x:2;
+ GLuint src0_swz_y:2;
+ GLint src0_indirect_offset:6;
+ GLuint src0_subreg_nr:3;
+ GLuint src0_abs:1;
+ GLuint src0_negate:1;
+ GLuint src0_address_mode:1;
+ GLuint src0_swz_z:2;
+ GLuint src0_swz_w:2;
+ GLuint pad0:1;
+ GLuint src0_vert_stride:4;
+ GLuint flag_reg_nr:1;
+ GLuint pad1:6;
+ } ia16;
+
+ struct
+ {
+ GLuint pad:26;
+ GLuint end_of_thread:1;
+ GLuint pad1:1;
+ GLuint sfid:4;
+ } send_igdng; /* for IGDNG only */
+
+ } bits2;
+
+ union
+ {
+ struct
+ {
+ GLuint src1_subreg_nr:5;
+ GLuint src1_reg_nr:8;
+ GLuint src1_abs:1;
+ GLuint src1_negate:1;
+ GLuint src1_address_mode:1;
+ GLuint src1_horiz_stride:2;
+ GLuint src1_width:3;
+ GLuint src1_vert_stride:4;
+ GLuint pad0:7;
+ } da1;
+
+ struct
+ {
+ GLuint src1_swz_x:2;
+ GLuint src1_swz_y:2;
+ GLuint src1_subreg_nr:1;
+ GLuint src1_reg_nr:8;
+ GLuint src1_abs:1;
+ GLuint src1_negate:1;
+ GLuint src1_address_mode:1;
+ GLuint src1_swz_z:2;
+ GLuint src1_swz_w:2;
+ GLuint pad1:1;
+ GLuint src1_vert_stride:4;
+ GLuint pad2:7;
+ } da16;
+
+ struct
+ {
+ GLint src1_indirect_offset:10;
+ GLuint src1_subreg_nr:3;
+ GLuint src1_abs:1;
+ GLuint src1_negate:1;
+ GLuint src1_address_mode:1;
+ GLuint src1_horiz_stride:2;
+ GLuint src1_width:3;
+ GLuint src1_vert_stride:4;
+ GLuint flag_reg_nr:1;
+ GLuint pad1:6;
+ } ia1;
+
+ struct
+ {
+ GLuint src1_swz_x:2;
+ GLuint src1_swz_y:2;
+ GLint src1_indirect_offset:6;
+ GLuint src1_subreg_nr:3;
+ GLuint src1_abs:1;
+ GLuint src1_negate:1;
+ GLuint pad0:1;
+ GLuint src1_swz_z:2;
+ GLuint src1_swz_w:2;
+ GLuint pad1:1;
+ GLuint src1_vert_stride:4;
+ GLuint flag_reg_nr:1;
+ GLuint pad2:6;
+ } ia16;
+
+
+ struct
+ {
+ GLint jump_count:16; /* note: signed */
+ GLuint pop_count:4;
+ GLuint pad0:12;
+ } if_else;
+
+ struct {
+ GLuint function:4;
+ GLuint int_type:1;
+ GLuint precision:1;
+ GLuint saturate:1;
+ GLuint data_type:1;
+ GLuint pad0:8;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } math;
+
+ struct {
+ GLuint function:4;
+ GLuint int_type:1;
+ GLuint precision:1;
+ GLuint saturate:1;
+ GLuint data_type:1;
+ GLuint snapshot:1;
+ GLuint pad0:10;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } math_igdng;
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint sampler:4;
+ GLuint return_format:2;
+ GLuint msg_type:2;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } sampler;
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint sampler:4;
+ GLuint msg_type:4;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } sampler_g4x;
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint sampler:4;
+ GLuint msg_type:4;
+ GLuint simd_mode:2;
+ GLuint pad0:1;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } sampler_igdng;
+
+ struct brw_urb_immediate urb;
+
+ struct {
+ GLuint opcode:4;
+ GLuint offset:6;
+ GLuint swizzle_control:2;
+ GLuint pad:1;
+ GLuint allocate:1;
+ GLuint used:1;
+ GLuint complete:1;
+ GLuint pad0:3;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } urb_igdng;
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:4;
+ GLuint msg_type:2;
+ GLuint target_cache:2;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } dp_read;
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint msg_type:3;
+ GLuint target_cache:2;
+ GLuint pad0:3;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } dp_read_igdng;
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint pixel_scoreboard_clear:1;
+ GLuint msg_type:3;
+ GLuint send_commit_msg:1;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } dp_write;
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint pixel_scoreboard_clear:1;
+ GLuint msg_type:3;
+ GLuint send_commit_msg:1;
+ GLuint pad0:3;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } dp_write_igdng;
+
+ struct {
+ GLuint pad:16;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } generic;
+
+ struct {
+ GLuint pad:19;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } generic_igdng;
+
+ GLint d;
+ GLuint ud;
+ float f;
+ } bits3;
+};
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_tex.c b/src/gallium/drivers/i965/brw_tex.c
new file mode 100644
index 00000000000..e911b105b23
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_tex.c
@@ -0,0 +1,59 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/teximage.h"
+
+#include "intel_context.h"
+#include "intel_regions.h"
+#include "intel_tex.h"
+#include "brw_context.h"
+
+/**
+ * Finalizes all textures, completing any rendering that needs to be done
+ * to prepare them.
+ */
+void brw_validate_textures( struct brw_context *brw )
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ struct intel_context *intel = &brw->intel;
+ int i;
+
+ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+ struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
+
+ if (texUnit->_ReallyEnabled) {
+ intel_finalize_mipmap_tree(intel, i);
+ }
+ }
+}
diff --git a/src/gallium/drivers/i965/brw_tex_layout.c b/src/gallium/drivers/i965/brw_tex_layout.c
new file mode 100644
index 00000000000..5986cbffade
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_tex_layout.c
@@ -0,0 +1,222 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+/* Code to layout images in a mipmap tree for i965.
+ */
+
+#include "intel_mipmap_tree.h"
+#include "intel_tex_layout.h"
+#include "intel_context.h"
+#include "main/macros.h"
+#include "intel_chipset.h"
+
+#define FILE_DEBUG_FLAG DEBUG_MIPTREE
+
+GLboolean brw_miptree_layout(struct intel_context *intel,
+ struct intel_mipmap_tree *mt,
+ uint32_t tiling)
+{
+ /* XXX: these vary depending on image format: */
+ /* GLint align_w = 4; */
+
+ switch (mt->target) {
+ case GL_TEXTURE_CUBE_MAP:
+ if (IS_IGDNG(intel->intelScreen->deviceID)) {
+ GLuint align_h = 2, align_w = 4;
+ GLuint level;
+ GLuint x = 0;
+ GLuint y = 0;
+ GLuint width = mt->width0;
+ GLuint height = mt->height0;
+ GLuint qpitch = 0;
+ GLuint y_pitch = 0;
+
+ mt->pitch = mt->width0;
+ intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
+ y_pitch = ALIGN(height, align_h);
+
+ if (mt->compressed) {
+ mt->pitch = ALIGN(mt->width0, align_w);
+ }
+
+ if (mt->first_level != mt->last_level) {
+ GLuint mip1_width;
+
+ if (mt->compressed) {
+ mip1_width = ALIGN(minify(mt->width0), align_w)
+ + ALIGN(minify(minify(mt->width0)), align_w);
+ } else {
+ mip1_width = ALIGN(minify(mt->width0), align_w)
+ + minify(minify(mt->width0));
+ }
+
+ if (mip1_width > mt->pitch) {
+ mt->pitch = mip1_width;
+ }
+ }
+
+ mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch);
+
+ if (mt->compressed) {
+ qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp;
+ mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6;
+ } else {
+ qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp;
+ mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6;
+ }
+
+ for (level = mt->first_level; level <= mt->last_level; level++) {
+ GLuint img_height;
+ GLuint nr_images = 6;
+ GLuint q = 0;
+
+ intel_miptree_set_level_info(mt, level, nr_images, x, y, width,
+ height, 1);
+
+ for (q = 0; q < nr_images; q++)
+ intel_miptree_set_image_offset_ex(mt, level, q, x, y, q * qpitch);
+
+ if (mt->compressed)
+ img_height = MAX2(1, height/4);
+ else
+ img_height = ALIGN(height, align_h);
+
+ if (level == mt->first_level + 1) {
+ x += ALIGN(width, align_w);
+ }
+ else {
+ y += img_height;
+ }
+
+ width = minify(width);
+ height = minify(height);
+ }
+
+ break;
+ }
+
+ case GL_TEXTURE_3D: {
+ GLuint width = mt->width0;
+ GLuint height = mt->height0;
+ GLuint depth = mt->depth0;
+ GLuint pack_x_pitch, pack_x_nr;
+ GLuint pack_y_pitch;
+ GLuint level;
+ GLuint align_h = 2;
+ GLuint align_w = 4;
+
+ mt->total_height = 0;
+ intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
+
+ if (mt->compressed) {
+ mt->pitch = ALIGN(width, align_w);
+ pack_y_pitch = (height + 3) / 4;
+ } else {
+ mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0);
+ pack_y_pitch = ALIGN(mt->height0, align_h);
+ }
+
+ pack_x_pitch = width;
+ pack_x_nr = 1;
+
+ for (level = mt->first_level ; level <= mt->last_level ; level++) {
+ GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6;
+ GLint x = 0;
+ GLint y = 0;
+ GLint q, j;
+
+ intel_miptree_set_level_info(mt, level, nr_images,
+ 0, mt->total_height,
+ width, height, depth);
+
+ for (q = 0; q < nr_images;) {
+ for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) {
+ intel_miptree_set_image_offset(mt, level, q, x, y);
+ x += pack_x_pitch;
+ }
+
+ x = 0;
+ y += pack_y_pitch;
+ }
+
+
+ mt->total_height += y;
+ width = minify(width);
+ height = minify(height);
+ depth = minify(depth);
+
+ if (mt->compressed) {
+ pack_y_pitch = (height + 3) / 4;
+
+ if (pack_x_pitch > ALIGN(width, align_w)) {
+ pack_x_pitch = ALIGN(width, align_w);
+ pack_x_nr <<= 1;
+ }
+ } else {
+ if (pack_x_pitch > 4) {
+ pack_x_pitch >>= 1;
+ pack_x_nr <<= 1;
+ assert(pack_x_pitch * pack_x_nr <= mt->pitch);
+ }
+
+ if (pack_y_pitch > 2) {
+ pack_y_pitch >>= 1;
+ pack_y_pitch = ALIGN(pack_y_pitch, align_h);
+ }
+ }
+
+ }
+ /* The 965's sampler lays cachelines out according to how accesses
+ * in the texture surfaces run, so they may be "vertical" through
+ * memory. As a result, the docs say in Surface Padding Requirements:
+ * Sampling Engine Surfaces that two extra rows of padding are required.
+ * We don't know of similar requirements for pre-965, but given that
+ * those docs are silent on padding requirements in general, let's play
+ * it safe.
+ */
+ if (mt->target == GL_TEXTURE_CUBE_MAP)
+ mt->total_height += 2;
+ break;
+ }
+
+ default:
+ i945_miptree_layout_2d(intel, mt, tiling);
+ break;
+ }
+ DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
+ mt->pitch,
+ mt->total_height,
+ mt->cpp,
+ mt->pitch * mt->total_height * mt->cpp );
+
+ return GL_TRUE;
+}
+
diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c
new file mode 100644
index 00000000000..8c6f4355a6e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_urb.c
@@ -0,0 +1,250 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+
+#include "intel_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+#define VS 0
+#define GS 1
+#define CLP 2
+#define SF 3
+#define CS 4
+
+/** @file brw_urb.c
+ *
+ * Manages the division of the URB space between the various fixed-function
+ * units.
+ *
+ * See the Thread Initiation Management section of the GEN4 B-Spec, and
+ * the individual *_STATE structures for restrictions on numbers of
+ * entries and threads.
+ */
+
+/*
+ * Generally, a unit requires a min_nr_entries based on how many entries
+ * it produces before the downstream unit gets unblocked and can use and
+ * dereference some of its handles.
+ *
+ * The SF unit preallocates a PUE at the start of thread dispatch, and only
+ * uses that one. So it requires one entry per thread.
+ *
+ * For CLIP, the SF unit will hold the previous primitive while the
+ * next is getting assembled, meaning that linestrips require 3 CLIP VUEs
+ * (vertices) to ensure continued processing, trifans require 4, and tristrips
+ * require 5. There can be 1 or 2 threads, and each has the same requirement.
+ *
+ * GS has the same requirement as CLIP, but it never handles tristrips,
+ * so we can lower the minimum to 4 for the POLYGONs (trifans) it produces.
+ * We only run it single-threaded.
+ *
+ * For VS, the number of entries may be 8, 12, 16, or 32 (or 64 on G4X).
+ * Each thread processes 2 preallocated VUEs (vertices) at a time, and they
+ * get streamed down as soon as threads processing earlier vertices get
+ * theirs accepted.
+ *
+ * Each unit will take the number of URB entries we give it (based on the
+ * entry size calculated in brw_vs_emit.c for VUEs, brw_sf_emit.c for PUEs,
+ * and brw_curbe.c for the CURBEs) and decide its maximum number of
+ * threads it can support based on that. in brw_*_state.c.
+ *
+ * XXX: Are the min_entry_size numbers useful?
+ * XXX: Verify min_nr_entries, esp for VS.
+ * XXX: Verify SF min_entry_size.
+ */
+static const struct {
+ GLuint min_nr_entries;
+ GLuint preferred_nr_entries;
+ GLuint min_entry_size;
+ GLuint max_entry_size;
+} limits[CS+1] = {
+ { 16, 32, 1, 5 }, /* vs */
+ { 4, 8, 1, 5 }, /* gs */
+ { 5, 10, 1, 5 }, /* clp */
+ { 1, 8, 1, 12 }, /* sf */
+ { 1, 4, 1, 32 } /* cs */
+};
+
+
+static GLboolean check_urb_layout( struct brw_context *brw )
+{
+ brw->urb.vs_start = 0;
+ brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize;
+ brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize;
+ brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize;
+ brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize;
+
+ return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= URB_SIZES(brw);
+}
+
+/* Most minimal update, forces re-emit of URB fence packet after GS
+ * unit turned on/off.
+ */
+static void recalculate_urb_fence( struct brw_context *brw )
+{
+ GLuint csize = brw->curbe.total_size;
+ GLuint vsize = brw->vs.prog_data->urb_entry_size;
+ GLuint sfsize = brw->sf.prog_data->urb_entry_size;
+
+ if (csize < limits[CS].min_entry_size)
+ csize = limits[CS].min_entry_size;
+
+ if (vsize < limits[VS].min_entry_size)
+ vsize = limits[VS].min_entry_size;
+
+ if (sfsize < limits[SF].min_entry_size)
+ sfsize = limits[SF].min_entry_size;
+
+ if (brw->urb.vsize < vsize ||
+ brw->urb.sfsize < sfsize ||
+ brw->urb.csize < csize ||
+ (brw->urb.constrained && (brw->urb.vsize > vsize ||
+ brw->urb.sfsize > sfsize ||
+ brw->urb.csize > csize))) {
+
+
+ brw->urb.csize = csize;
+ brw->urb.sfsize = sfsize;
+ brw->urb.vsize = vsize;
+
+ brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+ brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries;
+ brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries;
+ brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
+ brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries;
+
+ brw->urb.constrained = 0;
+
+ if (BRW_IS_IGDNG(brw)) {
+ brw->urb.nr_vs_entries = 128;
+ brw->urb.nr_sf_entries = 48;
+ if (check_urb_layout(brw)) {
+ goto done;
+ } else {
+ brw->urb.constrained = 1;
+ brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+ brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
+ }
+ } else if (BRW_IS_G4X(brw)) {
+ brw->urb.nr_vs_entries = 64;
+ if (check_urb_layout(brw)) {
+ goto done;
+ } else {
+ brw->urb.constrained = 1;
+ brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+ }
+ }
+
+ if (!check_urb_layout(brw)) {
+ brw->urb.nr_vs_entries = limits[VS].min_nr_entries;
+ brw->urb.nr_gs_entries = limits[GS].min_nr_entries;
+ brw->urb.nr_clip_entries = limits[CLP].min_nr_entries;
+ brw->urb.nr_sf_entries = limits[SF].min_nr_entries;
+ brw->urb.nr_cs_entries = limits[CS].min_nr_entries;
+
+ /* Mark us as operating with constrained nr_entries, so that next
+ * time we recalculate we'll resize the fences in the hope of
+ * escaping constrained mode and getting back to normal performance.
+ */
+ brw->urb.constrained = 1;
+
+ if (!check_urb_layout(brw)) {
+ /* This is impossible, given the maximal sizes of urb
+ * entries and the values for minimum nr of entries
+ * provided above.
+ */
+ _mesa_printf("couldn't calculate URB layout!\n");
+ exit(1);
+ }
+
+ if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS))
+ _mesa_printf("URB CONSTRAINED\n");
+ }
+
+done:
+ if (INTEL_DEBUG & DEBUG_URB)
+ _mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
+ brw->urb.vs_start,
+ brw->urb.gs_start,
+ brw->urb.clip_start,
+ brw->urb.sf_start,
+ brw->urb.cs_start,
+ URB_SIZES(brw));
+
+ brw->state.dirty.brw |= BRW_NEW_URB_FENCE;
+ }
+}
+
+
+const struct brw_tracked_state brw_recalculate_urb_fence = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_CURBE_OFFSETS,
+ .cache = (CACHE_NEW_VS_PROG |
+ CACHE_NEW_SF_PROG)
+ },
+ .prepare = recalculate_urb_fence
+};
+
+
+
+
+
+void brw_upload_urb_fence(struct brw_context *brw)
+{
+ struct brw_urb_fence uf;
+ memset(&uf, 0, sizeof(uf));
+
+ uf.header.opcode = CMD_URB_FENCE;
+ uf.header.length = sizeof(uf)/4-2;
+ uf.header.vs_realloc = 1;
+ uf.header.gs_realloc = 1;
+ uf.header.clp_realloc = 1;
+ uf.header.sf_realloc = 1;
+ uf.header.vfe_realloc = 1;
+ uf.header.cs_realloc = 1;
+
+ /* The ordering below is correct, not the layout in the
+ * instruction.
+ *
+ * There are 256/384 urb reg pairs in total.
+ */
+ uf.bits0.vs_fence = brw->urb.gs_start;
+ uf.bits0.gs_fence = brw->urb.clip_start;
+ uf.bits0.clp_fence = brw->urb.sf_start;
+ uf.bits1.sf_fence = brw->urb.cs_start;
+ uf.bits1.cs_fence = URB_SIZES(brw);
+
+ BRW_BATCH_STRUCT(brw, &uf);
+}
diff --git a/src/gallium/drivers/i965/brw_util.c b/src/gallium/drivers/i965/brw_util.c
new file mode 100644
index 00000000000..ce21aa48695
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_util.c
@@ -0,0 +1,104 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "main/mtypes.h"
+#include "shader/prog_parameter.h"
+#include "brw_util.h"
+#include "brw_defines.h"
+
+GLuint brw_count_bits( GLuint val )
+{
+ GLuint i;
+ for (i = 0; val ; val >>= 1)
+ if (val & 1)
+ i++;
+ return i;
+}
+
+
+GLuint brw_translate_blend_equation( GLenum mode )
+{
+ switch (mode) {
+ case GL_FUNC_ADD:
+ return BRW_BLENDFUNCTION_ADD;
+ case GL_MIN:
+ return BRW_BLENDFUNCTION_MIN;
+ case GL_MAX:
+ return BRW_BLENDFUNCTION_MAX;
+ case GL_FUNC_SUBTRACT:
+ return BRW_BLENDFUNCTION_SUBTRACT;
+ case GL_FUNC_REVERSE_SUBTRACT:
+ return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
+ default:
+ assert(0);
+ return BRW_BLENDFUNCTION_ADD;
+ }
+}
+
+GLuint brw_translate_blend_factor( GLenum factor )
+{
+ switch(factor) {
+ case GL_ZERO:
+ return BRW_BLENDFACTOR_ZERO;
+ case GL_SRC_ALPHA:
+ return BRW_BLENDFACTOR_SRC_ALPHA;
+ case GL_ONE:
+ return BRW_BLENDFACTOR_ONE;
+ case GL_SRC_COLOR:
+ return BRW_BLENDFACTOR_SRC_COLOR;
+ case GL_ONE_MINUS_SRC_COLOR:
+ return BRW_BLENDFACTOR_INV_SRC_COLOR;
+ case GL_DST_COLOR:
+ return BRW_BLENDFACTOR_DST_COLOR;
+ case GL_ONE_MINUS_DST_COLOR:
+ return BRW_BLENDFACTOR_INV_DST_COLOR;
+ case GL_ONE_MINUS_SRC_ALPHA:
+ return BRW_BLENDFACTOR_INV_SRC_ALPHA;
+ case GL_DST_ALPHA:
+ return BRW_BLENDFACTOR_DST_ALPHA;
+ case GL_ONE_MINUS_DST_ALPHA:
+ return BRW_BLENDFACTOR_INV_DST_ALPHA;
+ case GL_SRC_ALPHA_SATURATE:
+ return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
+ case GL_CONSTANT_COLOR:
+ return BRW_BLENDFACTOR_CONST_COLOR;
+ case GL_ONE_MINUS_CONSTANT_COLOR:
+ return BRW_BLENDFACTOR_INV_CONST_COLOR;
+ case GL_CONSTANT_ALPHA:
+ return BRW_BLENDFACTOR_CONST_ALPHA;
+ case GL_ONE_MINUS_CONSTANT_ALPHA:
+ return BRW_BLENDFACTOR_INV_CONST_ALPHA;
+ default:
+ assert(0);
+ return BRW_BLENDFACTOR_ZERO;
+ }
+}
diff --git a/src/gallium/drivers/i965/brw_util.h b/src/gallium/drivers/i965/brw_util.h
new file mode 100644
index 00000000000..33e7cd87e42
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_util.h
@@ -0,0 +1,45 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#ifndef BRW_UTIL_H
+#define BRW_UTIL_H
+
+#include "main/mtypes.h"
+
+extern GLuint brw_count_bits( GLuint val );
+extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList);
+extern GLuint brw_translate_blend_factor( GLenum factor );
+extern GLuint brw_translate_blend_equation( GLenum mode );
+
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c
new file mode 100644
index 00000000000..f0c79efbd96
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs.c
@@ -0,0 +1,124 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "brw_context.h"
+#include "brw_vs.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "shader/prog_print.h"
+
+
+
+static void do_vs_prog( struct brw_context *brw,
+ struct brw_vertex_program *vp,
+ struct brw_vs_prog_key *key )
+{
+ GLuint program_size;
+ const GLuint *program;
+ struct brw_vs_compile c;
+
+ memset(&c, 0, sizeof(c));
+ memcpy(&c.key, key, sizeof(*key));
+
+ brw_init_compile(brw, &c.func);
+ c.vp = vp;
+
+ c.prog_data.outputs_written = vp->program.Base.OutputsWritten;
+ c.prog_data.inputs_read = vp->program.Base.InputsRead;
+
+ if (c.key.copy_edgeflag) {
+ c.prog_data.outputs_written |= 1<<VERT_RESULT_EDGE;
+ c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG;
+ }
+
+ if (0)
+ _mesa_print_program(&c.vp->program.Base);
+
+
+
+ /* Emit GEN4 code.
+ */
+ brw_vs_emit(&c);
+
+ /* get the program
+ */
+ program = brw_get_program(&c.func, &program_size);
+
+ dri_bo_unreference(brw->vs.prog_bo);
+ brw->vs.prog_bo = brw_upload_cache( &brw->cache, BRW_VS_PROG,
+ &c.key, sizeof(c.key),
+ NULL, 0,
+ program, program_size,
+ &c.prog_data,
+ &brw->vs.prog_data );
+}
+
+
+static void brw_upload_vs_prog(struct brw_context *brw)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ struct brw_vs_prog_key key;
+ struct brw_vertex_program *vp =
+ (struct brw_vertex_program *)brw->vertex_program;
+
+ memset(&key, 0, sizeof(key));
+
+ /* Just upload the program verbatim for now. Always send it all
+ * the inputs it asks for, whether they are varying or not.
+ */
+ key.program_string_id = vp->id;
+ key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
+ key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
+ ctx->Polygon.BackMode != GL_FILL);
+
+ /* Make an early check for the key.
+ */
+ dri_bo_unreference(brw->vs.prog_bo);
+ brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG,
+ &key, sizeof(key),
+ NULL, 0,
+ &brw->vs.prog_data);
+ if (brw->vs.prog_bo == NULL)
+ do_vs_prog(brw, vp, &key);
+}
+
+
+/* See brw_vs.c:
+ */
+const struct brw_tracked_state brw_vs_prog = {
+ .dirty = {
+ .mesa = _NEW_TRANSFORM | _NEW_POLYGON,
+ .brw = BRW_NEW_VERTEX_PROGRAM,
+ .cache = 0
+ },
+ .prepare = brw_upload_vs_prog
+};
diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h
new file mode 100644
index 00000000000..4a591365c98
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs.h
@@ -0,0 +1,88 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#ifndef BRW_VS_H
+#define BRW_VS_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "shader/program.h"
+
+
+struct brw_vs_prog_key {
+ GLuint program_string_id;
+ GLuint nr_userclip:4;
+ GLuint copy_edgeflag:1;
+ GLuint pad:26;
+};
+
+
+struct brw_vs_compile {
+ struct brw_compile func;
+ struct brw_vs_prog_key key;
+ struct brw_vs_prog_data prog_data;
+
+ struct brw_vertex_program *vp;
+
+ GLuint nr_inputs;
+
+ GLuint first_output;
+ GLuint nr_outputs;
+ GLuint first_overflow_output; /**< VERT_ATTRIB_x */
+
+ GLuint first_tmp;
+ GLuint last_tmp;
+
+ struct brw_reg r0;
+ struct brw_reg r1;
+ struct brw_reg regs[PROGRAM_ADDRESS+1][128];
+ struct brw_reg tmp;
+ struct brw_reg stack;
+
+ struct {
+ GLboolean used_in_src;
+ struct brw_reg reg;
+ } output_regs[128];
+
+ struct brw_reg userplane[6];
+
+ /** we may need up to 3 constants per instruction (if use_const_buffer) */
+ struct {
+ GLint index;
+ struct brw_reg reg;
+ } current_const[3];
+};
+
+void brw_vs_emit( struct brw_vs_compile *c );
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
new file mode 100644
index 00000000000..1638ef81115
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -0,0 +1,1667 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "main/macros.h"
+#include "shader/program.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "brw_context.h"
+#include "brw_vs.h"
+
+
+static struct brw_reg get_tmp( struct brw_vs_compile *c )
+{
+ struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
+
+ if (++c->last_tmp > c->prog_data.total_grf)
+ c->prog_data.total_grf = c->last_tmp;
+
+ return tmp;
+}
+
+static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
+{
+ if (tmp.nr == c->last_tmp-1)
+ c->last_tmp--;
+}
+
+static void release_tmps( struct brw_vs_compile *c )
+{
+ c->last_tmp = c->first_tmp;
+}
+
+
+/**
+ * Preallocate GRF register before code emit.
+ * Do things as simply as possible. Allocate and populate all regs
+ * ahead of time.
+ */
+static void brw_vs_alloc_regs( struct brw_vs_compile *c )
+{
+ GLuint i, reg = 0, mrf;
+ int attributes_in_vue;
+
+ /* Determine whether to use a real constant buffer or use a block
+ * of GRF registers for constants. The later is faster but only
+ * works if everything fits in the GRF.
+ * XXX this heuristic/check may need some fine tuning...
+ */
+ if (c->vp->program.Base.Parameters->NumParameters +
+ c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF)
+ c->vp->use_const_buffer = GL_TRUE;
+ else
+ c->vp->use_const_buffer = GL_FALSE;
+
+ /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
+
+ /* r0 -- reserved as usual
+ */
+ c->r0 = brw_vec8_grf(reg, 0);
+ reg++;
+
+ /* User clip planes from curbe:
+ */
+ if (c->key.nr_userclip) {
+ for (i = 0; i < c->key.nr_userclip; i++) {
+ c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1);
+ }
+
+ /* Deal with curbe alignment:
+ */
+ reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;
+ }
+
+ /* Vertex program parameters from curbe:
+ */
+ if (c->vp->use_const_buffer) {
+ /* get constants from a real constant buffer */
+ c->prog_data.curb_read_length = 0;
+ c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */
+ }
+ else {
+ /* use a section of the GRF for constants */
+ GLuint nr_params = c->vp->program.Base.Parameters->NumParameters;
+ for (i = 0; i < nr_params; i++) {
+ c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
+ }
+ reg += (nr_params + 1) / 2;
+ c->prog_data.curb_read_length = reg - 1;
+
+ c->prog_data.nr_params = nr_params * 4;
+ }
+
+ /* Allocate input regs:
+ */
+ c->nr_inputs = 0;
+ for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+ if (c->prog_data.inputs_read & (1 << i)) {
+ c->nr_inputs++;
+ c->regs[PROGRAM_INPUT][i] = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+ }
+ /* If there are no inputs, we'll still be reading one attribute's worth
+ * because it's required -- see urb_read_length setting.
+ */
+ if (c->nr_inputs == 0)
+ reg++;
+
+ /* Allocate outputs. The non-position outputs go straight into message regs.
+ */
+ c->nr_outputs = 0;
+ c->first_output = reg;
+ c->first_overflow_output = 0;
+
+ if (BRW_IS_IGDNG(c->func.brw))
+ mrf = 8;
+ else
+ mrf = 4;
+
+ for (i = 0; i < VERT_RESULT_MAX; i++) {
+ if (c->prog_data.outputs_written & (1 << i)) {
+ c->nr_outputs++;
+ assert(i < Elements(c->regs[PROGRAM_OUTPUT]));
+ if (i == VERT_RESULT_HPOS) {
+ c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+ else if (i == VERT_RESULT_PSIZ) {
+ c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+ reg++;
+ mrf++; /* just a placeholder? XXX fix later stages & remove this */
+ }
+ else {
+ if (mrf < 16) {
+ c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf);
+ mrf++;
+ }
+ else {
+ /* too many vertex results to fit in MRF, use GRF for overflow */
+ if (!c->first_overflow_output)
+ c->first_overflow_output = i;
+ c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+ }
+ }
+ }
+
+ /* Allocate program temporaries:
+ */
+ for (i = 0; i < c->vp->program.Base.NumTemporaries; i++) {
+ c->regs[PROGRAM_TEMPORARY][i] = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+
+ /* Address reg(s). Don't try to use the internal address reg until
+ * deref time.
+ */
+ for (i = 0; i < c->vp->program.Base.NumAddressRegs; i++) {
+ c->regs[PROGRAM_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE,
+ reg,
+ 0,
+ BRW_REGISTER_TYPE_D,
+ BRW_VERTICAL_STRIDE_8,
+ BRW_WIDTH_8,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XXXX,
+ WRITEMASK_X);
+ reg++;
+ }
+
+ if (c->vp->use_const_buffer) {
+ for (i = 0; i < 3; i++) {
+ c->current_const[i].index = -1;
+ c->current_const[i].reg = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+ }
+
+ for (i = 0; i < 128; i++) {
+ if (c->output_regs[i].used_in_src) {
+ c->output_regs[i].reg = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+ }
+
+ c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
+ reg += 2;
+
+ /* Some opcodes need an internal temporary:
+ */
+ c->first_tmp = reg;
+ c->last_tmp = reg; /* for allocation purposes */
+
+ /* Each input reg holds data from two vertices. The
+ * urb_read_length is the number of registers read from *each*
+ * vertex urb, so is half the amount:
+ */
+ c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2;
+ /* Setting this field to 0 leads to undefined behavior according to the
+ * the VS_STATE docs. Our VUEs will always have at least one attribute
+ * sitting in them, even if it's padding.
+ */
+ if (c->prog_data.urb_read_length == 0)
+ c->prog_data.urb_read_length = 1;
+
+ /* The VS VUEs are shared by VF (outputting our inputs) and VS, so size
+ * them to fit the biggest thing they need to.
+ */
+ attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs);
+
+ if (BRW_IS_IGDNG(c->func.brw))
+ c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
+ else
+ c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
+
+ c->prog_data.total_grf = reg;
+
+ if (INTEL_DEBUG & DEBUG_VS) {
+ _mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs);
+ _mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries);
+ _mesa_printf("%s reg = %d\n", __FUNCTION__, reg);
+ }
+}
+
+
+/**
+ * If an instruction uses a temp reg both as a src and the dest, we
+ * sometimes need to allocate an intermediate temporary.
+ */
+static void unalias1( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ void (*func)( struct brw_vs_compile *,
+ struct brw_reg,
+ struct brw_reg ))
+{
+ if (dst.file == arg0.file && dst.nr == arg0.nr) {
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+ func(c, tmp, arg0);
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+ else {
+ func(c, dst, arg0);
+ }
+}
+
+/**
+ * \sa unalias2
+ * Checkes if 2-operand instruction needs an intermediate temporary.
+ */
+static void unalias2( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ void (*func)( struct brw_vs_compile *,
+ struct brw_reg,
+ struct brw_reg,
+ struct brw_reg ))
+{
+ if ((dst.file == arg0.file && dst.nr == arg0.nr) ||
+ (dst.file == arg1.file && dst.nr == arg1.nr)) {
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+ func(c, tmp, arg0, arg1);
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+ else {
+ func(c, dst, arg0, arg1);
+ }
+}
+
+/**
+ * \sa unalias2
+ * Checkes if 3-operand instruction needs an intermediate temporary.
+ */
+static void unalias3( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ struct brw_reg arg2,
+ void (*func)( struct brw_vs_compile *,
+ struct brw_reg,
+ struct brw_reg,
+ struct brw_reg,
+ struct brw_reg ))
+{
+ if ((dst.file == arg0.file && dst.nr == arg0.nr) ||
+ (dst.file == arg1.file && dst.nr == arg1.nr) ||
+ (dst.file == arg2.file && dst.nr == arg2.nr)) {
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+ func(c, tmp, arg0, arg1, arg2);
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+ else {
+ func(c, dst, arg0, arg1, arg2);
+ }
+}
+
+static void emit_sop( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ GLuint cond)
+{
+ brw_MOV(p, dst, brw_imm_f(0.0f));
+ brw_CMP(p, brw_null_reg(), cond, arg0, arg1);
+ brw_MOV(p, dst, brw_imm_f(1.0f));
+ brw_set_predicate_control_flag_value(p, 0xff);
+}
+
+static void emit_seq( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ);
+}
+
+static void emit_sne( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ);
+}
+static void emit_slt( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L);
+}
+
+static void emit_sle( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE);
+}
+
+static void emit_sgt( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G);
+}
+
+static void emit_sge( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE);
+}
+
+static void emit_max( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
+ brw_SEL(p, dst, arg1, arg0);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
+static void emit_min( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1 )
+{
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
+ brw_SEL(p, dst, arg0, arg1);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
+
+static void emit_math1( struct brw_vs_compile *c,
+ GLuint function,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ GLuint precision)
+{
+ /* There are various odd behaviours with SEND on the simulator. In
+ * addition there are documented issues with the fact that the GEN4
+ * processor doesn't do dependency control properly on SEND
+ * results. So, on balance, this kludge to get around failures
+ * with writemasked math results looks like it might be necessary
+ * whether that turns out to be a simulator bug or not:
+ */
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = dst;
+ GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+ dst.file != BRW_GENERAL_REGISTER_FILE);
+
+ if (need_tmp)
+ tmp = get_tmp(c);
+
+ brw_math(p,
+ tmp,
+ function,
+ BRW_MATH_SATURATE_NONE,
+ 2,
+ arg0,
+ BRW_MATH_DATA_SCALAR,
+ precision);
+
+ if (need_tmp) {
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+}
+
+
+static void emit_math2( struct brw_vs_compile *c,
+ GLuint function,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ GLuint precision)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = dst;
+ GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+ dst.file != BRW_GENERAL_REGISTER_FILE);
+
+ if (need_tmp)
+ tmp = get_tmp(c);
+
+ brw_MOV(p, brw_message_reg(3), arg1);
+
+ brw_math(p,
+ tmp,
+ function,
+ BRW_MATH_SATURATE_NONE,
+ 2,
+ arg0,
+ BRW_MATH_DATA_SCALAR,
+ precision);
+
+ if (need_tmp) {
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+}
+
+
+static void emit_exp_noalias( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0 )
+{
+ struct brw_compile *p = &c->func;
+
+
+ if (dst.dw1.bits.writemask & WRITEMASK_X) {
+ struct brw_reg tmp = get_tmp(c);
+ struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D);
+
+ /* tmp_d = floor(arg0.x) */
+ brw_RNDD(p, tmp_d, brw_swizzle1(arg0, 0));
+
+ /* result[0] = 2.0 ^ tmp */
+
+ /* Adjust exponent for floating point:
+ * exp += 127
+ */
+ brw_ADD(p, brw_writemask(tmp_d, WRITEMASK_X), tmp_d, brw_imm_d(127));
+
+ /* Install exponent and sign.
+ * Excess drops off the edge:
+ */
+ brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), WRITEMASK_X),
+ tmp_d, brw_imm_d(23));
+
+ release_tmp(c, tmp);
+ }
+
+ if (dst.dw1.bits.writemask & WRITEMASK_Y) {
+ /* result[1] = arg0.x - floor(arg0.x) */
+ brw_FRC(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0, 0));
+ }
+
+ if (dst.dw1.bits.writemask & WRITEMASK_Z) {
+ /* As with the LOG instruction, we might be better off just
+ * doing a taylor expansion here, seeing as we have to do all
+ * the prep work.
+ *
+ * If mathbox partial precision is too low, consider also:
+ * result[3] = result[0] * EXP(result[1])
+ */
+ emit_math1(c,
+ BRW_MATH_FUNCTION_EXP,
+ brw_writemask(dst, WRITEMASK_Z),
+ brw_swizzle1(arg0, 0),
+ BRW_MATH_PRECISION_FULL);
+ }
+
+ if (dst.dw1.bits.writemask & WRITEMASK_W) {
+ /* result[3] = 1.0; */
+ brw_MOV(p, brw_writemask(dst, WRITEMASK_W), brw_imm_f(1));
+ }
+}
+
+
+static void emit_log_noalias( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0 )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = dst;
+ struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
+ struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD);
+ GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+ dst.file != BRW_GENERAL_REGISTER_FILE);
+
+ if (need_tmp) {
+ tmp = get_tmp(c);
+ tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
+ }
+
+ /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
+ * according to spec:
+ *
+ * These almost look likey they could be joined up, but not really
+ * practical:
+ *
+ * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
+ * result[1].i = (x.i & ((1<<23)-1) + (127<<23)
+ */
+ if (dst.dw1.bits.writemask & WRITEMASK_XZ) {
+ brw_AND(p,
+ brw_writemask(tmp_ud, WRITEMASK_X),
+ brw_swizzle1(arg0_ud, 0),
+ brw_imm_ud((1U<<31)-1));
+
+ brw_SHR(p,
+ brw_writemask(tmp_ud, WRITEMASK_X),
+ tmp_ud,
+ brw_imm_ud(23));
+
+ brw_ADD(p,
+ brw_writemask(tmp, WRITEMASK_X),
+ retype(tmp_ud, BRW_REGISTER_TYPE_D), /* does it matter? */
+ brw_imm_d(-127));
+ }
+
+ if (dst.dw1.bits.writemask & WRITEMASK_YZ) {
+ brw_AND(p,
+ brw_writemask(tmp_ud, WRITEMASK_Y),
+ brw_swizzle1(arg0_ud, 0),
+ brw_imm_ud((1<<23)-1));
+
+ brw_OR(p,
+ brw_writemask(tmp_ud, WRITEMASK_Y),
+ tmp_ud,
+ brw_imm_ud(127<<23));
+ }
+
+ if (dst.dw1.bits.writemask & WRITEMASK_Z) {
+ /* result[2] = result[0] + LOG2(result[1]); */
+
+ /* Why bother? The above is just a hint how to do this with a
+ * taylor series. Maybe we *should* use a taylor series as by
+ * the time all the above has been done it's almost certainly
+ * quicker than calling the mathbox, even with low precision.
+ *
+ * Options are:
+ * - result[0] + mathbox.LOG2(result[1])
+ * - mathbox.LOG2(arg0.x)
+ * - result[0] + inline_taylor_approx(result[1])
+ */
+ emit_math1(c,
+ BRW_MATH_FUNCTION_LOG,
+ brw_writemask(tmp, WRITEMASK_Z),
+ brw_swizzle1(tmp, 1),
+ BRW_MATH_PRECISION_FULL);
+
+ brw_ADD(p,
+ brw_writemask(tmp, WRITEMASK_Z),
+ brw_swizzle1(tmp, 2),
+ brw_swizzle1(tmp, 0));
+ }
+
+ if (dst.dw1.bits.writemask & WRITEMASK_W) {
+ /* result[3] = 1.0; */
+ brw_MOV(p, brw_writemask(tmp, WRITEMASK_W), brw_imm_f(1));
+ }
+
+ if (need_tmp) {
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+}
+
+
+/* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1
+ */
+static void emit_dst_noalias( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1)
+{
+ struct brw_compile *p = &c->func;
+
+ /* There must be a better way to do this:
+ */
+ if (dst.dw1.bits.writemask & WRITEMASK_X)
+ brw_MOV(p, brw_writemask(dst, WRITEMASK_X), brw_imm_f(1.0));
+ if (dst.dw1.bits.writemask & WRITEMASK_Y)
+ brw_MUL(p, brw_writemask(dst, WRITEMASK_Y), arg0, arg1);
+ if (dst.dw1.bits.writemask & WRITEMASK_Z)
+ brw_MOV(p, brw_writemask(dst, WRITEMASK_Z), arg0);
+ if (dst.dw1.bits.writemask & WRITEMASK_W)
+ brw_MOV(p, brw_writemask(dst, WRITEMASK_W), arg1);
+}
+
+
+static void emit_xpd( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg t,
+ struct brw_reg u)
+{
+ brw_MUL(p, brw_null_reg(), brw_swizzle(t, 1,2,0,3), brw_swizzle(u,2,0,1,3));
+ brw_MAC(p, dst, negate(brw_swizzle(t, 2,0,1,3)), brw_swizzle(u,1,2,0,3));
+}
+
+
+static void emit_lit_noalias( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0 )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_instruction *if_insn;
+ struct brw_reg tmp = dst;
+ GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
+
+ if (need_tmp)
+ tmp = get_tmp(c);
+
+ brw_MOV(p, brw_writemask(dst, WRITEMASK_YZ), brw_imm_f(0));
+ brw_MOV(p, brw_writemask(dst, WRITEMASK_XW), brw_imm_f(1));
+
+ /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
+ * to get all channels active inside the IF. In the clipping code
+ * we run with NoMask, so it's not an option and we can use
+ * BRW_EXECUTE_1 for all comparisions.
+ */
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0));
+ if_insn = brw_IF(p, BRW_EXECUTE_8);
+ {
+ brw_MOV(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0,0));
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0));
+ brw_MOV(p, brw_writemask(tmp, WRITEMASK_Z), brw_swizzle1(arg0,1));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ emit_math2(c,
+ BRW_MATH_FUNCTION_POW,
+ brw_writemask(dst, WRITEMASK_Z),
+ brw_swizzle1(tmp, 2),
+ brw_swizzle1(arg0, 3),
+ BRW_MATH_PRECISION_PARTIAL);
+ }
+
+ brw_ENDIF(p, if_insn);
+
+ release_tmp(c, tmp);
+}
+
+static void emit_lrp_noalias(struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ struct brw_reg arg2)
+{
+ struct brw_compile *p = &c->func;
+
+ brw_ADD(p, dst, negate(arg0), brw_imm_f(1.0));
+ brw_MUL(p, brw_null_reg(), dst, arg2);
+ brw_MAC(p, dst, arg0, arg1);
+}
+
+/** 3 or 4-component vector normalization */
+static void emit_nrm( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ int num_comps)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = get_tmp(c);
+
+ /* tmp = dot(arg0, arg0) */
+ if (num_comps == 3)
+ brw_DP3(p, tmp, arg0, arg0);
+ else
+ brw_DP4(p, tmp, arg0, arg0);
+
+ /* tmp = 1 / sqrt(tmp) */
+ emit_math1(c, BRW_MATH_FUNCTION_RSQ, tmp, tmp, BRW_MATH_PRECISION_FULL);
+
+ /* dst = arg0 * tmp */
+ brw_MUL(p, dst, arg0, tmp);
+
+ release_tmp(c, tmp);
+}
+
+
+static struct brw_reg
+get_constant(struct brw_vs_compile *c,
+ const struct prog_instruction *inst,
+ GLuint argIndex)
+{
+ const struct prog_src_register *src = &inst->SrcReg[argIndex];
+ struct brw_compile *p = &c->func;
+ struct brw_reg const_reg;
+ struct brw_reg const2_reg;
+ const GLboolean relAddr = src->RelAddr;
+
+ assert(argIndex < 3);
+
+ if (c->current_const[argIndex].index != src->Index || relAddr) {
+ struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
+
+ c->current_const[argIndex].index = src->Index;
+
+#if 0
+ printf(" fetch const[%d] for arg %d into reg %d\n",
+ src->Index, argIndex, c->current_const[argIndex].reg.nr);
+#endif
+ /* need to fetch the constant now */
+ brw_dp_READ_4_vs(p,
+ c->current_const[argIndex].reg,/* writeback dest */
+ 0, /* oword */
+ relAddr, /* relative indexing? */
+ addrReg, /* address register */
+ 16 * src->Index, /* byte offset */
+ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
+ );
+
+ if (relAddr) {
+ /* second read */
+ const2_reg = get_tmp(c);
+
+ /* use upper half of address reg for second read */
+ addrReg = stride(addrReg, 0, 4, 0);
+ addrReg.subnr = 16;
+
+ brw_dp_READ_4_vs(p,
+ const2_reg, /* writeback dest */
+ 1, /* oword */
+ relAddr, /* relative indexing? */
+ addrReg, /* address register */
+ 16 * src->Index, /* byte offset */
+ SURF_INDEX_VERT_CONST_BUFFER
+ );
+ }
+ }
+
+ const_reg = c->current_const[argIndex].reg;
+
+ if (relAddr) {
+ /* merge the two Owords into the constant register */
+ /* const_reg[7..4] = const2_reg[7..4] */
+ brw_MOV(p,
+ suboffset(stride(const_reg, 0, 4, 1), 4),
+ suboffset(stride(const2_reg, 0, 4, 1), 4));
+ release_tmp(c, const2_reg);
+ }
+ else {
+ /* replicate lower four floats into upper half (to get XYZWXYZW) */
+ const_reg = stride(const_reg, 0, 4, 0);
+ const_reg.subnr = 0;
+ }
+
+ return const_reg;
+}
+
+
+
+/* TODO: relative addressing!
+ */
+static struct brw_reg get_reg( struct brw_vs_compile *c,
+ gl_register_file file,
+ GLuint index )
+{
+ switch (file) {
+ case PROGRAM_TEMPORARY:
+ case PROGRAM_INPUT:
+ case PROGRAM_OUTPUT:
+ assert(c->regs[file][index].nr != 0);
+ return c->regs[file][index];
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_CONSTANT:
+ case PROGRAM_UNIFORM:
+ assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
+ return c->regs[PROGRAM_STATE_VAR][index];
+ case PROGRAM_ADDRESS:
+ assert(index == 0);
+ return c->regs[file][index];
+
+ case PROGRAM_UNDEFINED: /* undef values */
+ return brw_null_reg();
+
+ case PROGRAM_LOCAL_PARAM:
+ case PROGRAM_ENV_PARAM:
+ case PROGRAM_WRITE_ONLY:
+ default:
+ assert(0);
+ return brw_null_reg();
+ }
+}
+
+
+/**
+ * Indirect addressing: get reg[[arg] + offset].
+ */
+static struct brw_reg deref( struct brw_vs_compile *c,
+ struct brw_reg arg,
+ GLint offset)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = vec4(get_tmp(c));
+ struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+ struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
+ GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
+ struct brw_reg indirect = brw_vec4_indirect(0,0);
+
+ {
+ brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
+
+ /* This is pretty clunky - load the address register twice and
+ * fetch each 4-dword value in turn. There must be a way to do
+ * this in a single pass, but I couldn't get it to work.
+ */
+ brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
+ brw_MOV(p, tmp, indirect);
+
+ brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset));
+ brw_MOV(p, suboffset(tmp, 4), indirect);
+
+ brw_pop_insn_state(p);
+ }
+
+ /* NOTE: tmp not released */
+ return vec8(tmp);
+}
+
+
+/**
+ * Get brw reg corresponding to the instruction's [argIndex] src reg.
+ * TODO: relative addressing!
+ */
+static struct brw_reg
+get_src_reg( struct brw_vs_compile *c,
+ const struct prog_instruction *inst,
+ GLuint argIndex )
+{
+ const GLuint file = inst->SrcReg[argIndex].File;
+ const GLint index = inst->SrcReg[argIndex].Index;
+ const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr;
+
+ switch (file) {
+ case PROGRAM_TEMPORARY:
+ case PROGRAM_INPUT:
+ case PROGRAM_OUTPUT:
+ if (relAddr) {
+ return deref(c, c->regs[file][0], index);
+ }
+ else {
+ assert(c->regs[file][index].nr != 0);
+ return c->regs[file][index];
+ }
+
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_CONSTANT:
+ case PROGRAM_UNIFORM:
+ case PROGRAM_ENV_PARAM:
+ if (c->vp->use_const_buffer) {
+ return get_constant(c, inst, argIndex);
+ }
+ else if (relAddr) {
+ return deref(c, c->regs[PROGRAM_STATE_VAR][0], index);
+ }
+ else {
+ assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
+ return c->regs[PROGRAM_STATE_VAR][index];
+ }
+ case PROGRAM_ADDRESS:
+ assert(index == 0);
+ return c->regs[file][index];
+
+ case PROGRAM_UNDEFINED:
+ /* this is a normal case since we loop over all three src args */
+ return brw_null_reg();
+
+ case PROGRAM_LOCAL_PARAM:
+ case PROGRAM_WRITE_ONLY:
+ default:
+ assert(0);
+ return brw_null_reg();
+ }
+}
+
+
+static void emit_arl( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0 )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp = dst;
+ GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
+
+ if (need_tmp)
+ tmp = get_tmp(c);
+
+ brw_RNDD(p, tmp, arg0); /* tmp = round(arg0) */
+ brw_MUL(p, dst, tmp, brw_imm_d(16)); /* dst = tmp * 16 */
+
+ if (need_tmp)
+ release_tmp(c, tmp);
+}
+
+
+/**
+ * Return the brw reg for the given instruction's src argument.
+ * Will return mangled results for SWZ op. The emit_swz() function
+ * ignores this result and recalculates taking extended swizzles into
+ * account.
+ */
+static struct brw_reg get_arg( struct brw_vs_compile *c,
+ const struct prog_instruction *inst,
+ GLuint argIndex )
+{
+ const struct prog_src_register *src = &inst->SrcReg[argIndex];
+ struct brw_reg reg;
+
+ if (src->File == PROGRAM_UNDEFINED)
+ return brw_null_reg();
+
+ reg = get_src_reg(c, inst, argIndex);
+
+ /* Convert 3-bit swizzle to 2-bit.
+ */
+ reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0),
+ GET_SWZ(src->Swizzle, 1),
+ GET_SWZ(src->Swizzle, 2),
+ GET_SWZ(src->Swizzle, 3));
+
+ /* Note this is ok for non-swizzle instructions:
+ */
+ reg.negate = src->Negate ? 1 : 0;
+
+ return reg;
+}
+
+
+/**
+ * Get brw register for the given program dest register.
+ */
+static struct brw_reg get_dst( struct brw_vs_compile *c,
+ struct prog_dst_register dst )
+{
+ struct brw_reg reg;
+
+ switch (dst.File) {
+ case PROGRAM_TEMPORARY:
+ case PROGRAM_OUTPUT:
+ assert(c->regs[dst.File][dst.Index].nr != 0);
+ reg = c->regs[dst.File][dst.Index];
+ break;
+ case PROGRAM_ADDRESS:
+ assert(dst.Index == 0);
+ reg = c->regs[dst.File][dst.Index];
+ break;
+ case PROGRAM_UNDEFINED:
+ /* we may hit this for OPCODE_END, OPCODE_KIL, etc */
+ reg = brw_null_reg();
+ break;
+ default:
+ assert(0);
+ reg = brw_null_reg();
+ }
+
+ reg.dw1.bits.writemask = dst.WriteMask;
+
+ return reg;
+}
+
+
+static void emit_swz( struct brw_vs_compile *c,
+ struct brw_reg dst,
+ const struct prog_instruction *inst)
+{
+ const GLuint argIndex = 0;
+ const struct prog_src_register src = inst->SrcReg[argIndex];
+ struct brw_compile *p = &c->func;
+ GLuint zeros_mask = 0;
+ GLuint ones_mask = 0;
+ GLuint src_mask = 0;
+ GLubyte src_swz[4];
+ GLboolean need_tmp = (src.Negate &&
+ dst.file != BRW_GENERAL_REGISTER_FILE);
+ struct brw_reg tmp = dst;
+ GLuint i;
+
+ if (need_tmp)
+ tmp = get_tmp(c);
+
+ for (i = 0; i < 4; i++) {
+ if (dst.dw1.bits.writemask & (1<<i)) {
+ GLubyte s = GET_SWZ(src.Swizzle, i);
+ switch (s) {
+ case SWIZZLE_X:
+ case SWIZZLE_Y:
+ case SWIZZLE_Z:
+ case SWIZZLE_W:
+ src_mask |= 1<<i;
+ src_swz[i] = s;
+ break;
+ case SWIZZLE_ZERO:
+ zeros_mask |= 1<<i;
+ break;
+ case SWIZZLE_ONE:
+ ones_mask |= 1<<i;
+ break;
+ }
+ }
+ }
+
+ /* Do src first, in case dst aliases src:
+ */
+ if (src_mask) {
+ struct brw_reg arg0;
+
+ arg0 = get_src_reg(c, inst, argIndex);
+
+ arg0 = brw_swizzle(arg0,
+ src_swz[0], src_swz[1],
+ src_swz[2], src_swz[3]);
+
+ brw_MOV(p, brw_writemask(tmp, src_mask), arg0);
+ }
+
+ if (zeros_mask)
+ brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0));
+
+ if (ones_mask)
+ brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1));
+
+ if (src.Negate)
+ brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp));
+
+ if (need_tmp) {
+ brw_MOV(p, dst, tmp);
+ release_tmp(c, tmp);
+ }
+}
+
+
+/**
+ * Post-vertex-program processing. Send the results to the URB.
+ */
+static void emit_vertex_write( struct brw_vs_compile *c)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg m0 = brw_message_reg(0);
+ struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS];
+ struct brw_reg ndc;
+ int eot;
+ GLuint len_vertext_header = 2;
+
+ if (c->key.copy_edgeflag) {
+ brw_MOV(p,
+ get_reg(c, PROGRAM_OUTPUT, VERT_RESULT_EDGE),
+ get_reg(c, PROGRAM_INPUT, VERT_ATTRIB_EDGEFLAG));
+ }
+
+ /* Build ndc coords */
+ ndc = get_tmp(c);
+ /* ndc = 1.0 / pos.w */
+ emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
+ /* ndc.xyz = pos * ndc */
+ brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc);
+
+ /* Update the header for point size, user clipping flags, and -ve rhw
+ * workaround.
+ */
+ if ((c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) ||
+ c->key.nr_userclip || BRW_IS_965(p->brw))
+ {
+ struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+ GLuint i;
+
+ brw_MOV(p, header1, brw_imm_ud(0));
+
+ brw_set_access_mode(p, BRW_ALIGN_16);
+
+ if (c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) {
+ struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ];
+ brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
+ brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8));
+ }
+
+ for (i = 0; i < c->key.nr_userclip; i++) {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_DP4(p, brw_null_reg(), pos, c->userplane[i]);
+ brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<i));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+
+ /* i965 clipping workaround:
+ * 1) Test for -ve rhw
+ * 2) If set,
+ * set ndc = (0,0,0,0)
+ * set ucp[6] = 1
+ *
+ * Later, clipping will detect ucp[6] and ensure the primitive is
+ * clipped against all fixed planes.
+ */
+ if (BRW_IS_965(p->brw)) {
+ brw_CMP(p,
+ vec8(brw_null_reg()),
+ BRW_CONDITIONAL_L,
+ brw_swizzle1(ndc, 3),
+ brw_imm_f(0));
+
+ brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
+ brw_MOV(p, ndc, brw_imm_f(0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+
+ brw_set_access_mode(p, BRW_ALIGN_1); /* why? */
+ brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1);
+ brw_set_access_mode(p, BRW_ALIGN_16);
+
+ release_tmp(c, header1);
+ }
+ else {
+ brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
+ }
+
+ /* Emit the (interleaved) headers for the two vertices - an 8-reg
+ * of zeros followed by two sets of NDC coordinates:
+ */
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_MOV(p, offset(m0, 2), ndc);
+
+ if (BRW_IS_IGDNG(p->brw)) {
+ /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */
+ brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */
+ /* m4, m5 contain the distances from vertex to the user clip planeXXX.
+ * Seems it is useless for us.
+ * m6 is used for aligning, so that the remainder of vertex element is
+ * reg-aligned.
+ */
+ brw_MOV(p, offset(m0, 7), pos); /* the remainder of vertex element */
+ len_vertext_header = 6;
+ } else {
+ brw_MOV(p, offset(m0, 3), pos);
+ len_vertext_header = 2;
+ }
+
+ eot = (c->first_overflow_output == 0);
+
+ brw_urb_WRITE(p,
+ brw_null_reg(), /* dest */
+ 0, /* starting mrf reg nr */
+ c->r0, /* src */
+ 0, /* allocate */
+ 1, /* used */
+ MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */
+ 0, /* response len */
+ eot, /* eot */
+ eot, /* writes complete */
+ 0, /* urb destination offset */
+ BRW_URB_SWIZZLE_INTERLEAVE);
+
+ if (c->first_overflow_output > 0) {
+ /* Not all of the vertex outputs/results fit into the MRF.
+ * Move the overflowed attributes from the GRF to the MRF and
+ * issue another brw_urb_WRITE().
+ */
+ /* XXX I'm not 100% sure about which MRF regs to use here. Starting
+ * at mrf[4] atm...
+ */
+ GLuint i, mrf = 0;
+ for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) {
+ if (c->prog_data.outputs_written & (1 << i)) {
+ /* move from GRF to MRF */
+ brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]);
+ mrf++;
+ }
+ }
+
+ brw_urb_WRITE(p,
+ brw_null_reg(), /* dest */
+ 4, /* starting mrf reg nr */
+ c->r0, /* src */
+ 0, /* allocate */
+ 1, /* used */
+ mrf+1, /* msg len */
+ 0, /* response len */
+ 1, /* eot */
+ 1, /* writes complete */
+ BRW_MAX_MRF-1, /* urb destination offset */
+ BRW_URB_SWIZZLE_INTERLEAVE);
+ }
+}
+
+
+/**
+ * Called after code generation to resolve subroutine calls and the
+ * END instruction.
+ * \param end_inst points to brw code for END instruction
+ * \param last_inst points to last instruction emitted before vertex write
+ */
+static void
+post_vs_emit( struct brw_vs_compile *c,
+ struct brw_instruction *end_inst,
+ struct brw_instruction *last_inst )
+{
+ GLint offset;
+
+ brw_resolve_cals(&c->func);
+
+ /* patch up the END code to jump past subroutines, etc */
+ offset = last_inst - end_inst;
+ if (offset > 1) {
+ brw_set_src1(end_inst, brw_imm_d(offset * 16));
+ } else {
+ end_inst->header.opcode = BRW_OPCODE_NOP;
+ }
+}
+
+static uint32_t
+get_predicate(const struct prog_instruction *inst)
+{
+ if (inst->DstReg.CondMask == COND_TR)
+ return BRW_PREDICATE_NONE;
+
+ /* All of GLSL only produces predicates for COND_NE and one channel per
+ * vector. Fail badly if someone starts doing something else, as it might
+ * mean infinite looping or something.
+ *
+ * We'd like to support all the condition codes, but our hardware doesn't
+ * quite match the Mesa IR, which is modeled after the NV extensions. For
+ * those, the instruction may update the condition codes or not, then any
+ * later instruction may use one of those condition codes. For gen4, the
+ * instruction may update the flags register based on one of the condition
+ * codes output by the instruction, and then further instructions may
+ * predicate on that. We can probably support this, but it won't
+ * necessarily be easy.
+ */
+ assert(inst->DstReg.CondMask == COND_NE);
+
+ switch (inst->DstReg.CondSwizzle) {
+ case SWIZZLE_XXXX:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_X;
+ case SWIZZLE_YYYY:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_Y;
+ case SWIZZLE_ZZZZ:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_Z;
+ case SWIZZLE_WWWW:
+ return BRW_PREDICATE_ALIGN16_REPLICATE_W;
+ default:
+ _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n",
+ inst->DstReg.CondMask);
+ return BRW_PREDICATE_NORMAL;
+ }
+}
+
+/* Emit the vertex program instructions here.
+ */
+void brw_vs_emit(struct brw_vs_compile *c )
+{
+#define MAX_IF_DEPTH 32
+#define MAX_LOOP_DEPTH 32
+ struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
+ const GLuint nr_insns = c->vp->program.Base.NumInstructions;
+ GLuint insn, if_depth = 0, loop_depth = 0;
+ GLuint end_offset = 0;
+ struct brw_instruction *end_inst, *last_inst;
+ struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
+ const struct brw_indirect stack_index = brw_indirect(0, 0);
+ GLuint index;
+ GLuint file;
+
+ if (INTEL_DEBUG & DEBUG_VS) {
+ _mesa_printf("vs-mesa:\n");
+ _mesa_print_program(&c->vp->program.Base);
+ _mesa_printf("\n");
+ }
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_access_mode(p, BRW_ALIGN_16);
+
+ /* Message registers can't be read, so copy the output into GRF register
+ if they are used in source registers */
+ for (insn = 0; insn < nr_insns; insn++) {
+ GLuint i;
+ struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
+ for (i = 0; i < 3; i++) {
+ struct prog_src_register *src = &inst->SrcReg[i];
+ GLuint index = src->Index;
+ GLuint file = src->File;
+ if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS)
+ c->output_regs[index].used_in_src = GL_TRUE;
+ }
+ }
+
+ /* Static register allocation
+ */
+ brw_vs_alloc_regs(c);
+ brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
+
+ for (insn = 0; insn < nr_insns; insn++) {
+
+ const struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
+ struct brw_reg args[3], dst;
+ GLuint i;
+
+#if 0
+ printf("%d: ", insn);
+ _mesa_print_instruction(inst);
+#endif
+
+ /* Get argument regs. SWZ is special and does this itself.
+ */
+ if (inst->Opcode != OPCODE_SWZ)
+ for (i = 0; i < 3; i++) {
+ const struct prog_src_register *src = &inst->SrcReg[i];
+ index = src->Index;
+ file = src->File;
+ if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
+ args[i] = c->output_regs[index].reg;
+ else
+ args[i] = get_arg(c, inst, i);
+ }
+
+ /* Get dest regs. Note that it is possible for a reg to be both
+ * dst and arg, given the static allocation of registers. So
+ * care needs to be taken emitting multi-operation instructions.
+ */
+ index = inst->DstReg.Index;
+ file = inst->DstReg.File;
+ if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
+ dst = c->output_regs[index].reg;
+ else
+ dst = get_dst(c, inst->DstReg);
+
+ if (inst->SaturateMode != SATURATE_OFF) {
+ _mesa_problem(NULL, "Unsupported saturate %d in vertex shader",
+ inst->SaturateMode);
+ }
+
+ switch (inst->Opcode) {
+ case OPCODE_ABS:
+ brw_MOV(p, dst, brw_abs(args[0]));
+ break;
+ case OPCODE_ADD:
+ brw_ADD(p, dst, args[0], args[1]);
+ break;
+ case OPCODE_COS:
+ emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case OPCODE_DP3:
+ brw_DP3(p, dst, args[0], args[1]);
+ break;
+ case OPCODE_DP4:
+ brw_DP4(p, dst, args[0], args[1]);
+ break;
+ case OPCODE_DPH:
+ brw_DPH(p, dst, args[0], args[1]);
+ break;
+ case OPCODE_NRM3:
+ emit_nrm(c, dst, args[0], 3);
+ break;
+ case OPCODE_NRM4:
+ emit_nrm(c, dst, args[0], 4);
+ break;
+ case OPCODE_DST:
+ unalias2(c, dst, args[0], args[1], emit_dst_noalias);
+ break;
+ case OPCODE_EXP:
+ unalias1(c, dst, args[0], emit_exp_noalias);
+ break;
+ case OPCODE_EX2:
+ emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case OPCODE_ARL:
+ emit_arl(c, dst, args[0]);
+ break;
+ case OPCODE_FLR:
+ brw_RNDD(p, dst, args[0]);
+ break;
+ case OPCODE_FRC:
+ brw_FRC(p, dst, args[0]);
+ break;
+ case OPCODE_LOG:
+ unalias1(c, dst, args[0], emit_log_noalias);
+ break;
+ case OPCODE_LG2:
+ emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case OPCODE_LIT:
+ unalias1(c, dst, args[0], emit_lit_noalias);
+ break;
+ case OPCODE_LRP:
+ unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias);
+ break;
+ case OPCODE_MAD:
+ brw_MOV(p, brw_acc_reg(), args[2]);
+ brw_MAC(p, dst, args[0], args[1]);
+ break;
+ case OPCODE_MAX:
+ emit_max(p, dst, args[0], args[1]);
+ break;
+ case OPCODE_MIN:
+ emit_min(p, dst, args[0], args[1]);
+ break;
+ case OPCODE_MOV:
+ brw_MOV(p, dst, args[0]);
+ break;
+ case OPCODE_MUL:
+ brw_MUL(p, dst, args[0], args[1]);
+ break;
+ case OPCODE_POW:
+ emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL);
+ break;
+ case OPCODE_RCP:
+ emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case OPCODE_RSQ:
+ emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+
+ case OPCODE_SEQ:
+ emit_seq(p, dst, args[0], args[1]);
+ break;
+ case OPCODE_SIN:
+ emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case OPCODE_SNE:
+ emit_sne(p, dst, args[0], args[1]);
+ break;
+ case OPCODE_SGE:
+ emit_sge(p, dst, args[0], args[1]);
+ break;
+ case OPCODE_SGT:
+ emit_sgt(p, dst, args[0], args[1]);
+ break;
+ case OPCODE_SLT:
+ emit_slt(p, dst, args[0], args[1]);
+ break;
+ case OPCODE_SLE:
+ emit_sle(p, dst, args[0], args[1]);
+ break;
+ case OPCODE_SUB:
+ brw_ADD(p, dst, args[0], negate(args[1]));
+ break;
+ case OPCODE_SWZ:
+ /* The args[0] value can't be used here as it won't have
+ * correctly encoded the full swizzle:
+ */
+ emit_swz(c, dst, inst);
+ break;
+ case OPCODE_TRUNC:
+ /* round toward zero */
+ brw_RNDZ(p, dst, args[0]);
+ break;
+ case OPCODE_XPD:
+ emit_xpd(p, dst, args[0], args[1]);
+ break;
+ case OPCODE_IF:
+ assert(if_depth < MAX_IF_DEPTH);
+ if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8);
+ /* Note that brw_IF smashes the predicate_control field. */
+ if_inst[if_depth]->header.predicate_control = get_predicate(inst);
+ if_depth++;
+ break;
+ case OPCODE_ELSE:
+ if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
+ break;
+ case OPCODE_ENDIF:
+ assert(if_depth > 0);
+ brw_ENDIF(p, if_inst[--if_depth]);
+ break;
+ case OPCODE_BGNLOOP:
+ loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
+ break;
+ case OPCODE_BRK:
+ brw_set_predicate_control(p, get_predicate(inst));
+ brw_BREAK(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case OPCODE_CONT:
+ brw_set_predicate_control(p, get_predicate(inst));
+ brw_CONT(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case OPCODE_ENDLOOP:
+ {
+ struct brw_instruction *inst0, *inst1;
+ GLuint br = 1;
+
+ loop_depth--;
+
+ if (BRW_IS_IGDNG(brw))
+ br = 2;
+
+ inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+ /* patch all the BREAK/CONT instructions from last BEGINLOOP */
+ while (inst0 > loop_inst[loop_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+ inst0->bits3.if_else.pop_count = 0;
+ }
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ inst0->bits3.if_else.pop_count = 0;
+ }
+ }
+ }
+ break;
+ case OPCODE_BRA:
+ brw_set_predicate_control(p, get_predicate(inst));
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case OPCODE_CAL:
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_ADD(p, get_addr_reg(stack_index),
+ get_addr_reg(stack_index), brw_imm_d(4));
+ brw_save_call(p, inst->Comment, p->nr_insn);
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ break;
+ case OPCODE_RET:
+ brw_ADD(p, get_addr_reg(stack_index),
+ get_addr_reg(stack_index), brw_imm_d(-4));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ break;
+ case OPCODE_END:
+ end_offset = p->nr_insn;
+ /* this instruction will get patched later to jump past subroutine
+ * code, etc.
+ */
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ break;
+ case OPCODE_PRINT:
+ /* no-op */
+ break;
+ case OPCODE_BGNSUB:
+ brw_save_label(p, inst->Comment, p->nr_insn);
+ break;
+ case OPCODE_ENDSUB:
+ /* no-op */
+ break;
+ default:
+ _mesa_problem(NULL, "Unsupported opcode %i (%s) in vertex shader",
+ inst->Opcode, inst->Opcode < MAX_OPCODE ?
+ _mesa_opcode_string(inst->Opcode) :
+ "unknown");
+ }
+
+ /* Set the predication update on the last instruction of the native
+ * instruction sequence.
+ *
+ * This would be problematic if it was set on a math instruction,
+ * but that shouldn't be the case with the current GLSL compiler.
+ */
+ if (inst->CondUpdate) {
+ struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1];
+
+ assert(hw_insn->header.destreg__conditionalmod == 0);
+ hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ;
+ }
+
+ if ((inst->DstReg.File == PROGRAM_OUTPUT)
+ && (inst->DstReg.Index != VERT_RESULT_HPOS)
+ && c->output_regs[inst->DstReg.Index].used_in_src) {
+ brw_MOV(p, get_dst(c, inst->DstReg), dst);
+ }
+
+ /* Result color clamping.
+ *
+ * When destination register is an output register and
+ * it's primary/secondary front/back color, we have to clamp
+ * the result to [0,1]. This is done by enabling the
+ * saturation bit for the last instruction.
+ *
+ * We don't use brw_set_saturate() as it modifies
+ * p->current->header.saturate, which affects all the subsequent
+ * instructions. Instead, we directly modify the header
+ * of the last (already stored) instruction.
+ */
+ if (inst->DstReg.File == PROGRAM_OUTPUT) {
+ if ((inst->DstReg.Index == VERT_RESULT_COL0)
+ || (inst->DstReg.Index == VERT_RESULT_COL1)
+ || (inst->DstReg.Index == VERT_RESULT_BFC0)
+ || (inst->DstReg.Index == VERT_RESULT_BFC1)) {
+ p->store[p->nr_insn-1].header.saturate = 1;
+ }
+ }
+
+ release_tmps(c);
+ }
+
+ end_inst = &p->store[end_offset];
+ last_inst = &p->store[p->nr_insn];
+
+ /* The END instruction will be patched to jump to this code */
+ emit_vertex_write(c);
+
+ post_vs_emit(c, end_inst, last_inst);
+
+ if (INTEL_DEBUG & DEBUG_VS) {
+ int i;
+
+ _mesa_printf("vs-native:\n");
+ for (i = 0; i < p->nr_insn; i++)
+ brw_disasm(stderr, &p->store[i]);
+ _mesa_printf("\n");
+ }
+}
diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c
new file mode 100644
index 00000000000..d790ab65553
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs_state.c
@@ -0,0 +1,185 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "main/macros.h"
+
+struct brw_vs_unit_key {
+ unsigned int total_grf;
+ unsigned int urb_entry_read_length;
+ unsigned int curb_entry_read_length;
+
+ unsigned int curbe_offset;
+
+ unsigned int nr_urb_entries, urb_size;
+
+ unsigned int nr_surfaces;
+};
+
+static void
+vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+
+ memset(key, 0, sizeof(*key));
+
+ /* CACHE_NEW_VS_PROG */
+ key->total_grf = brw->vs.prog_data->total_grf;
+ key->urb_entry_read_length = brw->vs.prog_data->urb_read_length;
+ key->curb_entry_read_length = brw->vs.prog_data->curb_read_length;
+
+ /* BRW_NEW_URB_FENCE */
+ key->nr_urb_entries = brw->urb.nr_vs_entries;
+ key->urb_size = brw->urb.vsize;
+
+ /* BRW_NEW_NR_VS_SURFACES */
+ key->nr_surfaces = brw->vs.nr_surfaces;
+
+ /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */
+ if (ctx->Transform.ClipPlanesEnabled) {
+ /* Note that we read in the userclip planes as well, hence
+ * clip_start:
+ */
+ key->curbe_offset = brw->curbe.clip_start;
+ }
+ else {
+ key->curbe_offset = brw->curbe.vs_start;
+ }
+}
+
+static dri_bo *
+vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
+{
+ struct brw_vs_unit_state vs;
+ dri_bo *bo;
+ int chipset_max_threads;
+
+ memset(&vs, 0, sizeof(vs));
+
+ vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */
+ vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+ vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ /* Choosing multiple program flow means that we may get 2-vertex threads,
+ * which will have the channel mask for dwords 4-7 enabled in the thread,
+ * and those dwords will be written to the second URB handle when we
+ * brw_urb_WRITE() results.
+ */
+ vs.thread1.single_program_flow = 0;
+
+ if (BRW_IS_IGDNG(brw))
+ vs.thread1.binding_table_entry_count = 0; /* hardware requirement */
+ else
+ vs.thread1.binding_table_entry_count = key->nr_surfaces;
+
+ vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
+ vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
+ vs.thread3.dispatch_grf_start_reg = 1;
+ vs.thread3.urb_entry_read_offset = 0;
+ vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
+
+ if (BRW_IS_IGDNG(brw))
+ vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2;
+ else
+ vs.thread4.nr_urb_entries = key->nr_urb_entries;
+
+ vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
+
+ if (BRW_IS_IGDNG(brw))
+ chipset_max_threads = 72;
+ else if (BRW_IS_G4X(brw))
+ chipset_max_threads = 32;
+ else
+ chipset_max_threads = 16;
+ vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2,
+ 1, chipset_max_threads) - 1;
+
+ if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+ vs.thread4.max_threads = 0;
+
+ /* No samplers for ARB_vp programs:
+ */
+ /* It has to be set to 0 for IGDNG
+ */
+ vs.vs5.sampler_count = 0;
+
+ if (INTEL_DEBUG & DEBUG_STATS)
+ vs.thread4.stats_enable = 1;
+
+ /* Vertex program always enabled:
+ */
+ vs.vs6.vs_enable = 1;
+
+ bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT,
+ key, sizeof(*key),
+ &brw->vs.prog_bo, 1,
+ &vs, sizeof(vs),
+ NULL, NULL);
+
+ /* Emit VS program relocation */
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ vs.thread0.grf_reg_count << 1,
+ offsetof(struct brw_vs_unit_state, thread0),
+ brw->vs.prog_bo);
+
+ return bo;
+}
+
+static void prepare_vs_unit(struct brw_context *brw)
+{
+ struct brw_vs_unit_key key;
+
+ vs_unit_populate_key(brw, &key);
+
+ dri_bo_unreference(brw->vs.state_bo);
+ brw->vs.state_bo = brw_search_cache(&brw->cache, BRW_VS_UNIT,
+ &key, sizeof(key),
+ &brw->vs.prog_bo, 1,
+ NULL);
+ if (brw->vs.state_bo == NULL) {
+ brw->vs.state_bo = vs_unit_create_from_key(brw, &key);
+ }
+}
+
+const struct brw_tracked_state brw_vs_unit = {
+ .dirty = {
+ .mesa = _NEW_TRANSFORM,
+ .brw = (BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_NR_VS_SURFACES |
+ BRW_NEW_URB_FENCE),
+ .cache = CACHE_NEW_VS_PROG
+ },
+ .prepare = prepare_vs_unit,
+};
diff --git a/src/gallium/drivers/i965/brw_vs_surface_state.c b/src/gallium/drivers/i965/brw_vs_surface_state.c
new file mode 100644
index 00000000000..89f47522a1c
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs_surface_state.c
@@ -0,0 +1,226 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+#include "main/mtypes.h"
+#include "main/texformat.h"
+#include "main/texstore.h"
+#include "shader/prog_parameter.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+/* Creates a new VS constant buffer reflecting the current VS program's
+ * constants, if needed by the VS program.
+ *
+ * Otherwise, constants go through the CURBEs using the brw_constant_buffer
+ * state atom.
+ */
+static drm_intel_bo *
+brw_vs_update_constant_buffer(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ struct brw_vertex_program *vp =
+ (struct brw_vertex_program *) brw->vertex_program;
+ const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
+ const int size = params->NumParameters * 4 * sizeof(GLfloat);
+ drm_intel_bo *const_buffer;
+
+ /* BRW_NEW_VERTEX_PROGRAM */
+ if (!vp->use_const_buffer)
+ return NULL;
+
+ const_buffer = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer",
+ size, 64);
+
+ /* _NEW_PROGRAM_CONSTANTS */
+ dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
+
+ return const_buffer;
+}
+
+/**
+ * Update the surface state for a VS constant buffer.
+ *
+ * Sets brw->vs.surf_bo[surf] and brw->vp->const_buffer.
+ */
+static void
+brw_update_vs_constant_surface( GLcontext *ctx,
+ GLuint surf)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_surface_key key;
+ struct brw_vertex_program *vp =
+ (struct brw_vertex_program *) brw->vertex_program;
+ const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
+
+ assert(surf == 0);
+
+ /* If we're in this state update atom, we need to update VS constants, so
+ * free the old buffer and create a new one for the new contents.
+ */
+ dri_bo_unreference(vp->const_buffer);
+ vp->const_buffer = brw_vs_update_constant_buffer(brw);
+
+ /* If there's no constant buffer, then no surface BO is needed to point at
+ * it.
+ */
+ if (vp->const_buffer == 0) {
+ drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
+ brw->vs.surf_bo[surf] = NULL;
+ return;
+ }
+
+ memset(&key, 0, sizeof(key));
+
+ key.format = MESA_FORMAT_RGBA_FLOAT32;
+ key.internal_format = GL_RGBA;
+ key.bo = vp->const_buffer;
+ key.depthmode = GL_NONE;
+ key.pitch = params->NumParameters;
+ key.width = params->NumParameters;
+ key.height = 1;
+ key.depth = 1;
+ key.cpp = 16;
+
+ /*
+ printf("%s:\n", __FUNCTION__);
+ printf(" width %d height %d depth %d cpp %d pitch %d\n",
+ key.width, key.height, key.depth, key.cpp, key.pitch);
+ */
+
+ drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
+ brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &key.bo, key.bo ? 1 : 0,
+ NULL);
+ if (brw->vs.surf_bo[surf] == NULL) {
+ brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key);
+ }
+}
+
+
+/**
+ * Constructs the binding table for the VS surface state.
+ */
+static dri_bo *
+brw_vs_get_binding_table(struct brw_context *brw)
+{
+ dri_bo *bind_bo;
+
+ bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
+ NULL, 0,
+ brw->vs.surf_bo, BRW_VS_MAX_SURF,
+ NULL);
+
+ if (bind_bo == NULL) {
+ GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint);
+ uint32_t *data = malloc(data_size);
+ int i;
+
+ for (i = 0; i < BRW_VS_MAX_SURF; i++)
+ if (brw->vs.surf_bo[i])
+ data[i] = brw->vs.surf_bo[i]->offset;
+ else
+ data[i] = 0;
+
+ bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
+ NULL, 0,
+ brw->vs.surf_bo, BRW_VS_MAX_SURF,
+ data, data_size,
+ NULL, NULL);
+
+ /* Emit binding table relocations to surface state */
+ for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+ if (brw->vs.surf_bo[i] != NULL) {
+ /* The presumed offsets were set in the data values for
+ * brw_upload_cache.
+ */
+ drm_intel_bo_emit_reloc(bind_bo, i * 4,
+ brw->vs.surf_bo[i], 0,
+ I915_GEM_DOMAIN_INSTRUCTION, 0);
+ }
+ }
+
+ free(data);
+ }
+
+ return bind_bo;
+}
+
+/**
+ * Vertex shader surfaces (constant buffer).
+ *
+ * This consumes the state updates for the constant buffer needing
+ * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and
+ * CACHE_NEW_SURF_BIND for the binding table upload.
+ */
+static void prepare_vs_surfaces(struct brw_context *brw )
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ int i;
+ int nr_surfaces = 0;
+
+ brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER);
+
+ for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+ if (brw->vs.surf_bo[i] != NULL) {
+ nr_surfaces = i + 1;
+ }
+ }
+
+ if (brw->vs.nr_surfaces != nr_surfaces) {
+ brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
+ brw->vs.nr_surfaces = nr_surfaces;
+ }
+
+ /* Note that we don't end up updating the bind_bo if we don't have a
+ * surface to be pointing at. This should be relatively harmless, as it
+ * just slightly increases our working set size.
+ */
+ if (brw->vs.nr_surfaces != 0) {
+ dri_bo_unreference(brw->vs.bind_bo);
+ brw->vs.bind_bo = brw_vs_get_binding_table(brw);
+ }
+}
+
+const struct brw_tracked_state brw_vs_surfaces = {
+ .dirty = {
+ .mesa = (_NEW_PROGRAM_CONSTANTS),
+ .brw = (BRW_NEW_VERTEX_PROGRAM),
+ .cache = 0
+ },
+ .prepare = prepare_vs_surfaces,
+};
+
+
+
diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
new file mode 100644
index 00000000000..2292de94c44
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -0,0 +1,375 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+#include "main/texformat.h"
+#include "brw_context.h"
+#include "brw_util.h"
+#include "brw_wm.h"
+#include "brw_state.h"
+
+
+/** Return number of src args for given instruction */
+GLuint brw_wm_nr_args( GLuint opcode )
+{
+ switch (opcode) {
+ case WM_FRONTFACING:
+ case WM_PIXELXY:
+ return 0;
+ case WM_CINTERP:
+ case WM_WPOSXY:
+ case WM_DELTAXY:
+ return 1;
+ case WM_LINTERP:
+ case WM_PIXELW:
+ return 2;
+ case WM_FB_WRITE:
+ case WM_PINTERP:
+ return 3;
+ default:
+ assert(opcode < MAX_OPCODE);
+ return _mesa_num_inst_src_regs(opcode);
+ }
+}
+
+
+GLuint brw_wm_is_scalar_result( GLuint opcode )
+{
+ switch (opcode) {
+ case OPCODE_COS:
+ case OPCODE_EX2:
+ case OPCODE_LG2:
+ case OPCODE_POW:
+ case OPCODE_RCP:
+ case OPCODE_RSQ:
+ case OPCODE_SIN:
+ case OPCODE_DP3:
+ case OPCODE_DP4:
+ case OPCODE_DPH:
+ case OPCODE_DST:
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+
+/**
+ * Do GPU code generation for non-GLSL shader. non-GLSL shaders have
+ * no flow control instructions so we can more readily do SSA-style
+ * optimizations.
+ */
+static void
+brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
+{
+ /* Augment fragment program. Add instructions for pre- and
+ * post-fragment-program tasks such as interpolation and fogging.
+ */
+ brw_wm_pass_fp(c);
+
+ /* Translate to intermediate representation. Build register usage
+ * chains.
+ */
+ brw_wm_pass0(c);
+
+ /* Dead code removal.
+ */
+ brw_wm_pass1(c);
+
+ /* Register allocation.
+ * Divide by two because we operate on 16 pixels at a time and require
+ * two GRF entries for each logical shader register.
+ */
+ c->grf_limit = BRW_WM_MAX_GRF / 2;
+
+ brw_wm_pass2(c);
+
+ /* how many general-purpose registers are used */
+ c->prog_data.total_grf = c->max_wm_grf;
+
+ /* Scratch space is used for register spilling */
+ if (c->last_scratch) {
+ c->prog_data.total_scratch = c->last_scratch + 0x40;
+ }
+ else {
+ c->prog_data.total_scratch = 0;
+ }
+
+ /* Emit GEN4 code.
+ */
+ brw_wm_emit(c);
+}
+
+
+/**
+ * All Mesa program -> GPU code generation goes through this function.
+ * Depending on the instructions used (i.e. flow control instructions)
+ * we'll use one of two code generators.
+ */
+static void do_wm_prog( struct brw_context *brw,
+ struct brw_fragment_program *fp,
+ struct brw_wm_prog_key *key)
+{
+ struct brw_wm_compile *c;
+ const GLuint *program;
+ GLuint program_size;
+
+ c = brw->wm.compile_data;
+ if (c == NULL) {
+ brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data));
+ c = brw->wm.compile_data;
+ if (c == NULL) {
+ /* Ouch - big out of memory problem. Can't continue
+ * without triggering a segfault, no way to signal,
+ * so just return.
+ */
+ return;
+ }
+ } else {
+ memset(c, 0, sizeof(*brw->wm.compile_data));
+ }
+ memcpy(&c->key, key, sizeof(*key));
+
+ c->fp = fp;
+ c->env_param = brw->intel.ctx.FragmentProgram.Parameters;
+
+ brw_init_compile(brw, &c->func);
+
+ /* temporary sanity check assertion */
+ ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program));
+
+ /*
+ * Shader which use GLSL features such as flow control are handled
+ * differently from "simple" shaders.
+ */
+ if (fp->isGLSL) {
+ c->dispatch_width = 8;
+ brw_wm_glsl_emit(brw, c);
+ }
+ else {
+ c->dispatch_width = 16;
+ brw_wm_non_glsl_emit(brw, c);
+ }
+
+ if (INTEL_DEBUG & DEBUG_WM)
+ fprintf(stderr, "\n");
+
+ /* get the program
+ */
+ program = brw_get_program(&c->func, &program_size);
+
+ dri_bo_unreference(brw->wm.prog_bo);
+ brw->wm.prog_bo = brw_upload_cache( &brw->cache, BRW_WM_PROG,
+ &c->key, sizeof(c->key),
+ NULL, 0,
+ program, program_size,
+ &c->prog_data,
+ &brw->wm.prog_data );
+}
+
+
+
+static void brw_wm_populate_key( struct brw_context *brw,
+ struct brw_wm_prog_key *key )
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ const struct brw_fragment_program *fp =
+ (struct brw_fragment_program *)brw->fragment_program;
+ GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
+ GLuint lookup = 0;
+ GLuint line_aa;
+ GLuint i;
+
+ memset(key, 0, sizeof(*key));
+
+ /* Build the index for table lookup
+ */
+ /* _NEW_COLOR */
+ if (fp->program.UsesKill ||
+ ctx->Color.AlphaEnabled)
+ lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+ if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPTH))
+ lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+
+ /* _NEW_DEPTH */
+ if (ctx->Depth.Test)
+ lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
+
+ if (ctx->Depth.Test &&
+ ctx->Depth.Mask) /* ?? */
+ lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+
+ /* _NEW_STENCIL */
+ if (ctx->Stencil._Enabled) {
+ lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
+
+ if (ctx->Stencil.WriteMask[0] ||
+ ctx->Stencil.WriteMask[ctx->Stencil._BackFace])
+ lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
+ }
+
+ line_aa = AA_NEVER;
+
+ /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
+ if (ctx->Line.SmoothFlag) {
+ if (brw->intel.reduced_primitive == GL_LINES) {
+ line_aa = AA_ALWAYS;
+ }
+ else if (brw->intel.reduced_primitive == GL_TRIANGLES) {
+ if (ctx->Polygon.FrontMode == GL_LINE) {
+ line_aa = AA_SOMETIMES;
+
+ if (ctx->Polygon.BackMode == GL_LINE ||
+ (ctx->Polygon.CullFlag &&
+ ctx->Polygon.CullFaceMode == GL_BACK))
+ line_aa = AA_ALWAYS;
+ }
+ else if (ctx->Polygon.BackMode == GL_LINE) {
+ line_aa = AA_SOMETIMES;
+
+ if ((ctx->Polygon.CullFlag &&
+ ctx->Polygon.CullFaceMode == GL_FRONT))
+ line_aa = AA_ALWAYS;
+ }
+ }
+ }
+
+ brw_wm_lookup_iz(line_aa,
+ lookup,
+ uses_depth,
+ key);
+
+
+ /* BRW_NEW_WM_INPUT_DIMENSIONS */
+ key->proj_attrib_mask = brw->wm.input_size_masks[4-1];
+
+ /* _NEW_LIGHT */
+ key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
+
+ /* _NEW_HINT */
+ key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
+
+ /* _NEW_TEXTURE */
+ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+ const struct gl_texture_unit *unit = &ctx->Texture.Unit[i];
+
+ if (unit->_ReallyEnabled) {
+ const struct gl_texture_object *t = unit->_Current;
+ const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
+ if (img->InternalFormat == GL_YCBCR_MESA) {
+ key->yuvtex_mask |= 1 << i;
+ if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR)
+ key->yuvtex_swap_mask |= 1 << i;
+ }
+
+ key->tex_swizzles[i] = t->_Swizzle;
+ }
+ else {
+ key->tex_swizzles[i] = SWIZZLE_NOOP;
+ }
+ }
+
+ /* Shadow */
+ key->shadowtex_mask = fp->program.Base.ShadowSamplers;
+
+ /* _NEW_BUFFERS */
+ /*
+ * Include the draw buffer origin and height so that we can calculate
+ * fragment position values relative to the bottom left of the drawable,
+ * from the incoming screen origin relative position we get as part of our
+ * payload.
+ *
+ * We could avoid recompiling by including this as a constant referenced by
+ * our program, but if we were to do that it would also be nice to handle
+ * getting that constant updated at batchbuffer submit time (when we
+ * hold the lock and know where the buffer really is) rather than at emit
+ * time when we don't hold the lock and are just guessing. We could also
+ * just avoid using this as key data if the program doesn't use
+ * fragment.position.
+ *
+ * This pretty much becomes moot with DRI2 and redirected buffers anyway,
+ * as our origins will always be zero then.
+ */
+ if (brw->intel.driDrawable != NULL) {
+ key->origin_x = brw->intel.driDrawable->x;
+ key->origin_y = brw->intel.driDrawable->y;
+ key->drawable_height = brw->intel.driDrawable->h;
+ }
+
+ /* CACHE_NEW_VS_PROG */
+ key->vp_outputs_written = brw->vs.prog_data->outputs_written & DO_SETUP_BITS;
+
+ /* The unique fragment program ID */
+ key->program_string_id = fp->id;
+}
+
+
+static void brw_prepare_wm_prog(struct brw_context *brw)
+{
+ struct brw_wm_prog_key key;
+ struct brw_fragment_program *fp = (struct brw_fragment_program *)
+ brw->fragment_program;
+
+ brw_wm_populate_key(brw, &key);
+
+ /* Make an early check for the key.
+ */
+ dri_bo_unreference(brw->wm.prog_bo);
+ brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG,
+ &key, sizeof(key),
+ NULL, 0,
+ &brw->wm.prog_data);
+ if (brw->wm.prog_bo == NULL)
+ do_wm_prog(brw, fp, &key);
+}
+
+
+const struct brw_tracked_state brw_wm_prog = {
+ .dirty = {
+ .mesa = (_NEW_COLOR |
+ _NEW_DEPTH |
+ _NEW_HINT |
+ _NEW_STENCIL |
+ _NEW_POLYGON |
+ _NEW_LINE |
+ _NEW_LIGHT |
+ _NEW_BUFFERS |
+ _NEW_TEXTURE),
+ .brw = (BRW_NEW_FRAGMENT_PROGRAM |
+ BRW_NEW_WM_INPUT_DIMENSIONS |
+ BRW_NEW_REDUCED_PRIMITIVE),
+ .cache = CACHE_NEW_VS_PROG,
+ },
+ .prepare = brw_prepare_wm_prog
+};
+
diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h
new file mode 100644
index 00000000000..872b1f3ecf4
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm.h
@@ -0,0 +1,309 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#ifndef BRW_WM_H
+#define BRW_WM_H
+
+
+#include "shader/prog_instruction.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+
+#define SATURATE (1<<5)
+
+/* A big lookup table is used to figure out which and how many
+ * additional regs will inserted before the main payload in the WM
+ * program execution. These mainly relate to depth and stencil
+ * processing and the early-depth-test optimization.
+ */
+#define IZ_PS_KILL_ALPHATEST_BIT 0x1
+#define IZ_PS_COMPUTES_DEPTH_BIT 0x2
+#define IZ_DEPTH_WRITE_ENABLE_BIT 0x4
+#define IZ_DEPTH_TEST_ENABLE_BIT 0x8
+#define IZ_STENCIL_WRITE_ENABLE_BIT 0x10
+#define IZ_STENCIL_TEST_ENABLE_BIT 0x20
+#define IZ_BIT_MAX 0x40
+
+#define AA_NEVER 0
+#define AA_SOMETIMES 1
+#define AA_ALWAYS 2
+
+struct brw_wm_prog_key {
+ GLuint source_depth_reg:3;
+ GLuint aa_dest_stencil_reg:3;
+ GLuint dest_depth_reg:3;
+ GLuint nr_depth_regs:3;
+ GLuint computes_depth:1; /* could be derived from program string */
+ GLuint source_depth_to_render_target:1;
+ GLuint flat_shade:1;
+ GLuint linear_color:1; /**< linear interpolation vs perspective interp */
+ GLuint runtime_check_aads_emit:1;
+
+ GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */
+ GLuint shadowtex_mask:16;
+ GLuint yuvtex_mask:16;
+ GLuint yuvtex_swap_mask:16; /* UV swaped */
+
+ GLuint tex_swizzles[BRW_MAX_TEX_UNIT];
+
+ GLuint program_string_id:32;
+ GLuint origin_x, origin_y;
+ GLuint drawable_height;
+ GLuint vp_outputs_written;
+};
+
+
+/* A bit of a glossary:
+ *
+ * brw_wm_value: A computed value or program input. Values are
+ * constant, they are created once and are never modified. When a
+ * fragment program register is written or overwritten, new values are
+ * created fresh, preserving the rule that values are constant.
+ *
+ * brw_wm_ref: A reference to a value. Wherever a value used is by an
+ * instruction or as a program output, that is tracked with an
+ * instance of this struct. All references to a value occur after it
+ * is created. After the last reference, a value is dead and can be
+ * discarded.
+ *
+ * brw_wm_grf: Represents a physical hardware register. May be either
+ * empty or hold a value. Register allocation is the process of
+ * assigning values to grf registers. This occurs in pass2 and the
+ * brw_wm_grf struct is not used before that.
+ *
+ * Fragment program registers: These are time-varying constructs that
+ * are hard to reason about and which we translate away in pass0. A
+ * single fragment program register element (eg. temp[0].x) will be
+ * translated to one or more brw_wm_value structs, one for each time
+ * that temp[0].x is written to during the program.
+ */
+
+
+
+/* Used in pass2 to track register allocation.
+ */
+struct brw_wm_grf {
+ struct brw_wm_value *value;
+ GLuint nextuse;
+};
+
+struct brw_wm_value {
+ struct brw_reg hw_reg; /* emitted to this reg, may not always be there */
+ struct brw_wm_ref *lastuse;
+ struct brw_wm_grf *resident;
+ GLuint contributes_to_output:1;
+ GLuint spill_slot:16; /* if non-zero, spill immediately after calculation */
+};
+
+struct brw_wm_ref {
+ struct brw_reg hw_reg; /* nr filled in in pass2, everything else, pass0 */
+ struct brw_wm_value *value;
+ struct brw_wm_ref *prevuse;
+ GLuint unspill_reg:7; /* unspill to reg */
+ GLuint emitted:1;
+ GLuint insn:24;
+};
+
+struct brw_wm_constref {
+ const struct brw_wm_ref *ref;
+ GLfloat constval;
+};
+
+
+struct brw_wm_instruction {
+ struct brw_wm_value *dst[4];
+ struct brw_wm_ref *src[3][4];
+ GLuint opcode:8;
+ GLuint saturate:1;
+ GLuint writemask:4;
+ GLuint tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */
+ GLuint tex_idx:3; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */
+ GLuint tex_shadow:1; /* do shadow comparison? */
+ GLuint eot:1; /* End of thread indicator for FB_WRITE*/
+ GLuint target:10; /* target binding table index for FB_WRITE*/
+};
+
+
+#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3)
+#define BRW_WM_MAX_GRF 128 /* hardware limit */
+#define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4)
+#define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12)
+#define BRW_WM_MAX_PARAM 256
+#define BRW_WM_MAX_CONST 256
+#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS
+#define BRW_WM_MAX_SUBROUTINE 16
+
+
+
+/* New opcodes to track internal operations required for WM unit.
+ * These are added early so that the registers used can be tracked,
+ * freed and reused like those of other instructions.
+ */
+#define WM_PIXELXY (MAX_OPCODE)
+#define WM_DELTAXY (MAX_OPCODE + 1)
+#define WM_PIXELW (MAX_OPCODE + 2)
+#define WM_LINTERP (MAX_OPCODE + 3)
+#define WM_PINTERP (MAX_OPCODE + 4)
+#define WM_CINTERP (MAX_OPCODE + 5)
+#define WM_WPOSXY (MAX_OPCODE + 6)
+#define WM_FB_WRITE (MAX_OPCODE + 7)
+#define WM_FRONTFACING (MAX_OPCODE + 8)
+#define MAX_WM_OPCODE (MAX_OPCODE + 9)
+
+#define PROGRAM_PAYLOAD (PROGRAM_FILE_MAX)
+#define PAYLOAD_DEPTH (FRAG_ATTRIB_MAX)
+
+struct brw_wm_compile {
+ struct brw_compile func;
+ struct brw_wm_prog_key key;
+ struct brw_wm_prog_data prog_data;
+
+ struct brw_fragment_program *fp;
+
+ GLfloat (*env_param)[4];
+
+ enum {
+ START,
+ PASS2_DONE
+ } state;
+
+ /* Initial pass - translate fp instructions to fp instructions,
+ * simplifying and adding instructions for interpolation and
+ * framebuffer writes.
+ */
+ struct prog_instruction prog_instructions[BRW_WM_MAX_INSN];
+ GLuint nr_fp_insns;
+ GLuint fp_temp;
+ GLuint fp_interp_emitted;
+ GLuint fp_fragcolor_emitted;
+
+ struct prog_src_register pixel_xy;
+ struct prog_src_register delta_xy;
+ struct prog_src_register pixel_w;
+
+
+ struct brw_wm_value vreg[BRW_WM_MAX_VREG];
+ GLuint nr_vreg;
+
+ struct brw_wm_value creg[BRW_WM_MAX_PARAM];
+ GLuint nr_creg;
+
+ struct {
+ struct brw_wm_value depth[4]; /* includes r0/r1 */
+ struct brw_wm_value input_interp[FRAG_ATTRIB_MAX];
+ } payload;
+
+
+ const struct brw_wm_ref *pass0_fp_reg[PROGRAM_PAYLOAD+1][256][4];
+
+ struct brw_wm_ref undef_ref;
+ struct brw_wm_value undef_value;
+
+ struct brw_wm_ref refs[BRW_WM_MAX_REF];
+ GLuint nr_refs;
+
+ struct brw_wm_instruction instruction[BRW_WM_MAX_INSN];
+ GLuint nr_insns;
+
+ struct brw_wm_constref constref[BRW_WM_MAX_CONST];
+ GLuint nr_constrefs;
+
+ struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2];
+
+ GLuint grf_limit;
+ GLuint max_wm_grf;
+ GLuint last_scratch;
+
+ GLuint cur_inst; /**< index of current instruction */
+
+ GLboolean out_of_regs; /**< ran out of GRF registers? */
+
+ /** Mapping from Mesa registers to hardware registers */
+ struct {
+ GLboolean inited;
+ struct brw_reg reg;
+ } wm_regs[PROGRAM_PAYLOAD+1][256][4];
+
+ GLboolean used_grf[BRW_WM_MAX_GRF];
+ GLuint first_free_grf;
+ struct brw_reg stack;
+ struct brw_reg emit_mask_reg;
+ GLuint tmp_regs[BRW_WM_MAX_GRF];
+ GLuint tmp_index;
+ GLuint tmp_max;
+ GLuint subroutines[BRW_WM_MAX_SUBROUTINE];
+ GLuint dispatch_width;
+
+ /** we may need up to 3 constants per instruction (if use_const_buffer) */
+ struct {
+ GLint index;
+ struct brw_reg reg;
+ } current_const[3];
+};
+
+
+GLuint brw_wm_nr_args( GLuint opcode );
+GLuint brw_wm_is_scalar_result( GLuint opcode );
+
+void brw_wm_pass_fp( struct brw_wm_compile *c );
+void brw_wm_pass0( struct brw_wm_compile *c );
+void brw_wm_pass1( struct brw_wm_compile *c );
+void brw_wm_pass2( struct brw_wm_compile *c );
+void brw_wm_emit( struct brw_wm_compile *c );
+
+void brw_wm_print_value( struct brw_wm_compile *c,
+ struct brw_wm_value *value );
+
+void brw_wm_print_ref( struct brw_wm_compile *c,
+ struct brw_wm_ref *ref );
+
+void brw_wm_print_insn( struct brw_wm_compile *c,
+ struct brw_wm_instruction *inst );
+
+void brw_wm_print_program( struct brw_wm_compile *c,
+ const char *stage );
+
+void brw_wm_lookup_iz( GLuint line_aa,
+ GLuint lookup,
+ GLboolean ps_uses_depth,
+ struct brw_wm_prog_key *key );
+
+GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
+void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);
+
+void emit_ddxy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ GLboolean is_ddx,
+ const struct brw_reg *arg0);
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_wm_debug.c b/src/gallium/drivers/i965/brw_wm_debug.c
new file mode 100644
index 00000000000..220821087c1
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_debug.c
@@ -0,0 +1,174 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "brw_context.h"
+#include "brw_wm.h"
+
+
+void brw_wm_print_value( struct brw_wm_compile *c,
+ struct brw_wm_value *value )
+{
+ assert(value);
+ if (c->state >= PASS2_DONE)
+ brw_print_reg(value->hw_reg);
+ else if( value == &c->undef_value )
+ _mesa_printf("undef");
+ else if( value - c->vreg >= 0 &&
+ value - c->vreg < BRW_WM_MAX_VREG)
+ _mesa_printf("r%d", value - c->vreg);
+ else if (value - c->creg >= 0 &&
+ value - c->creg < BRW_WM_MAX_PARAM)
+ _mesa_printf("c%d", value - c->creg);
+ else if (value - c->payload.input_interp >= 0 &&
+ value - c->payload.input_interp < FRAG_ATTRIB_MAX)
+ _mesa_printf("i%d", value - c->payload.input_interp);
+ else if (value - c->payload.depth >= 0 &&
+ value - c->payload.depth < FRAG_ATTRIB_MAX)
+ _mesa_printf("d%d", value - c->payload.depth);
+ else
+ _mesa_printf("?");
+}
+
+void brw_wm_print_ref( struct brw_wm_compile *c,
+ struct brw_wm_ref *ref )
+{
+ struct brw_reg hw_reg = ref->hw_reg;
+
+ if (ref->unspill_reg)
+ _mesa_printf("UNSPILL(%x)/", ref->value->spill_slot);
+
+ if (c->state >= PASS2_DONE)
+ brw_print_reg(ref->hw_reg);
+ else {
+ _mesa_printf("%s", hw_reg.negate ? "-" : "");
+ _mesa_printf("%s", hw_reg.abs ? "abs/" : "");
+ brw_wm_print_value(c, ref->value);
+ if ((hw_reg.nr&1) || hw_reg.subnr) {
+ _mesa_printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr);
+ }
+ }
+}
+
+void brw_wm_print_insn( struct brw_wm_compile *c,
+ struct brw_wm_instruction *inst )
+{
+ GLuint i, arg;
+ GLuint nr_args = brw_wm_nr_args(inst->opcode);
+
+ _mesa_printf("[");
+ for (i = 0; i < 4; i++) {
+ if (inst->dst[i]) {
+ brw_wm_print_value(c, inst->dst[i]);
+ if (inst->dst[i]->spill_slot)
+ _mesa_printf("/SPILL(%x)",inst->dst[i]->spill_slot);
+ }
+ else
+ _mesa_printf("#");
+ if (i < 3)
+ _mesa_printf(",");
+ }
+ _mesa_printf("]");
+
+ if (inst->writemask != WRITEMASK_XYZW)
+ _mesa_printf(".%s%s%s%s",
+ GET_BIT(inst->writemask, 0) ? "x" : "",
+ GET_BIT(inst->writemask, 1) ? "y" : "",
+ GET_BIT(inst->writemask, 2) ? "z" : "",
+ GET_BIT(inst->writemask, 3) ? "w" : "");
+
+ switch (inst->opcode) {
+ case WM_PIXELXY:
+ _mesa_printf(" = PIXELXY");
+ break;
+ case WM_DELTAXY:
+ _mesa_printf(" = DELTAXY");
+ break;
+ case WM_PIXELW:
+ _mesa_printf(" = PIXELW");
+ break;
+ case WM_WPOSXY:
+ _mesa_printf(" = WPOSXY");
+ break;
+ case WM_PINTERP:
+ _mesa_printf(" = PINTERP");
+ break;
+ case WM_LINTERP:
+ _mesa_printf(" = LINTERP");
+ break;
+ case WM_CINTERP:
+ _mesa_printf(" = CINTERP");
+ break;
+ case WM_FB_WRITE:
+ _mesa_printf(" = FB_WRITE");
+ break;
+ case WM_FRONTFACING:
+ _mesa_printf(" = FRONTFACING");
+ break;
+ default:
+ _mesa_printf(" = %s", _mesa_opcode_string(inst->opcode));
+ break;
+ }
+
+ if (inst->saturate)
+ _mesa_printf("_SAT");
+
+ for (arg = 0; arg < nr_args; arg++) {
+
+ _mesa_printf(" [");
+
+ for (i = 0; i < 4; i++) {
+ if (inst->src[arg][i]) {
+ brw_wm_print_ref(c, inst->src[arg][i]);
+ }
+ else
+ _mesa_printf("%%");
+
+ if (i < 3)
+ _mesa_printf(",");
+ else
+ _mesa_printf("]");
+ }
+ }
+ _mesa_printf("\n");
+}
+
+void brw_wm_print_program( struct brw_wm_compile *c,
+ const char *stage )
+{
+ GLuint insn;
+
+ _mesa_printf("%s:\n", stage);
+ for (insn = 0; insn < c->nr_insns; insn++)
+ brw_wm_print_insn(c, &c->instruction[insn]);
+ _mesa_printf("\n");
+}
+
diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c
new file mode 100644
index 00000000000..bf80a2942a4
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_emit.c
@@ -0,0 +1,1509 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "main/macros.h"
+#include "brw_context.h"
+#include "brw_wm.h"
+
+/* Not quite sure how correct this is - need to understand horiz
+ * vs. vertical strides a little better.
+ */
+static INLINE struct brw_reg sechalf( struct brw_reg reg )
+{
+ if (reg.vstride)
+ reg.nr++;
+ return reg;
+}
+
+/* Payload R0:
+ *
+ * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
+ * corresponding to each of the 16 execution channels.
+ * R0.1..8 -- ?
+ * R1.0 -- triangle vertex 0.X
+ * R1.1 -- triangle vertex 0.Y
+ * R1.2 -- tile 0 x,y coords (2 packed uwords)
+ * R1.3 -- tile 1 x,y coords (2 packed uwords)
+ * R1.4 -- tile 2 x,y coords (2 packed uwords)
+ * R1.5 -- tile 3 x,y coords (2 packed uwords)
+ * R1.6 -- ?
+ * R1.7 -- ?
+ * R1.8 -- ?
+ */
+
+
+static void emit_pixel_xy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask)
+{
+ struct brw_reg r1 = brw_vec1_grf(1, 0);
+ struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ /* Calculate pixel centers by adding 1 or 0 to each of the
+ * micro-tile coordinates passed in r1.
+ */
+ if (mask & WRITEMASK_X) {
+ brw_ADD(p,
+ vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
+ stride(suboffset(r1_uw, 4), 2, 4, 0),
+ brw_imm_v(0x10101010));
+ }
+
+ if (mask & WRITEMASK_Y) {
+ brw_ADD(p,
+ vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
+ stride(suboffset(r1_uw,5), 2, 4, 0),
+ brw_imm_v(0x11001100));
+ }
+
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+}
+
+
+
+static void emit_delta_xy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
+{
+ struct brw_reg r1 = brw_vec1_grf(1, 0);
+
+ /* Calc delta X,Y by subtracting origin in r1 from the pixel
+ * centers.
+ */
+ if (mask & WRITEMASK_X) {
+ brw_ADD(p,
+ dst[0],
+ retype(arg0[0], BRW_REGISTER_TYPE_UW),
+ negate(r1));
+ }
+
+ if (mask & WRITEMASK_Y) {
+ brw_ADD(p,
+ dst[1],
+ retype(arg0[1], BRW_REGISTER_TYPE_UW),
+ negate(suboffset(r1,1)));
+
+ }
+}
+
+static void emit_wpos_xy(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
+{
+ struct brw_compile *p = &c->func;
+
+ /* Calculate the pixel offset from window bottom left into destination
+ * X and Y channels.
+ */
+ if (mask & WRITEMASK_X) {
+ /* X' = X - origin */
+ brw_ADD(p,
+ dst[0],
+ retype(arg0[0], BRW_REGISTER_TYPE_W),
+ brw_imm_d(0 - c->key.origin_x));
+ }
+
+ if (mask & WRITEMASK_Y) {
+ /* Y' = height - (Y - origin_y) = height + origin_y - Y */
+ brw_ADD(p,
+ dst[1],
+ negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
+ brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
+ }
+}
+
+
+static void emit_pixel_w( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas)
+{
+ /* Don't need this if all you are doing is interpolating color, for
+ * instance.
+ */
+ if (mask & WRITEMASK_W) {
+ struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
+
+ /* Calc 1/w - just linterp wpos[3] optimized by putting the
+ * result straight into a message reg.
+ */
+ brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
+ brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
+
+ /* Calc w */
+ brw_math_16( p, dst[3],
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 2, brw_null_reg(),
+ BRW_MATH_PRECISION_FULL);
+ }
+}
+
+
+
+static void emit_linterp( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas )
+{
+ struct brw_reg interp[4];
+ GLuint nr = arg0[0].nr;
+ GLuint i;
+
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
+ brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+ }
+ }
+}
+
+
+static void emit_pinterp( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas,
+ const struct brw_reg *w)
+{
+ struct brw_reg interp[4];
+ GLuint nr = arg0[0].nr;
+ GLuint i;
+
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
+ brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+ }
+ }
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_MUL(p, dst[i], dst[i], w[3]);
+ }
+ }
+}
+
+
+static void emit_cinterp( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0 )
+{
+ struct brw_reg interp[4];
+ GLuint nr = arg0[0].nr;
+ GLuint i;
+
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */
+ }
+ }
+}
+
+/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
+static void emit_frontfacing( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask )
+{
+ struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
+ GLuint i;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_MOV(p, dst[i], brw_imm_f(0.0));
+ }
+ }
+
+ /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
+ * us front face
+ */
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_MOV(p, dst[i], brw_imm_f(1.0));
+ }
+ }
+ brw_set_predicate_control_flag_value(p, 0xff);
+}
+
+/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
+ * looking like:
+ *
+ * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
+ *
+ * and we're trying to produce:
+ *
+ * DDX DDY
+ * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
+ * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
+ * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
+ * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
+ * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
+ * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
+ * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
+ * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
+ *
+ * and add another set of two more subspans if in 16-pixel dispatch mode.
+ *
+ * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
+ * for each pair, and vertstride = 2 jumps us 2 elements after processing a
+ * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
+ * between each other. We could probably do it like ddx and swizzle the right
+ * order later, but bail for now and just produce
+ * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
+ */
+void emit_ddxy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ GLboolean is_ddx,
+ const struct brw_reg *arg0)
+{
+ int i;
+ struct brw_reg src0, src1;
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 1);
+ for (i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ if (is_ddx) {
+ src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ } else {
+ src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ }
+ brw_ADD(p, dst[i], src0, negate(src1));
+ }
+ }
+ if (mask & SATURATE)
+ brw_set_saturate(p, 0);
+}
+
+static void emit_alu1( struct brw_compile *p,
+ struct brw_instruction *(*func)(struct brw_compile *,
+ struct brw_reg,
+ struct brw_reg),
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0 )
+{
+ GLuint i;
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 1);
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ func(p, dst[i], arg0[i]);
+ }
+ }
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_alu2( struct brw_compile *p,
+ struct brw_instruction *(*func)(struct brw_compile *,
+ struct brw_reg,
+ struct brw_reg,
+ struct brw_reg),
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ GLuint i;
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 1);
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ func(p, dst[i], arg0[i], arg1[i]);
+ }
+ }
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_mad( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2 )
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_MUL(p, dst[i], arg0[i], arg1[i]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_ADD(p, dst[i], dst[i], arg2[i]);
+ brw_set_saturate(p, 0);
+ }
+ }
+}
+
+static void emit_trunc( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_RNDZ(p, dst[i], arg0[i]);
+ }
+ }
+}
+
+static void emit_lrp( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2 )
+{
+ GLuint i;
+
+ /* Uses dst as a temporary:
+ */
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ /* Can I use the LINE instruction for this?
+ */
+ brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
+ brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MAC(p, dst[i], arg0[i], arg1[i]);
+ brw_set_saturate(p, 0);
+ }
+ }
+}
+
+static void emit_sop( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ GLuint cond,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_MOV(p, dst[i], brw_imm_f(0));
+ brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
+ brw_MOV(p, dst[i], brw_imm_f(1.0));
+ brw_set_predicate_control_flag_value(p, 0xff);
+ }
+ }
+}
+
+static void emit_slt( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
+}
+
+static void emit_sle( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
+}
+
+static void emit_sgt( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
+}
+
+static void emit_sge( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
+}
+
+static void emit_seq( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
+}
+
+static void emit_sne( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
+}
+
+static void emit_cmp( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2 )
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[i], arg2[i]);
+ brw_set_saturate(p, 0);
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[i], arg1[i]);
+ brw_set_saturate(p, 0);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ }
+ }
+}
+
+static void emit_max( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[i], arg0[i]);
+ brw_set_saturate(p, 0);
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[i], arg1[i]);
+ brw_set_saturate(p, 0);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ }
+ }
+}
+
+static void emit_min( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[i], arg1[i]);
+ brw_set_saturate(p, 0);
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[i], arg0[i]);
+ brw_set_saturate(p, 0);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ }
+ }
+}
+
+
+static void emit_dp3( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return; /* Do not emit dead code */
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
+ brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
+ brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_dp4( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return; /* Do not emit dead code */
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
+ brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
+ brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
+ brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_dph( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return; /* Do not emit dead code */
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
+ brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
+ brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
+ brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_xpd( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1 )
+{
+ GLuint i;
+
+ assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
+
+ for (i = 0 ; i < 3; i++) {
+ if (mask & (1<<i)) {
+ GLuint i2 = (i+2)%3;
+ GLuint i1 = (i+1)%3;
+
+ brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
+ brw_set_saturate(p, 0);
+ }
+ }
+}
+
+
+static void emit_math1( struct brw_compile *p,
+ GLuint function,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0 )
+{
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return; /* Do not emit dead code */
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
+ brw_MOV(p, brw_message_reg(2), arg0[0]);
+
+ /* Send two messages to perform all 16 operations:
+ */
+ brw_math_16(p,
+ dst[dst_chan],
+ function,
+ (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ 2,
+ brw_null_reg(),
+ BRW_MATH_PRECISION_FULL);
+}
+
+
+static void emit_math2( struct brw_compile *p,
+ GLuint function,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
+{
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return; /* Do not emit dead code */
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
+ brw_push_insn_state(p);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, brw_message_reg(2), arg0[0]);
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, brw_message_reg(3), arg1[0]);
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
+
+
+ /* Send two messages to perform all 16 operations:
+ */
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_math(p,
+ dst[dst_chan],
+ function,
+ (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ 2,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p,
+ offset(dst[dst_chan],1),
+ function,
+ (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ 4,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ brw_pop_insn_state(p);
+}
+
+
+
+static void emit_tex( struct brw_wm_compile *c,
+ const struct brw_wm_instruction *inst,
+ struct brw_reg *dst,
+ GLuint dst_flags,
+ struct brw_reg *arg )
+{
+ struct brw_compile *p = &c->func;
+ GLuint msgLength, responseLength;
+ GLuint i, nr;
+ GLuint emit;
+ GLuint msg_type;
+
+ /* How many input regs are there?
+ */
+ switch (inst->tex_idx) {
+ case TEXTURE_1D_INDEX:
+ emit = WRITEMASK_X;
+ nr = 1;
+ break;
+ case TEXTURE_2D_INDEX:
+ case TEXTURE_RECT_INDEX:
+ emit = WRITEMASK_XY;
+ nr = 2;
+ break;
+ case TEXTURE_3D_INDEX:
+ case TEXTURE_CUBE_INDEX:
+ emit = WRITEMASK_XYZ;
+ nr = 3;
+ break;
+ default:
+ /* unexpected target */
+ abort();
+ }
+
+ if (inst->tex_shadow) {
+ nr = 4;
+ emit |= WRITEMASK_W;
+ }
+
+ msgLength = 1;
+
+ for (i = 0; i < nr; i++) {
+ static const GLuint swz[4] = {0,1,2,2};
+ if (emit & (1<<i))
+ brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
+ else
+ brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
+ msgLength += 2;
+ }
+
+ responseLength = 8; /* always */
+
+ if (BRW_IS_IGDNG(p->brw)) {
+ if (inst->tex_shadow)
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
+ } else {
+ if (inst->tex_shadow)
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
+ }
+
+ brw_SAMPLE(p,
+ retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
+ 1,
+ retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
+ SURF_INDEX_TEXTURE(inst->tex_unit),
+ inst->tex_unit, /* sampler */
+ inst->writemask,
+ msg_type,
+ responseLength,
+ msgLength,
+ 0,
+ 1,
+ BRW_SAMPLER_SIMD_MODE_SIMD16);
+}
+
+
+static void emit_txb( struct brw_wm_compile *c,
+ const struct brw_wm_instruction *inst,
+ struct brw_reg *dst,
+ GLuint dst_flags,
+ struct brw_reg *arg )
+{
+ struct brw_compile *p = &c->func;
+ GLuint msgLength;
+ GLuint msg_type;
+ /* Shadow ignored for txb.
+ */
+ switch (inst->tex_idx) {
+ case TEXTURE_1D_INDEX:
+ brw_MOV(p, brw_message_reg(2), arg[0]);
+ brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+ break;
+ case TEXTURE_2D_INDEX:
+ case TEXTURE_RECT_INDEX:
+ brw_MOV(p, brw_message_reg(2), arg[0]);
+ brw_MOV(p, brw_message_reg(4), arg[1]);
+ brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+ break;
+ case TEXTURE_3D_INDEX:
+ case TEXTURE_CUBE_INDEX:
+ brw_MOV(p, brw_message_reg(2), arg[0]);
+ brw_MOV(p, brw_message_reg(4), arg[1]);
+ brw_MOV(p, brw_message_reg(6), arg[2]);
+ break;
+ default:
+ /* unexpected target */
+ abort();
+ }
+
+ brw_MOV(p, brw_message_reg(8), arg[3]);
+ msgLength = 9;
+
+ if (BRW_IS_IGDNG(p->brw))
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+
+ brw_SAMPLE(p,
+ retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
+ 1,
+ retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
+ SURF_INDEX_TEXTURE(inst->tex_unit),
+ inst->tex_unit, /* sampler */
+ inst->writemask,
+ msg_type,
+ 8, /* responseLength */
+ msgLength,
+ 0,
+ 1,
+ BRW_SAMPLER_SIMD_MODE_SIMD16);
+}
+
+
+static void emit_lit( struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0 )
+{
+ assert((mask & WRITEMASK_XW) == 0);
+
+ if (mask & WRITEMASK_Y) {
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MOV(p, dst[1], arg0[0]);
+ brw_set_saturate(p, 0);
+ }
+
+ if (mask & WRITEMASK_Z) {
+ emit_math2(p, BRW_MATH_FUNCTION_POW,
+ &dst[2],
+ WRITEMASK_X | (mask & SATURATE),
+ &arg0[1],
+ &arg0[3]);
+ }
+
+ /* Ordinarily you'd use an iff statement to skip or shortcircuit
+ * some of the POW calculations above, but 16-wide iff statements
+ * seem to lock c1 hardware, so this is a nasty workaround:
+ */
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
+ {
+ if (mask & WRITEMASK_Y)
+ brw_MOV(p, dst[1], brw_imm_f(0));
+
+ if (mask & WRITEMASK_Z)
+ brw_MOV(p, dst[2], brw_imm_f(0));
+ }
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
+
+/* Kill pixel - set execution mask to zero for those pixels which
+ * fail.
+ */
+static void emit_kil( struct brw_wm_compile *c,
+ struct brw_reg *arg0)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+ GLuint i;
+
+ /* XXX - usually won't need 4 compares!
+ */
+ for (i = 0; i < 4; i++) {
+ brw_push_insn_state(p);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
+ brw_set_predicate_control_flag_value(p, 0xff);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_AND(p, r0uw, brw_flag_reg(), r0uw);
+ brw_pop_insn_state(p);
+ }
+}
+
+/* KIL_NV kills the pixels that are currently executing, not based on a test
+ * of the arguments.
+ */
+static void emit_kil_nv( struct brw_wm_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+ brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
+ brw_pop_insn_state(p);
+}
+
+static void fire_fb_write( struct brw_wm_compile *c,
+ GLuint base_reg,
+ GLuint nr,
+ GLuint target,
+ GLuint eot )
+{
+ struct brw_compile *p = &c->func;
+
+ /* Pass through control information:
+ */
+/* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
+ {
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p,
+ brw_message_reg(base_reg + 1),
+ brw_vec8_grf(1, 0));
+ brw_pop_insn_state(p);
+ }
+
+ /* Send framebuffer write message: */
+/* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
+ brw_fb_WRITE(p,
+ retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
+ base_reg,
+ retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+ target,
+ nr,
+ 0,
+ eot);
+}
+
+
+static void emit_aa( struct brw_wm_compile *c,
+ struct brw_reg *arg1,
+ GLuint reg )
+{
+ struct brw_compile *p = &c->func;
+ GLuint comp = c->key.aa_dest_stencil_reg / 2;
+ GLuint off = c->key.aa_dest_stencil_reg % 2;
+ struct brw_reg aa = offset(arg1[comp], off);
+
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
+ brw_MOV(p, brw_message_reg(reg), aa);
+ brw_pop_insn_state(p);
+}
+
+
+/* Post-fragment-program processing. Send the results to the
+ * framebuffer.
+ * \param arg0 the fragment color
+ * \param arg1 the pass-through depth value
+ * \param arg2 the shader-computed depth value
+ */
+static void emit_fb_write( struct brw_wm_compile *c,
+ struct brw_reg *arg0,
+ struct brw_reg *arg1,
+ struct brw_reg *arg2,
+ GLuint target,
+ GLuint eot)
+{
+ struct brw_compile *p = &c->func;
+ GLuint nr = 2;
+ GLuint channel;
+
+ /* Reserve a space for AA - may not be needed:
+ */
+ if (c->key.aa_dest_stencil_reg)
+ nr += 1;
+
+ /* I don't really understand how this achieves the color interleave
+ * (ie RGBARGBA) in the result: [Do the saturation here]
+ */
+ {
+ brw_push_insn_state(p);
+
+ for (channel = 0; channel < 4; channel++) {
+ /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
+ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p,
+ brw_message_reg(nr + channel),
+ arg0[channel]);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p,
+ brw_message_reg(nr + channel + 4),
+ sechalf(arg0[channel]));
+ }
+
+ /* skip over the regs populated above:
+ */
+ nr += 8;
+
+ brw_pop_insn_state(p);
+ }
+
+ if (c->key.source_depth_to_render_target)
+ {
+ if (c->key.computes_depth)
+ brw_MOV(p, brw_message_reg(nr), arg2[2]);
+ else
+ brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
+
+ nr += 2;
+ }
+
+ if (c->key.dest_depth_reg)
+ {
+ GLuint comp = c->key.dest_depth_reg / 2;
+ GLuint off = c->key.dest_depth_reg % 2;
+
+ if (off != 0) {
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
+ /* 2nd half? */
+ brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
+ brw_pop_insn_state(p);
+ }
+ else {
+ brw_MOV(p, brw_message_reg(nr), arg1[comp]);
+ }
+ nr += 2;
+ }
+
+ if (!c->key.runtime_check_aads_emit) {
+ if (c->key.aa_dest_stencil_reg)
+ emit_aa(c, arg1, 2);
+
+ fire_fb_write(c, 0, nr, target, eot);
+ }
+ else {
+ struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
+ struct brw_reg ip = brw_ip_reg();
+ struct brw_instruction *jmp;
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+ brw_AND(p,
+ v1_null_ud,
+ get_element_ud(brw_vec8_grf(1,0), 6),
+ brw_imm_ud(1<<26));
+
+ jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+ {
+ emit_aa(c, arg1, 2);
+ fire_fb_write(c, 0, nr, target, eot);
+ /* note - thread killed in subroutine */
+ }
+ brw_land_fwd_jump(p, jmp);
+
+ /* ELSE: Shuffle up one register to fill in the hole left for AA:
+ */
+ fire_fb_write(c, 1, nr-1, target, eot);
+ }
+}
+
+
+/**
+ * Move a GPR to scratch memory.
+ */
+static void emit_spill( struct brw_wm_compile *c,
+ struct brw_reg reg,
+ GLuint slot )
+{
+ struct brw_compile *p = &c->func;
+
+ /*
+ mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr }
+ */
+ brw_MOV(p, brw_message_reg(2), reg);
+
+ /*
+ mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
+ send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
+ */
+ brw_dp_WRITE_16(p,
+ retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
+ slot);
+}
+
+
+/**
+ * Load a GPR from scratch memory.
+ */
+static void emit_unspill( struct brw_wm_compile *c,
+ struct brw_reg reg,
+ GLuint slot )
+{
+ struct brw_compile *p = &c->func;
+
+ /* Slot 0 is the undef value.
+ */
+ if (slot == 0) {
+ brw_MOV(p, reg, brw_imm_f(0));
+ return;
+ }
+
+ /*
+ mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask }
+ send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 }
+ */
+
+ brw_dp_READ_16(p,
+ retype(vec16(reg), BRW_REGISTER_TYPE_UW),
+ slot);
+}
+
+
+/**
+ * Retrieve up to 4 GEN4 register pairs for the given wm reg:
+ * Args with unspill_reg != 0 will be loaded from scratch memory.
+ */
+static void get_argument_regs( struct brw_wm_compile *c,
+ struct brw_wm_ref *arg[],
+ struct brw_reg *regs )
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (arg[i]) {
+ if (arg[i]->unspill_reg)
+ emit_unspill(c,
+ brw_vec8_grf(arg[i]->unspill_reg, 0),
+ arg[i]->value->spill_slot);
+
+ regs[i] = arg[i]->hw_reg;
+ }
+ else {
+ regs[i] = brw_null_reg();
+ }
+ }
+}
+
+
+/**
+ * For values that have a spill_slot!=0, write those regs to scratch memory.
+ */
+static void spill_values( struct brw_wm_compile *c,
+ struct brw_wm_value *values,
+ GLuint nr )
+{
+ GLuint i;
+
+ for (i = 0; i < nr; i++)
+ if (values[i].spill_slot)
+ emit_spill(c, values[i].hw_reg, values[i].spill_slot);
+}
+
+
+/* Emit the fragment program instructions here.
+ */
+void brw_wm_emit( struct brw_wm_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ GLuint insn;
+
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
+ /* Check if any of the payload regs need to be spilled:
+ */
+ spill_values(c, c->payload.depth, 4);
+ spill_values(c, c->creg, c->nr_creg);
+ spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
+
+
+ for (insn = 0; insn < c->nr_insns; insn++) {
+
+ struct brw_wm_instruction *inst = &c->instruction[insn];
+ struct brw_reg args[3][4], dst[4];
+ GLuint i, dst_flags;
+
+ /* Get argument regs:
+ */
+ for (i = 0; i < 3; i++)
+ get_argument_regs(c, inst->src[i], args[i]);
+
+ /* Get dest regs:
+ */
+ for (i = 0; i < 4; i++)
+ if (inst->dst[i])
+ dst[i] = inst->dst[i]->hw_reg;
+ else
+ dst[i] = brw_null_reg();
+
+ /* Flags
+ */
+ dst_flags = inst->writemask;
+ if (inst->saturate)
+ dst_flags |= SATURATE;
+
+ switch (inst->opcode) {
+ /* Generated instructions for calculating triangle interpolants:
+ */
+ case WM_PIXELXY:
+ emit_pixel_xy(p, dst, dst_flags);
+ break;
+
+ case WM_DELTAXY:
+ emit_delta_xy(p, dst, dst_flags, args[0]);
+ break;
+
+ case WM_WPOSXY:
+ emit_wpos_xy(c, dst, dst_flags, args[0]);
+ break;
+
+ case WM_PIXELW:
+ emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case WM_LINTERP:
+ emit_linterp(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case WM_PINTERP:
+ emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
+ break;
+
+ case WM_CINTERP:
+ emit_cinterp(p, dst, dst_flags, args[0]);
+ break;
+
+ case WM_FB_WRITE:
+ emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
+ break;
+
+ case WM_FRONTFACING:
+ emit_frontfacing(p, dst, dst_flags);
+ break;
+
+ /* Straightforward arithmetic:
+ */
+ case OPCODE_ADD:
+ emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case OPCODE_FRC:
+ emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
+ break;
+
+ case OPCODE_FLR:
+ emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
+ break;
+
+ case OPCODE_DDX:
+ emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
+ break;
+
+ case OPCODE_DDY:
+ emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
+ break;
+
+ case OPCODE_DP3:
+ emit_dp3(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case OPCODE_DP4:
+ emit_dp4(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case OPCODE_DPH:
+ emit_dph(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case OPCODE_TRUNC:
+ emit_trunc(p, dst, dst_flags, args[0]);
+ break;
+
+ case OPCODE_LRP:
+ emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
+ break;
+
+ case OPCODE_MAD:
+ emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
+ break;
+
+ case OPCODE_MOV:
+ case OPCODE_SWZ:
+ emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
+ break;
+
+ case OPCODE_MUL:
+ emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case OPCODE_XPD:
+ emit_xpd(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ /* Higher math functions:
+ */
+ case OPCODE_RCP:
+ emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
+ break;
+
+ case OPCODE_RSQ:
+ emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
+ break;
+
+ case OPCODE_SIN:
+ emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
+ break;
+
+ case OPCODE_COS:
+ emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
+ break;
+
+ case OPCODE_EX2:
+ emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
+ break;
+
+ case OPCODE_LG2:
+ emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
+ break;
+
+ case OPCODE_SCS:
+ /* There is an scs math function, but it would need some
+ * fixup for 16-element execution.
+ */
+ if (dst_flags & WRITEMASK_X)
+ emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
+ if (dst_flags & WRITEMASK_Y)
+ emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
+ break;
+
+ case OPCODE_POW:
+ emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
+ break;
+
+ /* Comparisons:
+ */
+ case OPCODE_CMP:
+ emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
+ break;
+
+ case OPCODE_MAX:
+ emit_max(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case OPCODE_MIN:
+ emit_min(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case OPCODE_SLT:
+ emit_slt(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case OPCODE_SLE:
+ emit_sle(p, dst, dst_flags, args[0], args[1]);
+ break;
+ case OPCODE_SGT:
+ emit_sgt(p, dst, dst_flags, args[0], args[1]);
+ break;
+ case OPCODE_SGE:
+ emit_sge(p, dst, dst_flags, args[0], args[1]);
+ break;
+ case OPCODE_SEQ:
+ emit_seq(p, dst, dst_flags, args[0], args[1]);
+ break;
+ case OPCODE_SNE:
+ emit_sne(p, dst, dst_flags, args[0], args[1]);
+ break;
+
+ case OPCODE_LIT:
+ emit_lit(p, dst, dst_flags, args[0]);
+ break;
+
+ /* Texturing operations:
+ */
+ case OPCODE_TEX:
+ emit_tex(c, inst, dst, dst_flags, args[0]);
+ break;
+
+ case OPCODE_TXB:
+ emit_txb(c, inst, dst, dst_flags, args[0]);
+ break;
+
+ case OPCODE_KIL:
+ emit_kil(c, args[0]);
+ break;
+
+ case OPCODE_KIL_NV:
+ emit_kil_nv(c);
+ break;
+
+ default:
+ _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
+ inst->opcode, inst->opcode < MAX_OPCODE ?
+ _mesa_opcode_string(inst->opcode) :
+ "unknown");
+ }
+
+ for (i = 0; i < 4; i++)
+ if (inst->dst[i] && inst->dst[i]->spill_slot)
+ emit_spill(c,
+ inst->dst[i]->hw_reg,
+ inst->dst[i]->spill_slot);
+ }
+
+ if (INTEL_DEBUG & DEBUG_WM) {
+ int i;
+
+ _mesa_printf("wm-native:\n");
+ for (i = 0; i < p->nr_insn; i++)
+ brw_disasm(stderr, &p->store[i]);
+ _mesa_printf("\n");
+ }
+}
diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
new file mode 100644
index 00000000000..4e3edfbbffa
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -0,0 +1,1177 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "brw_context.h"
+#include "brw_wm.h"
+#include "brw_util.h"
+
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/prog_statevars.h"
+
+
+/** An invalid texture target */
+#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS
+
+/** An invalid texture unit */
+#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT
+
+#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS
+
+#define X 0
+#define Y 1
+#define Z 2
+#define W 3
+
+
+static const char *wm_opcode_strings[] = {
+ "PIXELXY",
+ "DELTAXY",
+ "PIXELW",
+ "LINTERP",
+ "PINTERP",
+ "CINTERP",
+ "WPOSXY",
+ "FB_WRITE",
+ "FRONTFACING",
+};
+
+#if 0
+static const char *wm_file_strings[] = {
+ "PAYLOAD"
+};
+#endif
+
+
+/***********************************************************************
+ * Source regs
+ */
+
+static struct prog_src_register src_reg(GLuint file, GLuint idx)
+{
+ struct prog_src_register reg;
+ reg.File = file;
+ reg.Index = idx;
+ reg.Swizzle = SWIZZLE_NOOP;
+ reg.RelAddr = 0;
+ reg.Negate = NEGATE_NONE;
+ reg.Abs = 0;
+ return reg;
+}
+
+static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
+{
+ return src_reg(dst.File, dst.Index);
+}
+
+static struct prog_src_register src_undef( void )
+{
+ return src_reg(PROGRAM_UNDEFINED, 0);
+}
+
+static GLboolean src_is_undef(struct prog_src_register src)
+{
+ return src.File == PROGRAM_UNDEFINED;
+}
+
+static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
+{
+ reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
+ return reg;
+}
+
+static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
+{
+ return src_swizzle(reg, x, x, x, x);
+}
+
+static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
+{
+ reg.Swizzle = swizzle;
+ return reg;
+}
+
+
+/***********************************************************************
+ * Dest regs
+ */
+
+static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
+{
+ struct prog_dst_register reg;
+ reg.File = file;
+ reg.Index = idx;
+ reg.WriteMask = WRITEMASK_XYZW;
+ reg.RelAddr = 0;
+ reg.CondMask = COND_TR;
+ reg.CondSwizzle = 0;
+ reg.CondSrc = 0;
+ reg.pad = 0;
+ return reg;
+}
+
+static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
+{
+ reg.WriteMask &= mask;
+ return reg;
+}
+
+static struct prog_dst_register dst_undef( void )
+{
+ return dst_reg(PROGRAM_UNDEFINED, 0);
+}
+
+
+
+static struct prog_dst_register get_temp( struct brw_wm_compile *c )
+{
+ int bit = _mesa_ffs( ~c->fp_temp );
+
+ if (!bit) {
+ _mesa_printf("%s: out of temporaries\n", __FILE__);
+ exit(1);
+ }
+
+ c->fp_temp |= 1<<(bit-1);
+ return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
+}
+
+
+static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
+{
+ c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
+}
+
+
+/***********************************************************************
+ * Instructions
+ */
+
+static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
+{
+ return &c->prog_instructions[c->nr_fp_insns++];
+}
+
+static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
+ const struct prog_instruction *inst0)
+{
+ struct prog_instruction *inst = get_fp_inst(c);
+ *inst = *inst0;
+ return inst;
+}
+
+static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
+ GLuint op,
+ struct prog_dst_register dest,
+ GLuint saturate,
+ GLuint tex_src_unit,
+ GLuint tex_src_target,
+ GLuint tex_shadow,
+ struct prog_src_register src0,
+ struct prog_src_register src1,
+ struct prog_src_register src2 )
+{
+ struct prog_instruction *inst = get_fp_inst(c);
+
+ assert(tex_src_unit < BRW_MAX_TEX_UNIT ||
+ tex_src_unit == TEX_UNIT_NONE);
+ assert(tex_src_target < NUM_TEXTURE_TARGETS ||
+ tex_src_target == TEX_TARGET_NONE);
+
+ /* update mask of which texture units are referenced by this program */
+ if (tex_src_unit != TEX_UNIT_NONE)
+ c->fp->tex_units_used |= (1 << tex_src_unit);
+
+ memset(inst, 0, sizeof(*inst));
+
+ inst->Opcode = op;
+ inst->DstReg = dest;
+ inst->SaturateMode = saturate;
+ inst->TexSrcUnit = tex_src_unit;
+ inst->TexSrcTarget = tex_src_target;
+ inst->TexShadow = tex_shadow;
+ inst->SrcReg[0] = src0;
+ inst->SrcReg[1] = src1;
+ inst->SrcReg[2] = src2;
+ return inst;
+}
+
+
+static struct prog_instruction * emit_op(struct brw_wm_compile *c,
+ GLuint op,
+ struct prog_dst_register dest,
+ GLuint saturate,
+ struct prog_src_register src0,
+ struct prog_src_register src1,
+ struct prog_src_register src2 )
+{
+ return emit_tex_op(c, op, dest, saturate,
+ TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */
+ src0, src1, src2);
+}
+
+
+/* Many Mesa opcodes produce the same value across all the result channels.
+ * We'd rather not have to support that splatting in the opcode implementations,
+ * and brw_wm_pass*.c wants to optimize them out by shuffling references around
+ * anyway. We can easily get both by emitting the opcode to one channel, and
+ * then MOVing it to the others, which brw_wm_pass*.c already understands.
+ */
+static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
+ const struct prog_instruction *inst0)
+{
+ struct prog_instruction *inst;
+ unsigned int dst_chan;
+ unsigned int other_channel_mask;
+
+ if (inst0->DstReg.WriteMask == 0)
+ return NULL;
+
+ dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1;
+ inst = get_fp_inst(c);
+ *inst = *inst0;
+ inst->DstReg.WriteMask = 1 << dst_chan;
+
+ other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
+ if (other_channel_mask != 0) {
+ inst = emit_op(c,
+ OPCODE_MOV,
+ dst_mask(inst0->DstReg, other_channel_mask),
+ 0,
+ src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
+ src_undef(),
+ src_undef());
+ }
+ return inst;
+}
+
+
+/***********************************************************************
+ * Special instructions for interpolation and other tasks
+ */
+
+static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
+{
+ if (src_is_undef(c->pixel_xy)) {
+ struct prog_dst_register pixel_xy = get_temp(c);
+ struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
+
+
+ /* Emit the out calculations, and hold onto the results. Use
+ * two instructions as a temporary is required.
+ */
+ /* pixel_xy.xy = PIXELXY payload[0];
+ */
+ emit_op(c,
+ WM_PIXELXY,
+ dst_mask(pixel_xy, WRITEMASK_XY),
+ 0,
+ payload_r0_depth,
+ src_undef(),
+ src_undef());
+
+ c->pixel_xy = src_reg_from_dst(pixel_xy);
+ }
+
+ return c->pixel_xy;
+}
+
+static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
+{
+ if (src_is_undef(c->delta_xy)) {
+ struct prog_dst_register delta_xy = get_temp(c);
+ struct prog_src_register pixel_xy = get_pixel_xy(c);
+ struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
+
+ /* deltas.xy = DELTAXY pixel_xy, payload[0]
+ */
+ emit_op(c,
+ WM_DELTAXY,
+ dst_mask(delta_xy, WRITEMASK_XY),
+ 0,
+ pixel_xy,
+ payload_r0_depth,
+ src_undef());
+
+ c->delta_xy = src_reg_from_dst(delta_xy);
+ }
+
+ return c->delta_xy;
+}
+
+static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
+{
+ if (src_is_undef(c->pixel_w)) {
+ struct prog_dst_register pixel_w = get_temp(c);
+ struct prog_src_register deltas = get_delta_xy(c);
+ struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
+
+ /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
+ */
+ emit_op(c,
+ WM_PIXELW,
+ dst_mask(pixel_w, WRITEMASK_W),
+ 0,
+ interp_wpos,
+ deltas,
+ src_undef());
+
+
+ c->pixel_w = src_reg_from_dst(pixel_w);
+ }
+
+ return c->pixel_w;
+}
+
+static void emit_interp( struct brw_wm_compile *c,
+ GLuint idx )
+{
+ struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
+ struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
+ struct prog_src_register deltas = get_delta_xy(c);
+
+ /* Need to use PINTERP on attributes which have been
+ * multiplied by 1/W in the SF program, and LINTERP on those
+ * which have not:
+ */
+ switch (idx) {
+ case FRAG_ATTRIB_WPOS:
+ /* Have to treat wpos.xy specially:
+ */
+ emit_op(c,
+ WM_WPOSXY,
+ dst_mask(dst, WRITEMASK_XY),
+ 0,
+ get_pixel_xy(c),
+ src_undef(),
+ src_undef());
+
+ dst = dst_mask(dst, WRITEMASK_ZW);
+
+ /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
+ */
+ emit_op(c,
+ WM_LINTERP,
+ dst,
+ 0,
+ interp,
+ deltas,
+ src_undef());
+ break;
+ case FRAG_ATTRIB_COL0:
+ case FRAG_ATTRIB_COL1:
+ if (c->key.flat_shade) {
+ emit_op(c,
+ WM_CINTERP,
+ dst,
+ 0,
+ interp,
+ src_undef(),
+ src_undef());
+ }
+ else {
+ if (c->key.linear_color) {
+ emit_op(c,
+ WM_LINTERP,
+ dst,
+ 0,
+ interp,
+ deltas,
+ src_undef());
+ }
+ else {
+ /* perspective-corrected color interpolation */
+ emit_op(c,
+ WM_PINTERP,
+ dst,
+ 0,
+ interp,
+ deltas,
+ get_pixel_w(c));
+ }
+ }
+ break;
+ case FRAG_ATTRIB_FOGC:
+ /* Interpolate the fog coordinate */
+ emit_op(c,
+ WM_PINTERP,
+ dst_mask(dst, WRITEMASK_X),
+ 0,
+ interp,
+ deltas,
+ get_pixel_w(c));
+
+ emit_op(c,
+ OPCODE_MOV,
+ dst_mask(dst, WRITEMASK_YZW),
+ 0,
+ src_swizzle(interp,
+ SWIZZLE_ZERO,
+ SWIZZLE_ZERO,
+ SWIZZLE_ZERO,
+ SWIZZLE_ONE),
+ src_undef(),
+ src_undef());
+ break;
+
+ case FRAG_ATTRIB_FACE:
+ /* XXX review/test this case */
+ emit_op(c,
+ WM_FRONTFACING,
+ dst_mask(dst, WRITEMASK_X),
+ 0,
+ src_undef(),
+ src_undef(),
+ src_undef());
+ break;
+
+ case FRAG_ATTRIB_PNTC:
+ /* XXX review/test this case */
+ emit_op(c,
+ WM_PINTERP,
+ dst_mask(dst, WRITEMASK_XY),
+ 0,
+ interp,
+ deltas,
+ get_pixel_w(c));
+
+ emit_op(c,
+ OPCODE_MOV,
+ dst_mask(dst, WRITEMASK_ZW),
+ 0,
+ src_swizzle(interp,
+ SWIZZLE_ZERO,
+ SWIZZLE_ZERO,
+ SWIZZLE_ZERO,
+ SWIZZLE_ONE),
+ src_undef(),
+ src_undef());
+ break;
+
+ default:
+ emit_op(c,
+ WM_PINTERP,
+ dst,
+ 0,
+ interp,
+ deltas,
+ get_pixel_w(c));
+ break;
+ }
+
+ c->fp_interp_emitted |= 1<<idx;
+}
+
+/***********************************************************************
+ * Hacks to extend the program parameter and constant lists.
+ */
+
+/* Add the fog parameters to the parameter list of the original
+ * program, rather than creating a new list. Doesn't really do any
+ * harm and it's not as if the parameter handling isn't a big hack
+ * anyway.
+ */
+static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
+ GLint s0,
+ GLint s1,
+ GLint s2,
+ GLint s3,
+ GLint s4)
+{
+ struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
+ gl_state_index tokens[STATE_LENGTH];
+ GLuint idx;
+ tokens[0] = s0;
+ tokens[1] = s1;
+ tokens[2] = s2;
+ tokens[3] = s3;
+ tokens[4] = s4;
+
+ for (idx = 0; idx < paramList->NumParameters; idx++) {
+ if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
+ memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
+ return src_reg(PROGRAM_STATE_VAR, idx);
+ }
+
+ idx = _mesa_add_state_reference( paramList, tokens );
+
+ return src_reg(PROGRAM_STATE_VAR, idx);
+}
+
+
+static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
+ GLfloat s0,
+ GLfloat s1,
+ GLfloat s2,
+ GLfloat s3)
+{
+ struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
+ GLfloat values[4];
+ GLuint idx;
+ GLuint swizzle;
+
+ values[0] = s0;
+ values[1] = s1;
+ values[2] = s2;
+ values[3] = s3;
+
+ /* Have to search, otherwise multiple compilations will each grow
+ * the parameter list.
+ */
+ for (idx = 0; idx < paramList->NumParameters; idx++) {
+ if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
+ memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
+
+ /* XXX: this mimics the mesa bug which puts all constants and
+ * parameters into the "PROGRAM_STATE_VAR" category:
+ */
+ return src_reg(PROGRAM_STATE_VAR, idx);
+ }
+
+ idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
+ assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
+ return src_reg(PROGRAM_STATE_VAR, idx);
+}
+
+
+
+/***********************************************************************
+ * Expand various instructions here to simpler forms.
+ */
+static void precalc_dst( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ struct prog_src_register src0 = inst->SrcReg[0];
+ struct prog_src_register src1 = inst->SrcReg[1];
+ struct prog_dst_register dst = inst->DstReg;
+
+ if (dst.WriteMask & WRITEMASK_Y) {
+ /* dst.y = mul src0.y, src1.y
+ */
+ emit_op(c,
+ OPCODE_MUL,
+ dst_mask(dst, WRITEMASK_Y),
+ inst->SaturateMode,
+ src0,
+ src1,
+ src_undef());
+ }
+
+ if (dst.WriteMask & WRITEMASK_XZ) {
+ struct prog_instruction *swz;
+ GLuint z = GET_SWZ(src0.Swizzle, Z);
+
+ /* dst.xz = swz src0.1zzz
+ */
+ swz = emit_op(c,
+ OPCODE_SWZ,
+ dst_mask(dst, WRITEMASK_XZ),
+ inst->SaturateMode,
+ src_swizzle(src0, SWIZZLE_ONE, z, z, z),
+ src_undef(),
+ src_undef());
+ /* Avoid letting negation flag of src0 affect our 1 constant. */
+ swz->SrcReg[0].Negate &= ~NEGATE_X;
+ }
+ if (dst.WriteMask & WRITEMASK_W) {
+ /* dst.w = mov src1.w
+ */
+ emit_op(c,
+ OPCODE_MOV,
+ dst_mask(dst, WRITEMASK_W),
+ inst->SaturateMode,
+ src1,
+ src_undef(),
+ src_undef());
+ }
+}
+
+
+static void precalc_lit( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ struct prog_src_register src0 = inst->SrcReg[0];
+ struct prog_dst_register dst = inst->DstReg;
+
+ if (dst.WriteMask & WRITEMASK_XW) {
+ struct prog_instruction *swz;
+
+ /* dst.xw = swz src0.1111
+ */
+ swz = emit_op(c,
+ OPCODE_SWZ,
+ dst_mask(dst, WRITEMASK_XW),
+ 0,
+ src_swizzle1(src0, SWIZZLE_ONE),
+ src_undef(),
+ src_undef());
+ /* Avoid letting the negation flag of src0 affect our 1 constant. */
+ swz->SrcReg[0].Negate = NEGATE_NONE;
+ }
+
+ if (dst.WriteMask & WRITEMASK_YZ) {
+ emit_op(c,
+ OPCODE_LIT,
+ dst_mask(dst, WRITEMASK_YZ),
+ inst->SaturateMode,
+ src0,
+ src_undef(),
+ src_undef());
+ }
+}
+
+
+/**
+ * Some TEX instructions require extra code, cube map coordinate
+ * normalization, or coordinate scaling for RECT textures, etc.
+ * This function emits those extra instructions and the TEX
+ * instruction itself.
+ */
+static void precalc_tex( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ struct prog_src_register coord;
+ struct prog_dst_register tmpcoord;
+ const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
+
+ assert(unit < BRW_MAX_TEX_UNIT);
+
+ if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
+ struct prog_instruction *out;
+ struct prog_dst_register tmp0 = get_temp(c);
+ struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
+ struct prog_dst_register tmp1 = get_temp(c);
+ struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
+ struct prog_src_register src0 = inst->SrcReg[0];
+
+ /* find longest component of coord vector and normalize it */
+ tmpcoord = get_temp(c);
+ coord = src_reg_from_dst(tmpcoord);
+
+ /* tmpcoord = src0 (i.e.: coord = src0) */
+ out = emit_op(c, OPCODE_MOV,
+ tmpcoord,
+ 0,
+ src0,
+ src_undef(),
+ src_undef());
+ out->SrcReg[0].Negate = NEGATE_NONE;
+ out->SrcReg[0].Abs = 1;
+
+ /* tmp0 = MAX(coord.X, coord.Y) */
+ emit_op(c, OPCODE_MAX,
+ tmp0,
+ 0,
+ src_swizzle1(coord, X),
+ src_swizzle1(coord, Y),
+ src_undef());
+
+ /* tmp1 = MAX(tmp0, coord.Z) */
+ emit_op(c, OPCODE_MAX,
+ tmp1,
+ 0,
+ tmp0src,
+ src_swizzle1(coord, Z),
+ src_undef());
+
+ /* tmp0 = 1 / tmp1 */
+ emit_op(c, OPCODE_RCP,
+ dst_mask(tmp0, WRITEMASK_X),
+ 0,
+ tmp1src,
+ src_undef(),
+ src_undef());
+
+ /* tmpCoord = src0 * tmp0 */
+ emit_op(c, OPCODE_MUL,
+ tmpcoord,
+ 0,
+ src0,
+ src_swizzle1(tmp0src, SWIZZLE_X),
+ src_undef());
+
+ release_temp(c, tmp0);
+ release_temp(c, tmp1);
+ }
+ else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
+ struct prog_src_register scale =
+ search_or_add_param5( c,
+ STATE_INTERNAL,
+ STATE_TEXRECT_SCALE,
+ unit,
+ 0,0 );
+
+ tmpcoord = get_temp(c);
+
+ /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
+ */
+ emit_op(c,
+ OPCODE_MUL,
+ tmpcoord,
+ 0,
+ inst->SrcReg[0],
+ src_swizzle(scale,
+ SWIZZLE_X,
+ SWIZZLE_Y,
+ SWIZZLE_ONE,
+ SWIZZLE_ONE),
+ src_undef());
+
+ coord = src_reg_from_dst(tmpcoord);
+ }
+ else {
+ coord = inst->SrcReg[0];
+ }
+
+ /* Need to emit YUV texture conversions by hand. Probably need to
+ * do this here - the alternative is in brw_wm_emit.c, but the
+ * conversion requires allocating a temporary variable which we
+ * don't have the facility to do that late in the compilation.
+ */
+ if (c->key.yuvtex_mask & (1 << unit)) {
+ /* convert ycbcr to RGBA */
+ GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
+
+ /*
+ CONST C0 = { -.5, -.0625, -.5, 1.164 }
+ CONST C1 = { 1.596, -0.813, 2.018, -.391 }
+ UYV = TEX ...
+ UYV.xyz = ADD UYV, C0
+ UYV.y = MUL UYV.y, C0.w
+ if (UV swaped)
+ RGB.xyz = MAD UYV.zzx, C1, UYV.y
+ else
+ RGB.xyz = MAD UYV.xxz, C1, UYV.y
+ RGB.y = MAD UYV.z, C1.w, RGB.y
+ */
+ struct prog_dst_register dst = inst->DstReg;
+ struct prog_dst_register tmp = get_temp(c);
+ struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
+ struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 );
+ struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
+
+ /* tmp = TEX ...
+ */
+ emit_tex_op(c,
+ OPCODE_TEX,
+ tmp,
+ inst->SaturateMode,
+ unit,
+ inst->TexSrcTarget,
+ inst->TexShadow,
+ coord,
+ src_undef(),
+ src_undef());
+
+ /* tmp.xyz = ADD TMP, C0
+ */
+ emit_op(c,
+ OPCODE_ADD,
+ dst_mask(tmp, WRITEMASK_XYZ),
+ 0,
+ tmpsrc,
+ C0,
+ src_undef());
+
+ /* YUV.y = MUL YUV.y, C0.w
+ */
+
+ emit_op(c,
+ OPCODE_MUL,
+ dst_mask(tmp, WRITEMASK_Y),
+ 0,
+ tmpsrc,
+ src_swizzle1(C0, W),
+ src_undef());
+
+ /*
+ * if (UV swaped)
+ * RGB.xyz = MAD YUV.zzx, C1, YUV.y
+ * else
+ * RGB.xyz = MAD YUV.xxz, C1, YUV.y
+ */
+
+ emit_op(c,
+ OPCODE_MAD,
+ dst_mask(dst, WRITEMASK_XYZ),
+ 0,
+ swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
+ C1,
+ src_swizzle1(tmpsrc, Y));
+
+ /* RGB.y = MAD YUV.z, C1.w, RGB.y
+ */
+ emit_op(c,
+ OPCODE_MAD,
+ dst_mask(dst, WRITEMASK_Y),
+ 0,
+ src_swizzle1(tmpsrc, Z),
+ src_swizzle1(C1, W),
+ src_swizzle1(src_reg_from_dst(dst), Y));
+
+ release_temp(c, tmp);
+ }
+ else {
+ /* ordinary RGBA tex instruction */
+ emit_tex_op(c,
+ OPCODE_TEX,
+ inst->DstReg,
+ inst->SaturateMode,
+ unit,
+ inst->TexSrcTarget,
+ inst->TexShadow,
+ coord,
+ src_undef(),
+ src_undef());
+ }
+
+ /* For GL_EXT_texture_swizzle: */
+ if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
+ /* swizzle the result of the TEX instruction */
+ struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
+ emit_op(c, OPCODE_SWZ,
+ inst->DstReg,
+ SATURATE_OFF, /* saturate already done above */
+ src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
+ src_undef(),
+ src_undef());
+ }
+
+ if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
+ (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
+ release_temp(c, tmpcoord);
+}
+
+
+/**
+ * Check if the given TXP instruction really needs the divide-by-W step.
+ */
+static GLboolean projtex( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ const struct prog_src_register src = inst->SrcReg[0];
+ GLboolean retVal;
+
+ assert(inst->Opcode == OPCODE_TXP);
+
+ /* Only try to detect the simplest cases. Could detect (later)
+ * cases where we are trying to emit code like RCP {1.0}, MUL x,
+ * {1.0}, and so on.
+ *
+ * More complex cases than this typically only arise from
+ * user-provided fragment programs anyway:
+ */
+ if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
+ retVal = GL_FALSE; /* ut2004 gun rendering !?! */
+ else if (src.File == PROGRAM_INPUT &&
+ GET_SWZ(src.Swizzle, W) == W &&
+ (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
+ retVal = GL_FALSE;
+ else
+ retVal = GL_TRUE;
+
+ return retVal;
+}
+
+
+/**
+ * Emit code for TXP.
+ */
+static void precalc_txp( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ struct prog_src_register src0 = inst->SrcReg[0];
+
+ if (projtex(c, inst)) {
+ struct prog_dst_register tmp = get_temp(c);
+ struct prog_instruction tmp_inst;
+
+ /* tmp0.w = RCP inst.arg[0][3]
+ */
+ emit_op(c,
+ OPCODE_RCP,
+ dst_mask(tmp, WRITEMASK_W),
+ 0,
+ src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
+ src_undef(),
+ src_undef());
+
+ /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
+ */
+ emit_op(c,
+ OPCODE_MUL,
+ dst_mask(tmp, WRITEMASK_XYZ),
+ 0,
+ src0,
+ src_swizzle1(src_reg_from_dst(tmp), W),
+ src_undef());
+
+ /* dst = precalc(TEX tmp0)
+ */
+ tmp_inst = *inst;
+ tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
+ precalc_tex(c, &tmp_inst);
+
+ release_temp(c, tmp);
+ }
+ else
+ {
+ /* dst = precalc(TEX src0)
+ */
+ precalc_tex(c, inst);
+ }
+}
+
+
+
+static void emit_fb_write( struct brw_wm_compile *c )
+{
+ struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
+ struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
+ struct prog_src_register outcolor;
+ GLuint i;
+
+ struct prog_instruction *inst, *last_inst;
+ struct brw_context *brw = c->func.brw;
+
+ /* The inst->Aux field is used for FB write target and the EOT marker */
+
+ if (brw->state.nr_color_regions > 1) {
+ for (i = 0 ; i < brw->state.nr_color_regions; i++) {
+ outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
+ last_inst = inst = emit_op(c,
+ WM_FB_WRITE, dst_mask(dst_undef(),0), 0,
+ outcolor, payload_r0_depth, outdepth);
+ inst->Aux = (i<<1);
+ if (c->fp_fragcolor_emitted) {
+ outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
+ last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
+ 0, outcolor, payload_r0_depth, outdepth);
+ inst->Aux = (i<<1);
+ }
+ }
+ last_inst->Aux |= 1; //eot
+ }
+ else {
+ /* if gl_FragData[0] is written, use it, else use gl_FragColor */
+ if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
+ outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
+ else
+ outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
+
+ inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
+ 0, outcolor, payload_r0_depth, outdepth);
+ inst->Aux = 1|(0<<1);
+ }
+}
+
+
+
+
+/***********************************************************************
+ * Emit INTERP instructions ahead of first use of each attrib.
+ */
+
+static void validate_src_regs( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ GLuint nr_args = brw_wm_nr_args( inst->Opcode );
+ GLuint i;
+
+ for (i = 0; i < nr_args; i++) {
+ if (inst->SrcReg[i].File == PROGRAM_INPUT) {
+ GLuint idx = inst->SrcReg[i].Index;
+ if (!(c->fp_interp_emitted & (1<<idx))) {
+ emit_interp(c, idx);
+ }
+ }
+ }
+}
+
+static void validate_dst_regs( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ if (inst->DstReg.File == PROGRAM_OUTPUT) {
+ GLuint idx = inst->DstReg.Index;
+ if (idx == FRAG_RESULT_COLOR)
+ c->fp_fragcolor_emitted = 1;
+ }
+}
+
+static void print_insns( const struct prog_instruction *insn,
+ GLuint nr )
+{
+ GLuint i;
+ for (i = 0; i < nr; i++, insn++) {
+ _mesa_printf("%3d: ", i);
+ if (insn->Opcode < MAX_OPCODE)
+ _mesa_print_instruction(insn);
+ else if (insn->Opcode < MAX_WM_OPCODE) {
+ GLuint idx = insn->Opcode - MAX_OPCODE;
+
+ _mesa_print_alu_instruction(insn,
+ wm_opcode_strings[idx],
+ 3);
+ }
+ else
+ _mesa_printf("965 Opcode %d\n", insn->Opcode);
+ }
+}
+
+
+/**
+ * Initial pass for fragment program code generation.
+ * This function is used by both the GLSL and non-GLSL paths.
+ */
+void brw_wm_pass_fp( struct brw_wm_compile *c )
+{
+ struct brw_fragment_program *fp = c->fp;
+ GLuint insn;
+
+ if (INTEL_DEBUG & DEBUG_WM) {
+ _mesa_printf("pre-fp:\n");
+ _mesa_print_program(&fp->program.Base);
+ _mesa_printf("\n");
+ }
+
+ c->pixel_xy = src_undef();
+ c->delta_xy = src_undef();
+ c->pixel_w = src_undef();
+ c->nr_fp_insns = 0;
+ c->fp->tex_units_used = 0x0;
+
+ /* Emit preamble instructions. This is where special instructions such as
+ * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
+ * compute shader inputs from varying vars.
+ */
+ for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
+ const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
+ validate_src_regs(c, inst);
+ validate_dst_regs(c, inst);
+ }
+
+ /* Loop over all instructions doing assorted simplifications and
+ * transformations.
+ */
+ for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
+ const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
+ struct prog_instruction *out;
+
+ /* Check for INPUT values, emit INTERP instructions where
+ * necessary:
+ */
+
+ switch (inst->Opcode) {
+ case OPCODE_SWZ:
+ out = emit_insn(c, inst);
+ out->Opcode = OPCODE_MOV;
+ break;
+
+ case OPCODE_ABS:
+ out = emit_insn(c, inst);
+ out->Opcode = OPCODE_MOV;
+ out->SrcReg[0].Negate = NEGATE_NONE;
+ out->SrcReg[0].Abs = 1;
+ break;
+
+ case OPCODE_SUB:
+ out = emit_insn(c, inst);
+ out->Opcode = OPCODE_ADD;
+ out->SrcReg[1].Negate ^= NEGATE_XYZW;
+ break;
+
+ case OPCODE_SCS:
+ out = emit_insn(c, inst);
+ /* This should probably be done in the parser.
+ */
+ out->DstReg.WriteMask &= WRITEMASK_XY;
+ break;
+
+ case OPCODE_DST:
+ precalc_dst(c, inst);
+ break;
+
+ case OPCODE_LIT:
+ precalc_lit(c, inst);
+ break;
+
+ case OPCODE_TEX:
+ precalc_tex(c, inst);
+ break;
+
+ case OPCODE_TXP:
+ precalc_txp(c, inst);
+ break;
+
+ case OPCODE_TXB:
+ out = emit_insn(c, inst);
+ out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
+ assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
+ break;
+
+ case OPCODE_XPD:
+ out = emit_insn(c, inst);
+ /* This should probably be done in the parser.
+ */
+ out->DstReg.WriteMask &= WRITEMASK_XYZ;
+ break;
+
+ case OPCODE_KIL:
+ out = emit_insn(c, inst);
+ /* This should probably be done in the parser.
+ */
+ out->DstReg.WriteMask = 0;
+ break;
+ case OPCODE_END:
+ emit_fb_write(c);
+ break;
+ case OPCODE_PRINT:
+ break;
+ default:
+ if (brw_wm_is_scalar_result(inst->Opcode))
+ emit_scalar_insn(c, inst);
+ else
+ emit_insn(c, inst);
+ break;
+ }
+ }
+
+ if (INTEL_DEBUG & DEBUG_WM) {
+ _mesa_printf("pass_fp:\n");
+ print_insns( c->prog_instructions, c->nr_fp_insns );
+ _mesa_printf("\n");
+ }
+}
+
diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c
new file mode 100644
index 00000000000..c9fe1dd8ad2
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_glsl.c
@@ -0,0 +1,3046 @@
+#include "main/macros.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/prog_optimize.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+
+enum _subroutine {
+ SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4
+};
+
+static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
+ const struct prog_instruction *inst,
+ GLuint component);
+
+/**
+ * Determine if the given fragment program uses GLSL features such
+ * as flow conditionals, loops, subroutines.
+ * Some GLSL shaders may use these features, others might not.
+ */
+GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
+{
+ int i;
+
+ for (i = 0; i < fp->Base.NumInstructions; i++) {
+ const struct prog_instruction *inst = &fp->Base.Instructions[i];
+ switch (inst->Opcode) {
+ case OPCODE_ARL:
+ case OPCODE_IF:
+ case OPCODE_ENDIF:
+ case OPCODE_CAL:
+ case OPCODE_BRK:
+ case OPCODE_RET:
+ case OPCODE_NOISE1:
+ case OPCODE_NOISE2:
+ case OPCODE_NOISE3:
+ case OPCODE_NOISE4:
+ case OPCODE_BGNLOOP:
+ return GL_TRUE;
+ default:
+ break;
+ }
+ }
+ return GL_FALSE;
+}
+
+
+
+static void
+reclaim_temps(struct brw_wm_compile *c);
+
+
+/** Mark GRF register as used. */
+static void
+prealloc_grf(struct brw_wm_compile *c, int r)
+{
+ c->used_grf[r] = GL_TRUE;
+}
+
+
+/** Mark given GRF register as not in use. */
+static void
+release_grf(struct brw_wm_compile *c, int r)
+{
+ /*assert(c->used_grf[r]);*/
+ c->used_grf[r] = GL_FALSE;
+ c->first_free_grf = MIN2(c->first_free_grf, r);
+}
+
+
+/** Return index of a free GRF, mark it as used. */
+static int
+alloc_grf(struct brw_wm_compile *c)
+{
+ GLuint r;
+ for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
+ if (!c->used_grf[r]) {
+ c->used_grf[r] = GL_TRUE;
+ c->first_free_grf = r + 1; /* a guess */
+ return r;
+ }
+ }
+
+ /* no free temps, try to reclaim some */
+ reclaim_temps(c);
+ c->first_free_grf = 0;
+
+ /* try alloc again */
+ for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
+ if (!c->used_grf[r]) {
+ c->used_grf[r] = GL_TRUE;
+ c->first_free_grf = r + 1; /* a guess */
+ return r;
+ }
+ }
+
+ for (r = 0; r < BRW_WM_MAX_GRF; r++) {
+ assert(c->used_grf[r]);
+ }
+
+ /* really, no free GRF regs found */
+ if (!c->out_of_regs) {
+ /* print warning once per compilation */
+ _mesa_warning(NULL, "i965: ran out of registers for fragment program");
+ c->out_of_regs = GL_TRUE;
+ }
+
+ return -1;
+}
+
+
+/** Return number of GRF registers used */
+static int
+num_grf_used(const struct brw_wm_compile *c)
+{
+ int r;
+ for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--)
+ if (c->used_grf[r])
+ return r + 1;
+ return 0;
+}
+
+
+
+/**
+ * Record the mapping of a Mesa register to a hardware register.
+ */
+static void set_reg(struct brw_wm_compile *c, int file, int index,
+ int component, struct brw_reg reg)
+{
+ c->wm_regs[file][index][component].reg = reg;
+ c->wm_regs[file][index][component].inited = GL_TRUE;
+}
+
+static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
+{
+ struct brw_reg reg;
+
+ /* if we need to allocate another temp, grow the tmp_regs[] array */
+ if (c->tmp_index == c->tmp_max) {
+ int r = alloc_grf(c);
+ if (r < 0) {
+ /*printf("Out of temps in %s\n", __FUNCTION__);*/
+ r = 50; /* XXX random register! */
+ }
+ c->tmp_regs[ c->tmp_max++ ] = r;
+ }
+
+ /* form the GRF register */
+ reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0);
+ /*printf("alloc_temp %d\n", reg.nr);*/
+ assert(reg.nr < BRW_WM_MAX_GRF);
+ return reg;
+
+}
+
+/**
+ * Save current temp register info.
+ * There must be a matching call to release_tmps().
+ */
+static int mark_tmps(struct brw_wm_compile *c)
+{
+ return c->tmp_index;
+}
+
+static struct brw_reg lookup_tmp( struct brw_wm_compile *c, int index )
+{
+ return brw_vec8_grf( c->tmp_regs[ index ], 0 );
+}
+
+static void release_tmps(struct brw_wm_compile *c, int mark)
+{
+ c->tmp_index = mark;
+}
+
+/**
+ * Convert Mesa src register to brw register.
+ *
+ * Since we're running in SOA mode each Mesa register corresponds to four
+ * hardware registers. We allocate the hardware registers as needed here.
+ *
+ * \param file register file, one of PROGRAM_x
+ * \param index register number
+ * \param component src component (X=0, Y=1, Z=2, W=3)
+ * \param nr not used?!?
+ * \param neg negate value?
+ * \param abs take absolute value?
+ */
+static struct brw_reg
+get_reg(struct brw_wm_compile *c, int file, int index, int component,
+ int nr, GLuint neg, GLuint abs)
+{
+ struct brw_reg reg;
+ switch (file) {
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_CONSTANT:
+ case PROGRAM_UNIFORM:
+ file = PROGRAM_STATE_VAR;
+ break;
+ case PROGRAM_UNDEFINED:
+ return brw_null_reg();
+ case PROGRAM_TEMPORARY:
+ case PROGRAM_INPUT:
+ case PROGRAM_OUTPUT:
+ case PROGRAM_PAYLOAD:
+ break;
+ default:
+ _mesa_problem(NULL, "Unexpected file in get_reg()");
+ return brw_null_reg();
+ }
+
+ assert(index < 256);
+ assert(component < 4);
+
+ /* see if we've already allocated a HW register for this Mesa register */
+ if (c->wm_regs[file][index][component].inited) {
+ /* yes, re-use */
+ reg = c->wm_regs[file][index][component].reg;
+ }
+ else {
+ /* no, allocate new register */
+ int grf = alloc_grf(c);
+ /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
+ if (grf < 0) {
+ /* totally out of temps */
+ grf = 51; /* XXX random register! */
+ }
+
+ reg = brw_vec8_grf(grf, 0);
+ /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
+
+ set_reg(c, file, index, component, reg);
+ }
+
+ if (neg & (1 << component)) {
+ reg = negate(reg);
+ }
+ if (abs)
+ reg = brw_abs(reg);
+ return reg;
+}
+
+
+
+/**
+ * This is called if we run out of GRF registers. Examine the live intervals
+ * of temp regs in the program and free those which won't be used again.
+ */
+static void
+reclaim_temps(struct brw_wm_compile *c)
+{
+ GLint intBegin[MAX_PROGRAM_TEMPS];
+ GLint intEnd[MAX_PROGRAM_TEMPS];
+ int index;
+
+ /*printf("Reclaim temps:\n");*/
+
+ _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns,
+ intBegin, intEnd);
+
+ for (index = 0; index < MAX_PROGRAM_TEMPS; index++) {
+ if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) {
+ /* program temp[i] can be freed */
+ int component;
+ /*printf(" temp[%d] is dead\n", index);*/
+ for (component = 0; component < 4; component++) {
+ if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) {
+ int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr;
+ release_grf(c, r);
+ /*
+ printf(" Reclaim temp %d, reg %d at inst %d\n",
+ index, r, c->cur_inst);
+ */
+ c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE;
+ }
+ }
+ }
+ }
+}
+
+
+
+
+/**
+ * Preallocate registers. This sets up the Mesa to hardware register
+ * mapping for certain registers, such as constants (uniforms/state vars)
+ * and shader inputs.
+ */
+static void prealloc_reg(struct brw_wm_compile *c)
+{
+ int i, j;
+ struct brw_reg reg;
+ int urb_read_length = 0;
+ GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted;
+ GLuint reg_index = 0;
+
+ memset(c->used_grf, GL_FALSE, sizeof(c->used_grf));
+ c->first_free_grf = 0;
+
+ for (i = 0; i < 4; i++) {
+ if (i < c->key.nr_depth_regs)
+ reg = brw_vec8_grf(i * 2, 0);
+ else
+ reg = brw_vec8_grf(0, 0);
+ set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
+ }
+ reg_index += 2 * c->key.nr_depth_regs;
+
+ /* constants */
+ {
+ const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters;
+ const GLuint nr_temps = c->fp->program.Base.NumTemporaries;
+
+ /* use a real constant buffer, or just use a section of the GRF? */
+ /* XXX this heuristic may need adjustment... */
+ if ((nr_params + nr_temps) * 4 + reg_index > 80)
+ c->fp->use_const_buffer = GL_TRUE;
+ else
+ c->fp->use_const_buffer = GL_FALSE;
+ /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
+
+ if (c->fp->use_const_buffer) {
+ /* We'll use a real constant buffer and fetch constants from
+ * it with a dataport read message.
+ */
+
+ /* number of float constants in CURBE */
+ c->prog_data.nr_params = 0;
+ }
+ else {
+ const struct gl_program_parameter_list *plist =
+ c->fp->program.Base.Parameters;
+ int index = 0;
+
+ /* number of float constants in CURBE */
+ c->prog_data.nr_params = 4 * nr_params;
+
+ /* loop over program constants (float[4]) */
+ for (i = 0; i < nr_params; i++) {
+ /* loop over XYZW channels */
+ for (j = 0; j < 4; j++, index++) {
+ reg = brw_vec1_grf(reg_index + index / 8, index % 8);
+ /* Save pointer to parameter/constant value.
+ * Constants will be copied in prepare_constant_buffer()
+ */
+ c->prog_data.param[index] = &plist->ParameterValues[i][j];
+ set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
+ }
+ }
+ /* number of constant regs used (each reg is float[8]) */
+ c->nr_creg = 2 * ((4 * nr_params + 15) / 16);
+ reg_index += c->nr_creg;
+ }
+ }
+
+ /* fragment shader inputs */
+ for (i = 0; i < VERT_RESULT_MAX; i++) {
+ int fp_input;
+
+ if (i >= VERT_RESULT_VAR0)
+ fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0;
+ else if (i <= VERT_RESULT_TEX7)
+ fp_input = i;
+ else
+ fp_input = -1;
+
+ if (fp_input >= 0 && inputs & (1 << fp_input)) {
+ urb_read_length = reg_index;
+ reg = brw_vec8_grf(reg_index, 0);
+ for (j = 0; j < 4; j++)
+ set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg);
+ }
+ if (c->key.vp_outputs_written & (1 << i)) {
+ reg_index += 2;
+ }
+ }
+
+ c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
+ c->prog_data.urb_read_length = urb_read_length;
+ c->prog_data.curb_read_length = c->nr_creg;
+ c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
+ reg_index++;
+ c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
+ reg_index += 2;
+
+ /* mark GRF regs [0..reg_index-1] as in-use */
+ for (i = 0; i < reg_index; i++)
+ prealloc_grf(c, i);
+
+ /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
+ prealloc_grf(c, 126);
+ prealloc_grf(c, 127);
+
+ for (i = 0; i < c->nr_fp_insns; i++) {
+ const struct prog_instruction *inst = &c->prog_instructions[i];
+ struct brw_reg dst[4];
+
+ switch (inst->Opcode) {
+ case OPCODE_TEX:
+ case OPCODE_TXB:
+ /* Allocate the channels of texture results contiguously,
+ * since they are written out that way by the sampler unit.
+ */
+ for (j = 0; j < 4; j++) {
+ dst[j] = get_dst_reg(c, inst, j);
+ if (j != 0)
+ assert(dst[j].nr == dst[j - 1].nr + 1);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* An instruction may reference up to three constants.
+ * They'll be found in these registers.
+ * XXX alloc these on demand!
+ */
+ if (c->fp->use_const_buffer) {
+ for (i = 0; i < 3; i++) {
+ c->current_const[i].index = -1;
+ c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0);
+ }
+ }
+#if 0
+ printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer);
+ printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index);
+#endif
+}
+
+
+/**
+ * Check if any of the instruction's src registers are constants, uniforms,
+ * or statevars. If so, fetch any constants that we don't already have in
+ * the three GRF slots.
+ */
+static void fetch_constants(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+ /* loop over instruction src regs */
+ for (i = 0; i < 3; i++) {
+ const struct prog_src_register *src = &inst->SrcReg[i];
+ if (src->File == PROGRAM_STATE_VAR ||
+ src->File == PROGRAM_CONSTANT ||
+ src->File == PROGRAM_UNIFORM) {
+ c->current_const[i].index = src->Index;
+
+#if 0
+ printf(" fetch const[%d] for arg %d into reg %d\n",
+ src->Index, i, c->current_const[i].reg.nr);
+#endif
+
+ /* need to fetch the constant now */
+ brw_dp_READ_4(p,
+ c->current_const[i].reg, /* writeback dest */
+ src->RelAddr, /* relative indexing? */
+ 16 * src->Index, /* byte offset */
+ SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */
+ );
+ }
+ }
+}
+
+
+/**
+ * Convert Mesa dst register to brw register.
+ */
+static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
+ const struct prog_instruction *inst,
+ GLuint component)
+{
+ const int nr = 1;
+ return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr,
+ 0, 0);
+}
+
+
+static struct brw_reg
+get_src_reg_const(struct brw_wm_compile *c,
+ const struct prog_instruction *inst,
+ GLuint srcRegIndex, GLuint component)
+{
+ /* We should have already fetched the constant from the constant
+ * buffer in fetch_constants(). Now we just have to return a
+ * register description that extracts the needed component and
+ * smears it across all eight vector components.
+ */
+ const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
+ struct brw_reg const_reg;
+
+ assert(component < 4);
+ assert(srcRegIndex < 3);
+ assert(c->current_const[srcRegIndex].index != -1);
+ const_reg = c->current_const[srcRegIndex].reg;
+
+ /* extract desired float from the const_reg, and smear */
+ const_reg = stride(const_reg, 0, 1, 0);
+ const_reg.subnr = component * 4;
+
+ if (src->Negate & (1 << component))
+ const_reg = negate(const_reg);
+ if (src->Abs)
+ const_reg = brw_abs(const_reg);
+
+#if 0
+ printf(" form const[%d].%d for arg %d, reg %d\n",
+ c->current_const[srcRegIndex].index,
+ component,
+ srcRegIndex,
+ const_reg.nr);
+#endif
+
+ return const_reg;
+}
+
+
+/**
+ * Convert Mesa src register to brw register.
+ */
+static struct brw_reg get_src_reg(struct brw_wm_compile *c,
+ const struct prog_instruction *inst,
+ GLuint srcRegIndex, GLuint channel)
+{
+ const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
+ const GLuint nr = 1;
+ const GLuint component = GET_SWZ(src->Swizzle, channel);
+
+ /* Extended swizzle terms */
+ if (component == SWIZZLE_ZERO) {
+ return brw_imm_f(0.0F);
+ }
+ else if (component == SWIZZLE_ONE) {
+ return brw_imm_f(1.0F);
+ }
+
+ if (c->fp->use_const_buffer &&
+ (src->File == PROGRAM_STATE_VAR ||
+ src->File == PROGRAM_CONSTANT ||
+ src->File == PROGRAM_UNIFORM)) {
+ return get_src_reg_const(c, inst, srcRegIndex, component);
+ }
+ else {
+ /* other type of source register */
+ return get_reg(c, src->File, src->Index, component, nr,
+ src->Negate, src->Abs);
+ }
+}
+
+
+/**
+ * Same as \sa get_src_reg() but if the register is a literal, emit
+ * a brw_reg encoding the literal.
+ * Note that a brw instruction only allows one src operand to be a literal.
+ * For instructions with more than one operand, only the second can be a
+ * literal. This means that we treat some literals as constants/uniforms
+ * (which why PROGRAM_CONSTANT is checked in fetch_constants()).
+ *
+ */
+static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c,
+ const struct prog_instruction *inst,
+ GLuint srcRegIndex, GLuint channel)
+{
+ const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
+ if (src->File == PROGRAM_CONSTANT) {
+ /* a literal */
+ const int component = GET_SWZ(src->Swizzle, channel);
+ const GLfloat *param =
+ c->fp->program.Base.Parameters->ParameterValues[src->Index];
+ GLfloat value = param[component];
+ if (src->Negate & (1 << channel))
+ value = -value;
+ if (src->Abs)
+ value = FABSF(value);
+#if 0
+ printf(" form immed value %f for chan %d\n", value, channel);
+#endif
+ return brw_imm_f(value);
+ }
+ else {
+ return get_src_reg(c, inst, srcRegIndex, channel);
+ }
+}
+
+
+/**
+ * Subroutines are minimal support for resusable instruction sequences.
+ * They are implemented as simply as possible to minimise overhead: there
+ * is no explicit support for communication between the caller and callee
+ * other than saving the return address in a temporary register, nor is
+ * there any automatic local storage. This implies that great care is
+ * required before attempting reentrancy or any kind of nested
+ * subroutine invocations.
+ */
+static void invoke_subroutine( struct brw_wm_compile *c,
+ enum _subroutine subroutine,
+ void (*emit)( struct brw_wm_compile * ) )
+{
+ struct brw_compile *p = &c->func;
+
+ assert( subroutine < BRW_WM_MAX_SUBROUTINE );
+
+ if( c->subroutines[ subroutine ] ) {
+ /* subroutine previously emitted: reuse existing instructions */
+
+ int mark = mark_tmps( c );
+ struct brw_reg return_address = retype( alloc_tmp( c ),
+ BRW_REGISTER_TYPE_UD );
+ int here = p->nr_insn;
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
+
+ brw_ADD( p, brw_ip_reg(), brw_ip_reg(),
+ brw_imm_d( ( c->subroutines[ subroutine ] -
+ here - 1 ) << 4 ) );
+ brw_pop_insn_state(p);
+
+ release_tmps( c, mark );
+ } else {
+ /* previously unused subroutine: emit, and mark for later reuse */
+
+ int mark = mark_tmps( c );
+ struct brw_reg return_address = retype( alloc_tmp( c ),
+ BRW_REGISTER_TYPE_UD );
+ struct brw_instruction *calc;
+ int base = p->nr_insn;
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ calc = brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 0 ) );
+ brw_pop_insn_state(p);
+
+ c->subroutines[ subroutine ] = p->nr_insn;
+
+ emit( c );
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV( p, brw_ip_reg(), return_address );
+ brw_pop_insn_state(p);
+
+ brw_set_src1( calc, brw_imm_ud( ( p->nr_insn - base ) << 4 ) );
+
+ release_tmps( c, mark );
+ }
+}
+
+static void emit_trunc( struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ int i;
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ struct brw_reg src, dst;
+ dst = get_dst_reg(c, inst, i);
+ src = get_src_reg(c, inst, 0, i);
+ brw_RNDZ(p, dst, src);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_mov( struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ int i;
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ struct brw_reg src, dst;
+ dst = get_dst_reg(c, inst, i);
+ /* XXX some moves from immediate value don't work reliably!!! */
+ /*src = get_src_reg_imm(c, inst, 0, i);*/
+ src = get_src_reg(c, inst, 0, i);
+ brw_MOV(p, dst, src);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_pixel_xy(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_reg r1 = brw_vec1_grf(1, 0);
+ struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
+
+ struct brw_reg dst0, dst1;
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+
+ dst0 = get_dst_reg(c, inst, 0);
+ dst1 = get_dst_reg(c, inst, 1);
+ /* Calculate pixel centers by adding 1 or 0 to each of the
+ * micro-tile coordinates passed in r1.
+ */
+ if (mask & WRITEMASK_X) {
+ brw_ADD(p,
+ vec8(retype(dst0, BRW_REGISTER_TYPE_UW)),
+ stride(suboffset(r1_uw, 4), 2, 4, 0),
+ brw_imm_v(0x10101010));
+ }
+
+ if (mask & WRITEMASK_Y) {
+ brw_ADD(p,
+ vec8(retype(dst1, BRW_REGISTER_TYPE_UW)),
+ stride(suboffset(r1_uw, 5), 2, 4, 0),
+ brw_imm_v(0x11001100));
+ }
+}
+
+static void emit_delta_xy(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_reg r1 = brw_vec1_grf(1, 0);
+ struct brw_reg dst0, dst1, src0, src1;
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+
+ dst0 = get_dst_reg(c, inst, 0);
+ dst1 = get_dst_reg(c, inst, 1);
+ src0 = get_src_reg(c, inst, 0, 0);
+ src1 = get_src_reg(c, inst, 0, 1);
+ /* Calc delta X,Y by subtracting origin in r1 from the pixel
+ * centers.
+ */
+ if (mask & WRITEMASK_X) {
+ brw_ADD(p,
+ dst0,
+ retype(src0, BRW_REGISTER_TYPE_UW),
+ negate(r1));
+ }
+
+ if (mask & WRITEMASK_Y) {
+ brw_ADD(p,
+ dst1,
+ retype(src1, BRW_REGISTER_TYPE_UW),
+ negate(suboffset(r1,1)));
+
+ }
+}
+
+static void fire_fb_write( struct brw_wm_compile *c,
+ GLuint base_reg,
+ GLuint nr,
+ GLuint target,
+ GLuint eot)
+{
+ struct brw_compile *p = &c->func;
+ /* Pass through control information:
+ */
+ /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
+ {
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
+ brw_MOV(p,
+ brw_message_reg(base_reg + 1),
+ brw_vec8_grf(1, 0));
+ brw_pop_insn_state(p);
+ }
+ /* Send framebuffer write message: */
+ brw_fb_WRITE(p,
+ retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
+ base_reg,
+ retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+ target,
+ nr,
+ 0,
+ eot);
+}
+
+static void emit_fb_write(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ int nr = 2;
+ int channel;
+ GLuint target, eot;
+ struct brw_reg src0;
+
+ /* Reserve a space for AA - may not be needed:
+ */
+ if (c->key.aa_dest_stencil_reg)
+ nr += 1;
+
+ brw_push_insn_state(p);
+ for (channel = 0; channel < 4; channel++) {
+ src0 = get_src_reg(c, inst, 0, channel);
+ /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
+ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
+ brw_MOV(p, brw_message_reg(nr + channel), src0);
+ }
+ /* skip over the regs populated above: */
+ nr += 8;
+ brw_pop_insn_state(p);
+
+ if (c->key.source_depth_to_render_target) {
+ if (c->key.computes_depth) {
+ src0 = get_src_reg(c, inst, 2, 2);
+ brw_MOV(p, brw_message_reg(nr), src0);
+ }
+ else {
+ src0 = get_src_reg(c, inst, 1, 1);
+ brw_MOV(p, brw_message_reg(nr), src0);
+ }
+
+ nr += 2;
+ }
+
+ if (c->key.dest_depth_reg) {
+ const GLuint comp = c->key.dest_depth_reg / 2;
+ const GLuint off = c->key.dest_depth_reg % 2;
+
+ if (off != 0) {
+ /* XXX this code needs review/testing */
+ struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp);
+ struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1);
+
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1));
+ /* 2nd half? */
+ brw_MOV(p, brw_message_reg(nr+1), arg1_1);
+ brw_pop_insn_state(p);
+ }
+ else
+ {
+ struct brw_reg src = get_src_reg(c, inst, 1, 1);
+ brw_MOV(p, brw_message_reg(nr), src);
+ }
+ nr += 2;
+ }
+
+ target = inst->Aux >> 1;
+ eot = inst->Aux & 1;
+ fire_fb_write(c, 0, nr, target, eot);
+}
+
+static void emit_pixel_w( struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ if (mask & WRITEMASK_W) {
+ struct brw_reg dst, src0, delta0, delta1;
+ struct brw_reg interp3;
+
+ dst = get_dst_reg(c, inst, 3);
+ src0 = get_src_reg(c, inst, 0, 0);
+ delta0 = get_src_reg(c, inst, 1, 0);
+ delta1 = get_src_reg(c, inst, 1, 1);
+
+ interp3 = brw_vec1_grf(src0.nr+1, 4);
+ /* Calc 1/w - just linterp wpos[3] optimized by putting the
+ * result straight into a message reg.
+ */
+ brw_LINE(p, brw_null_reg(), interp3, delta0);
+ brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1);
+
+ /* Calc w */
+ brw_math_16( p, dst,
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 2, brw_null_reg(),
+ BRW_MATH_PRECISION_FULL);
+ }
+}
+
+static void emit_linterp(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ struct brw_reg interp[4];
+ struct brw_reg dst, delta0, delta1;
+ struct brw_reg src0;
+ GLuint nr, i;
+
+ src0 = get_src_reg(c, inst, 0, 0);
+ delta0 = get_src_reg(c, inst, 1, 0);
+ delta1 = get_src_reg(c, inst, 1, 1);
+ nr = src0.nr;
+
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ brw_LINE(p, brw_null_reg(), interp[i], delta0);
+ brw_MAC(p, dst, suboffset(interp[i],1), delta1);
+ }
+ }
+}
+
+static void emit_cinterp(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+
+ struct brw_reg interp[4];
+ struct brw_reg dst, src0;
+ GLuint nr, i;
+
+ src0 = get_src_reg(c, inst, 0, 0);
+ nr = src0.nr;
+
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ brw_MOV(p, dst, suboffset(interp[i],3));
+ }
+ }
+}
+
+static void emit_pinterp(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+
+ struct brw_reg interp[4];
+ struct brw_reg dst, delta0, delta1;
+ struct brw_reg src0, w;
+ GLuint nr, i;
+
+ src0 = get_src_reg(c, inst, 0, 0);
+ delta0 = get_src_reg(c, inst, 1, 0);
+ delta1 = get_src_reg(c, inst, 1, 1);
+ w = get_src_reg(c, inst, 2, 3);
+ nr = src0.nr;
+
+ interp[0] = brw_vec1_grf(nr, 0);
+ interp[1] = brw_vec1_grf(nr, 4);
+ interp[2] = brw_vec1_grf(nr+1, 0);
+ interp[3] = brw_vec1_grf(nr+1, 4);
+
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ brw_LINE(p, brw_null_reg(), interp[i], delta0);
+ brw_MAC(p, dst, suboffset(interp[i],1),
+ delta1);
+ brw_MUL(p, dst, dst, w);
+ }
+ }
+}
+
+/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
+static void emit_frontfacing(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
+ struct brw_reg dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ brw_MOV(p, dst, brw_imm_f(0.0));
+ }
+ }
+
+ /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
+ * us front face
+ */
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ brw_MOV(p, dst, brw_imm_f(1.0));
+ }
+ }
+ brw_set_predicate_control_flag_value(p, 0xff);
+}
+
+static void emit_xpd(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ int i;
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ for (i = 0; i < 4; i++) {
+ GLuint i2 = (i+2)%3;
+ GLuint i1 = (i+1)%3;
+ if (mask & (1<<i)) {
+ struct brw_reg src0, src1, dst;
+ dst = get_dst_reg(c, inst, i);
+ src0 = negate(get_src_reg(c, inst, 0, i2));
+ src1 = get_src_reg_imm(c, inst, 1, i1);
+ brw_MUL(p, brw_null_reg(), src0, src1);
+ src0 = get_src_reg(c, inst, 0, i1);
+ src1 = get_src_reg_imm(c, inst, 1, i2);
+ brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+ brw_MAC(p, dst, src0, src1);
+ brw_set_saturate(p, 0);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_dp3(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_reg src0[3], src1[3], dst;
+ int i;
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
+ for (i = 0; i < 3; i++) {
+ src0[i] = get_src_reg(c, inst, 0, i);
+ src1[i] = get_src_reg_imm(c, inst, 1, i);
+ }
+
+ dst = get_dst_reg(c, inst, dst_chan);
+ brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
+ brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_MAC(p, dst, src0[2], src1[2]);
+ brw_set_saturate(p, 0);
+}
+
+static void emit_dp4(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_reg src0[4], src1[4], dst;
+ int i;
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
+ for (i = 0; i < 4; i++) {
+ src0[i] = get_src_reg(c, inst, 0, i);
+ src1[i] = get_src_reg_imm(c, inst, 1, i);
+ }
+ dst = get_dst_reg(c, inst, dst_chan);
+ brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
+ brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
+ brw_MAC(p, brw_null_reg(), src0[2], src1[2]);
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_MAC(p, dst, src0[3], src1[3]);
+ brw_set_saturate(p, 0);
+}
+
+static void emit_dph(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_reg src0[4], src1[4], dst;
+ int i;
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
+ for (i = 0; i < 4; i++) {
+ src0[i] = get_src_reg(c, inst, 0, i);
+ src1[i] = get_src_reg_imm(c, inst, 1, i);
+ }
+ dst = get_dst_reg(c, inst, dst_chan);
+ brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
+ brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
+ brw_MAC(p, dst, src0[2], src1[2]);
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_ADD(p, dst, dst, src1[3]);
+ brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit a scalar instruction, like RCP, RSQ, LOG, EXP.
+ * Note that the result of the function is smeared across the dest
+ * register's X, Y, Z and W channels (subject to writemasking of course).
+ */
+static void emit_math1(struct brw_wm_compile *c,
+ const struct prog_instruction *inst, GLuint func)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
+ /* Get first component of source register */
+ dst = get_dst_reg(c, inst, dst_chan);
+ src0 = get_src_reg(c, inst, 0, 0);
+
+ brw_MOV(p, brw_message_reg(2), src0);
+ brw_math(p,
+ dst,
+ func,
+ (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ 2,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+}
+
+static void emit_rcp(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ emit_math1(c, inst, BRW_MATH_FUNCTION_INV);
+}
+
+static void emit_rsq(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ);
+}
+
+static void emit_sin(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ emit_math1(c, inst, BRW_MATH_FUNCTION_SIN);
+}
+
+static void emit_cos(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ emit_math1(c, inst, BRW_MATH_FUNCTION_COS);
+}
+
+static void emit_ex2(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ emit_math1(c, inst, BRW_MATH_FUNCTION_EXP);
+}
+
+static void emit_lg2(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ emit_math1(c, inst, BRW_MATH_FUNCTION_LOG);
+}
+
+static void emit_add(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, src1, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, inst, 0, i);
+ src1 = get_src_reg_imm(c, inst, 1, i);
+ brw_ADD(p, dst, src0, src1);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_arl(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, addr_reg;
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_ADDRESS, 0);
+ src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */
+ brw_MOV(p, addr_reg, src0);
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_mul(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, src1, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, inst, 0, i);
+ src1 = get_src_reg_imm(c, inst, 1, i);
+ brw_MUL(p, dst, src0, src1);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+static void emit_frc(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg_imm(c, inst, 0, i);
+ brw_FRC(p, dst, src0);
+ }
+ }
+ if (inst->SaturateMode != SATURATE_OFF)
+ brw_set_saturate(p, 0);
+}
+
+static void emit_flr(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg_imm(c, inst, 0, i);
+ brw_RNDD(p, dst, src0);
+ }
+ }
+ brw_set_saturate(p, 0);
+}
+
+
+static void emit_min_max(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ const GLuint mask = inst->DstReg.WriteMask;
+ const int mark = mark_tmps(c);
+ int i;
+ brw_push_insn_state(p);
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ struct brw_reg real_dst = get_dst_reg(c, inst, i);
+ struct brw_reg src0 = get_src_reg(c, inst, 0, i);
+ struct brw_reg src1 = get_src_reg(c, inst, 1, i);
+ struct brw_reg dst;
+ /* if dst==src0 or dst==src1 we need to use a temp reg */
+ GLboolean use_temp = brw_same_reg(dst, src0) ||
+ brw_same_reg(dst, src1);
+ if (use_temp)
+ dst = alloc_tmp(c);
+ else
+ dst = real_dst;
+
+ /*
+ printf(" Min/max: dst %d src0 %d src1 %d\n",
+ dst.nr, src0.nr, src1.nr);
+ */
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_MOV(p, dst, src0);
+ brw_set_saturate(p, 0);
+
+ if (inst->Opcode == OPCODE_MIN)
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
+ else
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, src1, src0);
+
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ brw_MOV(p, dst, src1);
+ brw_set_saturate(p, 0);
+ brw_set_predicate_control_flag_value(p, 0xff);
+ if (use_temp)
+ brw_MOV(p, real_dst, dst);
+ }
+ }
+ brw_pop_insn_state(p);
+ release_tmps(c, mark);
+}
+
+static void emit_pow(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg dst, src0, src1;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
+ dst = get_dst_reg(c, inst, dst_chan);
+ src0 = get_src_reg_imm(c, inst, 0, 0);
+ src1 = get_src_reg_imm(c, inst, 1, 0);
+
+ brw_MOV(p, brw_message_reg(2), src0);
+ brw_MOV(p, brw_message_reg(3), src1);
+
+ brw_math(p,
+ dst,
+ BRW_MATH_FUNCTION_POW,
+ (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ 2,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+}
+
+static void emit_lrp(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ struct brw_reg dst, tmp1, tmp2, src0, src1, src2;
+ int i;
+ int mark = mark_tmps(c);
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, inst, 0, i);
+
+ src1 = get_src_reg_imm(c, inst, 1, i);
+
+ if (src1.nr == dst.nr) {
+ tmp1 = alloc_tmp(c);
+ brw_MOV(p, tmp1, src1);
+ } else
+ tmp1 = src1;
+
+ src2 = get_src_reg(c, inst, 2, i);
+ if (src2.nr == dst.nr) {
+ tmp2 = alloc_tmp(c);
+ brw_MOV(p, tmp2, src2);
+ } else
+ tmp2 = src2;
+
+ brw_ADD(p, dst, negate(src0), brw_imm_f(1.0));
+ brw_MUL(p, brw_null_reg(), dst, tmp2);
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_MAC(p, dst, src0, tmp1);
+ brw_set_saturate(p, 0);
+ }
+ release_tmps(c, mark);
+ }
+}
+
+/**
+ * For GLSL shaders, this KIL will be unconditional.
+ * It may be contained inside an IF/ENDIF structure of course.
+ */
+static void emit_kil(struct brw_wm_compile *c)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+ brw_AND(p, depth, c->emit_mask_reg, depth);
+ brw_pop_insn_state(p);
+}
+
+static void emit_mad(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ struct brw_reg dst, src0, src1, src2;
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, inst, 0, i);
+ src1 = get_src_reg_imm(c, inst, 1, i);
+ src2 = get_src_reg_imm(c, inst, 2, i);
+ brw_MUL(p, dst, src0, src1);
+
+ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+ brw_ADD(p, dst, dst, src2);
+ brw_set_saturate(p, 0);
+ }
+ }
+}
+
+static void emit_sop(struct brw_wm_compile *c,
+ const struct prog_instruction *inst, GLuint cond)
+{
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ struct brw_reg dst, src0, src1;
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, inst, 0, i);
+ src1 = get_src_reg_imm(c, inst, 1, i);
+ brw_push_insn_state(p);
+ brw_CMP(p, brw_null_reg(), cond, src0, src1);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_MOV(p, dst, brw_imm_f(0.0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ brw_MOV(p, dst, brw_imm_f(1.0));
+ brw_pop_insn_state(p);
+ }
+ }
+}
+
+static void emit_slt(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ emit_sop(c, inst, BRW_CONDITIONAL_L);
+}
+
+static void emit_sle(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ emit_sop(c, inst, BRW_CONDITIONAL_LE);
+}
+
+static void emit_sgt(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ emit_sop(c, inst, BRW_CONDITIONAL_G);
+}
+
+static void emit_sge(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ emit_sop(c, inst, BRW_CONDITIONAL_GE);
+}
+
+static void emit_seq(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ emit_sop(c, inst, BRW_CONDITIONAL_EQ);
+}
+
+static void emit_sne(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
+}
+
+static INLINE struct brw_reg high_words( struct brw_reg reg )
+{
+ return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ),
+ 0, 8, 2 );
+}
+
+static INLINE struct brw_reg low_words( struct brw_reg reg )
+{
+ return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 );
+}
+
+static INLINE struct brw_reg even_bytes( struct brw_reg reg )
+{
+ return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 );
+}
+
+static INLINE struct brw_reg odd_bytes( struct brw_reg reg )
+{
+ return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ),
+ 0, 16, 2 );
+}
+
+/* One-, two- and three-dimensional Perlin noise, similar to the description
+ in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */
+static void noise1_sub( struct brw_wm_compile *c ) {
+
+ struct brw_compile *p = &c->func;
+ struct brw_reg param,
+ x0, x1, /* gradients at each end */
+ t, tmp[ 2 ], /* float temporaries */
+ itmp[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
+ int i;
+ int mark = mark_tmps( c );
+
+ x0 = alloc_tmp( c );
+ x1 = alloc_tmp( c );
+ t = alloc_tmp( c );
+ tmp[ 0 ] = alloc_tmp( c );
+ tmp[ 1 ] = alloc_tmp( c );
+ itmp[ 0 ] = retype( tmp[ 0 ], BRW_REGISTER_TYPE_UD );
+ itmp[ 1 ] = retype( tmp[ 1 ], BRW_REGISTER_TYPE_UD );
+ itmp[ 2 ] = retype( x0, BRW_REGISTER_TYPE_UD );
+ itmp[ 3 ] = retype( x1, BRW_REGISTER_TYPE_UD );
+ itmp[ 4 ] = retype( t, BRW_REGISTER_TYPE_UD );
+
+ param = lookup_tmp( c, mark - 2 );
+
+ brw_set_access_mode( p, BRW_ALIGN_1 );
+
+ brw_MOV( p, itmp[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
+
+ /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
+ be hashed. Also compute the remainder (offset within the unit
+ length), interleaved to reduce register dependency penalties. */
+ brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param );
+ brw_FRC( p, param, param );
+ brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 1 ) );
+ brw_MOV( p, itmp[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
+ brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
+
+ /* We're now ready to perform the hashing. The two hashes are
+ interleaved for performance. The hash function used is
+ designed to rapidly achieve avalanche and require only 32x16
+ bit multiplication, and 16-bit swizzles (which we get for
+ free). We can't use immediate operands in the multiplies,
+ because immediates are permitted only in src1 and the 16-bit
+ factor is permitted only in src0. */
+ for( i = 0; i < 2; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 2 ], itmp[ i ] );
+ for( i = 0; i < 2; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+ for( i = 0; i < 2; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 3 ], itmp[ i ] );
+ for( i = 0; i < 2; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+ for( i = 0; i < 2; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] );
+ for( i = 0; i < 2; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+
+ /* Now we want to initialise the two gradients based on the
+ hashes. Format conversion from signed integer to float leaves
+ everything scaled too high by a factor of pow( 2, 31 ), but
+ we correct for that right at the end. */
+ brw_ADD( p, t, param, brw_imm_f( -1.0 ) );
+ brw_MOV( p, x0, retype( tmp[ 0 ], BRW_REGISTER_TYPE_D ) );
+ brw_MOV( p, x1, retype( tmp[ 1 ], BRW_REGISTER_TYPE_D ) );
+
+ brw_MUL( p, x0, x0, param );
+ brw_MUL( p, x1, x1, t );
+
+ /* We interpolate between the gradients using the polynomial
+ 6t^5 - 15t^4 + 10t^3 (Perlin). */
+ brw_MUL( p, tmp[ 0 ], param, brw_imm_f( 6.0 ) );
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) );
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) );
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
+ brw_ADD( p, x1, x1, negate( x0 ) ); /* unrelated work to fill the
+ pipeline */
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
+ brw_MUL( p, param, tmp[ 0 ], param );
+ brw_MUL( p, x1, x1, param );
+ brw_ADD( p, x0, x0, x1 );
+ /* scale by pow( 2, -30 ), to compensate for the format conversion
+ above and an extra factor of 2 so that a single gradient covers
+ the [-1,1] range */
+ brw_MUL( p, param, x0, brw_imm_f( 0.000000000931322574615478515625 ) );
+
+ release_tmps( c, mark );
+}
+
+static void emit_noise1( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src, param, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+ int mark = mark_tmps( c );
+
+ assert( mark == 0 );
+
+ src = get_src_reg( c, inst, 0, 0 );
+
+ param = alloc_tmp( c );
+
+ brw_MOV( p, param, src );
+
+ invoke_subroutine( c, SUB_NOISE1, noise1_sub );
+
+ /* Fill in the result: */
+ brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ brw_MOV( p, dst, param );
+ }
+ }
+ if( inst->SaturateMode == SATURATE_ZERO_ONE )
+ brw_set_saturate( p, 0 );
+
+ release_tmps( c, mark );
+}
+
+static void noise2_sub( struct brw_wm_compile *c ) {
+
+ struct brw_compile *p = &c->func;
+ struct brw_reg param0, param1,
+ x0y0, x0y1, x1y0, x1y1, /* gradients at each corner */
+ t, tmp[ 4 ], /* float temporaries */
+ itmp[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
+ int i;
+ int mark = mark_tmps( c );
+
+ x0y0 = alloc_tmp( c );
+ x0y1 = alloc_tmp( c );
+ x1y0 = alloc_tmp( c );
+ x1y1 = alloc_tmp( c );
+ t = alloc_tmp( c );
+ for( i = 0; i < 4; i++ ) {
+ tmp[ i ] = alloc_tmp( c );
+ itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );
+ }
+ itmp[ 4 ] = retype( x0y0, BRW_REGISTER_TYPE_UD );
+ itmp[ 5 ] = retype( x0y1, BRW_REGISTER_TYPE_UD );
+ itmp[ 6 ] = retype( x1y0, BRW_REGISTER_TYPE_UD );
+
+ param0 = lookup_tmp( c, mark - 3 );
+ param1 = lookup_tmp( c, mark - 2 );
+
+ brw_set_access_mode( p, BRW_ALIGN_1 );
+
+ /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
+ be hashed. Also compute the remainders (offsets within the unit
+ square), interleaved to reduce register dependency penalties. */
+ brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 );
+ brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 );
+ brw_FRC( p, param0, param0 );
+ brw_FRC( p, param1, param1 );
+ brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
+ brw_ADD( p, high_words( itmp[ 0 ] ), high_words( itmp[ 0 ] ),
+ low_words( itmp[ 1 ] ) );
+ brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
+ brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
+ brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 0x10000 ) );
+ brw_ADD( p, itmp[ 2 ], itmp[ 0 ], brw_imm_ud( 0x1 ) );
+ brw_ADD( p, itmp[ 3 ], itmp[ 0 ], brw_imm_ud( 0x10001 ) );
+
+ /* We're now ready to perform the hashing. The four hashes are
+ interleaved for performance. The hash function used is
+ designed to rapidly achieve avalanche and require only 32x16
+ bit multiplication, and 16-bit swizzles (which we get for
+ free). We can't use immediate operands in the multiplies,
+ because immediates are permitted only in src1 and the 16-bit
+ factor is permitted only in src0. */
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 5 ], itmp[ i ] );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 6 ], itmp[ i ] );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+
+ /* Now we want to initialise the four gradients based on the
+ hashes. Format conversion from signed integer to float leaves
+ everything scaled too high by a factor of pow( 2, 15 ), but
+ we correct for that right at the end. */
+ brw_ADD( p, t, param0, brw_imm_f( -1.0 ) );
+ brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) );
+ brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );
+ brw_MOV( p, x1y0, low_words( tmp[ 2 ] ) );
+ brw_MOV( p, x1y1, low_words( tmp[ 3 ] ) );
+
+ brw_MOV( p, tmp[ 0 ], high_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 1 ], high_words( tmp[ 1 ] ) );
+ brw_MOV( p, tmp[ 2 ], high_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 3 ], high_words( tmp[ 3 ] ) );
+
+ brw_MUL( p, x1y0, x1y0, t );
+ brw_MUL( p, x1y1, x1y1, t );
+ brw_ADD( p, t, param1, brw_imm_f( -1.0 ) );
+ brw_MUL( p, x0y0, x0y0, param0 );
+ brw_MUL( p, x0y1, x0y1, param0 );
+
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param1 );
+ brw_MUL( p, tmp[ 2 ], tmp[ 2 ], param1 );
+ brw_MUL( p, tmp[ 1 ], tmp[ 1 ], t );
+ brw_MUL( p, tmp[ 3 ], tmp[ 3 ], t );
+
+ brw_ADD( p, x0y0, x0y0, tmp[ 0 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 2 ] );
+ brw_ADD( p, x0y1, x0y1, tmp[ 1 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 3 ] );
+
+ /* We interpolate between the gradients using the polynomial
+ 6t^5 - 15t^4 + 10t^3 (Perlin). */
+ brw_MUL( p, tmp[ 0 ], param0, brw_imm_f( 6.0 ) );
+ brw_MUL( p, tmp[ 1 ], param1, brw_imm_f( 6.0 ) );
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) );
+ brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( -15.0 ) );
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
+ brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
+ brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work to fill the
+ pipeline */
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) );
+ brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( 10.0 ) );
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
+ brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
+ brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work to fill the
+ pipeline */
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
+ brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
+ brw_MUL( p, param0, tmp[ 0 ], param0 );
+ brw_MUL( p, param1, tmp[ 1 ], param1 );
+
+ /* Here we interpolate in the y dimension... */
+ brw_MUL( p, x0y1, x0y1, param1 );
+ brw_MUL( p, x1y1, x1y1, param1 );
+ brw_ADD( p, x0y0, x0y0, x0y1 );
+ brw_ADD( p, x1y0, x1y0, x1y1 );
+
+ /* And now in x. There are horrible register dependencies here,
+ but we have nothing else to do. */
+ brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+ brw_MUL( p, x1y0, x1y0, param0 );
+ brw_ADD( p, x0y0, x0y0, x1y0 );
+
+ /* scale by pow( 2, -15 ), as described above */
+ brw_MUL( p, param0, x0y0, brw_imm_f( 0.000030517578125 ) );
+
+ release_tmps( c, mark );
+}
+
+static void emit_noise2( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, src1, param0, param1, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+ int mark = mark_tmps( c );
+
+ assert( mark == 0 );
+
+ src0 = get_src_reg( c, inst, 0, 0 );
+ src1 = get_src_reg( c, inst, 0, 1 );
+
+ param0 = alloc_tmp( c );
+ param1 = alloc_tmp( c );
+
+ brw_MOV( p, param0, src0 );
+ brw_MOV( p, param1, src1 );
+
+ invoke_subroutine( c, SUB_NOISE2, noise2_sub );
+
+ /* Fill in the result: */
+ brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ brw_MOV( p, dst, param0 );
+ }
+ }
+ if( inst->SaturateMode == SATURATE_ZERO_ONE )
+ brw_set_saturate( p, 0 );
+
+ release_tmps( c, mark );
+}
+
+/**
+ * The three-dimensional case is much like the one- and two- versions above,
+ * but since the number of corners is rapidly growing we now pack 16 16-bit
+ * hashes into each register to extract more parallelism from the EUs.
+ */
+static void noise3_sub( struct brw_wm_compile *c ) {
+
+ struct brw_compile *p = &c->func;
+ struct brw_reg param0, param1, param2,
+ x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */
+ xi, yi, zi, /* interpolation coefficients */
+ t, tmp[ 8 ], /* float temporaries */
+ itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
+ wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
+ int i;
+ int mark = mark_tmps( c );
+
+ x0y0 = alloc_tmp( c );
+ x0y1 = alloc_tmp( c );
+ x1y0 = alloc_tmp( c );
+ x1y1 = alloc_tmp( c );
+ xi = alloc_tmp( c );
+ yi = alloc_tmp( c );
+ zi = alloc_tmp( c );
+ t = alloc_tmp( c );
+ for( i = 0; i < 8; i++ ) {
+ tmp[ i ] = alloc_tmp( c );
+ itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );
+ wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 );
+ }
+
+ param0 = lookup_tmp( c, mark - 4 );
+ param1 = lookup_tmp( c, mark - 3 );
+ param2 = lookup_tmp( c, mark - 2 );
+
+ brw_set_access_mode( p, BRW_ALIGN_1 );
+
+ /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
+ be hashed. Also compute the remainders (offsets within the unit
+ cube), interleaved to reduce register dependency penalties. */
+ brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 );
+ brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 );
+ brw_RNDD( p, retype( itmp[ 2 ], BRW_REGISTER_TYPE_D ), param2 );
+ brw_FRC( p, param0, param0 );
+ brw_FRC( p, param1, param1 );
+ brw_FRC( p, param2, param2 );
+ /* Since we now have only 16 bits of precision in the hash, we must
+ be more careful about thorough mixing to maintain entropy as we
+ squash the input vector into a small scalar. */
+ brw_MUL( p, brw_null_reg(), low_words( itmp[ 0 ] ), brw_imm_uw( 0xBC8F ) );
+ brw_MAC( p, brw_null_reg(), low_words( itmp[ 1 ] ), brw_imm_uw( 0xD0BD ) );
+ brw_MAC( p, low_words( itmp[ 0 ] ), low_words( itmp[ 2 ] ),
+ brw_imm_uw( 0x9B93 ) );
+ brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ),
+ brw_imm_uw( 0xBC8F ) );
+
+ /* Temporarily disable the execution mask while we work with ExecSize=16
+ channels (the mask is set for ExecSize=8 and is probably incorrect).
+ Although this might cause execution of unwanted channels, the code
+ writes only to temporary registers and has no side effects, so
+ disabling the mask is harmless. */
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) );
+ brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) );
+ brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) );
+
+ /* We're now ready to perform the hashing. The eight hashes are
+ interleaved for performance. The hash function used is
+ designed to rapidly achieve avalanche and require only 16x16
+ bit multiplication, and 8-bit swizzles (which we get for
+ free). */
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
+ odd_bytes( wtmp[ i ] ) );
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
+ odd_bytes( wtmp[ i ] ) );
+ brw_pop_insn_state( p );
+
+ /* Now we want to initialise the four rear gradients based on the
+ hashes. Format conversion from signed integer to float leaves
+ everything scaled too high by a factor of pow( 2, 15 ), but
+ we correct for that right at the end. */
+ /* x component */
+ brw_ADD( p, t, param0, brw_imm_f( -1.0 ) );
+ brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) );
+ brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );
+ brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) );
+ brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) );
+ brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, x1y0, x1y0, t );
+ brw_MUL( p, x1y1, x1y1, t );
+ brw_ADD( p, t, param1, brw_imm_f( -1.0 ) );
+ brw_MUL( p, x0y0, x0y0, param0 );
+ brw_MUL( p, x0y1, x0y1, param0 );
+
+ /* y component */
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) );
+ brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+ brw_ADD( p, t, param0, brw_imm_f( -1.0 ) );
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 );
+
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+
+ /* z component */
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param2 );
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param2 );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param2 );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param2 );
+
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+ /* We interpolate between the gradients using the polynomial
+ 6t^5 - 15t^4 + 10t^3 (Perlin). */
+ brw_MUL( p, xi, param0, brw_imm_f( 6.0 ) );
+ brw_MUL( p, yi, param1, brw_imm_f( 6.0 ) );
+ brw_MUL( p, zi, param2, brw_imm_f( 6.0 ) );
+ brw_ADD( p, xi, xi, brw_imm_f( -15.0 ) );
+ brw_ADD( p, yi, yi, brw_imm_f( -15.0 ) );
+ brw_ADD( p, zi, zi, brw_imm_f( -15.0 ) );
+ brw_MUL( p, xi, xi, param0 );
+ brw_MUL( p, yi, yi, param1 );
+ brw_MUL( p, zi, zi, param2 );
+ brw_ADD( p, xi, xi, brw_imm_f( 10.0 ) );
+ brw_ADD( p, yi, yi, brw_imm_f( 10.0 ) );
+ brw_ADD( p, zi, zi, brw_imm_f( 10.0 ) );
+ brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work */
+ brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work */
+ brw_MUL( p, xi, xi, param0 );
+ brw_MUL( p, yi, yi, param1 );
+ brw_MUL( p, zi, zi, param2 );
+ brw_MUL( p, xi, xi, param0 );
+ brw_MUL( p, yi, yi, param1 );
+ brw_MUL( p, zi, zi, param2 );
+ brw_MUL( p, xi, xi, param0 );
+ brw_MUL( p, yi, yi, param1 );
+ brw_MUL( p, zi, zi, param2 );
+
+ /* Here we interpolate in the y dimension... */
+ brw_MUL( p, x0y1, x0y1, yi );
+ brw_MUL( p, x1y1, x1y1, yi );
+ brw_ADD( p, x0y0, x0y0, x0y1 );
+ brw_ADD( p, x1y0, x1y0, x1y1 );
+
+ /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
+ brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+ brw_MUL( p, x1y0, x1y0, xi );
+ brw_ADD( p, tmp[ 0 ], x0y0, x1y0 );
+
+ /* Now do the same thing for the front four gradients... */
+ /* x component */
+ brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) );
+ brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) );
+ brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) );
+ brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) );
+ brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, x1y0, x1y0, t );
+ brw_MUL( p, x1y1, x1y1, t );
+ brw_ADD( p, t, param1, brw_imm_f( -1.0 ) );
+ brw_MUL( p, x0y0, x0y0, param0 );
+ brw_MUL( p, x0y1, x0y1, param0 );
+
+ /* y component */
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) );
+ brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+ brw_ADD( p, t, param2, brw_imm_f( -1.0 ) );
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 );
+
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+
+ /* z component */
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+ /* The interpolation coefficients are still around from last time, so
+ again interpolate in the y dimension... */
+ brw_ADD( p, x0y1, x0y1, negate( x0y0 ) );
+ brw_ADD( p, x1y1, x1y1, negate( x1y0 ) );
+ brw_MUL( p, x0y1, x0y1, yi );
+ brw_MUL( p, x1y1, x1y1, yi );
+ brw_ADD( p, x0y0, x0y0, x0y1 );
+ brw_ADD( p, x1y0, x1y0, x1y1 );
+
+ /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
+ time put the front face in tmp[ 1 ] and we're nearly there... */
+ brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+ brw_MUL( p, x1y0, x1y0, xi );
+ brw_ADD( p, tmp[ 1 ], x0y0, x1y0 );
+
+ /* The final interpolation, in the z dimension: */
+ brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) );
+ brw_MUL( p, tmp[ 1 ], tmp[ 1 ], zi );
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] );
+
+ /* scale by pow( 2, -15 ), as described above */
+ brw_MUL( p, param0, tmp[ 0 ], brw_imm_f( 0.000030517578125 ) );
+
+ release_tmps( c, mark );
+}
+
+static void emit_noise3( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, src1, src2, param0, param1, param2, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+ int mark = mark_tmps( c );
+
+ assert( mark == 0 );
+
+ src0 = get_src_reg( c, inst, 0, 0 );
+ src1 = get_src_reg( c, inst, 0, 1 );
+ src2 = get_src_reg( c, inst, 0, 2 );
+
+ param0 = alloc_tmp( c );
+ param1 = alloc_tmp( c );
+ param2 = alloc_tmp( c );
+
+ brw_MOV( p, param0, src0 );
+ brw_MOV( p, param1, src1 );
+ brw_MOV( p, param2, src2 );
+
+ invoke_subroutine( c, SUB_NOISE3, noise3_sub );
+
+ /* Fill in the result: */
+ brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ brw_MOV( p, dst, param0 );
+ }
+ }
+ if( inst->SaturateMode == SATURATE_ZERO_ONE )
+ brw_set_saturate( p, 0 );
+
+ release_tmps( c, mark );
+}
+
+/**
+ * For the four-dimensional case, the little micro-optimisation benefits
+ * we obtain by unrolling all the loops aren't worth the massive bloat it
+ * now causes. Instead, we loop twice around performing a similar operation
+ * to noise3, once for the w=0 cube and once for the w=1, with a bit more
+ * code to glue it all together.
+ */
+static void noise4_sub( struct brw_wm_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg param[ 4 ],
+ x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */
+ w0, /* noise for the w=0 cube */
+ floors[ 2 ], /* integer coordinates of base corner of hypercube */
+ interp[ 4 ], /* interpolation coefficients */
+ t, tmp[ 8 ], /* float temporaries */
+ itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
+ wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
+ int i, j;
+ int mark = mark_tmps( c );
+ GLuint loop, origin;
+
+ x0y0 = alloc_tmp( c );
+ x0y1 = alloc_tmp( c );
+ x1y0 = alloc_tmp( c );
+ x1y1 = alloc_tmp( c );
+ t = alloc_tmp( c );
+ w0 = alloc_tmp( c );
+ floors[ 0 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD );
+ floors[ 1 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD );
+
+ for( i = 0; i < 4; i++ ) {
+ param[ i ] = lookup_tmp( c, mark - 5 + i );
+ interp[ i ] = alloc_tmp( c );
+ }
+
+ for( i = 0; i < 8; i++ ) {
+ tmp[ i ] = alloc_tmp( c );
+ itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );
+ wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 );
+ }
+
+ brw_set_access_mode( p, BRW_ALIGN_1 );
+
+ /* We only want 16 bits of precision from the integral part of each
+ co-ordinate, but unfortunately the RNDD semantics would saturate
+ at 16 bits if we performed the operation directly to a 16-bit
+ destination. Therefore, we round to 32-bit temporaries where
+ appropriate, and then store only the lower 16 bits. */
+ brw_RNDD( p, retype( floors[ 0 ], BRW_REGISTER_TYPE_D ), param[ 0 ] );
+ brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param[ 1 ] );
+ brw_RNDD( p, retype( floors[ 1 ], BRW_REGISTER_TYPE_D ), param[ 2 ] );
+ brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param[ 3 ] );
+ brw_MOV( p, high_words( floors[ 0 ] ), low_words( itmp[ 0 ] ) );
+ brw_MOV( p, high_words( floors[ 1 ] ), low_words( itmp[ 1 ] ) );
+
+ /* Modify the flag register here, because the side effect is useful
+ later (see below). We know for certain that all flags will be
+ cleared, since the FRC instruction cannot possibly generate
+ negative results. Even for exceptional inputs (infinities, denormals,
+ NaNs), the architecture guarantees that the L conditional is false. */
+ brw_set_conditionalmod( p, BRW_CONDITIONAL_L );
+ brw_FRC( p, param[ 0 ], param[ 0 ] );
+ brw_set_predicate_control( p, BRW_PREDICATE_NONE );
+ for( i = 1; i < 4; i++ )
+ brw_FRC( p, param[ i ], param[ i ] );
+
+ /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first
+ of all. */
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, interp[ i ], param[ i ], brw_imm_f( 6.0 ) );
+ for( i = 0; i < 4; i++ )
+ brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( -15.0 ) );
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, interp[ i ], interp[ i ], param[ i ] );
+ for( i = 0; i < 4; i++ )
+ brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( 10.0 ) );
+ for( j = 0; j < 3; j++ )
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, interp[ i ], interp[ i ], param[ i ] );
+
+ /* Mark the current address, as it will be a jump destination. The
+ following code will be executed twice: first, with the flag
+ register clear indicating the w=0 case, and second with flags
+ set for w=1. */
+ loop = p->nr_insn;
+
+ /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
+ be hashed. Since we have only 16 bits of precision in the hash, we
+ must be careful about thorough mixing to maintain entropy as we
+ squash the input vector into a small scalar. */
+ brw_MUL( p, brw_null_reg(), low_words( floors[ 0 ] ),
+ brw_imm_uw( 0xBC8F ) );
+ brw_MAC( p, brw_null_reg(), high_words( floors[ 0 ] ),
+ brw_imm_uw( 0xD0BD ) );
+ brw_MAC( p, brw_null_reg(), low_words( floors[ 1 ] ),
+ brw_imm_uw( 0x9B93 ) );
+ brw_MAC( p, low_words( itmp[ 0 ] ), high_words( floors[ 1 ] ),
+ brw_imm_uw( 0xA359 ) );
+ brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ),
+ brw_imm_uw( 0xBC8F ) );
+
+ /* Temporarily disable the execution mask while we work with ExecSize=16
+ channels (the mask is set for ExecSize=8 and is probably incorrect).
+ Although this might cause execution of unwanted channels, the code
+ writes only to temporary registers and has no side effects, so
+ disabling the mask is harmless. */
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) );
+ brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) );
+ brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) );
+
+ /* We're now ready to perform the hashing. The eight hashes are
+ interleaved for performance. The hash function used is
+ designed to rapidly achieve avalanche and require only 16x16
+ bit multiplication, and 8-bit swizzles (which we get for
+ free). */
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
+ odd_bytes( wtmp[ i ] ) );
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
+ odd_bytes( wtmp[ i ] ) );
+ brw_pop_insn_state( p );
+
+ /* Now we want to initialise the four rear gradients based on the
+ hashes. Format conversion from signed integer to float leaves
+ everything scaled too high by a factor of pow( 2, 15 ), but
+ we correct for that right at the end. */
+ /* x component */
+ brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) );
+ brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) );
+ brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );
+ brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) );
+ brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) );
+ brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, x1y0, x1y0, t );
+ brw_MUL( p, x1y1, x1y1, t );
+ brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) );
+ brw_MUL( p, x0y0, x0y0, param[ 0 ] );
+ brw_MUL( p, x0y1, x0y1, param[ 0 ] );
+
+ /* y component */
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) );
+ brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+ /* prepare t for the w component (used below): w the first time through
+ the loop; w - 1 the second time) */
+ brw_set_predicate_control( p, BRW_PREDICATE_NORMAL );
+ brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) );
+ p->current->header.predicate_inverse = 1;
+ brw_MOV( p, t, param[ 3 ] );
+ p->current->header.predicate_inverse = 0;
+ brw_set_predicate_control( p, BRW_PREDICATE_NONE );
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] );
+
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+
+ /* z component */
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) );
+ brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 2 ] );
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param[ 2 ] );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 2 ] );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param[ 2 ] );
+
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+ /* w component */
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+ brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) );
+
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+ /* Here we interpolate in the y dimension... */
+ brw_ADD( p, x0y1, x0y1, negate( x0y0 ) );
+ brw_ADD( p, x1y1, x1y1, negate( x1y0 ) );
+ brw_MUL( p, x0y1, x0y1, interp[ 1 ] );
+ brw_MUL( p, x1y1, x1y1, interp[ 1 ] );
+ brw_ADD( p, x0y0, x0y0, x0y1 );
+ brw_ADD( p, x1y0, x1y0, x1y1 );
+
+ /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
+ brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+ brw_MUL( p, x1y0, x1y0, interp[ 0 ] );
+ brw_ADD( p, tmp[ 0 ], x0y0, x1y0 );
+
+ /* Now do the same thing for the front four gradients... */
+ /* x component */
+ brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) );
+ brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) );
+ brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) );
+ brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) );
+ brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, x1y0, x1y0, t );
+ brw_MUL( p, x1y1, x1y1, t );
+ brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) );
+ brw_MUL( p, x0y0, x0y0, param[ 0 ] );
+ brw_MUL( p, x0y1, x0y1, param[ 0 ] );
+
+ /* y component */
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) );
+ brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+ brw_ADD( p, t, param[ 2 ], brw_imm_f( -1.0 ) );
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] );
+
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+
+ /* z component */
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) );
+ brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+ /* prepare t for the w component (used below): w the first time through
+ the loop; w - 1 the second time) */
+ brw_set_predicate_control( p, BRW_PREDICATE_NORMAL );
+ brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) );
+ p->current->header.predicate_inverse = 1;
+ brw_MOV( p, t, param[ 3 ] );
+ p->current->header.predicate_inverse = 0;
+ brw_set_predicate_control( p, BRW_PREDICATE_NONE );
+
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+ /* w component */
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+ /* Interpolate in the y dimension: */
+ brw_ADD( p, x0y1, x0y1, negate( x0y0 ) );
+ brw_ADD( p, x1y1, x1y1, negate( x1y0 ) );
+ brw_MUL( p, x0y1, x0y1, interp[ 1 ] );
+ brw_MUL( p, x1y1, x1y1, interp[ 1 ] );
+ brw_ADD( p, x0y0, x0y0, x0y1 );
+ brw_ADD( p, x1y0, x1y0, x1y1 );
+
+ /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
+ time put the front face in tmp[ 1 ] and we're nearly there... */
+ brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+ brw_MUL( p, x1y0, x1y0, interp[ 0 ] );
+ brw_ADD( p, tmp[ 1 ], x0y0, x1y0 );
+
+ /* Another interpolation, in the z dimension: */
+ brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) );
+ brw_MUL( p, tmp[ 1 ], tmp[ 1 ], interp[ 2 ] );
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] );
+
+ /* Exit the loop if we've computed both cubes... */
+ origin = p->nr_insn;
+ brw_push_insn_state( p );
+ brw_set_predicate_control( p, BRW_PREDICATE_NORMAL );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_ADD( p, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );
+ brw_pop_insn_state( p );
+
+ /* Save the result for the w=0 case, and increment the w coordinate: */
+ brw_MOV( p, w0, tmp[ 0 ] );
+ brw_ADD( p, high_words( floors[ 1 ] ), high_words( floors[ 1 ] ),
+ brw_imm_uw( 1 ) );
+
+ /* Loop around for the other cube. Explicitly set the flag register
+ (unfortunately we must spend an extra instruction to do this: we
+ can't rely on a side effect of the previous MOV or ADD because
+ conditional modifiers which are normally true might be false in
+ exceptional circumstances, e.g. given a NaN input; the add to
+ brw_ip_reg() is not suitable because the IP is not an 8-vector). */
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_MOV( p, brw_flag_reg(), brw_imm_uw( 0xFF ) );
+ brw_ADD( p, brw_ip_reg(), brw_ip_reg(),
+ brw_imm_d( ( loop - p->nr_insn ) << 4 ) );
+ brw_pop_insn_state( p );
+
+ /* Patch the previous conditional branch now that we know the
+ destination address. */
+ brw_set_src1( p->store + origin,
+ brw_imm_d( ( p->nr_insn - origin ) << 4 ) );
+
+ /* The very last interpolation. */
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], negate( w0 ) );
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], interp[ 3 ] );
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], w0 );
+
+ /* scale by pow( 2, -15 ), as described above */
+ brw_MUL( p, param[ 0 ], tmp[ 0 ], brw_imm_f( 0.000030517578125 ) );
+
+ release_tmps( c, mark );
+}
+
+static void emit_noise4( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+ int mark = mark_tmps( c );
+
+ assert( mark == 0 );
+
+ src0 = get_src_reg( c, inst, 0, 0 );
+ src1 = get_src_reg( c, inst, 0, 1 );
+ src2 = get_src_reg( c, inst, 0, 2 );
+ src3 = get_src_reg( c, inst, 0, 3 );
+
+ param0 = alloc_tmp( c );
+ param1 = alloc_tmp( c );
+ param2 = alloc_tmp( c );
+ param3 = alloc_tmp( c );
+
+ brw_MOV( p, param0, src0 );
+ brw_MOV( p, param1, src1 );
+ brw_MOV( p, param2, src2 );
+ brw_MOV( p, param3, src3 );
+
+ invoke_subroutine( c, SUB_NOISE4, noise4_sub );
+
+ /* Fill in the result: */
+ brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ brw_MOV( p, dst, param0 );
+ }
+ }
+ if( inst->SaturateMode == SATURATE_ZERO_ONE )
+ brw_set_saturate( p, 0 );
+
+ release_tmps( c, mark );
+}
+
+static void emit_wpos_xy(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ struct brw_reg src0[2], dst[2];
+
+ dst[0] = get_dst_reg(c, inst, 0);
+ dst[1] = get_dst_reg(c, inst, 1);
+
+ src0[0] = get_src_reg(c, inst, 0, 0);
+ src0[1] = get_src_reg(c, inst, 0, 1);
+
+ /* Calculate the pixel offset from window bottom left into destination
+ * X and Y channels.
+ */
+ if (mask & WRITEMASK_X) {
+ /* X' = X - origin_x */
+ brw_ADD(p,
+ dst[0],
+ retype(src0[0], BRW_REGISTER_TYPE_W),
+ brw_imm_d(0 - c->key.origin_x));
+ }
+
+ if (mask & WRITEMASK_Y) {
+ /* Y' = height - (Y - origin_y) = height + origin_y - Y */
+ brw_ADD(p,
+ dst[1],
+ negate(retype(src0[1], BRW_REGISTER_TYPE_W)),
+ brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
+ }
+}
+
+/* TODO
+ BIAS on SIMD8 not working yet...
+ */
+static void emit_txb(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg dst[4], src[4], payload_reg;
+ /* Note: TexSrcUnit was already looked up through SamplerTextures[] */
+ const GLuint unit = inst->TexSrcUnit;
+ GLuint i;
+ GLuint msg_type;
+
+ assert(unit < BRW_MAX_TEX_UNIT);
+
+ payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
+
+ for (i = 0; i < 4; i++)
+ dst[i] = get_dst_reg(c, inst, i);
+ for (i = 0; i < 4; i++)
+ src[i] = get_src_reg(c, inst, 0, i);
+
+ switch (inst->TexSrcTarget) {
+ case TEXTURE_1D_INDEX:
+ brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */
+ brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */
+ brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); /* r coord */
+ break;
+ case TEXTURE_2D_INDEX:
+ case TEXTURE_RECT_INDEX:
+ brw_MOV(p, brw_message_reg(2), src[0]);
+ brw_MOV(p, brw_message_reg(3), src[1]);
+ brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+ break;
+ case TEXTURE_3D_INDEX:
+ case TEXTURE_CUBE_INDEX:
+ brw_MOV(p, brw_message_reg(2), src[0]);
+ brw_MOV(p, brw_message_reg(3), src[1]);
+ brw_MOV(p, brw_message_reg(4), src[2]);
+ break;
+ default:
+ /* invalid target */
+ abort();
+ }
+ brw_MOV(p, brw_message_reg(5), src[3]); /* bias */
+ brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
+
+ if (BRW_IS_IGDNG(p->brw)) {
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG;
+ } else {
+ /* Does it work well on SIMD8? */
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+ }
+
+ brw_SAMPLE(p,
+ retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
+ 1, /* msg_reg_nr */
+ retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */
+ SURF_INDEX_TEXTURE(unit),
+ unit, /* sampler */
+ inst->DstReg.WriteMask, /* writemask */
+ msg_type, /* msg_type */
+ 4, /* response_length */
+ 4, /* msg_length */
+ 0, /* eot */
+ 1,
+ BRW_SAMPLER_SIMD_MODE_SIMD8);
+}
+
+
+static void emit_tex(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg dst[4], src[4], payload_reg;
+ /* Note: TexSrcUnit was already looked up through SamplerTextures[] */
+ const GLuint unit = inst->TexSrcUnit;
+ GLuint msg_len;
+ GLuint i, nr;
+ GLuint emit;
+ GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0;
+ GLuint msg_type;
+
+ assert(unit < BRW_MAX_TEX_UNIT);
+
+ payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
+
+ for (i = 0; i < 4; i++)
+ dst[i] = get_dst_reg(c, inst, i);
+ for (i = 0; i < 4; i++)
+ src[i] = get_src_reg(c, inst, 0, i);
+
+ switch (inst->TexSrcTarget) {
+ case TEXTURE_1D_INDEX:
+ emit = WRITEMASK_X;
+ nr = 1;
+ break;
+ case TEXTURE_2D_INDEX:
+ case TEXTURE_RECT_INDEX:
+ emit = WRITEMASK_XY;
+ nr = 2;
+ break;
+ case TEXTURE_3D_INDEX:
+ case TEXTURE_CUBE_INDEX:
+ emit = WRITEMASK_XYZ;
+ nr = 3;
+ break;
+ default:
+ /* invalid target */
+ abort();
+ }
+ msg_len = 1;
+
+ /* move/load S, T, R coords */
+ for (i = 0; i < nr; i++) {
+ static const GLuint swz[4] = {0,1,2,2};
+ if (emit & (1<<i))
+ brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]);
+ else
+ brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0));
+ msg_len += 1;
+ }
+
+ if (shadow) {
+ brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */
+ brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */
+ }
+
+ if (BRW_IS_IGDNG(p->brw)) {
+ if (shadow)
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG;
+ } else {
+ /* Does it work for shadow on SIMD8 ? */
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
+ }
+
+ brw_SAMPLE(p,
+ retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
+ 1, /* msg_reg_nr */
+ retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */
+ SURF_INDEX_TEXTURE(unit),
+ unit, /* sampler */
+ inst->DstReg.WriteMask, /* writemask */
+ msg_type, /* msg_type */
+ 4, /* response_length */
+ shadow ? 6 : 4, /* msg_length */
+ 0, /* eot */
+ 1,
+ BRW_SAMPLER_SIMD_MODE_SIMD8);
+
+ if (shadow)
+ brw_MOV(p, dst[3], brw_imm_f(1.0));
+}
+
+
+/**
+ * Resolve subroutine calls after code emit is done.
+ */
+static void post_wm_emit( struct brw_wm_compile *c )
+{
+ brw_resolve_cals(&c->func);
+}
+
+static void
+get_argument_regs(struct brw_wm_compile *c,
+ const struct prog_instruction *inst,
+ int index,
+ struct brw_reg *regs,
+ int mask)
+{
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1 << i))
+ regs[i] = get_src_reg(c, inst, index, i);
+ }
+}
+
+static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
+{
+#define MAX_IF_DEPTH 32
+#define MAX_LOOP_DEPTH 32
+ struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
+ GLuint i, if_depth = 0, loop_depth = 0;
+ struct brw_compile *p = &c->func;
+ struct brw_indirect stack_index = brw_indirect(0, 0);
+
+ c->out_of_regs = GL_FALSE;
+
+ prealloc_reg(c);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
+
+ for (i = 0; i < c->nr_fp_insns; i++) {
+ const struct prog_instruction *inst = &c->prog_instructions[i];
+ int dst_flags;
+ struct brw_reg args[3][4], dst[4];
+ int j;
+
+ c->cur_inst = i;
+
+#if 0
+ _mesa_printf("Inst %d: ", i);
+ _mesa_print_instruction(inst);
+#endif
+
+ /* fetch any constants that this instruction needs */
+ if (c->fp->use_const_buffer)
+ fetch_constants(c, inst);
+
+ if (inst->CondUpdate)
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+ else
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+
+ dst_flags = inst->DstReg.WriteMask;
+ if (inst->SaturateMode == SATURATE_ZERO_ONE)
+ dst_flags |= SATURATE;
+
+ switch (inst->Opcode) {
+ case WM_PIXELXY:
+ emit_pixel_xy(c, inst);
+ break;
+ case WM_DELTAXY:
+ emit_delta_xy(c, inst);
+ break;
+ case WM_PIXELW:
+ emit_pixel_w(c, inst);
+ break;
+ case WM_LINTERP:
+ emit_linterp(c, inst);
+ break;
+ case WM_PINTERP:
+ emit_pinterp(c, inst);
+ break;
+ case WM_CINTERP:
+ emit_cinterp(c, inst);
+ break;
+ case WM_WPOSXY:
+ emit_wpos_xy(c, inst);
+ break;
+ case WM_FB_WRITE:
+ emit_fb_write(c, inst);
+ break;
+ case WM_FRONTFACING:
+ emit_frontfacing(c, inst);
+ break;
+ case OPCODE_ADD:
+ emit_add(c, inst);
+ break;
+ case OPCODE_ARL:
+ emit_arl(c, inst);
+ break;
+ case OPCODE_FRC:
+ emit_frc(c, inst);
+ break;
+ case OPCODE_FLR:
+ emit_flr(c, inst);
+ break;
+ case OPCODE_LRP:
+ emit_lrp(c, inst);
+ break;
+ case OPCODE_TRUNC:
+ emit_trunc(c, inst);
+ break;
+ case OPCODE_MOV:
+ case OPCODE_SWZ:
+ emit_mov(c, inst);
+ break;
+ case OPCODE_DP3:
+ emit_dp3(c, inst);
+ break;
+ case OPCODE_DP4:
+ emit_dp4(c, inst);
+ break;
+ case OPCODE_XPD:
+ emit_xpd(c, inst);
+ break;
+ case OPCODE_DPH:
+ emit_dph(c, inst);
+ break;
+ case OPCODE_RCP:
+ emit_rcp(c, inst);
+ break;
+ case OPCODE_RSQ:
+ emit_rsq(c, inst);
+ break;
+ case OPCODE_SIN:
+ emit_sin(c, inst);
+ break;
+ case OPCODE_COS:
+ emit_cos(c, inst);
+ break;
+ case OPCODE_EX2:
+ emit_ex2(c, inst);
+ break;
+ case OPCODE_LG2:
+ emit_lg2(c, inst);
+ break;
+ case OPCODE_MIN:
+ case OPCODE_MAX:
+ emit_min_max(c, inst);
+ break;
+ case OPCODE_DDX:
+ case OPCODE_DDY:
+ for (j = 0; j < 4; j++) {
+ if (inst->DstReg.WriteMask & (1 << j))
+ dst[j] = get_dst_reg(c, inst, j);
+ else
+ dst[j] = brw_null_reg();
+ }
+ get_argument_regs(c, inst, 0, args[0], WRITEMASK_XYZW);
+ emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX),
+ args[0]);
+ break;
+ case OPCODE_SLT:
+ emit_slt(c, inst);
+ break;
+ case OPCODE_SLE:
+ emit_sle(c, inst);
+ break;
+ case OPCODE_SGT:
+ emit_sgt(c, inst);
+ break;
+ case OPCODE_SGE:
+ emit_sge(c, inst);
+ break;
+ case OPCODE_SEQ:
+ emit_seq(c, inst);
+ break;
+ case OPCODE_SNE:
+ emit_sne(c, inst);
+ break;
+ case OPCODE_MUL:
+ emit_mul(c, inst);
+ break;
+ case OPCODE_POW:
+ emit_pow(c, inst);
+ break;
+ case OPCODE_MAD:
+ emit_mad(c, inst);
+ break;
+ case OPCODE_NOISE1:
+ emit_noise1(c, inst);
+ break;
+ case OPCODE_NOISE2:
+ emit_noise2(c, inst);
+ break;
+ case OPCODE_NOISE3:
+ emit_noise3(c, inst);
+ break;
+ case OPCODE_NOISE4:
+ emit_noise4(c, inst);
+ break;
+ case OPCODE_TEX:
+ emit_tex(c, inst);
+ break;
+ case OPCODE_TXB:
+ emit_txb(c, inst);
+ break;
+ case OPCODE_KIL_NV:
+ emit_kil(c);
+ break;
+ case OPCODE_IF:
+ assert(if_depth < MAX_IF_DEPTH);
+ if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8);
+ break;
+ case OPCODE_ELSE:
+ if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
+ break;
+ case OPCODE_ENDIF:
+ assert(if_depth > 0);
+ brw_ENDIF(p, if_inst[--if_depth]);
+ break;
+ case OPCODE_BGNSUB:
+ brw_save_label(p, inst->Comment, p->nr_insn);
+ break;
+ case OPCODE_ENDSUB:
+ /* no-op */
+ break;
+ case OPCODE_CAL:
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_ADD(p, get_addr_reg(stack_index),
+ get_addr_reg(stack_index), brw_imm_d(4));
+ brw_save_call(&c->func, inst->Comment, p->nr_insn);
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ brw_pop_insn_state(p);
+ break;
+
+ case OPCODE_RET:
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_ADD(p, get_addr_reg(stack_index),
+ get_addr_reg(stack_index), brw_imm_d(-4));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_pop_insn_state(p);
+
+ break;
+ case OPCODE_BGNLOOP:
+ /* XXX may need to invalidate the current_constant regs */
+ loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
+ break;
+ case OPCODE_BRK:
+ brw_BREAK(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case OPCODE_CONT:
+ brw_CONT(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case OPCODE_ENDLOOP:
+ {
+ struct brw_instruction *inst0, *inst1;
+ GLuint br = 1;
+
+ if (BRW_IS_IGDNG(brw))
+ br = 2;
+
+ loop_depth--;
+ inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+ /* patch all the BREAK/CONT instructions from last BGNLOOP */
+ while (inst0 > loop_inst[loop_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+ inst0->bits3.if_else.pop_count = 0;
+ }
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ inst0->bits3.if_else.pop_count = 0;
+ }
+ }
+ }
+ break;
+ default:
+ _mesa_printf("unsupported IR in fragment shader %d\n",
+ inst->Opcode);
+ }
+
+ if (inst->CondUpdate)
+ brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ else
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+ post_wm_emit(c);
+
+ if (INTEL_DEBUG & DEBUG_WM) {
+ _mesa_printf("wm-native:\n");
+ for (i = 0; i < p->nr_insn; i++)
+ brw_disasm(stderr, &p->store[i]);
+ _mesa_printf("\n");
+ }
+}
+
+/**
+ * Do GPU code generation for shaders that use GLSL features such as
+ * flow control. Other shaders will be compiled with the
+ */
+void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
+{
+ if (INTEL_DEBUG & DEBUG_WM) {
+ _mesa_printf("brw_wm_glsl_emit:\n");
+ }
+
+ /* initial instruction translation/simplification */
+ brw_wm_pass_fp(c);
+
+ /* actual code generation */
+ brw_wm_emit_glsl(brw, c);
+
+ if (INTEL_DEBUG & DEBUG_WM) {
+ brw_wm_print_program(c, "brw_wm_glsl_emit done");
+ }
+
+ c->prog_data.total_grf = num_grf_used(c);
+ c->prog_data.total_scratch = 0;
+}
diff --git a/src/gallium/drivers/i965/brw_wm_iz.c b/src/gallium/drivers/i965/brw_wm_iz.c
new file mode 100644
index 00000000000..5e399ac62a8
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_iz.c
@@ -0,0 +1,157 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "main/mtypes.h"
+#include "brw_wm.h"
+
+
+#undef P /* prompted depth */
+#undef C /* computed */
+#undef N /* non-promoted? */
+
+#define P 0
+#define C 1
+#define N 2
+
+const struct {
+ GLuint mode:2;
+ GLuint sd_present:1;
+ GLuint sd_to_rt:1;
+ GLuint dd_present:1;
+ GLuint ds_present:1;
+} wm_iz_table[IZ_BIT_MAX] =
+{
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 0 },
+ { N, 0, 1, 0, 0 },
+ { N, 0, 1, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 0 },
+ { N, 0, 1, 0, 0 },
+ { N, 0, 1, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 0 },
+ { C, 0, 1, 1, 0 },
+ { C, 0, 1, 1, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 1 },
+ { N, 0, 1, 0, 1 },
+ { N, 0, 1, 0, 1 },
+ { P, 0, 0, 0, 0 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { P, 0, 0, 0, 0 },
+ { N, 1, 1, 0, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 0, 0, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 0, 1, 0, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 1, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { C, 0, 1, 0, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 1, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { P, 0, 0, 0, 0 },
+ { C, 1, 1, 1, 1 },
+ { C, 0, 1, 1, 1 },
+ { C, 0, 1, 1, 1 }
+};
+
+/**
+ * \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES
+ * \param lookup bitmask of IZ_* flags
+ */
+void brw_wm_lookup_iz( GLuint line_aa,
+ GLuint lookup,
+ GLboolean ps_uses_depth,
+ struct brw_wm_prog_key *key )
+{
+ GLuint reg = 2;
+
+ assert (lookup < IZ_BIT_MAX);
+
+ if (lookup & IZ_PS_COMPUTES_DEPTH_BIT)
+ key->computes_depth = 1;
+
+ if (wm_iz_table[lookup].sd_present || ps_uses_depth) {
+ key->source_depth_reg = reg;
+ reg += 2;
+ }
+
+ if (wm_iz_table[lookup].sd_to_rt)
+ key->source_depth_to_render_target = 1;
+
+ if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) {
+ key->aa_dest_stencil_reg = reg;
+ key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present &&
+ line_aa == AA_SOMETIMES);
+ reg++;
+ }
+
+ if (wm_iz_table[lookup].dd_present) {
+ key->dest_depth_reg = reg;
+ reg+=2;
+ }
+
+ key->nr_depth_regs = (reg+1)/2;
+}
+
diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c
new file mode 100644
index 00000000000..62792583396
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_pass0.c
@@ -0,0 +1,442 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "brw_context.h"
+#include "brw_wm.h"
+#include "shader/prog_parameter.h"
+
+
+
+/***********************************************************************
+ */
+
+static struct brw_wm_ref *get_ref( struct brw_wm_compile *c )
+{
+ assert(c->nr_refs < BRW_WM_MAX_REF);
+ return &c->refs[c->nr_refs++];
+}
+
+static struct brw_wm_value *get_value( struct brw_wm_compile *c)
+{
+ assert(c->nr_refs < BRW_WM_MAX_VREG);
+ return &c->vreg[c->nr_vreg++];
+}
+
+/** return pointer to a newly allocated instruction */
+static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c )
+{
+ assert(c->nr_insns < BRW_WM_MAX_INSN);
+ return &c->instruction[c->nr_insns++];
+}
+
+/***********************************************************************
+ */
+
+/** Init the "undef" register */
+static void pass0_init_undef( struct brw_wm_compile *c)
+{
+ struct brw_wm_ref *ref = &c->undef_ref;
+ ref->value = &c->undef_value;
+ ref->hw_reg = brw_vec8_grf(0, 0);
+ ref->insn = 0;
+ ref->prevuse = NULL;
+}
+
+/** Set a FP register to a value */
+static void pass0_set_fpreg_value( struct brw_wm_compile *c,
+ GLuint file,
+ GLuint idx,
+ GLuint component,
+ struct brw_wm_value *value )
+{
+ struct brw_wm_ref *ref = get_ref(c);
+ ref->value = value;
+ ref->hw_reg = brw_vec8_grf(0, 0);
+ ref->insn = 0;
+ ref->prevuse = NULL;
+ c->pass0_fp_reg[file][idx][component] = ref;
+}
+
+/** Set a FP register to a ref */
+static void pass0_set_fpreg_ref( struct brw_wm_compile *c,
+ GLuint file,
+ GLuint idx,
+ GLuint component,
+ const struct brw_wm_ref *src_ref )
+{
+ c->pass0_fp_reg[file][idx][component] = src_ref;
+}
+
+static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c,
+ const GLfloat *param_ptr )
+{
+ GLuint i = c->prog_data.nr_params++;
+
+ if (i >= BRW_WM_MAX_PARAM) {
+ _mesa_printf("%s: out of params\n", __FUNCTION__);
+ c->prog_data.error = 1;
+ return NULL;
+ }
+ else {
+ struct brw_wm_ref *ref = get_ref(c);
+
+ c->prog_data.param[i] = param_ptr;
+ c->nr_creg = (i+16)/16;
+
+ /* Push the offsets into hw_reg. These will be added to the
+ * real register numbers once one is allocated in pass2.
+ */
+ ref->hw_reg = brw_vec1_grf((i&8)?1:0, i%8);
+ ref->value = &c->creg[i/16];
+ ref->insn = 0;
+ ref->prevuse = NULL;
+
+ return ref;
+ }
+}
+
+
+/** Return a ref to a constant/literal value */
+static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c,
+ const GLfloat *constval )
+{
+ GLuint i;
+
+ /* Search for an existing const value matching the request:
+ */
+ for (i = 0; i < c->nr_constrefs; i++) {
+ if (c->constref[i].constval == *constval)
+ return c->constref[i].ref;
+ }
+
+ /* Else try to add a new one:
+ */
+ if (c->nr_constrefs < BRW_WM_MAX_CONST) {
+ GLuint i = c->nr_constrefs++;
+
+ /* A constant is a special type of parameter:
+ */
+ c->constref[i].constval = *constval;
+ c->constref[i].ref = get_param_ref(c, constval);
+
+ return c->constref[i].ref;
+ }
+ else {
+ _mesa_printf("%s: out of constrefs\n", __FUNCTION__);
+ c->prog_data.error = 1;
+ return NULL;
+ }
+}
+
+
+/* Lookup our internal registers
+ */
+static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
+ GLuint file,
+ GLuint idx,
+ GLuint component )
+{
+ const struct brw_wm_ref *ref = c->pass0_fp_reg[file][idx][component];
+
+ if (!ref) {
+ switch (file) {
+ case PROGRAM_INPUT:
+ case PROGRAM_PAYLOAD:
+ case PROGRAM_TEMPORARY:
+ case PROGRAM_OUTPUT:
+ case PROGRAM_VARYING:
+ break;
+
+ case PROGRAM_LOCAL_PARAM:
+ ref = get_param_ref(c, &c->fp->program.Base.LocalParams[idx][component]);
+ break;
+
+ case PROGRAM_ENV_PARAM:
+ ref = get_param_ref(c, &c->env_param[idx][component]);
+ break;
+
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_UNIFORM:
+ case PROGRAM_CONSTANT:
+ case PROGRAM_NAMED_PARAM: {
+ struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters;
+
+ /* There's something really hokey about parameters parsed in
+ * arb programs - they all end up in here, whether they be
+ * state values, parameters or constants. This duplicates the
+ * structure above & also seems to subvert the limits set for
+ * each type of constant/param.
+ */
+ switch (plist->Parameters[idx].Type) {
+ case PROGRAM_NAMED_PARAM:
+ case PROGRAM_CONSTANT:
+ /* These are invarient:
+ */
+ ref = get_const_ref(c, &plist->ParameterValues[idx][component]);
+ break;
+
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_UNIFORM:
+ /* These may change from run to run:
+ */
+ ref = get_param_ref(c, &plist->ParameterValues[idx][component] );
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ }
+
+ default:
+ assert(0);
+ break;
+ }
+
+ c->pass0_fp_reg[file][idx][component] = ref;
+ }
+
+ if (!ref)
+ ref = &c->undef_ref;
+
+ return ref;
+}
+
+
+
+/***********************************************************************
+ * Straight translation to internal instruction format
+ */
+
+static void pass0_set_dst( struct brw_wm_compile *c,
+ struct brw_wm_instruction *out,
+ const struct prog_instruction *inst,
+ GLuint writemask )
+{
+ const struct prog_dst_register *dst = &inst->DstReg;
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<<i)) {
+ out->dst[i] = get_value(c);
+ pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[i]);
+ }
+ }
+
+ out->writemask = writemask;
+}
+
+
+static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c,
+ struct prog_src_register src,
+ GLuint i )
+{
+ GLuint component = GET_SWZ(src.Swizzle,i);
+ const struct brw_wm_ref *src_ref;
+ static const GLfloat const_zero = 0.0;
+ static const GLfloat const_one = 1.0;
+
+ if (component == SWIZZLE_ZERO)
+ src_ref = get_const_ref(c, &const_zero);
+ else if (component == SWIZZLE_ONE)
+ src_ref = get_const_ref(c, &const_one);
+ else
+ src_ref = pass0_get_reg(c, src.File, src.Index, component);
+
+ return src_ref;
+}
+
+
+static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c,
+ struct prog_src_register src,
+ GLuint i,
+ struct brw_wm_instruction *insn)
+{
+ const struct brw_wm_ref *ref = get_fp_src_reg_ref(c, src, i);
+ struct brw_wm_ref *newref = get_ref(c);
+
+ newref->value = ref->value;
+ newref->hw_reg = ref->hw_reg;
+
+ if (insn) {
+ newref->insn = insn - c->instruction;
+ newref->prevuse = newref->value->lastuse;
+ newref->value->lastuse = newref;
+ }
+
+ if (src.Negate & (1 << i))
+ newref->hw_reg.negate ^= 1;
+
+ if (src.Abs) {
+ newref->hw_reg.negate = 0;
+ newref->hw_reg.abs = 1;
+ }
+
+ return newref;
+}
+
+
+static void
+translate_insn(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_wm_instruction *out = get_instruction(c);
+ GLuint writemask = inst->DstReg.WriteMask;
+ GLuint nr_args = brw_wm_nr_args(inst->Opcode);
+ GLuint i, j;
+
+ /* Copy some data out of the instruction
+ */
+ out->opcode = inst->Opcode;
+ out->saturate = (inst->SaturateMode != SATURATE_OFF);
+ out->tex_unit = inst->TexSrcUnit;
+ out->tex_idx = inst->TexSrcTarget;
+ out->tex_shadow = inst->TexShadow;
+ out->eot = inst->Aux & 1;
+ out->target = inst->Aux >> 1;
+
+ /* Args:
+ */
+ for (i = 0; i < nr_args; i++) {
+ for (j = 0; j < 4; j++) {
+ out->src[i][j] = get_new_ref(c, inst->SrcReg[i], j, out);
+ }
+ }
+
+ /* Dst:
+ */
+ pass0_set_dst(c, out, inst, writemask);
+}
+
+
+
+/***********************************************************************
+ * Optimize moves and swizzles away:
+ */
+static void pass0_precalc_mov( struct brw_wm_compile *c,
+ const struct prog_instruction *inst )
+{
+ const struct prog_dst_register *dst = &inst->DstReg;
+ GLuint writemask = inst->DstReg.WriteMask;
+ struct brw_wm_ref *refs[4];
+ GLuint i;
+
+ /* Get the effect of a MOV by manipulating our register table:
+ * First get all refs, then assign refs. This ensures that "in-place"
+ * swizzles such as:
+ * MOV t, t.xxyx
+ * are handled correctly. Previously, these two steps were done in
+ * one loop and the above case was incorrectly handled.
+ */
+ for (i = 0; i < 4; i++) {
+ refs[i] = get_new_ref(c, inst->SrcReg[0], i, NULL);
+ }
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1 << i)) {
+ pass0_set_fpreg_ref( c, dst->File, dst->Index, i, refs[i]);
+ }
+ }
+}
+
+
+/* Initialize payload "registers".
+ */
+static void pass0_init_payload( struct brw_wm_compile *c )
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ GLuint j = i >= c->key.nr_depth_regs ? 0 : i;
+ pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i,
+ &c->payload.depth[j] );
+ }
+
+#if 0
+ /* This seems to be an alternative to the INTERP_WPOS stuff I do
+ * elsewhere:
+ */
+ if (c->key.source_depth_reg)
+ pass0_set_fpreg_value(c, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, 2,
+ &c->payload.depth[c->key.source_depth_reg/2]);
+#endif
+
+ for (i = 0; i < FRAG_ATTRIB_MAX; i++)
+ pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, i, 0,
+ &c->payload.input_interp[i] );
+}
+
+
+/***********************************************************************
+ * PASS 0
+ *
+ * Work forwards to give each calculated value a unique number. Where
+ * an instruction produces duplicate values (eg DP3), all are given
+ * the same number.
+ *
+ * Translate away swizzling and eliminate non-saturating moves.
+ */
+void brw_wm_pass0( struct brw_wm_compile *c )
+{
+ GLuint insn;
+
+ c->nr_vreg = 0;
+ c->nr_insns = 0;
+
+ pass0_init_undef(c);
+ pass0_init_payload(c);
+
+ for (insn = 0; insn < c->nr_fp_insns; insn++) {
+ const struct prog_instruction *inst = &c->prog_instructions[insn];
+
+ /* Optimize away moves, otherwise emit translated instruction:
+ */
+ switch (inst->Opcode) {
+ case OPCODE_MOV:
+ case OPCODE_SWZ:
+ if (!inst->SaturateMode) {
+ pass0_precalc_mov(c, inst);
+ }
+ else {
+ translate_insn(c, inst);
+ }
+ break;
+ default:
+ translate_insn(c, inst);
+ break;
+ }
+ }
+
+ if (INTEL_DEBUG & DEBUG_WM) {
+ brw_wm_print_program(c, "pass0");
+ }
+}
diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c
new file mode 100644
index 00000000000..b4493940292
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_pass1.c
@@ -0,0 +1,291 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "brw_context.h"
+#include "brw_wm.h"
+
+
+static GLuint get_tracked_mask(struct brw_wm_compile *c,
+ struct brw_wm_instruction *inst)
+{
+ GLuint i;
+ for (i = 0; i < 4; i++) {
+ if (inst->writemask & (1<<i)) {
+ if (!inst->dst[i]->contributes_to_output) {
+ inst->writemask &= ~(1<<i);
+ inst->dst[i] = 0;
+ }
+ }
+ }
+
+ return inst->writemask;
+}
+
+/* Remove a reference from a value's usage chain.
+ */
+static void unlink_ref(struct brw_wm_ref *ref)
+{
+ struct brw_wm_value *value = ref->value;
+
+ if (ref == value->lastuse) {
+ value->lastuse = ref->prevuse;
+ }
+ else {
+ struct brw_wm_ref *i = value->lastuse;
+ while (i->prevuse != ref) i = i->prevuse;
+ i->prevuse = ref->prevuse;
+ }
+}
+
+static void track_arg(struct brw_wm_compile *c,
+ struct brw_wm_instruction *inst,
+ GLuint arg,
+ GLuint readmask)
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ struct brw_wm_ref *ref = inst->src[arg][i];
+ if (ref) {
+ if (readmask & (1<<i)) {
+ ref->value->contributes_to_output = 1;
+ }
+ else {
+ unlink_ref(ref);
+ inst->src[arg][i] = NULL;
+ }
+ }
+ }
+}
+
+static GLuint get_texcoord_mask( GLuint tex_idx )
+{
+ switch (tex_idx) {
+ case TEXTURE_1D_INDEX:
+ return WRITEMASK_X;
+ case TEXTURE_2D_INDEX:
+ return WRITEMASK_XY;
+ case TEXTURE_3D_INDEX:
+ return WRITEMASK_XYZ;
+ case TEXTURE_CUBE_INDEX:
+ return WRITEMASK_XYZ;
+ case TEXTURE_RECT_INDEX:
+ return WRITEMASK_XY;
+ default: return 0;
+ }
+}
+
+
+/* Step two: Basically this is dead code elimination.
+ *
+ * Iterate backwards over instructions, noting which values
+ * contribute to the final result. Adjust writemasks to only
+ * calculate these values.
+ */
+void brw_wm_pass1( struct brw_wm_compile *c )
+{
+ GLint insn;
+
+ for (insn = c->nr_insns-1; insn >= 0; insn--) {
+ struct brw_wm_instruction *inst = &c->instruction[insn];
+ GLuint writemask;
+ GLuint read0, read1, read2;
+
+ if (inst->opcode == OPCODE_KIL) {
+ track_arg(c, inst, 0, WRITEMASK_XYZW); /* All args contribute to final */
+ continue;
+ }
+
+ if (inst->opcode == WM_FB_WRITE) {
+ track_arg(c, inst, 0, WRITEMASK_XYZW);
+ track_arg(c, inst, 1, WRITEMASK_XYZW);
+ if (c->key.source_depth_to_render_target &&
+ c->key.computes_depth)
+ track_arg(c, inst, 2, WRITEMASK_Z);
+ else
+ track_arg(c, inst, 2, 0);
+ continue;
+ }
+
+ /* Lookup all the registers which were written by this
+ * instruction and get a mask of those that contribute to the output:
+ */
+ writemask = get_tracked_mask(c, inst);
+ if (!writemask) {
+ GLuint arg;
+ for (arg = 0; arg < 3; arg++)
+ track_arg(c, inst, arg, 0);
+ continue;
+ }
+
+ read0 = 0;
+ read1 = 0;
+ read2 = 0;
+
+ /* Mark all inputs which contribute to the marked outputs:
+ */
+ switch (inst->opcode) {
+ case OPCODE_ABS:
+ case OPCODE_FLR:
+ case OPCODE_FRC:
+ case OPCODE_MOV:
+ case OPCODE_SWZ:
+ case OPCODE_TRUNC:
+ read0 = writemask;
+ break;
+
+ case OPCODE_SUB:
+ case OPCODE_SLT:
+ case OPCODE_SLE:
+ case OPCODE_SGE:
+ case OPCODE_SGT:
+ case OPCODE_SEQ:
+ case OPCODE_SNE:
+ case OPCODE_ADD:
+ case OPCODE_MAX:
+ case OPCODE_MIN:
+ case OPCODE_MUL:
+ read0 = writemask;
+ read1 = writemask;
+ break;
+
+ case OPCODE_DDX:
+ case OPCODE_DDY:
+ read0 = writemask;
+ break;
+
+ case OPCODE_MAD:
+ case OPCODE_CMP:
+ case OPCODE_LRP:
+ read0 = writemask;
+ read1 = writemask;
+ read2 = writemask;
+ break;
+
+ case OPCODE_XPD:
+ if (writemask & WRITEMASK_X) read0 |= WRITEMASK_YZ;
+ if (writemask & WRITEMASK_Y) read0 |= WRITEMASK_XZ;
+ if (writemask & WRITEMASK_Z) read0 |= WRITEMASK_XY;
+ read1 = read0;
+ break;
+
+ case OPCODE_COS:
+ case OPCODE_EX2:
+ case OPCODE_LG2:
+ case OPCODE_RCP:
+ case OPCODE_RSQ:
+ case OPCODE_SIN:
+ case OPCODE_SCS:
+ case WM_CINTERP:
+ case WM_PIXELXY:
+ read0 = WRITEMASK_X;
+ break;
+
+ case OPCODE_POW:
+ read0 = WRITEMASK_X;
+ read1 = WRITEMASK_X;
+ break;
+
+ case OPCODE_TEX:
+ case OPCODE_TXP:
+ read0 = get_texcoord_mask(inst->tex_idx);
+
+ if (inst->tex_shadow)
+ read0 |= WRITEMASK_Z;
+ break;
+
+ case OPCODE_TXB:
+ /* Shadow ignored for txb.
+ */
+ read0 = get_texcoord_mask(inst->tex_idx) | WRITEMASK_W;
+ break;
+
+ case WM_WPOSXY:
+ read0 = writemask & WRITEMASK_XY;
+ break;
+
+ case WM_DELTAXY:
+ read0 = writemask & WRITEMASK_XY;
+ read1 = WRITEMASK_X;
+ break;
+
+ case WM_PIXELW:
+ read0 = WRITEMASK_X;
+ read1 = WRITEMASK_XY;
+ break;
+
+ case WM_LINTERP:
+ read0 = WRITEMASK_X;
+ read1 = WRITEMASK_XY;
+ break;
+
+ case WM_PINTERP:
+ read0 = WRITEMASK_X; /* interpolant */
+ read1 = WRITEMASK_XY; /* deltas */
+ read2 = WRITEMASK_W; /* pixel w */
+ break;
+
+ case OPCODE_DP3:
+ read0 = WRITEMASK_XYZ;
+ read1 = WRITEMASK_XYZ;
+ break;
+
+ case OPCODE_DPH:
+ read0 = WRITEMASK_XYZ;
+ read1 = WRITEMASK_XYZW;
+ break;
+
+ case OPCODE_DP4:
+ read0 = WRITEMASK_XYZW;
+ read1 = WRITEMASK_XYZW;
+ break;
+
+ case OPCODE_LIT:
+ read0 = WRITEMASK_XYW;
+ break;
+
+ case OPCODE_DST:
+ case WM_FRONTFACING:
+ case OPCODE_KIL_NV:
+ default:
+ break;
+ }
+
+ track_arg(c, inst, 0, read0);
+ track_arg(c, inst, 1, read1);
+ track_arg(c, inst, 2, read2);
+ }
+
+ if (INTEL_DEBUG & DEBUG_WM) {
+ brw_wm_print_program(c, "pass1");
+ }
+}
diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c
new file mode 100644
index 00000000000..6faea018fbc
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_pass2.c
@@ -0,0 +1,343 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "brw_context.h"
+#include "brw_wm.h"
+
+
+/* Use these to force spilling so that that functionality can be
+ * tested with known-good examples rather than having to construct new
+ * tests.
+ */
+#define TEST_PAYLOAD_SPILLS 0
+#define TEST_DST_SPILLS 0
+
+static void spill_value(struct brw_wm_compile *c,
+ struct brw_wm_value *value);
+
+static void prealloc_reg(struct brw_wm_compile *c,
+ struct brw_wm_value *value,
+ GLuint reg)
+{
+ if (value->lastuse) {
+ /* Set nextuse to zero, it will be corrected by
+ * update_register_usage().
+ */
+ c->pass2_grf[reg].value = value;
+ c->pass2_grf[reg].nextuse = 0;
+
+ value->resident = &c->pass2_grf[reg];
+ value->hw_reg = brw_vec8_grf(reg*2, 0);
+
+ if (TEST_PAYLOAD_SPILLS)
+ spill_value(c, value);
+ }
+}
+
+
+/* Initialize all the register values. Do the initial setup
+ * calculations for interpolants.
+ */
+static void init_registers( struct brw_wm_compile *c )
+{
+ GLuint nr_interp_regs = 0;
+ GLuint i = 0;
+ GLuint j;
+
+ for (j = 0; j < c->grf_limit; j++)
+ c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN;
+
+ for (j = 0; j < c->key.nr_depth_regs; j++)
+ prealloc_reg(c, &c->payload.depth[j], i++);
+
+ for (j = 0; j < c->nr_creg; j++)
+ prealloc_reg(c, &c->creg[j], i++);
+
+ for (j = 0; j < FRAG_ATTRIB_MAX; j++) {
+ if (c->key.vp_outputs_written & (1<<j)) {
+ int fp_index;
+
+ if (j >= VERT_RESULT_VAR0)
+ fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
+ else if (j <= VERT_RESULT_TEX7)
+ fp_index = j;
+ else
+ fp_index = -1;
+
+ nr_interp_regs++;
+ if (fp_index >= 0)
+ prealloc_reg(c, &c->payload.input_interp[fp_index], i++);
+ }
+ }
+
+ assert(nr_interp_regs >= 1);
+
+ c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
+ c->prog_data.urb_read_length = nr_interp_regs * 2;
+ c->prog_data.curb_read_length = c->nr_creg * 2;
+
+ c->max_wm_grf = i * 2;
+}
+
+
+/* Update the nextuse value for each register in our file.
+ */
+static void update_register_usage(struct brw_wm_compile *c,
+ GLuint thisinsn)
+{
+ GLuint i;
+
+ for (i = 1; i < c->grf_limit; i++) {
+ struct brw_wm_grf *grf = &c->pass2_grf[i];
+
+ /* Only search those which can change:
+ */
+ if (grf->nextuse < thisinsn) {
+ const struct brw_wm_ref *ref = grf->value->lastuse;
+
+ /* Has last use of value been passed?
+ */
+ if (ref->insn < thisinsn) {
+ grf->value->resident = 0;
+ grf->value = 0;
+ grf->nextuse = BRW_WM_MAX_INSN;
+ }
+ else {
+ /* Else loop through chain to update:
+ */
+ while (ref->prevuse && ref->prevuse->insn >= thisinsn)
+ ref = ref->prevuse;
+
+ grf->nextuse = ref->insn;
+ }
+ }
+ }
+}
+
+
+static void spill_value(struct brw_wm_compile *c,
+ struct brw_wm_value *value)
+{
+ /* Allocate a spill slot. Note that allocations start from 0x40 -
+ * the first slot is reserved to mean "undef" in brw_wm_emit.c
+ */
+ if (!value->spill_slot) {
+ c->last_scratch += 0x40;
+ value->spill_slot = c->last_scratch;
+ }
+
+ /* The spill will be done in brw_wm_emit.c immediately after the
+ * value is calculated, so we can just take this reg without any
+ * further work.
+ */
+ value->resident->value = NULL;
+ value->resident->nextuse = BRW_WM_MAX_INSN;
+ value->resident = NULL;
+}
+
+
+
+/* Search for contiguous region with the most distant nearest
+ * member. Free regs count as very distant.
+ *
+ * TODO: implement spill-to-reg so that we can rearrange discontigous
+ * free regs and then spill the oldest non-free regs in sequence.
+ * This would mean inserting instructions in this pass.
+ */
+static GLuint search_contiguous_regs(struct brw_wm_compile *c,
+ GLuint nr,
+ GLuint thisinsn)
+{
+ struct brw_wm_grf *grf = c->pass2_grf;
+ GLuint furthest = 0;
+ GLuint reg = 0;
+ GLuint i, j;
+
+ /* Start search at 1: r0 is special and can't be used or spilled.
+ */
+ for (i = 1; i < c->grf_limit && furthest < BRW_WM_MAX_INSN; i++) {
+ GLuint group_nextuse = BRW_WM_MAX_INSN;
+
+ for (j = 0; j < nr; j++) {
+ if (grf[i+j].nextuse < group_nextuse)
+ group_nextuse = grf[i+j].nextuse;
+ }
+
+ if (group_nextuse > furthest) {
+ furthest = group_nextuse;
+ reg = i;
+ }
+ }
+
+ assert(furthest != thisinsn);
+
+ /* Any non-empty regs will need to be spilled:
+ */
+ for (j = 0; j < nr; j++)
+ if (grf[reg+j].value)
+ spill_value(c, grf[reg+j].value);
+
+ return reg;
+}
+
+
+static void alloc_contiguous_dest(struct brw_wm_compile *c,
+ struct brw_wm_value *dst[],
+ GLuint nr,
+ GLuint thisinsn)
+{
+ GLuint reg = search_contiguous_regs(c, nr, thisinsn);
+ GLuint i;
+
+ for (i = 0; i < nr; i++) {
+ if (!dst[i]) {
+ /* Need to grab a dummy value in TEX case. Don't introduce
+ * it into the tracking scheme.
+ */
+ dst[i] = &c->vreg[c->nr_vreg++];
+ }
+ else {
+ assert(!dst[i]->resident);
+ assert(c->pass2_grf[reg+i].nextuse != thisinsn);
+
+ c->pass2_grf[reg+i].value = dst[i];
+ c->pass2_grf[reg+i].nextuse = thisinsn;
+
+ dst[i]->resident = &c->pass2_grf[reg+i];
+ }
+
+ dst[i]->hw_reg = brw_vec8_grf((reg+i)*2, 0);
+ }
+
+ if ((reg+nr)*2 > c->max_wm_grf)
+ c->max_wm_grf = (reg+nr) * 2;
+}
+
+
+static void load_args(struct brw_wm_compile *c,
+ struct brw_wm_instruction *inst)
+{
+ GLuint thisinsn = inst - c->instruction;
+ GLuint i,j;
+
+ for (i = 0; i < 3; i++) {
+ for (j = 0; j < 4; j++) {
+ struct brw_wm_ref *ref = inst->src[i][j];
+
+ if (ref) {
+ if (!ref->value->resident) {
+ /* Need to bring the value in from scratch space. The code for
+ * this will be done in brw_wm_emit.c, here we just do the
+ * register allocation and mark the ref as requiring a fill.
+ */
+ GLuint reg = search_contiguous_regs(c, 1, thisinsn);
+
+ c->pass2_grf[reg].value = ref->value;
+ c->pass2_grf[reg].nextuse = thisinsn;
+
+ ref->value->resident = &c->pass2_grf[reg];
+
+ /* Note that a fill is required:
+ */
+ ref->unspill_reg = reg*2;
+ }
+
+ /* Adjust the hw_reg to point at the value's current location:
+ */
+ assert(ref->value == ref->value->resident->value);
+ ref->hw_reg.nr += (ref->value->resident - c->pass2_grf) * 2;
+ }
+ }
+ }
+}
+
+
+
+/* Step 3: Work forwards once again. Perform register allocations,
+ * taking into account instructions like TEX which require contiguous
+ * result registers. Where necessary spill registers to scratch space
+ * and reload later.
+ */
+void brw_wm_pass2( struct brw_wm_compile *c )
+{
+ GLuint insn;
+ GLuint i;
+
+ init_registers(c);
+
+ for (insn = 0; insn < c->nr_insns; insn++) {
+ struct brw_wm_instruction *inst = &c->instruction[insn];
+
+ /* Update registers' nextuse values:
+ */
+ update_register_usage(c, insn);
+
+ /* May need to unspill some args.
+ */
+ load_args(c, inst);
+
+ /* Allocate registers to hold results:
+ */
+ switch (inst->opcode) {
+ case OPCODE_TEX:
+ case OPCODE_TXB:
+ case OPCODE_TXP:
+ alloc_contiguous_dest(c, inst->dst, 4, insn);
+ break;
+
+ default:
+ for (i = 0; i < 4; i++) {
+ if (inst->writemask & (1<<i)) {
+ assert(inst->dst[i]);
+ alloc_contiguous_dest(c, &inst->dst[i], 1, insn);
+ }
+ }
+ break;
+ }
+
+ if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY) {
+ for (i = 0; i < 4; i++)
+ if (inst->dst[i])
+ spill_value(c, inst->dst[i]);
+ }
+ }
+
+ if (INTEL_DEBUG & DEBUG_WM) {
+ brw_wm_print_program(c, "pass2");
+ }
+
+ c->state = PASS2_DONE;
+
+ if (INTEL_DEBUG & DEBUG_WM) {
+ brw_wm_print_program(c, "pass2/done");
+ }
+}
diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c
new file mode 100644
index 00000000000..dff466587a8
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c
@@ -0,0 +1,369 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+#include "main/macros.h"
+
+
+
+/* Samplers aren't strictly wm state from the hardware's perspective,
+ * but that is the only situation in which we use them in this driver.
+ */
+
+
+
+/* The brw (and related graphics cores) do not support GL_CLAMP. The
+ * Intel drivers for "other operating systems" implement GL_CLAMP as
+ * GL_CLAMP_TO_EDGE, so the same is done here.
+ */
+static GLuint translate_wrap_mode( GLenum wrap )
+{
+ switch( wrap ) {
+ case GL_REPEAT:
+ return BRW_TEXCOORDMODE_WRAP;
+ case GL_CLAMP:
+ return BRW_TEXCOORDMODE_CLAMP;
+ case GL_CLAMP_TO_EDGE:
+ return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */
+ case GL_CLAMP_TO_BORDER:
+ return BRW_TEXCOORDMODE_CLAMP_BORDER;
+ case GL_MIRRORED_REPEAT:
+ return BRW_TEXCOORDMODE_MIRROR;
+ default:
+ return BRW_TEXCOORDMODE_WRAP;
+ }
+}
+
+
+static GLuint U_FIXED(GLfloat value, GLuint frac_bits)
+{
+ value *= (1<<frac_bits);
+ return value < 0 ? 0 : value;
+}
+
+static GLint S_FIXED(GLfloat value, GLuint frac_bits)
+{
+ return value * (1<<frac_bits);
+}
+
+
+static dri_bo *upload_default_color( struct brw_context *brw,
+ const GLfloat *color )
+{
+ struct brw_sampler_default_color sdc;
+
+ COPY_4V(sdc.color, color);
+
+ return brw_cache_data( &brw->cache, BRW_SAMPLER_DEFAULT_COLOR, &sdc,
+ NULL, 0 );
+}
+
+
+struct wm_sampler_key {
+ int sampler_count;
+
+ struct wm_sampler_entry {
+ GLenum tex_target;
+ GLenum wrap_r, wrap_s, wrap_t;
+ float maxlod, minlod;
+ float lod_bias;
+ float max_aniso;
+ GLenum minfilter, magfilter;
+ GLenum comparemode, comparefunc;
+ dri_bo *sdc_bo;
+
+ /** If target is cubemap, take context setting.
+ */
+ GLboolean seamless_cube_map;
+ } sampler[BRW_MAX_TEX_UNIT];
+};
+
+/**
+ * Sets the sampler state for a single unit based off of the sampler key
+ * entry.
+ */
+static void brw_update_sampler_state(struct wm_sampler_entry *key,
+ dri_bo *sdc_bo,
+ struct brw_sampler_state *sampler)
+{
+ _mesa_memset(sampler, 0, sizeof(*sampler));
+
+ switch (key->minfilter) {
+ case GL_NEAREST:
+ sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+ sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+ break;
+ case GL_LINEAR:
+ sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+ sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+ break;
+ case GL_NEAREST_MIPMAP_NEAREST:
+ sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+ sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
+ break;
+ case GL_LINEAR_MIPMAP_NEAREST:
+ sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+ sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
+ break;
+ case GL_NEAREST_MIPMAP_LINEAR:
+ sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+ sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
+ break;
+ case GL_LINEAR_MIPMAP_LINEAR:
+ sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+ sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
+ break;
+ default:
+ break;
+ }
+
+ /* Set Anisotropy:
+ */
+ if (key->max_aniso > 1.0) {
+ sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC;
+ sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
+
+ if (key->max_aniso > 2.0) {
+ sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2,
+ BRW_ANISORATIO_16);
+ }
+ }
+ else {
+ switch (key->magfilter) {
+ case GL_NEAREST:
+ sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+ break;
+ case GL_LINEAR:
+ sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
+ break;
+ default:
+ break;
+ }
+ }
+
+ sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r);
+ sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s);
+ sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t);
+
+ /* Cube-maps on 965 and later must use the same wrap mode for all 3
+ * coordinate dimensions. Futher, only CUBE and CLAMP are valid.
+ */
+ if (key->tex_target == GL_TEXTURE_CUBE_MAP) {
+ if (key->seamless_cube_map &&
+ (key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) {
+ sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+ sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+ sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+ } else {
+ sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ }
+ } else if (key->tex_target == GL_TEXTURE_1D) {
+ /* There's a bug in 1D texture sampling - it actually pays
+ * attention to the wrap_t value, though it should not.
+ * Override the wrap_t value here to GL_REPEAT to keep
+ * any nonexistent border pixels from floating in.
+ */
+ sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+ }
+
+
+ /* Set shadow function:
+ */
+ if (key->comparemode == GL_COMPARE_R_TO_TEXTURE_ARB) {
+ /* Shadowing is "enabled" by emitting a particular sampler
+ * message (sample_c). So need to recompile WM program when
+ * shadow comparison is enabled on each/any texture unit.
+ */
+ sampler->ss0.shadow_function =
+ intel_translate_shadow_compare_func(key->comparefunc);
+ }
+
+ /* Set LOD bias:
+ */
+ sampler->ss0.lod_bias = S_FIXED(CLAMP(key->lod_bias, -16, 15), 6);
+
+ sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
+ sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
+
+ /* Set BaseMipLevel, MaxLOD, MinLOD:
+ *
+ * XXX: I don't think that using firstLevel, lastLevel works,
+ * because we always setup the surface state as if firstLevel ==
+ * level zero. Probably have to subtract firstLevel from each of
+ * these:
+ */
+ sampler->ss0.base_level = U_FIXED(0, 1);
+
+ sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(key->maxlod, 0), 13), 6);
+ sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(key->minlod, 0), 13), 6);
+
+ sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */
+}
+
+
+/** Sets up the cache key for sampler state for all texture units */
+static void
+brw_wm_sampler_populate_key(struct brw_context *brw,
+ struct wm_sampler_key *key)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ int unit;
+
+ memset(key, 0, sizeof(*key));
+
+ for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
+ if (ctx->Texture.Unit[unit]._ReallyEnabled) {
+ struct wm_sampler_entry *entry = &key->sampler[unit];
+ struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+ struct gl_texture_object *texObj = texUnit->_Current;
+ struct intel_texture_object *intelObj = intel_texture_object(texObj);
+ struct gl_texture_image *firstImage =
+ texObj->Image[0][intelObj->firstLevel];
+
+ entry->tex_target = texObj->Target;
+
+ entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP)
+ ? ctx->Texture.CubeMapSeamless : GL_FALSE;
+
+ entry->wrap_r = texObj->WrapR;
+ entry->wrap_s = texObj->WrapS;
+ entry->wrap_t = texObj->WrapT;
+
+ entry->maxlod = texObj->MaxLod;
+ entry->minlod = texObj->MinLod;
+ entry->lod_bias = texUnit->LodBias + texObj->LodBias;
+ entry->max_aniso = texObj->MaxAnisotropy;
+ entry->minfilter = texObj->MinFilter;
+ entry->magfilter = texObj->MagFilter;
+ entry->comparemode = texObj->CompareMode;
+ entry->comparefunc = texObj->CompareFunc;
+
+ dri_bo_unreference(brw->wm.sdc_bo[unit]);
+ if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
+ float bordercolor[4] = {
+ texObj->BorderColor[0],
+ texObj->BorderColor[0],
+ texObj->BorderColor[0],
+ texObj->BorderColor[0]
+ };
+ /* GL specs that border color for depth textures is taken from the
+ * R channel, while the hardware uses A. Spam R into all the
+ * channels for safety.
+ */
+ brw->wm.sdc_bo[unit] = upload_default_color(brw, bordercolor);
+ } else {
+ brw->wm.sdc_bo[unit] = upload_default_color(brw,
+ texObj->BorderColor);
+ }
+ key->sampler_count = unit + 1;
+ }
+ }
+}
+
+/* All samplers must be uploaded in a single contiguous array, which
+ * complicates various things. However, this is still too confusing -
+ * FIXME: simplify all the different new texture state flags.
+ */
+static void upload_wm_samplers( struct brw_context *brw )
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ struct wm_sampler_key key;
+ int i;
+
+ brw_wm_sampler_populate_key(brw, &key);
+
+ if (brw->wm.sampler_count != key.sampler_count) {
+ brw->wm.sampler_count = key.sampler_count;
+ brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
+ }
+
+ dri_bo_unreference(brw->wm.sampler_bo);
+ brw->wm.sampler_bo = NULL;
+ if (brw->wm.sampler_count == 0)
+ return;
+
+ brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER,
+ &key, sizeof(key),
+ brw->wm.sdc_bo, key.sampler_count,
+ NULL);
+
+ /* If we didnt find it in the cache, compute the state and put it in the
+ * cache.
+ */
+ if (brw->wm.sampler_bo == NULL) {
+ struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT];
+
+ memset(sampler, 0, sizeof(sampler));
+ for (i = 0; i < key.sampler_count; i++) {
+ if (brw->wm.sdc_bo[i] == NULL)
+ continue;
+
+ brw_update_sampler_state(&key.sampler[i], brw->wm.sdc_bo[i],
+ &sampler[i]);
+ }
+
+ brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER,
+ &key, sizeof(key),
+ brw->wm.sdc_bo, key.sampler_count,
+ &sampler, sizeof(sampler),
+ NULL, NULL);
+
+ /* Emit SDC relocations */
+ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+ if (!ctx->Texture.Unit[i]._ReallyEnabled)
+ continue;
+
+ dri_bo_emit_reloc(brw->wm.sampler_bo,
+ I915_GEM_DOMAIN_SAMPLER, 0,
+ 0,
+ i * sizeof(struct brw_sampler_state) +
+ offsetof(struct brw_sampler_state, ss2),
+ brw->wm.sdc_bo[i]);
+ }
+ }
+}
+
+const struct brw_tracked_state brw_wm_samplers = {
+ .dirty = {
+ .mesa = _NEW_TEXTURE,
+ .brw = 0,
+ .cache = 0
+ },
+ .prepare = upload_wm_samplers,
+};
+
+
diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c
new file mode 100644
index 00000000000..361f91292be
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_state.c
@@ -0,0 +1,317 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_wm.h"
+
+/***********************************************************************
+ * WM unit - fragment programs and rasterization
+ */
+
+struct brw_wm_unit_key {
+ unsigned int total_grf, total_scratch;
+ unsigned int urb_entry_read_length;
+ unsigned int curb_entry_read_length;
+ unsigned int dispatch_grf_start_reg;
+
+ unsigned int curbe_offset;
+ unsigned int urb_size;
+
+ unsigned int max_threads;
+
+ unsigned int nr_surfaces, sampler_count;
+ GLboolean uses_depth, computes_depth, uses_kill, is_glsl;
+ GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable;
+ GLfloat offset_units, offset_factor;
+};
+
+static void
+wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ const struct gl_fragment_program *fp = brw->fragment_program;
+ const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp;
+ struct intel_context *intel = &brw->intel;
+
+ memset(key, 0, sizeof(*key));
+
+ if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+ key->max_threads = 1;
+ else {
+ /* WM maximum threads is number of EUs times number of threads per EU. */
+ if (BRW_IS_IGDNG(brw))
+ key->max_threads = 12 * 6;
+ else if (BRW_IS_G4X(brw))
+ key->max_threads = 10 * 5;
+ else
+ key->max_threads = 8 * 4;
+ }
+
+ /* CACHE_NEW_WM_PROG */
+ key->total_grf = brw->wm.prog_data->total_grf;
+ key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
+ key->curb_entry_read_length = brw->wm.prog_data->curb_read_length;
+ key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
+ key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024);
+
+ /* BRW_NEW_URB_FENCE */
+ key->urb_size = brw->urb.vsize;
+
+ /* BRW_NEW_CURBE_OFFSETS */
+ key->curbe_offset = brw->curbe.wm_start;
+
+ /* BRW_NEW_NR_SURFACEs */
+ key->nr_surfaces = brw->wm.nr_surfaces;
+
+ /* CACHE_NEW_SAMPLER */
+ key->sampler_count = brw->wm.sampler_count;
+
+ /* _NEW_POLYGONSTIPPLE */
+ key->polygon_stipple = ctx->Polygon.StippleFlag;
+
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
+
+ /* as far as we can tell */
+ key->computes_depth =
+ (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0;
+ /* BRW_NEW_DEPTH_BUFFER
+ * Override for NULL depthbuffer case, required by the Pixel Shader Computed
+ * Depth field.
+ */
+ if (brw->state.depth_region == NULL)
+ key->computes_depth = 0;
+
+ /* _NEW_COLOR */
+ key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
+ key->is_glsl = bfp->isGLSL;
+
+ /* temporary sanity check assertion */
+ ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp));
+
+ /* _NEW_DEPTH */
+ key->stats_wm = intel->stats_wm;
+
+ /* _NEW_LINE */
+ key->line_stipple = ctx->Line.StippleFlag;
+
+ /* _NEW_POLYGON */
+ key->offset_enable = ctx->Polygon.OffsetFill;
+ key->offset_units = ctx->Polygon.OffsetUnits;
+ key->offset_factor = ctx->Polygon.OffsetFactor;
+}
+
+/**
+ * Setup wm hardware state. See page 225 of Volume 2
+ */
+static dri_bo *
+wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
+ dri_bo **reloc_bufs)
+{
+ struct brw_wm_unit_state wm;
+ dri_bo *bo;
+
+ memset(&wm, 0, sizeof(wm));
+
+ wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+ wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
+ wm.thread1.depth_coef_urb_read_offset = 1;
+ wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+
+ if (BRW_IS_IGDNG(brw))
+ wm.thread1.binding_table_entry_count = 0; /* hardware requirement */
+ else
+ wm.thread1.binding_table_entry_count = key->nr_surfaces;
+
+ if (key->total_scratch != 0) {
+ wm.thread2.scratch_space_base_pointer =
+ brw->wm.scratch_bo->offset >> 10; /* reloc */
+ wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1;
+ } else {
+ wm.thread2.scratch_space_base_pointer = 0;
+ wm.thread2.per_thread_scratch_space = 0;
+ }
+
+ wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg;
+ wm.thread3.urb_entry_read_length = key->urb_entry_read_length;
+ wm.thread3.urb_entry_read_offset = 0;
+ wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
+ wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
+
+ if (BRW_IS_IGDNG(brw))
+ wm.wm4.sampler_count = 0; /* hardware requirement */
+ else
+ wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
+
+ if (brw->wm.sampler_bo != NULL) {
+ /* reloc */
+ wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5;
+ } else {
+ wm.wm4.sampler_state_pointer = 0;
+ }
+
+ wm.wm5.program_uses_depth = key->uses_depth;
+ wm.wm5.program_computes_depth = key->computes_depth;
+ wm.wm5.program_uses_killpixel = key->uses_kill;
+
+ if (key->is_glsl)
+ wm.wm5.enable_8_pix = 1;
+ else
+ wm.wm5.enable_16_pix = 1;
+
+ wm.wm5.max_threads = key->max_threads - 1;
+ wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */
+ wm.wm5.legacy_line_rast = 0;
+ wm.wm5.legacy_global_depth_bias = 0;
+ wm.wm5.early_depth_test = 1; /* never need to disable */
+ wm.wm5.line_aa_region_width = 0;
+ wm.wm5.line_endcap_aa_region_width = 1;
+
+ wm.wm5.polygon_stipple = key->polygon_stipple;
+
+ if (key->offset_enable) {
+ wm.wm5.depth_offset = 1;
+ /* Something wierd going on with legacy_global_depth_bias,
+ * offset_constant, scaling and MRD. This value passes glean
+ * but gives some odd results elsewere (eg. the
+ * quad-offset-units test).
+ */
+ wm.global_depth_offset_constant = key->offset_units * 2;
+
+ /* This is the only value that passes glean:
+ */
+ wm.global_depth_offset_scale = key->offset_factor;
+ }
+
+ wm.wm5.line_stipple = key->line_stipple;
+
+ if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm)
+ wm.wm4.stats_enable = 1;
+
+ bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
+ key, sizeof(*key),
+ reloc_bufs, 3,
+ &wm, sizeof(wm),
+ NULL, NULL);
+
+ /* Emit WM program relocation */
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ wm.thread0.grf_reg_count << 1,
+ offsetof(struct brw_wm_unit_state, thread0),
+ brw->wm.prog_bo);
+
+ /* Emit scratch space relocation */
+ if (key->total_scratch != 0) {
+ dri_bo_emit_reloc(bo,
+ 0, 0,
+ wm.thread2.per_thread_scratch_space,
+ offsetof(struct brw_wm_unit_state, thread2),
+ brw->wm.scratch_bo);
+ }
+
+ /* Emit sampler state relocation */
+ if (key->sampler_count != 0) {
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ wm.wm4.stats_enable | (wm.wm4.sampler_count << 2),
+ offsetof(struct brw_wm_unit_state, wm4),
+ brw->wm.sampler_bo);
+ }
+
+ return bo;
+}
+
+
+static void upload_wm_unit( struct brw_context *brw )
+{
+ struct intel_context *intel = &brw->intel;
+ struct brw_wm_unit_key key;
+ dri_bo *reloc_bufs[3];
+ wm_unit_populate_key(brw, &key);
+
+ /* Allocate the necessary scratch space if we haven't already. Don't
+ * bother reducing the allocation later, since we use scratch so
+ * rarely.
+ */
+ assert(key.total_scratch <= 12 * 1024);
+ if (key.total_scratch) {
+ GLuint total = key.total_scratch * key.max_threads;
+
+ if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) {
+ dri_bo_unreference(brw->wm.scratch_bo);
+ brw->wm.scratch_bo = NULL;
+ }
+ if (brw->wm.scratch_bo == NULL) {
+ brw->wm.scratch_bo = dri_bo_alloc(intel->bufmgr,
+ "wm scratch",
+ total,
+ 4096);
+ }
+ }
+
+ reloc_bufs[0] = brw->wm.prog_bo;
+ reloc_bufs[1] = brw->wm.scratch_bo;
+ reloc_bufs[2] = brw->wm.sampler_bo;
+
+ dri_bo_unreference(brw->wm.state_bo);
+ brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT,
+ &key, sizeof(key),
+ reloc_bufs, 3,
+ NULL);
+ if (brw->wm.state_bo == NULL) {
+ brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs);
+ }
+}
+
+const struct brw_tracked_state brw_wm_unit = {
+ .dirty = {
+ .mesa = (_NEW_POLYGON |
+ _NEW_POLYGONSTIPPLE |
+ _NEW_LINE |
+ _NEW_COLOR |
+ _NEW_DEPTH),
+
+ .brw = (BRW_NEW_FRAGMENT_PROGRAM |
+ BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_DEPTH_BUFFER |
+ BRW_NEW_NR_WM_SURFACES),
+
+ .cache = (CACHE_NEW_WM_PROG |
+ CACHE_NEW_SAMPLER)
+ },
+ .prepare = upload_wm_unit,
+};
+
diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c
new file mode 100644
index 00000000000..f7cc5153a8e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_surface_state.c
@@ -0,0 +1,752 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ */
+
+
+#include "main/mtypes.h"
+#include "main/texformat.h"
+#include "main/texstore.h"
+#include "shader/prog_parameter.h"
+
+#include "intel_mipmap_tree.h"
+#include "intel_batchbuffer.h"
+#include "intel_tex.h"
+#include "intel_fbo.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+
+static GLuint translate_tex_target( GLenum target )
+{
+ switch (target) {
+ case GL_TEXTURE_1D:
+ return BRW_SURFACE_1D;
+
+ case GL_TEXTURE_RECTANGLE_NV:
+ return BRW_SURFACE_2D;
+
+ case GL_TEXTURE_2D:
+ return BRW_SURFACE_2D;
+
+ case GL_TEXTURE_3D:
+ return BRW_SURFACE_3D;
+
+ case GL_TEXTURE_CUBE_MAP:
+ return BRW_SURFACE_CUBE;
+
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+
+static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format,
+ GLenum depth_mode )
+{
+ switch( mesa_format ) {
+ case MESA_FORMAT_L8:
+ return BRW_SURFACEFORMAT_L8_UNORM;
+
+ case MESA_FORMAT_I8:
+ return BRW_SURFACEFORMAT_I8_UNORM;
+
+ case MESA_FORMAT_A8:
+ return BRW_SURFACEFORMAT_A8_UNORM;
+
+ case MESA_FORMAT_AL88:
+ return BRW_SURFACEFORMAT_L8A8_UNORM;
+
+ case MESA_FORMAT_RGB888:
+ assert(0); /* not supported for sampling */
+ return BRW_SURFACEFORMAT_R8G8B8_UNORM;
+
+ case MESA_FORMAT_ARGB8888:
+ if (internal_format == GL_RGB)
+ return BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
+ else
+ return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+
+ case MESA_FORMAT_RGBA8888_REV:
+ if (internal_format == GL_RGB)
+ return BRW_SURFACEFORMAT_R8G8B8X8_UNORM;
+ else
+ return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
+
+ case MESA_FORMAT_RGB565:
+ return BRW_SURFACEFORMAT_B5G6R5_UNORM;
+
+ case MESA_FORMAT_ARGB1555:
+ return BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
+
+ case MESA_FORMAT_ARGB4444:
+ return BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
+
+ case MESA_FORMAT_YCBCR_REV:
+ return BRW_SURFACEFORMAT_YCRCB_NORMAL;
+
+ case MESA_FORMAT_YCBCR:
+ return BRW_SURFACEFORMAT_YCRCB_SWAPUVY;
+
+ case MESA_FORMAT_RGB_FXT1:
+ case MESA_FORMAT_RGBA_FXT1:
+ return BRW_SURFACEFORMAT_FXT1;
+
+ case MESA_FORMAT_Z16:
+ if (depth_mode == GL_INTENSITY)
+ return BRW_SURFACEFORMAT_I16_UNORM;
+ else if (depth_mode == GL_ALPHA)
+ return BRW_SURFACEFORMAT_A16_UNORM;
+ else
+ return BRW_SURFACEFORMAT_L16_UNORM;
+
+ case MESA_FORMAT_RGB_DXT1:
+ return BRW_SURFACEFORMAT_DXT1_RGB;
+
+ case MESA_FORMAT_RGBA_DXT1:
+ return BRW_SURFACEFORMAT_BC1_UNORM;
+
+ case MESA_FORMAT_RGBA_DXT3:
+ return BRW_SURFACEFORMAT_BC2_UNORM;
+
+ case MESA_FORMAT_RGBA_DXT5:
+ return BRW_SURFACEFORMAT_BC3_UNORM;
+
+ case MESA_FORMAT_SARGB8:
+ return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB;
+
+ case MESA_FORMAT_SLA8:
+ return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB;
+
+ case MESA_FORMAT_SL8:
+ return BRW_SURFACEFORMAT_L8_UNORM_SRGB;
+
+ case MESA_FORMAT_SRGB_DXT1:
+ return BRW_SURFACEFORMAT_BC1_UNORM_SRGB;
+
+ case MESA_FORMAT_S8_Z24:
+ /* XXX: these different surface formats don't seem to
+ * make any difference for shadow sampler/compares.
+ */
+ if (depth_mode == GL_INTENSITY)
+ return BRW_SURFACEFORMAT_I24X8_UNORM;
+ else if (depth_mode == GL_ALPHA)
+ return BRW_SURFACEFORMAT_A24X8_UNORM;
+ else
+ return BRW_SURFACEFORMAT_L24X8_UNORM;
+
+ case MESA_FORMAT_DUDV8:
+ return BRW_SURFACEFORMAT_R8G8_SNORM;
+
+ case MESA_FORMAT_SIGNED_RGBA8888_REV:
+ return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
+
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static void
+brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
+{
+ switch (tiling) {
+ case I915_TILING_NONE:
+ surf->ss3.tiled_surface = 0;
+ surf->ss3.tile_walk = 0;
+ break;
+ case I915_TILING_X:
+ surf->ss3.tiled_surface = 1;
+ surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+ break;
+ case I915_TILING_Y:
+ surf->ss3.tiled_surface = 1;
+ surf->ss3.tile_walk = BRW_TILEWALK_YMAJOR;
+ break;
+ }
+}
+
+static dri_bo *
+brw_create_texture_surface( struct brw_context *brw,
+ struct brw_surface_key *key )
+{
+ struct brw_surface_state surf;
+ dri_bo *bo;
+
+ memset(&surf, 0, sizeof(surf));
+
+ surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+ surf.ss0.surface_type = translate_tex_target(key->target);
+ if (key->bo) {
+ surf.ss0.surface_format = translate_tex_format(key->format,
+ key->internal_format,
+ key->depthmode);
+ }
+ else {
+ switch (key->depth) {
+ case 32:
+ surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ break;
+ default:
+ case 24:
+ surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
+ break;
+ case 16:
+ surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+ break;
+ }
+ }
+
+ /* This is ok for all textures with channel width 8bit or less:
+ */
+/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
+ if (key->bo)
+ surf.ss1.base_addr = key->bo->offset; /* reloc */
+ else
+ surf.ss1.base_addr = key->offset;
+
+ surf.ss2.mip_count = key->last_level - key->first_level;
+ surf.ss2.width = key->width - 1;
+ surf.ss2.height = key->height - 1;
+ brw_set_surface_tiling(&surf, key->tiling);
+ surf.ss3.pitch = (key->pitch * key->cpp) - 1;
+ surf.ss3.depth = key->depth - 1;
+
+ surf.ss4.min_lod = 0;
+
+ if (key->target == GL_TEXTURE_CUBE_MAP) {
+ surf.ss0.cube_pos_x = 1;
+ surf.ss0.cube_pos_y = 1;
+ surf.ss0.cube_pos_z = 1;
+ surf.ss0.cube_neg_x = 1;
+ surf.ss0.cube_neg_y = 1;
+ surf.ss0.cube_neg_z = 1;
+ }
+
+ bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
+ key, sizeof(*key),
+ &key->bo, key->bo ? 1 : 0,
+ &surf, sizeof(surf),
+ NULL, NULL);
+
+ if (key->bo) {
+ /* Emit relocation to surface contents */
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_SAMPLER, 0,
+ 0,
+ offsetof(struct brw_surface_state, ss1),
+ key->bo);
+ }
+ return bo;
+}
+
+static void
+brw_update_texture_surface( GLcontext *ctx, GLuint unit )
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+ struct intel_texture_object *intelObj = intel_texture_object(tObj);
+ struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
+ struct brw_surface_key key;
+ const GLuint surf = SURF_INDEX_TEXTURE(unit);
+
+ memset(&key, 0, sizeof(key));
+
+ if (intelObj->imageOverride) {
+ key.pitch = intelObj->pitchOverride / intelObj->mt->cpp;
+ key.depth = intelObj->depthOverride;
+ key.bo = NULL;
+ key.offset = intelObj->textureOffset;
+ } else {
+ key.format = firstImage->TexFormat->MesaFormat;
+ key.internal_format = firstImage->InternalFormat;
+ key.pitch = intelObj->mt->pitch;
+ key.depth = firstImage->Depth;
+ key.bo = intelObj->mt->region->buffer;
+ key.offset = 0;
+ }
+
+ key.target = tObj->Target;
+ key.depthmode = tObj->DepthMode;
+ key.first_level = intelObj->firstLevel;
+ key.last_level = intelObj->lastLevel;
+ key.width = firstImage->Width;
+ key.height = firstImage->Height;
+ key.cpp = intelObj->mt->cpp;
+ key.tiling = intelObj->mt->region->tiling;
+
+ dri_bo_unreference(brw->wm.surf_bo[surf]);
+ brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &key.bo, key.bo ? 1 : 0,
+ NULL);
+ if (brw->wm.surf_bo[surf] == NULL) {
+ brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key);
+ }
+}
+
+
+
+/**
+ * Create the constant buffer surface. Vertex/fragment shader constants will be
+ * read from this buffer with Data Port Read instructions/messages.
+ */
+dri_bo *
+brw_create_constant_surface( struct brw_context *brw,
+ struct brw_surface_key *key )
+{
+ const GLint w = key->width - 1;
+ struct brw_surface_state surf;
+ dri_bo *bo;
+
+ memset(&surf, 0, sizeof(surf));
+
+ surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+ surf.ss0.surface_type = BRW_SURFACE_BUFFER;
+ surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
+ assert(key->bo);
+ if (key->bo)
+ surf.ss1.base_addr = key->bo->offset; /* reloc */
+ else
+ surf.ss1.base_addr = key->offset;
+
+ surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */
+ surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */
+ surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */
+ surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */
+ brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
+
+ bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
+ key, sizeof(*key),
+ &key->bo, key->bo ? 1 : 0,
+ &surf, sizeof(surf),
+ NULL, NULL);
+
+ if (key->bo) {
+ /* Emit relocation to surface contents */
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_SAMPLER, 0,
+ 0,
+ offsetof(struct brw_surface_state, ss1),
+ key->bo);
+ }
+
+ return bo;
+}
+
+/* Creates a new WM constant buffer reflecting the current fragment program's
+ * constants, if needed by the fragment program.
+ *
+ * Otherwise, constants go through the CURBEs using the brw_constant_buffer
+ * state atom.
+ */
+static drm_intel_bo *
+brw_wm_update_constant_buffer(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ struct brw_fragment_program *fp =
+ (struct brw_fragment_program *) brw->fragment_program;
+ const struct gl_program_parameter_list *params = fp->program.Base.Parameters;
+ const int size = params->NumParameters * 4 * sizeof(GLfloat);
+ drm_intel_bo *const_buffer;
+
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ if (!fp->use_const_buffer)
+ return NULL;
+
+ const_buffer = drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer",
+ size, 64);
+
+ /* _NEW_PROGRAM_CONSTANTS */
+ dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
+
+ return const_buffer;
+}
+
+/**
+ * Update the surface state for a WM constant buffer.
+ * The constant buffer will be (re)allocated here if needed.
+ */
+static void
+brw_update_wm_constant_surface( GLcontext *ctx,
+ GLuint surf)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_surface_key key;
+ struct brw_fragment_program *fp =
+ (struct brw_fragment_program *) brw->fragment_program;
+ const struct gl_program_parameter_list *params =
+ fp->program.Base.Parameters;
+
+ /* If we're in this state update atom, we need to update WM constants, so
+ * free the old buffer and create a new one for the new contents.
+ */
+ dri_bo_unreference(fp->const_buffer);
+ fp->const_buffer = brw_wm_update_constant_buffer(brw);
+
+ /* If there's no constant buffer, then no surface BO is needed to point at
+ * it.
+ */
+ if (fp->const_buffer == 0) {
+ drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
+ brw->wm.surf_bo[surf] = NULL;
+ return;
+ }
+
+ memset(&key, 0, sizeof(key));
+
+ key.format = MESA_FORMAT_RGBA_FLOAT32;
+ key.internal_format = GL_RGBA;
+ key.bo = fp->const_buffer;
+ key.depthmode = GL_NONE;
+ key.pitch = params->NumParameters;
+ key.width = params->NumParameters;
+ key.height = 1;
+ key.depth = 1;
+ key.cpp = 16;
+
+ /*
+ printf("%s:\n", __FUNCTION__);
+ printf(" width %d height %d depth %d cpp %d pitch %d\n",
+ key.width, key.height, key.depth, key.cpp, key.pitch);
+ */
+
+ dri_bo_unreference(brw->wm.surf_bo[surf]);
+ brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &key.bo, key.bo ? 1 : 0,
+ NULL);
+ if (brw->wm.surf_bo[surf] == NULL) {
+ brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key);
+ }
+ brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+}
+
+/**
+ * Updates surface / buffer for fragment shader constant buffer, if
+ * one is required.
+ *
+ * This consumes the state updates for the constant buffer, and produces
+ * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for
+ * inclusion in the binding table.
+ */
+static void prepare_wm_constant_surface(struct brw_context *brw )
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ struct brw_fragment_program *fp =
+ (struct brw_fragment_program *) brw->fragment_program;
+ GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
+
+ drm_intel_bo_unreference(fp->const_buffer);
+ fp->const_buffer = brw_wm_update_constant_buffer(brw);
+
+ /* If there's no constant buffer, then no surface BO is needed to point at
+ * it.
+ */
+ if (fp->const_buffer == 0) {
+ if (brw->wm.surf_bo[surf] != NULL) {
+ drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
+ brw->wm.surf_bo[surf] = NULL;
+ brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+ }
+ return;
+ }
+
+ brw_update_wm_constant_surface(ctx, surf);
+}
+
+const struct brw_tracked_state brw_wm_constant_surface = {
+ .dirty = {
+ .mesa = (_NEW_PROGRAM_CONSTANTS),
+ .brw = (BRW_NEW_FRAGMENT_PROGRAM),
+ .cache = 0
+ },
+ .prepare = prepare_wm_constant_surface,
+};
+
+
+/**
+ * Sets up a surface state structure to point at the given region.
+ * While it is only used for the front/back buffer currently, it should be
+ * usable for further buffers when doing ARB_draw_buffer support.
+ */
+static void
+brw_update_renderbuffer_surface(struct brw_context *brw,
+ struct gl_renderbuffer *rb,
+ unsigned int unit)
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ dri_bo *region_bo = NULL;
+ struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+ struct intel_region *region = irb ? irb->region : NULL;
+ struct {
+ unsigned int surface_type;
+ unsigned int surface_format;
+ unsigned int width, height, pitch, cpp;
+ GLubyte color_mask[4];
+ GLboolean color_blend;
+ uint32_t tiling;
+ uint32_t draw_offset;
+ } key;
+
+ memset(&key, 0, sizeof(key));
+
+ if (region != NULL) {
+ region_bo = region->buffer;
+
+ key.surface_type = BRW_SURFACE_2D;
+ switch (irb->texformat->MesaFormat) {
+ case MESA_FORMAT_ARGB8888:
+ key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ break;
+ case MESA_FORMAT_RGB565:
+ key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+ break;
+ case MESA_FORMAT_ARGB1555:
+ key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
+ break;
+ case MESA_FORMAT_ARGB4444:
+ key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
+ break;
+ default:
+ _mesa_problem(ctx, "Bad renderbuffer format: %d\n",
+ irb->texformat->MesaFormat);
+ }
+ key.tiling = region->tiling;
+ if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) {
+ key.width = rb->Width;
+ key.height = rb->Height;
+ } else {
+ key.width = region->width;
+ key.height = region->height;
+ }
+ key.pitch = region->pitch;
+ key.cpp = region->cpp;
+ key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */
+ } else {
+ key.surface_type = BRW_SURFACE_NULL;
+ key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ key.tiling = I915_TILING_X;
+ key.width = 1;
+ key.height = 1;
+ key.cpp = 4;
+ key.draw_offset = 0;
+ }
+ memcpy(key.color_mask, ctx->Color.ColorMask,
+ sizeof(key.color_mask));
+ key.color_blend = (!ctx->Color._LogicOpEnabled &&
+ ctx->Color.BlendEnabled);
+
+ dri_bo_unreference(brw->wm.surf_bo[unit]);
+ brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &region_bo, 1,
+ NULL);
+
+ if (brw->wm.surf_bo[unit] == NULL) {
+ struct brw_surface_state surf;
+
+ memset(&surf, 0, sizeof(surf));
+
+ surf.ss0.surface_format = key.surface_format;
+ surf.ss0.surface_type = key.surface_type;
+ if (key.tiling == I915_TILING_NONE) {
+ surf.ss1.base_addr = key.draw_offset;
+ } else {
+ uint32_t tile_offset = key.draw_offset % 4096;
+
+ surf.ss1.base_addr = key.draw_offset - tile_offset;
+
+ assert(BRW_IS_G4X(brw) || tile_offset == 0);
+ if (BRW_IS_G4X(brw)) {
+ if (key.tiling == I915_TILING_X) {
+ /* Note that the low bits of these fields are missing, so
+ * there's the possibility of getting in trouble.
+ */
+ surf.ss5.x_offset = (tile_offset % 512) / key.cpp / 4;
+ surf.ss5.y_offset = tile_offset / 512 / 2;
+ } else {
+ surf.ss5.x_offset = (tile_offset % 128) / key.cpp / 4;
+ surf.ss5.y_offset = tile_offset / 128 / 2;
+ }
+ }
+ }
+ if (region_bo != NULL)
+ surf.ss1.base_addr += region_bo->offset; /* reloc */
+
+ surf.ss2.width = key.width - 1;
+ surf.ss2.height = key.height - 1;
+ brw_set_surface_tiling(&surf, key.tiling);
+ surf.ss3.pitch = (key.pitch * key.cpp) - 1;
+
+ /* _NEW_COLOR */
+ surf.ss0.color_blend = key.color_blend;
+ surf.ss0.writedisable_red = !key.color_mask[0];
+ surf.ss0.writedisable_green = !key.color_mask[1];
+ surf.ss0.writedisable_blue = !key.color_mask[2];
+ surf.ss0.writedisable_alpha = !key.color_mask[3];
+
+ /* Key size will never match key size for textures, so we're safe. */
+ brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache,
+ BRW_SS_SURFACE,
+ &key, sizeof(key),
+ &region_bo, 1,
+ &surf, sizeof(surf),
+ NULL, NULL);
+ if (region_bo != NULL) {
+ /* We might sample from it, and we might render to it, so flag
+ * them both. We might be able to figure out from other state
+ * a more restrictive relocation to emit.
+ */
+ drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit],
+ offsetof(struct brw_surface_state, ss1),
+ region_bo,
+ surf.ss1.base_addr - region_bo->offset,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER);
+ }
+ }
+}
+
+
+/**
+ * Constructs the binding table for the WM surface state, which maps unit
+ * numbers to surface state objects.
+ */
+static dri_bo *
+brw_wm_get_binding_table(struct brw_context *brw)
+{
+ dri_bo *bind_bo;
+
+ assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
+
+ bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
+ NULL, 0,
+ brw->wm.surf_bo, brw->wm.nr_surfaces,
+ NULL);
+
+ if (bind_bo == NULL) {
+ GLuint data_size = brw->wm.nr_surfaces * sizeof(GLuint);
+ uint32_t data[BRW_WM_MAX_SURF];
+ int i;
+
+ for (i = 0; i < brw->wm.nr_surfaces; i++)
+ if (brw->wm.surf_bo[i])
+ data[i] = brw->wm.surf_bo[i]->offset;
+ else
+ data[i] = 0;
+
+ bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
+ NULL, 0,
+ brw->wm.surf_bo, brw->wm.nr_surfaces,
+ data, data_size,
+ NULL, NULL);
+
+ /* Emit binding table relocations to surface state */
+ for (i = 0; i < BRW_WM_MAX_SURF; i++) {
+ if (brw->wm.surf_bo[i] != NULL) {
+ dri_bo_emit_reloc(bind_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0,
+ i * sizeof(GLuint),
+ brw->wm.surf_bo[i]);
+ }
+ }
+ }
+
+ return bind_bo;
+}
+
+static void prepare_wm_surfaces(struct brw_context *brw )
+{
+ GLcontext *ctx = &brw->intel.ctx;
+ GLuint i;
+ int old_nr_surfaces;
+
+ /* _NEW_BUFFERS */
+ /* Update surfaces for drawing buffers */
+ if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
+ for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+ brw_update_renderbuffer_surface(brw,
+ ctx->DrawBuffer->_ColorDrawBuffers[i],
+ i);
+ }
+ } else {
+ brw_update_renderbuffer_surface(brw, NULL, 0);
+ }
+
+ old_nr_surfaces = brw->wm.nr_surfaces;
+ brw->wm.nr_surfaces = MAX_DRAW_BUFFERS;
+
+ if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL)
+ brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1;
+
+ /* Update surfaces for textures */
+ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+ const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
+ const GLuint surf = SURF_INDEX_TEXTURE(i);
+
+ /* _NEW_TEXTURE, BRW_NEW_TEXDATA */
+ if (texUnit->_ReallyEnabled) {
+ brw_update_texture_surface(ctx, i);
+ brw->wm.nr_surfaces = surf + 1;
+ } else {
+ dri_bo_unreference(brw->wm.surf_bo[surf]);
+ brw->wm.surf_bo[surf] = NULL;
+ }
+ }
+
+ dri_bo_unreference(brw->wm.bind_bo);
+ brw->wm.bind_bo = brw_wm_get_binding_table(brw);
+
+ if (brw->wm.nr_surfaces != old_nr_surfaces)
+ brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
+}
+
+const struct brw_tracked_state brw_wm_surfaces = {
+ .dirty = {
+ .mesa = (_NEW_COLOR |
+ _NEW_TEXTURE |
+ _NEW_BUFFERS),
+ .brw = (BRW_NEW_CONTEXT |
+ BRW_NEW_WM_SURFACES),
+ .cache = 0
+ },
+ .prepare = prepare_wm_surfaces,
+};
+
+
+
diff --git a/src/gallium/drivers/i965/intel_batchbuffer.h b/src/gallium/drivers/i965/intel_batchbuffer.h
new file mode 100644
index 00000000000..d4899aab7fa
--- /dev/null
+++ b/src/gallium/drivers/i965/intel_batchbuffer.h
@@ -0,0 +1,184 @@
+#ifndef INTEL_BATCHBUFFER_H
+#define INTEL_BATCHBUFFER_H
+
+#include "main/mtypes.h"
+
+#include "intel_context.h"
+#include "intel_bufmgr.h"
+#include "intel_reg.h"
+
+#define BATCH_SZ 16384
+#define BATCH_RESERVED 16
+
+enum cliprect_mode {
+ /**
+ * Batchbuffer contents may be looped over per cliprect, but do not
+ * require it.
+ */
+ IGNORE_CLIPRECTS,
+ /**
+ * Batchbuffer contents require looping over per cliprect at batch submit
+ * time.
+ *
+ * This will be upgraded to NO_LOOP_CLIPRECTS when there's a single
+ * constant cliprect, as in DRI2 or FBO rendering.
+ */
+ LOOP_CLIPRECTS,
+ /**
+ * Batchbuffer contents contain drawing that should not be executed multiple
+ * times.
+ */
+ NO_LOOP_CLIPRECTS,
+ /**
+ * Batchbuffer contents contain drawing that already handles cliprects, such
+ * as 2D drawing to front/back/depth that doesn't respect DRAWING_RECTANGLE.
+ *
+ * Equivalent behavior to NO_LOOP_CLIPRECTS, but may not persist in batch
+ * outside of LOCK/UNLOCK. This is upgraded to just NO_LOOP_CLIPRECTS when
+ * there's a constant cliprect, as in DRI2 or FBO rendering.
+ */
+ REFERENCES_CLIPRECTS
+};
+
+struct intel_batchbuffer
+{
+ struct intel_context *intel;
+
+ dri_bo *buf;
+
+ GLubyte *buffer;
+
+ GLubyte *map;
+ GLubyte *ptr;
+
+ enum cliprect_mode cliprect_mode;
+
+ GLuint size;
+
+ /** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */
+ struct {
+ GLuint total;
+ GLubyte *start_ptr;
+ } emit;
+
+ GLuint dirty_state;
+};
+
+struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context
+ *intel);
+
+void intel_batchbuffer_free(struct intel_batchbuffer *batch);
+
+
+void _intel_batchbuffer_flush(struct intel_batchbuffer *batch,
+ const char *file, int line);
+
+#define intel_batchbuffer_flush(batch) \
+ _intel_batchbuffer_flush(batch, __FILE__, __LINE__)
+
+void intel_batchbuffer_reset(struct intel_batchbuffer *batch);
+
+
+/* Unlike bmBufferData, this currently requires the buffer be mapped.
+ * Consider it a convenience function wrapping multple
+ * intel_buffer_dword() calls.
+ */
+void intel_batchbuffer_data(struct intel_batchbuffer *batch,
+ const void *data, GLuint bytes,
+ enum cliprect_mode cliprect_mode);
+
+void intel_batchbuffer_release_space(struct intel_batchbuffer *batch,
+ GLuint bytes);
+
+GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
+ dri_bo *buffer,
+ uint32_t read_domains,
+ uint32_t write_domain,
+ uint32_t offset);
+
+/* Inline functions - might actually be better off with these
+ * non-inlined. Certainly better off switching all command packets to
+ * be passed as structs rather than dwords, but that's a little bit of
+ * work...
+ */
+static INLINE GLint
+intel_batchbuffer_space(struct intel_batchbuffer *batch)
+{
+ return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map);
+}
+
+
+static INLINE void
+intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword)
+{
+ assert(batch->map);
+ assert(intel_batchbuffer_space(batch) >= 4);
+ *(GLuint *) (batch->ptr) = dword;
+ batch->ptr += 4;
+}
+
+static INLINE void
+intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
+ GLuint sz,
+ enum cliprect_mode cliprect_mode)
+{
+ assert(sz < batch->size - 8);
+ if (intel_batchbuffer_space(batch) < sz)
+ intel_batchbuffer_flush(batch);
+
+ if ((cliprect_mode == LOOP_CLIPRECTS ||
+ cliprect_mode == REFERENCES_CLIPRECTS) &&
+ batch->intel->constant_cliprect)
+ cliprect_mode = NO_LOOP_CLIPRECTS;
+
+ if (cliprect_mode != IGNORE_CLIPRECTS) {
+ if (batch->cliprect_mode == IGNORE_CLIPRECTS) {
+ batch->cliprect_mode = cliprect_mode;
+ } else {
+ if (batch->cliprect_mode != cliprect_mode) {
+ intel_batchbuffer_flush(batch);
+ batch->cliprect_mode = cliprect_mode;
+ }
+ }
+ }
+}
+
+/* Here are the crusty old macros, to be removed:
+ */
+#define BATCH_LOCALS
+
+#define BEGIN_BATCH(n, cliprect_mode) do { \
+ intel_batchbuffer_require_space(intel->batch, (n)*4, cliprect_mode); \
+ assert(intel->batch->emit.start_ptr == NULL); \
+ intel->batch->emit.total = (n) * 4; \
+ intel->batch->emit.start_ptr = intel->batch->ptr; \
+} while (0)
+
+#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d)
+
+#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \
+ assert((unsigned) (delta) < buf->size); \
+ intel_batchbuffer_emit_reloc(intel->batch, buf, \
+ read_domains, write_domain, delta); \
+} while (0)
+
+#define ADVANCE_BATCH() do { \
+ unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr; \
+ assert(intel->batch->emit.start_ptr != NULL); \
+ if (_n != intel->batch->emit.total) { \
+ fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", \
+ _n, intel->batch->emit.total); \
+ abort(); \
+ } \
+ intel->batch->emit.start_ptr = NULL; \
+} while(0)
+
+
+static INLINE void
+intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
+{
+ intel_batchbuffer_require_space(batch, 4, IGNORE_CLIPRECTS);
+ intel_batchbuffer_emit_dword(batch, MI_FLUSH);
+}
+
+#endif
diff --git a/src/gallium/drivers/i965/intel_chipset.h b/src/gallium/drivers/i965/intel_chipset.h
new file mode 100644
index 00000000000..3dc8653a735
--- /dev/null
+++ b/src/gallium/drivers/i965/intel_chipset.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <[email protected]>
+ *
+ */
+
+#define PCI_CHIP_I810 0x7121
+#define PCI_CHIP_I810_DC100 0x7123
+#define PCI_CHIP_I810_E 0x7125
+#define PCI_CHIP_I815 0x1132
+
+#define PCI_CHIP_I830_M 0x3577
+#define PCI_CHIP_845_G 0x2562
+#define PCI_CHIP_I855_GM 0x3582
+#define PCI_CHIP_I865_G 0x2572
+
+#define PCI_CHIP_I915_G 0x2582
+#define PCI_CHIP_E7221_G 0x258A
+#define PCI_CHIP_I915_GM 0x2592
+#define PCI_CHIP_I945_G 0x2772
+#define PCI_CHIP_I945_GM 0x27A2
+#define PCI_CHIP_I945_GME 0x27AE
+
+#define PCI_CHIP_Q35_G 0x29B2
+#define PCI_CHIP_G33_G 0x29C2
+#define PCI_CHIP_Q33_G 0x29D2
+
+#define PCI_CHIP_IGD_GM 0xA011
+#define PCI_CHIP_IGD_G 0xA001
+
+#define IS_IGDGM(devid) (devid == PCI_CHIP_IGD_GM)
+#define IS_IGDG(devid) (devid == PCI_CHIP_IGD_G)
+#define IS_IGD(devid) (IS_IGDG(devid) || IS_IGDGM(devid))
+
+#define PCI_CHIP_I965_G 0x29A2
+#define PCI_CHIP_I965_Q 0x2992
+#define PCI_CHIP_I965_G_1 0x2982
+#define PCI_CHIP_I946_GZ 0x2972
+#define PCI_CHIP_I965_GM 0x2A02
+#define PCI_CHIP_I965_GME 0x2A12
+
+#define PCI_CHIP_GM45_GM 0x2A42
+
+#define PCI_CHIP_IGD_E_G 0x2E02
+#define PCI_CHIP_Q45_G 0x2E12
+#define PCI_CHIP_G45_G 0x2E22
+#define PCI_CHIP_G41_G 0x2E32
+#define PCI_CHIP_B43_G 0x2E42
+
+#define PCI_CHIP_ILD_G 0x0042
+#define PCI_CHIP_ILM_G 0x0046
+
+#define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \
+ devid == PCI_CHIP_I915_GM || \
+ devid == PCI_CHIP_I945_GM || \
+ devid == PCI_CHIP_I945_GME || \
+ devid == PCI_CHIP_I965_GM || \
+ devid == PCI_CHIP_I965_GME || \
+ devid == PCI_CHIP_GM45_GM || \
+ IS_IGD(devid) || \
+ devid == PCI_CHIP_ILM_G)
+
+#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
+ devid == PCI_CHIP_Q45_G || \
+ devid == PCI_CHIP_G45_G || \
+ devid == PCI_CHIP_G41_G || \
+ devid == PCI_CHIP_B43_G)
+#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM)
+#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid))
+
+#define IS_ILD(devid) (devid == PCI_CHIP_ILD_G)
+#define IS_ILM(devid) (devid == PCI_CHIP_ILM_G)
+#define IS_IGDNG(devid) (IS_ILD(devid) || IS_ILM(devid))
+
+#define IS_915(devid) (devid == PCI_CHIP_I915_G || \
+ devid == PCI_CHIP_E7221_G || \
+ devid == PCI_CHIP_I915_GM)
+
+#define IS_945(devid) (devid == PCI_CHIP_I945_G || \
+ devid == PCI_CHIP_I945_GM || \
+ devid == PCI_CHIP_I945_GME || \
+ devid == PCI_CHIP_G33_G || \
+ devid == PCI_CHIP_Q33_G || \
+ devid == PCI_CHIP_Q35_G || IS_IGD(devid))
+
+#define IS_965(devid) (devid == PCI_CHIP_I965_G || \
+ devid == PCI_CHIP_I965_Q || \
+ devid == PCI_CHIP_I965_G_1 || \
+ devid == PCI_CHIP_I965_GM || \
+ devid == PCI_CHIP_I965_GME || \
+ devid == PCI_CHIP_I946_GZ || \
+ IS_G4X(devid) || \
+ IS_IGDNG(devid))
+
+#define IS_9XX(devid) (IS_915(devid) || \
+ IS_945(devid) || \
+ IS_965(devid))
diff --git a/src/gallium/drivers/i965/intel_structs.h b/src/gallium/drivers/i965/intel_structs.h
new file mode 100644
index 00000000000..522e3bd92c2
--- /dev/null
+++ b/src/gallium/drivers/i965/intel_structs.h
@@ -0,0 +1,132 @@
+#ifndef INTEL_STRUCTS_H
+#define INTEL_STRUCTS_H
+
+struct br0 {
+ GLuint length:8;
+ GLuint pad0:3;
+ GLuint dst_tiled:1;
+ GLuint pad1:8;
+ GLuint write_rgb:1;
+ GLuint write_alpha:1;
+ GLuint opcode:7;
+ GLuint client:3;
+};
+
+
+struct br13 {
+ GLint dest_pitch:16;
+ GLuint rop:8;
+ GLuint color_depth:2;
+ GLuint pad1:3;
+ GLuint mono_source_transparency:1;
+ GLuint clipping_enable:1;
+ GLuint pad0:1;
+};
+
+
+
+/* This is an attempt to move some of the 2D interaction in this
+ * driver to using structs for packets rather than a bunch of #defines
+ * and dwords.
+ */
+struct xy_color_blit {
+ struct br0 br0;
+ struct br13 br13;
+
+ struct {
+ GLuint dest_x1:16;
+ GLuint dest_y1:16;
+ } dw2;
+
+ struct {
+ GLuint dest_x2:16;
+ GLuint dest_y2:16;
+ } dw3;
+
+ GLuint dest_base_addr;
+ GLuint color;
+};
+
+struct xy_src_copy_blit {
+ struct br0 br0;
+ struct br13 br13;
+
+ struct {
+ GLuint dest_x1:16;
+ GLuint dest_y1:16;
+ } dw2;
+
+ struct {
+ GLuint dest_x2:16;
+ GLuint dest_y2:16;
+ } dw3;
+
+ GLuint dest_base_addr;
+
+ struct {
+ GLuint src_x1:16;
+ GLuint src_y1:16;
+ } dw5;
+
+ struct {
+ GLint src_pitch:16;
+ GLuint pad:16;
+ } dw6;
+
+ GLuint src_base_addr;
+};
+
+struct xy_setup_blit {
+ struct br0 br0;
+ struct br13 br13;
+
+ struct {
+ GLuint clip_x1:16;
+ GLuint clip_y1:16;
+ } dw2;
+
+ struct {
+ GLuint clip_x2:16;
+ GLuint clip_y2:16;
+ } dw3;
+
+ GLuint dest_base_addr;
+ GLuint background_color;
+ GLuint foreground_color;
+ GLuint pattern_base_addr;
+};
+
+
+struct xy_text_immediate_blit {
+ struct {
+ GLuint length:8;
+ GLuint pad2:3;
+ GLuint dst_tiled:1;
+ GLuint pad1:4;
+ GLuint byte_packed:1;
+ GLuint pad0:5;
+ GLuint opcode:7;
+ GLuint client:3;
+ } dw0;
+
+ struct {
+ GLuint dest_x1:16;
+ GLuint dest_y1:16;
+ } dw1;
+
+ struct {
+ GLuint dest_x2:16;
+ GLuint dest_y2:16;
+ } dw2;
+
+ /* Src bitmap data follows as inline dwords.
+ */
+};
+
+
+#define CLIENT_2D 0x2
+#define OPCODE_XY_SETUP_BLT 0x1
+#define OPCODE_XY_COLOR_BLT 0x50
+#define OPCODE_XY_TEXT_IMMEDIATE_BLT 0x31
+
+#endif
diff --git a/src/gallium/drivers/i965/intel_tex_format.c b/src/gallium/drivers/i965/intel_tex_format.c
new file mode 100644
index 00000000000..3322a711307
--- /dev/null
+++ b/src/gallium/drivers/i965/intel_tex_format.c
@@ -0,0 +1,225 @@
+#include "intel_context.h"
+#include "intel_tex.h"
+#include "intel_chipset.h"
+#include "main/texformat.h"
+#include "main/enums.h"
+
+
+/**
+ * Choose hardware texture format given the user's glTexImage parameters.
+ *
+ * It works out that this function is fine for all the supported
+ * hardware. However, there is still a need to map the formats onto
+ * hardware descriptors.
+ *
+ * Note that the i915 can actually support many more formats than
+ * these if we take the step of simply swizzling the colors
+ * immediately after sampling...
+ */
+const struct gl_texture_format *
+intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
+ GLenum format, GLenum type)
+{
+ struct intel_context *intel = intel_context(ctx);
+ const GLboolean do32bpt = (intel->ctx.Visual.rgbBits >= 24);
+
+#if 0
+ printf("%s intFmt=0x%x format=0x%x type=0x%x\n",
+ __FUNCTION__, internalFormat, format, type);
+#endif
+
+ switch (internalFormat) {
+ case 4:
+ case GL_RGBA:
+ case GL_COMPRESSED_RGBA:
+ if (format == GL_BGRA) {
+ if (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) {
+ return &_mesa_texformat_argb8888;
+ }
+ else if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV) {
+ return &_mesa_texformat_argb4444;
+ }
+ else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV) {
+ return &_mesa_texformat_argb1555;
+ }
+ }
+ return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444;
+
+ case 3:
+ case GL_RGB:
+ case GL_COMPRESSED_RGB:
+ if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) {
+ return &_mesa_texformat_rgb565;
+ }
+ return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_rgb565;
+
+ case GL_RGBA8:
+ case GL_RGB10_A2:
+ case GL_RGBA12:
+ case GL_RGBA16:
+ return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444;
+
+ case GL_RGBA4:
+ case GL_RGBA2:
+ return &_mesa_texformat_argb4444;
+
+ case GL_RGB5_A1:
+ return &_mesa_texformat_argb1555;
+
+ case GL_RGB8:
+ case GL_RGB10:
+ case GL_RGB12:
+ case GL_RGB16:
+ return &_mesa_texformat_argb8888;
+
+ case GL_RGB5:
+ case GL_RGB4:
+ case GL_R3_G3_B2:
+ return &_mesa_texformat_rgb565;
+
+ case GL_ALPHA:
+ case GL_ALPHA4:
+ case GL_ALPHA8:
+ case GL_ALPHA12:
+ case GL_ALPHA16:
+ case GL_COMPRESSED_ALPHA:
+ return &_mesa_texformat_a8;
+
+ case 1:
+ case GL_LUMINANCE:
+ case GL_LUMINANCE4:
+ case GL_LUMINANCE8:
+ case GL_LUMINANCE12:
+ case GL_LUMINANCE16:
+ case GL_COMPRESSED_LUMINANCE:
+ return &_mesa_texformat_l8;
+
+ case 2:
+ case GL_LUMINANCE_ALPHA:
+ case GL_LUMINANCE4_ALPHA4:
+ case GL_LUMINANCE6_ALPHA2:
+ case GL_LUMINANCE8_ALPHA8:
+ case GL_LUMINANCE12_ALPHA4:
+ case GL_LUMINANCE12_ALPHA12:
+ case GL_LUMINANCE16_ALPHA16:
+ case GL_COMPRESSED_LUMINANCE_ALPHA:
+ return &_mesa_texformat_al88;
+
+ case GL_INTENSITY:
+ case GL_INTENSITY4:
+ case GL_INTENSITY8:
+ case GL_INTENSITY12:
+ case GL_INTENSITY16:
+ case GL_COMPRESSED_INTENSITY:
+ return &_mesa_texformat_i8;
+
+ case GL_YCBCR_MESA:
+ if (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE)
+ return &_mesa_texformat_ycbcr;
+ else
+ return &_mesa_texformat_ycbcr_rev;
+
+ case GL_COMPRESSED_RGB_FXT1_3DFX:
+ return &_mesa_texformat_rgb_fxt1;
+ case GL_COMPRESSED_RGBA_FXT1_3DFX:
+ return &_mesa_texformat_rgba_fxt1;
+
+ case GL_RGB_S3TC:
+ case GL_RGB4_S3TC:
+ case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+ return &_mesa_texformat_rgb_dxt1;
+
+ case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+ return &_mesa_texformat_rgba_dxt1;
+
+ case GL_RGBA_S3TC:
+ case GL_RGBA4_S3TC:
+ case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+ return &_mesa_texformat_rgba_dxt3;
+
+ case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+ return &_mesa_texformat_rgba_dxt5;
+
+ case GL_DEPTH_COMPONENT:
+ case GL_DEPTH_COMPONENT16:
+ case GL_DEPTH_COMPONENT24:
+ case GL_DEPTH_COMPONENT32:
+#if 0
+ return &_mesa_texformat_z16;
+#else
+ /* fall-through.
+ * 16bpp depth texture can't be paired with a stencil buffer so
+ * always used combined depth/stencil format.
+ */
+#endif
+ case GL_DEPTH_STENCIL_EXT:
+ case GL_DEPTH24_STENCIL8_EXT:
+ return &_mesa_texformat_s8_z24;
+
+#ifndef I915
+ case GL_SRGB_EXT:
+ case GL_SRGB8_EXT:
+ case GL_SRGB_ALPHA_EXT:
+ case GL_SRGB8_ALPHA8_EXT:
+ case GL_COMPRESSED_SRGB_EXT:
+ case GL_COMPRESSED_SRGB_ALPHA_EXT:
+ case GL_COMPRESSED_SLUMINANCE_EXT:
+ case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT:
+ return &_mesa_texformat_sargb8;
+ case GL_SLUMINANCE_EXT:
+ case GL_SLUMINANCE8_EXT:
+ if (IS_G4X(intel->intelScreen->deviceID))
+ return &_mesa_texformat_sl8;
+ else
+ return &_mesa_texformat_sargb8;
+ case GL_SLUMINANCE_ALPHA_EXT:
+ case GL_SLUMINANCE8_ALPHA8_EXT:
+ if (IS_G4X(intel->intelScreen->deviceID))
+ return &_mesa_texformat_sla8;
+ else
+ return &_mesa_texformat_sargb8;
+ case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
+ case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
+ case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
+ case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
+ return &_mesa_texformat_srgb_dxt1;
+
+ /* i915 could also do this */
+ case GL_DUDV_ATI:
+ case GL_DU8DV8_ATI:
+ return &_mesa_texformat_dudv8;
+ case GL_RGBA_SNORM:
+ case GL_RGBA8_SNORM:
+ return &_mesa_texformat_signed_rgba8888_rev;
+#endif
+
+ default:
+ fprintf(stderr, "unexpected texture format %s in %s\n",
+ _mesa_lookup_enum_by_nr(internalFormat), __FUNCTION__);
+ return NULL;
+ }
+
+ return NULL; /* never get here */
+}
+
+int intel_compressed_num_bytes(GLuint mesaFormat)
+{
+ int bytes = 0;
+ switch(mesaFormat) {
+
+ case MESA_FORMAT_RGB_FXT1:
+ case MESA_FORMAT_RGBA_FXT1:
+ case MESA_FORMAT_RGB_DXT1:
+ case MESA_FORMAT_RGBA_DXT1:
+ bytes = 2;
+ break;
+
+ case MESA_FORMAT_RGBA_DXT3:
+ case MESA_FORMAT_RGBA_DXT5:
+ bytes = 4;
+ default:
+ break;
+ }
+
+ return bytes;
+}
diff --git a/src/gallium/drivers/i965/intel_tex_layout.c b/src/gallium/drivers/i965/intel_tex_layout.c
new file mode 100644
index 00000000000..7d69ea4484a
--- /dev/null
+++ b/src/gallium/drivers/i965/intel_tex_layout.c
@@ -0,0 +1,140 @@
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <[email protected]>
+ * Michel Dänzer <[email protected]>
+ */
+
+#include "intel_mipmap_tree.h"
+#include "intel_tex_layout.h"
+#include "intel_context.h"
+#include "main/macros.h"
+
+void intel_get_texture_alignment_unit(GLenum internalFormat, GLuint *w, GLuint *h)
+{
+ switch (internalFormat) {
+ case GL_COMPRESSED_RGB_FXT1_3DFX:
+ case GL_COMPRESSED_RGBA_FXT1_3DFX:
+ *w = 8;
+ *h = 4;
+ break;
+
+ case GL_RGB_S3TC:
+ case GL_RGB4_S3TC:
+ case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+ case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+ case GL_RGBA_S3TC:
+ case GL_RGBA4_S3TC:
+ case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+ case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+ *w = 4;
+ *h = 4;
+ break;
+
+ default:
+ *w = 4;
+ *h = 2;
+ break;
+ }
+}
+
+void i945_miptree_layout_2d( struct intel_context *intel,
+ struct intel_mipmap_tree *mt,
+ uint32_t tiling )
+{
+ GLuint align_h = 2, align_w = 4;
+ GLuint level;
+ GLuint x = 0;
+ GLuint y = 0;
+ GLuint width = mt->width0;
+ GLuint height = mt->height0;
+
+ mt->pitch = mt->width0;
+ intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
+
+ if (mt->compressed) {
+ mt->pitch = ALIGN(mt->width0, align_w);
+ }
+
+ /* May need to adjust pitch to accomodate the placement of
+ * the 2nd mipmap. This occurs when the alignment
+ * constraints of mipmap placement push the right edge of the
+ * 2nd mipmap out past the width of its parent.
+ */
+ if (mt->first_level != mt->last_level) {
+ GLuint mip1_width;
+
+ if (mt->compressed) {
+ mip1_width = ALIGN(minify(mt->width0), align_w)
+ + ALIGN(minify(minify(mt->width0)), align_w);
+ } else {
+ mip1_width = ALIGN(minify(mt->width0), align_w)
+ + minify(minify(mt->width0));
+ }
+
+ if (mip1_width > mt->pitch) {
+ mt->pitch = mip1_width;
+ }
+ }
+
+ /* Pitch must be a whole number of dwords, even though we
+ * express it in texels.
+ */
+ mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->pitch);
+ mt->total_height = 0;
+
+ for ( level = mt->first_level ; level <= mt->last_level ; level++ ) {
+ GLuint img_height;
+
+ intel_miptree_set_level_info(mt, level, 1, x, y, width,
+ height, 1);
+
+ if (mt->compressed)
+ img_height = MAX2(1, height/4);
+ else
+ img_height = ALIGN(height, align_h);
+
+
+ /* Because the images are packed better, the final offset
+ * might not be the maximal one:
+ */
+ mt->total_height = MAX2(mt->total_height, y + img_height);
+
+ /* Layout_below: step right after second mipmap.
+ */
+ if (level == mt->first_level + 1) {
+ x += ALIGN(width, align_w);
+ }
+ else {
+ y += img_height;
+ }
+
+ width = minify(width);
+ height = minify(height);
+ }
+}