summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2010-07-26 17:47:59 -0700
committerEric Anholt <[email protected]>2010-07-26 17:53:27 -0700
commitafe125e0a18ac3886c45c7e6b02b122fb2d327b5 (patch)
tree78621707e71154c0b388b0baacffc26432b7e992 /src/mesa/drivers/dri
parentd64343f1ae84979bd154475badf11af8a9bfc2eb (diff)
parent5403ca79b225605c79f49866a6497c97da53be3b (diff)
Merge remote branch 'origin/master' into glsl2
This pulls in multiple i965 driver fixes which will help ensure better testing coverage during development, and also gets past the conflicts of the src/mesa/shader -> src/mesa/program move. Conflicts: src/mesa/Makefile src/mesa/main/shaderapi.c src/mesa/main/shaderobj.h
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r--src/mesa/drivers/dri/common/dri_metaops.c4
-rw-r--r--src/mesa/drivers/dri/common/dri_util.c35
-rw-r--r--src/mesa/drivers/dri/common/dri_util.h1
-rw-r--r--src/mesa/drivers/dri/i915/i915_fragprog.c10
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip.c25
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip.h5
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_line.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_point.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_tri.c7
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_unfilled.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_util.c20
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_curbe.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h39
-rw-r--r--src/mesa/drivers/dri/i965/brw_disasm.c70
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h29
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c188
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs_emit.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_optimize.c592
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.c37
-rw-r--r--src/mesa/drivers/dri/i965/brw_sf.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_sf_state.c7
-rw-r--r--src/mesa/drivers/dri/i965/brw_structs.h33
-rw-r--r--src/mesa/drivers/dri/i965/brw_util.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c225
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_surface_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c14
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_fp.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_glsl.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass0.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/gen6_sf_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/gen6_vs_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/gen6_wm_state.c4
-rw-r--r--src/mesa/drivers/dri/intel/intel_batchbuffer.c2
-rw-r--r--src/mesa/drivers/dri/intel/intel_chipset.h3
-rw-r--r--src/mesa/drivers/dri/intel/intel_decode.c618
-rw-r--r--src/mesa/drivers/dri/intel/intel_decode.h4
-rw-r--r--src/mesa/drivers/dri/intel/intel_pixel_bitmap.c2
-rw-r--r--src/mesa/drivers/dri/mach64/mach64_screen.c1
-rw-r--r--src/mesa/drivers/dri/mga/mga_xmesa.c1
-rw-r--r--src/mesa/drivers/dri/r128/r128_screen.c1
-rw-r--r--src/mesa/drivers/dri/r128/r128_state.c1
-rw-r--r--src/mesa/drivers/dri/r128/r128_tex.c1
-rw-r--r--src/mesa/drivers/dri/r200/r200_fragshader.c4
-rw-r--r--src/mesa/drivers/dri/r200/r200_ioctl.c107
-rw-r--r--src/mesa/drivers/dri/r200/r200_ioctl.h5
-rw-r--r--src/mesa/drivers/dri/r200/r200_vertprog.c10
-rw-r--r--src/mesa/drivers/dri/r300/compiler/Makefile1
-rwxr-xr-xsrc/mesa/drivers/dri/r300/compiler/SConscript1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c42
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c90
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.c114
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.h4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c100
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.h11
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c79
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c101
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h22
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c10
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h5
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_optimize.c20
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c86
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c29
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c131
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.h9
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.c9
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.h2
-rw-r--r--src/mesa/drivers/dri/r300/r300_draw.c3
-rw-r--r--src/mesa/drivers/dri/r300/r300_fragprog_common.c4
-rw-r--r--src/mesa/drivers/dri/r300/r300_reg.h4
-rw-r--r--src/mesa/drivers/dri/r300/r300_shader.c2
-rw-r--r--src/mesa/drivers/dri/r300/r300_state.c4
-rw-r--r--src/mesa/drivers/dri/r300/r300_vertprog.c12
-rw-r--r--src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c4
-rw-r--r--src/mesa/drivers/dri/r600/r600_cmdbuf.c1
-rw-r--r--src/mesa/drivers/dri/r600/r600_cmdbuf.h1
-rw-r--r--src/mesa/drivers/dri/r600/r600_context.c1
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.c8
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.h2
-rw-r--r--src/mesa/drivers/dri/r600/r700_chip.h4
-rw-r--r--src/mesa/drivers/dri/r600/r700_fragprog.c11
-rw-r--r--src/mesa/drivers/dri/r600/r700_oglprog.c2
-rw-r--r--src/mesa/drivers/dri/r600/r700_oglprog.h2
-rw-r--r--src/mesa/drivers/dri/r600/r700_state.c4
-rw-r--r--src/mesa/drivers/dri/r600/r700_vertprog.c9
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common_context.c2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c10
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_screen.c16
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_tex_getimage.c9
-rw-r--r--src/mesa/drivers/dri/sis/sis_state.c1
-rw-r--r--src/mesa/drivers/dri/unichrome/via_screen.c1
97 files changed, 2308 insertions, 835 deletions
diff --git a/src/mesa/drivers/dri/common/dri_metaops.c b/src/mesa/drivers/dri/common/dri_metaops.c
index dfb7d640409..86e59a8e51c 100644
--- a/src/mesa/drivers/dri/common/dri_metaops.c
+++ b/src/mesa/drivers/dri/common/dri_metaops.c
@@ -26,6 +26,7 @@
*
**************************************************************************/
+#include "main/arbprogram.h"
#include "main/arrayobj.h"
#include "main/bufferobj.h"
#include "main/enable.h"
@@ -33,8 +34,7 @@
#include "main/texstate.h"
#include "main/varray.h"
#include "main/viewport.h"
-#include "shader/arbprogram.h"
-#include "shader/program.h"
+#include "program/program.h"
#include "dri_metaops.h"
void
diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c
index 18b9035248f..dce84ef0deb 100644
--- a/src/mesa/drivers/dri/common/dri_util.c
+++ b/src/mesa/drivers/dri/common/dri_util.c
@@ -927,41 +927,6 @@ const __DRI2configQueryExtension dri2ConfigQueryExtension = {
dri2ConfigQueryf,
};
-static int
-driFrameTracking(__DRIdrawable *drawable, GLboolean enable)
-{
- return GLX_BAD_CONTEXT;
-}
-
-static int
-driQueryFrameTracking(__DRIdrawable *dpriv,
- int64_t * sbc, int64_t * missedFrames,
- float * lastMissedUsage, float * usage)
-{
- __DRIswapInfo sInfo;
- int status;
- int64_t ust;
- __DRIscreen *psp = dpriv->driScreenPriv;
-
- status = dpriv->driScreenPriv->DriverAPI.GetSwapInfo( dpriv, & sInfo );
- if ( status == 0 ) {
- *sbc = sInfo.swap_count;
- *missedFrames = sInfo.swap_missed_count;
- *lastMissedUsage = sInfo.swap_missed_usage;
-
- (*psp->systemTime->getUST)( & ust );
- *usage = driCalculateSwapUsage( dpriv, sInfo.swap_ust, ust );
- }
-
- return status;
-}
-
-const __DRIframeTrackingExtension driFrameTrackingExtension = {
- { __DRI_FRAME_TRACKING, __DRI_FRAME_TRACKING_VERSION },
- driFrameTracking,
- driQueryFrameTracking
-};
-
/**
* Calculate amount of swap interval used between GLX buffer swaps.
*
diff --git a/src/mesa/drivers/dri/common/dri_util.h b/src/mesa/drivers/dri/common/dri_util.h
index e4c590b1322..bc647ff8130 100644
--- a/src/mesa/drivers/dri/common/dri_util.h
+++ b/src/mesa/drivers/dri/common/dri_util.h
@@ -70,7 +70,6 @@ extern const __DRIdri2Extension driDRI2Extension;
extern const __DRIextension driReadDrawableExtension;
extern const __DRIcopySubBufferExtension driCopySubBufferExtension;
extern const __DRIswapControlExtension driSwapControlExtension;
-extern const __DRIframeTrackingExtension driFrameTrackingExtension;
extern const __DRImediaStreamCounterExtension driMediaStreamCounterExtension;
extern const __DRI2configQueryExtension dri2ConfigQueryExtension;
diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c
index e60157f3777..f1505dc5e73 100644
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -29,11 +29,11 @@
#include "main/macros.h"
#include "main/enums.h"
-#include "shader/prog_instruction.h"
-#include "shader/prog_parameter.h"
-#include "shader/program.h"
-#include "shader/programopt.h"
-#include "shader/prog_print.h"
+#include "program/prog_instruction.h"
+#include "program/prog_parameter.h"
+#include "program/program.h"
+#include "program/programopt.h"
+#include "program/prog_print.h"
#include "tnl/tnl.h"
#include "tnl/t_context.h"
diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c
index 228ee3f3be1..a1e9dae9154 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.c
+++ b/src/mesa/drivers/dri/i965/brw_clip.c
@@ -55,6 +55,7 @@ static void compile_clip_prog( struct brw_context *brw,
GLuint program_size;
GLuint delta;
GLuint i;
+ GLuint header_regs;
memset(&c, 0, sizeof(c));
@@ -72,22 +73,28 @@ static void compile_clip_prog( struct brw_context *brw,
c.header_position_offset = ATTR_SIZE;
if (intel->gen == 5)
- delta = 3 * REG_SIZE;
+ header_regs = 3;
else
- delta = REG_SIZE;
+ header_regs = 1;
- for (i = 0; i < VERT_RESULT_MAX; i++)
+ delta = header_regs * REG_SIZE;
+
+ for (i = 0; i < VERT_RESULT_MAX; i++) {
if (c.key.attrs & BITFIELD64_BIT(i)) {
c.offset[i] = delta;
delta += ATTR_SIZE;
+
+ c.idx_to_attr[c.nr_attrs] = i;
+ c.nr_attrs++;
}
+ }
- c.nr_attrs = brw_count_bits(c.key.attrs);
-
- if (intel->gen == 5)
- c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
- else
- c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
+ /* The vertex attributes start at a URB row-aligned offset after
+ * the 8-20 dword vertex header, and continue for a URB row-aligned
+ * length. nr_regs determines the urb_read_length from the start
+ * of the header to the end of the vertex data.
+ */
+ c.nr_regs = header_regs + (c.nr_attrs + 1) / 2;
c.nr_bytes = c.nr_regs * REG_SIZE;
diff --git a/src/mesa/drivers/dri/i965/brw_clip.h b/src/mesa/drivers/dri/i965/brw_clip.h
index 68222c6c278..3a8cd7bf390 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.h
+++ b/src/mesa/drivers/dri/i965/brw_clip.h
@@ -115,7 +115,10 @@ struct brw_clip_compile {
GLboolean need_direction;
GLuint header_position_offset;
- GLuint offset[VERT_ATTRIB_MAX];
+ /** Mapping from VERT_RESULT_* to offset within the VUE. */
+ GLuint offset[VERT_RESULT_MAX];
+ /** Mapping from attribute index to VERT_RESULT_* */
+ GLuint idx_to_attr[VERT_RESULT_MAX];
};
#define ATTR_SIZE (4*4)
diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c
index ceb62a31162..4b9117bb0b1 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_line.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_line.c
@@ -32,7 +32,7 @@
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
-#include "shader/program.h"
+#include "program/program.h"
#include "intel_batchbuffer.h"
diff --git a/src/mesa/drivers/dri/i965/brw_clip_point.c b/src/mesa/drivers/dri/i965/brw_clip_point.c
index 7f47634dca8..b994a32bc3b 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_point.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_point.c
@@ -32,7 +32,7 @@
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
-#include "shader/program.h"
+#include "program/program.h"
#include "intel_batchbuffer.h"
diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c
index 916a99ea004..cb58d1da9fe 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_tri.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c
@@ -32,7 +32,7 @@
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
-#include "shader/program.h"
+#include "program/program.h"
#include "intel_batchbuffer.h"
@@ -76,10 +76,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
if (c->nr_attrs & 1) {
for (j = 0; j < 3; j++) {
- GLuint delta = c->nr_attrs*16 + 32;
-
- if (intel->gen == 5)
- delta = c->nr_attrs * 16 + 32 * 3;
+ GLuint delta = c->offset[c->idx_to_attr[c->nr_attrs - 1]] + ATTR_SIZE;
brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
}
diff --git a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
index f36d22fdbf8..afd93f8be0b 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
@@ -32,7 +32,7 @@
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
-#include "shader/program.h"
+#include "program/program.h"
#include "intel_batchbuffer.h"
diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c
index 2148bc8244a..d2ac1235e46 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_util.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_util.c
@@ -33,7 +33,7 @@
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
-#include "shader/program.h"
+#include "program/program.h"
#include "intel_batchbuffer.h"
@@ -134,7 +134,6 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
GLboolean force_edgeflag)
{
struct brw_compile *p = &c->func;
- struct intel_context *intel = &p->brw->intel;
struct brw_reg tmp = get_tmp(c);
GLuint i;
@@ -149,12 +148,9 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
/* Iterate over each attribute (could be done in pairs?)
*/
for (i = 0; i < c->nr_attrs; i++) {
- GLuint delta = i*16 + 32;
+ GLuint delta = c->offset[c->idx_to_attr[i]];
- if (intel->gen == 5)
- delta = i * 16 + 32 * 3;
-
- if (delta == c->offset[VERT_RESULT_EDGE]) {
+ if (c->idx_to_attr[i] == VERT_RESULT_EDGE) {
if (force_edgeflag)
brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
else
@@ -183,10 +179,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
}
if (i & 1) {
- GLuint delta = i*16 + 32;
-
- if (intel->gen == 5)
- delta = i * 16 + 32 * 3;
+ GLuint delta = c->offset[c->idx_to_attr[c->nr_attrs - 1]] + ATTR_SIZE;
brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
}
@@ -199,11 +192,6 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
brw_clip_project_vertex(c, dest_ptr );
}
-
-
-
-#define MAX_MRF 16
-
void brw_clip_emit_vue(struct brw_clip_compile *c,
struct brw_indirect vert,
GLboolean allocate,
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index d13b9ae298b..6d064b822e5 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -34,7 +34,6 @@
#include "main/api_noop.h"
#include "main/macros.h"
#include "main/simple_list.h"
-
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_draw.h"
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 6c0b79f7241..8196d8ca625 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -35,9 +35,9 @@
#include "main/context.h"
#include "main/macros.h"
#include "main/enums.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
-#include "shader/prog_statevars.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
+#include "program/prog_statevars.h"
#include "intel_batchbuffer.h"
#include "intel_regions.h"
#include "brw_context.h"
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 39bf5b63fc2..f7a68cead7c 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -501,6 +501,10 @@
#define BRW_MASK_ENABLE 0
#define BRW_MASK_DISABLE 1
+/* Sandybridge is WECtrl (Write enable control) */
+#define BRW_WE_NORMAL 0
+#define BRW_WE_KILL_PRED 1
+
#define BRW_OPCODE_MOV 1
#define BRW_OPCODE_SEL 2
#define BRW_OPCODE_NOT 4
@@ -600,6 +604,8 @@
#define BRW_ARF_NOTIFICATION_COUNT 0x90
#define BRW_ARF_IP 0xA0
+#define BRW_MRF_COMPR4 (1 << 7)
+
#define BRW_AMASK 0
#define BRW_IMASK 1
#define BRW_LMASK 2
@@ -646,13 +652,14 @@
#define BRW_POLYGON_FACING_BACK 1
#define BRW_MESSAGE_TARGET_NULL 0
-#define BRW_MESSAGE_TARGET_MATH 1
+#define BRW_MESSAGE_TARGET_MATH 1 /* reserved on GEN6 */
#define BRW_MESSAGE_TARGET_SAMPLER 2
#define BRW_MESSAGE_TARGET_GATEWAY 3
-#define BRW_MESSAGE_TARGET_DATAPORT_READ 4
-#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5
+#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 /* sampler cache on GEN6 */
+#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 /* render cache on Gen6 */
#define BRW_MESSAGE_TARGET_URB 6
#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7
+#define BRW_MESSAGE_TARGET_CONST_CACHE 9 /* GEN6 */
#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2
@@ -698,10 +705,24 @@
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
+/* This one stays the same across generations. */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
+/* GEN4 */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
-#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
+#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2
#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
+/* G45, GEN5 */
+#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
+#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
+#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3
+#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
+#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
+/* GEN6 */
+#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
+#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5
+#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0
#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1
@@ -721,6 +742,16 @@
#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
+/* GEN6 */
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE_GEN6 7
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE_GEN6 8
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE_GEN6 9
+#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE_GEN6 10
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORLD_SCATTERED_WRITE_GEN6 11
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6 12
+#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE_GEN6 13
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE_GEN6 14
+
#define BRW_MATH_FUNCTION_INV 1
#define BRW_MATH_FUNCTION_LOG 2
#define BRW_MATH_FUNCTION_EXP 3
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index ff12daf497d..d2307145361 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -598,7 +598,7 @@ static int src_da16 (FILE *file,
format (file, ".%d", _subreg_nr);
string (file, "<");
err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
- string (file, ",1,1>");
+ string (file, ",4,1>");
err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
/*
* Three kinds of swizzle display:
@@ -836,10 +836,12 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
if (inst->header.opcode == BRW_OPCODE_SEND) {
int target;
- if (gen >= 5)
- target = inst->bits2.send_gen5.sfid;
+ if (gen >= 6)
+ target = inst->header.destreg__conditionalmod;
+ else if (gen == 5)
+ target = inst->bits2.send_gen5.sfid;
else
- target = inst->bits3.generic.msg_target;
+ target = inst->bits3.generic.msg_target;
newline (file);
pad (file, 16);
@@ -868,13 +870,44 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
inst->bits3.sampler.return_format, NULL);
string (file, ")");
break;
+ case BRW_MESSAGE_TARGET_DATAPORT_READ:
+ if (gen >= 6) {
+ format (file, " (%d, %d, %d, %d, %d, %d)",
+ inst->bits3.dp_render_cache.binding_table_index,
+ inst->bits3.dp_render_cache.msg_control,
+ inst->bits3.dp_render_cache.msg_type,
+ inst->bits3.dp_render_cache.send_commit_msg,
+ inst->bits3.dp_render_cache.msg_length,
+ inst->bits3.dp_render_cache.response_length);
+ } else if (gen >= 5) {
+ format (file, " (%d, %d, %d)",
+ inst->bits3.dp_read_gen5.binding_table_index,
+ inst->bits3.dp_read_gen5.msg_control,
+ inst->bits3.dp_read_gen5.msg_type);
+ } else {
+ format (file, " (%d, %d, %d)",
+ inst->bits3.dp_read.binding_table_index,
+ inst->bits3.dp_read.msg_control,
+ inst->bits3.dp_read.msg_type);
+ }
+ break;
case BRW_MESSAGE_TARGET_DATAPORT_WRITE:
- format (file, " (%d, %d, %d, %d)",
- inst->bits3.dp_write.binding_table_index,
- (inst->bits3.dp_write.pixel_scoreboard_clear << 3) |
- inst->bits3.dp_write.msg_control,
- inst->bits3.dp_write.msg_type,
- inst->bits3.dp_write.send_commit_msg);
+ if (gen >= 6) {
+ format (file, " (%d, %d, %d, %d, %d, %d)",
+ inst->bits3.dp_render_cache.binding_table_index,
+ inst->bits3.dp_render_cache.msg_control,
+ inst->bits3.dp_render_cache.msg_type,
+ inst->bits3.dp_render_cache.send_commit_msg,
+ inst->bits3.dp_render_cache.msg_length,
+ inst->bits3.dp_render_cache.response_length);
+ } else {
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.dp_write.binding_table_index,
+ (inst->bits3.dp_write.pixel_scoreboard_clear << 3) |
+ inst->bits3.dp_write.msg_control,
+ inst->bits3.dp_write.msg_type,
+ inst->bits3.dp_write.send_commit_msg);
+ }
break;
case BRW_MESSAGE_TARGET_URB:
if (gen >= 5) {
@@ -900,15 +933,22 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
case BRW_MESSAGE_TARGET_THREAD_SPAWNER:
break;
default:
- format (file, "unsupported target %d", inst->bits3.generic.msg_target);
+ format (file, "unsupported target %d", target);
break;
}
if (space)
string (file, " ");
- format (file, "mlen %d",
- inst->bits3.generic.msg_length);
- format (file, " rlen %d",
- inst->bits3.generic.response_length);
+ if (gen >= 5) {
+ format (file, "mlen %d",
+ inst->bits3.generic_gen5.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic_gen5.response_length);
+ } else {
+ format (file, "mlen %d",
+ inst->bits3.generic.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic.response_length);
+ }
}
pad (file, 64);
if (inst->header.opcode != BRW_OPCODE_NOP) {
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 3a32ad26c12..ffdddd0a388 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -35,7 +35,7 @@
#include "brw_structs.h"
#include "brw_defines.h"
-#include "shader/prog_instruction.h"
+#include "program/prog_instruction.h"
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
@@ -520,6 +520,20 @@ static INLINE struct brw_reg brw_acc_reg( void )
0);
}
+static INLINE struct brw_reg brw_notification_1_reg(void)
+{
+
+ return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_NOTIFICATION_COUNT,
+ 1,
+ BRW_REGISTER_TYPE_UD,
+ BRW_VERTICAL_STRIDE_0,
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XXXX,
+ WRITEMASK_X);
+}
+
static INLINE struct brw_reg brw_flag_reg( void )
{
@@ -877,12 +891,15 @@ void brw_dp_READ_4( struct brw_compile *p,
void brw_dp_READ_4_vs( struct brw_compile *p,
struct brw_reg dest,
- GLuint oword,
- GLboolean relAddr,
- struct brw_reg addrReg,
GLuint location,
GLuint bind_table_index );
+void brw_dp_READ_4_vs_relative(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg addrReg,
+ GLuint offset,
+ GLuint bind_table_index);
+
void brw_dp_WRITE_16( struct brw_compile *p,
struct brw_reg src,
GLuint scratch_offset );
@@ -919,6 +936,8 @@ void brw_land_fwd_jump(struct brw_compile *p,
void brw_NOP(struct brw_compile *p);
+void brw_WAIT(struct brw_compile *p);
+
/* Special case: there is never a destination, execution size will be
* taken from src0:
*/
@@ -965,5 +984,7 @@ void brw_set_src1( struct brw_instruction *insn,
/* brw_optimize.c */
void brw_optimize(struct brw_compile *p);
+void brw_remove_duplicate_mrf_moves(struct brw_compile *p);
+void brw_remove_grf_to_mrf_moves(struct brw_compile *p);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 34dfe10cb93..0d5d17f501d 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -364,7 +364,8 @@ static void brw_set_dp_write_message( struct brw_context *brw,
GLuint msg_length,
GLuint pixel_scoreboard_clear,
GLuint response_length,
- GLuint end_of_thread )
+ GLuint end_of_thread,
+ GLuint send_commit_msg)
{
struct intel_context *intel = &brw->intel;
brw_set_src1(insn, brw_imm_d(0));
@@ -374,7 +375,7 @@ static void brw_set_dp_write_message( struct brw_context *brw,
insn->bits3.dp_write_gen5.msg_control = msg_control;
insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
insn->bits3.dp_write_gen5.msg_type = msg_type;
- insn->bits3.dp_write_gen5.send_commit_msg = 0;
+ insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
insn->bits3.dp_write_gen5.header_present = 1;
insn->bits3.dp_write_gen5.response_length = response_length;
insn->bits3.dp_write_gen5.msg_length = msg_length;
@@ -386,7 +387,7 @@ static void brw_set_dp_write_message( struct brw_context *brw,
insn->bits3.dp_write.msg_control = msg_control;
insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
insn->bits3.dp_write.msg_type = msg_type;
- insn->bits3.dp_write.send_commit_msg = 0;
+ insn->bits3.dp_write.send_commit_msg = send_commit_msg;
insn->bits3.dp_write.response_length = response_length;
insn->bits3.dp_write.msg_length = msg_length;
insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
@@ -906,6 +907,20 @@ void brw_CMP(struct brw_compile *p,
}
}
+/* Issue 'wait' instruction for n1, host could program MMIO
+ to wake up thread. */
+void brw_WAIT (struct brw_compile *p)
+{
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
+ struct brw_reg src = brw_notification_1_reg();
+
+ brw_set_dest(insn, src);
+ brw_set_src0(insn, src);
+ brw_set_src1(insn, brw_null_reg());
+ insn->header.execution_size = 0; /* must */
+ insn->header.predicate_control = 0;
+ insn->header.compression_control = 0;
+}
/***********************************************************************
@@ -1040,6 +1055,7 @@ void brw_dp_WRITE_16( struct brw_compile *p,
struct brw_reg src,
GLuint scratch_offset )
{
+ struct intel_context *intel = &p->brw->intel;
GLuint msg_reg_nr = 1;
{
brw_push_insn_state(p);
@@ -1056,13 +1072,32 @@ void brw_dp_WRITE_16( struct brw_compile *p,
{
GLuint msg_length = 3;
- struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+ struct brw_reg dest;
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
-
+ int send_commit_msg;
+
insn->header.predicate_control = 0; /* XXX */
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.destreg__conditionalmod = msg_reg_nr;
-
+
+ /* Until gen6, writes followed by reads from the same location
+ * are not guaranteed to be ordered unless write_commit is set.
+ * If set, then a no-op write is issued to the destination
+ * register to set a dependency, and a read from the destination
+ * can be used to ensure the ordering.
+ *
+ * For gen6, only writes between different threads need ordering
+ * protection. Our use of DP writes is all about register
+ * spilling within a thread.
+ */
+ if (intel->gen >= 6) {
+ dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
+ send_commit_msg = 0;
+ } else {
+ dest = brw_uw16_grf(0, 0);
+ send_commit_msg = 1;
+ }
+
brw_set_dest(insn, dest);
brw_set_src0(insn, src);
@@ -1073,8 +1108,9 @@ void brw_dp_WRITE_16( struct brw_compile *p,
BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
msg_length,
0, /* pixel scoreboard */
- 0, /* response_length */
- 0); /* eot */
+ send_commit_msg, /* response_length */
+ 0, /* eot */
+ send_commit_msg);
}
}
@@ -1115,7 +1151,7 @@ void brw_dp_READ_16( struct brw_compile *p,
brw_set_dp_read_message(p->brw,
insn,
255, /* binding table index (255=stateless) */
- 3, /* msg_control (3 means 4 Owords) */
+ BRW_DATAPORT_OWORD_BLOCK_4_OWORDS,
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1, /* target cache (render/scratch) */
1, /* msg_length */
@@ -1190,68 +1226,107 @@ void brw_dp_READ_4( struct brw_compile *p,
*/
void brw_dp_READ_4_vs(struct brw_compile *p,
struct brw_reg dest,
- GLuint oword,
- GLboolean relAddr,
- struct brw_reg addrReg,
GLuint location,
GLuint bind_table_index)
{
+ struct brw_instruction *insn;
GLuint msg_reg_nr = 1;
+ struct brw_reg b;
- assert(oword < 2);
/*
printf("vs const read msg, location %u, msg_reg_nr %d\n",
location, msg_reg_nr);
*/
/* Setup MRF[1] with location/offset into const buffer */
- {
- struct brw_reg b;
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- brw_push_insn_state(p);
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- /*brw_set_access_mode(p, BRW_ALIGN_16);*/
+ /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
+ * when the docs say only dword[2] should be set. Hmmm. But it works.
+ */
+ b = brw_message_reg(msg_reg_nr);
+ b = retype(b, BRW_REGISTER_TYPE_UD);
+ /*b = get_element_ud(b, 2);*/
+ brw_MOV(p, b, brw_imm_ud(location));
- /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
- * when the docs say only dword[2] should be set. Hmmm. But it works.
- */
- b = brw_message_reg(msg_reg_nr);
- b = retype(b, BRW_REGISTER_TYPE_UD);
- /*b = get_element_ud(b, 2);*/
- if (relAddr) {
- brw_ADD(p, b, addrReg, brw_imm_ud(location));
- }
- else {
- brw_MOV(p, b, brw_imm_ud(location));
- }
+ brw_pop_insn_state(p);
- brw_pop_insn_state(p);
- }
+ insn = next_insn(p, BRW_OPCODE_SEND);
- {
- struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
-
- insn->header.predicate_control = BRW_PREDICATE_NONE;
- insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditionalmod = msg_reg_nr;
- insn->header.mask_control = BRW_MASK_DISABLE;
- /*insn->header.access_mode = BRW_ALIGN_16;*/
-
- brw_set_dest(insn, dest);
- brw_set_src0(insn, brw_null_reg());
+ insn->header.predicate_control = BRW_PREDICATE_NONE;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+ insn->header.mask_control = BRW_MASK_DISABLE;
- brw_set_dp_read_message(p->brw,
- insn,
- bind_table_index,
- oword, /* 0 = lower Oword, 1 = upper Oword */
- BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
- 0, /* source cache = data cache */
- 1, /* msg_length */
- 1, /* response_length (1 Oword) */
- 0); /* eot */
- }
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, brw_null_reg());
+
+ brw_set_dp_read_message(p->brw,
+ insn,
+ bind_table_index,
+ 0,
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+ 0, /* source cache = data cache */
+ 1, /* msg_length */
+ 1, /* response_length (1 Oword) */
+ 0); /* eot */
+}
+
+/**
+ * Read a float[4] constant per vertex from VS constant buffer, with
+ * relative addressing.
+ */
+void brw_dp_READ_4_vs_relative(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg addr_reg,
+ GLuint offset,
+ GLuint bind_table_index)
+{
+ struct intel_context *intel = &p->brw->intel;
+ int msg_type;
+
+ /* Setup MRF[1] with offset into const buffer */
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* M1.0 is block offset 0, M1.4 is block offset 1, all other
+ * fields ignored.
+ */
+ brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD),
+ addr_reg, brw_imm_d(offset));
+ brw_pop_insn_state(p);
+
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+ insn->header.predicate_control = BRW_PREDICATE_NONE;
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.destreg__conditionalmod = 0;
+ insn->header.mask_control = BRW_MASK_DISABLE;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, brw_vec8_grf(0, 0));
+
+ if (intel->gen == 6)
+ msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+ else if (intel->gen == 5 || intel->is_g4x)
+ msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+ else
+ msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+
+ brw_set_dp_read_message(p->brw,
+ insn,
+ bind_table_index,
+ BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
+ msg_type,
+ 0, /* source cache = data cache */
+ 2, /* msg_length */
+ 1, /* response_length */
+ 0); /* eot */
}
@@ -1281,7 +1356,8 @@ void brw_fb_WRITE(struct brw_compile *p,
msg_length,
1, /* pixel scoreboard */
response_length,
- eot);
+ eot,
+ 0 /* send_commit_msg */);
}
diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c
index 99a6f6be113..a01d5576f8c 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c
@@ -34,7 +34,7 @@
#include "main/macros.h"
#include "main/enums.h"
-#include "shader/program.h"
+#include "program/program.h"
#include "intel_batchbuffer.h"
#include "brw_defines.h"
diff --git a/src/mesa/drivers/dri/i965/brw_optimize.c b/src/mesa/drivers/dri/i965/brw_optimize.c
index e79b3ddea35..8aa6fb6cc6f 100644
--- a/src/mesa/drivers/dri/i965/brw_optimize.c
+++ b/src/mesa/drivers/dri/i965/brw_optimize.c
@@ -26,12 +26,600 @@
*/
#include "main/macros.h"
-#include "shader/program.h"
-#include "shader/prog_print.h"
+#include "program/program.h"
+#include "program/prog_print.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_eu.h"
+static const struct {
+ char *name;
+ int nsrc;
+ int ndst;
+ GLboolean is_arith;
+} inst_opcode[128] = {
+ [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
+
+ [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
+
+ [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
+ [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
+ [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
+ [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
+};
+
+static INLINE
+GLboolean brw_is_arithmetic_inst(const struct brw_instruction *inst)
+{
+ return inst_opcode[inst->header.opcode].is_arith;
+}
+
+static const GLuint inst_stride[7] = {
+ [0] = 0,
+ [1] = 1,
+ [2] = 2,
+ [3] = 4,
+ [4] = 8,
+ [5] = 16,
+ [6] = 32
+};
+
+static const GLuint inst_type_size[8] = {
+ [BRW_REGISTER_TYPE_UD] = 4,
+ [BRW_REGISTER_TYPE_D] = 4,
+ [BRW_REGISTER_TYPE_UW] = 2,
+ [BRW_REGISTER_TYPE_W] = 2,
+ [BRW_REGISTER_TYPE_UB] = 1,
+ [BRW_REGISTER_TYPE_B] = 1,
+ [BRW_REGISTER_TYPE_F] = 4
+};
+
+static INLINE GLboolean
+brw_is_grf_written(const struct brw_instruction *inst,
+ int reg_index, int size,
+ int gen)
+{
+ if (inst_opcode[inst->header.opcode].ndst == 0)
+ return GL_FALSE;
+
+ if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT)
+ if (inst->bits1.ia1.dest_reg_file == BRW_GENERAL_REGISTER_FILE)
+ return GL_TRUE;
+
+ if (inst->bits1.da1.dest_reg_file != BRW_GENERAL_REGISTER_FILE)
+ return GL_FALSE;
+
+ const int reg_start = reg_index * REG_SIZE;
+ const int reg_end = reg_start + size;
+
+ const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type];
+ const int write_start = inst->bits1.da1.dest_reg_nr*REG_SIZE
+ + inst->bits1.da1.dest_subreg_nr;
+ int length, write_end;
+
+ /* SEND is specific */
+ if (inst->header.opcode == BRW_OPCODE_SEND) {
+ if (gen >= 5)
+ length = inst->bits3.generic_gen5.response_length*REG_SIZE;
+ else
+ length = inst->bits3.generic.response_length*REG_SIZE;
+ }
+ else {
+ length = 1 << inst->header.execution_size;
+ length *= type_size;
+ length *= inst->bits1.da1.dest_horiz_stride;
+ }
+
+ /* If the two intervals intersect, we overwrite the register */
+ write_end = write_start + length;
+ const int left = MAX2(write_start, reg_start);
+ const int right = MIN2(write_end, reg_end);
+
+ return left < right;
+}
+
+/* Specific path for message register since we need to handle the compr4 case */
+static INLINE GLboolean
+brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size)
+{
+ if (inst_opcode[inst->header.opcode].ndst == 0)
+ return GL_FALSE;
+
+ if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT)
+ if (inst->bits1.ia1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE)
+ return GL_TRUE;
+
+ if (inst->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE)
+ return GL_FALSE;
+
+ const int reg_start = reg_index * REG_SIZE;
+ const int reg_end = reg_start + size;
+
+ const int mrf_index = inst->bits1.da1.dest_reg_nr & 0x0f;
+ const int is_compr4 = inst->bits1.da1.dest_reg_nr & BRW_MRF_COMPR4;
+ const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type];
+
+ /* We use compr4 with a size != 16 elements. Strange, we conservatively
+ * consider that we are writing the register.
+ */
+ if (is_compr4 && inst->header.execution_size != BRW_EXECUTE_16)
+ return GL_TRUE;
+
+ GLboolean is_written = GL_FALSE;
+
+ /* Here we write mrf_{i} and mrf_{i+4}. So we read two times 8 elements */
+ if (is_compr4) {
+ const int length = 8 * type_size * inst->bits1.da1.dest_horiz_stride;
+
+ /* First 8-way register */
+ const int write_start0 = mrf_index*REG_SIZE
+ + inst->bits1.da1.dest_subreg_nr;
+ const int write_end0 = write_start0 + length;
+
+ /* Second 8-way register */
+ const int write_start1 = (mrf_index+4)*REG_SIZE
+ + inst->bits1.da1.dest_subreg_nr;
+ const int write_end1 = write_start1 + length;
+
+ /* If the two intervals intersect, we overwrite the register */
+ const int left0 = MAX2(write_start0, reg_start);
+ const int right0 = MIN2(write_end0, reg_end);
+ const int left1 = MAX2(write_start1, reg_start);
+ const int right1 = MIN2(write_end1, reg_end);
+
+ is_written = left0 < right0 || left1 < right1;
+ }
+ else {
+ int length;
+ length = 1 << inst->header.execution_size;
+ length *= type_size;
+ length *= inst->bits1.da1.dest_horiz_stride;
+
+ /* If the two intervals intersect, we write into the register */
+ const int write_start = inst->bits1.da1.dest_reg_nr*REG_SIZE
+ + inst->bits1.da1.dest_subreg_nr;
+ const int write_end = write_start + length;
+ const int left = MAX2(write_start, reg_start);
+ const int right = MIN2(write_end, reg_end);;
+
+ is_written = left < right;
+ }
+
+ /* SEND may perform an implicit mov to a mrf register */
+ if (is_written == GL_FALSE &&
+ inst->header.opcode == BRW_OPCODE_SEND &&
+ inst->bits1.da1.src0_reg_file != 0) {
+
+ const int mrf_start = inst->header.destreg__conditionalmod;
+ const int write_start = mrf_start * REG_SIZE;
+ const int write_end = write_start + REG_SIZE;
+ const int left = MAX2(write_start, reg_start);
+ const int right = MIN2(write_end, reg_end);;
+ is_written = left < right;
+ }
+
+ return is_written;
+}
+
+static INLINE GLboolean
+brw_is_mrf_read(const struct brw_instruction *inst,
+ int reg_index, int size, int gen)
+{
+ if (inst->header.opcode != BRW_OPCODE_SEND)
+ return GL_FALSE;
+ if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT)
+ return GL_TRUE;
+
+ const int reg_start = reg_index*REG_SIZE;
+ const int reg_end = reg_start + size;
+
+ int length, read_start, read_end;
+ if (gen >= 5)
+ length = inst->bits3.generic_gen5.msg_length*REG_SIZE;
+ else
+ length = inst->bits3.generic.msg_length*REG_SIZE;
+
+ /* Look if SEND uses an implicit mov. In that case, we read one less register
+ * (but we write it)
+ */
+ if (inst->bits1.da1.src0_reg_file != 0)
+ read_start = inst->header.destreg__conditionalmod;
+ else {
+ length--;
+ read_start = inst->header.destreg__conditionalmod + 1;
+ }
+ read_start *= REG_SIZE;
+ read_end = read_start + length;
+
+ const int left = MAX2(read_start, reg_start);
+ const int right = MIN2(read_end, reg_end);
+
+ return left < right;
+}
+
+static INLINE GLboolean
+brw_is_grf_read(const struct brw_instruction *inst, int reg_index, int size)
+{
+ int i, j;
+ if (inst_opcode[inst->header.opcode].nsrc == 0)
+ return GL_FALSE;
+
+ /* Look at first source. We must take into account register regions to
+ * monitor carefully the read. Note that we are a bit too conservative here
+ * since we do not take into account the fact that some complete registers
+ * may be skipped
+ */
+ if (inst_opcode[inst->header.opcode].nsrc >= 1) {
+
+ if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT)
+ if (inst->bits1.ia1.src0_reg_file == BRW_GENERAL_REGISTER_FILE)
+ return GL_TRUE;
+ if (inst->bits1.da1.src0_reg_file != BRW_GENERAL_REGISTER_FILE)
+ return GL_FALSE;
+
+ const int reg_start = reg_index*REG_SIZE;
+ const int reg_end = reg_start + size;
+
+ /* See if at least one of this element intersects the interval */
+ const int type_size = inst_type_size[inst->bits1.da1.src0_reg_type];
+ const int elem_num = 1 << inst->header.execution_size;
+ const int width = 1 << inst->bits2.da1.src0_width;
+ const int row_num = elem_num >> inst->bits2.da1.src0_width;
+ const int hs = type_size*inst_stride[inst->bits2.da1.src0_horiz_stride];
+ const int vs = type_size*inst_stride[inst->bits2.da1.src0_vert_stride];
+ int row_start = inst->bits2.da1.src0_reg_nr*REG_SIZE
+ + inst->bits2.da1.src0_subreg_nr;
+ for (j = 0; j < row_num; ++j) {
+ int write_start = row_start;
+ for (i = 0; i < width; ++i) {
+ const int write_end = write_start + type_size;
+ const int left = write_start > reg_start ? write_start : reg_start;
+ const int right = write_end < reg_end ? write_end : reg_end;
+ if (left < right)
+ return GL_TRUE;
+ write_start += hs;
+ }
+ row_start += vs;
+ }
+ }
+
+ /* Second src register */
+ if (inst_opcode[inst->header.opcode].nsrc >= 2) {
+
+ if (inst->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT)
+ if (inst->bits1.ia1.src1_reg_file == BRW_GENERAL_REGISTER_FILE)
+ return GL_TRUE;
+ if (inst->bits1.da1.src1_reg_file != BRW_GENERAL_REGISTER_FILE)
+ return GL_FALSE;
+
+ const int reg_start = reg_index*REG_SIZE;
+ const int reg_end = reg_start + size;
+
+ /* See if at least one of this element intersects the interval */
+ const int type_size = inst_type_size[inst->bits1.da1.src1_reg_type];
+ const int elem_num = 1 << inst->header.execution_size;
+ const int width = 1 << inst->bits3.da1.src1_width;
+ const int row_num = elem_num >> inst->bits3.da1.src1_width;
+ const int hs = type_size*inst_stride[inst->bits3.da1.src1_horiz_stride];
+ const int vs = type_size*inst_stride[inst->bits3.da1.src1_vert_stride];
+ int row_start = inst->bits3.da1.src1_reg_nr*REG_SIZE
+ + inst->bits3.da1.src1_subreg_nr;
+ for (j = 0; j < row_num; ++j) {
+ int write_start = row_start;
+ for (i = 0; i < width; ++i) {
+ const int write_end = write_start + type_size;
+ const int left = write_start > reg_start ? write_start : reg_start;
+ const int right = write_end < reg_end ? write_end : reg_end;
+ if (left < right)
+ return GL_TRUE;
+ write_start += hs;
+ }
+ row_start += vs;
+ }
+ }
+
+ return GL_FALSE;
+}
+
+static INLINE GLboolean
+brw_is_control_done(const struct brw_instruction *mov) {
+ return
+ mov->header.dependency_control != 0 ||
+ mov->header.thread_control != 0 ||
+ mov->header.mask_control != 0 ||
+ mov->header.saturate != 0 ||
+ mov->header.debug_control != 0;
+}
+
+static INLINE GLboolean
+brw_is_predicated(const struct brw_instruction *mov) {
+ return mov->header.predicate_control != 0;
+}
+
+static INLINE GLboolean
+brw_is_grf_to_mrf_mov(const struct brw_instruction *mov,
+ int *mrf_index,
+ int *grf_index,
+ GLboolean *is_compr4)
+{
+ if (brw_is_predicated(mov) ||
+ brw_is_control_done(mov) ||
+ mov->header.debug_control != 0)
+ return GL_FALSE;
+
+ if (mov->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT ||
+ mov->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE ||
+ mov->bits1.da1.dest_reg_type != BRW_REGISTER_TYPE_F ||
+ mov->bits1.da1.dest_horiz_stride != BRW_HORIZONTAL_STRIDE_1 ||
+ mov->bits1.da1.dest_subreg_nr != 0)
+ return GL_FALSE;
+
+ if (mov->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT ||
+ mov->bits1.da1.src0_reg_file != BRW_GENERAL_REGISTER_FILE ||
+ mov->bits1.da1.src0_reg_type != BRW_REGISTER_TYPE_F ||
+ mov->bits2.da1.src0_width != BRW_WIDTH_8 ||
+ mov->bits2.da1.src0_horiz_stride != BRW_HORIZONTAL_STRIDE_1 ||
+ mov->bits2.da1.src0_vert_stride != BRW_VERTICAL_STRIDE_8 ||
+ mov->bits2.da1.src0_subreg_nr != 0 ||
+ mov->bits2.da1.src0_abs != 0 ||
+ mov->bits2.da1.src0_negate != 0)
+ return GL_FALSE;
+
+ *grf_index = mov->bits2.da1.src0_reg_nr;
+ *mrf_index = mov->bits1.da1.dest_reg_nr & 0x0f;
+ *is_compr4 = (mov->bits1.da1.dest_reg_nr & BRW_MRF_COMPR4) != 0;
+ return GL_TRUE;
+}
+
+static INLINE GLboolean
+brw_is_grf_straight_write(const struct brw_instruction *inst, int grf_index)
+{
+ /* remark: no problem to predicate a SEL instruction */
+ if ((!brw_is_predicated(inst) || inst->header.opcode == BRW_OPCODE_SEL) &&
+ brw_is_control_done(inst) == GL_FALSE &&
+ inst->header.execution_size == 4 &&
+ inst->header.access_mode == BRW_ALIGN_1 &&
+ inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT &&
+ inst->bits1.da1.dest_reg_file == BRW_GENERAL_REGISTER_FILE &&
+ inst->bits1.da1.dest_reg_type == BRW_REGISTER_TYPE_F &&
+ inst->bits1.da1.dest_horiz_stride == BRW_HORIZONTAL_STRIDE_1 &&
+ inst->bits1.da1.dest_reg_nr == grf_index &&
+ inst->bits1.da1.dest_subreg_nr == 0 &&
+ brw_is_arithmetic_inst(inst))
+ return GL_TRUE;
+
+ return GL_FALSE;
+}
+
+static INLINE GLboolean
+brw_inst_are_equal(const struct brw_instruction *src0,
+ const struct brw_instruction *src1)
+{
+ const GLuint *field0 = (GLuint *) src0;
+ const GLuint *field1 = (GLuint *) src1;
+ return field0[0] == field1[0] &&
+ field0[1] == field1[1] &&
+ field0[2] == field1[2] &&
+ field0[3] == field1[3];
+}
+
+static INLINE void
+brw_inst_copy(struct brw_instruction *dst,
+ const struct brw_instruction *src)
+{
+ GLuint *field_dst = (GLuint *) dst;
+ const GLuint *field_src = (GLuint *) src;
+ field_dst[0] = field_src[0];
+ field_dst[1] = field_src[1];
+ field_dst[2] = field_src[2];
+ field_dst[3] = field_src[3];
+}
+
+static void brw_remove_inst(struct brw_compile *p, const GLboolean *removeInst)
+{
+ int i, nr_insn = 0, to = 0, from = 0;
+
+ for (from = 0; from < p->nr_insn; ++from) {
+ if (removeInst[from])
+ continue;
+ if(to != from)
+ brw_inst_copy(p->store + to, p->store + from);
+ to++;
+ }
+
+ for (i = 0; i < p->nr_insn; ++i)
+ if (removeInst[i] == GL_FALSE)
+ nr_insn++;
+ p->nr_insn = nr_insn;
+}
+
+/* The gen code emitter generates a lot of duplications in the
+ * grf-to-mrf moves, for example when texture sampling with the same
+ * coordinates from multiple textures.. Here, we monitor same mov
+ * grf-to-mrf instrutions and remove repeated ones where the operands
+ * and dst ahven't changed in between.
+ */
+void brw_remove_duplicate_mrf_moves(struct brw_compile *p)
+{
+ const int gen = p->brw->intel.gen;
+ int i, j;
+
+ GLboolean *removeInst = calloc(sizeof(GLboolean), p->nr_insn);
+ for (i = 0; i < p->nr_insn; i++) {
+ if (removeInst[i])
+ continue;
+
+ const struct brw_instruction *mov = p->store + i;
+ int mrf_index, grf_index;
+ GLboolean is_compr4;
+
+ /* Only consider _straight_ grf-to-mrf moves */
+ if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4))
+ continue;
+
+ const int mrf_index0 = mrf_index;
+ const int mrf_index1 = is_compr4 ? mrf_index0+4 : mrf_index0+1;
+ const int simd16_size = 2 * REG_SIZE;
+
+ for (j = i + 1; j < p->nr_insn; j++) {
+ const struct brw_instruction *inst = p->store + j;
+
+ if (brw_inst_are_equal(mov, inst)) {
+ removeInst[j] = GL_TRUE;
+ continue;
+ }
+
+ if (brw_is_grf_written(inst, grf_index, simd16_size, gen) ||
+ brw_is_mrf_written(inst, mrf_index0, REG_SIZE) ||
+ brw_is_mrf_written(inst, mrf_index1, REG_SIZE))
+ break;
+ }
+ }
+
+ brw_remove_inst(p, removeInst);
+ free(removeInst);
+}
+
+/* Replace moves to MRFs where the value moved is the result of a
+ * normal arithmetic operation with computation right into the MRF.
+ */
+void brw_remove_grf_to_mrf_moves(struct brw_compile *p)
+{
+ int i, j, prev;
+ struct brw_context *brw = p->brw;
+ const int gen = brw->intel.gen;
+ const int simd16_size = 2*REG_SIZE;
+
+ GLboolean *removeInst = calloc(sizeof(GLboolean), p->nr_insn);
+ assert(removeInst);
+
+ for (i = 0; i < p->nr_insn; i++) {
+ if (removeInst[i])
+ continue;
+
+ struct brw_instruction *grf_inst = NULL;
+ const struct brw_instruction *mov = p->store + i;
+ int mrf_index, grf_index;
+ GLboolean is_compr4;
+
+ /* Only consider _straight_ grf-to-mrf moves */
+ if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4))
+ continue;
+
+ /* Using comp4 enables a stride of 4 for this instruction */
+ const int mrf_index0 = mrf_index;
+ const int mrf_index1 = is_compr4 ? mrf_index+4 : mrf_index+1;
+
+ /* Look where the register has been set */
+ prev = i;
+ GLboolean potential_remove = GL_FALSE;
+ while (prev--) {
+
+ /* If _one_ instruction writes the grf, we try to remove the mov */
+ struct brw_instruction *inst = p->store + prev;
+ if (brw_is_grf_straight_write(inst, grf_index)) {
+ potential_remove = GL_TRUE;
+ grf_inst = inst;
+ break;
+ }
+
+ }
+
+ if (potential_remove == GL_FALSE)
+ continue;
+ removeInst[i] = GL_TRUE;
+
+ /* Monitor first the section of code between the grf computation and the
+ * mov. Here we cannot read or write both mrf and grf register
+ */
+ for (j = prev + 1; j < i; ++j) {
+ struct brw_instruction *inst = p->store + j;
+ if (removeInst[j])
+ continue;
+ if (brw_is_grf_written(inst, grf_index, simd16_size, gen) ||
+ brw_is_grf_read(inst, grf_index, simd16_size) ||
+ brw_is_mrf_written(inst, mrf_index0, REG_SIZE) ||
+ brw_is_mrf_written(inst, mrf_index1, REG_SIZE) ||
+ brw_is_mrf_read(inst, mrf_index0, REG_SIZE, gen) ||
+ brw_is_mrf_read(inst, mrf_index1, REG_SIZE, gen)) {
+ removeInst[i] = GL_FALSE;
+ break;
+ }
+ }
+
+ /* After the mov, we can read or write the mrf. If the grf is overwritten,
+ * we are done
+ */
+ for (j = i + 1; j < p->nr_insn; ++j) {
+ struct brw_instruction *inst = p->store + j;
+ if (removeInst[j])
+ continue;
+
+ if (brw_is_grf_read(inst, grf_index, simd16_size)) {
+ removeInst[i] = GL_FALSE;
+ break;
+ }
+
+ if (brw_is_grf_straight_write(inst, grf_index))
+ break;
+ }
+
+ /* Note that with the top down traversal, we can safely pacth the mov
+ * instruction
+ */
+ if (removeInst[i]) {
+ grf_inst->bits1.da1.dest_reg_file = mov->bits1.da1.dest_reg_file;
+ grf_inst->bits1.da1.dest_reg_nr = mov->bits1.da1.dest_reg_nr;
+ }
+ }
+
+ brw_remove_inst(p, removeInst);
+ free(removeInst);
+}
+
static GLboolean
is_single_channel_dp4(struct brw_instruction *insn)
{
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index bd560acdadf..4b08d2599bc 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -31,10 +31,10 @@
#include "main/imports.h"
#include "main/enums.h"
-#include "shader/prog_parameter.h"
-#include "shader/program.h"
-#include "shader/programopt.h"
-#include "shader/shader_api.h"
+#include "main/shaderobj.h"
+#include "program/prog_parameter.h"
+#include "program/program.h"
+#include "program/programopt.h"
#include "tnl/tnl.h"
#include "brw_context.h"
@@ -174,9 +174,36 @@ static GLboolean brwProgramStringNotify( GLcontext *ctx,
shader_error(ctx, prog,
"i965 driver doesn't yet support uninlined function "
"calls. Move to using a single return statement at "
- "the end of the function to work around it.");
+ "the end of the function to work around it.\n");
return GL_FALSE;
}
+ if (prog->Instructions[i].DstReg.RelAddr &&
+ prog->Instructions[i].DstReg.File == PROGRAM_INPUT) {
+ shader_error(ctx, prog,
+ "Variable indexing of shader inputs unsupported\n");
+ return GL_FALSE;
+ }
+ if (prog->Instructions[i].DstReg.RelAddr &&
+ prog->Instructions[i].DstReg.File == PROGRAM_OUTPUT) {
+ shader_error(ctx, prog,
+ "Variable indexing of shader outputs unsupported\n");
+ return GL_FALSE;
+ }
+ if (target == GL_FRAGMENT_PROGRAM_ARB) {
+ if ((prog->Instructions[i].DstReg.RelAddr &&
+ prog->Instructions[i].DstReg.File == PROGRAM_TEMPORARY) ||
+ (prog->Instructions[i].SrcReg[0].RelAddr &&
+ prog->Instructions[i].SrcReg[0].File == PROGRAM_TEMPORARY) ||
+ (prog->Instructions[i].SrcReg[1].RelAddr &&
+ prog->Instructions[i].SrcReg[1].File == PROGRAM_TEMPORARY) ||
+ (prog->Instructions[i].SrcReg[2].RelAddr &&
+ prog->Instructions[i].SrcReg[2].File == PROGRAM_TEMPORARY)) {
+ shader_error(ctx, prog,
+ "Variable indexing of variable arrays in the FS "
+ "unsupported\n");
+ return GL_FALSE;
+ }
+ }
}
return GL_TRUE;
diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h
index a0680a56f2c..e525c730d3f 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.h
+++ b/src/mesa/drivers/dri/i965/brw_sf.h
@@ -34,7 +34,7 @@
#define BRW_SF_H
-#include "shader/program.h"
+#include "program/program.h"
#include "brw_context.h"
#include "brw_eu.h"
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
index e290ca92f60..914f275cc67 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -130,7 +130,7 @@ struct brw_sf_unit_key {
unsigned scissor:1;
unsigned line_smooth:1;
unsigned point_sprite:1;
- unsigned point_attenuated:1;
+ unsigned use_vs_point_size:1;
unsigned render_to_fbo:1;
float line_width;
float point_size;
@@ -164,7 +164,8 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
key->point_sprite = ctx->Point.PointSprite;
key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
- key->point_attenuated = ctx->Point._Attenuated;
+ key->use_vs_point_size = (ctx->VertexProgram.PointSizeEnabled ||
+ ctx->Point._Attenuated);
/* _NEW_LIGHT */
key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
@@ -296,7 +297,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
/* _NEW_POINT */
sf.sf7.sprite_point = key->point_sprite;
sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3);
- sf.sf7.use_point_size_state = !key->point_attenuated;
+ sf.sf7.use_point_size_state = !key->use_vs_point_size;
sf.sf7.aa_line_distance_mode = 0;
/* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index 2a7fa5b6997..2fde42a7060 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -1657,8 +1657,36 @@ struct brw_instruction
GLuint end_of_thread:1;
} dp_write_gen5;
+ /* Sandybridge DP for sample cache, constant cache, render cache */
struct {
- GLuint pad:16;
+ GLuint binding_table_index:8;
+ GLuint msg_control:5;
+ GLuint msg_type:3;
+ GLuint pad0:3;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } dp_sampler_const_cache;
+
+ struct {
+ GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint slot_group_select:1;
+ GLuint pixel_scoreboard_clear:1;
+ GLuint msg_type:4;
+ GLuint send_commit_msg:1;
+ GLuint pad0:1;
+ GLuint header_present:1;
+ GLuint response_length:5;
+ GLuint msg_length:4;
+ GLuint pad1:2;
+ GLuint end_of_thread:1;
+ } dp_render_cache;
+
+ struct {
+ GLuint function_control:16;
GLuint response_length:4;
GLuint msg_length:4;
GLuint msg_target:4;
@@ -1666,8 +1694,9 @@ struct brw_instruction
GLuint end_of_thread:1;
} generic;
+ /* Of this struct, only end_of_thread is not present for gen6. */
struct {
- GLuint pad:19;
+ GLuint function_control:19;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c
index bba9249d1b4..1db2a210d45 100644
--- a/src/mesa/drivers/dri/i965/brw_util.c
+++ b/src/mesa/drivers/dri/i965/brw_util.c
@@ -31,7 +31,7 @@
#include "main/mtypes.h"
-#include "shader/prog_parameter.h"
+#include "program/prog_parameter.h"
#include "brw_util.h"
#include "brw_defines.h"
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 3c12f11ea78..9a832af9a97 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -34,8 +34,8 @@
#include "brw_vs.h"
#include "brw_util.h"
#include "brw_state.h"
-#include "shader/prog_print.h"
-#include "shader/prog_parameter.h"
+#include "program/prog_print.h"
+#include "program/prog_parameter.h"
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 6493744f3eb..9338a6b7dbf 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -36,7 +36,7 @@
#include "brw_context.h"
#include "brw_eu.h"
-#include "shader/program.h"
+#include "program/program.h"
struct brw_vs_prog_key {
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 128987d78a6..c1d6525e9b7 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -31,9 +31,9 @@
#include "main/macros.h"
-#include "shader/program.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
+#include "program/program.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
#include "brw_context.h"
#include "brw_vs.h"
@@ -44,6 +44,7 @@ static GLboolean
brw_vs_arg_can_be_immediate(enum prog_opcode opcode, int arg)
{
int opcode_array[] = {
+ [OPCODE_MOV] = 1,
[OPCODE_ADD] = 2,
[OPCODE_CMP] = 3,
[OPCODE_DP3] = 2,
@@ -218,7 +219,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
c->first_overflow_output = 0;
if (intel->gen >= 6)
- mrf = 6;
+ mrf = 4;
else if (intel->gen == 5)
mrf = 8;
else
@@ -238,12 +239,25 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
mrf++; /* just a placeholder? XXX fix later stages & remove this */
}
else {
- if (mrf < 16) {
+ /* Two restrictions on our compute-to-MRF here. The
+ * message length for all SEND messages is restricted to
+ * [1,15], so we can't use mrf 15, as that means a length
+ * of 16.
+ *
+ * Additionally, URB writes are aligned to URB rows, so we
+ * need to put an even number of registers of URB data in
+ * each URB write so that the later write is aligned. A
+ * message length of 15 means 1 message header reg plus 14
+ * regs of URB data.
+ *
+ * For attributes beyond the compute-to-MRF, we compute to
+ * GRFs and they will be written in the second URB_WRITE.
+ */
+ if (mrf < 15) {
c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf);
mrf++;
}
else {
- /* too many vertex results to fit in MRF, use GRF for overflow */
if (!c->first_overflow_output)
c->first_overflow_output = i;
c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
@@ -318,8 +332,11 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
*/
attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs);
+ /* See emit_vertex_write() for where the VUE's overhead on top of the
+ * attributes comes from.
+ */
if (intel->gen >= 6)
- c->prog_data.urb_entry_size = (attributes_in_vue + 4 + 7) / 8;
+ c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 7) / 8;
else if (intel->gen == 5)
c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
else
@@ -869,8 +886,6 @@ get_constant(struct brw_vs_compile *c,
assert(argIndex < 3);
if (c->current_const[argIndex].index != src->Index) {
- struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
-
/* Keep track of the last constant loaded in this slot, for reuse. */
c->current_const[argIndex].index = src->Index;
@@ -881,9 +896,6 @@ get_constant(struct brw_vs_compile *c,
/* need to fetch the constant now */
brw_dp_READ_4_vs(p,
const_reg, /* writeback dest */
- 0, /* oword */
- 0, /* relative indexing? */
- addrReg, /* address register */
16 * src->Index, /* byte offset */
SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
);
@@ -904,8 +916,8 @@ get_reladdr_constant(struct brw_vs_compile *c,
const struct prog_src_register *src = &inst->SrcReg[argIndex];
struct brw_compile *p = &c->func;
struct brw_reg const_reg = c->current_const[argIndex].reg;
- struct brw_reg const2_reg;
struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
+ struct brw_reg byte_addr_reg = get_tmp(c);
assert(argIndex < 3);
@@ -917,37 +929,15 @@ get_reladdr_constant(struct brw_vs_compile *c,
src->Index, argIndex, c->current_const[argIndex].reg.nr);
#endif
+ brw_MUL(p, byte_addr_reg, addrReg, brw_imm_ud(16));
+
/* fetch the first vec4 */
- brw_dp_READ_4_vs(p,
- const_reg, /* writeback dest */
- 0, /* oword */
- 1, /* relative indexing? */
- addrReg, /* address register */
- 16 * src->Index, /* byte offset */
- SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
- );
- /* second vec4 */
- const2_reg = get_tmp(c);
-
- /* use upper half of address reg for second read */
- addrReg = stride(addrReg, 0, 4, 0);
- addrReg.subnr = 16;
-
- brw_dp_READ_4_vs(p,
- const2_reg, /* writeback dest */
- 1, /* oword */
- 1, /* relative indexing? */
- addrReg, /* address register */
- 16 * src->Index, /* byte offset */
- SURF_INDEX_VERT_CONST_BUFFER
- );
-
- /* merge the two Owords into the constant register */
- /* const_reg[7..4] = const2_reg[7..4] */
- brw_MOV(p,
- suboffset(stride(const_reg, 0, 4, 1), 4),
- suboffset(stride(const2_reg, 0, 4, 1), 4));
- release_tmp(c, const2_reg);
+ brw_dp_READ_4_vs_relative(p,
+ const_reg, /* writeback dest */
+ byte_addr_reg, /* address register */
+ 16 * src->Index, /* byte offset */
+ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
+ );
return const_reg;
}
@@ -993,36 +983,71 @@ static struct brw_reg get_reg( struct brw_vs_compile *c,
*/
static struct brw_reg deref( struct brw_vs_compile *c,
struct brw_reg arg,
- GLint offset)
+ GLint offset,
+ GLuint reg_size )
{
struct brw_compile *p = &c->func;
- struct brw_reg tmp = vec4(get_tmp(c));
+ struct brw_reg tmp = get_tmp(c);
struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
- struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
- GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
+ struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_D);
+ GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * reg_size;
struct brw_reg indirect = brw_vec4_indirect(0,0);
+ struct brw_reg acc = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UW);
+
+ /* Set the vertical stride on the register access so that the first
+ * 4 components come from a0.0 and the second 4 from a0.1.
+ */
+ indirect.vstride = BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL;
{
brw_push_insn_state(p);
brw_set_access_mode(p, BRW_ALIGN_1);
- /* This is pretty clunky - load the address register twice and
- * fetch each 4-dword value in turn. There must be a way to do
- * this in a single pass, but I couldn't get it to work.
- */
- brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
- brw_MOV(p, tmp, indirect);
+ brw_MUL(p, acc, vp_address, brw_imm_uw(reg_size));
+ brw_ADD(p, brw_address_reg(0), acc, brw_imm_uw(byte_offset));
+
+ brw_MUL(p, acc, suboffset(vp_address, 4), brw_imm_uw(reg_size));
+ brw_ADD(p, brw_address_reg(1), acc, brw_imm_uw(byte_offset));
- brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset));
- brw_MOV(p, suboffset(tmp, 4), indirect);
+ brw_MOV(p, tmp, indirect);
brw_pop_insn_state(p);
}
-
+
/* NOTE: tmp not released */
- return vec8(tmp);
+ return tmp;
}
+static void
+move_to_reladdr_dst(struct brw_vs_compile *c,
+ const struct prog_instruction *inst,
+ struct brw_reg val)
+{
+ struct brw_compile *p = &c->func;
+ int reg_size = 32;
+ struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+ struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_D);
+ struct brw_reg temp_base = c->regs[inst->DstReg.File][0];
+ GLuint byte_offset = temp_base.nr * 32 + temp_base.subnr;
+ struct brw_reg indirect = brw_vec4_indirect(0,0);
+ struct brw_reg acc = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UW);
+
+ byte_offset += inst->DstReg.Index * reg_size;
+
+ brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
+
+ brw_MUL(p, acc, vp_address, brw_imm_uw(reg_size));
+ brw_ADD(p, brw_address_reg(0), acc, brw_imm_uw(byte_offset));
+ brw_MOV(p, indirect, val);
+
+ brw_MUL(p, acc, suboffset(vp_address, 4), brw_imm_uw(reg_size));
+ brw_ADD(p, brw_address_reg(0), acc,
+ brw_imm_uw(byte_offset + reg_size / 2));
+ brw_MOV(p, indirect, suboffset(val, 4));
+
+ brw_pop_insn_state(p);
+}
/**
* Get brw reg corresponding to the instruction's [argIndex] src reg.
@@ -1091,7 +1116,7 @@ get_src_reg( struct brw_vs_compile *c,
case PROGRAM_INPUT:
case PROGRAM_OUTPUT:
if (relAddr) {
- return deref(c, c->regs[file][0], index);
+ return deref(c, c->regs[file][0], index, 32);
}
else {
assert(c->regs[file][index].nr != 0);
@@ -1113,7 +1138,7 @@ get_src_reg( struct brw_vs_compile *c,
return get_constant(c, inst, argIndex);
}
else if (relAddr) {
- return deref(c, c->regs[PROGRAM_STATE_VAR][0], index);
+ return deref(c, c->regs[PROGRAM_STATE_VAR][0], index, 16);
}
else {
assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
@@ -1134,26 +1159,6 @@ get_src_reg( struct brw_vs_compile *c,
}
}
-
-static void emit_arl( struct brw_vs_compile *c,
- struct brw_reg dst,
- struct brw_reg arg0 )
-{
- struct brw_compile *p = &c->func;
- struct brw_reg tmp = dst;
- GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
-
- if (need_tmp)
- tmp = get_tmp(c);
-
- brw_RNDD(p, tmp, arg0); /* tmp = round(arg0) */
- brw_MUL(p, dst, tmp, brw_imm_d(16)); /* dst = tmp * 16 */
-
- if (need_tmp)
- release_tmp(c, tmp);
-}
-
-
/**
* Return the brw reg for the given instruction's src argument.
* Will return mangled results for SWZ op. The emit_swz() function
@@ -1198,8 +1203,17 @@ static struct brw_reg get_dst( struct brw_vs_compile *c,
switch (dst.File) {
case PROGRAM_TEMPORARY:
case PROGRAM_OUTPUT:
- assert(c->regs[dst.File][dst.Index].nr != 0);
- reg = c->regs[dst.File][dst.Index];
+ /* register-indirect addressing is only 1x1, not VxH, for
+ * destination regs. So, for RelAddr we'll return a temporary
+ * for the dest and do a move of the result to the RelAddr
+ * register after the instruction emit.
+ */
+ if (dst.RelAddr) {
+ reg = get_tmp(c);
+ } else {
+ assert(c->regs[dst.File][dst.Index].nr != 0);
+ reg = c->regs[dst.File][dst.Index];
+ }
break;
case PROGRAM_ADDRESS:
assert(dst.Index == 0);
@@ -1298,7 +1312,6 @@ static void emit_vertex_write( struct brw_vs_compile *c)
struct brw_compile *p = &c->func;
struct brw_context *brw = p->brw;
struct intel_context *intel = &brw->intel;
- struct brw_reg m0 = brw_message_reg(0);
struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS];
struct brw_reg ndc;
int eot;
@@ -1381,16 +1394,19 @@ static void emit_vertex_write( struct brw_vs_compile *c)
*/
brw_set_access_mode(p, BRW_ALIGN_1);
+ /* The VUE layout is documented in Volume 2a. */
if (intel->gen >= 6) {
- /* There are 16 DWs (D0-D15) in VUE header on Sandybridge:
+ /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
* dword 0-3 (m1) of the header is indices, point width, clip flags.
* dword 4-7 (m2) is the 4D space position
- * dword 8-15 (m3,m4) of the vertex header is the user clip distance.
- * m5 is the first vertex data we fill, which is the vertex position.
+ * dword 8-15 (m3,m4) of the vertex header is the user clip distance if
+ * enabled. We don't use it, so skip it.
+ * m3 is the first vertex element data we fill, which is the vertex
+ * position.
*/
- brw_MOV(p, offset(m0, 2), pos);
- brw_MOV(p, offset(m0, 5), pos);
- len_vertex_header = 4;
+ brw_MOV(p, brw_message_reg(2), pos);
+ brw_MOV(p, brw_message_reg(3), pos);
+ len_vertex_header = 2;
} else if (intel->gen == 5) {
/* There are 20 DWs (D0-D19) in VUE header on Ironlake:
* dword 0-3 (m1) of the header is indices, point width, clip flags.
@@ -1400,9 +1416,9 @@ static void emit_vertex_write( struct brw_vs_compile *c)
* m6 is a pad so that the vertex element data is aligned
* m7 is the first vertex data we fill, which is the vertex position.
*/
- brw_MOV(p, offset(m0, 2), ndc);
- brw_MOV(p, offset(m0, 3), pos);
- brw_MOV(p, offset(m0, 7), pos);
+ brw_MOV(p, brw_message_reg(2), ndc);
+ brw_MOV(p, brw_message_reg(3), pos);
+ brw_MOV(p, brw_message_reg(7), pos);
len_vertex_header = 6;
} else {
/* There are 8 dwords in VUE header pre-Ironlake:
@@ -1412,8 +1428,8 @@ static void emit_vertex_write( struct brw_vs_compile *c)
* dword 8-11 (m3) is the first vertex data, which we always have be the
* vertex position.
*/
- brw_MOV(p, offset(m0, 2), ndc);
- brw_MOV(p, offset(m0, 3), pos);
+ brw_MOV(p, brw_message_reg(2), ndc);
+ brw_MOV(p, brw_message_reg(3), pos);
len_vertex_header = 2;
}
@@ -1437,29 +1453,26 @@ static void emit_vertex_write( struct brw_vs_compile *c)
* Move the overflowed attributes from the GRF to the MRF and
* issue another brw_urb_WRITE().
*/
- /* XXX I'm not 100% sure about which MRF regs to use here. Starting
- * at mrf[4] atm...
- */
- GLuint i, mrf = 0;
+ GLuint i, mrf = 1;
for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) {
if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) {
/* move from GRF to MRF */
- brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]);
+ brw_MOV(p, brw_message_reg(mrf), c->regs[PROGRAM_OUTPUT][i]);
mrf++;
}
}
brw_urb_WRITE(p,
brw_null_reg(), /* dest */
- 4, /* starting mrf reg nr */
+ 0, /* starting mrf reg nr */
c->r0, /* src */
0, /* allocate */
1, /* used */
- mrf+1, /* msg len */
+ mrf, /* msg len */
0, /* response len */
1, /* eot */
1, /* writes complete */
- BRW_MAX_MRF-1, /* urb destination offset */
+ 14 / 2, /* urb destination offset */
BRW_URB_SWIZZLE_INTERLEAVE);
}
}
@@ -1665,7 +1678,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
break;
case OPCODE_ARL:
- emit_arl(c, dst, args[0]);
+ brw_RNDD(p, dst, args[0]);
break;
case OPCODE_FLR:
brw_RNDD(p, dst, args[0]);
@@ -1890,6 +1903,14 @@ void brw_vs_emit(struct brw_vs_compile *c )
}
}
+ if (inst->DstReg.RelAddr && inst->DstReg.File == PROGRAM_TEMPORARY) {
+ /* We don't do RelAddr of PROGRAM_OUTPUT yet, because of the
+ * compute-to-mrf and the fact that we are allocating
+ * registers for only the used PROGRAM_OUTPUTs.
+ */
+ move_to_reladdr_dst(c, inst, dst);
+ }
+
release_tmps(c);
}
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index be9e415cb07..0250a68d292 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -31,7 +31,7 @@
#include "main/mtypes.h"
#include "main/texstore.h"
-#include "shader/prog_parameter.h"
+#include "program/prog_parameter.h"
#include "brw_context.h"
#include "brw_state.h"
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 197b8754345..40f51c21c95 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -34,7 +34,7 @@
#define BRW_WM_H
-#include "shader/prog_instruction.h"
+#include "program/prog_instruction.h"
#include "brw_context.h"
#include "brw_eu.h"
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index a90a2d3cf25..0c625a4cd02 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -1326,7 +1326,7 @@ void emit_fb_write(struct brw_wm_compile *c,
* + 1 for the second half we get destination + 4.
*/
brw_MOV(p,
- brw_message_reg(nr + channel + (1 << 7)),
+ brw_message_reg(nr + channel + BRW_MRF_COMPR4),
arg0[channel]);
} else {
/* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
@@ -1763,12 +1763,20 @@ void brw_wm_emit( struct brw_wm_compile *c )
inst->dst[i]->spill_slot);
}
+ /* Only properly tested on ILK */
+ if (p->brw->intel.gen == 5) {
+ brw_remove_duplicate_mrf_moves(p);
+ if (c->dispatch_width == 16)
+ brw_remove_grf_to_mrf_moves(p);
+ }
+
if (INTEL_DEBUG & DEBUG_WM) {
int i;
- printf("wm-native:\n");
- for (i = 0; i < p->nr_insn; i++)
+ printf("wm-native:\n");
+ for (i = 0; i < p->nr_insn; i++)
brw_disasm(stderr, &p->store[i], p->brw->intel.gen);
printf("\n");
}
}
+
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index d73c3915824..0bef874b887 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -37,9 +37,9 @@
#include "brw_wm.h"
#include "brw_util.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
-#include "shader/prog_statevars.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
+#include "program/prog_statevars.h"
/** An invalid texture target */
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 57be08a8d1d..2dd346d6dd1 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -1,7 +1,7 @@
#include "main/macros.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
-#include "shader/prog_optimize.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
+#include "program/prog_optimize.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
index 60bd92ed223..05de85a957e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
@@ -32,7 +32,7 @@
#include "brw_context.h"
#include "brw_wm.h"
-#include "shader/prog_parameter.h"
+#include "program/prog_parameter.h"
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 1789b21451d..c1cf4db1cae 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -222,7 +222,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, thread2),
brw->wm.scratch_bo,
wm.thread2.per_thread_scratch_space,
- 0, 0);
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}
/* Emit sampler state relocation */
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 77898dbbe72..17b016b569b 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -32,7 +32,7 @@
#include "main/mtypes.h"
#include "main/texstore.h"
-#include "shader/prog_parameter.h"
+#include "program/prog_parameter.h"
#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index 51940efb443..6820ca3abf4 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -69,7 +69,7 @@ upload_sf_state(struct brw_context *brw)
dw1 =
num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT |
(num_inputs + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
- 3 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
+ 1 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
dw2 = GEN6_SF_VIEWPORT_TRANSFORM_ENABLE |
GEN6_SF_STATISTICS_ENABLE;
dw3 = 0;
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index 5916a139946..4080a9dedfd 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -29,8 +29,8 @@
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_util.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_statevars.h"
+#include "program/prog_parameter.h"
+#include "program/prog_statevars.h"
#include "intel_batchbuffer.h"
static void
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index ed1a72f03ba..863c85449d9 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -29,8 +29,8 @@
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_util.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_statevars.h"
+#include "program/prog_parameter.h"
+#include "program/prog_statevars.h"
#include "intel_batchbuffer.h"
static void
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index 698445c5268..ff741fc39ab 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -102,7 +102,7 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used)
if (INTEL_DEBUG & DEBUG_BATCH) {
drm_intel_bo_map(batch->buf, GL_FALSE);
intel_decode(batch->buf->virtual, used / 4, batch->buf->offset,
- intel->intelScreen->deviceID);
+ intel->intelScreen->deviceID, GL_TRUE);
drm_intel_bo_unmap(batch->buf);
if (intel->vtbl.debug_batch != NULL)
diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h
index cd614c59e55..72a74322ee5 100644
--- a/src/mesa/drivers/dri/intel/intel_chipset.h
+++ b/src/mesa/drivers/dri/intel/intel_chipset.h
@@ -115,6 +115,9 @@
devid == PCI_CHIP_I946_GZ || \
IS_G4X(devid))
+/* Compat macro for intel_decode.c */
+#define IS_IRONLAKE(devid) IS_GEN5(devid)
+
#define IS_GEN6(devid) (devid == PCI_CHIP_SANDYBRIDGE || \
devid == PCI_CHIP_SANDYBRIDGE_M)
diff --git a/src/mesa/drivers/dri/intel/intel_decode.c b/src/mesa/drivers/dri/intel/intel_decode.c
index 650010ac9ca..25b4131594f 100644
--- a/src/mesa/drivers/dri/intel/intel_decode.c
+++ b/src/mesa/drivers/dri/intel/intel_decode.c
@@ -1,48 +1,21 @@
-/* -*- c-basic-offset: 4 -*- */
-/*
- * Copyright © 2007 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Eric Anholt <[email protected]>
- *
- */
-
-/** @file intel_decode.c
- * This file contains code to print out batchbuffer contents in a
- * human-readable format.
- *
- * The current version only supports i915 packets, and only pretty-prints a
- * subset of them. The intention is for it to make just a best attempt to
- * decode, but never crash in the process.
- */
-
+#include <stdint.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
-#include <inttypes.h>
#include "intel_decode.h"
#include "intel_chipset.h"
+static FILE *out;
+static uint32_t saved_s2 = 0, saved_s4 = 0;
+static char saved_s2_set = 0, saved_s4_set = 0;
+static uint32_t head_offset = 0xffffffff; /* undefined */
+static uint32_t tail_offset = 0xffffffff; /* undefined */
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(A) (sizeof(A)/sizeof(A[0]))
+#endif
+
#define BUFFER_FAIL(_count, _len, _name) do { \
fprintf(out, "Buffer size too small in %s (%d < %d)\n", \
(_name), (_count), (_len)); \
@@ -50,9 +23,6 @@
return count; \
} while (0)
-static FILE *out;
-static uint32_t saved_s2 = 0, saved_s4 = 0;
-static char saved_s2_set = 0, saved_s4_set = 0;
static float
int_as_float(uint32_t intval)
@@ -71,15 +41,24 @@ instr_out(uint32_t *data, uint32_t hw_offset, unsigned int index,
char *fmt, ...)
{
va_list va;
-
- fprintf(out, "0x%08x: 0x%08x:%s ", hw_offset + index * 4, data[index],
- index == 0 ? "" : " ");
+ char *parseinfo;
+ uint32_t offset = hw_offset + index * 4;
+
+ if (offset == head_offset)
+ parseinfo = "HEAD";
+ else if (offset == tail_offset)
+ parseinfo = "TAIL";
+ else
+ parseinfo = " ";
+
+ fprintf(out, "0x%08x: %s 0x%08x: %s", offset, parseinfo,
+ data[index],
+ index == 0 ? "" : " ");
va_start(va, fmt);
vfprintf(out, fmt, va);
va_end(va);
}
-
static int
decode_mi(uint32_t *data, int count, uint32_t hw_offset, int *failures)
{
@@ -94,10 +73,11 @@ decode_mi(uint32_t *data, int count, uint32_t hw_offset, int *failures)
} opcodes_mi[] = {
{ 0x08, 0, 1, 1, "MI_ARB_ON_OFF" },
{ 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" },
+ { 0x30, 0x3f, 3, 3, "MI_BATCH_BUFFER" },
{ 0x31, 0x3f, 2, 2, "MI_BATCH_BUFFER_START" },
{ 0x14, 0x3f, 3, 3, "MI_DISPLAY_BUFFER_INFO" },
{ 0x04, 0, 1, 1, "MI_FLUSH" },
- { 0x22, 0, 3, 3, "MI_LOAD_REGISTER_IMM" },
+ { 0x22, 0x1f, 3, 3, "MI_LOAD_REGISTER_IMM" },
{ 0x13, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_EXCL" },
{ 0x12, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_INCL" },
{ 0x00, 0, 1, 1, "MI_NOOP" },
@@ -111,6 +91,11 @@ decode_mi(uint32_t *data, int count, uint32_t hw_offset, int *failures)
{ 0x03, 0, 1, 1, "MI_WAIT_FOR_EVENT" },
};
+ switch ((data[0] & 0x1f800000) >> 23) {
+ case 0x0a:
+ instr_out(data, hw_offset, 0, "MI_BATCH_BUFFER_END\n");
+ return -1;
+ }
for (opcode = 0; opcode < sizeof(opcodes_mi) / sizeof(opcodes_mi[0]);
opcode++) {
@@ -305,9 +290,13 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
static int
decode_3d_1c(uint32_t *data, int count, uint32_t hw_offset, int *failures)
{
- switch ((data[0] & 0x00f80000) >> 19) {
+ uint32_t opcode;
+
+ opcode = (data[0] & 0x00f80000) >> 19;
+
+ switch (opcode) {
case 0x11:
- instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISALBE\n");
+ instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n");
return 1;
case 0x10:
instr_out(data, hw_offset, 0, "3DSTATE_SCISSOR_ENABLE\n");
@@ -323,7 +312,8 @@ decode_3d_1c(uint32_t *data, int count, uint32_t hw_offset, int *failures)
return 1;
}
- instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_1c opcode = 0x%x\n",
+ opcode);
(*failures)++;
return 1;
}
@@ -381,7 +371,7 @@ i915_get_instruction_dst(uint32_t *data, int i, char *dstname, int do_mask)
sprintf(dstname, "oD%s%s", dstmask, sat);
break;
case 6:
- if (dst_nr > 2)
+ if (dst_nr > 3)
fprintf(out, "bad destination reg U%d\n", dst_nr);
sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat);
break;
@@ -452,7 +442,7 @@ i915_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name)
break;
case 6:
sprintf(name, "U%d", src_nr);
- if (src_nr > 2)
+ if (src_nr > 3)
fprintf(out, "bad src reg %s\n", name);
break;
default:
@@ -797,10 +787,14 @@ i915_decode_instruction(uint32_t *data, uint32_t hw_offset,
}
static int
-decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i830)
+decode_3d_1d(uint32_t *data, int count,
+ uint32_t hw_offset,
+ uint32_t devid,
+ int *failures)
{
- unsigned int len, i, c, opcode, word, map, sampler, instr;
+ unsigned int len, i, c, idx, word, map, sampler, instr;
char *format;
+ uint32_t opcode;
struct {
uint32_t opcode;
@@ -811,7 +805,7 @@ decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i
} opcodes_3d_1d[] = {
{ 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" },
{ 0x86, 0, 4, 4, "3DSTATE_CHROMA_KEY" },
- { 0x9c, 0, 1, 1, "3DSTATE_CLEAR_PARAMETERS" },
+ { 0x9c, 0, 7, 7, "3DSTATE_CLEAR_PARAMETERS" },
{ 0x88, 0, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" },
{ 0x99, 0, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" },
{ 0x9a, 0, 2, 2, "3DSTATE_DEFAULT_SPECULAR" },
@@ -819,7 +813,6 @@ decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i
{ 0x97, 0, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" },
{ 0x85, 0, 2, 2, "3DSTATE_DEST_BUFFER_VARIABLES" },
{ 0x80, 0, 5, 5, "3DSTATE_DRAWING_RECTANGLE" },
- { 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" },
{ 0x9d, 0, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" },
{ 0x9e, 0, 4, 4, "3DSTATE_MONO_FILTER" },
{ 0x89, 0, 4, 4, "3DSTATE_FOG_MODE" },
@@ -831,9 +824,11 @@ decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i
{ 0x8d, 1, 3, 3, "3DSTATE_W_STATE_I830" },
{ 0x01, 1, 2, 2, "3DSTATE_COLOR_FACTOR_I830" },
{ 0x02, 1, 2, 2, "3DSTATE_MAP_COORD_SETBIND_I830" },
- };
+ }, *opcode_3d_1d;
+
+ opcode = (data[0] & 0x00ff0000) >> 16;
- switch ((data[0] & 0x00ff0000) >> 16) {
+ switch (opcode) {
case 0x07:
/* This instruction is unusual. A 0 length means just 1 DWORD instead of
* 2. The 0 length is specified in one place to be unsupported, but
@@ -888,26 +883,56 @@ decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i
instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
len = (data[0] & 0x0000000f) + 2;
i = 1;
- for (word = 0; word <= 7; word++) {
+ for (word = 0; word <= 8; word++) {
if (data[0] & (1 << (4 + word))) {
if (i >= count)
BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_1");
/* save vertex state for decode */
- if (word == 2) {
- saved_s2_set = 1;
- saved_s2 = data[i];
- }
- if (word == 4) {
- saved_s4_set = 1;
- saved_s4 = data[i];
+ if (IS_9XX(devid)) {
+ if (word == 2) {
+ saved_s2_set = 1;
+ saved_s2 = data[i];
+ }
+ if (word == 4) {
+ saved_s4_set = 1;
+ saved_s4 = data[i];
+ }
}
instr_out(data, hw_offset, i++, "S%d\n", word);
}
}
if (len != i) {
- fprintf(out, "Bad count in 3DSTATE_LOAD_INDIRECT\n");
+ fprintf(out, "Bad count in 3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
+ (*failures)++;
+ }
+ return len;
+ case 0x03:
+ instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_2\n");
+ len = (data[0] & 0x0000000f) + 2;
+ i = 1;
+ for (word = 6; word <= 14; word++) {
+ if (data[0] & (1 << word)) {
+ if (i >= count)
+ BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_2");
+
+ if (word == 6)
+ instr_out(data, hw_offset, i++, "TBCF\n");
+ else if (word >= 7 && word <= 10) {
+ instr_out(data, hw_offset, i++, "TB%dC\n", word - 7);
+ instr_out(data, hw_offset, i++, "TB%dA\n", word - 7);
+ } else if (word >= 11 && word <= 14) {
+ instr_out(data, hw_offset, i++, "TM%dS0\n", word - 11);
+ instr_out(data, hw_offset, i++, "TM%dS1\n", word - 11);
+ instr_out(data, hw_offset, i++, "TM%dS2\n", word - 11);
+ instr_out(data, hw_offset, i++, "TM%dS3\n", word - 11);
+ instr_out(data, hw_offset, i++, "TM%dS4\n", word - 11);
+ }
+ }
+ }
+ if (len != i) {
+ fprintf(out, "Bad count in 3DSTATE_LOAD_STATE_IMMEDIATE_2\n");
(*failures)++;
}
return len;
@@ -919,11 +944,28 @@ decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i
i = 2;
for (map = 0; map <= 15; map++) {
if (data[1] & (1 << map)) {
+ int width, height, pitch, dword;
+ const char *tiling;
+
if (i + 3 >= count)
BUFFER_FAIL(count, len, "3DSTATE_MAP_STATE");
+
instr_out(data, hw_offset, i++, "map %d MS2\n", map);
- instr_out(data, hw_offset, i++, "map %d MS3\n", map);
- instr_out(data, hw_offset, i++, "map %d MS4\n", map);
+
+ dword = data[i];
+ width = ((dword >> 10) & ((1 << 11) - 1))+1;
+ height = ((dword >> 21) & ((1 << 11) - 1))+1;
+
+ tiling = "none";
+ if (dword & (1 << 2))
+ tiling = "fenced";
+ else if (dword & (1 << 1))
+ tiling = dword & (1 << 0) ? "Y" : "X";
+ instr_out(data, hw_offset, i++, "map %d MS3 [width=%d, height=%d, tiling=%s]\n", map, width, height, tiling);
+
+ dword = data[i];
+ pitch = 4*(((dword >> 21) & ((1 << 11) - 1))+1);
+ instr_out(data, hw_offset, i++, "map %d MS4 [pitch=%d]\n", map, pitch);
}
}
if (len != i) {
@@ -979,8 +1021,8 @@ decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i
}
return len;
case 0x01:
- if (i830)
- break;
+ if (!IS_9XX(devid))
+ break;
instr_out(data, hw_offset, 0, "3DSTATE_SAMPLER_STATE\n");
instr_out(data, hw_offset, 1, "mask\n");
len = (data[0] & 0x0000003f) + 2;
@@ -1031,32 +1073,61 @@ decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i
format,
(data[1] & (1 << 31)) ? "en" : "dis");
return len;
+
+ case 0x8e:
+ {
+ const char *name, *tiling;
+
+ len = (data[0] & 0x0000000f) + 2;
+ if (len != 3)
+ fprintf(out, "Bad count in 3DSTATE_BUFFER_INFO\n");
+ if (count < 3)
+ BUFFER_FAIL(count, len, "3DSTATE_BUFFER_INFO");
+
+ switch((data[1] >> 24) & 0x7) {
+ case 0x3: name = "color"; break;
+ case 0x7: name = "depth"; break;
+ default: name = "unknown"; break;
+ }
+
+ tiling = "none";
+ if (data[1] & (1 << 23))
+ tiling = "fenced";
+ else if (data[1] & (1 << 22))
+ tiling = data[1] & (1 << 21) ? "Y" : "X";
+
+ instr_out(data, hw_offset, 0, "3DSTATE_BUFFER_INFO\n");
+ instr_out(data, hw_offset, 1, "%s, tiling = %s, pitch=%d\n", name, tiling, data[1]&0xffff);
+
+ instr_out(data, hw_offset, 2, "address\n");
+ return len;
+ }
}
- for (opcode = 0; opcode < sizeof(opcodes_3d_1d) / sizeof(opcodes_3d_1d[0]);
- opcode++)
+ for (idx = 0; idx < ARRAY_SIZE(opcodes_3d_1d); idx++)
{
- if (opcodes_3d_1d[opcode].i830_only && !i830)
+ opcode_3d_1d = &opcodes_3d_1d[idx];
+ if (opcode_3d_1d->i830_only && IS_9XX(devid))
continue;
- if (((data[0] & 0x00ff0000) >> 16) == opcodes_3d_1d[opcode].opcode) {
+ if (((data[0] & 0x00ff0000) >> 16) == opcode_3d_1d->opcode) {
len = 1;
- instr_out(data, hw_offset, 0, "%s\n", opcodes_3d_1d[opcode].name);
- if (opcodes_3d_1d[opcode].max_len > 1) {
+ instr_out(data, hw_offset, 0, "%s\n", opcode_3d_1d->name);
+ if (opcode_3d_1d->max_len > 1) {
len = (data[0] & 0x0000ffff) + 2;
- if (len < opcodes_3d_1d[opcode].min_len ||
- len > opcodes_3d_1d[opcode].max_len)
+ if (len < opcode_3d_1d->min_len ||
+ len > opcode_3d_1d->max_len)
{
fprintf(out, "Bad count in %s\n",
- opcodes_3d_1d[opcode].name);
+ opcode_3d_1d->name);
(*failures)++;
}
}
for (i = 1; i < len; i++) {
if (i >= count)
- BUFFER_FAIL(count, len, opcodes_3d_1d[opcode].name);
+ BUFFER_FAIL(count, len, opcode_3d_1d->name);
instr_out(data, hw_offset, i, "dword %d\n", i);
}
@@ -1064,7 +1135,7 @@ decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i
}
}
- instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_1d opcode = 0x%x\n", opcode);
(*failures)++;
return 1;
}
@@ -1074,8 +1145,10 @@ decode_3d_primitive(uint32_t *data, int count, uint32_t hw_offset,
int *failures)
{
char immediate = (data[0] & (1 << 23)) == 0;
- unsigned int len, i;
+ unsigned int len, i, ret;
char *primtype;
+ int original_s2 = saved_s2;
+ int original_s4 = saved_s4;
switch ((data[0] >> 18) & 0xf) {
case 0x0: primtype = "TRILIST"; break;
@@ -1088,7 +1161,7 @@ decode_3d_primitive(uint32_t *data, int count, uint32_t hw_offset,
case 0x7: primtype = "RECTLIST"; break;
case 0x8: primtype = "POINTLIST"; break;
case 0x9: primtype = "DIB"; break;
- case 0xa: primtype = "CLEAR_RECT"; break;
+ case 0xa: primtype = "CLEAR_RECT"; saved_s4 = 3 << 6; saved_s2 = ~0; break;
default: primtype = "unknown"; break;
}
@@ -1192,6 +1265,8 @@ decode_3d_primitive(uint32_t *data, int count, uint32_t hw_offset,
vertex++;
}
}
+
+ ret = len;
} else {
/* indirect vertices */
len = data[0] & 0x0000ffff; /* index count */
@@ -1209,13 +1284,15 @@ decode_3d_primitive(uint32_t *data, int count, uint32_t hw_offset,
if ((data[i] & 0xffff) == 0xffff) {
instr_out(data, hw_offset, i,
" indices: (terminator)\n");
- return i;
+ ret = i;
+ goto out;
} else if ((data[i] >> 16) == 0xffff) {
instr_out(data, hw_offset, i,
" indices: 0x%04x, "
"(terminator)\n",
data[i] & 0xffff);
- return i;
+ ret = i;
+ goto out;
} else {
instr_out(data, hw_offset, i,
" indices: 0x%04x, 0x%04x\n",
@@ -1225,7 +1302,8 @@ decode_3d_primitive(uint32_t *data, int count, uint32_t hw_offset,
fprintf(out,
"3DPRIMITIVE: no terminator found in index buffer\n");
(*failures)++;
- return count;
+ ret = count;
+ goto out;
} else {
/* fixed size vertex index buffer */
for (i = 0; i < len; i += 2) {
@@ -1240,7 +1318,8 @@ decode_3d_primitive(uint32_t *data, int count, uint32_t hw_offset,
}
}
}
- return (len + 1) / 2 + 1;
+ ret = (len + 1) / 2 + 1;
+ goto out;
} else {
/* sequential vertex access */
if (count < 2)
@@ -1249,17 +1328,22 @@ decode_3d_primitive(uint32_t *data, int count, uint32_t hw_offset,
"3DPRIMITIVE sequential indirect %s, %d starting from "
"%d\n", primtype, len, data[1] & 0xffff);
instr_out(data, hw_offset, 1, " start\n");
- return 2;
+ ret = 2;
+ goto out;
}
}
- return len;
+out:
+ saved_s2 = original_s2;
+ saved_s4 = original_s4;
+ return ret;
}
static int
-decode_3d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+decode_3d(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int *failures)
{
- unsigned int opcode;
+ uint32_t opcode;
+ unsigned int idx;
struct {
uint32_t opcode;
@@ -1276,42 +1360,44 @@ decode_3d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
{ 0x0d, 1, 1, "3DSTATE_MODES_4" },
{ 0x0c, 1, 1, "3DSTATE_MODES_5" },
{ 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" },
- };
+ }, *opcode_3d;
+
+ opcode = (data[0] & 0x1f000000) >> 24;
- switch ((data[0] & 0x1f000000) >> 24) {
+ switch (opcode) {
case 0x1f:
return decode_3d_primitive(data, count, hw_offset, failures);
case 0x1d:
- return decode_3d_1d(data, count, hw_offset, failures, 0);
+ return decode_3d_1d(data, count, hw_offset, devid, failures);
case 0x1c:
return decode_3d_1c(data, count, hw_offset, failures);
}
- for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]);
- opcode++) {
- if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) {
+ for (idx = 0; idx < ARRAY_SIZE(opcodes_3d); idx++) {
+ opcode_3d = &opcodes_3d[idx];
+ if (opcode == opcode_3d->opcode) {
unsigned int len = 1, i;
- instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name);
- if (opcodes_3d[opcode].max_len > 1) {
+ instr_out(data, hw_offset, 0, "%s\n", opcode_3d->name);
+ if (opcode_3d->max_len > 1) {
len = (data[0] & 0xff) + 2;
- if (len < opcodes_3d[opcode].min_len ||
- len > opcodes_3d[opcode].max_len)
+ if (len < opcode_3d->min_len ||
+ len > opcode_3d->max_len)
{
- fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name);
+ fprintf(out, "Bad count in %s\n", opcode_3d->name);
}
}
for (i = 1; i < len; i++) {
if (i >= count)
- BUFFER_FAIL(count, len, opcodes_3d[opcode].name);
+ BUFFER_FAIL(count, len, opcode_3d->name);
instr_out(data, hw_offset, i, "dword %d\n", i);
}
return len;
}
}
- instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d opcode = 0x%x\n", opcode);
(*failures)++;
return 1;
}
@@ -1403,11 +1489,86 @@ get_965_prim_type(uint32_t data)
}
static int
-decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+i965_decode_urb_fence(uint32_t *data, uint32_t hw_offset, int len, int count,
+ int *failures)
{
- unsigned int opcode, len;
- int i;
- char *desc1;
+ uint32_t vs_fence, clip_fence, gs_fence, sf_fence, vfe_fence, cs_fence;
+
+ if (len != 3)
+ fprintf(out, "Bad count in URB_FENCE\n");
+ if (count < 3)
+ BUFFER_FAIL(count, len, "URB_FENCE");
+
+ vs_fence = data[1] & 0x3ff;
+ gs_fence = (data[1] >> 10) & 0x3ff;
+ clip_fence = (data[1] >> 20) & 0x3ff;
+ sf_fence = data[2] & 0x3ff;
+ vfe_fence = (data[2] >> 10) & 0x3ff;
+ cs_fence = (data[2] >> 20) & 0x7ff;
+
+ instr_out(data, hw_offset, 0, "URB_FENCE: %s%s%s%s%s%s\n",
+ (data[0] >> 13) & 1 ? "cs " : "",
+ (data[0] >> 12) & 1 ? "vfe " : "",
+ (data[0] >> 11) & 1 ? "sf " : "",
+ (data[0] >> 10) & 1 ? "clip " : "",
+ (data[0] >> 9) & 1 ? "gs " : "",
+ (data[0] >> 8) & 1 ? "vs " : "");
+ instr_out(data, hw_offset, 1,
+ "vs fence: %d, clip_fence: %d, gs_fence: %d\n",
+ vs_fence, clip_fence, gs_fence);
+ instr_out(data, hw_offset, 2,
+ "sf fence: %d, vfe_fence: %d, cs_fence: %d\n",
+ sf_fence, vfe_fence, cs_fence);
+ if (gs_fence < vs_fence)
+ fprintf(out, "gs fence < vs fence!\n");
+ if (clip_fence < gs_fence)
+ fprintf(out, "clip fence < gs fence!\n");
+ if (sf_fence < clip_fence)
+ fprintf(out, "sf fence < clip fence!\n");
+ if (cs_fence < sf_fence)
+ fprintf(out, "cs fence < sf fence!\n");
+
+ return len;
+}
+
+static void
+state_base_out(uint32_t *data, uint32_t hw_offset, unsigned int index,
+ char *name)
+{
+ if (data[index] & 1) {
+ instr_out(data, hw_offset, index, "%s state base address 0x%08x\n",
+ name, data[index] & ~1);
+ } else {
+ instr_out(data, hw_offset, index, "%s state base not updated\n",
+ name);
+ }
+}
+
+static void
+state_max_out(uint32_t *data, uint32_t hw_offset, unsigned int index,
+ char *name)
+{
+ if (data[index] & 1) {
+ if (data[index] == 1) {
+ instr_out(data, hw_offset, index,
+ "%s state upper bound disabled\n", name);
+ } else {
+ instr_out(data, hw_offset, index, "%s state upper bound 0x%08x\n",
+ name, data[index] & ~1);
+ }
+ } else {
+ instr_out(data, hw_offset, index, "%s state upper bound not updated\n",
+ name);
+ }
+}
+
+static int
+decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int *failures)
+{
+ uint32_t opcode;
+ unsigned int idx, len;
+ int i, sba_len;
+ char *desc1 = NULL;
struct {
uint32_t opcode;
@@ -1436,57 +1597,78 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
{ 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" },
{ 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" },
{ 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" },
+ { 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" },
{ 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" },
+ { 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" },
+ { 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" },
{ 0x7b00, 6, 6, "3DPRIMITIVE" },
+ { 0x7802, 4, 4, "3DSTATE_SAMPLER_STATE_POINTERS" },
+ { 0x7805, 3, 3, "3DSTATE_URB" },
{ 0x780e, 4, 4, "3DSTATE_CC_STATE_POINTERS" },
{ 0x7810, 6, 6, "3DSTATE_VS_STATE" },
- { 0x7811, 6, 6, "3DSTATE_GS_STATE" },
+ { 0x7811, 7, 7, "3DSTATE_GS_STATE" },
+ { 0x7812, 4, 4, "3DSTATE_CLIP_STATE" },
+ { 0x7813, 20, 20, "3DSTATE_SF_STATE" },
+ { 0x7814, 9, 9, "3DSTATE_WM_STATE" },
{ 0x7812, 4, 4, "3DSTATE_CLIP_STATE" },
{ 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" },
{ 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" },
- };
+ { 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" },
+ { 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" },
+ }, *opcode_3d;
len = (data[0] & 0x0000ffff) + 2;
- switch ((data[0] & 0xffff0000) >> 16) {
+ opcode = (data[0] & 0xffff0000) >> 16;
+ switch (opcode) {
+ case 0x6000:
+ len = (data[0] & 0x000000ff) + 2;
+ return i965_decode_urb_fence(data, hw_offset, len, count, failures);
+ case 0x6001:
+ instr_out(data, hw_offset, 0, "CS_URB_STATE\n");
+ instr_out(data, hw_offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n",
+ (data[1] >> 4) & 0x1f,
+ (((data[1] >> 4) & 0x1f) + 1) * 64,
+ data[1] & 0x7);
+ return len;
+ case 0x6002:
+ len = (data[0] & 0x000000ff) + 2;
+ instr_out(data, hw_offset, 0, "CONSTANT_BUFFER: %s\n",
+ (data[0] >> 8) & 1 ? "valid" : "invalid");
+ instr_out(data, hw_offset, 1, "offset: 0x%08x, length: %d bytes\n",
+ data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64);
+ return len;
case 0x6101:
- if (len != 6)
+ if (IS_GEN6(devid))
+ sba_len = 10;
+ else if (IS_IRONLAKE(devid))
+ sba_len = 8;
+ else
+ sba_len = 6;
+ if (len != sba_len)
fprintf(out, "Bad count in STATE_BASE_ADDRESS\n");
- if (count < 6)
+ if (len != sba_len)
BUFFER_FAIL(count, len, "STATE_BASE_ADDRESS");
+ i = 0;
instr_out(data, hw_offset, 0,
"STATE_BASE_ADDRESS\n");
-
- if (data[1] & 1) {
- instr_out(data, hw_offset, 1, "General state at 0x%08x\n",
- data[1] & ~1);
- } else
- instr_out(data, hw_offset, 1, "General state not updated\n");
-
- if (data[2] & 1) {
- instr_out(data, hw_offset, 2, "Surface state at 0x%08x\n",
- data[2] & ~1);
- } else
- instr_out(data, hw_offset, 2, "Surface state not updated\n");
-
- if (data[3] & 1) {
- instr_out(data, hw_offset, 3, "Indirect state at 0x%08x\n",
- data[3] & ~1);
- } else
- instr_out(data, hw_offset, 3, "Indirect state not updated\n");
-
- if (data[4] & 1) {
- instr_out(data, hw_offset, 4, "General state upper bound 0x%08x\n",
- data[4] & ~1);
- } else
- instr_out(data, hw_offset, 4, "General state not updated\n");
-
- if (data[5] & 1) {
- instr_out(data, hw_offset, 5, "Indirect state upper bound 0x%08x\n",
- data[5] & ~1);
- } else
- instr_out(data, hw_offset, 5, "Indirect state not updated\n");
+ i++;
+
+ state_base_out(data, hw_offset, i++, "general");
+ state_base_out(data, hw_offset, i++, "surface");
+ if (IS_GEN6(devid))
+ state_base_out(data, hw_offset, i++, "dynamic");
+ state_base_out(data, hw_offset, i++, "indirect");
+ if (IS_IRONLAKE(devid) || IS_GEN6(devid))
+ state_base_out(data, hw_offset, i++, "instruction");
+
+ state_max_out(data, hw_offset, i++, "general");
+ if (IS_GEN6(devid))
+ state_max_out(data, hw_offset, i++, "dynamic");
+ state_max_out(data, hw_offset, i++, "indirect");
+ if (IS_IRONLAKE(devid) || IS_GEN6(devid))
+ state_max_out(data, hw_offset, i++, "instruction");
return len;
case 0x7800:
@@ -1505,18 +1687,33 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
instr_out(data, hw_offset, 6, "CC state\n");
return len;
case 0x7801:
- if (len != 6)
+ len = (data[0] & 0x000000ff) + 2;
+ if (len != 6 && len != 4)
fprintf(out, "Bad count in 3DSTATE_BINDING_TABLE_POINTERS\n");
- if (count < 6)
- BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS");
+ if (len == 6) {
+ if (count < 6)
+ BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS");
+ instr_out(data, hw_offset, 0,
+ "3DSTATE_BINDING_TABLE_POINTERS\n");
+ instr_out(data, hw_offset, 1, "VS binding table\n");
+ instr_out(data, hw_offset, 2, "GS binding table\n");
+ instr_out(data, hw_offset, 3, "Clip binding table\n");
+ instr_out(data, hw_offset, 4, "SF binding table\n");
+ instr_out(data, hw_offset, 5, "WM binding table\n");
+ } else {
+ if (count < 4)
+ BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS");
- instr_out(data, hw_offset, 0,
- "3DSTATE_BINDING_TABLE_POINTERS\n");
- instr_out(data, hw_offset, 1, "VS binding table\n");
- instr_out(data, hw_offset, 2, "GS binding table\n");
- instr_out(data, hw_offset, 3, "Clip binding table\n");
- instr_out(data, hw_offset, 4, "SF binding table\n");
- instr_out(data, hw_offset, 5, "WM binding table\n");
+ instr_out(data, hw_offset, 0,
+ "3DSTATE_BINDING_TABLE_POINTERS: VS mod %d, "
+ "GS mod %d, PS mod %d\n",
+ (data[0] & (1 << 8)) != 0,
+ (data[0] & (1 << 9)) != 0,
+ (data[0] & (1 << 10)) != 0);
+ instr_out(data, hw_offset, 1, "VS binding table\n");
+ instr_out(data, hw_offset, 2, "GS binding table\n");
+ instr_out(data, hw_offset, 3, "WM binding table\n");
+ }
return len;
@@ -1567,6 +1764,18 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
}
return len;
+ case 0x780d:
+ len = (data[0] & 0xff) + 2;
+ if (len != 4)
+ fprintf(out, "Bad count in 3DSTATE_VIEWPORT_STATE_POINTERS\n");
+ if (count < len)
+ BUFFER_FAIL(count, len, "3DSTATE_VIEWPORT_STATE_POINTERS");
+ instr_out(data, hw_offset, 0, "3DSTATE_VIEWPORT_STATE_POINTERS\n");
+ instr_out(data, hw_offset, 1, "clip\n");
+ instr_out(data, hw_offset, 2, "sf\n");
+ instr_out(data, hw_offset, 3, "cc\n");
+ return len;
+
case 0x780a:
len = (data[0] & 0xff) + 2;
if (len != 3)
@@ -1616,10 +1825,10 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
((data[3] & 0x0007ffc0) >> 6) + 1,
((data[3] & 0xfff80000) >> 19) + 1);
instr_out(data, hw_offset, 4, "volume depth\n");
- if (len == 6)
+ if (len >= 6)
instr_out(data, hw_offset, 5, "\n");
- if (len == 7)
- instr_out(data, hw_offset, 6, "render target view extent\n");
+ if (len >= 7)
+ instr_out(data, hw_offset, 6, "render target view extent\n");
return len;
@@ -1638,12 +1847,11 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
}
instr_out(data, hw_offset, 0,
"PIPE_CONTROL: %s, %sdepth stall, %sRC write flush, "
- "%sinst flush, %stexture flush\n",
+ "%sinst flush\n",
desc1,
data[0] & (1 << 13) ? "" : "no ",
data[0] & (1 << 12) ? "" : "no ",
- data[0] & (1 << 11) ? "" : "no ",
- data[0] & (1 << 9) ? "" : "no ");
+ data[0] & (1 << 11) ? "" : "no ");
instr_out(data, hw_offset, 1, "destination address\n");
instr_out(data, hw_offset, 2, "immediate dword low\n");
instr_out(data, hw_offset, 3, "immediate dword high\n");
@@ -1668,40 +1876,41 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
return len;
}
- for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]);
- opcode++) {
- if ((data[0] & 0xffff0000) >> 16 == opcodes_3d[opcode].opcode) {
+ for (idx = 0; idx < ARRAY_SIZE(opcodes_3d); idx++) {
+ opcode_3d = &opcodes_3d[idx];
+ if ((data[0] & 0xffff0000) >> 16 == opcode_3d->opcode) {
unsigned int i;
len = 1;
- instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name);
- if (opcodes_3d[opcode].max_len > 1) {
+ instr_out(data, hw_offset, 0, "%s\n", opcode_3d->name);
+ if (opcode_3d->max_len > 1) {
len = (data[0] & 0xff) + 2;
- if (len < opcodes_3d[opcode].min_len ||
- len > opcodes_3d[opcode].max_len)
+ if (len < opcode_3d->min_len ||
+ len > opcode_3d->max_len)
{
- fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name);
+ fprintf(out, "Bad count in %s\n", opcode_3d->name);
}
}
for (i = 1; i < len; i++) {
if (i >= count)
- BUFFER_FAIL(count, len, opcodes_3d[opcode].name);
+ BUFFER_FAIL(count, len, opcode_3d->name);
instr_out(data, hw_offset, i, "dword %d\n", i);
}
return len;
}
}
- instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_965 opcode = 0x%x\n", opcode);
(*failures)++;
return 1;
}
static int
-decode_3d_i830(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+decode_3d_i830(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int *failures)
{
- unsigned int opcode;
+ unsigned int idx;
+ uint32_t opcode;
struct {
uint32_t opcode;
@@ -1725,42 +1934,44 @@ decode_3d_i830(uint32_t *data, int count, uint32_t hw_offset, int *failures)
{ 0x0f, 1, 1, "3DSTATE_MODES_2" },
{ 0x15, 1, 1, "3DSTATE_FOG_COLOR" },
{ 0x16, 1, 1, "3DSTATE_MODES_4" },
- };
+ }, *opcode_3d;
- switch ((data[0] & 0x1f000000) >> 24) {
+ opcode = (data[0] & 0x1f000000) >> 24;
+
+ switch (opcode) {
case 0x1f:
return decode_3d_primitive(data, count, hw_offset, failures);
case 0x1d:
- return decode_3d_1d(data, count, hw_offset, failures, 1);
+ return decode_3d_1d(data, count, hw_offset, devid, failures);
case 0x1c:
return decode_3d_1c(data, count, hw_offset, failures);
}
- for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]);
- opcode++) {
- if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) {
+ for (idx = 0; idx < ARRAY_SIZE(opcodes_3d); idx++) {
+ opcode_3d = &opcodes_3d[idx];
+ if ((data[0] & 0x1f000000) >> 24 == opcode_3d->opcode) {
unsigned int len = 1, i;
- instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name);
- if (opcodes_3d[opcode].max_len > 1) {
+ instr_out(data, hw_offset, 0, "%s\n", opcode_3d->name);
+ if (opcode_3d->max_len > 1) {
len = (data[0] & 0xff) + 2;
- if (len < opcodes_3d[opcode].min_len ||
- len > opcodes_3d[opcode].max_len)
+ if (len < opcode_3d->min_len ||
+ len > opcode_3d->max_len)
{
- fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name);
+ fprintf(out, "Bad count in %s\n", opcode_3d->name);
}
}
for (i = 1; i < len; i++) {
if (i >= count)
- BUFFER_FAIL(count, len, opcodes_3d[opcode].name);
+ BUFFER_FAIL(count, len, opcode_3d->name);
instr_out(data, hw_offset, i, "dword %d\n", i);
}
return len;
}
}
- instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+ instr_out(data, hw_offset, 0, "3D UNKNOWN: 3d_i830 opcode = 0x%x\n", opcode);
(*failures)++;
return 1;
}
@@ -1773,18 +1984,37 @@ decode_3d_i830(uint32_t *data, int count, uint32_t hw_offset, int *failures)
* \param hw_offset hardware address for the buffer
*/
int
-intel_decode(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid)
+intel_decode(uint32_t *data, int count,
+ uint32_t hw_offset,
+ uint32_t devid,
+ uint32_t ignore_end_of_batchbuffer)
{
+ int ret;
int index = 0;
int failures = 0;
- out = stderr;
+ out = stdout;
while (index < count) {
switch ((data[index] & 0xe0000000) >> 29) {
case 0x0:
- index += decode_mi(data + index, count - index,
+ ret = decode_mi(data + index, count - index,
hw_offset + index * 4, &failures);
+
+ /* If MI_BATCHBUFFER_END happened, then dump the rest of the
+ * output in case we some day want it in debugging, but don't
+ * decode it since it'll just confuse in the common case.
+ */
+ if (ret == -1) {
+ if (ignore_end_of_batchbuffer) {
+ index++;
+ } else {
+ for (index = index + 1; index < count; index++) {
+ instr_out(data, hw_offset, index, "\n");
+ }
+ }
+ } else
+ index += ret;
break;
case 0x2:
index += decode_2d(data + index, count - index,
@@ -1793,13 +2023,16 @@ intel_decode(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid)
case 0x3:
if (IS_965(devid)) {
index += decode_3d_965(data + index, count - index,
- hw_offset + index * 4, &failures);
+ hw_offset + index * 4,
+ devid, &failures);
} else if (IS_9XX(devid)) {
index += decode_3d(data + index, count - index,
- hw_offset + index * 4, &failures);
+ hw_offset + index * 4,
+ devid, &failures);
} else {
index += decode_3d_i830(data + index, count - index,
- hw_offset + index * 4, &failures);
+ hw_offset + index * 4,
+ devid, &failures);
}
break;
default:
@@ -1820,3 +2053,8 @@ void intel_decode_context_reset(void)
saved_s4_set = 1;
}
+void intel_decode_context_set_head_tail(uint32_t head, uint32_t tail)
+{
+ head_offset = head;
+ tail_offset = tail;
+}
diff --git a/src/mesa/drivers/dri/intel/intel_decode.h b/src/mesa/drivers/dri/intel/intel_decode.h
index c50644a46b5..a13b075cef8 100644
--- a/src/mesa/drivers/dri/intel/intel_decode.h
+++ b/src/mesa/drivers/dri/intel/intel_decode.h
@@ -25,5 +25,7 @@
*
*/
-int intel_decode(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid);
+int intel_decode(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid,
+ uint32_t ignore_end_of_batchbuffer);
+void intel_decode_context_set_head_tail(uint32_t head, uint32_t tail);
void intel_decode_context_reset(void);
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index 076fee89bdd..0e2fe893fed 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -26,6 +26,7 @@
**************************************************************************/
#include "main/glheader.h"
+#include "main/arbprogram.h"
#include "main/enums.h"
#include "main/image.h"
#include "main/colormac.h"
@@ -44,7 +45,6 @@
#include "main/attrib.h"
#include "main/enable.h"
#include "main/viewport.h"
-#include "shader/arbprogram.h"
#include "swrast/swrast.h"
#include "intel_screen.h"
diff --git a/src/mesa/drivers/dri/mach64/mach64_screen.c b/src/mesa/drivers/dri/mach64/mach64_screen.c
index 4bd6dee6c0e..239e8bc8fd0 100644
--- a/src/mesa/drivers/dri/mach64/mach64_screen.c
+++ b/src/mesa/drivers/dri/mach64/mach64_screen.c
@@ -256,7 +256,6 @@ mach64CreateScreen( __DRIscreen *sPriv )
mach64Screen->driScreen = sPriv;
i = 0;
- mach64Screen->extensions[i++] = &driFrameTrackingExtension.base;
if ( mach64Screen->irq != 0 ) {
mach64Screen->extensions[i++] = &driSwapControlExtension.base;
mach64Screen->extensions[i++] = &driMediaStreamCounterExtension.base;
diff --git a/src/mesa/drivers/dri/mga/mga_xmesa.c b/src/mesa/drivers/dri/mga/mga_xmesa.c
index 31007ccb1da..3a31dfb44a3 100644
--- a/src/mesa/drivers/dri/mga/mga_xmesa.c
+++ b/src/mesa/drivers/dri/mga/mga_xmesa.c
@@ -182,7 +182,6 @@ mgaFillInModes( __DRIscreen *psp,
const __DRIextension *mgaScreenExtensions[] = {
&driReadDrawableExtension,
&driSwapControlExtension.base,
- &driFrameTrackingExtension.base,
&driMediaStreamCounterExtension.base,
NULL
};
diff --git a/src/mesa/drivers/dri/r128/r128_screen.c b/src/mesa/drivers/dri/r128/r128_screen.c
index 2d918028236..7626a159d6a 100644
--- a/src/mesa/drivers/dri/r128/r128_screen.c
+++ b/src/mesa/drivers/dri/r128/r128_screen.c
@@ -221,7 +221,6 @@ r128CreateScreen( __DRIscreen *sPriv )
r128Screen->driScreen = sPriv;
i = 0;
- r128Screen->extensions[i++] = &driFrameTrackingExtension.base;
if ( r128Screen->irq != 0 ) {
r128Screen->extensions[i++] = &driSwapControlExtension.base;
r128Screen->extensions[i++] = &driMediaStreamCounterExtension.base;
diff --git a/src/mesa/drivers/dri/r128/r128_state.c b/src/mesa/drivers/dri/r128/r128_state.c
index 4d773feaaa8..9ad25f7f463 100644
--- a/src/mesa/drivers/dri/r128/r128_state.c
+++ b/src/mesa/drivers/dri/r128/r128_state.c
@@ -42,6 +42,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/context.h"
#include "main/enums.h"
#include "main/colormac.h"
+#include "main/macros.h"
#include "swrast/swrast.h"
#include "vbo/vbo.h"
#include "tnl/tnl.h"
diff --git a/src/mesa/drivers/dri/r128/r128_tex.c b/src/mesa/drivers/dri/r128/r128_tex.c
index 4ec4be9a47b..b5a19b510af 100644
--- a/src/mesa/drivers/dri/r128/r128_tex.c
+++ b/src/mesa/drivers/dri/r128/r128_tex.c
@@ -44,6 +44,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/texobj.h"
#include "main/imports.h"
#include "main/texobj.h"
+#include "main/macros.h"
#include "xmlpool.h"
diff --git a/src/mesa/drivers/dri/r200/r200_fragshader.c b/src/mesa/drivers/dri/r200/r200_fragshader.c
index 85c1b7bdd19..2a9268dd343 100644
--- a/src/mesa/drivers/dri/r200/r200_fragshader.c
+++ b/src/mesa/drivers/dri/r200/r200_fragshader.c
@@ -26,11 +26,11 @@
**************************************************************************/
#include "main/glheader.h"
+#include "main/atifragshader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "tnl/t_context.h"
-#include "shader/atifragshader.h"
-#include "shader/program.h"
+#include "program/program.h"
#include "r200_context.h"
#include "r200_ioctl.h"
#include "r200_tex.h"
diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c
index b72f69b7f45..df73de5394a 100644
--- a/src/mesa/drivers/dri/r200/r200_ioctl.c
+++ b/src/mesa/drivers/dri/r200/r200_ioctl.c
@@ -253,113 +253,6 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
}
}
-/* This version of AllocateMemoryMESA allocates only GART memory, and
- * only does so after the point at which the driver has been
- * initialized.
- *
- * Theoretically a valid context isn't required. However, in this
- * implementation, it is, as I'm using the hardware lock to protect
- * the kernel data structures, and the current context to get the
- * device fd.
- */
-void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size,
- GLfloat readfreq, GLfloat writefreq,
- GLfloat priority)
-{
- GET_CURRENT_CONTEXT(ctx);
- r200ContextPtr rmesa;
- int region_offset;
- drm_radeon_mem_alloc_t alloc;
- int ret;
-
- if (R200_DEBUG & RADEON_IOCTL)
- fprintf(stderr, "%s sz %d %f/%f/%f\n", __FUNCTION__, size, readfreq,
- writefreq, priority);
-
- if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->radeon.radeonScreen->gartTextures.map)
- return NULL;
-
- if (getenv("R200_NO_ALLOC"))
- return NULL;
-
- alloc.region = RADEON_MEM_REGION_GART;
- alloc.alignment = 0;
- alloc.size = size;
- alloc.region_offset = &region_offset;
-
- ret = drmCommandWriteRead( rmesa->radeon.radeonScreen->driScreen->fd,
- DRM_RADEON_ALLOC,
- &alloc, sizeof(alloc));
-
- if (ret) {
- fprintf(stderr, "%s: DRM_RADEON_ALLOC ret %d\n", __FUNCTION__, ret);
- return NULL;
- }
-
- {
- char *region_start = (char *)rmesa->radeon.radeonScreen->gartTextures.map;
- return (void *)(region_start + region_offset);
- }
-}
-
-
-/* Called via glXFreeMemoryMESA() */
-void r200FreeMemoryMESA(__DRIscreen *screen, GLvoid *pointer)
-{
- GET_CURRENT_CONTEXT(ctx);
- r200ContextPtr rmesa;
- ptrdiff_t region_offset;
- drm_radeon_mem_free_t memfree;
- int ret;
-
- if (R200_DEBUG & RADEON_IOCTL)
- fprintf(stderr, "%s %p\n", __FUNCTION__, pointer);
-
- if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->radeon.radeonScreen->gartTextures.map) {
- fprintf(stderr, "%s: no context\n", __FUNCTION__);
- return;
- }
-
- region_offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map;
-
- if (region_offset < 0 ||
- region_offset > rmesa->radeon.radeonScreen->gartTextures.size) {
- fprintf(stderr, "offset %d outside range 0..%d\n", region_offset,
- rmesa->radeon.radeonScreen->gartTextures.size);
- return;
- }
-
- memfree.region = RADEON_MEM_REGION_GART;
- memfree.region_offset = region_offset;
-
- ret = drmCommandWrite( rmesa->radeon.radeonScreen->driScreen->fd,
- DRM_RADEON_FREE,
- &memfree, sizeof(memfree));
-
- if (ret)
- fprintf(stderr, "%s: DRM_RADEON_FREE ret %d\n", __FUNCTION__, ret);
-}
-
-/* Called via glXGetMemoryOffsetMESA() */
-GLuint r200GetMemoryOffsetMESA(__DRIscreen *screen, const GLvoid *pointer)
-{
- GET_CURRENT_CONTEXT(ctx);
- r200ContextPtr rmesa;
- GLuint card_offset;
-
- if (!ctx || !(rmesa = R200_CONTEXT(ctx)) ) {
- fprintf(stderr, "%s: no context\n", __FUNCTION__);
- return ~0;
- }
-
- if (!r200IsGartMemory( rmesa, pointer, 0 ))
- return ~0;
-
- card_offset = r200GartOffsetFromVirtual( rmesa, pointer );
-
- return card_offset - rmesa->radeon.radeonScreen->gart_base;
-}
-
GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer,
GLint size )
{
diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.h b/src/mesa/drivers/dri/r200/r200_ioctl.h
index 8d51aefa042..c5dca89bc76 100644
--- a/src/mesa/drivers/dri/r200/r200_ioctl.h
+++ b/src/mesa/drivers/dri/r200/r200_ioctl.h
@@ -64,11 +64,6 @@ extern void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset);
extern void r200InitIoctlFuncs( struct dd_function_table *functions );
-extern void *r200AllocateMemoryMESA( __DRIscreen *screen, GLsizei size, GLfloat readfreq,
- GLfloat writefreq, GLfloat priority );
-extern void r200FreeMemoryMESA( __DRIscreen *screen, GLvoid *pointer );
-extern GLuint r200GetMemoryOffsetMESA( __DRIscreen *screen, const GLvoid *pointer );
-
extern GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer,
GLint size );
diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c
index 12f869d96f8..5d268319f3f 100644
--- a/src/mesa/drivers/dri/r200/r200_vertprog.c
+++ b/src/mesa/drivers/dri/r200/r200_vertprog.c
@@ -33,11 +33,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
-#include "shader/program.h"
-#include "shader/prog_instruction.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_statevars.h"
-#include "shader/programopt.h"
+#include "program/program.h"
+#include "program/prog_instruction.h"
+#include "program/prog_parameter.h"
+#include "program/prog_statevars.h"
+#include "program/programopt.h"
#include "tnl/tnl.h"
#include "r200_context.h"
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
index ff3801dc676..3167d49bcae 100644
--- a/src/mesa/drivers/dri/r300/compiler/Makefile
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -23,6 +23,7 @@ C_SOURCES = \
radeon_dataflow_deadcode.c \
radeon_dataflow_swizzles.c \
radeon_optimize.c \
+ radeon_rename_regs.c \
r3xx_fragprog.c \
r300_fragprog.c \
r300_fragprog_swizzle.c \
diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript
index 50d9cdb7f2d..c6f47a6f8a4 100755
--- a/src/mesa/drivers/dri/r300/compiler/SConscript
+++ b/src/mesa/drivers/dri/r300/compiler/SConscript
@@ -22,6 +22,7 @@ r300compiler = env.ConvenienceLibrary(
'radeon_pair_schedule.c',
'radeon_pair_regalloc.c',
'radeon_optimize.c',
+ 'radeon_rename_regs.c',
'radeon_emulate_branches.c',
'radeon_emulate_loops.c',
'radeon_dataflow.c',
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 38312658d65..a326ee4c4fa 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -29,6 +29,7 @@
#include "radeon_emulate_loops.h"
#include "radeon_program_alu.h"
#include "radeon_program_tex.h"
+#include "radeon_rename_regs.h"
#include "r300_fragprog.h"
#include "r300_fragprog_swizzle.h"
#include "r500_fragprog.h"
@@ -97,25 +98,27 @@ static void debug_program_log(struct r300_fragment_program_compiler* c, const ch
void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{
+ struct emulate_loop_state loop_state;
+
rewrite_depth_out(c);
+ /* This transformation needs to be done before any of the IF
+ * instructions are modified. */
+ radeonTransformKILP(&c->Base);
+
debug_program_log(c, "before compilation");
- /* XXX Ideally this should be done only for r3xx, but since
- * we don't have branching support for r5xx, we use the emulation
- * on all chipsets. */
-
- if(c->Base.is_r500){
- rc_emulate_loops(&c->Base, R500_PFS_MAX_INST);
+ if (c->Base.is_r500){
+ r500_transform_unroll_loops(&c->Base, &loop_state);
+ debug_program_log(c, "after r500 transform loops");
}
else{
- rc_emulate_loops(&c->Base, R300_PFS_MAX_ALU_INST);
+ rc_transform_unroll_loops(&c->Base, &loop_state);
+ debug_program_log(c, "after transform loops");
+
+ rc_emulate_branches(&c->Base);
+ debug_program_log(c, "after emulate branches");
}
- debug_program_log(c, "after emulate loops");
-
- rc_emulate_branches(&c->Base);
-
- debug_program_log(c, "after emulate branches");
if (c->Base.is_r500) {
struct radeon_program_transformation transformations[] = {
@@ -162,6 +165,11 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
debug_program_log(c, "after deadcode");
+ if(!c->Base.is_r500){
+ rc_emulate_loops(&loop_state, R300_PFS_MAX_ALU_INST);
+ debug_program_log(c, "after emulate loops");
+ }
+
rc_optimize(&c->Base);
debug_program_log(c, "after dataflow optimize");
@@ -172,6 +180,16 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
debug_program_log(c, "after dataflow passes");
+ if(!c->Base.is_r500) {
+ /* This pass makes it easier for the scheduler to group TEX
+ * instructions and reduces the chances of creating too
+ * many texture indirections.*/
+ rc_rename_regs(&c->Base);
+ if (c->Base.Error)
+ return;
+ debug_program_log(c, "after register rename");
+ }
+
rc_pair_translate(c);
if (c->Base.Error)
return;
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index 507b2e532fe..d347b4df9cd 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -30,6 +30,7 @@
#include "radeon_program_alu.h"
#include "radeon_swizzle.h"
#include "radeon_emulate_branches.h"
+#include "radeon_emulate_loops.h"
/*
* Take an already-setup and valid source then swizzle it appropriately to
@@ -145,7 +146,8 @@ static unsigned long t_src(struct r300_vertex_program_code *vp,
t_swizzle(GET_SWZ(src->Swizzle, 2)),
t_swizzle(GET_SWZ(src->Swizzle, 3)),
t_src_class(src->File),
- src->Negate) | (src->RelAddr << 4);
+ src->Negate) |
+ (src->RelAddr << 4) | (src->Abs << 3);
}
static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
@@ -161,7 +163,7 @@ static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
t_swizzle(GET_SWZ(src->Swizzle, 0)),
t_src_class(src->File),
src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
- (src->RelAddr << 4);
+ (src->RelAddr << 4) | (src->Abs << 3);
}
static int valid_dst(struct r300_vertex_program_code *vp,
@@ -348,7 +350,8 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
if (!valid_dst(compiler->code, &vpi->DstReg))
continue;
- if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
+ if (compiler->code->length >= R500_VS_MAX_ALU_DWORDS ||
+ (compiler->code->length >= R300_VS_MAX_ALU_DWORDS && !compiler->Base.is_r500)) {
rc_error(&compiler->Base, "Vertex program has too many instructions\n");
return;
}
@@ -404,7 +407,7 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
{
struct rc_instruction *inst;
unsigned int num_orig_temps = 0;
- char hwtemps[VSF_MAX_FRAGMENT_TEMPS];
+ char hwtemps[R300_VS_MAX_TEMPS];
struct temporary_allocation * ta;
unsigned int i, j;
@@ -463,11 +466,11 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
unsigned int orig = inst->U.I.DstReg.Index;
if (!ta[orig].Allocated) {
- for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) {
+ for(j = 0; j < R300_VS_MAX_TEMPS; ++j) {
if (!hwtemps[j])
break;
}
- if (j >= VSF_MAX_FRAGMENT_TEMPS) {
+ if (j >= R300_VS_MAX_TEMPS) {
fprintf(stderr, "Out of hw temporaries\n");
} else {
ta[orig].Allocated = 1;
@@ -485,6 +488,44 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
}
}
+/**
+ * R3xx-R4xx vertex engine does not support the Absolute source operand modifier
+ * and the Saturate opcode modifier. Only Absolute is currently transformed.
+ */
+static int transform_nonnative_modifiers(
+ struct radeon_compiler *c,
+ struct rc_instruction *inst,
+ void* unused)
+{
+ const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned i;
+
+ /* Transform ABS(a) to MAX(a, -a). */
+ for (i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].Abs) {
+ struct rc_instruction *new_inst;
+ unsigned temp;
+
+ inst->U.I.SrcReg[i].Abs = 0;
+
+ temp = rc_find_free_temporary(c);
+
+ new_inst = rc_insert_new_instruction(c, inst->Prev);
+ new_inst->U.I.Opcode = RC_OPCODE_MAX;
+ new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ new_inst->U.I.DstReg.Index = temp;
+ new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i];
+ new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i];
+ new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+
+ memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i]));
+ inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[i].Index = temp;
+ inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW;
+ }
+ }
+ return 1;
+}
/**
* Vertex engine cannot read two inputs or two constants at the same time.
@@ -591,6 +632,8 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
{
+ struct emulate_loop_state loop_state;
+
compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
addArtificialOutputs(compiler);
@@ -600,19 +643,48 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
/* XXX Ideally this should be done only for r3xx, but since
* we don't have branching support for r5xx, we use the emulation
* on all chipsets. */
+ rc_transform_unroll_loops(&compiler->Base, &loop_state);
+
+ debug_program_log(compiler, "after transform loops");
+
+ if (compiler->Base.is_r500){
+ rc_emulate_loops(&loop_state, R500_VS_MAX_ALU);
+ } else {
+ rc_emulate_loops(&loop_state, R300_VS_MAX_ALU);
+ }
+ debug_program_log(compiler, "after emulate loops");
+
rc_emulate_branches(&compiler->Base);
debug_program_log(compiler, "after emulate branches");
- {
+ if (compiler->Base.is_r500) {
struct radeon_program_transformation transformations[] = {
{ &r300_transform_vertex_alu, 0 },
{ &r300_transform_trig_scale_vertex, 0 }
};
radeonLocalTransform(&compiler->Base, 2, transformations);
- }
- debug_program_log(compiler, "after native rewrite");
+ debug_program_log(compiler, "after native rewrite");
+ } else {
+ struct radeon_program_transformation transformations[] = {
+ { &r300_transform_vertex_alu, 0 },
+ { &radeonTransformTrigSimple, 0 }
+ };
+ radeonLocalTransform(&compiler->Base, 2, transformations);
+
+ debug_program_log(compiler, "after native rewrite");
+
+ /* Note: This pass has to be done seperately from ALU rewrite,
+ * because it needs to check every instruction.
+ */
+ struct radeon_program_transformation transformations2[] = {
+ { &transform_nonnative_modifiers, 0 },
+ };
+ radeonLocalTransform(&compiler->Base, 1, transformations2);
+
+ debug_program_log(compiler, "after emulate modifiers");
+ }
{
/* Note: This pass has to be done seperately from ALU rewrite,
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index 632f0bcf4f8..e6b5522c5b9 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -30,6 +30,7 @@
#include <stdio.h>
#include "../r300_reg.h"
+#include "radeon_emulate_loops.h"
/**
* Rewrite IF instructions to use the ALU result special register.
@@ -59,6 +60,31 @@ int r500_transform_IF(
return 1;
}
+/**
+ * Rewrite loops to make them easier to emit. This is not a local
+ * transformation, because it modifies and reorders an entire block of code.
+ */
+void r500_transform_unroll_loops(struct radeon_compiler * c,
+ struct emulate_loop_state *s)
+{
+ int i;
+
+ rc_transform_unroll_loops(c, s);
+
+ for( i = s->LoopCount - 1; i >= 0; i-- ){
+ struct rc_instruction * inst_continue;
+ if(!s->Loops[i].EndLoop){
+ continue;
+ }
+ /* Insert a continue instruction at the end of the loop. This
+ * is required in order to emit loops correctly. */
+ inst_continue = rc_insert_new_instruction(c,
+ s->Loops[i].EndIf->Prev);
+ inst_continue->U.I.Opcode = RC_OPCODE_CONTINUE;
+ }
+
+}
+
static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
unsigned int relevant;
@@ -252,7 +278,7 @@ void r500FragmentProgramDump(struct rX00_fragment_program_code *c)
struct r500_fragment_program_code *code = &c->code.r500;
fprintf(stderr, "R500 Fragment Program:\n--------\n");
- int n;
+ int n, i;
uint32_t inst;
uint32_t inst0;
char *str = NULL;
@@ -275,8 +301,8 @@ void r500FragmentProgramDump(struct rX00_fragment_program_code *c)
to_mask((inst >> 15) & 0xf));
switch(inst0 & 0x3) {
- case 0:
- case 1:
+ case R500_INST_TYPE_ALU:
+ case R500_INST_TYPE_OUT:
fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1);
inst = code->inst[n].inst1;
@@ -319,9 +345,87 @@ void r500FragmentProgramDump(struct rX00_fragment_program_code *c)
(inst >> 23) & 0x3,
(inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
break;
- case 2:
+ case R500_INST_TYPE_FC:
+ fprintf(stderr, "\t2:FC_INST 0x%08x:", code->inst[n].inst2);
+ inst = code->inst[n].inst2;
+ /* JUMP_FUNC JUMP_ANY*/
+ fprintf(stderr, "0x%02x %1x ", inst >> 8 & 0xff,
+ (inst & R500_FC_JUMP_ANY) >> 5);
+
+ /* OP */
+ switch(inst & 0x7){
+ case R500_FC_OP_JUMP:
+ fprintf(stderr, "JUMP");
+ break;
+ case R500_FC_OP_LOOP:
+ fprintf(stderr, "LOOP");
+ break;
+ case R500_FC_OP_ENDLOOP:
+ fprintf(stderr, "ENDLOOP");
+ break;
+ case R500_FC_OP_REP:
+ fprintf(stderr, "REP");
+ break;
+ case R500_FC_OP_ENDREP:
+ fprintf(stderr, "ENDREP");
+ break;
+ case R500_FC_OP_BREAKLOOP:
+ fprintf(stderr, "BREAKLOOP");
+ break;
+ case R500_FC_OP_BREAKREP:
+ fprintf(stderr, "BREAKREP");
+ break;
+ case R500_FC_OP_CONTINUE:
+ fprintf(stderr, "CONTINUE");
+ break;
+ }
+ fprintf(stderr," ");
+ /* A_OP */
+ switch(inst & (0x3 << 6)){
+ case R500_FC_A_OP_NONE:
+ fprintf(stderr, "NONE");
+ break;
+ case R500_FC_A_OP_POP:
+ fprintf(stderr, "POP");
+ break;
+ case R500_FC_A_OP_PUSH:
+ fprintf(stderr, "PUSH");
+ break;
+ }
+ /* B_OP0 B_OP1 */
+ for(i=0; i<2; i++){
+ fprintf(stderr, " ");
+ switch(inst & (0x3 << (24 + (i * 2)))){
+ /* R500_FC_B_OP0_NONE
+ * R500_FC_B_OP1_NONE */
+ case 0:
+ fprintf(stderr, "NONE");
+ break;
+ case R500_FC_B_OP0_DECR:
+ case R500_FC_B_OP1_DECR:
+ fprintf(stderr, "DECR");
+ break;
+ case R500_FC_B_OP0_INCR:
+ case R500_FC_B_OP1_INCR:
+ fprintf(stderr, "INCR");
+ break;
+ }
+ }
+ /*POP_CNT B_ELSE */
+ fprintf(stderr, " %d %1x", (inst >> 16) & 0x1f, (inst & R500_FC_B_ELSE) >> 4);
+ inst = code->inst[n].inst3;
+ /* JUMP_ADDR */
+ fprintf(stderr, " %d", inst >> 16);
+
+ if(code->inst[n].inst2 & R500_FC_IGNORE_UNCOVERED){
+ fprintf(stderr, " IGN_UNC");
+ }
+ inst = code->inst[n].inst3;
+ fprintf(stderr, "\n\t3:FC_ADDR 0x%08x:", inst);
+ fprintf(stderr, "BOOL: 0x%02x, INT: 0x%02x, JUMP_ADDR: %d, JMP_GLBL: %1x\n",
+ inst & 0x1f, (inst >> 8) & 0x1f, (inst >> 16) & 0x1ff, inst >> 31);
break;
- case 3:
+ case R500_INST_TYPE_TEX:
inst = code->inst[n].inst1;
fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
index 4efbae7ba67..0d005a794ff 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
@@ -36,6 +36,8 @@
#include "radeon_compiler.h"
#include "radeon_swizzle.h"
+struct emulate_loop_state;
+
extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c);
@@ -47,4 +49,6 @@ extern int r500_transform_IF(
struct rc_instruction * inst,
void* data);
+void r500_transform_unroll_loops(struct radeon_compiler * c,
+ struct emulate_loop_state * s);
#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index fb2d8b5a9c0..0bd8f0a239f 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -45,6 +45,8 @@
#include "radeon_program_pair.h"
+#define MAX_BRANCH_DEPTH_FULL 32
+#define MAX_BRANCH_DEPTH_PARTIAL 4
#define PROG_CODE \
struct r500_fragment_program_code *code = &c->code->code.r500
@@ -61,6 +63,10 @@ struct branch_info {
int Endif;
};
+struct loop_info {
+ int LoopStart;
+};
+
struct emit_state {
struct radeon_compiler * C;
struct r500_fragment_program_code * Code;
@@ -69,7 +75,12 @@ struct emit_state {
unsigned int CurrentBranchDepth;
unsigned int BranchesReserved;
+ struct loop_info * Loops;
+ unsigned int CurrentLoopDepth;
+ unsigned int LoopsReserved;
+
unsigned int MaxBranchDepth;
+
};
static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
@@ -359,16 +370,49 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
- if (inst->U.I.Opcode == RC_OPCODE_IF) {
- if (s->CurrentBranchDepth >= 32) {
+ switch(inst->U.I.Opcode){
+ struct branch_info * branch;
+ struct loop_info * loop;
+ case RC_OPCODE_BGNLOOP:
+ memory_pool_array_reserve(&s->C->Pool, struct loop_info,
+ s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
+
+ loop = &s->Loops[s->CurrentLoopDepth++];
+
+ /* We don't emit an instruction for BGNLOOP, so we need to
+ * decrement the instruction counter, but first we need to
+ * set LoopStart to the current value of inst_end, which
+ * will end up being the first real instruction in the loop.*/
+ loop->LoopStart = s->Code->inst_end--;
+ break;
+
+ case RC_OPCODE_BRK:
+ /* Don't emit an instruction for BRK */
+ s->Code->inst_end--;
+ break;
+
+ case RC_OPCODE_CONTINUE:
+ loop = &s->Loops[s->CurrentLoopDepth - 1];
+ s->Code->inst[newip].inst2 = R500_FC_OP_JUMP |
+ R500_FC_JUMP_FUNC(0xff);
+ s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->LoopStart);
+ break;
+
+ case RC_OPCODE_ENDLOOP:
+ /* Don't emit an instruction for ENDLOOP */
+ s->Code->inst_end--;
+ s->CurrentLoopDepth--;
+ break;
+
+ case RC_OPCODE_IF:
+ if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) {
rc_error(s->C, "Branch depth exceeds hardware limit");
return;
}
-
memory_pool_array_reserve(&s->C->Pool, struct branch_info,
s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
- struct branch_info * branch = &s->Branches[s->CurrentBranchDepth++];
+ branch = &s->Branches[s->CurrentBranchDepth++];
branch->If = newip;
branch->Else = -1;
branch->Endif = -1;
@@ -377,29 +421,50 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
s->MaxBranchDepth = s->CurrentBranchDepth;
/* actual instruction is filled in at ENDIF time */
- } else if (inst->U.I.Opcode == RC_OPCODE_ELSE) {
+ break;
+
+ case RC_OPCODE_ELSE:
if (!s->CurrentBranchDepth) {
rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
return;
}
- struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1];
+ branch = &s->Branches[s->CurrentBranchDepth - 1];
branch->Else = newip;
/* actual instruction is filled in at ENDIF time */
- } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+ break;
+
+ case RC_OPCODE_ENDIF:
if (!s->CurrentBranchDepth) {
rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
return;
}
- struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1];
- branch->Endif = newip;
-
+ branch = &s->Branches[s->CurrentBranchDepth - 1];
+
+ if(inst->Prev->U.I.Opcode == RC_OPCODE_BRK){
+ branch->Endif = --s->Code->inst_end;
+ s->Code->inst[branch->Endif].inst2 |=
+ R500_FC_B_OP0_DECR;
+ }
+ else{
+ branch->Endif = newip;
+
+ s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
+ | R500_FC_A_OP_NONE /* no address stack */
+ | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
+ | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
+ | R500_FC_B_OP1_NONE /* no branch counter if stay */
+ | R500_FC_B_POP_CNT(1)
+ ;
+ s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+ }
s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
| R500_FC_A_OP_NONE /* no address stack */
| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
| R500_FC_B_OP0_INCR /* increment branch counter if stay */
+ | R500_FC_IGNORE_UNCOVERED
;
if (branch->Else >= 0) {
@@ -421,17 +486,10 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
}
- s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
- | R500_FC_A_OP_NONE /* no address stack */
- | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
- | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
- | R500_FC_B_OP1_NONE /* no branch counter if stay */
- | R500_FC_B_POP_CNT(1)
- ;
- s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
s->CurrentBranchDepth--;
- } else {
+ break;
+ default:
rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
}
}
@@ -486,6 +544,10 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
}
+ /* Use FULL flow control mode if branches are nested deep enough.
+ * We don not need to enable FULL flow control mode for loops, becasue
+ * we aren't using the hardware loop instructions.
+ */
if (s.MaxBranchDepth >= 4) {
if (code->max_temp_idx < 1)
code->max_temp_idx = 1;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index 1979e7e4e49..d03689763bc 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -235,8 +235,11 @@ struct rX00_fragment_program_code {
};
-#define VSF_MAX_FRAGMENT_LENGTH (255*4)
-#define VSF_MAX_FRAGMENT_TEMPS (14)
+#define R300_VS_MAX_ALU 256
+#define R300_VS_MAX_ALU_DWORDS (R300_VS_MAX_ALU * 4)
+#define R500_VS_MAX_ALU 1024
+#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4)
+#define R300_VS_MAX_TEMPS 32
#define VSF_MAX_INPUTS 32
#define VSF_MAX_OUTPUTS 32
@@ -244,8 +247,8 @@ struct rX00_fragment_program_code {
struct r300_vertex_program_code {
int length;
union {
- uint32_t d[VSF_MAX_FRAGMENT_LENGTH];
- float f[VSF_MAX_FRAGMENT_LENGTH];
+ uint32_t d[R500_VS_MAX_ALU_DWORDS];
+ float f[R500_VS_MAX_ALU_DWORDS];
} body;
int pos_end;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
index e3c2c83c0cf..fbb4235c223 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
@@ -202,32 +202,65 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f
inst = inst->Prev) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- if (opcode->IsFlowControl) {
- if (opcode->Opcode == RC_OPCODE_ENDIF) {
- push_branch(&s);
- } else {
- if (s.BranchStackSize) {
- struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
-
- if (opcode->Opcode == RC_OPCODE_IF) {
- or_updatemasks(&s.R,
- &s.R,
- branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
-
- s.BranchStackSize--;
- } else if (opcode->Opcode == RC_OPCODE_ELSE) {
- if (branch->HaveElse) {
- rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
- } else {
- memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
- memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
- branch->HaveElse = 1;
- }
+ switch(opcode->Opcode){
+ /* Mark all sources in the loop body as used before doing
+ * normal deadcode analysis. This is probably not optimal.
+ */
+ case RC_OPCODE_ENDLOOP:
+ {
+ int endloops = 1;
+ struct rc_instruction *ptr;
+ for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){
+ opcode = rc_get_opcode_info(ptr->U.I.Opcode);
+ if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
+ endloops--;
+ continue;
+ }
+ if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){
+ endloops++;
+ continue;
+ }
+ if(opcode->HasDstReg){
+ int src = 0;
+ unsigned int srcmasks[3];
+ rc_compute_sources_for_writemask(ptr,
+ ptr->U.I.DstReg.WriteMask, srcmasks);
+ for(src=0; src < opcode->NumSrcRegs; src++){
+ mark_used(&s,
+ ptr->U.I.SrcReg[src].File,
+ ptr->U.I.SrcReg[src].Index,
+ srcmasks[src]);
+ }
+ }
+ }
+ break;
+ }
+ case RC_OPCODE_CONTINUE:
+ case RC_OPCODE_BRK:
+ case RC_OPCODE_BGNLOOP:
+ break;
+ case RC_OPCODE_ENDIF:
+ push_branch(&s);
+ break;
+ default:
+ if (opcode->IsFlowControl && s.BranchStackSize) {
+ struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
+ if (opcode->Opcode == RC_OPCODE_IF) {
+ or_updatemasks(&s.R,
+ &s.R,
+ branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
+
+ s.BranchStackSize--;
+ } else if (opcode->Opcode == RC_OPCODE_ELSE) {
+ if (branch->HaveElse) {
+ rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
} else {
- rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
+ memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
+ memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
+ branch->HaveElse = 1;
}
} else {
- rc_error(c, "%s: Unexpected control flow instruction\n", __FUNCTION__);
+ rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
}
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
index 4c5d29f4217..131e9e7436d 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
@@ -38,22 +38,6 @@
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
-struct emulate_loop_state {
- struct radeon_compiler * C;
- struct loop_info * Loops;
- unsigned int LoopCount;
- unsigned int LoopReserved;
-};
-
-struct loop_info {
- struct rc_instruction * BeginLoop;
- struct rc_instruction * Cond;
- struct rc_instruction * If;
- struct rc_instruction * Brk;
- struct rc_instruction * EndIf;
- struct rc_instruction * EndLoop;
-};
-
struct const_value {
struct radeon_compiler * C;
@@ -94,22 +78,13 @@ static int src_reg_is_immediate(struct rc_src_register * src,
c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE;
}
-static unsigned int loop_count_instructions(struct loop_info * loop)
+static unsigned int loop_calc_iterations(struct emulate_loop_state *s,
+ struct loop_info * loop, unsigned int max_instructions)
{
- unsigned int count = 0;
- struct rc_instruction * inst = loop->BeginLoop->Next;
- while(inst != loop->EndLoop){
- count++;
- inst = inst->Next;
- }
- return count;
-}
-
-static unsigned int loop_calc_iterations(struct loop_info * loop,
- unsigned int loop_count, unsigned int max_instructions)
-{
- unsigned int icount = loop_count_instructions(loop);
- return max_instructions / (loop_count * icount);
+ unsigned int total_i = rc_recompute_ips(s->C);
+ unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1;
+ /* +1 because the program already has one iteration of the loop. */
+ return 1 + ((max_instructions - total_i) / (s->LoopCount * loop_i));
}
static void loop_unroll(struct emulate_loop_state * s,
@@ -214,8 +189,7 @@ static void get_incr_amount(void * data, struct rc_instruction * inst,
}
static int transform_const_loop(struct emulate_loop_state * s,
- struct loop_info * loop,
- struct rc_instruction * cond)
+ struct loop_info * loop)
{
int end_loops = 1;
int iterations;
@@ -228,13 +202,13 @@ static int transform_const_loop(struct emulate_loop_state * s,
/* Find the counter and the upper limit */
- if(src_reg_is_immediate(&cond->U.I.SrcReg[0], s->C)){
- limit = &cond->U.I.SrcReg[0];
- counter = &cond->U.I.SrcReg[1];
+ if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], s->C)){
+ limit = &loop->Cond->U.I.SrcReg[0];
+ counter = &loop->Cond->U.I.SrcReg[1];
}
- else if(src_reg_is_immediate(&cond->U.I.SrcReg[1], s->C)){
- limit = &cond->U.I.SrcReg[1];
- counter = &cond->U.I.SrcReg[0];
+ else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], s->C)){
+ limit = &loop->Cond->U.I.SrcReg[1];
+ counter = &loop->Cond->U.I.SrcReg[0];
}
else{
DBG("No constant limit.\n");
@@ -293,8 +267,22 @@ static int transform_const_loop(struct emulate_loop_state * s,
* simple, since we only support increment and decrement loops.
*/
limit_value = get_constant_value(s->C, limit, 0);
- iterations = (int) ((limit_value - counter_value.Value) /
+ DBG("Limit is %f.\n", limit_value);
+ switch(loop->Cond->U.I.Opcode){
+ case RC_OPCODE_SGT:
+ case RC_OPCODE_SLT:
+ iterations = (int) ceilf((limit_value - counter_value.Value) /
count_inst.Amount);
+ break;
+
+ case RC_OPCODE_SLE:
+ case RC_OPCODE_SGE:
+ iterations = (int) floorf((limit_value - counter_value.Value) /
+ count_inst.Amount) + 1;
+ break;
+ default:
+ return 0;
+ }
DBG("Loop will have %d iterations.\n", iterations);
@@ -414,7 +402,7 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
}
/* Check if the number of loops is known at compile time. */
- if(transform_const_loop(s, loop, ptr)){
+ if(transform_const_loop(s, loop)){
return loop->BeginLoop->Next;
}
@@ -425,9 +413,14 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
return loop->EndLoop;
}
-static void rc_transform_loops(struct emulate_loop_state * s)
+void rc_transform_unroll_loops(struct radeon_compiler *c,
+ struct emulate_loop_state * s)
{
- struct rc_instruction * ptr = s->C->Program.Instructions.Next;
+ struct rc_instruction * ptr;
+
+ memset(s, 0, sizeof(struct emulate_loop_state));
+ s->C = c;
+ ptr = s->C->Program.Instructions.Next;
while(ptr != &s->C->Program.Instructions) {
if(ptr->Type == RC_INSTRUCTION_NORMAL &&
ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
@@ -440,7 +433,7 @@ static void rc_transform_loops(struct emulate_loop_state * s)
}
}
-static void rc_unroll_loops(struct emulate_loop_state *s,
+void rc_emulate_loops(struct emulate_loop_state *s,
unsigned int max_instructions)
{
int i;
@@ -451,24 +444,8 @@ static void rc_unroll_loops(struct emulate_loop_state *s,
if(!s->Loops[i].EndLoop){
continue;
}
- unsigned int iterations = loop_calc_iterations(&s->Loops[i],
- s->LoopCount, max_instructions);
+ unsigned int iterations = loop_calc_iterations(s, &s->Loops[i],
+ max_instructions);
loop_unroll(s, &s->Loops[i], iterations);
}
}
-
-void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions)
-{
- struct emulate_loop_state s;
-
- memset(&s, 0, sizeof(struct emulate_loop_state));
- s.C = c;
-
- /* We may need to move these two operations to r3xx_(vert|frag)prog.c
- * and run the optimization passes between them in order to increase
- * the number of unrolls we can do for each loop.
- */
- rc_transform_loops(&s);
-
- rc_unroll_loops(&s, max_instructions);
-}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
index ddcf1c0fabe..7748813c4eb 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
@@ -7,6 +7,26 @@
struct radeon_compiler;
-void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions);
+struct loop_info {
+ struct rc_instruction * BeginLoop;
+ struct rc_instruction * Cond;
+ struct rc_instruction * If;
+ struct rc_instruction * Brk;
+ struct rc_instruction * EndIf;
+ struct rc_instruction * EndLoop;
+};
+
+struct emulate_loop_state {
+ struct radeon_compiler * C;
+ struct loop_info * Loops;
+ unsigned int LoopCount;
+ unsigned int LoopReserved;
+};
+
+void rc_transform_unroll_loops(struct radeon_compiler *c,
+ struct emulate_loop_state * s);
+
+void rc_emulate_loops(struct emulate_loop_state *s,
+ unsigned int max_instructions);
#endif /* RADEON_EMULATE_LOOPS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index 1dc16855dc1..04f234f11d8 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -386,6 +386,12 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.NumSrcRegs = 0,
},
{
+ .Opcode = RC_OPCODE_CONTINUE,
+ .Name = "CONTINUE",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0
+ },
+ {
.Opcode = RC_OPCODE_REPL_ALPHA,
.Name = "REPL_ALPHA",
.HasDstReg = 1
@@ -393,6 +399,10 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
{
.Opcode = RC_OPCODE_BEGIN_TEX,
.Name = "BEGIN_TEX"
+ },
+ {
+ .Opcode = RC_OPCODE_KILP,
+ .Name = "KILP",
}
};
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
index 91c82ac0890..8b9fa07dde2 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -187,6 +187,8 @@ typedef enum {
RC_OPCODE_ENDLOOP,
+ RC_OPCODE_CONTINUE,
+
/** special instruction, used in R300-R500 fragment program pair instructions
* indicates that the result of the alpha operation shall be replicated
* across all other channels */
@@ -197,6 +199,9 @@ typedef enum {
* can run simultaneously. */
RC_OPCODE_BEGIN_TEX,
+ /** Stop execution of the shader (GLSL discard) */
+ RC_OPCODE_KILP,
+
MAX_RC_OPCODE
} rc_opcode;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index 21d72108886..eca06515367 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -75,6 +75,15 @@ struct peephole_state {
int BranchDepth;
};
+/**
+ * This is a callback function that is meant to be passed to
+ * rc_for_all_reads_mask. This function will be called once for each source
+ * register in inst.
+ * @param inst The instruction that the source register belongs to.
+ * @param file The register file of the source register.
+ * @param index The index of the source register.
+ * @param mask The components of the source register that are being read from.
+ */
static void peephole_scan_read(void * data, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int mask)
{
@@ -153,6 +162,11 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo
for(struct rc_instruction * inst = inst_mov->Next;
inst != &c->Program.Instructions;
inst = inst->Next) {
+ /* XXX In the future we might be able to make the optimizer
+ * smart enough to handle loops. */
+ if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP){
+ return;
+ }
rc_for_all_reads_mask(inst, peephole_scan_read, &s);
rc_for_all_writes_mask(inst, peephole_scan_write, &s);
if (s.Conflict)
@@ -161,7 +175,8 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo
if (s.BranchDepth >= 0) {
if (inst->U.I.Opcode == RC_OPCODE_IF) {
s.BranchDepth++;
- } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+ } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF
+ || inst->U.I.Opcode == RC_OPCODE_ELSE) {
s.BranchDepth--;
if (s.BranchDepth < 0) {
s.DefinedMask &= ~s.MovMask;
@@ -208,7 +223,8 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo
if (s.BranchDepth >= 0) {
if (inst->U.I.Opcode == RC_OPCODE_IF) {
s.BranchDepth++;
- } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+ } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF
+ || inst->U.I.Opcode == RC_OPCODE_ELSE) {
s.BranchDepth--;
if (s.BranchDepth < 0)
break; /* no more readers after this point */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
index a279549ff89..fc540496c41 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
@@ -141,12 +141,28 @@ static void add_inst_to_list(struct schedule_instruction ** list, struct schedul
*list = inst;
}
+static void add_inst_to_list_end(struct schedule_instruction ** list,
+ struct schedule_instruction * inst)
+{
+ if(!*list){
+ *list = inst;
+ }else{
+ struct schedule_instruction * temp = *list;
+ while(temp->NextReady){
+ temp = temp->NextReady;
+ }
+ temp->NextReady = inst;
+ }
+}
+
static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
{
DBG("%i is now ready\n", sinst->Instruction->IP);
+ /* Adding Ready TEX instructions to the end of the "Ready List" helps
+ * us emit TEX instructions in blocks without losing our place. */
if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
- add_inst_to_list(&s->ReadyTEX, sinst);
+ add_inst_to_list_end(&s->ReadyTEX, sinst);
else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
add_inst_to_list(&s->ReadyRGB, sinst);
else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
@@ -163,11 +179,14 @@ static void decrease_dependencies(struct schedule_state * s, struct schedule_ins
instruction_ready(s, sinst);
}
-static void commit_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
-{
- DBG("%i: commit\n", sinst->Instruction->IP);
-
- for(unsigned int i = 0; i < sinst->NumReadValues; ++i) {
+/**
+ * This function decreases the dependencies of the next instruction that
+ * wants to write to each of sinst's read values.
+ */
+static void commit_update_reads(struct schedule_state * s,
+ struct schedule_instruction * sinst){
+ unsigned int i;
+ for(i = 0; i < sinst->NumReadValues; ++i) {
struct reg_value * v = sinst->ReadValues[i];
assert(v->NumReaders > 0);
v->NumReaders--;
@@ -176,8 +195,12 @@ static void commit_instruction(struct schedule_state * s, struct schedule_instru
decrease_dependencies(s, v->Next->Writer);
}
}
+}
- for(unsigned int i = 0; i < sinst->NumWriteValues; ++i) {
+static void commit_update_writes(struct schedule_state * s,
+ struct schedule_instruction * sinst){
+ unsigned int i;
+ for(i = 0; i < sinst->NumWriteValues; ++i) {
struct reg_value * v = sinst->WriteValues[i];
if (v->NumReaders) {
for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
@@ -196,6 +219,15 @@ static void commit_instruction(struct schedule_state * s, struct schedule_instru
}
}
+static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+ DBG("%i: commit\n", sinst->Instruction->IP);
+
+ commit_update_reads(s, sinst);
+
+ commit_update_writes(s, sinst);
+}
+
/**
* Emit all ready texture instructions in a single block.
*
@@ -208,21 +240,37 @@ static void emit_all_tex(struct schedule_state * s, struct rc_instruction * befo
assert(s->ReadyTEX);
- /* Don't let the ready list change under us! */
- readytex = s->ReadyTEX;
- s->ReadyTEX = 0;
-
/* Node marker for R300 */
struct rc_instruction * inst_begin = rc_insert_new_instruction(s->C, before->Prev);
inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
/* Link texture instructions back in */
+ readytex = s->ReadyTEX;
while(readytex) {
- struct schedule_instruction * tex = readytex;
+ rc_insert_instruction(before->Prev, readytex->Instruction);
+ DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
+
+ /* All of the TEX instructions in the same TEX block have
+ * their source registers read from before any of the
+ * instructions in that block write to their destination
+ * registers. This means that when we commit a TEX
+ * instruction, any other TEX instruction that wants to write
+ * to one of the committed instruction's source register can be
+ * marked as ready and should be emitted in the same TEX
+ * block. This prevents the following sequence from being
+ * emitted in two different TEX blocks:
+ * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
+ * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
+ */
+ commit_update_reads(s, readytex);
+ readytex = readytex->NextReady;
+ }
+ readytex = s->ReadyTEX;
+ s->ReadyTEX = 0;
+ while(readytex){
+ DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
+ commit_update_writes(s, readytex);
readytex = readytex->NextReady;
-
- rc_insert_instruction(before->Prev, tex->Instruction);
- commit_instruction(s, tex);
}
}
@@ -328,7 +376,7 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor
}
rc_insert_instruction(before->Prev, sinst->Instruction);
- commit_instruction(s, sinst);
+ commit_alu_instruction(s, sinst);
} else {
struct schedule_instruction **prgb;
struct schedule_instruction **palpha;
@@ -346,8 +394,8 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor
*prgb = (*prgb)->NextReady;
*palpha = (*palpha)->NextReady;
rc_insert_instruction(before->Prev, psirgb->Instruction);
- commit_instruction(s, psirgb);
- commit_instruction(s, psialpha);
+ commit_alu_instruction(s, psirgb);
+ commit_alu_instruction(s, psialpha);
goto success;
}
}
@@ -357,7 +405,7 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor
s->ReadyRGB = s->ReadyRGB->NextReady;
rc_insert_instruction(before->Prev, sinst->Instruction);
- commit_instruction(s, sinst);
+ commit_alu_instruction(s, sinst);
success: ;
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index c922d3d9a44..3cc28972934 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -973,3 +973,32 @@ int radeonTransformDeriv(struct radeon_compiler* c,
return 1;
}
+
+/**
+ * IF Temp[0].x -\
+ * KILP - > KIL -abs(Temp[0].x)
+ * ENDIF -/
+ *
+ * This needs to be done in its own pass, because it modifies the instructions
+ * before and after KILP.
+ */
+void radeonTransformKILP(struct radeon_compiler * c)
+{
+ struct rc_instruction * inst;
+ for (inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions; inst = inst->Next) {
+
+ if (inst->U.I.Opcode != RC_OPCODE_KILP
+ || inst->Prev->U.I.Opcode != RC_OPCODE_IF
+ || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) {
+ continue;
+ }
+ inst->U.I.Opcode = RC_OPCODE_KIL;
+ inst->U.I.SrcReg[0] = negate(absolute(inst->Prev->U.I.SrcReg[0]));
+
+ /* Remove IF */
+ rc_remove_instruction(inst->Prev);
+ /* Remove ENDIF */
+ rc_remove_instruction(inst->Next);
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
index 77d444476f2..e6e2cc20c5a 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
@@ -60,4 +60,6 @@ int radeonTransformDeriv(
struct rc_instruction * inst,
void*);
+void radeonTransformKILP(struct radeon_compiler * c);
+
#endif /* __RADEON_PROGRAM_ALU_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
new file mode 100644
index 00000000000..31c98668838
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2010 Tom Stellard <[email protected]>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ */
+
+#include "radeon_rename_regs.h"
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+struct reg_rename {
+ int old_index;
+ int new_index;
+ int temp_index;
+};
+
+static void rename_reg(void * data, struct rc_instruction * inst,
+ rc_register_file * file, unsigned int * index)
+{
+ struct reg_rename *r = data;
+
+ if(r->old_index == *index && *file == RC_FILE_TEMPORARY) {
+ *index = r->new_index;
+ }
+ else if(r->new_index == *index && *file == RC_FILE_TEMPORARY) {
+ *index = r->temp_index;
+ }
+}
+
+static void rename_all(
+ struct radeon_compiler *c,
+ struct rc_instruction * start,
+ unsigned int old,
+ unsigned int new,
+ unsigned int temp)
+{
+ struct rc_instruction * inst;
+ struct reg_rename r;
+ r.old_index = old;
+ r.new_index = new;
+ r.temp_index = temp;
+ for(inst = start; inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ rc_remap_registers(inst, rename_reg, &r);
+ }
+}
+
+/**
+ * This function renames registers in an attempt to get the code close to
+ * SSA form. After this function has completed, most of the register are only
+ * written to one time, with a few exceptions. For example, this block of code
+ * will not be modified by this function:
+ * Mov Temp[0].x Const[0].x
+ * Mov Temp[0].y Const[0].y
+ * Basically, destination registers will be renamed if:
+ * 1. There have been no previous writes to that register
+ * or
+ * 2. If the instruction is writting to the exact components (no more, no less)
+ * of a register that has been written to by previous instructions.
+ *
+ * This function assumes all the instructions are still of type
+ * RC_INSTRUCTION_NORMAL.
+ */
+void rc_rename_regs(struct radeon_compiler * c)
+{
+ unsigned int cur_index = 0;
+ unsigned int icount;
+ struct rc_instruction * inst;
+ unsigned int * masks;
+
+ /* The number of instructions in the program is also the maximum
+ * number of temp registers that could potentially be used. */
+ icount = rc_recompute_ips(c);
+ masks = memory_pool_malloc(&c->Pool, icount * sizeof(unsigned int));
+ memset(masks, 0, icount * sizeof(unsigned int));
+
+ for(inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ const struct rc_opcode_info * info;
+ if(inst->Type != RC_INSTRUCTION_NORMAL) {
+ rc_error(c, "%s only works with normal instructions.",
+ __FUNCTION__);
+ return;
+ }
+ unsigned int old_index, temp_index;
+ struct rc_dst_register * dst = &inst->U.I.DstReg;
+ info = rc_get_opcode_info(inst->U.I.Opcode);
+ if(!info->HasDstReg || dst->File != RC_FILE_TEMPORARY) {
+ continue;
+ }
+ if(dst->Index >= icount || !masks[dst->Index] ||
+ masks[dst->Index] == dst->WriteMask) {
+ old_index = dst->Index;
+ /* We need to set dst->Index here so get free temporary
+ * will work. */
+ dst->Index = cur_index++;
+ temp_index = rc_find_free_temporary(c);
+ rename_all(c, inst->Next, old_index,
+ dst->Index, temp_index);
+ }
+ assert(dst->Index < icount);
+ masks[dst->Index] |= dst->WriteMask;
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.h b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.h
new file mode 100644
index 00000000000..4323b995d84
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.h
@@ -0,0 +1,9 @@
+
+#ifndef RADEON_RENAME_REGS_H
+#define RADEON_RENAME_REGS_H
+
+struct radeon_compiler;
+
+void rc_rename_regs(struct radeon_compiler * c);
+
+#endif /* RADEON_RENAME_REGS_H */
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index 6992ca59dbf..e4b302bbad9 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -376,13 +376,12 @@ static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
ctx->Const.MaxDrawBuffers = 1;
ctx->Const.MaxColorAttachments = 1;
- /* currently bogus data */
if (r300->options.hw_tcl_enabled) {
- ctx->Const.VertexProgram.MaxNativeInstructions = VSF_MAX_FRAGMENT_LENGTH / 4;
- ctx->Const.VertexProgram.MaxNativeAluInstructions = VSF_MAX_FRAGMENT_LENGTH / 4;
- ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */
+ ctx->Const.VertexProgram.MaxNativeInstructions = 255;
+ ctx->Const.VertexProgram.MaxNativeAluInstructions = 255;
+ ctx->Const.VertexProgram.MaxNativeAttribs = 16;
ctx->Const.VertexProgram.MaxNativeTemps = 32;
- ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */
+ ctx->Const.VertexProgram.MaxNativeParameters = 256;
ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
}
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index fbb609b9f61..99540e3354f 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -43,7 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_common.h"
#include "main/mtypes.h"
-#include "shader/prog_instruction.h"
+#include "program/prog_instruction.h"
#include "compiler/radeon_code.h"
struct r300_context;
diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c
index 282c0e18bca..5ae9f49840b 100644
--- a/src/mesa/drivers/dri/r300/r300_draw.c
+++ b/src/mesa/drivers/dri/r300/r300_draw.c
@@ -523,8 +523,7 @@ static void r300AllocDmaRegions(GLcontext *ctx, const struct gl_client_array *in
r300ConvertAttrib(ctx, count, input[i], &vbuf->attribs[index]);
} else {
if (input[i]->BufferObj->Name) {
- if (stride % 4 != 0) {
- assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0);
+ if (stride % 4 != 0 || (intptr_t)input[i]->Ptr % 4 != 0) {
r300AlignDataToDword(ctx, input[i], count, &vbuf->attribs[index]);
vbuf->attribs[index].is_named_bo = GL_FALSE;
} else {
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index 7be2f74b5b2..95f4306f604 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -38,8 +38,8 @@
#include "r300_fragprog_common.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
#include "compiler/radeon_compiler.h"
diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
index ac93563ed9e..f25264b6f2d 100644
--- a/src/mesa/drivers/dri/r300/r300_reg.h
+++ b/src/mesa/drivers/dri/r300/r300_reg.h
@@ -3066,8 +3066,8 @@ enum {
# define R500_FC_B_OP0_NONE (0 << 24)
# define R500_FC_B_OP0_DECR (1 << 24)
# define R500_FC_B_OP0_INCR (2 << 24)
-# define R500_FC_B_OP1_DECR (0 << 26)
-# define R500_FC_B_OP1_NONE (1 << 26)
+# define R500_FC_B_OP1_NONE (0 << 26)
+# define R500_FC_B_OP1_DECR (1 << 26)
# define R500_FC_B_OP1_INCR (2 << 26)
# define R500_FC_IGNORE_UNCOVERED (1 << 28)
#define R500_US_FC_INT_CONST_0 0x4c00
diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c
index 9c24166ec5b..a9bddf05779 100644
--- a/src/mesa/drivers/dri/r300/r300_shader.c
+++ b/src/mesa/drivers/dri/r300/r300_shader.c
@@ -27,7 +27,7 @@
#include "main/glheader.h"
-#include "shader/program.h"
+#include "program/program.h"
#include "tnl/tnl.h"
#include "r300_context.h"
#include "r300_fragprog_common.h"
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index fa33be49989..0113eecaa3a 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -49,8 +49,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "drivers/common/meta.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_statevars.h"
+#include "program/prog_parameter.h"
+#include "program/prog_statevars.h"
#include "vbo/vbo.h"
#include "tnl/tnl.h"
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index a1fe3780294..67d8b2b3286 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -31,12 +31,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
-#include "shader/program.h"
-#include "shader/programopt.h"
-#include "shader/prog_instruction.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
-#include "shader/prog_statevars.h"
+#include "program/program.h"
+#include "program/programopt.h"
+#include "program/prog_instruction.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
+#include "program/prog_statevars.h"
#include "tnl/tnl.h"
#include "compiler/radeon_compiler.h"
diff --git a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c
index 9f9dec840b4..471a3723cb9 100644
--- a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c
+++ b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c
@@ -28,8 +28,8 @@
#include "radeon_mesa_to_rc.h"
#include "main/mtypes.h"
-#include "shader/prog_instruction.h"
-#include "shader/prog_parameter.h"
+#include "program/prog_instruction.h"
+#include "program/prog_parameter.h"
#include "compiler/radeon_compiler.h"
#include "compiler/radeon_program.h"
diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
index afe2d55dc7c..8013553f679 100644
--- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c
+++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
@@ -46,7 +46,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r600_context.h"
#include "radeon_reg.h"
#include "r600_cmdbuf.h"
-#include "r600_emit.h"
#include "radeon_bocs_wrapper.h"
#include "radeon_reg.h"
diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.h b/src/mesa/drivers/dri/r600/r600_cmdbuf.h
index dff00096999..78fccd0b601 100644
--- a/src/mesa/drivers/dri/r600/r600_cmdbuf.h
+++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.h
@@ -37,7 +37,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define __R600_CMDBUF_H__
#include "r600_context.h"
-#include "r600_emit.h"
#define RADEON_CP_PACKET3_NOP 0xC0001000
#define RADEON_CP_PACKET3_NEXT_CHAR 0xC0001900
diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
index f4aed4e87fd..84d9d423124 100644
--- a/src/mesa/drivers/dri/r600/r600_context.c
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -59,7 +59,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_buffer_objects.h"
#include "radeon_span.h"
#include "r600_cmdbuf.h"
-#include "r600_emit.h"
#include "radeon_bocs_wrapper.h"
#include "radeon_queryobj.h"
#include "r600_blit.h"
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index de5c5d89fea..99a33df4fcb 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -32,7 +32,7 @@
#include "main/mtypes.h"
#include "main/imports.h"
-#include "shader/prog_parameter.h"
+#include "program/prog_parameter.h"
#include "radeon_debug.h"
#include "r600_context.h"
@@ -293,7 +293,9 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
case 2:
format = FMT_16_16; break;
case 3:
- format = FMT_16_16_16; break;
+ /* 3 comp GL_SHORT vertex format doesnt work on r700
+ 4 somehow works, test - sauerbraten */
+ format = FMT_16_16_16_16; break;
case 4:
format = FMT_16_16_16_16; break;
default:
@@ -1262,7 +1264,7 @@ GLboolean checkop3(r700_AssemblerBase* pAsm)
{
if( GL_FALSE == mov_temp(pAsm, 1) )
{
- return 1;
+ return GL_FALSE;
}
}
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h
index 2d3c32487e6..dbc6cdb1903 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.h
+++ b/src/mesa/drivers/dri/r600/r700_assembler.h
@@ -28,7 +28,7 @@
#define _R700_ASSEMBLER_H_
#include "main/mtypes.h"
-#include "shader/prog_instruction.h"
+#include "program/prog_instruction.h"
#include "r700_chip.h"
#include "r700_shaderinst.h"
diff --git a/src/mesa/drivers/dri/r600/r700_chip.h b/src/mesa/drivers/dri/r600/r700_chip.h
index ae249e15fd4..0b6b72f8501 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.h
+++ b/src/mesa/drivers/dri/r600/r700_chip.h
@@ -27,7 +27,9 @@
#ifndef _R700_CHIP_H_
#define _R700_CHIP_H_
-#include "r600_context.h"
+#include <GL/gl.h>
+
+#include "radeon_common_context.h"
#include "r600_reg.h"
#include "r600_reg_auto_r6xx.h"
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
index fbb808e0662..f9d84b6ed68 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.c
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -32,12 +32,13 @@
#include <math.h>
#include "main/imports.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_statevars.h"
-#include "shader/program.h"
+#include "program/prog_parameter.h"
+#include "program/prog_statevars.h"
+#include "program/program.h"
#include "r600_context.h"
#include "r600_cmdbuf.h"
+#include "r600_emit.h"
#include "r700_fragprog.h"
@@ -586,7 +587,9 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_T, PNT_SPRITE_OVRD_Y_shift, PNT_SPRITE_OVRD_Y_mask);
SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_0, PNT_SPRITE_OVRD_Z_shift, PNT_SPRITE_OVRD_Z_mask);
SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_1, PNT_SPRITE_OVRD_W_shift, PNT_SPRITE_OVRD_W_mask);
- if(ctx->Point.SpriteOrigin == GL_LOWER_LEFT)
+ /* Like e.g. viewport and winding, point sprite coordinates are
+ * inverted when rendering to FBO. */
+ if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) == !ctx->DrawBuffer->Name)
SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit);
else
CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit);
diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.c b/src/mesa/drivers/dri/r600/r700_oglprog.c
index b7124e644a3..83517925115 100644
--- a/src/mesa/drivers/dri/r600/r700_oglprog.c
+++ b/src/mesa/drivers/dri/r600/r700_oglprog.c
@@ -29,7 +29,7 @@
#include "main/glheader.h"
#include "main/imports.h"
-#include "shader/program.h"
+#include "program/program.h"
#include "tnl/tnl.h"
#include "r600_context.h"
diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.h b/src/mesa/drivers/dri/r600/r700_oglprog.h
index fe2e9d19749..4d421338678 100644
--- a/src/mesa/drivers/dri/r600/r700_oglprog.h
+++ b/src/mesa/drivers/dri/r600/r700_oglprog.h
@@ -27,7 +27,7 @@
#ifndef _R700_OGLPROG_H_
#define _R700_OGLPROG_H_
-#include "r600_context.h"
+#include "main/dd.h"
extern void r700InitShaderFuncs(struct dd_function_table *functions);
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index ac64bbf874f..5ea8918611c 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -41,8 +41,8 @@
#include "main/framebuffer.h"
#include "drivers/common/meta.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_statevars.h"
+#include "program/prog_parameter.h"
+#include "program/prog_statevars.h"
#include "vbo/vbo.h"
#include "r600_context.h"
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
index 14dd2a5482c..137f3007ced 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.c
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -35,14 +35,15 @@
#include "main/mtypes.h"
#include "tnl/t_context.h"
-#include "shader/program.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_statevars.h"
+#include "program/program.h"
+#include "program/prog_parameter.h"
+#include "program/prog_statevars.h"
#include "radeon_debug.h"
#include "r600_context.h"
#include "r600_cmdbuf.h"
-#include "shader/programopt.h"
+#include "r600_emit.h"
+#include "program/programopt.h"
#include "r700_debug.h"
#include "r700_vertprog.h"
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index 94f476617b6..5a7d52c4d2f 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -300,10 +300,10 @@ void radeonDestroyContext(__DRIcontext *driContextPriv )
_mesa_meta_free(radeon->glCtx);
if (radeon == current) {
- radeon_firevertices(radeon);
_mesa_make_current(NULL, NULL, NULL);
}
+ radeon_firevertices(radeon);
if (!is_empty_list(&radeon->dma.reserved)) {
rcommonFlushCmdBuf( radeon, __FUNCTION__ );
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
index 6cd1d87de24..c877e6c1765 100644
--- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
+++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
@@ -602,17 +602,17 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t
__FUNCTION__, texObj ,t->minLod, t->maxLod);
radeon_mipmap_tree *dst_miptree;
- dst_miptree = get_biggest_matching_miptree(t, t->minLod, t->maxLod);
+ dst_miptree = get_biggest_matching_miptree(t, t->base.BaseLevel, t->base.MaxLevel);
+ radeon_miptree_unreference(&t->mt);
if (!dst_miptree) {
- radeon_miptree_unreference(&t->mt);
radeon_try_alloc_miptree(rmesa, t);
- dst_miptree = t->mt;
radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
"%s: No matching miptree found, allocated new one %p\n",
__FUNCTION__, t->mt);
} else {
+ radeon_miptree_reference(dst_miptree, &t->mt);
radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
"%s: Using miptree %p\n", __FUNCTION__, t->mt);
}
@@ -629,7 +629,7 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t
"Checking image level %d, face %d, mt %p ... ",
level, face, img->mt);
- if (img->mt != dst_miptree) {
+ if (img->mt != t->mt) {
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"MIGRATING\n");
@@ -637,7 +637,7 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t
if (src_bo && radeon_bo_is_referenced_by_cs(src_bo, rmesa->cmdbuf.cs)) {
radeon_firevertices(rmesa);
}
- migrate_image_to_miptree(dst_miptree, img, face, level);
+ migrate_image_to_miptree(t->mt, img, face, level);
} else
radeon_print(RADEON_TEXTURE, RADEON_TRACE, "OK\n");
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
index 4f59511a528..82107cc6aeb 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -52,7 +52,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_tex.h"
#elif defined(RADEON_R200)
#include "r200_context.h"
-#include "r200_ioctl.h"
#include "r200_tex.h"
#elif defined(RADEON_R300)
#include "r300_context.h"
@@ -338,12 +337,6 @@ static const __DRItexBufferExtension radeonTexBufferExtension = {
#endif
#if defined(RADEON_R200)
-static const __DRIallocateExtension r200AllocateExtension = {
- { __DRI_ALLOCATE, __DRI_ALLOCATE_VERSION },
- r200AllocateMemoryMESA,
- r200FreeMemoryMESA,
- r200GetMemoryOffsetMESA
-};
static const __DRItexOffsetExtension r200texOffsetExtension = {
{ __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
@@ -1209,7 +1202,6 @@ radeonCreateScreen( __DRIscreen *sPriv )
i = 0;
screen->extensions[i++] = &driCopySubBufferExtension.base;
- screen->extensions[i++] = &driFrameTrackingExtension.base;
screen->extensions[i++] = &driReadDrawableExtension;
if ( screen->irq != 0 ) {
@@ -1222,9 +1214,6 @@ radeonCreateScreen( __DRIscreen *sPriv )
#endif
#if defined(RADEON_R200)
- if (IS_R200_CLASS(screen))
- screen->extensions[i++] = &r200AllocateExtension.base;
-
screen->extensions[i++] = &r200texOffsetExtension.base;
#endif
@@ -1366,8 +1355,8 @@ radeonCreateScreen2(__DRIscreen *sPriv)
i = 0;
screen->extensions[i++] = &driCopySubBufferExtension.base;
- screen->extensions[i++] = &driFrameTrackingExtension.base;
screen->extensions[i++] = &driReadDrawableExtension;
+ screen->extensions[i++] = &dri2ConfigQueryExtension.base;
if ( screen->irq != 0 ) {
screen->extensions[i++] = &driSwapControlExtension.base;
@@ -1379,9 +1368,6 @@ radeonCreateScreen2(__DRIscreen *sPriv)
#endif
#if defined(RADEON_R200)
- if (IS_R200_CLASS(screen))
- screen->extensions[i++] = &r200AllocateExtension.base;
-
screen->extensions[i++] = &r200TexBufferExtension.base;
#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_getimage.c b/src/mesa/drivers/dri/radeon/radeon_tex_getimage.c
index 3ababb1ef53..f878b48e5f9 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex_getimage.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tex_getimage.c
@@ -31,6 +31,7 @@
#include "radeon_common_context.h"
#include "radeon_texture.h"
+#include "radeon_mipmap_tree.h"
#include "main/texgetimage.h"
@@ -51,7 +52,15 @@ radeon_get_tex_image(GLcontext * ctx, GLenum target, GLint level,
__func__, ctx, texObj, image, compressed);
if (image->mt) {
+ radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
/* Map the texture image read-only */
+ if (radeon_bo_is_referenced_by_cs(image->mt->bo, rmesa->cmdbuf.cs)) {
+ radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
+ "%s: called for texture that is queued for GPU processing\n",
+ __func__);
+ radeon_firevertices(rmesa);
+ }
+
radeon_teximage_map(image, GL_FALSE);
} else {
/* Image hasn't been uploaded to a miptree yet */
diff --git a/src/mesa/drivers/dri/sis/sis_state.c b/src/mesa/drivers/dri/sis/sis_state.c
index a22195ccceb..6173231a82e 100644
--- a/src/mesa/drivers/dri/sis/sis_state.c
+++ b/src/mesa/drivers/dri/sis/sis_state.c
@@ -37,6 +37,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "sis_lock.h"
#include "main/context.h"
+#include "main/macros.h"
#include "swrast/swrast.h"
#include "vbo/vbo.h"
#include "tnl/tnl.h"
diff --git a/src/mesa/drivers/dri/unichrome/via_screen.c b/src/mesa/drivers/dri/unichrome/via_screen.c
index ee10b569bf1..4b3e9d5a38f 100644
--- a/src/mesa/drivers/dri/unichrome/via_screen.c
+++ b/src/mesa/drivers/dri/unichrome/via_screen.c
@@ -166,7 +166,6 @@ viaInitDriver(__DRIscreen *sPriv)
viaScreen->sareaPrivOffset = gDRIPriv->sarea_priv_offset;
i = 0;
- viaScreen->extensions[i++] = &driFrameTrackingExtension.base;
viaScreen->extensions[i++] = &driReadDrawableExtension;
if ( viaScreen->irqEnabled ) {
viaScreen->extensions[i++] = &driSwapControlExtension.base;