author     Stéphane Marchesin <[email protected]>    2011-08-26 17:37:25 -0700
committer  Stéphane Marchesin <[email protected]>    2011-08-26 17:37:25 -0700
commit     f8e6d19f3f40931be741b44d3edf210c38e13f0f (patch)
tree       e99e4c619901412ac6448534b0f57ce1c4295c6b /src/mesa/drivers/dri
parent     974c49ed176de55aadb335a2956ef5dfec774a23 (diff)
parent     e3b0e3776646d0367206e4544229622eb22fe9f8 (diff)
Merge branch 'master' of git://anongit.freedesktop.org/mesa/mesa
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r--  src/mesa/drivers/dri/common/xmlconfig.c | 2
-rw-r--r--  src/mesa/drivers/dri/common/xmlpool.h | 2
-rw-r--r--  src/mesa/drivers/dri/common/xmlpool/options.h | 60
-rw-r--r--  src/mesa/drivers/dri/common/xmlpool/t_options.h | 30
-rw-r--r--  src/mesa/drivers/dri/i915/i830_vtbl.c | 7
-rw-r--r--  src/mesa/drivers/dri/i915/i915_fragprog.c | 21
-rw-r--r--  src/mesa/drivers/dri/i915/i915_program.c | 14
-rw-r--r--  src/mesa/drivers/dri/i965/Makefile | 6
-rw-r--r--  src/mesa/drivers/dri/i965/brw_context.h | 72
-rw-r--r--  src/mesa/drivers/dri/i965/brw_curbe.c | 27
-rw-r--r--  src/mesa/drivers/dri/i965/brw_defines.h | 141
-rw-r--r--  src/mesa/drivers/dri/i965/brw_disasm.c | 49
-rw-r--r--  src/mesa/drivers/dri/i965/brw_draw_upload.c | 12
-rw-r--r--  src/mesa/drivers/dri/i965/brw_eu.h | 30
-rw-r--r--  src/mesa/drivers/dri/i965/brw_eu_emit.c | 55
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs.cpp | 379
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs.h | 109
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 84
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 239
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp | 55
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp | 16
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 151
-rw-r--r--  src/mesa/drivers/dri/i965/brw_misc_state.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/brw_program.c | 28
-rw-r--r--  src/mesa/drivers/dri/i965/brw_shader.cpp | 59
-rw-r--r--  src/mesa/drivers/dri/i965/brw_shader.h | 6
-rw-r--r--  src/mesa/drivers/dri/i965/brw_state_dump.c | 20
-rw-r--r--  src/mesa/drivers/dri/i965/brw_tex_layout.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4.cpp | 161
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4.h | 489
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 854
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp | 234
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2156
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vs.c | 66
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vs.h | 5
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vs_constval.c | 12
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vs_emit.c | 63
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vs_state.c | 10
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vtbl.c | 1
-rw-r--r--  src/mesa/drivers/dri/i965/brw_wm.c | 36
-rw-r--r--  src/mesa/drivers/dri/i965/brw_wm_emit.c | 15
-rw-r--r--  src/mesa/drivers/dri/i965/brw_wm_fp.c | 14
-rw-r--r--  src/mesa/drivers/dri/i965/brw_wm_pass0.c | 4
-rw-r--r--  src/mesa/drivers/dri/i965/brw_wm_sampler_state.c | 7
-rw-r--r--  src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_vs_state.c | 57
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_wm_state.c | 4
-rw-r--r--  src/mesa/drivers/dri/i965/gen7_sampler_state.c | 7
-rw-r--r--  src/mesa/drivers/dri/i965/gen7_vs_state.c | 10
-rw-r--r--  src/mesa/drivers/dri/i965/gen7_wm_state.c | 10
-rw-r--r--  src/mesa/drivers/dri/intel/intel_batchbuffer.c | 34
-rw-r--r--  src/mesa/drivers/dri/intel/intel_blit.c | 4
-rw-r--r--  src/mesa/drivers/dri/intel/intel_buffer_objects.c | 104
-rw-r--r--  src/mesa/drivers/dri/intel/intel_clear.c | 15
-rw-r--r--  src/mesa/drivers/dri/intel/intel_context.c | 16
-rw-r--r--  src/mesa/drivers/dri/intel/intel_fbo.c | 45
-rw-r--r--  src/mesa/drivers/dri/intel/intel_fbo.h | 1
-rw-r--r--  src/mesa/drivers/dri/intel/intel_mipmap_tree.c | 2
-rw-r--r--  src/mesa/drivers/dri/intel/intel_pixel_bitmap.c | 9
-rw-r--r--  src/mesa/drivers/dri/intel/intel_reg.h | 1
-rw-r--r--  src/mesa/drivers/dri/intel/intel_screen.h | 9
-rw-r--r--  src/mesa/drivers/dri/intel/intel_span.c | 88
-rw-r--r--  src/mesa/drivers/dri/intel/intel_tex.c | 5
-rw-r--r--  src/mesa/drivers/dri/intel/intel_tex_copy.c | 101
-rw-r--r--  src/mesa/drivers/dri/intel/intel_tex_image.c | 35
-rw-r--r--  src/mesa/drivers/dri/intel/intel_tex_obj.h | 5
-rw-r--r--  src/mesa/drivers/dri/intel/intel_tex_subimage.c | 6
-rw-r--r--  src/mesa/drivers/dri/intel/intel_tex_validate.c | 12
-rw-r--r--  src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c | 24
-rw-r--r--  src/mesa/drivers/dri/r200/r200_ioctl.c | 3
-rw-r--r--  src/mesa/drivers/dri/r200/r200_tex.c | 1
-rw-r--r--  src/mesa/drivers/dri/r200/r200_texstate.c | 6
-rw-r--r--  src/mesa/drivers/dri/r200/r200_vertprog.c | 8
-rw-r--r--  src/mesa/drivers/dri/r300/compiler/radeon_optimize.c | 35
-rw-r--r--  src/mesa/drivers/dri/r300/r300_draw.c | 22
-rw-r--r--  src/mesa/drivers/dri/r300/r300_tex.c | 1
-rw-r--r--  src/mesa/drivers/dri/r300/r300_texstate.c | 5
-rw-r--r--  src/mesa/drivers/dri/r600/evergreen_fragprog.c | 8
-rw-r--r--  src/mesa/drivers/dri/r600/evergreen_render.c | 20
-rw-r--r--  src/mesa/drivers/dri/r600/evergreen_tex.c | 8
-rw-r--r--  src/mesa/drivers/dri/r600/evergreen_vertprog.c | 16
-rw-r--r--  src/mesa/drivers/dri/r600/r600_cmdbuf.c | 2
-rw-r--r--  src/mesa/drivers/dri/r600/r600_tex.c | 1
-rw-r--r--  src/mesa/drivers/dri/r600/r600_texstate.c | 5
-rw-r--r--  src/mesa/drivers/dri/r600/r700_fragprog.c | 8
-rw-r--r--  src/mesa/drivers/dri/r600/r700_render.c | 20
-rw-r--r--  src/mesa/drivers/dri/r600/r700_vertprog.c | 16
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h | 6
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_buffer_objects.c | 27
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_common.c | 8
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_common_context.c | 2
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_cs_legacy.c | 2
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_ioctl.c | 3
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_lock.c | 10
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_tex.c | 1
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_tex_copy.c | 55
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_texstate.c | 9
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_texture.c | 24
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_texture.h | 5
99 files changed, 5556 insertions(+), 1192 deletions(-)
diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c
index 77967ac2a43..12dd31bb162 100644
--- a/src/mesa/drivers/dri/common/xmlconfig.c
+++ b/src/mesa/drivers/dri/common/xmlconfig.c
@@ -567,7 +567,7 @@ static void parseOptInfoAttr (struct OptInfoData *data, const XML_Char **attr) {
} else
defaultVal = attrVal[OA_DEFAULT];
if (!parseValue (&cache->values[opt], cache->info[opt].type, defaultVal))
- XML_FATAL ("illegal default value: %s.", defaultVal);
+ XML_FATAL ("illegal default value for %s: %s.", cache->info[opt].name, defaultVal);
if (attrVal[OA_VALID]) {
if (cache->info[opt].type == DRI_BOOL)
diff --git a/src/mesa/drivers/dri/common/xmlpool.h b/src/mesa/drivers/dri/common/xmlpool.h
index 587517ea10a..ffea430024d 100644
--- a/src/mesa/drivers/dri/common/xmlpool.h
+++ b/src/mesa/drivers/dri/common/xmlpool.h
@@ -60,7 +60,7 @@
#define DRI_CONF_OPT_BEGIN(name,type,def) \
"<option name=\""#name"\" type=\""#type"\" default=\""#def"\">\n"
-/** \brief Begin an option definition with qouted default value */
+/** \brief Begin an option definition with quoted default value */
#define DRI_CONF_OPT_BEGIN_Q(name,type,def) \
"<option name=\""#name"\" type=\""#type"\" default="#def">\n"
diff --git a/src/mesa/drivers/dri/common/xmlpool/options.h b/src/mesa/drivers/dri/common/xmlpool/options.h
index d76595578c7..1e584ba086a 100644
--- a/src/mesa/drivers/dri/common/xmlpool/options.h
+++ b/src/mesa/drivers/dri/common/xmlpool/options.h
@@ -425,6 +425,66 @@ DRI_CONF_OPT_BEGIN(hyperz,bool,def) \
DRI_CONF_DESC(sv,"Använd HyperZ för att maximera prestandan") \
DRI_CONF_OPT_END
+#define DRI_CONF_PP_CELSHADE(def) \
+DRI_CONF_OPT_BEGIN_V(pp_celshade,enum,def,"0:1") \
+ DRI_CONF_DESC(en,"A post-processing filter to cel-shade the output") \
+ DRI_CONF_DESC(de,"A post-processing filter to cel-shade the output") \
+ DRI_CONF_DESC(es,"A post-processing filter to cel-shade the output") \
+ DRI_CONF_DESC(nl,"A post-processing filter to cel-shade the output") \
+ DRI_CONF_DESC(fr,"A post-processing filter to cel-shade the output") \
+ DRI_CONF_DESC(sv,"A post-processing filter to cel-shade the output") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NORED(def) \
+DRI_CONF_OPT_BEGIN_V(pp_nored,enum,def,"0:1") \
+ DRI_CONF_DESC(en,"A post-processing filter to remove the red channel") \
+ DRI_CONF_DESC(de,"A post-processing filter to remove the red channel") \
+ DRI_CONF_DESC(es,"A post-processing filter to remove the red channel") \
+ DRI_CONF_DESC(nl,"A post-processing filter to remove the red channel") \
+ DRI_CONF_DESC(fr,"A post-processing filter to remove the red channel") \
+ DRI_CONF_DESC(sv,"A post-processing filter to remove the red channel") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NOGREEN(def) \
+DRI_CONF_OPT_BEGIN_V(pp_nogreen,enum,def,"0:1") \
+ DRI_CONF_DESC(en,"A post-processing filter to remove the green channel") \
+ DRI_CONF_DESC(de,"A post-processing filter to remove the green channel") \
+ DRI_CONF_DESC(es,"A post-processing filter to remove the green channel") \
+ DRI_CONF_DESC(nl,"A post-processing filter to remove the green channel") \
+ DRI_CONF_DESC(fr,"A post-processing filter to remove the green channel") \
+ DRI_CONF_DESC(sv,"A post-processing filter to remove the green channel") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NOBLUE(def) \
+DRI_CONF_OPT_BEGIN_V(pp_noblue,enum,def,"0:1") \
+ DRI_CONF_DESC(en,"A post-processing filter to remove the blue channel") \
+ DRI_CONF_DESC(de,"A post-processing filter to remove the blue channel") \
+ DRI_CONF_DESC(es,"A post-processing filter to remove the blue channel") \
+ DRI_CONF_DESC(nl,"A post-processing filter to remove the blue channel") \
+ DRI_CONF_DESC(fr,"A post-processing filter to remove the blue channel") \
+ DRI_CONF_DESC(sv,"A post-processing filter to remove the blue channel") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_JIMENEZMLAA(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa,int,def, # min ":" # max ) \
+ DRI_CONF_DESC(en,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+ DRI_CONF_DESC(de,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+ DRI_CONF_DESC(es,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+ DRI_CONF_DESC(nl,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+ DRI_CONF_DESC(fr,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+ DRI_CONF_DESC(sv,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_JIMENEZMLAA_COLOR(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa_color,int,def, # min ":" # max ) \
+ DRI_CONF_DESC(en,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+ DRI_CONF_DESC(de,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+ DRI_CONF_DESC(es,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+ DRI_CONF_DESC(nl,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+ DRI_CONF_DESC(fr,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+ DRI_CONF_DESC(sv,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+DRI_CONF_OPT_END
+
#define DRI_CONF_MAX_TEXTURE_UNITS(def,min,max) \
DRI_CONF_OPT_BEGIN_V(texture_units,int,def, # min ":" # max ) \
DRI_CONF_DESC(en,"Number of texture units used") \
diff --git a/src/mesa/drivers/dri/common/xmlpool/t_options.h b/src/mesa/drivers/dri/common/xmlpool/t_options.h
index 5fd6ec65bf8..2427aa77f5b 100644
--- a/src/mesa/drivers/dri/common/xmlpool/t_options.h
+++ b/src/mesa/drivers/dri/common/xmlpool/t_options.h
@@ -191,6 +191,36 @@ DRI_CONF_OPT_BEGIN(hyperz,bool,def) \
DRI_CONF_DESC(en,gettext("Use HyperZ to boost performance")) \
DRI_CONF_OPT_END
+#define DRI_CONF_PP_CELSHADE(def) \
+DRI_CONF_OPT_BEGIN_V(pp_celshade,enum,def,"0:1") \
+ DRI_CONF_DESC(en,gettext("A post-processing filter to cel-shade the output")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NORED(def) \
+DRI_CONF_OPT_BEGIN_V(pp_nored,enum,def,"0:1") \
+ DRI_CONF_DESC(en,gettext("A post-processing filter to remove the red channel")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NOGREEN(def) \
+DRI_CONF_OPT_BEGIN_V(pp_nogreen,enum,def,"0:1") \
+ DRI_CONF_DESC(en,gettext("A post-processing filter to remove the green channel")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NOBLUE(def) \
+DRI_CONF_OPT_BEGIN_V(pp_noblue,enum,def,"0:1") \
+ DRI_CONF_DESC(en,gettext("A post-processing filter to remove the blue channel")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_JIMENEZMLAA(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa,int,def, # min ":" # max ) \
+ DRI_CONF_DESC(en,gettext("Morphological anti-aliasing based on Jimenez\\\' MLAA. 0 to disable, 8 for default quality")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_JIMENEZMLAA_COLOR(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa_color,int,def, # min ":" # max ) \
+ DRI_CONF_DESC(en,gettext("Morphological anti-aliasing based on Jimenez\\\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps")) \
+DRI_CONF_OPT_END
+
#define DRI_CONF_MAX_TEXTURE_UNITS(def,min,max) \
DRI_CONF_OPT_BEGIN_V(texture_units,int,def, # min ":" # max ) \
DRI_CONF_DESC(en,gettext("Number of texture units used")) \
diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c
index 6d43726beb1..ed5286fd7d9 100644
--- a/src/mesa/drivers/dri/i915/i830_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i830_vtbl.c
@@ -881,6 +881,12 @@ i830_invalidate_state(struct intel_context *intel, GLuint new_state)
i830_update_provoking_vertex(&intel->ctx);
}
+static bool
+i830_is_hiz_depth_format(struct intel_context *intel, gl_format format)
+{
+ return false;
+}
+
void
i830InitVtbl(struct i830_context *i830)
{
@@ -898,4 +904,5 @@ i830InitVtbl(struct i830_context *i830)
i830->intel.vtbl.finish_batch = intel_finish_vb;
i830->intel.vtbl.invalidate_state = i830_invalidate_state;
i830->intel.vtbl.render_target_supported = i830_render_target_supported;
+ i830->intel.vtbl.is_hiz_depth_format = i830_is_hiz_depth_format;
}
diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c
index 6e1d7092237..d155b85ffca 100644
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -175,10 +175,8 @@ src_vector(struct i915_fragment_program *p,
case PROGRAM_STATE_VAR:
case PROGRAM_NAMED_PARAM:
case PROGRAM_UNIFORM:
- src =
- i915_emit_param4fv(p,
- program->Base.Parameters->ParameterValues[source->
- Index]);
+ src = i915_emit_param4fv(p,
+ &program->Base.Parameters->ParameterValues[source->Index][0].f);
break;
default:
@@ -303,7 +301,7 @@ do { \
/*
* TODO: consider moving this into core
*/
-static void calc_live_regs( struct i915_fragment_program *p )
+static bool calc_live_regs( struct i915_fragment_program *p )
{
const struct gl_fragment_program *program = &p->FragProg;
GLuint regsUsed = 0xffff0000;
@@ -317,6 +315,9 @@ static void calc_live_regs( struct i915_fragment_program *p )
/* Register is written to: unmark as live for this and preceeding ops */
if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ if (inst->DstReg.Index > 16)
+ return false;
+
live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask;
if (live_components[inst->DstReg.Index] == 0)
regsUsed &= ~(1 << inst->DstReg.Index);
@@ -327,6 +328,9 @@ static void calc_live_regs( struct i915_fragment_program *p )
if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) {
unsigned c;
+ if (inst->SrcReg[a].Index > 16)
+ return false;
+
regsUsed |= 1 << inst->SrcReg[a].Index;
for (c = 0; c < 4; c++) {
@@ -340,6 +344,8 @@ static void calc_live_regs( struct i915_fragment_program *p )
p->usedRegs[i] = regsUsed;
}
+
+ return true;
}
static GLuint get_live_regs( struct i915_fragment_program *p,
@@ -394,7 +400,10 @@ upload_program(struct i915_fragment_program *p)
/* Not always needed:
*/
- calc_live_regs(p);
+ if (!calc_live_regs(p)) {
+ i915_program_error(p, "Could not allocate registers");
+ return;
+ }
while (1) {
GLuint src0, src1, src2, flags;
diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c
index ca1949b223e..0a600d30bef 100644
--- a/src/mesa/drivers/dri/i915/i915_program.c
+++ b/src/mesa/drivers/dri/i915/i915_program.c
@@ -442,14 +442,16 @@ i915_emit_param4fv(struct i915_fragment_program * p, const GLfloat * values)
void
i915_program_error(struct i915_fragment_program *p, const char *fmt, ...)
{
- va_list args;
+ if (unlikely((INTEL_DEBUG & (DEBUG_WM | DEBUG_FALLBACKS)) != 0)) {
+ va_list args;
- fprintf(stderr, "i915_program_error: ");
- va_start(args, fmt);
- vfprintf(stderr, fmt, args);
- va_end(args);
+ fprintf(stderr, "i915_program_error: ");
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
- fprintf(stderr, "\n");
+ fprintf(stderr, "\n");
+ }
p->error = 1;
}
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 44f28cd9d15..d9c885da65b 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -124,7 +124,11 @@ CXX_SOURCES = \
brw_fs_reg_allocate.cpp \
brw_fs_schedule_instructions.cpp \
brw_fs_vector_splitting.cpp \
- brw_shader.cpp
+ brw_shader.cpp \
+ brw_vec4.cpp \
+ brw_vec4_emit.cpp \
+ brw_vec4_reg_allocate.cpp \
+ brw_vec4_visitor.cpp
ASM_SOURCES =
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 471015cf9d0..df63fe1d52c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -212,6 +212,7 @@ enum state_struct_type {
AUB_TRACE_BINDING_TABLE = 0x101,
AUB_TRACE_SURFACE_STATE = 0x102,
AUB_TRACE_VS_CONSTANTS = 0x103,
+ AUB_TRACE_WM_CONSTANTS = 0x104,
};
/** Subclass of Mesa vertex program */
@@ -247,6 +248,7 @@ enum param_conversion {
PARAM_CONVERT_F2I,
PARAM_CONVERT_F2U,
PARAM_CONVERT_F2B,
+ PARAM_CONVERT_ZERO,
};
/* Data about a particular attempt to compile a program. Note that
@@ -310,12 +312,20 @@ struct brw_vs_prog_data {
GLuint total_grf;
GLbitfield64 outputs_written;
GLuint nr_params; /**< number of float params/constants */
+ GLuint total_scratch;
GLuint inputs_read;
/* Used for calculating urb partitions:
*/
GLuint urb_entry_size;
+
+ const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */
+ enum param_conversion param_convert[MAX_UNIFORMS * 4];
+ const float *pull_param[MAX_UNIFORMS * 4];
+ enum param_conversion pull_param_convert[MAX_UNIFORMS * 4];
+
+ bool uses_new_param_layout;
};
@@ -528,7 +538,7 @@ struct brw_context
* the CURBE, the depth buffer, and a query BO.
*/
drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16];
- int validated_bo_count;
+ unsigned int validated_bo_count;
} state;
struct brw_cache cache;
@@ -662,6 +672,7 @@ struct brw_context
struct brw_vs_prog_data *prog_data;
int8_t *constant_map; /* variable array following prog_data */
+ drm_intel_bo *scratch_bo;
drm_intel_bo *const_bo;
/** Offset in the program cache to the VS program */
uint32_t prog_offset;
@@ -674,6 +685,23 @@ struct brw_context
uint32_t push_const_offset; /* Offset in the batchbuffer */
int push_const_size; /* in 256-bit register increments */
+
+ /** @{ register allocator */
+
+ struct ra_regs *regs;
+
+ /**
+ * Array of the ra classes for the unaligned contiguous register
+ * block sizes used.
+ */
+ int *classes;
+
+ /**
+ * Mapping for register-allocated objects in *regs to the first
+ * GRF for that object.
+ */
+ uint8_t *ra_reg_to_grf;
+ /** @} */
} vs;
struct {
@@ -726,7 +754,6 @@ struct brw_context
GLuint render_surf;
GLuint nr_surfaces;
- GLuint max_threads;
drm_intel_bo *scratch_bo;
GLuint sampler_count;
@@ -747,6 +774,29 @@ struct brw_context
* Pre-gen6, push constants live in the CURBE.
*/
uint32_t push_const_offset;
+
+ /** @{ register allocator */
+
+ struct ra_regs *regs;
+
+ /** Array of the ra classes for the unaligned contiguous
+ * register block sizes used.
+ */
+ int *classes;
+
+ /**
+ * Mapping for register-allocated objects in *regs to the first
+ * GRF for that object.
+ */
+ uint8_t *ra_reg_to_grf;
+
+ /**
+ * ra class for the aligned pairs we use for PLN, which doesn't
+ * appear in *classes.
+ */
+ int aligned_pairs_class;
+
+ /** @} */
} wm;
@@ -827,6 +877,10 @@ void brw_validate_textures( struct brw_context *brw );
*/
void brwInitFragProgFuncs( struct dd_function_table *functions );
+int brw_get_scratch_size(int size);
+void brw_get_scratch_bo(struct intel_context *intel,
+ drm_intel_bo **scratch_bo, int size);
+
/* brw_urb.c
*/
@@ -874,7 +928,7 @@ brw_fragment_program_const(const struct gl_fragment_program *p)
}
static inline
-float convert_param(enum param_conversion conversion, float param)
+float convert_param(enum param_conversion conversion, const float *param)
{
union {
float f;
@@ -884,21 +938,23 @@ float convert_param(enum param_conversion conversion, float param)
switch (conversion) {
case PARAM_NO_CONVERT:
- return param;
+ return *param;
case PARAM_CONVERT_F2I:
- fi.i = param;
+ fi.i = *param;
return fi.f;
case PARAM_CONVERT_F2U:
- fi.u = param;
+ fi.u = *param;
return fi.f;
case PARAM_CONVERT_F2B:
- if (param != 0.0)
+ if (*param != 0.0)
fi.i = 1;
else
fi.i = 0;
return fi.f;
+ case PARAM_CONVERT_ZERO:
+ return 0.0;
default:
- return param;
+ return *param;
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index ae11c487a2c..960be10006e 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -203,7 +203,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
/* copy float constants */
for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
buf[offset + i] = convert_param(brw->wm.prog_data->param_convert[i],
- *brw->wm.prog_data->param[i]);
+ brw->wm.prog_data->param[i]);
}
}
@@ -244,15 +244,22 @@ static void prepare_constant_buffer(struct brw_context *brw)
GLuint offset = brw->curbe.vs_start * 16;
GLuint nr = brw->vs.prog_data->nr_params / 4;
- /* Load the subset of push constants that will get used when
- * we also have a pull constant buffer.
- */
- for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
- if (brw->vs.constant_map[i] != -1) {
- assert(brw->vs.constant_map[i] <= nr);
- memcpy(buf + offset + brw->vs.constant_map[i] * 4,
- vp->program.Base.Parameters->ParameterValues[i],
- 4 * sizeof(float));
+ if (brw->vs.prog_data->uses_new_param_layout) {
+ for (i = 0; i < brw->vs.prog_data->nr_params; i++) {
+ buf[offset + i] = convert_param(brw->vs.prog_data->param_convert[i],
+ brw->vs.prog_data->param[i]);
+ }
+ } else {
+ /* Load the subset of push constants that will get used when
+ * we also have a pull constant buffer.
+ */
+ for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+ if (brw->vs.constant_map[i] != -1) {
+ assert(brw->vs.constant_map[i] <= nr);
+ memcpy(buf + offset + brw->vs.constant_map[i] * 4,
+ vp->program.Base.Parameters->ParameterValues[i],
+ 4 * sizeof(float));
+ }
}
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 0a3027d04ad..d1799c0ab4f 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -557,58 +557,93 @@
#define BRW_WE_ALL 1
/** @} */
-#define BRW_OPCODE_MOV 1
-#define BRW_OPCODE_SEL 2
-#define BRW_OPCODE_NOT 4
-#define BRW_OPCODE_AND 5
-#define BRW_OPCODE_OR 6
-#define BRW_OPCODE_XOR 7
-#define BRW_OPCODE_SHR 8
-#define BRW_OPCODE_SHL 9
-#define BRW_OPCODE_RSR 10
-#define BRW_OPCODE_RSL 11
-#define BRW_OPCODE_ASR 12
-#define BRW_OPCODE_CMP 16
-#define BRW_OPCODE_CMPN 17
-#define BRW_OPCODE_JMPI 32
-#define BRW_OPCODE_IF 34
-#define BRW_OPCODE_IFF 35
-#define BRW_OPCODE_ELSE 36
-#define BRW_OPCODE_ENDIF 37
-#define BRW_OPCODE_DO 38
-#define BRW_OPCODE_WHILE 39
-#define BRW_OPCODE_BREAK 40
-#define BRW_OPCODE_CONTINUE 41
-#define BRW_OPCODE_HALT 42
-#define BRW_OPCODE_MSAVE 44
-#define BRW_OPCODE_MRESTORE 45
-#define BRW_OPCODE_PUSH 46
-#define BRW_OPCODE_POP 47
-#define BRW_OPCODE_WAIT 48
-#define BRW_OPCODE_SEND 49
-#define BRW_OPCODE_SENDC 50
-#define BRW_OPCODE_MATH 56
-#define BRW_OPCODE_ADD 64
-#define BRW_OPCODE_MUL 65
-#define BRW_OPCODE_AVG 66
-#define BRW_OPCODE_FRC 67
-#define BRW_OPCODE_RNDU 68
-#define BRW_OPCODE_RNDD 69
-#define BRW_OPCODE_RNDE 70
-#define BRW_OPCODE_RNDZ 71
-#define BRW_OPCODE_MAC 72
-#define BRW_OPCODE_MACH 73
-#define BRW_OPCODE_LZD 74
-#define BRW_OPCODE_SAD2 80
-#define BRW_OPCODE_SADA2 81
-#define BRW_OPCODE_DP4 84
-#define BRW_OPCODE_DPH 85
-#define BRW_OPCODE_DP3 86
-#define BRW_OPCODE_DP2 87
-#define BRW_OPCODE_DPA2 88
-#define BRW_OPCODE_LINE 89
-#define BRW_OPCODE_PLN 90
-#define BRW_OPCODE_NOP 126
+enum opcode {
+ /* These are the actual hardware opcodes. */
+ BRW_OPCODE_MOV = 1,
+ BRW_OPCODE_SEL = 2,
+ BRW_OPCODE_NOT = 4,
+ BRW_OPCODE_AND = 5,
+ BRW_OPCODE_OR = 6,
+ BRW_OPCODE_XOR = 7,
+ BRW_OPCODE_SHR = 8,
+ BRW_OPCODE_SHL = 9,
+ BRW_OPCODE_RSR = 10,
+ BRW_OPCODE_RSL = 11,
+ BRW_OPCODE_ASR = 12,
+ BRW_OPCODE_CMP = 16,
+ BRW_OPCODE_CMPN = 17,
+ BRW_OPCODE_JMPI = 32,
+ BRW_OPCODE_IF = 34,
+ BRW_OPCODE_IFF = 35,
+ BRW_OPCODE_ELSE = 36,
+ BRW_OPCODE_ENDIF = 37,
+ BRW_OPCODE_DO = 38,
+ BRW_OPCODE_WHILE = 39,
+ BRW_OPCODE_BREAK = 40,
+ BRW_OPCODE_CONTINUE = 41,
+ BRW_OPCODE_HALT = 42,
+ BRW_OPCODE_MSAVE = 44,
+ BRW_OPCODE_MRESTORE = 45,
+ BRW_OPCODE_PUSH = 46,
+ BRW_OPCODE_POP = 47,
+ BRW_OPCODE_WAIT = 48,
+ BRW_OPCODE_SEND = 49,
+ BRW_OPCODE_SENDC = 50,
+ BRW_OPCODE_MATH = 56,
+ BRW_OPCODE_ADD = 64,
+ BRW_OPCODE_MUL = 65,
+ BRW_OPCODE_AVG = 66,
+ BRW_OPCODE_FRC = 67,
+ BRW_OPCODE_RNDU = 68,
+ BRW_OPCODE_RNDD = 69,
+ BRW_OPCODE_RNDE = 70,
+ BRW_OPCODE_RNDZ = 71,
+ BRW_OPCODE_MAC = 72,
+ BRW_OPCODE_MACH = 73,
+ BRW_OPCODE_LZD = 74,
+ BRW_OPCODE_SAD2 = 80,
+ BRW_OPCODE_SADA2 = 81,
+ BRW_OPCODE_DP4 = 84,
+ BRW_OPCODE_DPH = 85,
+ BRW_OPCODE_DP3 = 86,
+ BRW_OPCODE_DP2 = 87,
+ BRW_OPCODE_DPA2 = 88,
+ BRW_OPCODE_LINE = 89,
+ BRW_OPCODE_PLN = 90,
+ BRW_OPCODE_NOP = 126,
+
+ /* These are compiler backend opcodes that get translated into other
+ * instructions.
+ */
+ FS_OPCODE_FB_WRITE = 128,
+ SHADER_OPCODE_RCP,
+ SHADER_OPCODE_RSQ,
+ SHADER_OPCODE_SQRT,
+ SHADER_OPCODE_EXP2,
+ SHADER_OPCODE_LOG2,
+ SHADER_OPCODE_POW,
+ SHADER_OPCODE_SIN,
+ SHADER_OPCODE_COS,
+ FS_OPCODE_DDX,
+ FS_OPCODE_DDY,
+ FS_OPCODE_PIXEL_X,
+ FS_OPCODE_PIXEL_Y,
+ FS_OPCODE_CINTERP,
+ FS_OPCODE_LINTERP,
+ FS_OPCODE_TEX,
+ FS_OPCODE_TXB,
+ FS_OPCODE_TXD,
+ FS_OPCODE_TXL,
+ FS_OPCODE_TXS,
+ FS_OPCODE_DISCARD,
+ FS_OPCODE_SPILL,
+ FS_OPCODE_UNSPILL,
+ FS_OPCODE_PULL_CONSTANT_LOAD,
+
+ VS_OPCODE_URB_WRITE,
+ VS_OPCODE_SCRATCH_READ,
+ VS_OPCODE_SCRATCH_WRITE,
+};
#define BRW_PREDICATE_NONE 0
#define BRW_PREDICATE_NORMAL 1
@@ -734,7 +769,6 @@
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
-#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
@@ -747,6 +781,7 @@
#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10
/* for GEN5 only */
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index af41c848308..927b0b4acc9 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -309,6 +309,35 @@ char *target_function[16] = {
[BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner"
};
+char *target_function_gen6[16] = {
+ [BRW_MESSAGE_TARGET_NULL] = "null",
+ [BRW_MESSAGE_TARGET_MATH] = "math",
+ [BRW_MESSAGE_TARGET_SAMPLER] = "sampler",
+ [BRW_MESSAGE_TARGET_GATEWAY] = "gateway",
+ [GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE] = "sampler",
+ [GEN6_MESSAGE_TARGET_DP_RENDER_CACHE] = "render",
+ [GEN6_MESSAGE_TARGET_DP_CONST_CACHE] = "const",
+ [BRW_MESSAGE_TARGET_URB] = "urb",
+ [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner"
+};
+
+char *dp_rc_msg_type_gen6[16] = {
+ [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read",
+ [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read",
+ [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read",
+ [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read",
+ [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read",
+ [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read",
+ [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write",
+};
+
char *math_function[16] = {
[BRW_MATH_FUNCTION_INV] = "inv",
[BRW_MATH_FUNCTION_LOG] = "log",
@@ -927,8 +956,14 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
newline (file);
pad (file, 16);
space = 0;
- err |= control (file, "target function", target_function,
- target, &space);
+
+ if (gen >= 6) {
+ err |= control (file, "target function", target_function_gen6,
+ target, &space);
+ } else {
+ err |= control (file, "target function", target_function,
+ target, &space);
+ }
switch (target) {
case BRW_MESSAGE_TARGET_MATH:
@@ -985,9 +1020,16 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
inst->bits3.dp_read.msg_type);
}
break;
+
case BRW_MESSAGE_TARGET_DATAPORT_WRITE:
if (gen >= 6) {
- format (file, " (%d, %d, %d, %d, %d, %d)",
+ format (file, " (");
+
+ err |= control (file, "DP rc message type",
+ dp_rc_msg_type_gen6,
+ inst->bits3.gen6_dp.msg_type, &space);
+
+ format (file, ", %d, %d, %d, %d, %d, %d)",
inst->bits3.gen6_dp.binding_table_index,
inst->bits3.gen6_dp.msg_control,
inst->bits3.gen6_dp.msg_type,
@@ -1003,6 +1045,7 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
inst->bits3.dp_write.send_commit_msg);
}
break;
+
case BRW_MESSAGE_TARGET_URB:
if (gen >= 5) {
format (file, " %d", inst->bits3.urb_gen5.offset);
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 56a46ced6e3..7bc69c612e3 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -689,17 +689,17 @@ static void brw_prepare_indices(struct brw_context *brw)
* rebase it into a temporary.
*/
if ((get_size(index_buffer->type) - 1) & offset) {
- GLubyte *map = ctx->Driver.MapBuffer(ctx,
- GL_ELEMENT_ARRAY_BUFFER_ARB,
- GL_DYNAMIC_DRAW_ARB,
- bufferobj);
- map += offset;
+ GLubyte *map = ctx->Driver.MapBufferRange(ctx,
+ offset,
+ ib_size,
+ GL_MAP_WRITE_BIT,
+ bufferobj);
intel_upload_data(&brw->intel, map, ib_size, ib_type_size,
&bo, &offset);
brw->ib.start_vertex_offset = offset / ib_type_size;
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj);
+ ctx->Driver.UnmapBuffer(ctx, bufferobj);
} else {
/* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
* the index buffer state when we're just moving the start index
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 72d50eadbce..af50305fc2b 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -44,6 +44,9 @@
#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
+#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1)
+#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2)
+#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
@@ -798,6 +801,12 @@ void brw_init_compile(struct brw_context *, struct brw_compile *p,
void *mem_ctx);
const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz );
+struct brw_instruction *brw_next_insn(struct brw_compile *p, GLuint opcode);
+void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
+ struct brw_reg dest);
+void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
+ struct brw_reg reg);
+
/* Helpers for regular instructions:
*/
@@ -852,6 +861,27 @@ ROUND(RNDE)
/* Helpers for SEND instruction:
*/
+void brw_set_dp_read_message(struct brw_compile *p,
+ struct brw_instruction *insn,
+ GLuint binding_table_index,
+ GLuint msg_control,
+ GLuint msg_type,
+ GLuint target_cache,
+ GLuint msg_length,
+ GLuint response_length);
+
+void brw_set_dp_write_message(struct brw_compile *p,
+ struct brw_instruction *insn,
+ GLuint binding_table_index,
+ GLuint msg_control,
+ GLuint msg_type,
+ GLuint msg_length,
+ GLboolean header_present,
+ GLuint pixel_scoreboard_clear,
+ GLuint response_length,
+ GLuint end_of_thread,
+ GLuint send_commit_msg);
+
void brw_urb_WRITE(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index e7370f36064..c5013de7ec1 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -89,9 +89,9 @@ gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
}
-static void brw_set_dest(struct brw_compile *p,
- struct brw_instruction *insn,
- struct brw_reg dest)
+void
+brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
+ struct brw_reg dest)
{
if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
dest.file != BRW_MESSAGE_REGISTER_FILE)
@@ -221,9 +221,9 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg)
/* 10. Check destination issues. */
}
-static void brw_set_src0(struct brw_compile *p,
- struct brw_instruction *insn,
- struct brw_reg reg)
+void
+brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
+ struct brw_reg reg)
{
if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
assert(reg.nr < 128);
@@ -504,17 +504,18 @@ static void brw_set_urb_message( struct brw_compile *p,
}
}
-static void brw_set_dp_write_message( struct brw_compile *p,
- struct brw_instruction *insn,
- GLuint binding_table_index,
- GLuint msg_control,
- GLuint msg_type,
- GLuint msg_length,
- GLboolean header_present,
- GLuint pixel_scoreboard_clear,
- GLuint response_length,
- GLuint end_of_thread,
- GLuint send_commit_msg)
+void
+brw_set_dp_write_message(struct brw_compile *p,
+ struct brw_instruction *insn,
+ GLuint binding_table_index,
+ GLuint msg_control,
+ GLuint msg_type,
+ GLuint msg_length,
+ GLboolean header_present,
+ GLuint pixel_scoreboard_clear,
+ GLuint response_length,
+ GLuint end_of_thread,
+ GLuint send_commit_msg)
{
struct brw_context *brw = p->brw;
struct intel_context *intel = &brw->intel;
@@ -570,7 +571,7 @@ static void brw_set_dp_write_message( struct brw_compile *p,
}
}
-static void
+void
brw_set_dp_read_message(struct brw_compile *p,
struct brw_instruction *insn,
GLuint binding_table_index,
@@ -709,9 +710,9 @@ static void brw_set_sampler_message(struct brw_compile *p,
}
-
-static struct brw_instruction *next_insn( struct brw_compile *p,
- GLuint opcode )
+#define next_insn brw_next_insn
+struct brw_instruction *
+brw_next_insn(struct brw_compile *p, GLuint opcode)
{
struct brw_instruction *insn;
@@ -732,7 +733,6 @@ static struct brw_instruction *next_insn( struct brw_compile *p,
return insn;
}
-
static struct brw_instruction *brw_alu1( struct brw_compile *p,
GLuint opcode,
struct brw_reg dest,
@@ -1341,8 +1341,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
brw_set_src1(p, insn, brw_imm_ud(0));
insn->bits3.break_cont.jip = br * (do_insn - insn);
- insn->header.execution_size = do_insn->header.execution_size;
- assert(insn->header.execution_size == BRW_EXECUTE_8);
+ insn->header.execution_size = BRW_EXECUTE_8;
} else if (intel->gen == 6) {
insn = next_insn(p, BRW_OPCODE_WHILE);
@@ -1351,8 +1350,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- insn->header.execution_size = do_insn->header.execution_size;
- assert(insn->header.execution_size == BRW_EXECUTE_8);
+ insn->header.execution_size = BRW_EXECUTE_8;
} else {
if (p->single_program_flow) {
insn = next_insn(p, BRW_OPCODE_ADD);
@@ -2246,10 +2244,13 @@ void brw_urb_WRITE(struct brw_compile *p,
if (intel->gen == 7) {
/* Enable Channel Masks in the URB_WRITE_HWORD message header */
+ brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
BRW_REGISTER_TYPE_UD),
retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
brw_imm_ud(0xff00));
+ brw_pop_insn_state(p);
}
insn = next_insn(p, BRW_OPCODE_SEND);
@@ -2311,7 +2312,7 @@ brw_find_loop_end(struct brw_compile *p, int start)
if (insn->header.opcode == BRW_OPCODE_WHILE) {
int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
: insn->bits3.break_cont.jip;
- if (ip + jip / br < start)
+ if (ip + jip / br <= start)
return ip;
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b5ea943387d..0b0445ea142 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -143,20 +143,21 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
return 0;
switch (inst->opcode) {
- case FS_OPCODE_RCP:
- case FS_OPCODE_RSQ:
- case FS_OPCODE_SQRT:
- case FS_OPCODE_EXP2:
- case FS_OPCODE_LOG2:
- case FS_OPCODE_SIN:
- case FS_OPCODE_COS:
+ case SHADER_OPCODE_RCP:
+ case SHADER_OPCODE_RSQ:
+ case SHADER_OPCODE_SQRT:
+ case SHADER_OPCODE_EXP2:
+ case SHADER_OPCODE_LOG2:
+ case SHADER_OPCODE_SIN:
+ case SHADER_OPCODE_COS:
return 1 * c->dispatch_width / 8;
- case FS_OPCODE_POW:
+ case SHADER_OPCODE_POW:
return 2 * c->dispatch_width / 8;
case FS_OPCODE_TEX:
case FS_OPCODE_TXB:
case FS_OPCODE_TXD:
case FS_OPCODE_TXL:
+ case FS_OPCODE_TXS:
return 1;
case FS_OPCODE_FB_WRITE:
return 2;
@@ -181,29 +182,26 @@ fs_visitor::virtual_grf_alloc(int size)
virtual_grf_array_size *= 2;
virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
virtual_grf_array_size);
-
- /* This slot is always unused. */
- virtual_grf_sizes[0] = 0;
}
virtual_grf_sizes[virtual_grf_next] = size;
return virtual_grf_next++;
}
/** Fixed HW reg constructor. */
-fs_reg::fs_reg(enum register_file file, int hw_reg)
+fs_reg::fs_reg(enum register_file file, int reg)
{
init();
this->file = file;
- this->hw_reg = hw_reg;
+ this->reg = reg;
this->type = BRW_REGISTER_TYPE_F;
}
/** Fixed HW reg constructor. */
-fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type)
+fs_reg::fs_reg(enum register_file file, int reg, uint32_t type)
{
init();
this->file = file;
- this->hw_reg = hw_reg;
+ this->reg = reg;
this->type = type;
}
@@ -242,11 +240,12 @@ import_uniforms_callback(const void *key,
* This brings in those uniform definitions
*/
void
-fs_visitor::import_uniforms(struct hash_table *src_variable_ht)
+fs_visitor::import_uniforms(fs_visitor *v)
{
- hash_table_call_foreach(src_variable_ht,
+ hash_table_call_foreach(v->variable_ht,
import_uniforms_callback,
variable_ht);
+ this->params_remap = v->params_remap;
}
/* Our support for uniforms is piggy-backed on the struct
@@ -281,23 +280,27 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
assert(param < ARRAY_SIZE(c->prog_data.param));
- switch (type->base_type) {
- case GLSL_TYPE_FLOAT:
+ if (ctx->Const.NativeIntegers) {
c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
- break;
- case GLSL_TYPE_UINT:
- c->prog_data.param_convert[param] = PARAM_CONVERT_F2U;
- break;
- case GLSL_TYPE_INT:
- c->prog_data.param_convert[param] = PARAM_CONVERT_F2I;
- break;
- case GLSL_TYPE_BOOL:
- c->prog_data.param_convert[param] = PARAM_CONVERT_F2B;
- break;
- default:
- assert(!"not reached");
- c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
- break;
+ } else {
+ switch (type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
+ break;
+ case GLSL_TYPE_UINT:
+ c->prog_data.param_convert[param] = PARAM_CONVERT_F2U;
+ break;
+ case GLSL_TYPE_INT:
+ c->prog_data.param_convert[param] = PARAM_CONVERT_F2I;
+ break;
+ case GLSL_TYPE_BOOL:
+ c->prog_data.param_convert[param] = PARAM_CONVERT_F2B;
+ break;
+ default:
+ assert(!"not reached");
+ c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
+ break;
+ }
}
this->param_index[param] = loc;
this->param_offset[param] = i;
@@ -463,9 +466,21 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
} else {
/* Perspective interpolation case. */
for (unsigned int k = 0; k < type->vector_elements; k++) {
- struct brw_reg interp = interp_reg(location, k);
- emit(FS_OPCODE_LINTERP, attr,
- this->delta_x, this->delta_y, fs_reg(interp));
+ /* FINISHME: At some point we probably want to push
+ * this farther by giving similar treatment to the
+ * other potentially constant components of the
+ * attribute, as well as making brw_vs_constval.c
+ * handle varyings other than gl_TexCoord.
+ */
+ if (location >= FRAG_ATTRIB_TEX0 &&
+ location <= FRAG_ATTRIB_TEX7 &&
+ k == 3 && !(c->key.proj_attrib_mask & (1 << location))) {
+ emit(BRW_OPCODE_MOV, attr, fs_reg(1.0f));
+ } else {
+ struct brw_reg interp = interp_reg(location, k);
+ emit(FS_OPCODE_LINTERP, attr,
+ this->delta_x, this->delta_y, fs_reg(interp));
+ }
attr.reg_offset++;
}
@@ -512,16 +527,16 @@ fs_visitor::emit_frontfacing_interpolation(ir_variable *ir)
}
fs_inst *
-fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
+fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src)
{
switch (opcode) {
- case FS_OPCODE_RCP:
- case FS_OPCODE_RSQ:
- case FS_OPCODE_SQRT:
- case FS_OPCODE_EXP2:
- case FS_OPCODE_LOG2:
- case FS_OPCODE_SIN:
- case FS_OPCODE_COS:
+ case SHADER_OPCODE_RCP:
+ case SHADER_OPCODE_RSQ:
+ case SHADER_OPCODE_SQRT:
+ case SHADER_OPCODE_EXP2:
+ case SHADER_OPCODE_LOG2:
+ case SHADER_OPCODE_SIN:
+ case SHADER_OPCODE_COS:
break;
default:
assert(!"not reached: bad math opcode");
@@ -555,12 +570,12 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
}
fs_inst *
-fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1)
+fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
{
int base_mrf = 2;
fs_inst *inst;
- assert(opcode == FS_OPCODE_POW);
+ assert(opcode == SHADER_OPCODE_POW);
if (intel->gen >= 6) {
/* Can't do hstride == 0 args to gen6 math, so expand it out.
@@ -605,7 +620,7 @@ fs_visitor::setup_paramvalues_refs()
/* Set up the pointers to ParamValues now that that array is finalized. */
for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
c->prog_data.param[i] =
- fp->Base.Parameters->ParameterValues[this->param_index[i]] +
+ (const float *)fp->Base.Parameters->ParameterValues[this->param_index[i]] +
this->param_offset[i];
}
}
@@ -621,12 +636,12 @@ fs_visitor::assign_curb_setup()
}
/* Map the offsets in the UNIFORM file to fixed HW regs. */
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == UNIFORM) {
- int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+ int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs +
constant_nr / 8,
constant_nr % 8);
@@ -684,8 +699,8 @@ fs_visitor::assign_urb_setup()
/* Offset all the urb_setup[] index by the actual position of the
* setup regs, now that the location of the constants has been chosen.
*/
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
if (inst->opcode == FS_OPCODE_LINTERP) {
assert(inst->src[2].file == FIXED_HW_REG);
@@ -739,8 +754,8 @@ fs_visitor::split_virtual_grfs()
split_grf[this->delta_x.reg] = false;
}
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
/* Texturing produces 4 contiguous registers, so no splitting. */
if (inst->is_tex()) {
@@ -763,8 +778,8 @@ fs_visitor::split_virtual_grfs()
}
}
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
if (inst->dst.file == GRF &&
split_grf[inst->dst.reg] &&
@@ -786,6 +801,86 @@ fs_visitor::split_virtual_grfs()
this->live_intervals_valid = false;
}
+bool
+fs_visitor::remove_dead_constants()
+{
+ if (c->dispatch_width == 8) {
+ this->params_remap = ralloc_array(mem_ctx, int, c->prog_data.nr_params);
+
+ for (unsigned int i = 0; i < c->prog_data.nr_params; i++)
+ this->params_remap[i] = -1;
+
+ /* Find which params are still in use. */
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
+
+ for (int i = 0; i < 3; i++) {
+ int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
+
+ if (inst->src[i].file != UNIFORM)
+ continue;
+
+ assert(constant_nr < (int)c->prog_data.nr_params);
+
+ /* For now, set this to non-negative. We'll give it the
+ * actual new number in a moment, in order to keep the
+ * register numbers nicely ordered.
+ */
+ this->params_remap[constant_nr] = 0;
+ }
+ }
+
+ /* Figure out what the new numbers for the params will be. At some
+ * point when we're doing uniform array access, we're going to want
+ * to keep the distinction between .reg and .reg_offset, but for
+ * now we don't care.
+ */
+ unsigned int new_nr_params = 0;
+ for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
+ if (this->params_remap[i] != -1) {
+ this->params_remap[i] = new_nr_params++;
+ }
+ }
+
+ /* Update the list of params to be uploaded to match our new numbering. */
+ for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
+ int remapped = this->params_remap[i];
+
+ if (remapped == -1)
+ continue;
+
+ /* We've already done setup_paramvalues_refs() so no need to worry
+ * about param_index and param_offset.
+ */
+ c->prog_data.param[remapped] = c->prog_data.param[i];
+ c->prog_data.param_convert[remapped] = c->prog_data.param_convert[i];
+ }
+
+ c->prog_data.nr_params = new_nr_params;
+ } else {
+ /* This should have been generated in the 8-wide pass already. */
+ assert(this->params_remap);
+ }
+
+ /* Now do the renumbering of the shader to remove unused params. */
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
+
+ for (int i = 0; i < 3; i++) {
+ int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
+
+ if (inst->src[i].file != UNIFORM)
+ continue;
+
+ assert(this->params_remap[constant_nr] != -1);
+ inst->src[i].reg = this->params_remap[constant_nr];
+ inst->src[i].reg_offset = 0;
+ }
+ }
+
+ return true;
+}
+
/**
* Choose accesses from the UNIFORM file to demote to using the pull
* constant buffer.
@@ -815,14 +910,14 @@ fs_visitor::setup_pull_constants()
int pull_uniform_base = max_uniform_components;
int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base;
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
for (int i = 0; i < 3; i++) {
if (inst->src[i].file != UNIFORM)
continue;
- int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+ int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset;
if (uniform_nr < pull_uniform_base)
continue;
@@ -871,8 +966,8 @@ fs_visitor::calculate_live_intervals()
}
int ip = 0;
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
if (inst->opcode == BRW_OPCODE_DO) {
if (loop_depth++ == 0)
@@ -892,7 +987,7 @@ fs_visitor::calculate_live_intervals()
}
} else {
for (unsigned int i = 0; i < 3; i++) {
- if (inst->src[i].file == GRF && inst->src[i].reg != 0) {
+ if (inst->src[i].file == GRF) {
int reg = inst->src[i].reg;
if (!loop_depth) {
@@ -908,7 +1003,7 @@ fs_visitor::calculate_live_intervals()
}
}
}
- if (inst->dst.file == GRF && inst->dst.reg != 0) {
+ if (inst->dst.file == GRF) {
int reg = inst->dst.reg;
if (!loop_depth) {
@@ -945,8 +1040,8 @@ fs_visitor::propagate_constants()
calculate_live_intervals();
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
if (inst->opcode != BRW_OPCODE_MOV ||
inst->predicated ||
@@ -965,11 +1060,9 @@ fs_visitor::propagate_constants()
/* Found a move of a constant to a GRF. Find anything else using the GRF
* before it's written, and replace it with the constant if we can.
*/
- exec_list_iterator scan_iter = iter;
- scan_iter.next();
- for (; scan_iter.has_next(); scan_iter.next()) {
- fs_inst *scan_inst = (fs_inst *)scan_iter.get();
-
+ for (fs_inst *scan_inst = (fs_inst *)inst->next;
+ !scan_inst->is_tail_sentinel();
+ scan_inst = (fs_inst *)scan_inst->next) {
if (scan_inst->opcode == BRW_OPCODE_DO ||
scan_inst->opcode == BRW_OPCODE_WHILE ||
scan_inst->opcode == BRW_OPCODE_ELSE ||
@@ -1046,6 +1139,24 @@ fs_visitor::propagate_constants()
progress = true;
}
break;
+
+ case SHADER_OPCODE_RCP:
+ /* The hardware doesn't do math on immediate values
+ * (because why are you doing that, seriously?), but
+ * the correct answer is to just constant fold it
+ * anyway.
+ */
+ assert(i == 0);
+ if (inst->src[0].imm.f != 0.0f) {
+ scan_inst->opcode = BRW_OPCODE_MOV;
+ scan_inst->src[0] = inst->src[0];
+ scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f;
+ progress = true;
+ }
+ break;
+
+ default:
+ break;
}
}
@@ -1063,6 +1174,49 @@ fs_visitor::propagate_constants()
return progress;
}
+
+
+/**
+ * Attempts to move immediate constants into the immediate
+ * constant slot of following instructions.
+ *
+ * Immediate constants are a bit tricky -- they have to be in the last
+ * operand slot, you can't do abs/negate on them,
+ */
+
+bool
+fs_visitor::opt_algebraic()
+{
+ bool progress = false;
+
+ calculate_live_intervals();
+
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
+
+ switch (inst->opcode) {
+ case BRW_OPCODE_MUL:
+ if (inst->src[1].file != IMM)
+ continue;
+
+ /* a * 1.0 = a */
+ if (inst->src[1].type == BRW_REGISTER_TYPE_F &&
+ inst->src[1].imm.f == 1.0) {
+ inst->opcode = BRW_OPCODE_MOV;
+ inst->src[1] = reg_undef;
+ progress = true;
+ break;
+ }
+
+ break;
+ default:
+ break;
+ }
+ }
+
+ return progress;
+}
+
/**
* Must be called after calculate_live_intervales() to remove unused
* writes to registers -- register allocation will fail otherwise
@@ -1077,8 +1231,8 @@ fs_visitor::dead_code_eliminate()
calculate_live_intervals();
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list_safe(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) {
inst->remove();
@@ -1101,8 +1255,8 @@ fs_visitor::register_coalesce()
int if_depth = 0;
int loop_depth = 0;
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list_safe(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
/* Make sure that we dominate the instructions we're going to
* scan for interfering with our coalescing, or we won't have
@@ -1123,6 +1277,8 @@ fs_visitor::register_coalesce()
case BRW_OPCODE_ENDIF:
if_depth--;
break;
+ default:
+ break;
}
if (loop_depth || if_depth)
continue;
@@ -1130,7 +1286,8 @@ fs_visitor::register_coalesce()
if (inst->opcode != BRW_OPCODE_MOV ||
inst->predicated ||
inst->saturate ||
- inst->dst.file != GRF || inst->src[0].file != GRF ||
+ inst->dst.file != GRF || (inst->src[0].file != GRF &&
+ inst->src[0].file != UNIFORM)||
inst->dst.type != inst->src[0].type)
continue;
@@ -1141,11 +1298,10 @@ fs_visitor::register_coalesce()
* program.
*/
bool interfered = false;
- exec_list_iterator scan_iter = iter;
- scan_iter.next();
- for (; scan_iter.has_next(); scan_iter.next()) {
- fs_inst *scan_inst = (fs_inst *)scan_iter.get();
+ for (fs_inst *scan_inst = (fs_inst *)inst->next;
+ !scan_inst->is_tail_sentinel();
+ scan_inst = (fs_inst *)scan_inst->next) {
if (scan_inst->dst.file == GRF) {
if (scan_inst->dst.reg == inst->dst.reg &&
(scan_inst->dst.reg_offset == inst->dst.reg_offset ||
@@ -1153,7 +1309,8 @@ fs_visitor::register_coalesce()
interfered = true;
break;
}
- if (scan_inst->dst.reg == inst->src[0].reg &&
+ if (inst->src[0].file == GRF &&
+ scan_inst->dst.reg == inst->src[0].reg &&
(scan_inst->dst.reg_offset == inst->src[0].reg_offset ||
scan_inst->is_tex())) {
interfered = true;
@@ -1161,10 +1318,13 @@ fs_visitor::register_coalesce()
}
}
- /* The gen6 MATH instruction can't handle source modifiers, so avoid
- * coalescing those for now. We should do something more specific.
+ /* The gen6 MATH instruction can't handle source modifiers or
+ * unusual register regions, so avoid coalescing those for
+ * now. We should do something more specific.
*/
- if (intel->gen >= 6 && scan_inst->is_math() && has_source_modifiers) {
+ if (intel->gen >= 6 &&
+ scan_inst->is_math() &&
+ (has_source_modifiers || inst->src[0].file == UNIFORM)) {
interfered = true;
break;
}
@@ -1176,19 +1336,17 @@ fs_visitor::register_coalesce()
/* Rewrite the later usage to point at the source of the move to
* be removed.
*/
- for (exec_list_iterator scan_iter = iter; scan_iter.has_next();
- scan_iter.next()) {
- fs_inst *scan_inst = (fs_inst *)scan_iter.get();
-
+ for (fs_inst *scan_inst = inst;
+ !scan_inst->is_tail_sentinel();
+ scan_inst = (fs_inst *)scan_inst->next) {
for (int i = 0; i < 3; i++) {
if (scan_inst->src[i].file == GRF &&
scan_inst->src[i].reg == inst->dst.reg &&
scan_inst->src[i].reg_offset == inst->dst.reg_offset) {
- scan_inst->src[i].reg = inst->src[0].reg;
- scan_inst->src[i].reg_offset = inst->src[0].reg_offset;
- scan_inst->src[i].abs |= inst->src[0].abs;
- scan_inst->src[i].negate ^= inst->src[0].negate;
- scan_inst->src[i].smear = inst->src[0].smear;
+ fs_reg new_src = inst->src[0];
+ new_src.negate ^= scan_inst->src[i].negate;
+ new_src.abs |= scan_inst->src[i].abs;
+ scan_inst->src[i] = new_src;
}
}
}
@@ -1212,8 +1370,8 @@ fs_visitor::compute_to_mrf()
calculate_live_intervals();
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list_safe(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
int ip = next_ip;
next_ip++;
@@ -1228,9 +1386,9 @@ fs_visitor::compute_to_mrf()
/* Work out which hardware MRF registers are written by this
* instruction.
*/
- int mrf_low = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+ int mrf_low = inst->dst.reg & ~BRW_MRF_COMPR4;
int mrf_high;
- if (inst->dst.hw_reg & BRW_MRF_COMPR4) {
+ if (inst->dst.reg & BRW_MRF_COMPR4) {
mrf_high = mrf_low + 4;
} else if (c->dispatch_width == 16 &&
(!inst->force_uncompressed && !inst->force_sechalf)) {
@@ -1297,7 +1455,7 @@ fs_visitor::compute_to_mrf()
if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
/* Found the creator of our MRF's source value. */
scan_inst->dst.file = MRF;
- scan_inst->dst.hw_reg = inst->dst.hw_reg;
+ scan_inst->dst.reg = inst->dst.reg;
scan_inst->saturate |= inst->saturate;
inst->remove();
progress = true;
@@ -1334,10 +1492,10 @@ fs_visitor::compute_to_mrf()
/* If somebody else writes our MRF here, we can't
* compute-to-MRF before that.
*/
- int scan_mrf_low = scan_inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+ int scan_mrf_low = scan_inst->dst.reg & ~BRW_MRF_COMPR4;
int scan_mrf_high;
- if (scan_inst->dst.hw_reg & BRW_MRF_COMPR4) {
+ if (scan_inst->dst.reg & BRW_MRF_COMPR4) {
scan_mrf_high = scan_mrf_low + 4;
} else if (c->dispatch_width == 16 &&
(!scan_inst->force_uncompressed &&
@@ -1392,8 +1550,8 @@ fs_visitor::remove_duplicate_mrf_writes()
memset(last_mrf_move, 0, sizeof(last_mrf_move));
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list_safe(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
switch (inst->opcode) {
case BRW_OPCODE_DO:
@@ -1409,7 +1567,7 @@ fs_visitor::remove_duplicate_mrf_writes()
if (inst->opcode == BRW_OPCODE_MOV &&
inst->dst.file == MRF) {
- fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg];
+ fs_inst *prev_inst = last_mrf_move[inst->dst.reg];
if (prev_inst && inst->equals(prev_inst)) {
inst->remove();
progress = true;
@@ -1419,7 +1577,7 @@ fs_visitor::remove_duplicate_mrf_writes()
/* Clear out the last-write records for MRFs that were overwritten. */
if (inst->dst.file == MRF) {
- last_mrf_move[inst->dst.hw_reg] = NULL;
+ last_mrf_move[inst->dst.reg] = NULL;
}
if (inst->mlen > 0) {
@@ -1445,7 +1603,7 @@ fs_visitor::remove_duplicate_mrf_writes()
inst->dst.file == MRF &&
inst->src[0].file == GRF &&
!inst->predicated) {
- last_mrf_move[inst->dst.hw_reg] = inst;
+ last_mrf_move[inst->dst.reg] = inst;
}
}
@@ -1527,8 +1685,8 @@ fs_visitor::run()
/* Generate FS IR for main(). (the visitor only descends into
* functions called "main").
*/
- foreach_iter(exec_list_iterator, iter, *shader->ir) {
- ir_instruction *ir = (ir_instruction *)iter.get();
+ foreach_list(node, &*shader->ir) {
+ ir_instruction *ir = (ir_instruction *)node;
base_ir = ir;
this->result = reg_undef;
ir->accept(this);
@@ -1550,11 +1708,14 @@ fs_visitor::run()
progress = remove_duplicate_mrf_writes() || progress;
progress = propagate_constants() || progress;
+ progress = opt_algebraic() || progress;
progress = register_coalesce() || progress;
progress = compute_to_mrf() || progress;
progress = dead_code_eliminate() || progress;
} while (progress);
+ remove_dead_constants();
+
schedule_instructions();
assign_curb_setup();
@@ -1563,7 +1724,7 @@ fs_visitor::run()
if (0) {
/* Debug of register spilling: Go spill everything. */
int virtual_grf_count = virtual_grf_next;
- for (int i = 1; i < virtual_grf_count; i++) {
+ for (int i = 0; i < virtual_grf_count; i++) {
spill_reg(i);
}
}
@@ -1625,7 +1786,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
fs_visitor v(c, prog, shader);
if (!v.run()) {
prog->LinkStatus = GL_FALSE;
- prog->InfoLog = ralloc_strdup(prog, v.fail_msg);
+ ralloc_strcat(&prog->InfoLog, v.fail_msg);
return false;
}
@@ -1633,7 +1794,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) {
c->dispatch_width = 16;
fs_visitor v2(c, prog, shader);
- v2.import_uniforms(v.variable_ht);
+ v2.import_uniforms(&v);
v2.run();
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 2bf850e5dea..10f45f30fe9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -25,6 +25,8 @@
*
*/
+#include "brw_shader.h"
+
extern "C" {
#include <sys/types.h>
@@ -51,37 +53,10 @@ enum register_file {
MRF = BRW_MESSAGE_REGISTER_FILE,
IMM = BRW_IMMEDIATE_VALUE,
FIXED_HW_REG, /* a struct brw_reg */
- UNIFORM, /* prog_data->params[hw_reg] */
+ UNIFORM, /* prog_data->params[reg] */
BAD_FILE
};
-enum fs_opcodes {
- FS_OPCODE_FB_WRITE = 256,
- FS_OPCODE_RCP,
- FS_OPCODE_RSQ,
- FS_OPCODE_SQRT,
- FS_OPCODE_EXP2,
- FS_OPCODE_LOG2,
- FS_OPCODE_POW,
- FS_OPCODE_SIN,
- FS_OPCODE_COS,
- FS_OPCODE_DDX,
- FS_OPCODE_DDY,
- FS_OPCODE_PIXEL_X,
- FS_OPCODE_PIXEL_Y,
- FS_OPCODE_CINTERP,
- FS_OPCODE_LINTERP,
- FS_OPCODE_TEX,
- FS_OPCODE_TXB,
- FS_OPCODE_TXD,
- FS_OPCODE_TXL,
- FS_OPCODE_DISCARD,
- FS_OPCODE_SPILL,
- FS_OPCODE_UNSPILL,
- FS_OPCODE_PULL_CONSTANT_LOAD,
-};
-
-
class fs_reg {
public:
/* Callers of this ralloc-based new need not call delete. It's
@@ -99,7 +74,6 @@ public:
void init()
{
memset(this, 0, sizeof(*this));
- this->hw_reg = -1;
this->smear = -1;
}
@@ -146,8 +120,8 @@ public:
this->type = fixed_hw_reg.type;
}
- fs_reg(enum register_file file, int hw_reg);
- fs_reg(enum register_file file, int hw_reg, uint32_t type);
+ fs_reg(enum register_file file, int reg);
+ fs_reg(enum register_file file, int reg, uint32_t type);
fs_reg(class fs_visitor *v, const struct glsl_type *type);
bool equals(fs_reg *r)
@@ -155,7 +129,6 @@ public:
return (file == r->file &&
reg == r->reg &&
reg_offset == r->reg_offset &&
- hw_reg == r->hw_reg &&
type == r->type &&
negate == r->negate &&
abs == r->abs &&
@@ -167,12 +140,17 @@ public:
/** Register file: ARF, GRF, MRF, IMM. */
enum register_file file;
- /** virtual register number. 0 = fixed hw reg */
+ /**
+ * Register number. For ARF/MRF, it's the hardware register. For
+ * GRF, it's a virtual register number until register allocation.
+ */
int reg;
- /** Offset within the virtual register. */
+ /**
+ * For virtual registers, this is a hardware register offset from
+ * the start of the register block (for example, a constant index
+ * in an array access).
+ */
int reg_offset;
- /** HW register number. Generally unset until register allocation. */
- int hw_reg;
/** Register type. BRW_REGISTER_TYPE_* */
int type;
bool negate;
@@ -224,13 +202,13 @@ public:
init();
}
- fs_inst(int opcode)
+ fs_inst(enum opcode opcode)
{
init();
this->opcode = opcode;
}
- fs_inst(int opcode, fs_reg dst)
+ fs_inst(enum opcode opcode, fs_reg dst)
{
init();
this->opcode = opcode;
@@ -240,7 +218,7 @@ public:
assert(dst.reg_offset >= 0);
}
- fs_inst(int opcode, fs_reg dst, fs_reg src0)
+ fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0)
{
init();
this->opcode = opcode;
@@ -253,7 +231,7 @@ public:
assert(src[0].reg_offset >= 0);
}
- fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
+ fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
{
init();
this->opcode = opcode;
@@ -269,7 +247,7 @@ public:
assert(src[1].reg_offset >= 0);
}
- fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
+ fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
{
init();
this->opcode = opcode;
@@ -313,22 +291,23 @@ public:
return (opcode == FS_OPCODE_TEX ||
opcode == FS_OPCODE_TXB ||
opcode == FS_OPCODE_TXD ||
- opcode == FS_OPCODE_TXL);
+ opcode == FS_OPCODE_TXL ||
+ opcode == FS_OPCODE_TXS);
}
bool is_math()
{
- return (opcode == FS_OPCODE_RCP ||
- opcode == FS_OPCODE_RSQ ||
- opcode == FS_OPCODE_SQRT ||
- opcode == FS_OPCODE_EXP2 ||
- opcode == FS_OPCODE_LOG2 ||
- opcode == FS_OPCODE_SIN ||
- opcode == FS_OPCODE_COS ||
- opcode == FS_OPCODE_POW);
+ return (opcode == SHADER_OPCODE_RCP ||
+ opcode == SHADER_OPCODE_RSQ ||
+ opcode == SHADER_OPCODE_SQRT ||
+ opcode == SHADER_OPCODE_EXP2 ||
+ opcode == SHADER_OPCODE_LOG2 ||
+ opcode == SHADER_OPCODE_SIN ||
+ opcode == SHADER_OPCODE_COS ||
+ opcode == SHADER_OPCODE_POW);
}
- int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
+ enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
fs_reg dst;
fs_reg src[3];
bool saturate;
@@ -402,7 +381,7 @@ public:
this->base_ir = NULL;
this->virtual_grf_sizes = NULL;
- this->virtual_grf_next = 1;
+ this->virtual_grf_next = 0;
this->virtual_grf_array_size = 0;
this->virtual_grf_def = NULL;
this->virtual_grf_use = NULL;
@@ -421,7 +400,7 @@ public:
fs_reg *variable_storage(ir_variable *var);
int virtual_grf_alloc(int size);
- void import_uniforms(struct hash_table *src_variable_ht);
+ void import_uniforms(fs_visitor *v);
void visit(ir_variable *ir);
void visit(ir_assignment *ir);
@@ -445,27 +424,28 @@ public:
fs_inst *emit(fs_inst inst);
- fs_inst *emit(int opcode)
+ fs_inst *emit(enum opcode opcode)
{
return emit(fs_inst(opcode));
}
- fs_inst *emit(int opcode, fs_reg dst)
+ fs_inst *emit(enum opcode opcode, fs_reg dst)
{
return emit(fs_inst(opcode, dst));
}
- fs_inst *emit(int opcode, fs_reg dst, fs_reg src0)
+ fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0)
{
return emit(fs_inst(opcode, dst, src0));
}
- fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
+ fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
{
return emit(fs_inst(opcode, dst, src0, src1));
}
- fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
+ fs_inst *emit(enum opcode opcode, fs_reg dst,
+ fs_reg src0, fs_reg src1, fs_reg src2)
{
return emit(fs_inst(opcode, dst, src0, src1, src2));
}
@@ -485,9 +465,11 @@ public:
void setup_pull_constants();
void calculate_live_intervals();
bool propagate_constants();
+ bool opt_algebraic();
bool register_coalesce();
bool compute_to_mrf();
bool dead_code_eliminate();
+ bool remove_dead_constants();
bool remove_duplicate_mrf_writes();
bool virtual_grf_interferes(int a, int b);
void schedule_instructions();
@@ -524,8 +506,8 @@ public:
int sampler);
fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
int sampler);
- fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0);
- fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1);
+ fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
+ fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
bool try_emit_saturate(ir_expression *ir);
void emit_bool_to_cond_code(ir_rvalue *condition);
void emit_if_gen6(ir_if *ir);
@@ -565,6 +547,13 @@ public:
int *virtual_grf_use;
bool live_intervals_valid;
+ /* This is the map from UNIFORM hw_reg + reg_offset as generated by
+ * the visitor to the packed uniform number after
+ * remove_dead_constants() that represents the actual uploaded
+ * uniform index.
+ */
+ int *params_remap;
+
struct hash_table *variable_ht;
ir_variable *frag_color, *frag_data, *frag_depth;
int first_non_payload_grf;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 1d89b8f1d11..28efbd3605f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -59,7 +59,8 @@ fs_visitor::generate_fb_write(fs_inst *inst)
if (inst->target > 0) {
/* Set the render target index for choosing BLEND_STATE. */
- brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2),
+ brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+ inst->base_mrf, 2),
BRW_REGISTER_TYPE_UD),
brw_imm_ud(inst->target));
}
@@ -145,43 +146,12 @@ void
fs_visitor::generate_math(fs_inst *inst,
struct brw_reg dst, struct brw_reg *src)
{
- int op;
-
- switch (inst->opcode) {
- case FS_OPCODE_RCP:
- op = BRW_MATH_FUNCTION_INV;
- break;
- case FS_OPCODE_RSQ:
- op = BRW_MATH_FUNCTION_RSQ;
- break;
- case FS_OPCODE_SQRT:
- op = BRW_MATH_FUNCTION_SQRT;
- break;
- case FS_OPCODE_EXP2:
- op = BRW_MATH_FUNCTION_EXP;
- break;
- case FS_OPCODE_LOG2:
- op = BRW_MATH_FUNCTION_LOG;
- break;
- case FS_OPCODE_POW:
- op = BRW_MATH_FUNCTION_POW;
- break;
- case FS_OPCODE_SIN:
- op = BRW_MATH_FUNCTION_SIN;
- break;
- case FS_OPCODE_COS:
- op = BRW_MATH_FUNCTION_COS;
- break;
- default:
- assert(!"not reached: unknown math function");
- op = 0;
- break;
- }
+ int op = brw_math_function(inst->opcode);
if (intel->gen >= 6) {
assert(inst->mlen == 0);
- if (inst->opcode == FS_OPCODE_POW) {
+ if (inst->opcode == SHADER_OPCODE_POW) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math2(p, dst, op, src[0], src[1]);
@@ -272,10 +242,16 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
}
break;
+ case FS_OPCODE_TXS:
+ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
+ break;
case FS_OPCODE_TXD:
/* There is no sample_d_c message; comparisons are done manually */
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
break;
+ default:
+ assert(!"not reached");
+ break;
}
} else {
switch (inst->opcode) {
@@ -316,6 +292,14 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
assert(inst->mlen == 7 || inst->mlen == 10);
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
break;
+ case FS_OPCODE_TXS:
+ assert(inst->mlen == 3);
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO;
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+ break;
+ default:
+ assert(!"not reached");
+ break;
}
}
assert(msg_type != -1);
@@ -537,11 +521,9 @@ brw_reg_from_fs_reg(fs_reg *reg)
case ARF:
case MRF:
if (reg->smear == -1) {
- brw_reg = brw_vec8_reg(reg->file,
- reg->hw_reg, 0);
+ brw_reg = brw_vec8_reg(reg->file, reg->reg, 0);
} else {
- brw_reg = brw_vec1_reg(reg->file,
- reg->hw_reg, reg->smear);
+ brw_reg = brw_vec1_reg(reg->file, reg->reg, reg->smear);
}
brw_reg = retype(brw_reg, reg->type);
if (reg->sechalf)
@@ -608,8 +590,8 @@ fs_visitor::generate_code()
prog->Name, c->dispatch_width);
}
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
struct brw_reg src[3], dst;
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
@@ -656,6 +638,11 @@ fs_visitor::generate_code()
case BRW_OPCODE_MUL:
brw_MUL(p, dst, src[0], src[1]);
break;
+ case BRW_OPCODE_MACH:
+ brw_set_acc_write_control(p, 1);
+ brw_MACH(p, dst, src[0], src[1]);
+ brw_set_acc_write_control(p, 0);
+ break;
case BRW_OPCODE_FRC:
brw_FRC(p, dst, src[0]);
@@ -770,14 +757,14 @@ fs_visitor::generate_code()
}
break;
- case FS_OPCODE_RCP:
- case FS_OPCODE_RSQ:
- case FS_OPCODE_SQRT:
- case FS_OPCODE_EXP2:
- case FS_OPCODE_LOG2:
- case FS_OPCODE_POW:
- case FS_OPCODE_SIN:
- case FS_OPCODE_COS:
+ case SHADER_OPCODE_RCP:
+ case SHADER_OPCODE_RSQ:
+ case SHADER_OPCODE_SQRT:
+ case SHADER_OPCODE_EXP2:
+ case SHADER_OPCODE_LOG2:
+ case SHADER_OPCODE_POW:
+ case SHADER_OPCODE_SIN:
+ case SHADER_OPCODE_COS:
generate_math(inst, dst, src);
break;
case FS_OPCODE_PIXEL_X:
@@ -796,6 +783,7 @@ fs_visitor::generate_code()
case FS_OPCODE_TXB:
case FS_OPCODE_TXD:
case FS_OPCODE_TXL:
+ case FS_OPCODE_TXS:
generate_tex(inst, dst, src[0]);
break;
case FS_OPCODE_DISCARD:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index b4689d2c293..7c5414ac26c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -25,23 +25,6 @@
*
*/
-extern "C" {
-
-#include <sys/types.h>
-
-#include "main/macros.h"
-#include "main/shaderobj.h"
-#include "main/uniforms.h"
-#include "program/prog_parameter.h"
-#include "program/prog_print.h"
-#include "program/prog_optimize.h"
-#include "program/register_allocate.h"
-#include "program/sampler.h"
-#include "program/hash_table.h"
-#include "brw_context.h"
-#include "brw_eu.h"
-#include "brw_wm.h"
-}
#include "brw_fs.h"
#include "../glsl/glsl_types.h"
#include "../glsl/ir_optimization.h"
@@ -50,45 +33,115 @@ extern "C" {
static void
assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width)
{
- if (reg->file == GRF && reg->reg != 0) {
+ if (reg->file == GRF) {
assert(reg->reg_offset >= 0);
- reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width;
- reg->reg = 0;
+ reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width;
+ reg->reg_offset = 0;
}
}
void
fs_visitor::assign_regs_trivial()
{
- int last_grf = 0;
- int hw_reg_mapping[this->virtual_grf_next];
+ int hw_reg_mapping[this->virtual_grf_next + 1];
int i;
int reg_width = c->dispatch_width / 8;
- hw_reg_mapping[0] = 0;
/* Note that compressed instructions require alignment to 2 registers. */
- hw_reg_mapping[1] = ALIGN(this->first_non_payload_grf, reg_width);
- for (i = 2; i < this->virtual_grf_next; i++) {
+ hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width);
+ for (i = 1; i <= this->virtual_grf_next; i++) {
hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
this->virtual_grf_sizes[i - 1] * reg_width);
}
- last_grf = hw_reg_mapping[i - 1] + (this->virtual_grf_sizes[i - 1] *
- reg_width);
+ this->grf_used = hw_reg_mapping[this->virtual_grf_next];
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
assign_reg(hw_reg_mapping, &inst->dst, reg_width);
assign_reg(hw_reg_mapping, &inst->src[0], reg_width);
assign_reg(hw_reg_mapping, &inst->src[1], reg_width);
}
- if (last_grf >= BRW_MAX_GRF) {
+ if (this->grf_used >= BRW_MAX_GRF) {
fail("Ran out of regs on trivial allocator (%d/%d)\n",
- last_grf, BRW_MAX_GRF);
+ this->grf_used, BRW_MAX_GRF);
+ }
+
+}
+
+static void
+brw_alloc_reg_set_for_classes(struct brw_context *brw,
+ int *class_sizes,
+ int class_count,
+ int reg_width,
+ int base_reg_count)
+{
+ struct intel_context *intel = &brw->intel;
+
+ /* Compute the total number of registers across all classes. */
+ int ra_reg_count = 0;
+ for (int i = 0; i < class_count; i++) {
+ ra_reg_count += base_reg_count - (class_sizes[i] - 1);
+ }
+
+ ralloc_free(brw->wm.ra_reg_to_grf);
+ brw->wm.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
+ ralloc_free(brw->wm.regs);
+ brw->wm.regs = ra_alloc_reg_set(ra_reg_count);
+ ralloc_free(brw->wm.classes);
+ brw->wm.classes = ralloc_array(brw, int, class_count + 1);
+
+ brw->wm.aligned_pairs_class = -1;
+
+ /* Now, add the registers to their classes, and add the conflicts
+ * between them and the base GRF registers (and also each other).
+ */
+ int reg = 0;
+ int pairs_base_reg = 0;
+ int pairs_reg_count = 0;
+ for (int i = 0; i < class_count; i++) {
+ int class_reg_count = base_reg_count - (class_sizes[i] - 1);
+ brw->wm.classes[i] = ra_alloc_reg_class(brw->wm.regs);
+
+ /* Save this off for the aligned pair class at the end. */
+ if (class_sizes[i] == 2) {
+ pairs_base_reg = reg;
+ pairs_reg_count = class_reg_count;
+ }
+
+ for (int j = 0; j < class_reg_count; j++) {
+ ra_class_add_reg(brw->wm.regs, brw->wm.classes[i], reg);
+
+ brw->wm.ra_reg_to_grf[reg] = j;
+
+ for (int base_reg = j;
+ base_reg < j + class_sizes[i];
+ base_reg++) {
+ ra_add_transitive_reg_conflict(brw->wm.regs, base_reg, reg);
+ }
+
+ reg++;
+ }
+ }
+ assert(reg == ra_reg_count);
+
+ /* Add a special class for aligned pairs, which we'll put delta_x/y
+ * in on gen5 so that we can do PLN.
+ */
+ if (brw->has_pln && reg_width == 1 && intel->gen < 6) {
+ brw->wm.aligned_pairs_class = ra_alloc_reg_class(brw->wm.regs);
+
+ for (int i = 0; i < pairs_reg_count; i++) {
+ if ((brw->wm.ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
+ ra_class_add_reg(brw->wm.regs, brw->wm.aligned_pairs_class,
+ pairs_base_reg + i);
+ }
+ }
+ class_count++;
}
- this->grf_used = last_grf + reg_width;
+ ra_set_finalize(brw->wm.regs);
}
bool
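As a quick sanity check on the sizing logic in brw_alloc_reg_set_for_classes() above: a class whose virtual registers occupy `size` contiguous hardware GRFs has base_reg_count - (size - 1) legal starting positions, and the allocator's flat register namespace is the sum of those counts over all classes. The base register count and class sizes in the sketch below are made-up illustration values, not the driver's.

#include <stdio.h>

int
main(void)
{
   int base_reg_count = 96;            /* illustrative: GRFs left after the payload */
   int class_sizes[] = { 1, 2, 4, 8 }; /* hypothetical contiguous-register classes */
   int class_count = 4;
   int ra_reg_count = 0;

   for (int i = 0; i < class_count; i++) {
      int class_reg_count = base_reg_count - (class_sizes[i] - 1);
      printf("class of size %d: %d placements\n",
             class_sizes[i], class_reg_count);
      ra_reg_count += class_reg_count;
   }

   printf("total ra registers: %d\n", ra_reg_count);
   return 0;
}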
@@ -101,12 +154,11 @@ fs_visitor::assign_regs()
* for reg_width == 2.
*/
int reg_width = c->dispatch_width / 8;
- int hw_reg_mapping[this->virtual_grf_next + 1];
+ int hw_reg_mapping[this->virtual_grf_next];
int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width);
int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
int class_sizes[base_reg_count];
int class_count = 0;
- int aligned_pair_class = -1;
calculate_live_intervals();
@@ -125,7 +177,7 @@ fs_visitor::assign_regs()
*/
class_sizes[class_count++] = 2;
}
- for (int r = 1; r < this->virtual_grf_next; r++) {
+ for (int r = 0; r < this->virtual_grf_next; r++) {
int i;
for (i = 0; i < class_count; i++) {
@@ -141,94 +193,26 @@ fs_visitor::assign_regs()
}
}
- int ra_reg_count = 0;
- int class_base_reg[class_count];
- int class_reg_count[class_count];
- int classes[class_count + 1];
-
- for (int i = 0; i < class_count; i++) {
- class_base_reg[i] = ra_reg_count;
- class_reg_count[i] = base_reg_count - (class_sizes[i] - 1);
- ra_reg_count += class_reg_count[i];
- }
-
- struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
- for (int i = 0; i < class_count; i++) {
- classes[i] = ra_alloc_reg_class(regs);
-
- for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
- ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r);
- }
+ brw_alloc_reg_set_for_classes(brw, class_sizes, class_count,
+ reg_width, base_reg_count);
- /* Add conflicts between our contiguous registers aliasing
- * base regs and other register classes' contiguous registers
- * that alias base regs, or the base regs themselves for classes[0].
- */
- for (int c = 0; c <= i; c++) {
- for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
- for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1));
- c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]);
- c_r++) {
-
- if (0) {
- printf("%d/%d conflicts %d/%d\n",
- class_sizes[i], first_assigned_grf + i_r,
- class_sizes[c], first_assigned_grf + c_r);
- }
-
- ra_add_reg_conflict(regs,
- class_base_reg[i] + i_r,
- class_base_reg[c] + c_r);
- }
- }
- }
- }
-
- /* Add a special class for aligned pairs, which we'll put delta_x/y
- * in on gen5 so that we can do PLN.
- */
- if (brw->has_pln && reg_width == 1 && intel->gen < 6) {
- int reg_count = (base_reg_count - 1) / 2;
- int unaligned_pair_class = 1;
- assert(class_sizes[unaligned_pair_class] == 2);
-
- aligned_pair_class = class_count;
- classes[aligned_pair_class] = ra_alloc_reg_class(regs);
- class_sizes[aligned_pair_class] = 2;
- class_base_reg[aligned_pair_class] = 0;
- class_reg_count[aligned_pair_class] = 0;
- int start = (first_assigned_grf & 1) ? 1 : 0;
-
- for (int i = 0; i < reg_count; i++) {
- ra_class_add_reg(regs, classes[aligned_pair_class],
- class_base_reg[unaligned_pair_class] + i * 2 + start);
- }
- class_count++;
- }
-
- ra_set_finalize(regs);
-
- struct ra_graph *g = ra_alloc_interference_graph(regs,
+ struct ra_graph *g = ra_alloc_interference_graph(brw->wm.regs,
this->virtual_grf_next);
- /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1
- * with nodes.
- */
- ra_set_node_class(g, 0, classes[0]);
- for (int i = 1; i < this->virtual_grf_next; i++) {
+ for (int i = 0; i < this->virtual_grf_next; i++) {
for (int c = 0; c < class_count; c++) {
if (class_sizes[c] == this->virtual_grf_sizes[i]) {
- if (aligned_pair_class >= 0 &&
+ if (brw->wm.aligned_pairs_class >= 0 &&
this->delta_x.reg == i) {
- ra_set_node_class(g, i, classes[aligned_pair_class]);
+ ra_set_node_class(g, i, brw->wm.aligned_pairs_class);
} else {
- ra_set_node_class(g, i, classes[c]);
+ ra_set_node_class(g, i, brw->wm.classes[c]);
}
break;
}
}
- for (int j = 1; j < i; j++) {
+ for (int j = 0; j < i; j++) {
if (virtual_grf_interferes(i, j)) {
ra_add_node_interference(g, i, j);
}
@@ -253,7 +237,6 @@ fs_visitor::assign_regs()
ralloc_free(g);
- ralloc_free(regs);
return false;
}
@@ -263,28 +246,18 @@ fs_visitor::assign_regs()
* numbers.
*/
this->grf_used = first_assigned_grf;
- hw_reg_mapping[0] = 0; /* unused */
- for (int i = 1; i < this->virtual_grf_next; i++) {
+ for (int i = 0; i < this->virtual_grf_next; i++) {
int reg = ra_get_node_reg(g, i);
- int hw_reg = -1;
-
- for (int c = 0; c < class_count; c++) {
- if (reg >= class_base_reg[c] &&
- reg < class_base_reg[c] + class_reg_count[c]) {
- hw_reg = reg - class_base_reg[c];
- break;
- }
- }
- assert(hw_reg >= 0);
- hw_reg_mapping[i] = first_assigned_grf + hw_reg * reg_width;
+ hw_reg_mapping[i] = (first_assigned_grf +
+ brw->wm.ra_reg_to_grf[reg] * reg_width);
this->grf_used = MAX2(this->grf_used,
hw_reg_mapping[i] + this->virtual_grf_sizes[i] *
reg_width);
}
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
assign_reg(hw_reg_mapping, &inst->dst, reg_width);
assign_reg(hw_reg_mapping, &inst->src[0], reg_width);
@@ -292,7 +265,6 @@ fs_visitor::assign_regs()
}
ralloc_free(g);
- ralloc_free(regs);
return true;
}
@@ -336,8 +308,8 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
* spill/unspill we'll have to do, and guess that the insides of
* loops run 10 times.
*/
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
@@ -370,6 +342,9 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
if (inst->dst.file == GRF)
no_spill[inst->dst.reg] = true;
break;
+
+ default:
+ break;
}
}
@@ -394,8 +369,8 @@ fs_visitor::spill_reg(int spill_reg)
* virtual grf of the same size. For most instructions, though, we
* could just spill/unspill the GRF being accessed.
*/
- foreach_iter(exec_list_iterator, iter, this->instructions) {
- fs_inst *inst = (fs_inst *)iter.get();
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF &&
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index d8218c26edb..0ea4e5c36f0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -25,21 +25,6 @@
*
*/
-extern "C" {
-
-#include <sys/types.h>
-
-#include "main/macros.h"
-#include "main/shaderobj.h"
-#include "main/uniforms.h"
-#include "program/prog_optimize.h"
-#include "program/register_allocate.h"
-#include "program/sampler.h"
-#include "program/hash_table.h"
-#include "brw_context.h"
-#include "brw_eu.h"
-#include "brw_wm.h"
-}
#include "brw_fs.h"
#include "../glsl/glsl_types.h"
#include "../glsl/ir_optimization.h"
@@ -84,26 +69,26 @@ public:
int math_latency = 22;
switch (inst->opcode) {
- case FS_OPCODE_RCP:
+ case SHADER_OPCODE_RCP:
this->latency = 1 * chans * math_latency;
break;
- case FS_OPCODE_RSQ:
+ case SHADER_OPCODE_RSQ:
this->latency = 2 * chans * math_latency;
break;
- case FS_OPCODE_SQRT:
- case FS_OPCODE_LOG2:
+ case SHADER_OPCODE_SQRT:
+ case SHADER_OPCODE_LOG2:
/* full precision log. partial is 2. */
this->latency = 3 * chans * math_latency;
break;
- case FS_OPCODE_EXP2:
+ case SHADER_OPCODE_EXP2:
/* full precision. partial is 3, same throughput. */
this->latency = 4 * chans * math_latency;
break;
- case FS_OPCODE_POW:
+ case SHADER_OPCODE_POW:
this->latency = 8 * chans * math_latency;
break;
- case FS_OPCODE_SIN:
- case FS_OPCODE_COS:
+ case SHADER_OPCODE_SIN:
+ case SHADER_OPCODE_COS:
/* minimum latency, max is 12 rounds. */
this->latency = 5 * chans * math_latency;
break;
@@ -283,8 +268,8 @@ instruction_scheduler::calculate_deps()
memset(last_mrf_write, 0, sizeof(last_mrf_write));
/* top-to-bottom dependencies: RAW and WAW. */
- foreach_iter(exec_list_iterator, iter, instructions) {
- schedule_node *n = (schedule_node *)iter.get();
+ foreach_list(node, &instructions) {
+ schedule_node *n = (schedule_node *)node;
fs_inst *inst = n->inst;
/* read-after-write deps. */
@@ -321,12 +306,12 @@ instruction_scheduler::calculate_deps()
add_dep(last_grf_write[inst->dst.reg], n);
last_grf_write[inst->dst.reg] = n;
} else if (inst->dst.file == MRF) {
- int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+ int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
add_dep(last_mrf_write[reg], n);
last_mrf_write[reg] = n;
if (is_compressed(inst)) {
- if (inst->dst.hw_reg & BRW_MRF_COMPR4)
+ if (inst->dst.reg & BRW_MRF_COMPR4)
reg += 4;
else
reg++;
@@ -401,12 +386,12 @@ instruction_scheduler::calculate_deps()
if (inst->dst.file == GRF) {
last_grf_write[inst->dst.reg] = n;
} else if (inst->dst.file == MRF) {
- int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+ int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
last_mrf_write[reg] = n;
if (is_compressed(inst)) {
- if (inst->dst.hw_reg & BRW_MRF_COMPR4)
+ if (inst->dst.reg & BRW_MRF_COMPR4)
reg += 4;
else
reg++;
@@ -437,8 +422,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
int time = 0;
/* Remove non-DAG heads from the list. */
- foreach_iter(exec_list_iterator, iter, instructions) {
- schedule_node *n = (schedule_node *)iter.get();
+ foreach_list_safe(node, &instructions) {
+ schedule_node *n = (schedule_node *)node;
if (n->parent_count != 0)
n->remove();
}
@@ -447,8 +432,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
schedule_node *chosen = NULL;
int chosen_time = 0;
- foreach_iter(exec_list_iterator, iter, instructions) {
- schedule_node *n = (schedule_node *)iter.get();
+ foreach_list(node, &instructions) {
+ schedule_node *n = (schedule_node *)node;
if (!chosen || n->unblocked_time < chosen_time) {
chosen = n;
@@ -490,8 +475,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
* progress until the first is done.
*/
if (chosen->inst->is_math()) {
- foreach_iter(exec_list_iterator, iter, instructions) {
- schedule_node *n = (schedule_node *)iter.get();
+ foreach_list(node, &instructions) {
+ schedule_node *n = (schedule_node *)node;
if (n->inst->is_math())
n->unblocked_time = MAX2(n->unblocked_time,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
index 530ffa26580..a9a60c2fd8a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
@@ -122,8 +122,8 @@ ir_vector_reference_visitor::get_variable_entry(ir_variable *var)
break;
}
- foreach_iter(exec_list_iterator, iter, this->variable_list) {
- variable_entry *entry = (variable_entry *)iter.get();
+ foreach_list(node, &this->variable_list) {
+ variable_entry *entry = (variable_entry *)node;
if (entry->var == var)
return entry;
}
@@ -222,8 +222,8 @@ ir_vector_splitting_visitor::get_splitting_entry(ir_variable *var)
if (!var->type->is_vector())
return NULL;
- foreach_iter(exec_list_iterator, iter, *this->variable_list) {
- variable_entry *entry = (variable_entry *)iter.get();
+ foreach_list(node, &*this->variable_list) {
+ variable_entry *entry = (variable_entry *)node;
if (entry->var == var) {
return entry;
}
@@ -341,8 +341,8 @@ brw_do_vector_splitting(exec_list *instructions)
visit_list_elements(&refs, instructions);
/* Trim out variables we can't split. */
- foreach_iter(exec_list_iterator, iter, refs.variable_list) {
- variable_entry *entry = (variable_entry *)iter.get();
+ foreach_list_safe(node, &refs.variable_list) {
+ variable_entry *entry = (variable_entry *)node;
if (debug) {
printf("vector %s@%p: decl %d, whole_access %d\n",
@@ -363,8 +363,8 @@ brw_do_vector_splitting(exec_list *instructions)
/* Replace the decls of the vectors to be split with their split
* components.
*/
- foreach_iter(exec_list_iterator, iter, refs.variable_list) {
- variable_entry *entry = (variable_entry *)iter.get();
+ foreach_list(node, &refs.variable_list) {
+ variable_entry *entry = (variable_entry *)node;
const struct glsl_type *type;
type = glsl_type::get_instance(entry->var->type->base_type, 1, 1);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index cbe5cf428c5..cdaf543c88b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -142,9 +142,7 @@ fs_visitor::visit(ir_dereference_array *ir)
this->result.type = brw_type_for_base_type(ir->type);
if (index) {
- assert(this->result.file == UNIFORM ||
- (this->result.file == GRF &&
- this->result.reg != 0));
+ assert(this->result.file == UNIFORM || this->result.file == GRF);
this->result.reg_offset += index->value.i[0] * element_size;
} else {
assert(!"FINISHME: non-constant array element");
@@ -252,14 +250,14 @@ fs_visitor::visit(ir_expression *ir)
break;
case ir_unop_rcp:
- emit_math(FS_OPCODE_RCP, this->result, op[0]);
+ emit_math(SHADER_OPCODE_RCP, this->result, op[0]);
break;
case ir_unop_exp2:
- emit_math(FS_OPCODE_EXP2, this->result, op[0]);
+ emit_math(SHADER_OPCODE_EXP2, this->result, op[0]);
break;
case ir_unop_log2:
- emit_math(FS_OPCODE_LOG2, this->result, op[0]);
+ emit_math(SHADER_OPCODE_LOG2, this->result, op[0]);
break;
case ir_unop_exp:
case ir_unop_log:
@@ -267,11 +265,11 @@ fs_visitor::visit(ir_expression *ir)
break;
case ir_unop_sin:
case ir_unop_sin_reduced:
- emit_math(FS_OPCODE_SIN, this->result, op[0]);
+ emit_math(SHADER_OPCODE_SIN, this->result, op[0]);
break;
case ir_unop_cos:
case ir_unop_cos_reduced:
- emit_math(FS_OPCODE_COS, this->result, op[0]);
+ emit_math(SHADER_OPCODE_COS, this->result, op[0]);
break;
case ir_unop_dFdx:
@@ -289,7 +287,23 @@ fs_visitor::visit(ir_expression *ir)
break;
case ir_binop_mul:
- emit(BRW_OPCODE_MUL, this->result, op[0], op[1]);
+ if (ir->type->is_integer()) {
+ /* For integer multiplication, the MUL uses the low 16 bits
+ * of one of the operands (src0 on gen6, src1 on gen7). The
+ * MACH accumulates the contribution of the upper 16 bits
+ * of that operand.
+ *
+ * FINISHME: Emit just the MUL if we know an operand is small
+ * enough.
+ */
+ struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
+
+ emit(BRW_OPCODE_MUL, acc, op[0], op[1]);
+ emit(BRW_OPCODE_MACH, reg_null_d, op[0], op[1]);
+ emit(BRW_OPCODE_MOV, this->result, fs_reg(acc));
+ } else {
+ emit(BRW_OPCODE_MUL, this->result, op[0], op[1]);
+ }
break;
case ir_binop_div:
assert(!"not reached: should be handled by ir_div_to_mul_rcp");
@@ -342,11 +356,11 @@ fs_visitor::visit(ir_expression *ir)
break;
case ir_unop_sqrt:
- emit_math(FS_OPCODE_SQRT, this->result, op[0]);
+ emit_math(SHADER_OPCODE_SQRT, this->result, op[0]);
break;
case ir_unop_rsq:
- emit_math(FS_OPCODE_RSQ, this->result, op[0]);
+ emit_math(SHADER_OPCODE_RSQ, this->result, op[0]);
break;
case ir_unop_i2u:
@@ -425,7 +439,7 @@ fs_visitor::visit(ir_expression *ir)
break;
case ir_binop_pow:
- emit_math(FS_OPCODE_POW, this->result, op[0], op[1]);
+ emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
break;
case ir_unop_bit_not:
@@ -496,7 +510,7 @@ fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
void
fs_visitor::visit(ir_assignment *ir)
{
- struct fs_reg l, r;
+ fs_reg l, r;
fs_inst *inst;
/* FINISHME: arrays on the lhs */
@@ -603,9 +617,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
/* gen4's SIMD8 sampler always has the slots for u,v,r present. */
mlen += 3;
} else if (ir->op == ir_txd) {
+ this->result = reg_undef;
ir->lod_info.grad.dPdx->accept(this);
fs_reg dPdx = this->result;
+ this->result = reg_undef;
ir->lod_info.grad.dPdy->accept(this);
fs_reg dPdy = this->result;
@@ -620,6 +636,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
* dPdx = dudx, dvdx, drdx
* dPdy = dudy, dvdy, drdy
*
+ * 1-arg: Does not exist.
+ *
* 2-arg: dudx dvdx dudy dvdy
* dPdx.x dPdx.y dPdy.x dPdy.y
* m4 m5 m6 m7
@@ -631,18 +649,26 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx);
dPdx.reg_offset++;
- mlen++;
}
+ mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2);
for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) {
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy);
dPdy.reg_offset++;
- mlen++;
}
+ mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2);
+ } else if (ir->op == ir_txs) {
+ /* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. */
+ simd16 = true;
+ this->result = reg_undef;
+ ir->lod_info.lod->accept(this);
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result);
+ mlen += 2;
} else {
/* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod
* instructions. We'll need to do SIMD16 here.
*/
+ simd16 = true;
assert(ir->op == ir_txb || ir->op == ir_txl);
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
@@ -671,16 +697,19 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
/* The unused upper half. */
mlen++;
+ }
+ if (simd16) {
/* Now, since we're doing simd16, the return is 2 interleaved
* vec4s where the odd-indexed ones are junk. We'll need to move
* this weirdness around to the expected layout.
*/
- simd16 = true;
orig_dst = dst;
- dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type,
- 2));
- dst.type = BRW_REGISTER_TYPE_F;
+ const glsl_type *vec_type =
+ glsl_type::get_instance(ir->type->base_type, 4, 1);
+ dst = fs_reg(this, glsl_type::get_array_instance(vec_type, 2));
+ dst.type = intel->is_g4x ? brw_type_for_base_type(ir->type)
+ : BRW_REGISTER_TYPE_F;
}
fs_inst *inst = NULL;
@@ -697,6 +726,9 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
case ir_txd:
inst = emit(FS_OPCODE_TXD, dst);
break;
+ case ir_txs:
+ inst = emit(FS_OPCODE_TXS, dst);
+ break;
case ir_txf:
assert(!"GLSL 1.30 features unsupported");
break;
@@ -732,6 +764,8 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
int base_mrf = 2;
int reg_width = c->dispatch_width / 8;
bool header_present = false;
+ const int vector_elements =
+ ir->coordinate ? ir->coordinate->type->vector_elements : 0;
if (ir->offset) {
/* The offsets set up by the ir_texture visitor are in the
@@ -742,7 +776,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
base_mrf--;
}
- for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
+ for (int i = 0; i < vector_elements; i++) {
fs_inst *inst = emit(BRW_OPCODE_MOV,
fs_reg(MRF, base_mrf + mlen + i * reg_width),
coordinate);
@@ -750,7 +784,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
inst->saturate = true;
coordinate.reg_offset++;
}
- mlen += ir->coordinate->type->vector_elements * reg_width;
+ mlen += vector_elements * reg_width;
if (ir->shadow_comparitor && ir->op != ir_txd) {
mlen = MAX2(mlen, header_present + 4 * reg_width);
@@ -786,9 +820,11 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
inst = emit(FS_OPCODE_TXL, dst);
break;
case ir_txd: {
+ this->result = reg_undef;
ir->lod_info.grad.dPdx->accept(this);
fs_reg dPdx = this->result;
+ this->result = reg_undef;
ir->lod_info.grad.dPdy->accept(this);
fs_reg dPdy = this->result;
@@ -816,6 +852,13 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
inst = emit(FS_OPCODE_TXD, dst);
break;
}
+ case ir_txs:
+ this->result = reg_undef;
+ ir->lod_info.lod->accept(this);
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result);
+ mlen += reg_width;
+ inst = emit(FS_OPCODE_TXS, dst);
+ break;
case ir_txf:
assert(!"GLSL 1.30 features unsupported");
break;
@@ -850,6 +893,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
}
if (ir->shadow_comparitor && ir->op != ir_txd) {
+ this->result = reg_undef;
ir->shadow_comparitor->accept(this);
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
mlen += reg_width;
@@ -860,11 +904,13 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
case ir_tex:
break;
case ir_txb:
+ this->result = reg_undef;
ir->lod_info.bias->accept(this);
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
mlen += reg_width;
break;
case ir_txl:
+ this->result = reg_undef;
ir->lod_info.lod->accept(this);
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
mlen += reg_width;
@@ -873,9 +919,11 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
if (c->dispatch_width == 16)
fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
+ this->result = reg_undef;
ir->lod_info.grad.dPdx->accept(this);
fs_reg dPdx = this->result;
+ this->result = reg_undef;
ir->lod_info.grad.dPdy->accept(this);
fs_reg dPdy = this->result;
@@ -900,13 +948,19 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
}
break;
}
+ case ir_txs:
+ this->result = reg_undef;
+ ir->lod_info.lod->accept(this);
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result);
+ mlen += reg_width;
+ break;
case ir_txf:
assert(!"GLSL 1.30 features unsupported");
break;
}
/* Set up the coordinate (except for TXD where it was done earlier) */
- if (ir->op != ir_txd) {
+ if (ir->op != ir_txd && ir->op != ir_txs) {
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
coordinate);
@@ -924,7 +978,8 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
case ir_txl: inst = emit(FS_OPCODE_TXL, dst); break;
case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break;
- case ir_txf: assert(!"TXF unsupported.");
+ case ir_txf: assert(!"TXF unsupported."); break;
+ case ir_txs: inst = emit(FS_OPCODE_TXS, dst); break;
}
inst->base_mrf = base_mrf;
inst->mlen = mlen;
@@ -959,7 +1014,8 @@ fs_visitor::visit(ir_texture *ir)
}
this->result = reg_undef;
- ir->coordinate->accept(this);
+ if (ir->coordinate)
+ ir->coordinate->accept(this);
fs_reg coordinate = this->result;
if (ir->offset != NULL) {
@@ -1000,7 +1056,8 @@ fs_visitor::visit(ir_texture *ir)
* texture coordinates. We use the program parameter state
* tracking to get the scaling factor.
*/
- if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
+ if (intel->gen < 6 &&
+ ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
int tokens[STATE_LENGTH] = {
STATE_INTERNAL,
@@ -1046,7 +1103,7 @@ fs_visitor::visit(ir_texture *ir)
/* Writemasking doesn't eliminate channels on SIMD8 texture
* samples, so don't worry about them.
*/
- fs_reg dst = fs_reg(this, glsl_type::vec4_type);
+ fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1));
if (intel->gen >= 7) {
inst = emit_texture_gen7(ir, dst, coordinate, sampler);
@@ -1070,6 +1127,7 @@ fs_visitor::visit(ir_texture *ir)
if (hw_compare_supported) {
inst->shadow_compare = true;
} else {
+ this->result = reg_undef;
ir->shadow_comparitor->accept(this);
fs_reg ref = this->result;
@@ -1465,8 +1523,8 @@ fs_visitor::visit(ir_if *ir)
inst->predicated = true;
}
- foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
- ir_instruction *ir = (ir_instruction *)iter.get();
+ foreach_list(node, &ir->then_instructions) {
+ ir_instruction *ir = (ir_instruction *)node;
this->base_ir = ir;
this->result = reg_undef;
ir->accept(this);
@@ -1475,8 +1533,8 @@ fs_visitor::visit(ir_if *ir)
if (!ir->else_instructions.is_empty()) {
emit(BRW_OPCODE_ELSE);
- foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
- ir_instruction *ir = (ir_instruction *)iter.get();
+ foreach_list(node, &ir->else_instructions) {
+ ir_instruction *ir = (ir_instruction *)node;
this->base_ir = ir;
this->result = reg_undef;
ir->accept(this);
@@ -1526,8 +1584,8 @@ fs_visitor::visit(ir_loop *ir)
inst->predicated = true;
}
- foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
- ir_instruction *ir = (ir_instruction *)iter.get();
+ foreach_list(node, &ir->body_instructions) {
+ ir_instruction *ir = (ir_instruction *)node;
this->base_ir = ir;
this->result = reg_undef;
@@ -1583,8 +1641,8 @@ fs_visitor::visit(ir_function *ir)
assert(sig);
- foreach_iter(exec_list_iterator, iter, sig->body) {
- ir_instruction *ir = (ir_instruction *)iter.get();
+ foreach_list(node, &sig->body) {
+ ir_instruction *ir = (ir_instruction *)node;
this->base_ir = ir;
this->result = reg_undef;
ir->accept(this);
@@ -1684,7 +1742,7 @@ fs_visitor::emit_interpolation_setup_gen4()
interp_reg(FRAG_ATTRIB_WPOS, 3));
/* Compute the pixel 1/W value from wpos.w. */
this->pixel_w = fs_reg(this, glsl_type::float_type);
- emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
+ emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
this->current_annotation = NULL;
}
@@ -1721,7 +1779,7 @@ fs_visitor::emit_interpolation_setup_gen6()
this->current_annotation = "compute pos.w";
this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
this->wpos_w = fs_reg(this, glsl_type::float_type);
- emit_math(FS_OPCODE_RCP, this->wpos_w, this->pixel_w);
+ emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
this->delta_x = fs_reg(brw_vec8_grf(2, 0));
this->delta_y = fs_reg(brw_vec8_grf(3, 0));
@@ -1733,6 +1791,7 @@ void
fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
{
int reg_width = c->dispatch_width / 8;
+ fs_inst *inst;
if (c->dispatch_width == 8 || intel->gen == 6) {
/* SIMD8 write looks like:
@@ -1751,8 +1810,10 @@ fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
* m + 6: a0
* m + 7: a1
*/
- emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index * reg_width),
- color);
+ inst = emit(BRW_OPCODE_MOV,
+ fs_reg(MRF, first_color_mrf + index * reg_width),
+ color);
+ inst->saturate = c->key.clamp_fragment_color;
} else {
/* pre-gen6 SIMD16 single source DP write looks like:
* m + 0: r0
@@ -1770,16 +1831,22 @@ fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
* usual destination + 1 for the second half we get
* destination + 4.
*/
- emit(BRW_OPCODE_MOV,
- fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), color);
+ inst = emit(BRW_OPCODE_MOV,
+ fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index),
+ color);
+ inst->saturate = c->key.clamp_fragment_color;
} else {
push_force_uncompressed();
- emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), color);
+ inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index),
+ color);
+ inst->saturate = c->key.clamp_fragment_color;
pop_force_uncompressed();
push_force_sechalf();
color.sechalf = true;
- emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), color);
+ inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4),
+ color);
+ inst->saturate = c->key.clamp_fragment_color;
pop_force_sechalf();
color.sechalf = false;
}
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 03cebbb824b..f7e6e7c81d1 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -46,7 +46,7 @@ static void upload_drawing_rect(struct brw_context *brw)
struct gl_context *ctx = &intel->ctx;
BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE);
+ OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
OUT_BATCH(0); /* xmin, ymin */
OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
((ctx->DrawBuffer->Height - 1) << 16));
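The one-line fix above is easier to read with the packet-header convention spelled out: the command opcode sits in the upper half of the first DWord and the low bits carry the packet length in DWords minus two, so a four-DWord drawing-rectangle packet needs the `<< 16 | (4 - 2)`. The opcode value in the sketch below is a placeholder, not the real _3DSTATE_DRAWING_RECTANGLE encoding.

#include <stdint.h>
#include <stdio.h>

/* Illustration of the header layout assumed by the fix: opcode in the high
 * 16 bits, "DWord count - 2" in the low bits.  OPCODE_PLACEHOLDER is made
 * up for the example. */
#define OPCODE_PLACEHOLDER 0x7900u

int
main(void)
{
   int packet_dwords = 4;   /* header, xmin/ymin, xmax/ymax, origin */
   uint32_t header = (OPCODE_PLACEHOLDER << 16) | (uint32_t)(packet_dwords - 2);

   printf("drawing rectangle header: 0x%08x\n", (unsigned) header);
   return 0;
}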
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 6674f1640c8..09b5be4c96e 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -226,6 +226,34 @@ static GLboolean brwProgramStringNotify( struct gl_context *ctx,
return GL_TRUE;
}
+/* Per-thread scratch space is a power-of-two multiple of 1KB. */
+int
+brw_get_scratch_size(int size)
+{
+ int i;
+
+ for (i = 1024; i < size; i *= 2)
+ ;
+
+ return i;
+}
+
+void
+brw_get_scratch_bo(struct intel_context *intel,
+ drm_intel_bo **scratch_bo, int size)
+{
+ drm_intel_bo *old_bo = *scratch_bo;
+
+ if (old_bo && old_bo->size < size) {
+ drm_intel_bo_unreference(old_bo);
+ old_bo = NULL;
+ }
+
+ if (!old_bo) {
+ *scratch_bo = drm_intel_bo_alloc(intel->bufmgr, "scratch bo", size, 4096);
+ }
+}
+
void brwInitFragProgFuncs( struct dd_function_table *functions )
{
assert(functions->ProgramStringNotify == _tnl_program_string);
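brw_get_scratch_size() above rounds a per-thread scratch requirement up to a power-of-two multiple of 1KB by doubling from 1KB until the request fits, and brw_get_scratch_bo() only reallocates when the existing buffer is too small. The rounding loop is simple enough to restate and test on its own; the sketch below is not driver code and the sample sizes are arbitrary.

#include <assert.h>
#include <stdio.h>

/* Standalone restatement of the power-of-two-from-1KB rounding used for
 * per-thread scratch sizing. */
static int
scratch_size_for(int size)
{
   int i;

   for (i = 1024; i < size; i *= 2)
      ;

   return i;
}

int
main(void)
{
   assert(scratch_size_for(1) == 1024);      /* everything below 1KB rounds up to 1KB */
   assert(scratch_size_for(1024) == 1024);   /* exact powers of two are kept */
   assert(scratch_size_for(1025) == 2048);   /* anything larger rounds to the next power */
   assert(scratch_size_for(5000) == 8192);
   printf("scratch size rounding behaves as described\n");
   return 0;
}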
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 9471883fb2b..3ff6bbaed47 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -24,6 +24,7 @@
extern "C" {
#include "main/macros.h"
#include "brw_context.h"
+#include "brw_vs.h"
}
#include "brw_fs.h"
#include "../glsl/ir_optimization.h"
@@ -67,6 +68,9 @@ brw_shader_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
if (!brw_fs_precompile(ctx, prog))
return false;
+ if (!brw_vs_precompile(ctx, prog))
+ return false;
+
return true;
}
@@ -75,10 +79,15 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
{
struct brw_context *brw = brw_context(ctx);
struct intel_context *intel = &brw->intel;
+ unsigned int stage;
+
+ for (stage = 0; stage < ARRAY_SIZE(prog->_LinkedShaders); stage++) {
+ struct brw_shader *shader =
+ (struct brw_shader *)prog->_LinkedShaders[stage];
+
+ if (!shader)
+ continue;
- struct brw_shader *shader =
- (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
- if (shader != NULL) {
void *mem_ctx = ralloc_context(NULL);
bool progress;
@@ -106,18 +115,22 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
brw_do_cubemap_normalize(shader->ir);
lower_noise(shader->ir);
lower_quadop_vector(shader->ir, false);
+
+ bool input = true;
+ bool output = stage == MESA_SHADER_FRAGMENT;
+ bool temp = stage == MESA_SHADER_FRAGMENT;
+ bool uniform = true;
+
lower_variable_index_to_cond_assign(shader->ir,
- GL_TRUE, /* input */
- GL_TRUE, /* output */
- GL_TRUE, /* temp */
- GL_TRUE /* uniform */
- );
+ input, output, temp, uniform);
do {
progress = false;
- brw_do_channel_expressions(shader->ir);
- brw_do_vector_splitting(shader->ir);
+ if (stage == MESA_SHADER_FRAGMENT) {
+ brw_do_channel_expressions(shader->ir);
+ brw_do_vector_splitting(shader->ir);
+ }
progress = do_lower_jumps(shader->ir, true, true,
true, /* main return */
@@ -192,3 +205,29 @@ brw_conditional_for_comparison(unsigned int op)
return BRW_CONDITIONAL_NZ;
}
}
+
+uint32_t
+brw_math_function(enum opcode op)
+{
+ switch (op) {
+ case SHADER_OPCODE_RCP:
+ return BRW_MATH_FUNCTION_INV;
+ case SHADER_OPCODE_RSQ:
+ return BRW_MATH_FUNCTION_RSQ;
+ case SHADER_OPCODE_SQRT:
+ return BRW_MATH_FUNCTION_SQRT;
+ case SHADER_OPCODE_EXP2:
+ return BRW_MATH_FUNCTION_EXP;
+ case SHADER_OPCODE_LOG2:
+ return BRW_MATH_FUNCTION_LOG;
+ case SHADER_OPCODE_POW:
+ return BRW_MATH_FUNCTION_POW;
+ case SHADER_OPCODE_SIN:
+ return BRW_MATH_FUNCTION_SIN;
+ case SHADER_OPCODE_COS:
+ return BRW_MATH_FUNCTION_COS;
+ default:
+ assert(!"not reached: unknown math function");
+ return 0;
+ }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 4c568a26caa..1054d7a589e 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -21,5 +21,11 @@
* IN THE SOFTWARE.
*/
+#include <stdint.h>
+#include "brw_defines.h"
+
+#pragma once
+
int brw_type_for_base_type(const struct glsl_type *type);
uint32_t brw_conditional_for_comparison(unsigned int op);
+uint32_t brw_math_function(enum opcode op);
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
index b9e5cc1a534..cb7a3ef73d3 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -455,6 +455,23 @@ dump_vs_constants(struct brw_context *brw, uint32_t offset, uint32_t size)
}
}
+static void
+dump_wm_constants(struct brw_context *brw, uint32_t offset, uint32_t size)
+{
+ const char *name = "WM_CONST";
+ struct intel_context *intel = &brw->intel;
+ uint32_t *as_uint = intel->batch.bo->virtual + offset;
+ float *as_float = intel->batch.bo->virtual + offset;
+ int i;
+
+ for (i = 0; i < size / 4; i += 4) {
+ batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n",
+ i / 4,
+ as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3],
+ as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]);
+ }
+}
+
static void dump_binding_table(struct brw_context *brw, uint32_t offset,
uint32_t size)
{
@@ -602,6 +619,9 @@ dump_state_batch(struct brw_context *brw)
case AUB_TRACE_VS_CONSTANTS:
dump_vs_constants(brw, offset, size);
break;
+ case AUB_TRACE_WM_CONSTANTS:
+ dump_wm_constants(brw, offset, size);
+ break;
default:
break;
}
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index f462f32b19a..46a417a08ed 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -60,7 +60,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel,
* given in Volume 1 of the BSpec.
*/
h0 = ALIGN(mt->height0, align_h);
- h1 = ALIGN(minify(h0), align_h);
+ h1 = ALIGN(minify(mt->height0), align_h);
qpitch = (h0 + h1 + (intel->gen >= 7 ? 12 : 11) * align_h);
if (mt->compressed)
qpitch /= 4;
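The one-line change in brw_tex_layout.c swaps minify(h0) for minify(mt->height0): the second mip level's height is derived from the original, unaligned height0 and then aligned, rather than minifying the already padded h0, which can produce a smaller h1 and therefore a smaller qpitch. A numeric illustration follows; minify() and ALIGN() are restated locally for the example and the values are arbitrary.

#include <stdio.h>

#define ALIGN(value, alignment) (((value) + (alignment) - 1) & ~((alignment) - 1))

static int
minify(int v)
{
   return v > 1 ? v / 2 : 1;
}

int
main(void)
{
   int height0 = 9, align_h = 4;
   int h0 = ALIGN(height0, align_h);

   int h1_old = ALIGN(minify(h0), align_h);       /* minify the padded h0: 12 -> 6 -> 8 */
   int h1_new = ALIGN(minify(height0), align_h);  /* minify height0 first:  9 -> 4 -> 4 */

   printf("h0=%d  old h1=%d  new h1=%d\n", h0, h1_old, h1_new);
   return 0;
}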
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
new file mode 100644
index 00000000000..760bc1f7acd
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -0,0 +1,161 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+extern "C" {
+#include "main/macros.h"
+#include "program/prog_parameter.h"
+}
+
+#define MAX_INSTRUCTION (1 << 30)
+
+namespace brw {
+
+void
+vec4_visitor::calculate_live_intervals()
+{
+ int *def = ralloc_array(mem_ctx, int, virtual_grf_count);
+ int *use = ralloc_array(mem_ctx, int, virtual_grf_count);
+ int loop_depth = 0;
+ int loop_start = 0;
+
+ if (this->live_intervals_valid)
+ return;
+
+ for (int i = 0; i < virtual_grf_count; i++) {
+ def[i] = MAX_INSTRUCTION;
+ use[i] = -1;
+ }
+
+ int ip = 0;
+ foreach_list(node, &this->instructions) {
+ vec4_instruction *inst = (vec4_instruction *)node;
+
+ if (inst->opcode == BRW_OPCODE_DO) {
+ if (loop_depth++ == 0)
+ loop_start = ip;
+ } else if (inst->opcode == BRW_OPCODE_WHILE) {
+ loop_depth--;
+
+ if (loop_depth == 0) {
+ /* Patches up the use of vars marked for being live across
+ * the whole loop.
+ */
+ for (int i = 0; i < virtual_grf_count; i++) {
+ if (use[i] == loop_start) {
+ use[i] = ip;
+ }
+ }
+ }
+ } else {
+ for (unsigned int i = 0; i < 3; i++) {
+ if (inst->src[i].file == GRF) {
+ int reg = inst->src[i].reg;
+
+ if (!loop_depth) {
+ use[reg] = ip;
+ } else {
+ def[reg] = MIN2(loop_start, def[reg]);
+ use[reg] = loop_start;
+
+	       /* Nothing else is going to move our start any
+		* later in the loop now, because def[reg] already
+		* points before the loop's basic-block header.
+ */
+ }
+ }
+ }
+ if (inst->dst.file == GRF) {
+ int reg = inst->dst.reg;
+
+ if (!loop_depth) {
+ def[reg] = MIN2(def[reg], ip);
+ } else {
+ def[reg] = MIN2(def[reg], loop_start);
+ }
+ }
+ }
+
+ ip++;
+ }
+
+ ralloc_free(this->virtual_grf_def);
+ ralloc_free(this->virtual_grf_use);
+ this->virtual_grf_def = def;
+ this->virtual_grf_use = use;
+
+ this->live_intervals_valid = true;
+}
+
+bool
+vec4_visitor::virtual_grf_interferes(int a, int b)
+{
+ int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
+ int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);
+
+ /* We can't handle dead register writes here, without iterating
+ * over the whole instruction stream to find every single dead
+ * write to that register to compare to the live interval of the
+ * other register. Just assert that dead_code_eliminate() has been
+ * called.
+ */
+ assert((this->virtual_grf_use[a] != -1 ||
+ this->virtual_grf_def[a] == MAX_INSTRUCTION) &&
+ (this->virtual_grf_use[b] != -1 ||
+ this->virtual_grf_def[b] == MAX_INSTRUCTION));
+
+ return start < end;
+}
+
+/**
+ * Must be called after calculate_live_intervals() to remove unused
+ * writes to registers -- register allocation will fail otherwise
+ * because something defined but never used won't be considered to
+ * interfere with other regs.
+ */
+bool
+vec4_visitor::dead_code_eliminate()
+{
+ bool progress = false;
+ int pc = 0;
+
+ calculate_live_intervals();
+
+ foreach_list_safe(node, &this->instructions) {
+ vec4_instruction *inst = (vec4_instruction *)node;
+
+ if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) {
+ inst->remove();
+ progress = true;
+ }
+
+ pc++;
+ }
+
+ if (progress)
+ live_intervals_valid = false;
+
+ return progress;
+}
+
+} /* namespace brw */
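The interference test in the new vec4_visitor::virtual_grf_interferes() above reduces to an interval-overlap check: take the later of the two def points and the earlier of the two last-use points, and the registers conflict only if that start still comes before that end. A tiny standalone version of the comparison, with made-up instruction numbers:

#include <stdbool.h>
#include <stdio.h>

static bool
intervals_overlap(int def_a, int use_a, int def_b, int use_b)
{
   int start = def_a > def_b ? def_a : def_b;   /* later definition */
   int end = use_a < use_b ? use_a : use_b;     /* earlier last use */

   return start < end;
}

int
main(void)
{
   printf("%d\n", intervals_overlap(0, 10, 5, 20));  /* 1: the live ranges overlap */
   printf("%d\n", intervals_overlap(0, 5, 5, 20));   /* 0: b is defined where a dies */
   return 0;
}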
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
new file mode 100644
index 00000000000..1db910e2b99
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -0,0 +1,489 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_VEC4_H
+#define BRW_VEC4_H
+
+#include <stdint.h>
+#include "brw_shader.h"
+#include "main/compiler.h"
+#include "program/hash_table.h"
+
+extern "C" {
+#include "brw_vs.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+};
+
+#include "../glsl/ir.h"
+
+namespace brw {
+
+class dst_reg;
+
+/**
+ * Common helper for constructing swizzles. When only a subset of
+ * channels of a vec4 are used, we don't want to reference the other
+ * channels, as that will tell optimization passes that those other
+ * channels are used.
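+ * For example, swizzle_for_size(2) returns BRW_SWIZZLE4(SWIZZLE_X,
+ * SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), so only the X and Y channels
+ * appear to be read.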
+ */
+static int
+swizzle_for_size(int size)
+{
+ int size_swizzles[4] = {
+ BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+ BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
+ BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
+ };
+
+ assert((size >= 1) && (size <= 4));
+ return size_swizzles[size - 1];
+}
+
+enum register_file {
+ ARF = BRW_ARCHITECTURE_REGISTER_FILE,
+ GRF = BRW_GENERAL_REGISTER_FILE,
+ MRF = BRW_MESSAGE_REGISTER_FILE,
+ IMM = BRW_IMMEDIATE_VALUE,
+ HW_REG, /* a struct brw_reg */
+ ATTR,
+ UNIFORM, /* prog_data->params[hw_reg] */
+ BAD_FILE
+};
+
+class reg
+{
+public:
+   /** Register file: ARF, GRF, MRF, IMM, HW_REG, ATTR, UNIFORM, or BAD_FILE. */
+ enum register_file file;
+ /** virtual register number. 0 = fixed hw reg */
+ int reg;
+ /** Offset within the virtual register. */
+ int reg_offset;
+ /** Register type. BRW_REGISTER_TYPE_* */
+ int type;
+ bool sechalf;
+ struct brw_reg fixed_hw_reg;
+ int smear; /* -1, or a channel of the reg to smear to all channels. */
+
+   /** Value for file == IMM (BRW_IMMEDIATE_VALUE). */
+ union {
+ int32_t i;
+ uint32_t u;
+ float f;
+ } imm;
+};
+
+class src_reg : public reg
+{
+public:
+ /* Callers of this ralloc-based new need not call delete. It's
+ * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+ static void* operator new(size_t size, void *ctx)
+ {
+ void *node;
+
+ node = ralloc_size(ctx, size);
+ assert(node != NULL);
+
+ return node;
+ }
+
+ void init()
+ {
+ memset(this, 0, sizeof(*this));
+
+ this->file = BAD_FILE;
+ }
+
+ src_reg(register_file file, int reg, const glsl_type *type)
+ {
+ init();
+
+ this->file = file;
+ this->reg = reg;
+ if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
+ this->swizzle = swizzle_for_size(type->vector_elements);
+ else
+ this->swizzle = SWIZZLE_XYZW;
+ }
+
+ /** Generic unset register constructor. */
+ src_reg()
+ {
+ init();
+ }
+
+ src_reg(float f)
+ {
+ init();
+
+ this->file = IMM;
+ this->type = BRW_REGISTER_TYPE_F;
+ this->imm.f = f;
+ }
+
+ src_reg(uint32_t u)
+ {
+ init();
+
+ this->file = IMM;
+ this->type = BRW_REGISTER_TYPE_UD;
+      this->imm.u = u;
+ }
+
+ src_reg(int32_t i)
+ {
+ init();
+
+ this->file = IMM;
+ this->type = BRW_REGISTER_TYPE_D;
+ this->imm.i = i;
+ }
+
+ src_reg(class vec4_visitor *v, const struct glsl_type *type);
+
+ explicit src_reg(dst_reg reg);
+
+ GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */
+ bool negate;
+ bool abs;
+
+ src_reg *reladdr;
+};
+
+class dst_reg : public reg
+{
+public:
+ /* Callers of this ralloc-based new need not call delete. It's
+ * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+ static void* operator new(size_t size, void *ctx)
+ {
+ void *node;
+
+ node = ralloc_size(ctx, size);
+ assert(node != NULL);
+
+ return node;
+ }
+
+ void init()
+ {
+ memset(this, 0, sizeof(*this));
+ this->file = BAD_FILE;
+ this->writemask = WRITEMASK_XYZW;
+ }
+
+ dst_reg()
+ {
+ init();
+ }
+
+ dst_reg(register_file file, int reg)
+ {
+ init();
+
+ this->file = file;
+ this->reg = reg;
+ }
+
+ dst_reg(struct brw_reg reg)
+ {
+ init();
+
+ this->file = HW_REG;
+ this->fixed_hw_reg = reg;
+ }
+
+ dst_reg(class vec4_visitor *v, const struct glsl_type *type);
+
+ explicit dst_reg(src_reg reg);
+
+ int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
+
+ src_reg *reladdr;
+};
+
+class vec4_instruction : public exec_node {
+public:
+ /* Callers of this ralloc-based new need not call delete. It's
+ * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+ static void* operator new(size_t size, void *ctx)
+ {
+ void *node;
+
+ node = rzalloc_size(ctx, size);
+ assert(node != NULL);
+
+ return node;
+ }
+
+ struct brw_reg get_dst(void);
+ struct brw_reg get_src(int i);
+
+   enum opcode opcode; /* BRW_OPCODE_*, SHADER_OPCODE_*, or VS_OPCODE_* */
+ dst_reg dst;
+ src_reg src[3];
+
+ bool saturate;
+ bool predicate_inverse;
+ uint32_t predicate;
+
+ int conditional_mod; /**< BRW_CONDITIONAL_* */
+
+ int sampler;
+ int target; /**< MRT target. */
+ bool shadow_compare;
+
+ bool eot;
+ bool header_present;
+ int mlen; /**< SEND message length */
+ int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
+
+ uint32_t offset; /* spill/unspill offset */
+   /**
+    * Annotation for the generated IR.  One of the two can be set.
+    * @{
+    */
+   ir_instruction *ir;
+   const char *annotation;
+   /** @} */
+};
+
+class vec4_visitor : public ir_visitor
+{
+public:
+ vec4_visitor(struct brw_vs_compile *c,
+ struct gl_shader_program *prog, struct brw_shader *shader);
+ ~vec4_visitor();
+
+ dst_reg dst_null_f()
+ {
+ return dst_reg(brw_null_reg());
+ }
+
+ dst_reg dst_null_d()
+ {
+ return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ }
+
+ dst_reg dst_null_cmp()
+ {
+ if (intel->gen > 4)
+ return dst_null_d();
+ else
+ return dst_null_f();
+ }
+
+ struct brw_context *brw;
+ const struct gl_vertex_program *vp;
+ struct intel_context *intel;
+ struct gl_context *ctx;
+ struct brw_vs_compile *c;
+ struct brw_vs_prog_data *prog_data;
+ struct brw_compile *p;
+ struct brw_shader *shader;
+ struct gl_shader_program *prog;
+ void *mem_ctx;
+ exec_list instructions;
+
+ char *fail_msg;
+ bool failed;
+
+ /**
+ * GLSL IR currently being processed, which is associated with our
+ * driver IR instructions for debugging purposes.
+ */
+ ir_instruction *base_ir;
+ const char *current_annotation;
+
+ int *virtual_grf_sizes;
+ int virtual_grf_count;
+ int virtual_grf_array_size;
+ int first_non_payload_grf;
+ int *virtual_grf_def;
+ int *virtual_grf_use;
+ bool live_intervals_valid;
+
+ dst_reg *variable_storage(ir_variable *var);
+
+ void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
+
+ src_reg src_reg_for_float(float val);
+
+ /**
+ * \name Visit methods
+ *
+ * As typical for the visitor pattern, there must be one \c visit method for
+ * each concrete subclass of \c ir_instruction. Virtual base classes within
+ * the hierarchy should not have \c visit methods.
+ */
+ /*@{*/
+ virtual void visit(ir_variable *);
+ virtual void visit(ir_loop *);
+ virtual void visit(ir_loop_jump *);
+ virtual void visit(ir_function_signature *);
+ virtual void visit(ir_function *);
+ virtual void visit(ir_expression *);
+ virtual void visit(ir_swizzle *);
+ virtual void visit(ir_dereference_variable *);
+ virtual void visit(ir_dereference_array *);
+ virtual void visit(ir_dereference_record *);
+ virtual void visit(ir_assignment *);
+ virtual void visit(ir_constant *);
+ virtual void visit(ir_call *);
+ virtual void visit(ir_return *);
+ virtual void visit(ir_discard *);
+ virtual void visit(ir_texture *);
+ virtual void visit(ir_if *);
+ /*@}*/
+
+ src_reg result;
+
+ /* Regs for vertex results. Generated at ir_variable visiting time
+ * for the ir->location's used.
+ */
+ dst_reg output_reg[VERT_RESULT_MAX];
+ int uniform_size[MAX_UNIFORMS];
+ int uniforms;
+
+ struct hash_table *variable_ht;
+
+ bool run(void);
+ void fail(const char *msg, ...);
+
+ int virtual_grf_alloc(int size);
+ int setup_uniform_values(int loc, const glsl_type *type);
+ void setup_builtin_uniform_values(ir_variable *ir);
+ int setup_attributes(int payload_reg);
+ int setup_uniforms(int payload_reg);
+ void setup_payload();
+ void reg_allocate_trivial();
+ void reg_allocate();
+ void move_grf_array_access_to_scratch();
+ void calculate_live_intervals();
+ bool dead_code_eliminate();
+ bool virtual_grf_interferes(int a, int b);
+
+ vec4_instruction *emit(enum opcode opcode);
+
+ vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0);
+
+ vec4_instruction *emit(enum opcode opcode, dst_reg dst,
+ src_reg src0, src_reg src1);
+
+ vec4_instruction *emit(enum opcode opcode, dst_reg dst,
+ src_reg src0, src_reg src1, src_reg src2);
+
+ bool try_rewrite_rhs_to_dst(ir_assignment *ir,
+ dst_reg dst,
+ src_reg src,
+ vec4_instruction *pre_rhs_inst,
+ vec4_instruction *last_rhs_inst);
+
+ /** Walks an exec_list of ir_instruction and sends it through this visitor. */
+ void visit_instructions(const exec_list *list);
+
+ void emit_bool_to_cond_code(ir_rvalue *ir);
+ void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1);
+ void emit_if_gen6(ir_if *ir);
+
+ void emit_block_move(dst_reg *dst, src_reg *src,
+ const struct glsl_type *type, bool predicated);
+
+ void emit_constant_values(dst_reg *dst, ir_constant *value);
+
+ /**
+ * Emit the correct dot-product instruction for the type of arguments
+ */
+ void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
+
+ void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+ dst_reg dst, src_reg src0);
+
+ void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+ dst_reg dst, src_reg src0, src_reg src1);
+
+ void emit_scs(ir_instruction *ir, enum prog_opcode op,
+ dst_reg dst, const src_reg &src);
+
+ void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src);
+ void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src);
+ void emit_math(enum opcode opcode, dst_reg dst, src_reg src);
+ void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
+ void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
+ void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
+
+ int emit_vue_header_gen6(int header_mrf);
+ int emit_vue_header_gen4(int header_mrf);
+ void emit_urb_writes(void);
+
+ src_reg get_scratch_offset(vec4_instruction *inst,
+ src_reg *reladdr, int reg_offset);
+ void emit_scratch_read(vec4_instruction *inst,
+ dst_reg dst,
+ src_reg orig_src,
+ int base_offset);
+ void emit_scratch_write(vec4_instruction *inst,
+ src_reg temp,
+ dst_reg orig_dst,
+ int base_offset);
+
+ GLboolean try_emit_sat(ir_expression *ir);
+
+ bool process_move_condition(ir_rvalue *ir);
+
+ void generate_code();
+ void generate_vs_instruction(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg *src);
+
+ void generate_math1_gen4(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src);
+ void generate_math1_gen6(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src);
+ void generate_math2_gen4(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1);
+ void generate_math2_gen6(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1);
+
+ void generate_urb_write(vec4_instruction *inst);
+ void generate_oword_dual_block_offsets(struct brw_reg m1,
+ struct brw_reg index);
+ void generate_scratch_write(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg index);
+ void generate_scratch_read(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg index);
+};
+
+} /* namespace brw */
+
+#endif /* BRW_VEC4_H */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
new file mode 100644
index 00000000000..65ac7d9dc09
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -0,0 +1,854 @@
+/* Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+#include "../glsl/ir_print_visitor.h"
+
+extern "C" {
+#include "brw_eu.h"
+};
+
+using namespace brw;
+
+namespace brw {
+
+int
+vec4_visitor::setup_attributes(int payload_reg)
+{
+ int nr_attributes;
+ int attribute_map[VERT_ATTRIB_MAX];
+
+ nr_attributes = 0;
+ for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
+ if (prog_data->inputs_read & BITFIELD64_BIT(i)) {
+ attribute_map[i] = payload_reg + nr_attributes;
+ nr_attributes++;
+
+ /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED
+ * attributes come in as floating point conversions of the
+ * integer values.
+ */
+ if (c->key.gl_fixed_input_size[i] != 0) {
+ struct brw_reg reg = brw_vec8_grf(attribute_map[i], 0);
+
+ brw_MUL(p,
+ brw_writemask(reg, (1 << c->key.gl_fixed_input_size[i]) - 1),
+ reg, brw_imm_f(1.0 / 65536.0));
+ }
+ }
+ }
+
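+   /* Rewrite ATTR file references to point at the fixed payload GRFs
+    * that the vertex fetcher loaded the attributes into.
+    */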
+ foreach_list(node, &this->instructions) {
+ vec4_instruction *inst = (vec4_instruction *)node;
+
+ for (int i = 0; i < 3; i++) {
+ if (inst->src[i].file != ATTR)
+ continue;
+
+ int grf = attribute_map[inst->src[i].reg + inst->src[i].reg_offset];
+
+ struct brw_reg reg = brw_vec8_grf(grf, 0);
+ reg.dw1.bits.swizzle = inst->src[i].swizzle;
+ if (inst->src[i].abs)
+ reg = brw_abs(reg);
+ if (inst->src[i].negate)
+ reg = negate(reg);
+
+ inst->src[i].file = HW_REG;
+ inst->src[i].fixed_hw_reg = reg;
+ }
+ }
+
+ /* The BSpec says we always have to read at least one thing from
+ * the VF, and it appears that the hardware wedges otherwise.
+ */
+ if (nr_attributes == 0)
+ nr_attributes = 1;
+
+ prog_data->urb_read_length = (nr_attributes + 1) / 2;
+
+ return payload_reg + nr_attributes;
+}
+
+int
+vec4_visitor::setup_uniforms(int reg)
+{
+ /* User clip planes from curbe:
+ */
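+   /* Each clip plane is a vec4, so two planes are packed per GRF. */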
+ if (c->key.nr_userclip) {
+ if (intel->gen >= 6) {
+ for (int i = 0; i < c->key.nr_userclip; i++) {
+ c->userplane[i] = stride(brw_vec4_grf(reg + i / 2,
+ (i % 2) * 4), 0, 4, 1);
+ }
+ reg += ALIGN(c->key.nr_userclip, 2) / 2;
+ } else {
+ for (int i = 0; i < c->key.nr_userclip; i++) {
+ c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2,
+ (i % 2) * 4), 0, 4, 1);
+ }
+ reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2;
+ }
+ }
+
+ /* The pre-gen6 VS requires that some push constants get loaded no
+ * matter what, or the GPU would hang.
+ */
+ if (intel->gen < 6 && this->uniforms == 0) {
+ this->uniform_size[this->uniforms] = 1;
+
+ for (unsigned int i = 0; i < 4; i++) {
+ unsigned int slot = this->uniforms * 4 + i;
+
+ c->prog_data.param[slot] = NULL;
+ c->prog_data.param_convert[slot] = PARAM_CONVERT_ZERO;
+ }
+
+ this->uniforms++;
+ reg++;
+ } else {
+ reg += ALIGN(uniforms, 2) / 2;
+ }
+
+   /* For now, we don't eliminate unused uniform slots, nor do we pack
+    * uniforms more tightly than one vec4 each.
+    */
+ c->prog_data.nr_params = this->uniforms * 4;
+
+ c->prog_data.curb_read_length = reg - 1;
+ c->prog_data.uses_new_param_layout = true;
+
+ return reg;
+}
+
+void
+vec4_visitor::setup_payload(void)
+{
+ int reg = 0;
+
+ /* The payload always contains important data in g0, which contains
+ * the URB handles that are passed on to the URB write at the end
+ * of the thread. So, we always start push constants at g1.
+ */
+ reg++;
+
+ reg = setup_uniforms(reg);
+
+ reg = setup_attributes(reg);
+
+ this->first_non_payload_grf = reg;
+}
+
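+/**
+ * Converts the abstract dst_reg of an instruction into the fixed
+ * struct brw_reg that code generation consumes.
+ */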
+struct brw_reg
+vec4_instruction::get_dst(void)
+{
+ struct brw_reg brw_reg;
+
+ switch (dst.file) {
+ case GRF:
+ brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
+ brw_reg = retype(brw_reg, dst.type);
+ brw_reg.dw1.bits.writemask = dst.writemask;
+ break;
+
+ case HW_REG:
+ brw_reg = dst.fixed_hw_reg;
+ break;
+
+ case BAD_FILE:
+ brw_reg = brw_null_reg();
+ break;
+
+ default:
+ assert(!"not reached");
+ brw_reg = brw_null_reg();
+ break;
+ }
+ return brw_reg;
+}
+
+struct brw_reg
+vec4_instruction::get_src(int i)
+{
+ struct brw_reg brw_reg;
+
+ switch (src[i].file) {
+ case GRF:
+ brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0);
+ brw_reg = retype(brw_reg, src[i].type);
+ brw_reg.dw1.bits.swizzle = src[i].swizzle;
+ if (src[i].abs)
+ brw_reg = brw_abs(brw_reg);
+ if (src[i].negate)
+ brw_reg = negate(brw_reg);
+ break;
+
+ case IMM:
+ switch (src[i].type) {
+ case BRW_REGISTER_TYPE_F:
+ brw_reg = brw_imm_f(src[i].imm.f);
+ break;
+ case BRW_REGISTER_TYPE_D:
+ brw_reg = brw_imm_d(src[i].imm.i);
+ break;
+ case BRW_REGISTER_TYPE_UD:
+ brw_reg = brw_imm_ud(src[i].imm.u);
+ break;
+ default:
+ assert(!"not reached");
+ brw_reg = brw_null_reg();
+ break;
+ }
+ break;
+
+ case UNIFORM:
+ brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2,
+ ((src[i].reg + src[i].reg_offset) % 2) * 4),
+ 0, 4, 1);
+ brw_reg = retype(brw_reg, src[i].type);
+ brw_reg.dw1.bits.swizzle = src[i].swizzle;
+ if (src[i].abs)
+ brw_reg = brw_abs(brw_reg);
+ if (src[i].negate)
+ brw_reg = negate(brw_reg);
+ break;
+
+ case HW_REG:
+ brw_reg = src[i].fixed_hw_reg;
+ break;
+
+ case BAD_FILE:
+ /* Probably unused. */
+ brw_reg = brw_null_reg();
+ break;
+ case ATTR:
+ default:
+ assert(!"not reached");
+ brw_reg = brw_null_reg();
+ break;
+ }
+
+ return brw_reg;
+}
+
+void
+vec4_visitor::generate_math1_gen4(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ brw_math(p,
+ dst,
+ brw_math_function(inst->opcode),
+ BRW_MATH_SATURATE_NONE,
+ inst->base_mrf,
+ src,
+ BRW_MATH_DATA_SCALAR,
+ BRW_MATH_PRECISION_FULL);
+}
+
+static void
+check_gen6_math_src_arg(struct brw_reg src)
+{
+   /* The gen6 math instruction ignores source modifiers (abs, negate,
+    * swizzle), so make sure none are present on the argument.
+    */
+   assert(!src.abs);
+   assert(!src.negate);
+   assert(src.dw1.bits.swizzle == BRW_SWIZZLE_XYZW);
+}
+
+void
+vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ /* Can't do writemask because math can't be align16. */
+ assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
+ check_gen6_math_src_arg(src);
+
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_math(p,
+ dst,
+ brw_math_function(inst->opcode),
+ BRW_MATH_SATURATE_NONE,
+ inst->base_mrf,
+ src,
+ BRW_MATH_DATA_SCALAR,
+ BRW_MATH_PRECISION_FULL);
+ brw_set_access_mode(p, BRW_ALIGN_16);
+}
+
+void
+vec4_visitor::generate_math2_gen6(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
+{
+ /* Can't do writemask because math can't be align16. */
+ assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
+ /* Source swizzles are ignored. */
+ check_gen6_math_src_arg(src0);
+ check_gen6_math_src_arg(src1);
+
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_math2(p,
+ dst,
+ brw_math_function(inst->opcode),
+ src0, src1);
+ brw_set_access_mode(p, BRW_ALIGN_16);
+}
+
+void
+vec4_visitor::generate_math2_gen4(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
+{
+ /* Can't do writemask because math can't be align16. */
+ assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
+
+ brw_MOV(p, brw_message_reg(inst->base_mrf + 1), src1);
+
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_math(p,
+ dst,
+ brw_math_function(inst->opcode),
+ BRW_MATH_SATURATE_NONE,
+ inst->base_mrf,
+ src0,
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+ brw_set_access_mode(p, BRW_ALIGN_16);
+}
+
+void
+vec4_visitor::generate_urb_write(vec4_instruction *inst)
+{
+ brw_urb_WRITE(p,
+ brw_null_reg(), /* dest */
+ inst->base_mrf, /* starting mrf reg nr */
+ brw_vec8_grf(0, 0), /* src */
+ false, /* allocate */
+ true, /* used */
+ inst->mlen,
+ 0, /* response len */
+ inst->eot, /* eot */
+ inst->eot, /* writes complete */
+ inst->offset, /* urb destination offset */
+ BRW_URB_SWIZZLE_INTERLEAVE);
+}
+
+void
+vec4_visitor::generate_oword_dual_block_offsets(struct brw_reg m1,
+ struct brw_reg index)
+{
+ int second_vertex_offset;
+
+ if (intel->gen >= 6)
+ second_vertex_offset = 1;
+ else
+ second_vertex_offset = 16;
+
+ m1 = retype(m1, BRW_REGISTER_TYPE_D);
+
+ /* Set up M1 (message payload). Only the block offsets in M1.0 and
+ * M1.4 are used, and the rest are ignored.
+ */
+ struct brw_reg m1_0 = suboffset(vec1(m1), 0);
+ struct brw_reg m1_4 = suboffset(vec1(m1), 4);
+ struct brw_reg index_0 = suboffset(vec1(index), 0);
+ struct brw_reg index_4 = suboffset(vec1(index), 4);
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_access_mode(p, BRW_ALIGN_1);
+
+ brw_MOV(p, m1_0, index_0);
+
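+   /* The second vertex's block offset goes in M1.4, offset by
+    * second_vertex_offset from the first vertex's offset.
+    */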
+ brw_set_predicate_inverse(p, true);
+ if (index.file == BRW_IMMEDIATE_VALUE) {
+      index_4.dw1.ud += second_vertex_offset;
+ brw_MOV(p, m1_4, index_4);
+ } else {
+ brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset));
+ }
+
+ brw_pop_insn_state(p);
+}
+
+void
+vec4_visitor::generate_scratch_read(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg index)
+{
+ if (intel->gen >= 6) {
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p,
+ retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D),
+ retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D));
+ brw_pop_insn_state(p);
+ }
+
+ generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
+ index);
+
+ uint32_t msg_type;
+
+ if (intel->gen >= 6)
+ msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+ else if (intel->gen == 5 || intel->is_g4x)
+ msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+ else
+ msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+
+ /* Each of the 8 channel enables is considered for whether each
+ * dword is written.
+ */
+ struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
+ brw_set_dest(p, send, dst);
+ brw_set_src0(p, send, brw_message_reg(inst->base_mrf));
+ brw_set_dp_read_message(p, send,
+ 255, /* binding table index: stateless access */
+ BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
+ msg_type,
+ BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
+ 2, /* mlen */
+ 1 /* rlen */);
+}
+
+void
+vec4_visitor::generate_scratch_write(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg index)
+{
+ /* If the instruction is predicated, we'll predicate the send, not
+ * the header setup.
+ */
+ brw_set_predicate_control(p, false);
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ if (intel->gen >= 6) {
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p,
+ retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D),
+ retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D));
+ brw_pop_insn_state(p);
+ }
+
+ generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
+ index);
+
+ brw_MOV(p,
+ retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D),
+ retype(src, BRW_REGISTER_TYPE_D));
+
+ uint32_t msg_type;
+
+ if (intel->gen >= 6)
+ msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
+ else
+ msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
+
+ brw_set_predicate_control(p, inst->predicate);
+
+ /* Each of the 8 channel enables is considered for whether each
+ * dword is written.
+ */
+ struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
+ brw_set_dest(p, send, dst);
+ brw_set_src0(p, send, brw_message_reg(inst->base_mrf));
+ brw_set_dp_write_message(p, send,
+ 255, /* binding table index: stateless access */
+ BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
+ msg_type,
+ 3, /* mlen */
+ true, /* header present */
+ false, /* pixel scoreboard */
+ 0, /* rlen */
+ false, /* eot */
+ false /* commit */);
+}
+
+void
+vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
+ struct brw_reg dst,
+ struct brw_reg *src)
+{
+ vec4_instruction *inst = (vec4_instruction *)instruction;
+
+ switch (inst->opcode) {
+ case SHADER_OPCODE_RCP:
+ case SHADER_OPCODE_RSQ:
+ case SHADER_OPCODE_SQRT:
+ case SHADER_OPCODE_EXP2:
+ case SHADER_OPCODE_LOG2:
+ case SHADER_OPCODE_SIN:
+ case SHADER_OPCODE_COS:
+ if (intel->gen >= 6) {
+ generate_math1_gen6(inst, dst, src[0]);
+ } else {
+ generate_math1_gen4(inst, dst, src[0]);
+ }
+ break;
+
+ case SHADER_OPCODE_POW:
+ if (intel->gen >= 6) {
+ generate_math2_gen6(inst, dst, src[0], src[1]);
+ } else {
+ generate_math2_gen4(inst, dst, src[0], src[1]);
+ }
+ break;
+
+ case VS_OPCODE_URB_WRITE:
+ generate_urb_write(inst);
+ break;
+
+ case VS_OPCODE_SCRATCH_READ:
+ generate_scratch_read(inst, dst, src[0]);
+ break;
+
+ case VS_OPCODE_SCRATCH_WRITE:
+ generate_scratch_write(inst, dst, src[0], src[1]);
+ break;
+
+ default:
+ if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
+ fail("unsupported opcode in `%s' in VS\n",
+ brw_opcodes[inst->opcode].name);
+ } else {
+ fail("Unsupported opcode %d in VS", inst->opcode);
+ }
+ }
+}
+
+bool
+vec4_visitor::run()
+{
+   /* Generate VS IR for main().  (The visitor only descends into
+    * functions called "main".)
+    */
+ visit_instructions(shader->ir);
+
+ emit_urb_writes();
+
+ /* Before any optimization, push array accesses out to scratch
+ * space where we need them to be. This pass may allocate new
+ * virtual GRFs, so we want to do it early. It also makes sure
+ * that we have reladdr computations available for CSE, since we'll
+ * often do repeated subexpressions for those.
+ */
+ move_grf_array_access_to_scratch();
+
+ bool progress;
+ do {
+ progress = false;
+ progress = dead_code_eliminate() || progress;
+ } while (progress);
+
+ if (failed)
+ return false;
+
+ setup_payload();
+ reg_allocate();
+
+ if (failed)
+ return false;
+
+ brw_set_access_mode(p, BRW_ALIGN_16);
+
+ generate_code();
+
+ return !failed;
+}
+
+void
+vec4_visitor::generate_code()
+{
+ int last_native_inst = p->nr_insn;
+ const char *last_annotation_string = NULL;
+ ir_instruction *last_annotation_ir = NULL;
+
+ int loop_stack_array_size = 16;
+ int loop_stack_depth = 0;
+ brw_instruction **loop_stack =
+ rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
+ int *if_depth_in_loop =
+ rzalloc_array(this->mem_ctx, int, loop_stack_array_size);
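+   /* loop_stack remembers the brw DO instruction for each open loop so
+    * that WHILE (and gen6 CONT) can refer back to it; if_depth_in_loop
+    * counts the IFs opened inside the innermost loop, which BREAK/CONT
+    * need to know about.
+    */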
+
+
+ if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+ printf("Native code for vertex shader %d:\n", prog->Name);
+ }
+
+ foreach_list(node, &this->instructions) {
+ vec4_instruction *inst = (vec4_instruction *)node;
+ struct brw_reg src[3], dst;
+
+ if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+ if (last_annotation_ir != inst->ir) {
+ last_annotation_ir = inst->ir;
+ if (last_annotation_ir) {
+ printf(" ");
+ last_annotation_ir->print();
+ printf("\n");
+ }
+ }
+ if (last_annotation_string != inst->annotation) {
+ last_annotation_string = inst->annotation;
+ if (last_annotation_string)
+ printf(" %s\n", last_annotation_string);
+ }
+ }
+
+ for (unsigned int i = 0; i < 3; i++) {
+ src[i] = inst->get_src(i);
+ }
+ dst = inst->get_dst();
+
+ brw_set_conditionalmod(p, inst->conditional_mod);
+ brw_set_predicate_control(p, inst->predicate);
+ brw_set_predicate_inverse(p, inst->predicate_inverse);
+ brw_set_saturate(p, inst->saturate);
+
+ switch (inst->opcode) {
+ case BRW_OPCODE_MOV:
+ brw_MOV(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_ADD:
+ brw_ADD(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_MUL:
+ brw_MUL(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_MACH:
+ brw_set_acc_write_control(p, 1);
+ brw_MACH(p, dst, src[0], src[1]);
+ brw_set_acc_write_control(p, 0);
+ break;
+
+ case BRW_OPCODE_FRC:
+ brw_FRC(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_RNDD:
+ brw_RNDD(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_RNDE:
+ brw_RNDE(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_RNDZ:
+ brw_RNDZ(p, dst, src[0]);
+ break;
+
+ case BRW_OPCODE_AND:
+ brw_AND(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_OR:
+ brw_OR(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_XOR:
+ brw_XOR(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_NOT:
+ brw_NOT(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_ASR:
+ brw_ASR(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_SHR:
+ brw_SHR(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_SHL:
+ brw_SHL(p, dst, src[0], src[1]);
+ break;
+
+ case BRW_OPCODE_CMP:
+ brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
+ break;
+ case BRW_OPCODE_SEL:
+ brw_SEL(p, dst, src[0], src[1]);
+ break;
+
+ case BRW_OPCODE_DP4:
+ brw_DP4(p, dst, src[0], src[1]);
+ break;
+
+ case BRW_OPCODE_DP3:
+ brw_DP3(p, dst, src[0], src[1]);
+ break;
+
+ case BRW_OPCODE_DP2:
+ brw_DP2(p, dst, src[0], src[1]);
+ break;
+
+ case BRW_OPCODE_IF:
+ if (inst->src[0].file != BAD_FILE) {
+ /* The instruction has an embedded compare (only allowed on gen6) */
+ assert(intel->gen == 6);
+ gen6_IF(p, inst->conditional_mod, src[0], src[1]);
+ } else {
+ struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8);
+ brw_inst->header.predicate_control = inst->predicate;
+ }
+ if_depth_in_loop[loop_stack_depth]++;
+ break;
+
+ case BRW_OPCODE_ELSE:
+ brw_ELSE(p);
+ break;
+ case BRW_OPCODE_ENDIF:
+ brw_ENDIF(p);
+ if_depth_in_loop[loop_stack_depth]--;
+ break;
+
+ case BRW_OPCODE_DO:
+ loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
+ if (loop_stack_array_size <= loop_stack_depth) {
+ loop_stack_array_size *= 2;
+ loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
+ loop_stack_array_size);
+ if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
+ loop_stack_array_size);
+ }
+ if_depth_in_loop[loop_stack_depth] = 0;
+ break;
+
+ case BRW_OPCODE_BREAK:
+ brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case BRW_OPCODE_CONTINUE:
+ /* FINISHME: We need to write the loop instruction support still. */
+ if (intel->gen >= 6)
+ gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
+ else
+ brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+
+ case BRW_OPCODE_WHILE: {
+ struct brw_instruction *inst0, *inst1;
+ GLuint br = 1;
+
+ if (intel->gen >= 5)
+ br = 2;
+
+ assert(loop_stack_depth > 0);
+ loop_stack_depth--;
+ inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
+ if (intel->gen < 6) {
+ /* patch all the BREAK/CONT instructions from last BGNLOOP */
+ while (inst0 > loop_stack[loop_stack_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+ }
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ }
+ }
+ }
+ }
+ break;
+
+ default:
+ generate_vs_instruction(inst, dst, src);
+ break;
+ }
+
+ if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+ for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
+ if (0) {
+ printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+ ((uint32_t *)&p->store[i])[3],
+ ((uint32_t *)&p->store[i])[2],
+ ((uint32_t *)&p->store[i])[1],
+ ((uint32_t *)&p->store[i])[0]);
+ }
+ brw_disasm(stdout, &p->store[i], intel->gen);
+ }
+ }
+
+ last_native_inst = p->nr_insn;
+ }
+
+ if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+ printf("\n");
+ }
+
+ ralloc_free(loop_stack);
+ ralloc_free(if_depth_in_loop);
+
+ brw_set_uip_jip(p);
+
+ /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
+ * emit issues, it doesn't get the jump distances into the output,
+ * which is often something we want to debug. So this is here in
+ * case you're doing that.
+ */
+ if (0) {
+ if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+ for (unsigned int i = 0; i < p->nr_insn; i++) {
+ printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+ ((uint32_t *)&p->store[i])[3],
+ ((uint32_t *)&p->store[i])[2],
+ ((uint32_t *)&p->store[i])[1],
+ ((uint32_t *)&p->store[i])[0]);
+ brw_disasm(stdout, &p->store[i], intel->gen);
+ }
+ }
+ }
+}
+
+extern "C" {
+
+bool
+brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c)
+{
+ if (!prog)
+ return false;
+
+ struct brw_shader *shader =
+ (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
+ if (!shader)
+ return false;
+
+ if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+ printf("GLSL IR for native vertex shader %d:\n", prog->Name);
+ _mesa_print_ir(shader->ir, NULL);
+ printf("\n\n");
+ }
+
+ vec4_visitor v(c, prog, shader);
+ if (!v.run()) {
+ prog->LinkStatus = GL_FALSE;
+ ralloc_strcat(&prog->InfoLog, v.fail_msg);
+ return false;
+ }
+
+ return true;
+}
+
+} /* extern "C" */
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
new file mode 100644
index 00000000000..3f052ff64cf
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -0,0 +1,234 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+extern "C" {
+#include "main/macros.h"
+#include "program/register_allocate.h"
+} /* extern "C" */
+
+#include "brw_vec4.h"
+#include "../glsl/ir_print_visitor.h"
+
+using namespace brw;
+
+namespace brw {
+
+static void
+assign(int *reg_hw_locations, reg *reg)
+{
+ if (reg->file == GRF) {
+ reg->reg = reg_hw_locations[reg->reg];
+ }
+}
+
+void
+vec4_visitor::reg_allocate_trivial()
+{
+ int hw_reg_mapping[this->virtual_grf_count];
+ bool virtual_grf_used[this->virtual_grf_count];
+ int i;
+ int next;
+
+ /* Calculate which virtual GRFs are actually in use after whatever
+ * optimization passes have occurred.
+ */
+ for (int i = 0; i < this->virtual_grf_count; i++) {
+ virtual_grf_used[i] = false;
+ }
+
+ foreach_iter(exec_list_iterator, iter, this->instructions) {
+ vec4_instruction *inst = (vec4_instruction *)iter.get();
+
+ if (inst->dst.file == GRF)
+ virtual_grf_used[inst->dst.reg] = true;
+
+ for (int i = 0; i < 3; i++) {
+ if (inst->src[i].file == GRF)
+ virtual_grf_used[inst->src[i].reg] = true;
+ }
+ }
+
+ hw_reg_mapping[0] = this->first_non_payload_grf;
+ next = hw_reg_mapping[0] + this->virtual_grf_sizes[0];
+ for (i = 1; i < this->virtual_grf_count; i++) {
+ if (virtual_grf_used[i]) {
+ hw_reg_mapping[i] = next;
+ next += this->virtual_grf_sizes[i];
+ }
+ }
+ prog_data->total_grf = next;
+
+ foreach_iter(exec_list_iterator, iter, this->instructions) {
+ vec4_instruction *inst = (vec4_instruction *)iter.get();
+
+ assign(hw_reg_mapping, &inst->dst);
+ assign(hw_reg_mapping, &inst->src[0]);
+ assign(hw_reg_mapping, &inst->src[1]);
+ assign(hw_reg_mapping, &inst->src[2]);
+ }
+
+ if (prog_data->total_grf > BRW_MAX_GRF) {
+ fail("Ran out of regs on trivial allocator (%d/%d)\n",
+ prog_data->total_grf, BRW_MAX_GRF);
+ }
+}
+
+static void
+brw_alloc_reg_set_for_classes(struct brw_context *brw,
+ int *class_sizes,
+ int class_count,
+ int base_reg_count)
+{
+ /* Compute the total number of registers across all classes. */
+ int ra_reg_count = 0;
+ for (int i = 0; i < class_count; i++) {
+ ra_reg_count += base_reg_count - (class_sizes[i] - 1);
+ }
+
+ ralloc_free(brw->vs.ra_reg_to_grf);
+ brw->vs.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
+ ralloc_free(brw->vs.regs);
+ brw->vs.regs = ra_alloc_reg_set(ra_reg_count);
+ ralloc_free(brw->vs.classes);
+ brw->vs.classes = ralloc_array(brw, int, class_count + 1);
+
+ /* Now, add the registers to their classes, and add the conflicts
+ * between them and the base GRF registers (and also each other).
+ */
+ int reg = 0;
+ for (int i = 0; i < class_count; i++) {
+ int class_reg_count = base_reg_count - (class_sizes[i] - 1);
+ brw->vs.classes[i] = ra_alloc_reg_class(brw->vs.regs);
+
+ for (int j = 0; j < class_reg_count; j++) {
+ ra_class_add_reg(brw->vs.regs, brw->vs.classes[i], reg);
+
+ brw->vs.ra_reg_to_grf[reg] = j;
+
+ for (int base_reg = j;
+ base_reg < j + class_sizes[i];
+ base_reg++) {
+ ra_add_transitive_reg_conflict(brw->vs.regs, base_reg, reg);
+ }
+
+ reg++;
+ }
+ }
+ assert(reg == ra_reg_count);
+
+ ra_set_finalize(brw->vs.regs);
+}
+
+void
+vec4_visitor::reg_allocate()
+{
+ int hw_reg_mapping[virtual_grf_count];
+ int first_assigned_grf = this->first_non_payload_grf;
+ int base_reg_count = BRW_MAX_GRF - first_assigned_grf;
+ int class_sizes[base_reg_count];
+ int class_count = 0;
+
+ /* Using the trivial allocator can be useful in debugging undefined
+ * register access as a result of broken optimization passes.
+ */
+ if (0) {
+ reg_allocate_trivial();
+ return;
+ }
+
+ calculate_live_intervals();
+
+ /* Set up the register classes.
+ *
+ * The base registers store a vec4. However, we'll need larger
+ * storage for arrays, structures, and matrices, which will be sets
+ * of contiguous registers.
+ */
+ class_sizes[class_count++] = 1;
+
+ for (int r = 0; r < virtual_grf_count; r++) {
+ int i;
+
+ for (i = 0; i < class_count; i++) {
+ if (class_sizes[i] == this->virtual_grf_sizes[r])
+ break;
+ }
+ if (i == class_count) {
+ if (this->virtual_grf_sizes[r] >= base_reg_count) {
+ fail("Object too large to register allocate.\n");
+ }
+
+ class_sizes[class_count++] = this->virtual_grf_sizes[r];
+ }
+ }
+
+ brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, base_reg_count);
+
+ struct ra_graph *g = ra_alloc_interference_graph(brw->vs.regs,
+ virtual_grf_count);
+
+ for (int i = 0; i < virtual_grf_count; i++) {
+ for (int c = 0; c < class_count; c++) {
+ if (class_sizes[c] == this->virtual_grf_sizes[i]) {
+ ra_set_node_class(g, i, brw->vs.classes[c]);
+ break;
+ }
+ }
+
+ for (int j = 0; j < i; j++) {
+ if (virtual_grf_interferes(i, j)) {
+ ra_add_node_interference(g, i, j);
+ }
+ }
+ }
+
+ if (!ra_allocate_no_spills(g)) {
+ ralloc_free(g);
+ fail("No register spilling support yet\n");
+ }
+
+ /* Get the chosen virtual registers for each node, and map virtual
+ * regs in the register classes back down to real hardware reg
+ * numbers.
+ */
+ prog_data->total_grf = first_assigned_grf;
+ for (int i = 0; i < virtual_grf_count; i++) {
+ int reg = ra_get_node_reg(g, i);
+
+ hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg];
+ prog_data->total_grf = MAX2(prog_data->total_grf, hw_reg_mapping[i] + 1);
+ }
+
+ foreach_list(node, &this->instructions) {
+ vec4_instruction *inst = (vec4_instruction *)node;
+
+ assign(hw_reg_mapping, &inst->dst);
+ assign(hw_reg_mapping, &inst->src[0]);
+ assign(hw_reg_mapping, &inst->src[1]);
+ assign(hw_reg_mapping, &inst->src[2]);
+ }
+
+ ralloc_free(g);
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
new file mode 100644
index 00000000000..b3a07bd0539
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -0,0 +1,2156 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+extern "C" {
+#include "main/macros.h"
+#include "program/prog_parameter.h"
+}
+
+namespace brw {
+
+src_reg::src_reg(dst_reg reg)
+{
+ init();
+
+ this->file = reg.file;
+ this->reg = reg.reg;
+ this->reg_offset = reg.reg_offset;
+ this->type = reg.type;
+ this->reladdr = reg.reladdr;
+ this->fixed_hw_reg = reg.fixed_hw_reg;
+
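+   /* Build a read swizzle equivalent to the writemask: written channels
+    * are read in order, and the last written channel fills any remaining
+    * swizzle slots.
+    */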
+ int swizzles[4];
+ int next_chan = 0;
+ int last = 0;
+
+ for (int i = 0; i < 4; i++) {
+ if (!(reg.writemask & (1 << i)))
+ continue;
+
+ swizzles[next_chan++] = last = i;
+ }
+
+ for (; next_chan < 4; next_chan++) {
+ swizzles[next_chan] = last;
+ }
+
+ this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
+ swizzles[2], swizzles[3]);
+}
+
+dst_reg::dst_reg(src_reg reg)
+{
+ init();
+
+ this->file = reg.file;
+ this->reg = reg.reg;
+ this->reg_offset = reg.reg_offset;
+ this->type = reg.type;
+ this->writemask = WRITEMASK_XYZW;
+ this->reladdr = reg.reladdr;
+ this->fixed_hw_reg = reg.fixed_hw_reg;
+}
+
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode, dst_reg dst,
+ src_reg src0, src_reg src1, src_reg src2)
+{
+ vec4_instruction *inst = new(mem_ctx) vec4_instruction();
+
+ inst->opcode = opcode;
+ inst->dst = dst;
+ inst->src[0] = src0;
+ inst->src[1] = src1;
+ inst->src[2] = src2;
+ inst->ir = this->base_ir;
+ inst->annotation = this->current_annotation;
+
+ this->instructions.push_tail(inst);
+
+ return inst;
+}
+
+
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
+{
+ return emit(opcode, dst, src0, src1, src_reg());
+}
+
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
+{
+ assert(dst.writemask != 0);
+ return emit(opcode, dst, src0, src_reg(), src_reg());
+}
+
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode)
+{
+ return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg());
+}
+
+void
+vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
+{
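+   /* elements must be 2, 3, or 4; dot_opcodes maps them to DP2/DP3/DP4. */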
+ static enum opcode dot_opcodes[] = {
+ BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
+ };
+
+ emit(dot_opcodes[elements - 2], dst, src0, src1);
+}
+
+void
+vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
+{
+ /* The gen6 math instruction ignores the source modifiers --
+ * swizzle, abs, negate, and at least some parts of the register
+ * region description.
+ */
+ src_reg temp_src = src_reg(this, glsl_type::vec4_type);
+ emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);
+
+ if (dst.writemask != WRITEMASK_XYZW) {
+ /* The gen6 math instruction must be align1, so we can't do
+ * writemasks.
+ */
+ dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
+
+ emit(opcode, temp_dst, temp_src);
+
+ emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
+ } else {
+ emit(opcode, dst, temp_src);
+ }
+}
+
+void
+vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
+{
+ vec4_instruction *inst = emit(opcode, dst, src);
+ inst->base_mrf = 1;
+ inst->mlen = 1;
+}
+
+void
+vec4_visitor::emit_math(enum opcode opcode, dst_reg dst, src_reg src)
+{
+ switch (opcode) {
+ case SHADER_OPCODE_RCP:
+ case SHADER_OPCODE_RSQ:
+ case SHADER_OPCODE_SQRT:
+ case SHADER_OPCODE_EXP2:
+ case SHADER_OPCODE_LOG2:
+ case SHADER_OPCODE_SIN:
+ case SHADER_OPCODE_COS:
+ break;
+ default:
+ assert(!"not reached: bad math opcode");
+ return;
+ }
+
+ if (intel->gen >= 6) {
+ return emit_math1_gen6(opcode, dst, src);
+ } else {
+ return emit_math1_gen4(opcode, dst, src);
+ }
+}
+
+void
+vec4_visitor::emit_math2_gen6(enum opcode opcode,
+ dst_reg dst, src_reg src0, src_reg src1)
+{
+ src_reg expanded;
+
+ /* The gen6 math instruction ignores the source modifiers --
+ * swizzle, abs, negate, and at least some parts of the register
+ * region description. Move the sources to temporaries to make it
+ * generally work.
+ */
+
+ expanded = src_reg(this, glsl_type::vec4_type);
+ emit(BRW_OPCODE_MOV, dst_reg(expanded), src0);
+ src0 = expanded;
+
+ expanded = src_reg(this, glsl_type::vec4_type);
+ emit(BRW_OPCODE_MOV, dst_reg(expanded), src1);
+ src1 = expanded;
+
+ if (dst.writemask != WRITEMASK_XYZW) {
+ /* The gen6 math instruction must be align1, so we can't do
+ * writemasks.
+ */
+ dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
+
+ emit(opcode, temp_dst, src0, src1);
+
+ emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
+ } else {
+ emit(opcode, dst, src0, src1);
+ }
+}
+
+void
+vec4_visitor::emit_math2_gen4(enum opcode opcode,
+ dst_reg dst, src_reg src0, src_reg src1)
+{
+ vec4_instruction *inst = emit(opcode, dst, src0, src1);
+ inst->base_mrf = 1;
+ inst->mlen = 2;
+}
+
+void
+vec4_visitor::emit_math(enum opcode opcode,
+ dst_reg dst, src_reg src0, src_reg src1)
+{
+ assert(opcode == SHADER_OPCODE_POW);
+
+ if (intel->gen >= 6) {
+ return emit_math2_gen6(opcode, dst, src0, src1);
+ } else {
+ return emit_math2_gen4(opcode, dst, src0, src1);
+ }
+}
+
+void
+vec4_visitor::visit_instructions(const exec_list *list)
+{
+ foreach_list(node, list) {
+ ir_instruction *ir = (ir_instruction *)node;
+
+ base_ir = ir;
+ ir->accept(this);
+ }
+}
+
+
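+/**
+ * Returns the number of contiguous vec4 slots a variable of the given
+ * GLSL type takes up in register or uniform space.
+ */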
+static int
+type_size(const struct glsl_type *type)
+{
+ unsigned int i;
+ int size;
+
+ switch (type->base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_BOOL:
+ if (type->is_matrix()) {
+ return type->matrix_columns;
+ } else {
+ /* Regardless of size of vector, it gets a vec4. This is bad
+ * packing for things like floats, but otherwise arrays become a
+ * mess. Hopefully a later pass over the code can pack scalars
+ * down if appropriate.
+ */
+ return 1;
+ }
+ case GLSL_TYPE_ARRAY:
+ assert(type->length > 0);
+ return type_size(type->fields.array) * type->length;
+ case GLSL_TYPE_STRUCT:
+ size = 0;
+ for (i = 0; i < type->length; i++) {
+ size += type_size(type->fields.structure[i].type);
+ }
+ return size;
+ case GLSL_TYPE_SAMPLER:
+ /* Samplers take up one slot in UNIFORMS[], but they're baked in
+ * at link time.
+ */
+ return 1;
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
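+/**
+ * Allocates a new virtual GRF of @size contiguous vec4 slots and returns
+ * its index, growing virtual_grf_sizes[] as needed.
+ */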
+int
+vec4_visitor::virtual_grf_alloc(int size)
+{
+ if (virtual_grf_array_size <= virtual_grf_count) {
+ if (virtual_grf_array_size == 0)
+ virtual_grf_array_size = 16;
+ else
+ virtual_grf_array_size *= 2;
+ virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
+ virtual_grf_array_size);
+ }
+ virtual_grf_sizes[virtual_grf_count] = size;
+ return virtual_grf_count++;
+}
+
+src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
+{
+ init();
+
+ this->file = GRF;
+ this->reg = v->virtual_grf_alloc(type_size(type));
+
+ if (type->is_array() || type->is_record()) {
+ this->swizzle = BRW_SWIZZLE_NOOP;
+ } else {
+ this->swizzle = swizzle_for_size(type->vector_elements);
+ }
+
+ this->type = brw_type_for_base_type(type);
+}
+
+dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
+{
+ init();
+
+ this->file = GRF;
+ this->reg = v->virtual_grf_alloc(type_size(type));
+
+ if (type->is_array() || type->is_record()) {
+ this->writemask = WRITEMASK_XYZW;
+ } else {
+ this->writemask = (1 << type->vector_elements) - 1;
+ }
+
+ this->type = brw_type_for_base_type(type);
+}
+
+/* Our support for uniforms is piggy-backed on the struct
+ * gl_vertex_program, because that's where the values actually
+ * get stored, rather than in some global gl_shader_program uniform
+ * store.
+ */
+int
+vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
+{
+ unsigned int offset = 0;
+ float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
+
+ if (type->is_matrix()) {
+ const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+ type->vector_elements,
+ 1);
+
+ for (unsigned int i = 0; i < type->matrix_columns; i++) {
+ offset += setup_uniform_values(loc + offset, column);
+ }
+
+ return offset;
+ }
+
+ switch (type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_BOOL:
+ for (unsigned int i = 0; i < type->vector_elements; i++) {
+ int slot = this->uniforms * 4 + i;
+ switch (type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
+ break;
+ case GLSL_TYPE_UINT:
+ c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U;
+ break;
+ case GLSL_TYPE_INT:
+ c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I;
+ break;
+ case GLSL_TYPE_BOOL:
+ c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B;
+ break;
+ default:
+ assert(!"not reached");
+ c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
+ break;
+ }
+ c->prog_data.param[slot] = &values[i];
+ }
+
+ for (unsigned int i = type->vector_elements; i < 4; i++) {
+ c->prog_data.param_convert[this->uniforms * 4 + i] =
+ PARAM_CONVERT_ZERO;
+ c->prog_data.param[this->uniforms * 4 + i] = NULL;
+ }
+
+ this->uniform_size[this->uniforms] = type->vector_elements;
+ this->uniforms++;
+
+ return 1;
+
+ case GLSL_TYPE_STRUCT:
+ for (unsigned int i = 0; i < type->length; i++) {
+ offset += setup_uniform_values(loc + offset,
+ type->fields.structure[i].type);
+ }
+ return offset;
+
+ case GLSL_TYPE_ARRAY:
+ for (unsigned int i = 0; i < type->length; i++) {
+ offset += setup_uniform_values(loc + offset, type->fields.array);
+ }
+ return offset;
+
+ case GLSL_TYPE_SAMPLER:
+ /* The sampler takes up a slot, but we don't use any values from it. */
+ return 1;
+
+ default:
+ assert(!"not reached");
+ return 0;
+ }
+}
+
+/* Our support for builtin uniforms is even scarier than non-builtin.
+ * It sits on top of the PROG_STATE_VAR parameters that are
+ * automatically updated from GL context state.
+ */
+void
+vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
+{
+ const ir_state_slot *const slots = ir->state_slots;
+ assert(ir->state_slots != NULL);
+
+ for (unsigned int i = 0; i < ir->num_state_slots; i++) {
+ /* This state reference has already been setup by ir_to_mesa,
+ * but we'll get the same index back here. We can reference
+ * ParameterValues directly, since unlike brw_fs.cpp, we never
+ * add new state references during compile.
+ */
+ int index = _mesa_add_state_reference(this->vp->Base.Parameters,
+ (gl_state_index *)slots[i].tokens);
+ float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
+
+ this->uniform_size[this->uniforms] = 0;
+ /* Add each of the unique swizzled channels of the element.
+ * This will end up matching the size of the glsl_type of this field.
+ */
+      int last_swiz = -1;
+      for (unsigned int j = 0; j < 4; j++) {
+         int swiz = GET_SWZ(slots[i].swizzle, j);
+
+         c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
+         c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT;
+         if (swiz != last_swiz)
+            this->uniform_size[this->uniforms]++;
+         last_swiz = swiz;
+      }
+ this->uniforms++;
+ }
+}
+
+dst_reg *
+vec4_visitor::variable_storage(ir_variable *var)
+{
+ return (dst_reg *)hash_table_find(this->variable_ht, var);
+}
+
+void
+vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
+{
+ ir_expression *expr = ir->as_expression();
+
+ if (expr) {
+ src_reg op[2];
+ vec4_instruction *inst;
+
+ assert(expr->get_num_operands() <= 2);
+ for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
+ assert(expr->operands[i]->type->is_scalar());
+
+ expr->operands[i]->accept(this);
+ op[i] = this->result;
+ }
+
+ switch (expr->operation) {
+ case ir_unop_logic_not:
+ inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
+ inst->conditional_mod = BRW_CONDITIONAL_Z;
+ break;
+
+ case ir_binop_logic_xor:
+ inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ break;
+
+ case ir_binop_logic_or:
+ inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ break;
+
+ case ir_binop_logic_and:
+ inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ break;
+
+ case ir_unop_f2b:
+ if (intel->gen >= 6) {
+ inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
+ } else {
+ inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
+ }
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ break;
+
+ case ir_unop_i2b:
+ if (intel->gen >= 6) {
+ inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
+ } else {
+ inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
+ }
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ break;
+
+ case ir_binop_greater:
+ case ir_binop_gequal:
+ case ir_binop_less:
+ case ir_binop_lequal:
+ case ir_binop_equal:
+ case ir_binop_all_equal:
+ case ir_binop_nequal:
+ case ir_binop_any_nequal:
+ inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
+ inst->conditional_mod =
+ brw_conditional_for_comparison(expr->operation);
+ break;
+
+ default:
+ assert(!"not reached");
+ break;
+ }
+ return;
+ }
+
+ ir->accept(this);
+
+ if (intel->gen >= 6) {
+ vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
+ this->result, src_reg(1));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ } else {
+ vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ }
+}
+
+/**
+ * Emit a gen6 IF statement with the comparison folded into the IF
+ * instruction.
+ */
+void
+vec4_visitor::emit_if_gen6(ir_if *ir)
+{
+ ir_expression *expr = ir->condition->as_expression();
+
+ if (expr) {
+ src_reg op[2];
+ vec4_instruction *inst;
+ dst_reg temp;
+
+ assert(expr->get_num_operands() <= 2);
+ for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
+ expr->operands[i]->accept(this);
+ op[i] = this->result;
+ }
+
+ switch (expr->operation) {
+ case ir_unop_logic_not:
+ inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_Z;
+ return;
+
+ case ir_binop_logic_xor:
+ inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+
+ case ir_binop_logic_or:
+ temp = dst_reg(this, glsl_type::bool_type);
+ emit(BRW_OPCODE_OR, temp, op[0], op[1]);
+ inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+
+ case ir_binop_logic_and:
+ temp = dst_reg(this, glsl_type::bool_type);
+ emit(BRW_OPCODE_AND, temp, op[0], op[1]);
+ inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+
+ case ir_unop_f2b:
+ inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+
+ case ir_unop_i2b:
+ inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+
+ case ir_binop_greater:
+ case ir_binop_gequal:
+ case ir_binop_less:
+ case ir_binop_lequal:
+ case ir_binop_equal:
+ case ir_binop_nequal:
+ inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
+ inst->conditional_mod =
+ brw_conditional_for_comparison(expr->operation);
+ return;
+
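+      /* For the vector cases below, compare per channel and let the IF's
+       * ALL4H/ANY4H predicate reduce the four channel flags down to a
+       * single branch condition.
+       */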
+ case ir_binop_all_equal:
+ inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_Z;
+
+ inst = emit(BRW_OPCODE_IF);
+ inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
+ return;
+
+ case ir_binop_any_nequal:
+ inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+ inst = emit(BRW_OPCODE_IF);
+ inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+ return;
+
+ case ir_unop_any:
+ inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+ inst = emit(BRW_OPCODE_IF);
+ inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+ return;
+
+ default:
+ assert(!"not reached");
+ inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+ }
+ return;
+ }
+
+ ir->condition->accept(this);
+
+ vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
+ this->result, src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+}
+
+void
+vec4_visitor::visit(ir_variable *ir)
+{
+ dst_reg *reg = NULL;
+
+ if (variable_storage(ir))
+ return;
+
+ switch (ir->mode) {
+ case ir_var_in:
+ reg = new(mem_ctx) dst_reg(ATTR, ir->location);
+ break;
+
+ case ir_var_out:
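+      /* Outputs are staged in a temporary GRF; output_reg[] records where
+       * each vert_result lives so emit_urb_writes() can copy it to the URB.
+       */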
+ reg = new(mem_ctx) dst_reg(this, ir->type);
+
+ for (int i = 0; i < type_size(ir->type); i++) {
+ output_reg[ir->location + i] = *reg;
+ output_reg[ir->location + i].reg_offset = i;
+ output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
+ }
+ break;
+
+ case ir_var_auto:
+ case ir_var_temporary:
+ reg = new(mem_ctx) dst_reg(this, ir->type);
+ break;
+
+ case ir_var_uniform:
+ reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
+
+ if (!strncmp(ir->name, "gl_", 3)) {
+ setup_builtin_uniform_values(ir);
+ } else {
+ setup_uniform_values(ir->location, ir->type);
+ }
+ break;
+
+ default:
+ assert(!"not reached");
+ }
+
+ reg->type = brw_type_for_base_type(ir->type);
+ hash_table_insert(this->variable_ht, reg, ir);
+}
+
+void
+vec4_visitor::visit(ir_loop *ir)
+{
+ dst_reg counter;
+
+ /* We don't want debugging output to print the whole body of the
+ * loop as the annotation.
+ */
+ this->base_ir = NULL;
+
+ if (ir->counter != NULL) {
+ this->base_ir = ir->counter;
+ ir->counter->accept(this);
+ counter = *(variable_storage(ir->counter));
+
+ if (ir->from != NULL) {
+ this->base_ir = ir->from;
+ ir->from->accept(this);
+
+ emit(BRW_OPCODE_MOV, counter, this->result);
+ }
+ }
+
+ emit(BRW_OPCODE_DO);
+
+ if (ir->to) {
+ this->base_ir = ir->to;
+ ir->to->accept(this);
+
+ vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst_null_d(),
+ src_reg(counter), this->result);
+ inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);
+
+ inst = emit(BRW_OPCODE_BREAK);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ }
+
+ visit_instructions(&ir->body_instructions);
+
+
+ if (ir->increment) {
+ this->base_ir = ir->increment;
+ ir->increment->accept(this);
+ emit(BRW_OPCODE_ADD, counter, src_reg(counter), this->result);
+ }
+
+ emit(BRW_OPCODE_WHILE);
+}
+
+void
+vec4_visitor::visit(ir_loop_jump *ir)
+{
+ switch (ir->mode) {
+ case ir_loop_jump::jump_break:
+ emit(BRW_OPCODE_BREAK);
+ break;
+ case ir_loop_jump::jump_continue:
+ emit(BRW_OPCODE_CONTINUE);
+ break;
+ }
+}
+
+
+void
+vec4_visitor::visit(ir_function_signature *ir)
+{
+ assert(0);
+ (void)ir;
+}
+
+void
+vec4_visitor::visit(ir_function *ir)
+{
+ /* Ignore function bodies other than main() -- we shouldn't see calls to
+ * them since they should all be inlined.
+ */
+ if (strcmp(ir->name, "main") == 0) {
+ const ir_function_signature *sig;
+ exec_list empty;
+
+ sig = ir->matching_signature(&empty);
+
+ assert(sig);
+
+ visit_instructions(&sig->body);
+ }
+}
+
+GLboolean
+vec4_visitor::try_emit_sat(ir_expression *ir)
+{
+ ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
+ if (!sat_src)
+ return false;
+
+ sat_src->accept(this);
+ src_reg src = this->result;
+
+ this->result = src_reg(this, ir->type);
+ vec4_instruction *inst;
+ inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src);
+ inst->saturate = true;
+
+ return true;
+}
+
+void
+vec4_visitor::emit_bool_comparison(unsigned int op,
+ dst_reg dst, src_reg src0, src_reg src1)
+{
+ /* original gen4 does destination conversion before comparison. */
+ if (intel->gen < 5)
+ dst.type = src0.type;
+
+ vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1);
+ inst->conditional_mod = brw_conditional_for_comparison(op);
+
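+   /* The CMP wrote 0 or ~0 per channel; mask down to bit 0 so the result
+    * is a 0/1 boolean.
+    */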
+ dst.type = BRW_REGISTER_TYPE_D;
+ emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
+}
+
+void
+vec4_visitor::visit(ir_expression *ir)
+{
+ unsigned int operand;
+ src_reg op[Elements(ir->operands)];
+ src_reg result_src;
+ dst_reg result_dst;
+ vec4_instruction *inst;
+
+ if (try_emit_sat(ir))
+ return;
+
+ for (operand = 0; operand < ir->get_num_operands(); operand++) {
+ this->result.file = BAD_FILE;
+ ir->operands[operand]->accept(this);
+ if (this->result.file == BAD_FILE) {
+ printf("Failed to get tree for expression operand:\n");
+ ir->operands[operand]->print();
+ exit(1);
+ }
+ op[operand] = this->result;
+
+ /* Matrix expression operands should have been broken down to vector
+ * operations already.
+ */
+ assert(!ir->operands[operand]->type->is_matrix());
+ }
+
+ int vector_elements = ir->operands[0]->type->vector_elements;
+ if (ir->operands[1]) {
+ vector_elements = MAX2(vector_elements,
+ ir->operands[1]->type->vector_elements);
+ }
+
+ this->result.file = BAD_FILE;
+
+ /* Storage for our result. Ideally for an assignment we'd be using
+ * the actual storage for the result here, instead.
+ */
+ result_src = src_reg(this, ir->type);
+ /* convenience for the emit functions below. */
+ result_dst = dst_reg(result_src);
+ /* If nothing special happens, this is the result. */
+ this->result = result_src;
+ /* Limit writes to the channels that will be used by result_src later.
+ * This does limit this temp's use as a temporary for multi-instruction
+ * sequences.
+ */
+ result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+
+ switch (ir->operation) {
+ case ir_unop_logic_not:
+      /* Note that BRW_OPCODE_NOT is not appropriate here, since it takes
+       * the one's complement of the whole register, not just bit 0.
+       */
+ emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1));
+ break;
+ case ir_unop_neg:
+ op[0].negate = !op[0].negate;
+ this->result = op[0];
+ break;
+ case ir_unop_abs:
+ op[0].abs = true;
+ op[0].negate = false;
+ this->result = op[0];
+ break;
+
+ case ir_unop_sign:
+ emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f));
+
+ inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
+ inst->conditional_mod = BRW_CONDITIONAL_G;
+ inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+
+ inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
+ inst->conditional_mod = BRW_CONDITIONAL_L;
+ inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+
+ break;
+
+ case ir_unop_rcp:
+ emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
+ break;
+
+ case ir_unop_exp2:
+ emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
+ break;
+ case ir_unop_log2:
+ emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
+ break;
+ case ir_unop_exp:
+ case ir_unop_log:
+ assert(!"not reached: should be handled by ir_explog_to_explog2");
+ break;
+ case ir_unop_sin:
+ case ir_unop_sin_reduced:
+ emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
+ break;
+ case ir_unop_cos:
+ case ir_unop_cos_reduced:
+ emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
+ break;
+
+ case ir_unop_dFdx:
+ case ir_unop_dFdy:
+ assert(!"derivatives not valid in vertex shader");
+ break;
+
+ case ir_unop_noise:
+ assert(!"not reached: should be handled by lower_noise");
+ break;
+
+ case ir_binop_add:
+ emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]);
+ break;
+ case ir_binop_sub:
+ assert(!"not reached: should be handled by ir_sub_to_add_neg");
+ break;
+
+ case ir_binop_mul:
+ if (ir->type->is_integer()) {
+ /* For integer multiplication, the MUL uses the low 16 bits
+ * of one of the operands (src0 on gen6, src1 on gen7). The
+ * MACH accumulates in the contribution of the upper 16 bits
+ * of that operand.
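+	  * After the MACH, the accumulator holds the full product, and the
+	  * trailing MOV copies its low 32 bits into the destination.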
+ *
+ * FINISHME: Emit just the MUL if we know an operand is small
+ * enough.
+ */
+ struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
+
+ emit(BRW_OPCODE_MUL, acc, op[0], op[1]);
+ emit(BRW_OPCODE_MACH, dst_null_d(), op[0], op[1]);
+ emit(BRW_OPCODE_MOV, result_dst, src_reg(acc));
+ } else {
+ emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
+ }
+ break;
+ case ir_binop_div:
+ assert(!"not reached: should be handled by ir_div_to_mul_rcp");
+ case ir_binop_mod:
+ assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
+ break;
+
+ case ir_binop_less:
+ case ir_binop_greater:
+ case ir_binop_lequal:
+ case ir_binop_gequal:
+ case ir_binop_equal:
+ case ir_binop_nequal: {
+ dst_reg temp = result_dst;
+ /* original gen4 does implicit conversion before comparison. */
+ if (intel->gen < 5)
+ temp.type = op[0].type;
+
+ inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
+ inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
+ emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1));
+ break;
+ }
+
+ case ir_binop_all_equal:
+ /* "==" operator producing a scalar boolean. */
+ if (ir->operands[0]->type->is_vector() ||
+ ir->operands[1]->type->is_vector()) {
+ inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_Z;
+
+ emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
+ inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
+ inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
+ } else {
+ dst_reg temp = result_dst;
+ /* original gen4 does implicit conversion before comparison. */
+ if (intel->gen < 5)
+ temp.type = op[0].type;
+
+ inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_Z;
+ emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
+ }
+ break;
+ case ir_binop_any_nequal:
+ /* "!=" operator producing a scalar boolean. */
+ if (ir->operands[0]->type->is_vector() ||
+ ir->operands[1]->type->is_vector()) {
+ inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+ emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
+ inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
+ inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+ } else {
+ dst_reg temp = result_dst;
+ /* original gen4 does implicit conversion before comparison. */
+ if (intel->gen < 5)
+ temp.type = op[0].type;
+
+ inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
+ }
+ break;
+
+ case ir_unop_any:
+ inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+ emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
+
+ inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
+ inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+ break;
+
+ case ir_binop_logic_xor:
+ emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
+ break;
+
+ case ir_binop_logic_or:
+ emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
+ break;
+
+ case ir_binop_logic_and:
+ emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
+ break;
+
+ case ir_binop_dot:
+ assert(ir->operands[0]->type->is_vector());
+ assert(ir->operands[0]->type == ir->operands[1]->type);
+ emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
+ break;
+
+ case ir_unop_sqrt:
+ emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
+ break;
+ case ir_unop_rsq:
+ emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
+ break;
+ case ir_unop_i2f:
+ case ir_unop_i2u:
+ case ir_unop_u2i:
+ case ir_unop_u2f:
+ case ir_unop_b2f:
+ case ir_unop_b2i:
+ case ir_unop_f2i:
+ emit(BRW_OPCODE_MOV, result_dst, op[0]);
+ break;
+ case ir_unop_f2b:
+ case ir_unop_i2b: {
+ dst_reg temp = result_dst;
+ /* original gen4 does implicit conversion before comparison. */
+ if (intel->gen < 5)
+ temp.type = op[0].type;
+
+ inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
+ break;
+ }
+
+ case ir_unop_trunc:
+ emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
+ break;
+ case ir_unop_ceil:
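+      /* ceil(x) is computed as -floor(-x): negate the operand, round down
+       * with RNDD, then negate the result via the source modifier on its
+       * next use.
+       */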
+ op[0].negate = !op[0].negate;
+ inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
+ this->result.negate = true;
+ break;
+ case ir_unop_floor:
+ inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
+ break;
+ case ir_unop_fract:
+ inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
+ break;
+ case ir_unop_round_even:
+ emit(BRW_OPCODE_RNDE, result_dst, op[0]);
+ break;
+
+ case ir_binop_min:
+ inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_L;
+
+ inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ break;
+ case ir_binop_max:
+ inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_G;
+
+ inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ break;
+
+ case ir_binop_pow:
+ emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
+ break;
+
+ case ir_unop_bit_not:
+ inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
+ break;
+ case ir_binop_bit_and:
+ inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
+ break;
+ case ir_binop_bit_xor:
+ inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
+ break;
+ case ir_binop_bit_or:
+ inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
+ break;
+
+ case ir_binop_lshift:
+ case ir_binop_rshift:
+ assert(!"GLSL 1.30 features unsupported");
+ break;
+
+ case ir_quadop_vector:
+ assert(!"not reached: should be handled by lower_quadop_vector");
+ break;
+ }
+}
+
+
+void
+vec4_visitor::visit(ir_swizzle *ir)
+{
+ src_reg src;
+ int i = 0;
+ int swizzle[4];
+
+ /* Note that this is only swizzles in expressions, not those on the left
+ * hand side of an assignment, which do write masking. See ir_assignment
+ * for that.
+ */
+
+ ir->val->accept(this);
+ src = this->result;
+ assert(src.file != BAD_FILE);
+
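+   /* Compose the IR swizzle with whatever swizzle is already on the
+    * source, so that chained swizzles collapse into a single one.
+    */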
+ for (i = 0; i < ir->type->vector_elements; i++) {
+ switch (i) {
+ case 0:
+ swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
+ break;
+ case 1:
+ swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
+ break;
+ case 2:
+ swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
+ break;
+ case 3:
+ swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
+ break;
+ }
+ }
+ for (; i < 4; i++) {
+ /* Replicate the last channel out. */
+ swizzle[i] = swizzle[ir->type->vector_elements - 1];
+ }
+
+ src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+
+ this->result = src;
+}
+
+void
+vec4_visitor::visit(ir_dereference_variable *ir)
+{
+ const struct glsl_type *type = ir->type;
+ dst_reg *reg = variable_storage(ir->var);
+
+ if (!reg) {
+ fail("Failed to find variable storage for %s\n", ir->var->name);
+ this->result = src_reg(brw_null_reg());
+ return;
+ }
+
+ this->result = src_reg(*reg);
+
+ if (type->is_scalar() || type->is_vector() || type->is_matrix())
+ this->result.swizzle = swizzle_for_size(type->vector_elements);
+}
+
+void
+vec4_visitor::visit(ir_dereference_array *ir)
+{
+ ir_constant *constant_index;
+ src_reg src;
+ int element_size = type_size(ir->type);
+
+ constant_index = ir->array_index->constant_expression_value();
+
+ ir->array->accept(this);
+ src = this->result;
+
+ if (constant_index) {
+ src.reg_offset += constant_index->value.i[0] * element_size;
+ } else {
+ /* Variable index array dereference. It eats the "vec4" of the
+ * base of the array and an index that offsets the Mesa register
+ * index.
+ */
+ ir->array_index->accept(this);
+
+ src_reg index_reg;
+
+ if (element_size == 1) {
+ index_reg = this->result;
+ } else {
+ index_reg = src_reg(this, glsl_type::int_type);
+
+ emit(BRW_OPCODE_MUL, dst_reg(index_reg),
+ this->result, src_reg(element_size));
+ }
+
+ if (src.reladdr) {
+ src_reg temp = src_reg(this, glsl_type::int_type);
+
+ emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg);
+
+ index_reg = temp;
+ }
+
+ src.reladdr = ralloc(mem_ctx, src_reg);
+ memcpy(src.reladdr, &index_reg, sizeof(index_reg));
+ }
+
+ /* If the type is smaller than a vec4, replicate the last channel out. */
+ if (ir->type->is_scalar() || ir->type->is_vector())
+ src.swizzle = swizzle_for_size(ir->type->vector_elements);
+ else
+ src.swizzle = BRW_SWIZZLE_NOOP;
+ src.type = brw_type_for_base_type(ir->type);
+
+ this->result = src;
+}
+
+void
+vec4_visitor::visit(ir_dereference_record *ir)
+{
+ unsigned int i;
+ const glsl_type *struct_type = ir->record->type;
+ int offset = 0;
+
+ ir->record->accept(this);
+
+ for (i = 0; i < struct_type->length; i++) {
+ if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
+ break;
+ offset += type_size(struct_type->fields.structure[i].type);
+ }
+
+ /* If the type is smaller than a vec4, replicate the last channel out. */
+ if (ir->type->is_scalar() || ir->type->is_vector())
+ this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
+ else
+ this->result.swizzle = BRW_SWIZZLE_NOOP;
+ this->result.type = brw_type_for_base_type(ir->type);
+
+ this->result.reg_offset += offset;
+}
+
+/**
+ * We want to be careful in assignment setup to hit the actual storage
+ * instead of potentially using a temporary like we might with the
+ * ir_dereference handler.
+ */
+static dst_reg
+get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
+{
+ /* The LHS must be a dereference. If the LHS is a variable indexed array
+    * access of a vector, it must be separated into a series of conditional
+    * moves before reaching this point (see ir_vec_index_to_cond_assign).
+ */
+ assert(ir->as_dereference());
+ ir_dereference_array *deref_array = ir->as_dereference_array();
+ if (deref_array) {
+ assert(!deref_array->array->type->is_vector());
+ }
+
+ /* Use the rvalue deref handler for the most part. We'll ignore
+ * swizzles in it and write swizzles using writemask, though.
+ */
+ ir->accept(v);
+ return dst_reg(v->result);
+}
+
+void
+vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
+ const struct glsl_type *type, bool predicated)
+{
+ if (type->base_type == GLSL_TYPE_STRUCT) {
+ for (unsigned int i = 0; i < type->length; i++) {
+ emit_block_move(dst, src, type->fields.structure[i].type, predicated);
+ }
+ return;
+ }
+
+ if (type->is_array()) {
+ for (unsigned int i = 0; i < type->length; i++) {
+ emit_block_move(dst, src, type->fields.array, predicated);
+ }
+ return;
+ }
+
+ if (type->is_matrix()) {
+ const struct glsl_type *vec_type;
+
+ vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+ type->vector_elements, 1);
+
+ for (int i = 0; i < type->matrix_columns; i++) {
+ emit_block_move(dst, src, vec_type, predicated);
+ }
+ return;
+ }
+
+ assert(type->is_scalar() || type->is_vector());
+
+ dst->type = brw_type_for_base_type(type);
+ src->type = dst->type;
+
+ dst->writemask = (1 << type->vector_elements) - 1;
+
+ /* Do we need to worry about swizzling a swizzle? */
+   assert(src->swizzle == BRW_SWIZZLE_NOOP);
+ src->swizzle = swizzle_for_size(type->vector_elements);
+
+ vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src);
+ if (predicated)
+ inst->predicate = BRW_PREDICATE_NORMAL;
+
+ dst->reg_offset++;
+ src->reg_offset++;
+}
+
+
+/* If the RHS processing resulted in an instruction generating a
+ * temporary value, and it would be easy to rewrite the instruction to
+ * generate its result right into the LHS instead, do so. This ends
+ * up reliably removing instructions where it can be tricky to do so
+ * later without real UD chain information.
+ */
+bool
+vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
+ dst_reg dst,
+ src_reg src,
+ vec4_instruction *pre_rhs_inst,
+ vec4_instruction *last_rhs_inst)
+{
+ /* This could be supported, but it would take more smarts. */
+ if (ir->condition)
+ return false;
+
+ if (pre_rhs_inst == last_rhs_inst)
+ return false; /* No instructions generated to work with. */
+
+ /* Make sure the last instruction generated our source reg. */
+ if (src.file != GRF ||
+ src.file != last_rhs_inst->dst.file ||
+ src.reg != last_rhs_inst->dst.reg ||
+ src.reg_offset != last_rhs_inst->dst.reg_offset ||
+ src.reladdr ||
+ src.abs ||
+ src.negate ||
+ last_rhs_inst->predicate != BRW_PREDICATE_NONE)
+ return false;
+
+   /* Check that the last instruction fully initialized the channels
+ * we want to use, in the order we want to use them. We could
+ * potentially reswizzle the operands of many instructions so that
+ * we could handle out of order channels, but don't yet.
+ */
+ for (int i = 0; i < 4; i++) {
+ if (dst.writemask & (1 << i)) {
+ if (!(last_rhs_inst->dst.writemask & (1 << i)))
+ return false;
+
+ if (BRW_GET_SWZ(src.swizzle, i) != i)
+ return false;
+ }
+ }
+
+ /* Success! Rewrite the instruction. */
+ last_rhs_inst->dst.file = dst.file;
+ last_rhs_inst->dst.reg = dst.reg;
+ last_rhs_inst->dst.reg_offset = dst.reg_offset;
+ last_rhs_inst->dst.reladdr = dst.reladdr;
+ last_rhs_inst->dst.writemask &= dst.writemask;
+
+ return true;
+}
+
+void
+vec4_visitor::visit(ir_assignment *ir)
+{
+ dst_reg dst = get_assignment_lhs(ir->lhs, this);
+
+ if (!ir->lhs->type->is_scalar() &&
+ !ir->lhs->type->is_vector()) {
+ ir->rhs->accept(this);
+ src_reg src = this->result;
+
+ if (ir->condition) {
+ emit_bool_to_cond_code(ir->condition);
+ }
+
+ emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL);
+ return;
+ }
+
+ /* Now we're down to just a scalar/vector with writemasks. */
+ int i;
+
+ vec4_instruction *pre_rhs_inst, *last_rhs_inst;
+ pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
+
+ ir->rhs->accept(this);
+
+ last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
+
+ src_reg src = this->result;
+
+ int swizzles[4];
+ int first_enabled_chan = 0;
+ int src_chan = 0;
+
+ assert(ir->lhs->type->is_vector() ||
+ ir->lhs->type->is_scalar());
+ dst.writemask = ir->write_mask;
+
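+   /* Find the source channel that feeds the first enabled destination
+    * channel; it gets replicated into the unwritten slots below.
+    */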
+ for (int i = 0; i < 4; i++) {
+ if (dst.writemask & (1 << i)) {
+ first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
+ break;
+ }
+ }
+
+ /* Swizzle a small RHS vector into the channels being written.
+ *
+    * GLSL IR treats write_mask as dictating how many channels are
+    * present on the RHS, while in our instructions we need to make
+ * those channels appear in the slots of the vec4 they're written to.
+ */
+ for (int i = 0; i < 4; i++) {
+ if (dst.writemask & (1 << i))
+ swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
+ else
+ swizzles[i] = first_enabled_chan;
+ }
+ src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
+ swizzles[2], swizzles[3]);
+
+ if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
+ return;
+ }
+
+ if (ir->condition) {
+ emit_bool_to_cond_code(ir->condition);
+ }
+
+ for (i = 0; i < type_size(ir->lhs->type); i++) {
+ vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);
+
+ if (ir->condition)
+ inst->predicate = BRW_PREDICATE_NORMAL;
+
+ dst.reg_offset++;
+ src.reg_offset++;
+ }
+}
+
+void
+vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
+{
+ if (ir->type->base_type == GLSL_TYPE_STRUCT) {
+ foreach_list(node, &ir->components) {
+ ir_constant *field_value = (ir_constant *)node;
+
+ emit_constant_values(dst, field_value);
+ }
+ return;
+ }
+
+ if (ir->type->is_array()) {
+ for (unsigned int i = 0; i < ir->type->length; i++) {
+ emit_constant_values(dst, ir->array_elements[i]);
+ }
+ return;
+ }
+
+ if (ir->type->is_matrix()) {
+ for (int i = 0; i < ir->type->matrix_columns; i++) {
+ for (int j = 0; j < ir->type->vector_elements; j++) {
+ dst->writemask = 1 << j;
+ dst->type = BRW_REGISTER_TYPE_F;
+
+ emit(BRW_OPCODE_MOV, *dst,
+ src_reg(ir->value.f[i * ir->type->vector_elements + j]));
+ }
+ dst->reg_offset++;
+ }
+ return;
+ }
+
+ for (int i = 0; i < ir->type->vector_elements; i++) {
+ dst->writemask = 1 << i;
+ dst->type = brw_type_for_base_type(ir->type);
+
+ switch (ir->type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i]));
+ break;
+ case GLSL_TYPE_INT:
+ emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i]));
+ break;
+ case GLSL_TYPE_UINT:
+ emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i]));
+ break;
+ case GLSL_TYPE_BOOL:
+ emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i]));
+ break;
+ default:
+ assert(!"Non-float/uint/int/bool constant");
+ break;
+ }
+ }
+ dst->reg_offset++;
+}
+
+void
+vec4_visitor::visit(ir_constant *ir)
+{
+ dst_reg dst = dst_reg(this, ir->type);
+ this->result = src_reg(dst);
+
+ emit_constant_values(&dst, ir);
+}
+
+void
+vec4_visitor::visit(ir_call *ir)
+{
+ assert(!"not reached");
+}
+
+void
+vec4_visitor::visit(ir_texture *ir)
+{
+ /* FINISHME: Implement vertex texturing.
+ *
+ * With 0 vertex samplers available, the linker will reject
+ * programs that do vertex texturing, but after our visitor has
+ * run.
+ */
+}
+
+void
+vec4_visitor::visit(ir_return *ir)
+{
+ assert(!"not reached");
+}
+
+void
+vec4_visitor::visit(ir_discard *ir)
+{
+ assert(!"not reached");
+}
+
+void
+vec4_visitor::visit(ir_if *ir)
+{
+ /* Don't point the annotation at the if statement, because then it plus
+ * the then and else blocks get printed.
+ */
+ this->base_ir = ir->condition;
+
+ if (intel->gen == 6) {
+ emit_if_gen6(ir);
+ } else {
+ emit_bool_to_cond_code(ir->condition);
+ vec4_instruction *inst = emit(BRW_OPCODE_IF);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ }
+
+ visit_instructions(&ir->then_instructions);
+
+ if (!ir->else_instructions.is_empty()) {
+ this->base_ir = ir->condition;
+ emit(BRW_OPCODE_ELSE);
+
+ visit_instructions(&ir->else_instructions);
+ }
+
+ this->base_ir = ir->condition;
+ emit(BRW_OPCODE_ENDIF);
+}
+
+int
+vec4_visitor::emit_vue_header_gen4(int header_mrf)
+{
+ /* Get the position */
+ src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
+
+ /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
+ dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
+
+ current_annotation = "NDC";
+ dst_reg ndc_w = ndc;
+ ndc_w.writemask = WRITEMASK_W;
+ src_reg pos_w = pos;
+ pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
+ emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
+
+ dst_reg ndc_xyz = ndc;
+ ndc_xyz.writemask = WRITEMASK_XYZ;
+
+ emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));
+
+ if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
+ c->key.nr_userclip || brw->has_negative_rhw_bug) {
+ dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
+ GLuint i;
+
+ emit(BRW_OPCODE_MOV, header1, 0u);
+
+ if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
+ assert(!"finishme: psiz");
+ src_reg psiz;
+
+ header1.writemask = WRITEMASK_W;
+ emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
+ emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
+ }
+
+ for (i = 0; i < c->key.nr_userclip; i++) {
+ vec4_instruction *inst;
+
+ inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
+ pos, src_reg(c->userplane[i]));
+ inst->conditional_mod = BRW_CONDITIONAL_L;
+
+	 inst = emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ }
+
+ /* i965 clipping workaround:
+ * 1) Test for -ve rhw
+ * 2) If set,
+ * set ndc = (0,0,0,0)
+ * set ucp[6] = 1
+ *
+ * Later, clipping will detect ucp[6] and ensure the primitive is
+ * clipped against all fixed planes.
+ */
+ if (brw->has_negative_rhw_bug) {
+#if 0
+ /* FINISHME */
+ brw_CMP(p,
+ vec8(brw_null_reg()),
+ BRW_CONDITIONAL_L,
+ brw_swizzle1(ndc, 3),
+ brw_imm_f(0));
+
+ brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
+ brw_MOV(p, ndc, brw_imm_f(0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+#endif
+ }
+
+ header1.writemask = WRITEMASK_XYZW;
+ emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1));
+ } else {
+ emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++),
+ BRW_REGISTER_TYPE_UD), 0u);
+ }
+
+ if (intel->gen == 5) {
+ /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
+ * dword 0-3 (m1) of the header is indices, point width, clip flags.
+ * dword 4-7 (m2) is the ndc position (set above)
+ * dword 8-11 (m3) of the vertex header is the 4D space position
+ * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
+ * m6 is a pad so that the vertex element data is aligned
+ * m7 is the first vertex data we fill.
+ */
+ current_annotation = "NDC";
+ emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
+
+ current_annotation = "gl_Position";
+ emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
+
+ /* user clip distance. */
+ header_mrf += 2;
+
+ /* Pad so that vertex element data is aligned. */
+ header_mrf++;
+ } else {
+ /* There are 8 dwords in VUE header pre-Ironlake:
+ * dword 0-3 (m1) is indices, point width, clip flags.
+ * dword 4-7 (m2) is ndc position (set above)
+ *
+ * dword 8-11 (m3) is the first vertex data.
+ */
+ current_annotation = "NDC";
+ emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
+
+ current_annotation = "gl_Position";
+ emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
+ }
+
+ return header_mrf;
+}
+
+int
+vec4_visitor::emit_vue_header_gen6(int header_mrf)
+{
+ struct brw_reg reg;
+
+ /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
+ * dword 0-3 (m2) of the header is indices, point width, clip flags.
+ * dword 4-7 (m3) is the 4D space position
+ * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
+ * enabled.
+ *
+    * m4 or m6 is the first vertex element data we fill.
+ */
+
+ current_annotation = "indices, point width, clip flags";
+ reg = brw_message_reg(header_mrf++);
+ emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0));
+ if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
+ emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W),
+ src_reg(output_reg[VERT_RESULT_PSIZ]));
+ }
+
+ current_annotation = "gl_Position";
+ emit(BRW_OPCODE_MOV,
+ brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS]));
+
+ current_annotation = "user clip distances";
+ if (c->key.nr_userclip) {
+ for (int i = 0; i < c->key.nr_userclip; i++) {
+ struct brw_reg m;
+ if (i < 4)
+ m = brw_message_reg(header_mrf);
+ else
+ m = brw_message_reg(header_mrf + 1);
+
+	 emit(BRW_OPCODE_DP4,
+	      dst_reg(brw_writemask(m, 1 << (i & 3))),
+	      src_reg(output_reg[VERT_RESULT_HPOS]),
+	      src_reg(c->userplane[i]));
+ }
+ header_mrf += 2;
+ }
+
+ current_annotation = NULL;
+
+ return header_mrf;
+}
+
+static int
+align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
+{
+ struct intel_context *intel = &brw->intel;
+
+ if (intel->gen >= 6) {
+ /* URB data written (does not include the message header reg) must
+ * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
+ * section 5.4.3.2.2: URB_INTERLEAVED.
+ *
+ * URB entries are allocated on a multiple of 1024 bits, so an
+ * extra 128 bits written here to make the end align to 256 is
+ * no problem.
+ */
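+      /* mlen includes the message header, so an odd mlen leaves an even
+       * number of data registers after it.
+       */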
+ if ((mlen % 2) != 1)
+ mlen++;
+ }
+
+ return mlen;
+}
+
+/**
+ * Generates the VUE payload plus the 1 or 2 URB write instructions to
+ * complete the VS thread.
+ *
+ * The VUE layout is documented in Volume 2a.
+ */
+void
+vec4_visitor::emit_urb_writes()
+{
+ /* MRF 0 is reserved for the debugger, so start with message header
+ * in MRF 1.
+ */
+ int base_mrf = 1;
+ int mrf = base_mrf;
+ int urb_entry_size;
+ uint64_t outputs_remaining = c->prog_data.outputs_written;
+ /* In the process of generating our URB write message contents, we
+ * may need to unspill a register or load from an array. Those
+ * reads would use MRFs 14-15.
+ */
+ int max_usable_mrf = 13;
+
+ /* FINISHME: edgeflag */
+
+ /* First mrf is the g0-based message header containing URB handles and such,
+ * which is implied in VS_OPCODE_URB_WRITE.
+ */
+ mrf++;
+
+ if (intel->gen >= 6) {
+ mrf = emit_vue_header_gen6(mrf);
+ } else {
+ mrf = emit_vue_header_gen4(mrf);
+ }
+
+ /* Set up the VUE data for the first URB write */
+ int attr;
+ for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
+ if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
+ continue;
+
+ outputs_remaining &= ~BITFIELD64_BIT(attr);
+
+ /* This is set up in the VUE header. */
+ if (attr == VERT_RESULT_HPOS)
+ continue;
+
+ /* This is loaded into the VUE header, and thus doesn't occupy
+ * an attribute slot.
+ */
+ if (attr == VERT_RESULT_PSIZ)
+ continue;
+
+ vec4_instruction *inst = emit(BRW_OPCODE_MOV, brw_message_reg(mrf++),
+ src_reg(output_reg[attr]));
+
+ if ((attr == VERT_RESULT_COL0 ||
+ attr == VERT_RESULT_COL1 ||
+ attr == VERT_RESULT_BFC0 ||
+ attr == VERT_RESULT_BFC1) &&
+ c->key.clamp_vertex_color) {
+ inst->saturate = true;
+ }
+
+      /* If this was the last usable MRF, we can't fit anything more into
+       * this URB WRITE.  Note that with a base_mrf of 1 this leaves an
+       * even-numbered amount of URB write data, which meets gen6's
+       * requirements for length alignment.
+       */
+ if (mrf > max_usable_mrf) {
+ attr++;
+ break;
+ }
+ }
+
+ vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
+ inst->base_mrf = base_mrf;
+ inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
+ inst->eot = !outputs_remaining;
+
+ urb_entry_size = mrf - base_mrf;
+
+ /* Optional second URB write */
+ if (outputs_remaining) {
+ mrf = base_mrf + 1;
+
+ for (; attr < VERT_RESULT_MAX; attr++) {
+ if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
+ continue;
+
+ assert(mrf < max_usable_mrf);
+
+ emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
+ }
+
+ inst = emit(VS_OPCODE_URB_WRITE);
+ inst->base_mrf = base_mrf;
+ inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
+ inst->eot = true;
+ /* URB destination offset. In the previous write, we got MRFs
+ * 2-13 minus the one header MRF, so 12 regs. URB offset is in
+ * URB row increments, and each of our MRFs is half of one of
+ * those, since we're doing interleaved writes.
+ */
+ inst->offset = (max_usable_mrf - base_mrf) / 2;
+
+ urb_entry_size += mrf - base_mrf;
+ }
+
+ if (intel->gen == 6)
+ c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
+ else
+ c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
+}
+
+src_reg
+vec4_visitor::get_scratch_offset(vec4_instruction *inst,
+ src_reg *reladdr, int reg_offset)
+{
+ /* Because we store the values to scratch interleaved like our
+ * vertex data, we need to scale the vec4 index by 2.
+ */
+ int message_header_scale = 2;
+
+ /* Pre-gen6, the message header uses byte offsets instead of vec4
+ * (16-byte) offset units.
+ */
+ if (intel->gen < 6)
+ message_header_scale *= 16;
+
+ if (reladdr) {
+ src_reg index = src_reg(this, glsl_type::int_type);
+
+ vec4_instruction *add = emit(BRW_OPCODE_ADD,
+ dst_reg(index),
+ *reladdr,
+ src_reg(reg_offset));
+ /* Move our new instruction from the tail to its correct place. */
+ add->remove();
+ inst->insert_before(add);
+
+ vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index),
+ index, src_reg(message_header_scale));
+ mul->remove();
+ inst->insert_before(mul);
+
+ return index;
+ } else {
+ return src_reg(reg_offset * message_header_scale);
+ }
+}
+
+/**
+ * Emits an instruction before @inst to load the value named by @orig_src
+ * from scratch space at @base_offset to @temp.
+ */
+void
+vec4_visitor::emit_scratch_read(vec4_instruction *inst,
+ dst_reg temp, src_reg orig_src,
+ int base_offset)
+{
+ int reg_offset = base_offset + orig_src.reg_offset;
+ src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
+
+ vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ,
+ temp, index);
+
+ scratch_read_inst->base_mrf = 14;
+ scratch_read_inst->mlen = 1;
+ /* Move our instruction from the tail to its correct place. */
+ scratch_read_inst->remove();
+ inst->insert_before(scratch_read_inst);
+}
+
+/**
+ * Emits an instruction after @inst to store the value to be written
+ * to @orig_dst to scratch space at @base_offset, from @temp.
+ */
+void
+vec4_visitor::emit_scratch_write(vec4_instruction *inst,
+ src_reg temp, dst_reg orig_dst,
+ int base_offset)
+{
+ int reg_offset = base_offset + orig_dst.reg_offset;
+ src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);
+
+ dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
+ orig_dst.writemask));
+ vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE,
+ dst, temp, index);
+ scratch_write_inst->base_mrf = 13;
+ scratch_write_inst->mlen = 2;
+ scratch_write_inst->predicate = inst->predicate;
+ /* Move our instruction from the tail to its correct place. */
+ scratch_write_inst->remove();
+ inst->insert_after(scratch_write_inst);
+}
+
+/**
+ * We can't generally support array access in GRF space, because a
+ * single instruction's destination can only span 2 contiguous
+ * registers. So, we send all GRF arrays that get variable index
+ * access to scratch space.
+ */
+void
+vec4_visitor::move_grf_array_access_to_scratch()
+{
+ int scratch_loc[this->virtual_grf_count];
+
+ for (int i = 0; i < this->virtual_grf_count; i++) {
+ scratch_loc[i] = -1;
+ }
+
+ /* First, calculate the set of virtual GRFs that need to be punted
+ * to scratch due to having any array access on them, and where in
+ * scratch.
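+    *
+    * Each vec4 gets 8 dwords (32 bytes) of scratch because, like our
+    * vertex data, the values are stored interleaved (see
+    * get_scratch_offset()).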
+ */
+ foreach_list(node, &this->instructions) {
+ vec4_instruction *inst = (vec4_instruction *)node;
+
+ if (inst->dst.file == GRF && inst->dst.reladdr &&
+ scratch_loc[inst->dst.reg] == -1) {
+ scratch_loc[inst->dst.reg] = c->last_scratch;
+ c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
+ }
+
+ for (int i = 0 ; i < 3; i++) {
+ src_reg *src = &inst->src[i];
+
+ if (src->file == GRF && src->reladdr &&
+ scratch_loc[src->reg] == -1) {
+ scratch_loc[src->reg] = c->last_scratch;
+ c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
+ }
+ }
+ }
+
+ /* Now, for anything that will be accessed through scratch, rewrite
+ * it to load/store. Note that this is a _safe list walk, because
+ * we may generate a new scratch_write instruction after the one
+ * we're processing.
+ */
+ foreach_list_safe(node, &this->instructions) {
+ vec4_instruction *inst = (vec4_instruction *)node;
+
+      /* Set up the annotation tracking for newly generated instructions. */
+ base_ir = inst->ir;
+ current_annotation = inst->annotation;
+
+ if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
+ src_reg temp = src_reg(this, glsl_type::vec4_type);
+
+ emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);
+
+ inst->dst.file = temp.file;
+ inst->dst.reg = temp.reg;
+ inst->dst.reg_offset = temp.reg_offset;
+ inst->dst.reladdr = NULL;
+ }
+
+ for (int i = 0 ; i < 3; i++) {
+ if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
+ continue;
+
+ dst_reg temp = dst_reg(this, glsl_type::vec4_type);
+
+ emit_scratch_read(inst, temp, inst->src[i],
+ scratch_loc[inst->src[i].reg]);
+
+ inst->src[i].file = temp.file;
+ inst->src[i].reg = temp.reg;
+ inst->src[i].reg_offset = temp.reg_offset;
+ inst->src[i].reladdr = NULL;
+ }
+ }
+}
+
+
+vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
+ struct gl_shader_program *prog,
+ struct brw_shader *shader)
+{
+ this->c = c;
+ this->p = &c->func;
+ this->brw = p->brw;
+ this->intel = &brw->intel;
+ this->ctx = &intel->ctx;
+ this->prog = prog;
+ this->shader = shader;
+
+ this->mem_ctx = ralloc_context(NULL);
+ this->failed = false;
+
+ this->base_ir = NULL;
+ this->current_annotation = NULL;
+
+ this->c = c;
+ this->vp = prog->VertexProgram;
+ this->prog_data = &c->prog_data;
+
+ this->variable_ht = hash_table_ctor(0,
+ hash_table_pointer_hash,
+ hash_table_pointer_compare);
+
+ this->virtual_grf_def = NULL;
+ this->virtual_grf_use = NULL;
+ this->virtual_grf_sizes = NULL;
+ this->virtual_grf_count = 0;
+ this->virtual_grf_array_size = 0;
+ this->live_intervals_valid = false;
+
+   this->uniforms = 0;
+}
+
+vec4_visitor::~vec4_visitor()
+{
+ ralloc_free(this->mem_ctx);
+ hash_table_dtor(this->variable_ht);
+}
+
+
+void
+vec4_visitor::fail(const char *format, ...)
+{
+ va_list va;
+ char *msg;
+
+ if (failed)
+ return;
+
+ failed = true;
+
+ va_start(va, format);
+ msg = ralloc_vasprintf(mem_ctx, format, va);
+ va_end(va);
+ msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
+
+ this->fail_msg = msg;
+
+ if (INTEL_DEBUG & DEBUG_VS) {
+ fprintf(stderr, "%s", msg);
+ }
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index a9ad5311fe3..3373e707d98 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -30,6 +30,7 @@
*/
+#include "main/compiler.h"
#include "brw_context.h"
#include "brw_vs.h"
#include "brw_util.h"
@@ -39,17 +40,21 @@
#include "../glsl/ralloc.h"
-static void do_vs_prog( struct brw_context *brw,
- struct brw_vertex_program *vp,
- struct brw_vs_prog_key *key )
+static bool
+do_vs_prog(struct brw_context *brw,
+ struct gl_shader_program *prog,
+ struct brw_vertex_program *vp,
+ struct brw_vs_prog_key *key)
{
struct gl_context *ctx = &brw->intel.ctx;
+ struct intel_context *intel = &brw->intel;
GLuint program_size;
const GLuint *program;
struct brw_vs_compile c;
void *mem_ctx;
int aux_size;
int i;
+ static int new_vs = -1;
memset(&c, 0, sizeof(c));
memcpy(&c.key, key, sizeof(*key));
@@ -85,7 +90,25 @@ static void do_vs_prog( struct brw_context *brw,
/* Emit GEN4 code.
*/
- brw_vs_emit(&c);
+ if (new_vs == -1)
+ new_vs = getenv("INTEL_NEW_VS") != NULL;
+
+ if (new_vs && prog) {
+ if (!brw_vs_emit(prog, &c)) {
+ ralloc_free(mem_ctx);
+ return false;
+ }
+ } else {
+ brw_old_vs_emit(&c);
+ }
+
+ /* Scratch space is used for register spilling */
+ if (c.last_scratch) {
+ c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch);
+
+ brw_get_scratch_bo(intel, &brw->vs.scratch_bo,
+ c.prog_data.total_scratch * brw->vs_max_threads);
+ }
/* get the program
*/
@@ -111,6 +134,8 @@ static void do_vs_prog( struct brw_context *brw,
&c.prog_data, aux_size,
&brw->vs.prog_offset, &brw->vs.prog_data);
ralloc_free(mem_ctx);
+
+ return true;
}
@@ -155,13 +180,15 @@ static void brw_upload_vs_prog(struct brw_context *brw)
if (!brw_search_cache(&brw->cache, BRW_VS_PROG,
&key, sizeof(key),
&brw->vs.prog_offset, &brw->vs.prog_data)) {
- do_vs_prog(brw, vp, &key);
+ bool success = do_vs_prog(brw, ctx->Shader.CurrentVertexProgram,
+ vp, &key);
+
+ assert(success);
}
brw->vs.constant_map = ((int8_t *)brw->vs.prog_data +
sizeof(*brw->vs.prog_data));
}
-
/* See brw_vs.c:
*/
const struct brw_tracked_state brw_vs_prog = {
@@ -174,3 +201,30 @@ const struct brw_tracked_state brw_vs_prog = {
},
.prepare = brw_upload_vs_prog
};
+
+bool
+brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_vs_prog_key key;
+ struct gl_vertex_program *vp = prog->VertexProgram;
+ struct brw_vertex_program *bvp = brw_vertex_program(vp);
+ uint32_t old_prog_offset = brw->vs.prog_offset;
+ struct brw_vs_prog_data *old_prog_data = brw->vs.prog_data;
+ bool success;
+
+ if (!vp)
+ return true;
+
+ memset(&key, 0, sizeof(key));
+
+ key.program_string_id = bvp->id;
+ key.clamp_vertex_color = true;
+
+ success = do_vs_prog(brw, prog, bvp, &key);
+
+ brw->vs.prog_offset = old_prog_offset;
+ brw->vs.prog_data = old_prog_data;
+
+ return success;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 432994a8534..beccb381ee2 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -66,6 +66,7 @@ struct brw_vs_compile {
GLuint first_output;
GLuint nr_outputs;
GLuint first_overflow_output; /**< VERT_ATTRIB_x */
+ GLuint last_scratch;
GLuint first_tmp;
GLuint last_tmp;
@@ -92,6 +93,8 @@ struct brw_vs_compile {
GLboolean needs_stack;
};
-void brw_vs_emit( struct brw_vs_compile *c );
+bool brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c);
+void brw_old_vs_emit(struct brw_vs_compile *c);
+bool brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c
index 9fdfebe9f76..47cc0a7da7a 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_constval.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c
@@ -194,19 +194,11 @@ static void calc_wm_input_sizes( struct brw_context *brw )
/* BRW_NEW_VERTEX_PROGRAM */
const struct brw_vertex_program *vp =
brw_vertex_program_const(brw->vertex_program);
- /* BRW_NEW_FRAGMENT_PROGRAM */
- struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram;
/* BRW_NEW_INPUT_DIMENSIONS */
struct tracker t;
GLuint insn;
GLuint i;
- /* If we're going to go through brw_fs.cpp, we don't end up using
- * brw->wm.input_size_masks.
- */
- if (prog && prog->_LinkedShaders[MESA_SHADER_FRAGMENT])
- return;
-
memset(&t, 0, sizeof(t));
/* _NEW_LIGHT */
@@ -246,9 +238,7 @@ static void calc_wm_input_sizes( struct brw_context *brw )
const struct brw_tracked_state brw_wm_input_sizes = {
.dirty = {
.mesa = _NEW_LIGHT,
- .brw = (BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_VERTEX_PROGRAM |
- BRW_NEW_INPUT_DIMENSIONS),
+ .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS,
.cache = 0
},
.prepare = calc_wm_input_sizes
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 9d733344a26..bfee811e13d 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1096,31 +1096,6 @@ static void emit_lrp_noalias(struct brw_vs_compile *c,
brw_MAC(p, dst, arg0, arg1);
}
-/** 3 or 4-component vector normalization */
-static void emit_nrm( struct brw_vs_compile *c,
- struct brw_reg dst,
- struct brw_reg arg0,
- int num_comps)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg tmp = get_tmp(c);
-
- /* tmp = dot(arg0, arg0) */
- if (num_comps == 3)
- brw_DP3(p, tmp, arg0, arg0);
- else
- brw_DP4(p, tmp, arg0, arg0);
-
- /* tmp = 1 / sqrt(tmp) */
- emit_math1(c, BRW_MATH_FUNCTION_RSQ, tmp, tmp, BRW_MATH_PRECISION_FULL);
-
- /* dst = arg0 * tmp */
- brw_MUL(p, dst, arg0, tmp);
-
- release_tmp(c, tmp);
-}
-
-
static struct brw_reg
get_constant(struct brw_vs_compile *c,
const struct prog_instruction *inst,
@@ -1359,7 +1334,7 @@ get_src_reg( struct brw_vs_compile *c,
if (component >= 0) {
params = c->vp->program.Base.Parameters;
- f = params->ParameterValues[src->Index][component];
+ f = params->ParameterValues[src->Index][component].f;
if (src->Abs)
f = fabs(f);
@@ -1821,6 +1796,9 @@ accumulator_contains(struct brw_vs_compile *c, struct brw_reg val)
if (val.address_mode != BRW_ADDRESS_DIRECT)
return GL_FALSE;
+ if (val.negate || val.abs)
+ return GL_FALSE;
+
switch (prev_insn->header.opcode) {
case BRW_OPCODE_MOV:
case BRW_OPCODE_MAC:
@@ -1900,7 +1878,7 @@ brw_vs_rescale_gl_fixed(struct brw_vs_compile *c)
/* Emit the vertex program instructions here.
*/
-void brw_vs_emit(struct brw_vs_compile *c )
+void brw_old_vs_emit(struct brw_vs_compile *c )
{
#define MAX_IF_DEPTH 32
#define MAX_LOOP_DEPTH 32
@@ -1980,9 +1958,22 @@ void brw_vs_emit(struct brw_vs_compile *c )
const struct prog_src_register *src = &inst->SrcReg[i];
index = src->Index;
file = src->File;
- if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
- args[i] = c->output_regs[index].reg;
- else
+ if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) {
+ /* Can't just make get_arg "do the right thing" here because
+ * other callers of get_arg and get_src_reg don't expect any
+ * special behavior for the c->output_regs[index].used_in_src
+ * case.
+ */
+ args[i] = c->output_regs[index].reg;
+ args[i].dw1.bits.swizzle =
+ BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0),
+ GET_SWZ(src->Swizzle, 1),
+ GET_SWZ(src->Swizzle, 2),
+ GET_SWZ(src->Swizzle, 3));
+
+ /* Note this is ok for non-swizzle ARB_vp instructions */
+ args[i].negate = src->Negate ? 1 : 0;
+ } else
args[i] = get_arg(c, inst, i);
}
@@ -1993,7 +1984,11 @@ void brw_vs_emit(struct brw_vs_compile *c )
index = inst->DstReg.Index;
file = inst->DstReg.File;
if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
- dst = c->output_regs[index].reg;
+ /* Can't just make get_dst "do the right thing" here because other
+ * callers of get_dst don't expect any special behavior for the
+ * c->output_regs[index].used_in_src case.
+ */
+ dst = brw_writemask(c->output_regs[index].reg, inst->DstReg.WriteMask);
else
dst = get_dst(c, inst->DstReg);
@@ -2025,12 +2020,6 @@ void brw_vs_emit(struct brw_vs_compile *c )
case OPCODE_DPH:
brw_DPH(p, dst, args[0], args[1]);
break;
- case OPCODE_NRM3:
- emit_nrm(c, dst, args[0], 3);
- break;
- case OPCODE_NRM4:
- emit_nrm(c, dst, args[0], 4);
- break;
case OPCODE_DST:
unalias2(c, dst, args[0], args[1], emit_dst_noalias);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index fc4373ab311..29b3e47ab0c 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -77,6 +77,16 @@ brw_prepare_vs_unit(struct brw_context *brw)
else
vs->thread1.binding_table_entry_count = brw->vs.nr_surfaces;
+ if (brw->vs.prog_data->total_scratch != 0) {
+ vs->thread2.scratch_space_base_pointer =
+ brw->vs.scratch_bo->offset >> 10; /* reloc */
+ vs->thread2.per_thread_scratch_space =
+ ffs(brw->vs.prog_data->total_scratch) - 11;
+ } else {
+ vs->thread2.scratch_space_base_pointer = 0;
+ vs->thread2.per_thread_scratch_space = 0;
+ }
+
vs->thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length;
vs->thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length;
vs->thread3.dispatch_grf_start_reg = 1;
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index 55dbd4fa8b0..40360b23fff 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -213,6 +213,7 @@ static void brw_new_batch( struct intel_context *intel )
brw->state_batch_count = 0;
brw->vb.nr_current_buffers = 0;
+ brw->ib.type = -1;
/* Mark that the current program cache BO has been used by the GPU.
* It will be reallocated if we need to put new programs in for the
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index b0dfdd536aa..e76832515fe 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -206,10 +206,6 @@ bool do_wm_prog(struct brw_context *brw,
*/
return false;
}
- c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN);
- c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN);
- c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG);
- c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF);
} else {
void *instruction = c->instruction;
void *prog_instructions = c->prog_instructions;
@@ -232,6 +228,13 @@ bool do_wm_prog(struct brw_context *brw,
if (!brw_wm_fs_emit(brw, c, prog))
return false;
} else {
+ if (!c->instruction) {
+ c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN);
+ c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN);
+ c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG);
+ c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF);
+ }
+
/* Fallback for fixed function and ARB_fp shaders. */
c->dispatch_width = 16;
brw_wm_payload_setup(brw, c);
@@ -241,29 +244,10 @@ bool do_wm_prog(struct brw_context *brw,
/* Scratch space is used for register spilling */
if (c->last_scratch) {
- uint32_t total_scratch;
-
- /* Per-thread scratch space is power-of-two sized. */
- for (c->prog_data.total_scratch = 1024;
- c->prog_data.total_scratch <= c->last_scratch;
- c->prog_data.total_scratch *= 2) {
- /* empty */
- }
- total_scratch = c->prog_data.total_scratch * brw->wm_max_threads;
+ c->prog_data.total_scratch = brw_get_scratch_size(c->last_scratch);
- if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) {
- drm_intel_bo_unreference(brw->wm.scratch_bo);
- brw->wm.scratch_bo = NULL;
- }
- if (brw->wm.scratch_bo == NULL) {
- brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
- "wm scratch",
- total_scratch,
- 4096);
- }
- }
- else {
- c->prog_data.total_scratch = 0;
+ brw_get_scratch_bo(intel, &brw->wm.scratch_bo,
+ c->prog_data.total_scratch * brw->wm_max_threads);
}
if (unlikely(INTEL_DEBUG & DEBUG_WM))
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index f61757a8cac..6ea4a7d6e50 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -1094,9 +1094,16 @@ void emit_tex(struct brw_wm_compile *c,
if (intel->gen < 5 && c->dispatch_width == 8)
nr_texcoords = 3;
- /* For shadow comparisons, we have to supply u,v,r. */
- if (shadow)
- nr_texcoords = 3;
+ if (shadow) {
+ if (intel->gen < 7) {
+ /* For shadow comparisons, we have to supply u,v,r. */
+ nr_texcoords = 3;
+ } else {
+ /* On Ivybridge, the shadow comparitor comes first. Just load it. */
+ brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
+ cur_mrf += mrf_per_channel;
+ }
+ }
/* Emit the texcoords. */
for (i = 0; i < nr_texcoords; i++) {
@@ -1113,7 +1120,7 @@ void emit_tex(struct brw_wm_compile *c,
}
/* Fill in the shadow comparison reference value. */
- if (shadow) {
+ if (shadow && intel->gen < 7) {
if (intel->gen >= 5) {
/* Fill in the cube map array index value. */
brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index 7cd3edad235..bd46bd8de43 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -535,15 +535,15 @@ static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
GLfloat s3)
{
struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
- GLfloat values[4];
+ gl_constant_value values[4];
GLuint idx;
GLuint swizzle;
struct prog_src_register reg;
- values[0] = s0;
- values[1] = s1;
- values[2] = s2;
- values[3] = s3;
+ values[0].f = s0;
+ values[1].f = s1;
+ values[2].f = s2;
+ values[3].f = s3;
idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
reg = src_reg(PROGRAM_STATE_VAR, idx);
@@ -664,6 +664,8 @@ static void precalc_lit( struct brw_wm_compile *c,
static void precalc_tex( struct brw_wm_compile *c,
const struct prog_instruction *inst )
{
+ struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
struct prog_src_register coord;
struct prog_dst_register tmpcoord = { 0 };
const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
@@ -727,7 +729,7 @@ static void precalc_tex( struct brw_wm_compile *c,
release_temp(c, tmp0);
release_temp(c, tmp1);
}
- else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
+ else if (intel->gen < 6 && inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
struct prog_src_register scale =
search_or_add_param5( c,
STATE_INTERNAL,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
index f78bdc31866..ccf9dc2bc18 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
@@ -205,14 +205,14 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
case PROGRAM_CONSTANT:
/* These are invarient:
*/
- ref = get_const_ref(c, &plist->ParameterValues[idx][component]);
+ ref = get_const_ref(c, &plist->ParameterValues[idx][component].f);
break;
case PROGRAM_STATE_VAR:
case PROGRAM_UNIFORM:
/* These may change from run to run:
*/
- ref = get_param_ref(c, &plist->ParameterValues[idx][component] );
+ ref = get_param_ref(c, &plist->ParameterValues[idx][component].f );
break;
default:
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index 98146136703..6834ebad780 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -289,6 +289,13 @@ static void brw_update_sampler_state(struct brw_context *brw,
sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 6);
sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 6);
+ /* On Gen6+, the sampler can handle non-normalized texture
+ * rectangle coordinates natively
+ */
+ if (intel->gen >= 6 && texObj->Target == GL_TEXTURE_RECTANGLE) {
+ sampler->ss3.non_normalized_coord = 1;
+ }
+
upload_default_color(brw, gl_sampler, unit);
if (intel->gen >= 6) {
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index fb4fb146f8d..ad909789d82 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -342,7 +342,7 @@ prepare_wm_pull_constants(struct brw_context *brw)
constants = brw->wm.const_bo->virtual;
for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
constants[i] = convert_param(brw->wm.prog_data->pull_param_convert[i],
- *brw->wm.prog_data->pull_param[i]);
+ brw->wm.prog_data->pull_param[i]);
}
drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index fb4cdbaadf9..b94121e8437 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -81,12 +81,21 @@ gen6_prepare_vs_push_constants(struct brw_context *brw)
params_uploaded++;
}
- for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
- if (brw->vs.constant_map[i] != -1) {
- memcpy(param + brw->vs.constant_map[i] * 4,
- vp->program.Base.Parameters->ParameterValues[i],
- 4 * sizeof(float));
- params_uploaded++;
+ if (brw->vs.prog_data->uses_new_param_layout) {
+ for (i = 0; i < brw->vs.prog_data->nr_params; i++) {
+ *param = convert_param(brw->vs.prog_data->param_convert[i],
+ brw->vs.prog_data->param[i]);
+ param++;
+ }
+ params_uploaded += brw->vs.prog_data->nr_params / 4;
+ } else {
+ for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+ if (brw->vs.constant_map[i] != -1) {
+ memcpy(param + brw->vs.constant_map[i] * 4,
+ vp->program.Base.Parameters->ParameterValues[i],
+ 4 * sizeof(float));
+ params_uploaded++;
+ }
}
}
@@ -151,7 +160,15 @@ upload_vs_state(struct brw_context *brw)
OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
GEN6_VS_FLOATING_POINT_MODE_ALT |
(brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
- OUT_BATCH(0); /* scratch space base offset */
+
+ if (brw->vs.prog_data->total_scratch) {
+ OUT_RELOC(brw->vs.scratch_bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ ffs(brw->vs.prog_data->total_scratch) - 11);
+ } else {
+ OUT_BATCH(0);
+ }
+
OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
(brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
@@ -160,6 +177,32 @@ upload_vs_state(struct brw_context *brw)
GEN6_VS_STATISTICS_ENABLE |
GEN6_VS_ENABLE);
ADVANCE_BATCH();
+
+ /* Based on my reading of the simulator, the VS constants don't get
+ * pulled into the VS FF unit until an appropriate pipeline flush
+ * happens, and instead the 3DSTATE_CONSTANT_VS packet just adds
+ * references to them into a little FIFO. The flushes are common,
+ * but don't reliably happen between this and a 3DPRIMITIVE, causing
+ * the primitive to use the wrong constants. Then the FIFO
+ * containing the constant setup gets added to again on the next
+ * constants change, and eventually when a flush does happen the
+ * unit is overwhelmed by constant changes and dies.
+ *
+ * To avoid this, send a PIPE_CONTROL down the line that will
+ * update the unit immediately loading the constants. The flush
+ * type bits here were those set by the STATE_BASE_ADDRESS whose
+ * move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the
+ * bug reports that led to this workaround, and may be more than
+ * what is strictly required to avoid the issue.
+ */
+ BEGIN_BATCH(4);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+ OUT_BATCH(PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_INSTRUCTION_FLUSH |
+ PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+ OUT_BATCH(0); /* address */
+ OUT_BATCH(0); /* write data */
+ ADVANCE_BATCH();
}
const struct brw_tracked_state gen6_vs_state = {
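
The scratch relocation above packs the per-thread scratch size into the low bits of the buffer address: the hardware field wants log2(size / 1KB), and for a power-of-two size of at least 1KB that is exactly ffs(size) - 11 (ffs(1024) == 11). A small sketch of the encoding, assuming total_scratch is already rounded up to such a power of two:

#include <strings.h>   /* ffs() */

/* Sketch: map a power-of-two scratch size in bytes (>= 1KB) to the
 * "Per-Thread Scratch Space" encoding OR'd into the OUT_RELOC delta above:
 * 1KB -> 0, 2KB -> 1, 4KB -> 2, and so on. */
static unsigned
per_thread_scratch_field(unsigned total_scratch)
{
   return ffs(total_scratch) - 11;
}
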
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 185da9c355f..07e9995f53b 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -54,14 +54,14 @@ gen6_prepare_wm_push_constants(struct brw_context *brw)
float *constants;
unsigned int i;
- constants = brw_state_batch(brw, AUB_TRACE_NO_TYPE,
+ constants = brw_state_batch(brw, AUB_TRACE_WM_CONSTANTS,
brw->wm.prog_data->nr_params *
sizeof(float),
32, &brw->wm.push_const_offset);
for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
- *brw->wm.prog_data->param[i]);
+ brw->wm.prog_data->param[i]);
}
if (0) {
diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
index e787c21f4d1..aee67c87472 100644
--- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
@@ -157,6 +157,13 @@ gen7_update_sampler_state(struct brw_context *brw, int unit,
sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 8);
sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 8);
+ /* The sampler can handle non-normalized texture rectangle coordinates
+ * natively
+ */
+ if (texObj->Target == GL_TEXTURE_RECTANGLE) {
+ sampler->ss3.non_normalized_coord = 1;
+ }
+
upload_default_color(brw, gl_sampler, unit);
sampler->ss2.default_color_pointer = brw->wm.sdc_offset[unit] >> 5;
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index 0fad3d2fb68..f3cd5d15bf0 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -71,7 +71,15 @@ upload_vs_state(struct brw_context *brw)
OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
GEN6_VS_FLOATING_POINT_MODE_ALT |
(brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
- OUT_BATCH(0); /* scratch space base offset */
+
+ if (brw->vs.prog_data->total_scratch) {
+ OUT_RELOC(brw->vs.scratch_bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ ffs(brw->vs.prog_data->total_scratch) - 11);
+ } else {
+ OUT_BATCH(0);
+ }
+
OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
(brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index a102ca772b3..55a603e887a 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -58,7 +58,7 @@ gen7_prepare_wm_constants(struct brw_context *brw)
for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
- *brw->wm.prog_data->param[i]);
+ brw->wm.prog_data->param[i]);
}
if (0) {
@@ -228,7 +228,13 @@ upload_ps_state(struct brw_context *brw)
OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
OUT_BATCH(brw->wm.prog_offset);
OUT_BATCH(dw2);
- OUT_BATCH(0); /* scratch space base offset */
+ if (brw->wm.prog_data->total_scratch) {
+ OUT_RELOC(brw->wm.scratch_bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ ffs(brw->wm.prog_data->total_scratch) - 11);
+ } else {
+ OUT_BATCH(0);
+ }
OUT_BATCH(dw4);
OUT_BATCH(dw5);
OUT_BATCH(0); /* kernel 1 pointer */
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index b61a2ffef19..db4343be10c 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -308,12 +308,29 @@ emit:
* [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
* =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
*
- * XXX: There is also a workaround that would appear to apply to this
- * workaround, but it doesn't appear to be necessary so far:
+ * And the workaround for these two requires this workaround first:
*
- * Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
+ * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
* BEFORE the pipe-control with a post-sync op and no write-cache
* flushes.
+ *
+ * And this last workaround is tricky because of the requirements on
+ * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
+ * volume 2 part 1:
+ *
+ * "1 of the following must also be set:
+ * - Render Target Cache Flush Enable ([12] of DW1)
+ * - Depth Cache Flush Enable ([0] of DW1)
+ * - Stall at Pixel Scoreboard ([1] of DW1)
+ * - Depth Stall ([13] of DW1)
+ * - Post-Sync Operation ([13] of DW1)
+ * - Notify Enable ([8] of DW1)"
+ *
+ * The cache flushes require the workaround flush that triggered this
+ * one, so we can't use it. Depth stall would trigger the same.
+ * Post-sync nonzero is what triggered this second workaround, so we
+ * can't use that one either. Notify enable is IRQs, which aren't
+ * really our business. That leaves only stall at scoreboard.
*/
void
intel_emit_post_sync_nonzero_flush(struct intel_context *intel)
@@ -323,9 +340,17 @@ intel_emit_post_sync_nonzero_flush(struct intel_context *intel)
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+ OUT_BATCH(PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_STALL_AT_SCOREBOARD);
+ OUT_BATCH(0); /* address */
+ OUT_BATCH(0); /* write data */
+ ADVANCE_BATCH();
+
+ BEGIN_BATCH(4);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL);
OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
OUT_RELOC(intel->batch.workaround_bo,
- I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT, 0);
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
OUT_BATCH(0); /* write data */
ADVANCE_BATCH();
@@ -365,6 +390,7 @@ intel_batchbuffer_emit_mi_flush(struct intel_context *intel)
OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
PIPE_CONTROL_WRITE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_TC_FLUSH |
PIPE_CONTROL_NO_WRITE);
OUT_BATCH(0); /* write address */
OUT_BATCH(0); /* write data */
diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c
index 30be1b9382f..b18dd2922d9 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.c
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@@ -541,8 +541,8 @@ intel_set_teximage_alpha_to_one(struct gl_context *ctx,
/* get dest x/y in destination texture */
intel_miptree_get_image_offset(intel_image->mt,
- intel_image->level,
- intel_image->face,
+ intel_image->base.Level,
+ intel_image->base.Face,
0,
&image_x, &image_y);
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 439d6fc8247..d908975fc87 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -41,8 +41,7 @@
#include "intel_regions.h"
static GLboolean
-intel_bufferobj_unmap(struct gl_context * ctx,
- GLenum target, struct gl_buffer_object *obj);
+intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj);
/** Allocates a new drm_intel_bo to store the data for the buffer object. */
static void
@@ -122,7 +121,7 @@ intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj)
* (though it does if you call glDeleteBuffers)
*/
if (obj->Pointer)
- intel_bufferobj_unmap(ctx, 0, obj);
+ intel_bufferobj_unmap(ctx, obj);
free(intel_obj->sys_buffer);
if (intel_obj->region) {
@@ -203,7 +202,6 @@ intel_bufferobj_data(struct gl_context * ctx,
*/
static void
intel_bufferobj_subdata(struct gl_context * ctx,
- GLenum target,
GLintptrARB offset,
GLsizeiptrARB size,
const GLvoid * data, struct gl_buffer_object *obj)
@@ -276,82 +274,28 @@ intel_bufferobj_subdata(struct gl_context * ctx,
*/
static void
intel_bufferobj_get_subdata(struct gl_context * ctx,
- GLenum target,
GLintptrARB offset,
GLsizeiptrARB size,
GLvoid * data, struct gl_buffer_object *obj)
{
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+ struct intel_context *intel = intel_context(ctx);
assert(intel_obj);
if (intel_obj->sys_buffer)
memcpy(data, (char *)intel_obj->sys_buffer + offset, size);
- else
- drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data);
-}
-
-
-
-/**
- * Called via glMapBufferARB().
- */
-static void *
-intel_bufferobj_map(struct gl_context * ctx,
- GLenum target,
- GLenum access, struct gl_buffer_object *obj)
-{
- struct intel_context *intel = intel_context(ctx);
- struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
- GLboolean read_only = (access == GL_READ_ONLY_ARB);
- GLboolean write_only = (access == GL_WRITE_ONLY_ARB);
-
- assert(intel_obj);
-
- if (intel_obj->sys_buffer) {
- if (!read_only && intel_obj->source) {
- release_buffer(intel_obj);
- }
-
- if (!intel_obj->buffer || intel_obj->source) {
- obj->Pointer = intel_obj->sys_buffer;
- obj->Length = obj->Size;
- obj->Offset = 0;
- return obj->Pointer;
+ else {
+ if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) {
+ intel_batchbuffer_flush(intel);
}
-
- free(intel_obj->sys_buffer);
- intel_obj->sys_buffer = NULL;
- }
-
- /* Flush any existing batchbuffer that might reference this data. */
- if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer))
- intel_flush(ctx);
-
- if (intel_obj->region)
- intel_bufferobj_cow(intel, intel_obj);
-
- if (intel_obj->buffer == NULL) {
- obj->Pointer = NULL;
- return NULL;
- }
-
- if (write_only) {
- drm_intel_gem_bo_map_gtt(intel_obj->buffer);
- intel_obj->mapped_gtt = GL_TRUE;
- } else {
- drm_intel_bo_map(intel_obj->buffer, !read_only);
- intel_obj->mapped_gtt = GL_FALSE;
+ drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data);
}
+}
- obj->Pointer = intel_obj->buffer->virtual;
- obj->Length = obj->Size;
- obj->Offset = 0;
- return obj->Pointer;
-}
/**
- * Called via glMapBufferRange().
+ * Called via glMapBufferRange and glMapBuffer
*
* The goal of this extension is to allow apps to accumulate their rendering
* at the same time as they accumulate their buffer object. Without it,
@@ -368,12 +312,11 @@ intel_bufferobj_map(struct gl_context * ctx,
*/
static void *
intel_bufferobj_map_range(struct gl_context * ctx,
- GLenum target, GLintptr offset, GLsizeiptr length,
+ GLintptr offset, GLsizeiptr length,
GLbitfield access, struct gl_buffer_object *obj)
{
struct intel_context *intel = intel_context(ctx);
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
- GLboolean read_only = (access == GL_READ_ONLY_ARB);
assert(intel_obj);
@@ -385,6 +328,9 @@ intel_bufferobj_map_range(struct gl_context * ctx,
obj->AccessFlags = access;
if (intel_obj->sys_buffer) {
+ const bool read_only =
+ (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_READ_BIT;
+
if (!read_only && intel_obj->source)
release_buffer(intel_obj);
@@ -468,7 +414,7 @@ intel_bufferobj_map_range(struct gl_context * ctx,
* would defeat the point.
*/
static void
-intel_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target,
+intel_bufferobj_flush_mapped_range(struct gl_context *ctx,
GLintptr offset, GLsizeiptr length,
struct gl_buffer_object *obj)
{
@@ -502,8 +448,7 @@ intel_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target,
* Called via glUnmapBuffer().
*/
static GLboolean
-intel_bufferobj_unmap(struct gl_context * ctx,
- GLenum target, struct gl_buffer_object *obj)
+intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj)
{
struct intel_context *intel = intel_context(ctx);
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
@@ -758,23 +703,23 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx,
* not overlap.
*/
if (src == dst) {
- char *ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER,
- GL_READ_WRITE, dst);
+ char *ptr = intel_bufferobj_map_range(ctx, 0, dst->Size,
+ GL_MAP_READ_BIT, dst);
memmove(ptr + write_offset, ptr + read_offset, size);
- intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst);
+ intel_bufferobj_unmap(ctx, dst);
} else {
const char *src_ptr;
char *dst_ptr;
- src_ptr = intel_bufferobj_map(ctx, GL_COPY_READ_BUFFER,
- GL_READ_ONLY, src);
- dst_ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER,
- GL_WRITE_ONLY, dst);
+ src_ptr = intel_bufferobj_map_range(ctx, 0, src->Size,
+ GL_MAP_READ_BIT, src);
+ dst_ptr = intel_bufferobj_map_range(ctx, 0, dst->Size,
+ GL_MAP_WRITE_BIT, dst);
memcpy(dst_ptr + write_offset, src_ptr + read_offset, size);
- intel_bufferobj_unmap(ctx, GL_COPY_READ_BUFFER, src);
- intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst);
+ intel_bufferobj_unmap(ctx, src);
+ intel_bufferobj_unmap(ctx, dst);
}
return;
}
@@ -924,7 +869,6 @@ intelInitBufferObjectFuncs(struct dd_function_table *functions)
functions->BufferData = intel_bufferobj_data;
functions->BufferSubData = intel_bufferobj_subdata;
functions->GetBufferSubData = intel_bufferobj_get_subdata;
- functions->MapBuffer = intel_bufferobj_map;
functions->MapBufferRange = intel_bufferobj_map_range;
functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range;
functions->UnmapBuffer = intel_bufferobj_unmap;
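
With the driver's MapBuffer hook removed, callers that used to map a whole buffer through the classic access enums now call MapBufferRange directly, as the copy_subdata hunk above shows. A hedged sketch of that translation; the helper name is invented for illustration:

#include <GL/gl.h>
#include <GL/glext.h>

/* Sketch: turn a legacy glMapBuffer access enum into MapBufferRange
 * flags; the caller then maps the whole buffer from offset 0 to obj->Size. */
static GLbitfield
legacy_access_to_range_flags(GLenum access)
{
   switch (access) {
   case GL_READ_ONLY_ARB:  return GL_MAP_READ_BIT;
   case GL_WRITE_ONLY_ARB: return GL_MAP_WRITE_BIT;
   default:                return GL_MAP_READ_BIT | GL_MAP_WRITE_BIT;
   }
}
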
diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c
index dfca03c14bf..76d33f9b37e 100644
--- a/src/mesa/drivers/dri/intel/intel_clear.c
+++ b/src/mesa/drivers/dri/intel/intel_clear.c
@@ -116,13 +116,13 @@ intelClear(struct gl_context *ctx, GLbitfield mask)
}
/* HW color buffers (front, back, aux, generic FBO, etc) */
- if (colorMask == ~0) {
+ if (intel->gen < 6 && colorMask == ~0) {
/* clear all R,G,B,A */
blit_mask |= (mask & BUFFER_BITS_COLOR);
}
else {
/* glColorMask in effect */
- tri_mask |= (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT));
+ tri_mask |= (mask & BUFFER_BITS_COLOR);
}
/* Make sure we have up to date buffers before we start looking at
@@ -143,6 +143,12 @@ intelClear(struct gl_context *ctx, GLbitfield mask)
*/
tri_mask |= BUFFER_BIT_STENCIL;
}
+ else if (intel->has_separate_stencil &&
+ stencilRegion->tiling == I915_TILING_NONE) {
+ /* The stencil buffer is actually W tiled, which the hardware
+ * cannot blit to. */
+ tri_mask |= BUFFER_BIT_STENCIL;
+ }
else {
/* clearing all stencil bits, use blitting */
blit_mask |= BUFFER_BIT_STENCIL;
@@ -182,7 +188,10 @@ intelClear(struct gl_context *ctx, GLbitfield mask)
if (tri_mask) {
debug_mask("tri", tri_mask);
- _mesa_meta_Clear(&intel->ctx, tri_mask);
+ if (ctx->Extensions.ARB_fragment_shader)
+ _mesa_meta_glsl_Clear(&intel->ctx, tri_mask);
+ else
+ _mesa_meta_Clear(&intel->ctx, tri_mask);
}
}
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 2ba13632569..14342ef6246 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -1439,7 +1439,12 @@ intel_verify_dri2_has_hiz(struct intel_context *intel,
assert(stencil_rb->Base.Format == MESA_FORMAT_S8);
assert(depth_rb && depth_rb->Base.Format == MESA_FORMAT_X8_Z24);
- if (stencil_rb->region->tiling == I915_TILING_Y) {
+ if (stencil_rb->region->tiling == I915_TILING_NONE) {
+ /*
+ * The stencil buffer is actually W tiled. The region's tiling is
+ * I915_TILING_NONE, however, because the GTT is incapable of W
+ * fencing.
+ */
intel->intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_TRUE;
return;
} else {
@@ -1449,6 +1454,13 @@ intel_verify_dri2_has_hiz(struct intel_context *intel,
* a combined depth/stencil buffer. Discard the hiz buffer too.
*/
intel->intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_FALSE;
+ if (intel->must_use_separate_stencil) {
+ _mesa_problem(&intel->ctx,
+ "intel_context requires separate stencil, but the "
+ "DRIscreen does not support it. You may need to "
+ "upgrade the Intel X driver to 2.16.0");
+ abort();
+ }
/* 1. Discard depth and stencil renderbuffers. */
_mesa_remove_renderbuffer(fb, BUFFER_DEPTH);
@@ -1527,7 +1539,7 @@ intel_verify_dri2_has_hiz(struct intel_context *intel,
* Presently, however, no verification or clean up is necessary, and
* execution should not reach here. If the framebuffer still has a hiz
* region, then we have already set dri2_has_hiz to true after
- * confirming above that the stencil buffer is Y tiled.
+ * confirming above that the stencil buffer is W tiled.
*/
assert(0);
}
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 55bcc757873..754f9f202d1 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -173,6 +173,9 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer
if (irb->Base.Format == MESA_FORMAT_S8) {
/*
+ * The stencil buffer is W tiled. However, we request from the kernel a
+ * non-tiled buffer because the GTT is incapable of W fencing.
+ *
* The stencil buffer has quirky pitch requirements. From Vol 2a,
* 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
* The pitch must be set to 2x the value computed based on width, as
@@ -180,14 +183,13 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer
* To accomplish this, we resort to the nasty hack of doubling the drm
* region's cpp and halving its height.
*
- * If we neglect to double the pitch, then drm_intel_gem_bo_map_gtt()
- * maps the memory incorrectly.
+ * If we neglect to double the pitch, then render corruption occurs.
*/
irb->region = intel_region_alloc(intel->intelScreen,
- I915_TILING_Y,
+ I915_TILING_NONE,
cpp * 2,
- width,
- height / 2,
+ ALIGN(width, 64),
+ ALIGN((height + 1) / 2, 64),
GL_TRUE);
if (!irb->region)
return false;
@@ -594,17 +596,15 @@ intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb,
struct intel_texture_image *intel_image,
int zoffset)
{
- struct intel_mipmap_tree *mt = intel_image->mt;
unsigned int dst_x, dst_y;
/* compute offset of the particular 2D image within the texture region */
intel_miptree_get_image_offset(intel_image->mt,
- intel_image->level,
- intel_image->face,
+ intel_image->base.Level,
+ intel_image->base.Face,
zoffset,
&dst_x, &dst_y);
- irb->draw_offset = (dst_y * mt->region->pitch + dst_x) * mt->cpp;
irb->draw_x = dst_x;
irb->draw_y = dst_y;
}
@@ -645,6 +645,22 @@ intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb,
}
}
+#ifndef I915
+static bool
+need_tile_offset_workaround(struct brw_context *brw,
+ struct intel_renderbuffer *irb)
+{
+ uint32_t tile_x, tile_y;
+
+ if (brw->has_surface_tile_offset)
+ return false;
+
+ intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y);
+
+ return tile_x != 0 || tile_y != 0;
+}
+#endif
+
/**
* Called by glFramebufferTexture[123]DEXT() (and other places) to
* prepare for rendering into texture memory. This might be called
@@ -698,8 +714,7 @@ intel_render_texture(struct gl_context * ctx,
intel_image->used_as_render_target = GL_TRUE;
#ifndef I915
- if (!brw_context(ctx)->has_surface_tile_offset &&
- (irb->draw_offset & 4095) != 0) {
+ if (need_tile_offset_workaround(brw_context(ctx), irb)) {
/* Original gen4 hardware couldn't draw to a non-tile-aligned
* destination in a miptree unless you actually setup your
* renderbuffer as a miptree and used the fragile
@@ -713,8 +728,8 @@ intel_render_texture(struct gl_context * ctx,
new_mt = intel_miptree_create(intel, image->TexObject->Target,
intel_image->base.TexFormat,
- intel_image->level,
- intel_image->level,
+ intel_image->base.Level,
+ intel_image->base.Level,
intel_image->base.Width,
intel_image->base.Height,
intel_image->base.Depth,
@@ -722,8 +737,8 @@ intel_render_texture(struct gl_context * ctx,
intel_miptree_image_copy(intel,
new_mt,
- intel_image->face,
- intel_image->level,
+ intel_image->base.Face,
+ intel_image->base.Level,
old_mt);
intel_miptree_release(intel, &intel_image->mt);
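
The S8 allocation above hides some arithmetic: cpp is doubled and the height halved to satisfy the 3DSTATE_STENCIL_BUFFER pitch rule, and both dimensions are padded out to the 64x64 W-tile footprint via Mesa's ALIGN macro. A worked sketch of the resulting region dimensions, using a local ALIGN_UP stand-in:

#define ALIGN_UP(value, alignment) (((value) + (alignment) - 1) & ~((alignment) - 1))

struct s8_dims { unsigned cpp, width, height; };

/* Sketch: compute the parameters passed to intel_region_alloc() above
 * for an S8 renderbuffer of the given size (cpp is 1 for S8). */
static struct s8_dims
s8_region_dims(unsigned cpp, unsigned width, unsigned height)
{
   struct s8_dims d;
   d.cpp = cpp * 2;
   d.width = ALIGN_UP(width, 64);
   d.height = ALIGN_UP((height + 1) / 2, 64);
   return d;   /* e.g. a 300x300 S8 buffer becomes cpp 2, 320 x 192 */
}
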
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h
index f7f99a4f00c..2487994fde5 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.h
+++ b/src/mesa/drivers/dri/intel/intel_fbo.h
@@ -58,7 +58,6 @@ struct intel_renderbuffer
/** \} */
- GLuint draw_offset; /**< Offset of drawing address within the region */
GLuint draw_x, draw_y; /**< Offset of drawing within the region */
};
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 4e711de1ce1..f36240d7f1d 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -227,7 +227,7 @@ intel_miptree_match_image(struct intel_mipmap_tree *mt,
struct gl_texture_image *image)
{
struct intel_texture_image *intelImage = intel_texture_image(image);
- GLuint level = intelImage->level;
+ GLuint level = intelImage->base.Level;
/* Images with borders are never pulled into mipmap trees. */
if (image->Border)
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index 86d0ef2d748..d9873a303ee 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -74,9 +74,9 @@ static const GLubyte *map_pbo( struct gl_context *ctx,
return NULL;
}
- buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
- GL_READ_ONLY_ARB,
- unpack->BufferObj);
+ buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size,
+ GL_MAP_READ_BIT,
+ unpack->BufferObj);
if (!buf) {
_mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)");
return NULL;
@@ -292,8 +292,7 @@ out:
if (_mesa_is_bufferobj(unpack->BufferObj)) {
/* done with PBO so unmap it now */
- ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
- unpack->BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
}
intel_check_front_buffer_rendering(intel);
diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h
index 5aa629150cf..a98a669af21 100644
--- a/src/mesa/drivers/dri/intel/intel_reg.h
+++ b/src/mesa/drivers/dri/intel/intel_reg.h
@@ -75,6 +75,7 @@
#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4)
#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3)
#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2)
+#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1)
#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
#define PIPE_CONTROL_PPGTT_WRITE (0 << 2)
#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2)
diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h
index b2013af1a29..9dd6a525566 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.h
+++ b/src/mesa/drivers/dri/intel/intel_screen.h
@@ -63,9 +63,12 @@
* x8_z24 and s8).
*
* Eventually, intel_update_renderbuffers() makes a DRI2 request for
- * DRI2BufferStencil and DRI2BufferHiz. If the returned buffers are Y tiled,
- * then we joyfully set intel_screen.dri2_has_hiz to true and continue as if
- * nothing happend.
+ * DRI2BufferStencil and DRI2BufferHiz. If the stencil buffer's tiling is
+ * I915_TILING_NONE [1], then we joyfully set intel_screen.dri2_has_hiz to
+ * true and continue as if nothing happened.
+ *
+ * [1] The stencil buffer is actually W tiled. However, we request from the
+ * kernel a non-tiled buffer because the GTT is incapable of W fencing.
*
* If the buffers are X tiled, however, the handshake has failed and we must
* clean up.
diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c
index 153803fba09..2e1c80c4766 100644
--- a/src/mesa/drivers/dri/intel/intel_span.c
+++ b/src/mesa/drivers/dri/intel/intel_span.c
@@ -131,38 +131,84 @@ intel_set_span_functions(struct intel_context *intel,
int miny = 0; \
int maxx = rb->Width; \
int maxy = rb->Height; \
- int stride = rb->RowStride; \
- uint8_t *buf = rb->Data; \
+ \
+ /* \
+ * Here we ignore rb->Data and rb->RowStride as set by \
+ * intelSpanRenderStart. Since intel_offset_S8 decodes the W tile \
+ * manually, the region's *real* base address and stride are \
+ * required. \
+ */ \
+ struct intel_renderbuffer *irb = intel_renderbuffer(rb); \
+ uint8_t *buf = irb->region->buffer->virtual; \
+ unsigned stride = irb->region->pitch; \
+ unsigned height = 2 * irb->region->height; \
+ bool flip = rb->Name == 0; \
+ int y_scale = flip ? -1 : 1; \
+ int y_bias = flip ? (height - 1) : 0; \
-/* Don't flip y. */
#undef Y_FLIP
-#define Y_FLIP(y) y
+#define Y_FLIP(y) (y_scale * (y) + y_bias)
/**
* \brief Get pointer offset into stencil buffer.
*
- * The stencil buffer interleaves two rows into one. Yay for crazy hardware.
- * The table below demonstrates how the pointer arithmetic behaves for a buffer
- * with positive stride (s=stride).
- *
- * x | y | byte offset
- * --------------------------
- * 0 | 0 | 0
- * 0 | 1 | 1
- * 1 | 0 | 2
- * 1 | 1 | 3
- * ... | ... | ...
- * 0 | 2 | s
- * 0 | 3 | s + 1
- * 1 | 2 | s + 2
- * 1 | 3 | s + 3
+ * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
+ * must decode the tile's layout in software.
*
+ * See
+ * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
+ * Format.
+ * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
*
+ * Even though the returned offset is always positive, the return type is
+ * signed due to
+ * commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
+ * mesa: Fix return type of _mesa_get_format_bytes() (#37351)
*/
static inline intptr_t
-intel_offset_S8(int stride, GLint x, GLint y)
+intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y)
{
- return 2 * ((y / 2) * stride + x) + y % 2;
+ uint32_t tile_size = 4096;
+ uint32_t tile_width = 64;
+ uint32_t tile_height = 64;
+ uint32_t row_size = 64 * stride;
+
+ uint32_t tile_x = x / tile_width;
+ uint32_t tile_y = y / tile_height;
+
+ /* The byte's address relative to the tile's base address. */
+ uint32_t byte_x = x % tile_width;
+ uint32_t byte_y = y % tile_height;
+
+ uintptr_t u = tile_y * row_size
+ + tile_x * tile_size
+ + 512 * (byte_x / 8)
+ + 64 * (byte_y / 8)
+ + 32 * ((byte_y / 4) % 2)
+ + 16 * ((byte_x / 4) % 2)
+ + 8 * ((byte_y / 2) % 2)
+ + 4 * ((byte_x / 2) % 2)
+ + 2 * (byte_y % 2)
+ + 1 * (byte_x % 2);
+
+ /*
+ * Errata for Gen5:
+ *
+ * An additional offset is needed which is not documented in the PRM.
+ *
+ * if ((byte_x / 8) % 2 == 1) {
+ * if ((byte_y / 8) % 2 == 0) {
+ * u += 64;
+ * } else {
+ * u -= 64;
+ * }
+ * }
+ *
+ * The offset is expressed more tersely as
+ * u += ((int) x & 0x8) * (8 - (((int) y & 0x8) << 1));
+ */
+
+ return u;
}
#define WRITE_STENCIL(x, y, src) buf[intel_offset_S8(stride, x, y)] = src;
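
Since intel_offset_S8() interleaves the low bits of x and y, a few hand-computed offsets make the swizzle easier to follow. This check is purely illustrative and only compiles alongside the function above:

#include <assert.h>

/* Sketch: spot-check the W-tile swizzle. Inside a single 64x64 tile the
 * stride argument does not affect the result, so any stride works here. */
static void
check_w_tile_swizzle(void)
{
   assert(intel_offset_S8(128, 0, 0) == 0);
   assert(intel_offset_S8(128, 1, 0) == 1);    /* x bit 0 -> offset bit 0 */
   assert(intel_offset_S8(128, 0, 1) == 2);    /* y bit 0 -> offset bit 1 */
   assert(intel_offset_S8(128, 2, 2) == 12);   /* x bit 1 -> 4, y bit 1 -> 8 */
   assert(intel_offset_S8(128, 8, 8) == 576);  /* x bit 3 -> 512, y bit 3 -> 64 */
}
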
diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c
index 21c4a1dddba..ee0cd252375 100644
--- a/src/mesa/drivers/dri/intel/intel_tex.c
+++ b/src/mesa/drivers/dri/intel/intel_tex.c
@@ -95,17 +95,12 @@ intelGenerateMipmap(struct gl_context *ctx, GLenum target,
if (!_mesa_is_format_compressed(first_image->TexFormat)) {
GLuint nr_faces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
GLuint face, i;
- /* Update the level information in our private data in the new images,
- * since it didn't get set as part of a normal TexImage path.
- */
for (face = 0; face < nr_faces; face++) {
for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) {
struct intel_texture_image *intelImage =
intel_texture_image(texObj->Image[face][i]);
if (!intelImage)
break;
- intelImage->level = i;
- intelImage->face = face;
/* Unreference the miptree to signal that the new Data is a
* bare pointer from mesa.
*/
diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
index 1a3643da593..600bd1251e0 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -118,8 +118,8 @@ intel_copy_texsubimage(struct intel_context *intel,
/* get dest x/y in destination texture */
intel_miptree_get_image_offset(intelImage->mt,
- intelImage->level,
- intelImage->face,
+ intelImage->base.Level,
+ intelImage->base.Face,
0,
&image_x, &image_y);
@@ -164,101 +164,6 @@ intel_copy_texsubimage(struct intel_context *intel,
static void
-intelCopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level,
- GLenum internalFormat,
- GLint x, GLint y, GLsizei width, GLint border)
-{
- struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
- struct gl_texture_object *texObj =
- _mesa_select_tex_object(ctx, texUnit, target);
- struct gl_texture_image *texImage =
- _mesa_select_tex_image(ctx, texObj, target, level);
- int srcx, srcy, dstx, dsty, height;
-
- if (border)
- goto fail;
-
- /* Setup or redefine the texture object, mipmap tree and texture
- * image. Don't populate yet.
- */
- ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
- width, border,
- GL_RGBA, CHAN_TYPE, NULL,
- &ctx->DefaultPacking, texObj, texImage);
- srcx = x;
- srcy = y;
- dstx = 0;
- dsty = 0;
- height = 1;
- if (!_mesa_clip_copytexsubimage(ctx,
- &dstx, &dsty,
- &srcx, &srcy,
- &width, &height))
- return;
-
- if (!intel_copy_texsubimage(intel_context(ctx), target,
- intel_texture_image(texImage),
- internalFormat, 0, 0, x, y, width, height))
- goto fail;
-
- return;
-
- fail:
- fallback_debug("%s - fallback to swrast\n", __FUNCTION__);
- _mesa_meta_CopyTexImage1D(ctx, target, level, internalFormat, x, y,
- width, border);
-}
-
-
-static void
-intelCopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level,
- GLenum internalFormat,
- GLint x, GLint y, GLsizei width, GLsizei height,
- GLint border)
-{
- struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
- struct gl_texture_object *texObj =
- _mesa_select_tex_object(ctx, texUnit, target);
- struct gl_texture_image *texImage =
- _mesa_select_tex_image(ctx, texObj, target, level);
- int srcx, srcy, dstx, dsty;
-
- if (border)
- goto fail;
-
- /* Setup or redefine the texture object, mipmap tree and texture
- * image. Don't populate yet.
- */
- ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
- width, height, border,
- GL_RGBA, GL_UNSIGNED_BYTE, NULL,
- &ctx->DefaultPacking, texObj, texImage);
-
- srcx = x;
- srcy = y;
- dstx = 0;
- dsty = 0;
- if (!_mesa_clip_copytexsubimage(ctx,
- &dstx, &dsty,
- &srcx, &srcy,
- &width, &height))
- return;
-
- if (!intel_copy_texsubimage(intel_context(ctx), target,
- intel_texture_image(texImage),
- internalFormat, 0, 0, x, y, width, height))
- goto fail;
-
- return;
-
- fail:
- fallback_debug("%s - fallback to swrast\n", __FUNCTION__);
- _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y,
- width, height, border);
-}
-
-
-static void
intelCopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level,
GLint xoffset, GLint x, GLint y, GLsizei width)
{
@@ -312,8 +217,6 @@ intelCopyTexSubImage2D(struct gl_context * ctx, GLenum target, GLint level,
void
intelInitTextureCopyImageFuncs(struct dd_function_table *functions)
{
- functions->CopyTexImage1D = intelCopyTexImage1D;
- functions->CopyTexImage2D = intelCopyTexImage2D;
functions->CopyTexSubImage1D = intelCopyTexSubImage1D;
functions->CopyTexSubImage2D = intelCopyTexSubImage2D;
}
diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c
index 1f8b885bbec..4ee66847255 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_image.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_image.c
@@ -63,7 +63,7 @@ intel_miptree_create_for_teximage(struct intel_context *intel,
if (intelImage->base.Border)
return NULL;
- if (intelImage->level > intelObj->base.BaseLevel &&
+ if (intelImage->base.Level > intelObj->base.BaseLevel &&
(intelImage->base.Width == 1 ||
(intelObj->base.Target != GL_TEXTURE_1D &&
intelImage->base.Height == 1) ||
@@ -74,19 +74,19 @@ intel_miptree_create_for_teximage(struct intel_context *intel,
* likely base level width/height/depth for a full mipmap stack
* from this info, so just allocate this one level.
*/
- firstLevel = intelImage->level;
- lastLevel = intelImage->level;
+ firstLevel = intelImage->base.Level;
+ lastLevel = intelImage->base.Level;
} else {
/* If this image disrespects BaseLevel, allocate from level zero.
* Usually BaseLevel == 0, so it's unlikely to happen.
*/
- if (intelImage->level < intelObj->base.BaseLevel)
+ if (intelImage->base.Level < intelObj->base.BaseLevel)
firstLevel = 0;
else
firstLevel = intelObj->base.BaseLevel;
/* Figure out image dimensions at start level. */
- for (i = intelImage->level; i > firstLevel; i--) {
+ for (i = intelImage->base.Level; i > firstLevel; i--) {
width <<= 1;
if (height != 1)
height <<= 1;
@@ -101,7 +101,7 @@ intel_miptree_create_for_teximage(struct intel_context *intel,
*/
if ((intelObj->base.Sampler.MinFilter == GL_NEAREST ||
intelObj->base.Sampler.MinFilter == GL_LINEAR) &&
- intelImage->level == firstLevel &&
+ intelImage->base.Level == firstLevel &&
(intel->gen < 4 || firstLevel == 0)) {
lastLevel = firstLevel;
} else {
@@ -186,8 +186,8 @@ try_pbo_upload(struct intel_context *intel,
else
src_stride = width;
- intel_miptree_get_image_offset(intelImage->mt, intelImage->level,
- intelImage->face, 0,
+ intel_miptree_get_image_offset(intelImage->mt, intelImage->base.Level,
+ intelImage->base.Face, 0,
&dst_x, &dst_y);
dst_stride = intelImage->mt->region->pitch;
@@ -243,8 +243,8 @@ try_pbo_zcopy(struct intel_context *intel,
else
src_stride = width;
- intel_miptree_get_image_offset(intelImage->mt, intelImage->level,
- intelImage->face, 0,
+ intel_miptree_get_image_offset(intelImage->mt, intelImage->base.Level,
+ intelImage->base.Face, 0,
&dst_x, &dst_y);
dst_stride = intelImage->mt->region->pitch;
@@ -407,9 +407,6 @@ intelTexImage(struct gl_context * ctx,
DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(target), level, width, height, depth, border);
- intelImage->face = _mesa_tex_target_to_face(target);
- intelImage->level = level;
-
if (_mesa_is_format_compressed(texImage->TexFormat)) {
texelBytes = 0;
}
@@ -514,8 +511,8 @@ intelTexImage(struct gl_context * ctx,
}
texImage->Data = intel_miptree_image_map(intel,
intelImage->mt,
- intelImage->face,
- intelImage->level,
+ intelImage->base.Face,
+ intelImage->base.Level,
&dstRowStride,
intelImage->base.ImageOffsets);
}
@@ -684,8 +681,8 @@ intel_get_tex_image(struct gl_context * ctx, GLenum target, GLint level,
intelImage->base.Data =
intel_miptree_image_map(intel,
intelImage->mt,
- intelImage->face,
- intelImage->level,
+ intelImage->base.Face,
+ intelImage->base.Level,
&intelImage->base.RowStride,
intelImage->base.ImageOffsets);
intelImage->base.RowStride /= intelImage->mt->cpp;
@@ -816,8 +813,6 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
rb->region->width, rb->region->height, 1,
0, internalFormat, texFormat);
- intelImage->face = _mesa_tex_target_to_face(target);
- intelImage->level = level;
texImage->RowStride = rb->region->pitch;
intel_miptree_reference(&intelImage->mt, intelObj->mt);
@@ -874,8 +869,6 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target,
image->region->width, image->region->height, 1,
0, image->internal_format, image->format);
- intelImage->face = _mesa_tex_target_to_face(target);
- intelImage->level = 0;
texImage->RowStride = image->region->pitch;
intel_miptree_reference(&intelImage->mt, intelObj->mt);
diff --git a/src/mesa/drivers/dri/intel/intel_tex_obj.h b/src/mesa/drivers/dri/intel/intel_tex_obj.h
index a9ae2ec5429..e7a4318b8d8 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_obj.h
+++ b/src/mesa/drivers/dri/intel/intel_tex_obj.h
@@ -52,11 +52,6 @@ struct intel_texture_image
{
struct gl_texture_image base;
- /* These aren't stored in gl_texture_image
- */
- GLuint level;
- GLuint face;
-
/* If intelImage->mt != NULL, image data is stored here.
* Else if intelImage->base.Data != NULL, image is stored there.
* Else there is no image data.
diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
index 8b43c406cf9..5fd2cc36234 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
@@ -113,7 +113,7 @@ intelTexSubimage(struct gl_context * ctx,
dstRowStride = pitch;
intel_miptree_get_image_offset(intelImage->mt, level,
- intelImage->face, 0,
+ intelImage->base.Face, 0,
&blit_x, &blit_y);
blit_x += xoffset;
blit_y += yoffset;
@@ -122,8 +122,8 @@ intelTexSubimage(struct gl_context * ctx,
} else {
texImage->Data = intel_miptree_image_map(intel,
intelImage->mt,
- intelImage->face,
- intelImage->level,
+ intelImage->base.Face,
+ intelImage->base.Level,
&dstRowStride,
texImage->ImageOffsets);
}
diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c
index 7135a6276fe..31ac689ad77 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c
@@ -42,8 +42,8 @@ copy_image_data_to_tree(struct intel_context *intel,
*/
intel_miptree_image_copy(intel,
intelObj->mt,
- intelImage->face,
- intelImage->level, intelImage->mt);
+ intelImage->base.Face,
+ intelImage->base.Level, intelImage->mt);
intel_miptree_release(intel, &intelImage->mt);
}
@@ -54,8 +54,8 @@ copy_image_data_to_tree(struct intel_context *intel,
*/
intel_miptree_image_data(intel,
intelObj->mt,
- intelImage->face,
- intelImage->level,
+ intelImage->base.Face,
+ intelImage->base.Level,
intelImage->base.Data,
intelImage->base.RowStride,
intelImage->base.RowStride *
@@ -177,8 +177,8 @@ intel_tex_map_level_images(struct intel_context *intel,
intelImage->base.Data =
intel_miptree_image_map(intel,
intelImage->mt,
- intelImage->face,
- intelImage->level,
+ intelImage->base.Face,
+ intelImage->base.Level,
&intelImage->base.RowStride,
intelImage->base.ImageOffsets);
/* convert stride to texels, not bytes */
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
index e60b91f64be..433590c4181 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
@@ -107,7 +107,7 @@ nouveau_bufferobj_data(struct gl_context *ctx, GLenum target, GLsizeiptrARB size
}
static void
-nouveau_bufferobj_subdata(struct gl_context *ctx, GLenum target, GLintptrARB offset,
+nouveau_bufferobj_subdata(struct gl_context *ctx, GLintptrARB offset,
GLsizeiptrARB size, const GLvoid *data,
struct gl_buffer_object *obj)
{
@@ -115,7 +115,7 @@ nouveau_bufferobj_subdata(struct gl_context *ctx, GLenum target, GLintptrARB off
}
static void
-nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLenum target, GLintptrARB offset,
+nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLintptrARB offset,
GLsizeiptrARB size, GLvoid *data,
struct gl_buffer_object *obj)
{
@@ -123,23 +123,6 @@ nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLenum target, GLintptrARB
}
static void *
-nouveau_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access,
- struct gl_buffer_object *obj)
-{
- unsigned flags = 0;
-
- if (access == GL_READ_ONLY_ARB ||
- access == GL_READ_WRITE_ARB)
- flags |= GL_MAP_READ_BIT;
- if (access == GL_WRITE_ONLY_ARB ||
- access == GL_READ_WRITE_ARB)
- flags |= GL_MAP_WRITE_BIT;
-
- return ctx->Driver.MapBufferRange(ctx, target, 0, obj->Size, flags,
- obj);
-}
-
-static void *
nouveau_bufferobj_map_range(struct gl_context *ctx, GLenum target, GLintptr offset,
GLsizeiptr length, GLbitfield access,
struct gl_buffer_object *obj)
@@ -169,7 +152,7 @@ nouveau_bufferobj_map_range(struct gl_context *ctx, GLenum target, GLintptr offs
}
static GLboolean
-nouveau_bufferobj_unmap(struct gl_context *ctx, GLenum target, struct gl_buffer_object *obj)
+nouveau_bufferobj_unmap(struct gl_context *ctx, struct gl_buffer_object *obj)
{
assert(obj->Pointer);
@@ -189,7 +172,6 @@ nouveau_bufferobj_functions_init(struct dd_function_table *functions)
functions->BufferData = nouveau_bufferobj_data;
functions->BufferSubData = nouveau_bufferobj_subdata;
functions->GetBufferSubData = nouveau_bufferobj_get_subdata;
- functions->MapBuffer = nouveau_bufferobj_map;
functions->MapBufferRange = nouveau_bufferobj_map_range;
functions->UnmapBuffer = nouveau_bufferobj_unmap;
}
diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c
index 02201cb53d6..44a794da396 100644
--- a/src/mesa/drivers/dri/r200/r200_ioctl.c
+++ b/src/mesa/drivers/dri/r200/r200_ioctl.c
@@ -185,7 +185,6 @@ static void r200Clear( struct gl_context *ctx, GLbitfield mask )
r200ContextPtr rmesa = R200_CONTEXT(ctx);
__DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
GLuint flags = 0;
- GLuint color_mask = 0;
GLuint orig_mask = mask;
if ( R200_DEBUG & RADEON_IOCTL ) {
@@ -206,13 +205,11 @@ static void r200Clear( struct gl_context *ctx, GLbitfield mask )
if ( mask & BUFFER_BIT_FRONT_LEFT ) {
flags |= RADEON_FRONT;
- color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
mask &= ~BUFFER_BIT_FRONT_LEFT;
}
if ( mask & BUFFER_BIT_BACK_LEFT ) {
flags |= RADEON_BACK;
- color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
mask &= ~BUFFER_BIT_BACK_LEFT;
}
diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c
index d42e8f12041..91e77f9f7da 100644
--- a/src/mesa/drivers/dri/r200/r200_tex.c
+++ b/src/mesa/drivers/dri/r200/r200_tex.c
@@ -527,7 +527,6 @@ void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *fu
functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
if (radeon->radeonScreen->kernel_mm) {
- functions->CopyTexImage2D = radeonCopyTexImage2D;
functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
}
diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c
index 7adf9ad73ed..8c9bd6d00b2 100644
--- a/src/mesa/drivers/dri/r200/r200_texstate.c
+++ b/src/mesa/drivers/dri/r200/r200_texstate.c
@@ -773,18 +773,12 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format
struct radeon_renderbuffer *rb;
radeon_texture_image *rImage;
radeonContextPtr radeon;
- r200ContextPtr rmesa;
struct radeon_framebuffer *rfb;
radeonTexObjPtr t;
uint32_t pitch_val;
- uint32_t internalFormat, format;
gl_format texFormat;
- format = GL_UNSIGNED_BYTE;
- internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
-
radeon = pDRICtx->driverPrivate;
- rmesa = pDRICtx->driverPrivate;
rfb = dPriv->driverPrivate;
texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c
index 63e03b0e0c7..cf44d7f459c 100644
--- a/src/mesa/drivers/dri/r200/r200_vertprog.c
+++ b/src/mesa/drivers/dri/r200/r200_vertprog.c
@@ -126,10 +126,10 @@ static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_
case PROGRAM_NAMED_PARAM:
//fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
case PROGRAM_CONSTANT:
- *fcmd++ = paramList->ParameterValues[pi][0];
- *fcmd++ = paramList->ParameterValues[pi][1];
- *fcmd++ = paramList->ParameterValues[pi][2];
- *fcmd++ = paramList->ParameterValues[pi][3];
+ *fcmd++ = paramList->ParameterValues[pi][0].f;
+ *fcmd++ = paramList->ParameterValues[pi][1].f;
+ *fcmd++ = paramList->ParameterValues[pi][2].f;
+ *fcmd++ = paramList->ParameterValues[pi][3].f;
break;
default:
_mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index b24274259f4..39dcb21d4f4 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -561,28 +561,29 @@ static int peephole_add_presub_add(
struct rc_instruction * inst_add)
{
unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
- struct rc_src_register * src1 = NULL;
- unsigned int i;
-
- if (!is_presub_candidate(c, inst_add))
- return 0;
+ unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
+ unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
return 0;
- /* XXX This isn't fully implemented, is it? */
- /* src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */
- for (i = 0; i < 2; i++) {
- if (inst_add->U.I.SrcReg[i].Abs)
- return 0;
+ /* src0 and src1 can't have absolute values */
+ if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
+ return 0;
- /* XXX This looks weird, but it's basically what was here before this commit (see git blame): */
- if ((inst_add->U.I.SrcReg[i].Negate & dstmask) != dstmask && !src1) {
- src1 = &inst_add->U.I.SrcReg[i];
- }
- }
+ /* presub_replace_add() assumes only one is negative */
+ if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
+ return 0;
+
+ /* if src0 is negated, its negate mask must cover all bits of dstmask */
+ if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
+ return 0;
- if (!src1)
+ /* if src1 is negated, its negate mask must cover all bits of dstmask */
+ if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
+ return 0;
+
+ if (!is_presub_candidate(c, inst_add))
return 0;
if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
@@ -615,7 +616,7 @@ static void presub_replace_inv(
 * of the add instruction must have the constant 1 swizzle. This function
* does not check const registers to see if their value is 1.0, so it should
* be called after the constant_folding optimization.
- * @return
+ * @return
* 0 if the ADD instruction is still part of the program.
* 1 if the ADD instruction is no longer part of the program.
*/
diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c
index 0c4d8537c61..5587c16dd44 100644
--- a/src/mesa/drivers/dri/r300/r300_draw.c
+++ b/src/mesa/drivers/dri/r300/r300_draw.c
@@ -84,7 +84,8 @@ static void r300FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
GLboolean mapped_named_bo = GL_FALSE;
if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
- ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+ GL_MAP_READ_BIT, mesa_ind_buf->obj);
mapped_named_bo = GL_TRUE;
assert(mesa_ind_buf->obj->Pointer != NULL);
}
@@ -138,7 +139,7 @@ static void r300FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
r300->ind_buf.count = mesa_ind_buf->count;
if (mapped_named_bo) {
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
}
}
@@ -163,7 +164,10 @@ static void r300SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
GLboolean mapped_named_bo = GL_FALSE;
if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
- ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ ctx->Driver.MapBufferRange(ctx, 0,
+ mesa_ind_buf->obj->Size,
+ GL_MAP_READ_BIT,
+ mesa_ind_buf->obj);
assert(mesa_ind_buf->obj->Pointer != NULL);
mapped_named_bo = GL_TRUE;
}
@@ -184,7 +188,7 @@ static void r300SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
r300->ind_buf.count = mesa_ind_buf->count;
if (mapped_named_bo) {
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
}
} else {
r300FixupIndexBuffer(ctx, mesa_ind_buf);
@@ -235,7 +239,8 @@ static void r300ConvertAttrib(struct gl_context *ctx, int count, const struct gl
if (input->BufferObj->Name) {
if (!input->BufferObj->Pointer) {
- ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+ GL_MAP_READ_BIT, input->BufferObj);
mapped_named_bo = GL_TRUE;
}
@@ -286,7 +291,7 @@ static void r300ConvertAttrib(struct gl_context *ctx, int count, const struct gl
radeon_bo_unmap(attr->bo);
if (mapped_named_bo) {
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
}
}
@@ -302,7 +307,8 @@ static void r300AlignDataToDword(struct gl_context *ctx, const struct gl_client_
radeon_bo_map(attr->bo, 1);
if (!input->BufferObj->Pointer) {
- ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+ GL_MAP_READ_BIT, input->BufferObj);
mapped_named_bo = GL_TRUE;
}
@@ -321,7 +327,7 @@ static void r300AlignDataToDword(struct gl_context *ctx, const struct gl_client_
}
if (mapped_named_bo) {
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
}
radeon_bo_unmap(attr->bo);
diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
index 590d9afe14a..93d8fe185ef 100644
--- a/src/mesa/drivers/dri/r300/r300_tex.c
+++ b/src/mesa/drivers/dri/r300/r300_tex.c
@@ -379,7 +379,6 @@ void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun
functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
if (radeon->radeonScreen->kernel_mm) {
- functions->CopyTexImage2D = radeonCopyTexImage2D;
functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
}
diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
index e24ad6f088d..e4388a021ed 100644
--- a/src/mesa/drivers/dri/r300/r300_texstate.c
+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
@@ -427,13 +427,8 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format
struct radeon_framebuffer *rfb;
radeonTexObjPtr t;
uint32_t pitch_val;
- uint32_t internalFormat, type, format;
gl_format texFormat;
- type = GL_BGRA;
- format = GL_UNSIGNED_BYTE;
- internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
-
radeon = pDRICtx->driverPrivate;
rmesa = pDRICtx->driverPrivate;
diff --git a/src/mesa/drivers/dri/r600/evergreen_fragprog.c b/src/mesa/drivers/dri/r600/evergreen_fragprog.c
index e527c379b62..cc584ca2b35 100644
--- a/src/mesa/drivers/dri/r600/evergreen_fragprog.c
+++ b/src/mesa/drivers/dri/r600/evergreen_fragprog.c
@@ -752,10 +752,10 @@ GLboolean evergreenSetupFPconstants(struct gl_context * ctx)
unNumParamData = paramList->NumParameters;
for(ui=0; ui<unNumParamData; ui++) {
- evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
- evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
- evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
- evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+ evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
+ evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
+ evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
+ evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
}
/* alloc multiple of 16 constants */
diff --git a/src/mesa/drivers/dri/r600/evergreen_render.c b/src/mesa/drivers/dri/r600/evergreen_render.c
index 4507be29d86..74563caf47c 100644
--- a/src/mesa/drivers/dri/r600/evergreen_render.c
+++ b/src/mesa/drivers/dri/r600/evergreen_render.c
@@ -403,7 +403,8 @@ static void evergreenConvertAttrib(struct gl_context *ctx, int count,
{
if (!input->BufferObj->Pointer)
{
- ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+ GL_MAP_READ_BIT, input->BufferObj);
mapped_named_bo = GL_TRUE;
}
@@ -456,7 +457,7 @@ static void evergreenConvertAttrib(struct gl_context *ctx, int count,
if (mapped_named_bo)
{
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
}
}
@@ -470,7 +471,8 @@ static void evergreenFixupIndexBuffer(struct gl_context *ctx, const struct _mesa
if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
{
- ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+ GL_MAP_READ_BIT, mesa_ind_buf->obj);
mapped_named_bo = GL_TRUE;
assert(mesa_ind_buf->obj->Pointer != NULL);
}
@@ -531,7 +533,7 @@ static void evergreenFixupIndexBuffer(struct gl_context *ctx, const struct _mesa
if (mapped_named_bo)
{
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
}
}
@@ -606,7 +608,8 @@ static void evergreenSetupIndexBuffer(struct gl_context *ctx, const struct _mesa
if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
{
- ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+ GL_MAP_READ_BIT, mesa_ind_buf->obj);
assert(mesa_ind_buf->obj->Pointer != NULL);
mapped_named_bo = GL_TRUE;
}
@@ -629,7 +632,7 @@ static void evergreenSetupIndexBuffer(struct gl_context *ctx, const struct _mesa
if (mapped_named_bo)
{
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
}
}
else
@@ -655,7 +658,8 @@ static void evergreenAlignDataToDword(struct gl_context *ctx,
if (!input->BufferObj->Pointer)
{
- ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+ GL_MAP_READ_BIT, input->BufferObj);
mapped_named_bo = GL_TRUE;
}
@@ -675,7 +679,7 @@ static void evergreenAlignDataToDword(struct gl_context *ctx,
radeon_bo_unmap(attr->bo);
if (mapped_named_bo)
{
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
}
attr->stride = dst_stride;
diff --git a/src/mesa/drivers/dri/r600/evergreen_tex.c b/src/mesa/drivers/dri/r600/evergreen_tex.c
index 33a5f277683..d240a216817 100644
--- a/src/mesa/drivers/dri/r600/evergreen_tex.c
+++ b/src/mesa/drivers/dri/r600/evergreen_tex.c
@@ -1288,19 +1288,12 @@ void evergreenSetTexBuffer(__DRIcontext *pDRICtx, GLint target, GLint glx_textur
struct radeon_renderbuffer *rb;
radeon_texture_image *rImage;
radeonContextPtr radeon;
- context_t *rmesa;
struct radeon_framebuffer *rfb;
radeonTexObjPtr t;
uint32_t pitch_val;
- uint32_t internalFormat, type, format;
gl_format texFormat;
- type = GL_BGRA;
- format = GL_UNSIGNED_BYTE;
- internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
-
radeon = pDRICtx->driverPrivate;
- rmesa = pDRICtx->driverPrivate;
rfb = dPriv->driverPrivate;
texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
@@ -1688,7 +1681,6 @@ void evergreenInitTextureFuncs(radeonContextPtr radeon, struct dd_function_table
functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
if (radeon->radeonScreen->kernel_mm) {
- functions->CopyTexImage2D = radeonCopyTexImage2D;
functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
}
diff --git a/src/mesa/drivers/dri/r600/evergreen_vertprog.c b/src/mesa/drivers/dri/r600/evergreen_vertprog.c
index 018869b9996..117916ac78f 100644
--- a/src/mesa/drivers/dri/r600/evergreen_vertprog.c
+++ b/src/mesa/drivers/dri/r600/evergreen_vertprog.c
@@ -684,17 +684,17 @@ GLboolean evergreenSetupVPconstants(struct gl_context * ctx)
for(ui=0; ui<unNumParamData; ui++) {
if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM)
{
- evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0];
- evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1];
- evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2];
- evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3];
+ evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f;
+ evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f;
+ evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f;
+ evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f;
}
else
{
- evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
- evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
- evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
- evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+ evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
+ evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
+ evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
+ evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
}
}
diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
index ce2f7779563..74f048b1062 100644
--- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c
+++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
@@ -259,13 +259,11 @@ static int r600_cs_process_relocs(struct radeon_cs_int *csi,
uint32_t * reloc_chunk,
uint32_t * length_dw_reloc_chunk)
{
- struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm;
struct r600_cs_reloc_legacy *relocs;
int i, j, r;
uint32_t offset_dw = 0;
- csm = (struct r600_cs_manager_legacy*)csi->csm;
relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
restart:
for (i = 0; i < csi->crelocs; i++) {
diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c
index eb7ed30c7a3..3efa1d197fa 100644
--- a/src/mesa/drivers/dri/r600/r600_tex.c
+++ b/src/mesa/drivers/dri/r600/r600_tex.c
@@ -470,7 +470,6 @@ void r600InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun
functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
if (radeon->radeonScreen->kernel_mm) {
- functions->CopyTexImage2D = radeonCopyTexImage2D;
functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
}
diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c
index 949db29c189..65fae7195fd 100644
--- a/src/mesa/drivers/dri/r600/r600_texstate.c
+++ b/src/mesa/drivers/dri/r600/r600_texstate.c
@@ -1141,13 +1141,8 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo
struct radeon_framebuffer *rfb;
radeonTexObjPtr t;
uint32_t pitch_val;
- uint32_t internalFormat, type, format;
gl_format texFormat;
- type = GL_BGRA;
- format = GL_UNSIGNED_BYTE;
- internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
-
radeon = pDRICtx->driverPrivate;
rmesa = pDRICtx->driverPrivate;
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
index 40494cd6af0..6f9834e68fe 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.c
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -778,10 +778,10 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx)
unNumParamData = paramList->NumParameters;
for(ui=0; ui<unNumParamData; ui++) {
- r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
- r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
- r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
- r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+ r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
+ r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
+ r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
+ r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
}
/* Load fp constants to gpu */
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index 0f7a7a46b71..a565c9f2087 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -490,7 +490,8 @@ static void r700ConvertAttrib(struct gl_context *ctx, int count,
{
if (!input->BufferObj->Pointer)
{
- ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+ GL_MAP_READ_BIT, input->BufferObj);
mapped_named_bo = GL_TRUE;
}
@@ -543,7 +544,7 @@ static void r700ConvertAttrib(struct gl_context *ctx, int count,
if (mapped_named_bo)
{
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
}
}
@@ -564,7 +565,8 @@ static void r700AlignDataToDword(struct gl_context *ctx,
if (!input->BufferObj->Pointer)
{
- ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+ GL_MAP_READ_BIT, input->BufferObj);
mapped_named_bo = GL_TRUE;
}
@@ -584,7 +586,7 @@ static void r700AlignDataToDword(struct gl_context *ctx,
radeon_bo_unmap(attr->bo);
if (mapped_named_bo)
{
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
}
attr->stride = dst_stride;
@@ -727,7 +729,8 @@ static void r700FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
{
- ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+ GL_MAP_READ_BIT, mesa_ind_buf->obj);
mapped_named_bo = GL_TRUE;
assert(mesa_ind_buf->obj->Pointer != NULL);
}
@@ -788,7 +791,7 @@ static void r700FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
if (mapped_named_bo)
{
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
}
}
@@ -813,7 +816,8 @@ static void r700SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
{
- ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+ GL_MAP_READ_BIT, mesa_ind_buf->obj);
assert(mesa_ind_buf->obj->Pointer != NULL);
mapped_named_bo = GL_TRUE;
}
@@ -836,7 +840,7 @@ static void r700SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
if (mapped_named_bo)
{
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
}
}
else
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
index 7d4be9180a0..b1e2742b27d 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.c
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -720,17 +720,17 @@ GLboolean r700SetupVertexProgram(struct gl_context * ctx)
for(ui=0; ui<unNumParamData; ui++) {
if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM)
{
- r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0];
- r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1];
- r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2];
- r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3];
+ r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f;
+ r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f;
+ r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f;
+ r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f;
}
else
{
- r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
- r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
- r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
- r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+ r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
+ r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
+ r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
+ r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
}
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
index 607b7470d4b..a74c6c7a575 100644
--- a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
+++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
@@ -78,6 +78,9 @@ static inline uint32_t radeon_gem_name_bo(struct radeon_bo *dummy)
static inline void *radeon_bo_manager_gem_ctor(int fd)
{
+ fprintf(stderr, "[%s:%u] Mesa built without Radeon libdrm support.\n",
+ __func__, __LINE__);
+
return NULL;
}
@@ -87,6 +90,9 @@ static inline void radeon_bo_manager_gem_dtor(void *dummy)
static inline void *radeon_cs_manager_gem_ctor(int fd)
{
+ fprintf(stderr, "[%s:%u] Mesa built without Radeon libdrm support.\n",
+ __func__, __LINE__);
+
return NULL;
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
index 0d1af726c07..7b59c0377f8 100644
--- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
+++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
@@ -130,7 +130,6 @@ radeonBufferData(struct gl_context * ctx,
*/
static void
radeonBufferSubData(struct gl_context * ctx,
- GLenum target,
GLintptrARB offset,
GLsizeiptrARB size,
const GLvoid * data,
@@ -155,7 +154,6 @@ radeonBufferSubData(struct gl_context * ctx,
*/
static void
radeonGetBufferSubData(struct gl_context * ctx,
- GLenum target,
GLintptrARB offset,
GLsizeiptrARB size,
GLvoid * data,
@@ -171,17 +169,18 @@ radeonGetBufferSubData(struct gl_context * ctx,
}
/**
- * Called via glMapBufferARB()
+ * Called via glMapBuffer() and glMapBufferRange()
*/
static void *
-radeonMapBuffer(struct gl_context * ctx,
- GLenum target,
- GLenum access,
- struct gl_buffer_object *obj)
+radeonMapBufferRange(struct gl_context * ctx,
+ GLintptr offset, GLsizeiptr length,
+ GLbitfield access, struct gl_buffer_object *obj)
{
struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
+ const GLboolean write_only =
+ (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_WRITE_BIT;
- if (access == GL_WRITE_ONLY_ARB) {
+ if (write_only) {
ctx->Driver.Flush(ctx);
}
@@ -190,12 +189,13 @@ radeonMapBuffer(struct gl_context * ctx,
return NULL;
}
- radeon_bo_map(radeon_obj->bo, access == GL_WRITE_ONLY_ARB);
+ obj->Offset = offset;
+ obj->Length = length;
+ obj->AccessFlags = access;
- obj->Pointer = radeon_obj->bo->ptr;
- obj->Length = obj->Size;
- obj->Offset = 0;
+ radeon_bo_map(radeon_obj->bo, write_only);
+ obj->Pointer = radeon_obj->bo->ptr + offset;
return obj->Pointer;
}
@@ -205,7 +205,6 @@ radeonMapBuffer(struct gl_context * ctx,
*/
static GLboolean
radeonUnmapBuffer(struct gl_context * ctx,
- GLenum target,
struct gl_buffer_object *obj)
{
struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
@@ -229,6 +228,6 @@ radeonInitBufferObjectFuncs(struct dd_function_table *functions)
functions->BufferData = radeonBufferData;
functions->BufferSubData = radeonBufferSubData;
functions->GetBufferSubData = radeonGetBufferSubData;
- functions->MapBuffer = radeonMapBuffer;
+ functions->MapBufferRange = radeonMapBufferRange;
functions->UnmapBuffer = radeonUnmapBuffer;
}
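With the range API the access argument is a bitfield rather than the old GL_WRITE_ONLY_ARB enum, so radeonMapBufferRange above derives "write only" by checking that the write bit is set and the read bit is not before it flushes and maps. A one-line sketch of that test, with a hypothetical helper name:

/* True when the caller asked for write access without read access. */
static GLboolean
is_write_only_sketch(GLbitfield access)
{
   return (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_WRITE_BIT;
}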
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c
index bfc307ca987..e7a6623cf84 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common.c
@@ -436,7 +436,6 @@ void radeonCopyBuffer( __DRIdrawable *dPriv,
const drm_clip_rect_t *rect)
{
radeonContextPtr rmesa;
- struct radeon_framebuffer *rfb;
GLint nbox, i, ret;
assert(dPriv);
@@ -447,8 +446,6 @@ void radeonCopyBuffer( __DRIdrawable *dPriv,
LOCK_HARDWARE(rmesa);
- rfb = dPriv->driverPrivate;
-
if ( RADEON_DEBUG & RADEON_IOCTL ) {
fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx );
}
@@ -527,8 +524,6 @@ static GLboolean radeonPageFlip( __DRIdrawable *dPriv )
{
radeonContextPtr radeon;
GLint ret;
- __DRIscreen *psp;
- struct radeon_renderbuffer *rrb;
struct radeon_framebuffer *rfb;
assert(dPriv);
@@ -537,9 +532,6 @@ static GLboolean radeonPageFlip( __DRIdrawable *dPriv )
radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
rfb = dPriv->driverPrivate;
- rrb = (void *)rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-
- psp = dPriv->driScreenPriv;
LOCK_HARDWARE(radeon);
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index bf8925f61d0..c08b79484af 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -515,7 +515,6 @@ void radeon_prepare_render(radeonContextPtr radeon)
__DRIcontext *driContext = radeon->dri.context;
__DRIdrawable *drawable;
__DRIscreen *screen;
- struct radeon_framebuffer *draw;
screen = driContext->driScreenPriv;
if (!screen->dri2.loader)
@@ -527,7 +526,6 @@ void radeon_prepare_render(radeonContextPtr radeon)
radeon_update_renderbuffers(driContext, drawable, GL_FALSE);
/* Intel driver does the equivalent of this, no clue if it is needed:*/
- draw = drawable->driverPrivate;
radeon_draw_buffer(radeon->glCtx, radeon->glCtx->DrawBuffer);
driContext->dri2.draw_stamp = drawable->dri2.stamp;
diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
index c2722a4e195..5595b705b15 100644
--- a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
+++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
@@ -218,11 +218,9 @@ static int cs_end(struct radeon_cs_int *cs,
static int cs_process_relocs(struct radeon_cs_int *cs)
{
- struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
struct cs_reloc_legacy *relocs;
int i, j, r;
- csm = (struct cs_manager_legacy*)cs->csm;
relocs = (struct cs_reloc_legacy *)cs->relocs;
restart:
for (i = 0; i < cs->crelocs; i++)
diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
index a91d8727792..c23e9c2d2a2 100644
--- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
@@ -560,7 +560,6 @@ static void radeonClear( struct gl_context *ctx, GLbitfield mask )
r100ContextPtr rmesa = R100_CONTEXT(ctx);
__DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
GLuint flags = 0;
- GLuint color_mask = 0;
GLuint orig_mask = mask;
if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
@@ -582,13 +581,11 @@ static void radeonClear( struct gl_context *ctx, GLbitfield mask )
if ( mask & BUFFER_BIT_FRONT_LEFT ) {
flags |= RADEON_FRONT;
- color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
mask &= ~BUFFER_BIT_FRONT_LEFT;
}
if ( mask & BUFFER_BIT_BACK_LEFT ) {
flags |= RADEON_BACK;
- color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
mask &= ~BUFFER_BIT_BACK_LEFT;
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c
index 7b6bd36dcf7..ae8a212f806 100644
--- a/src/mesa/drivers/dri/radeon/radeon_lock.c
+++ b/src/mesa/drivers/dri/radeon/radeon_lock.c
@@ -114,16 +114,6 @@ void radeon_lock_hardware(radeonContextPtr radeon
)
{
char ret = 0;
- struct radeon_framebuffer *rfb = NULL;
- struct radeon_renderbuffer *rrb = NULL;
-
- if (radeon_get_drawable(radeon)) {
- rfb = radeon_get_drawable(radeon)->driverPrivate;
-
- if (rfb)
- rrb = radeon_get_renderbuffer(&rfb->base,
- rfb->base._ColorDrawBufferIndexes[0]);
- }
if (!radeon->radeonScreen->driScreen->dri2.enabled) {
if (ATOMIC_INC_AND_FETCH(radeon->dri.hwLockCount) > 1)
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c
index 25a8ddf7b6a..a0b5506ae76 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tex.c
@@ -455,7 +455,6 @@ void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *
functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
if (radeon->radeonScreen->kernel_mm) {
- functions->CopyTexImage2D = radeonCopyTexImage2D;
functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
index f14dfa25d40..94ff3c4a727 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
@@ -141,61 +141,6 @@ do_copy_texsubimage(struct gl_context *ctx,
}
void
-radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level,
- GLenum internalFormat,
- GLint x, GLint y, GLsizei width, GLsizei height,
- GLint border)
-{
- struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
- struct gl_texture_object *texObj =
- _mesa_select_tex_object(ctx, texUnit, target);
- struct gl_texture_image *texImage =
- _mesa_select_tex_image(ctx, texObj, target, level);
- int srcx, srcy, dstx, dsty;
-
- radeonContextPtr radeon = RADEON_CONTEXT(ctx);
- radeon_prepare_render(radeon);
-
- if (border)
- goto fail;
-
- /* Setup or redefine the texture object, mipmap tree and texture
- * image. Don't populate yet.
- */
- ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
- width, height, border,
- GL_RGBA, GL_UNSIGNED_BYTE, NULL,
- &ctx->DefaultPacking, texObj, texImage);
-
- srcx = x;
- srcy = y;
- dstx = 0;
- dsty = 0;
- if (!_mesa_clip_copytexsubimage(ctx,
- &dstx, &dsty,
- &srcx, &srcy,
- &width, &height)) {
- return;
- }
-
- if (!do_copy_texsubimage(ctx, target, level,
- radeon_tex_obj(texObj), (radeon_texture_image *)texImage,
- 0, 0, x, y, width, height)) {
- goto fail;
- }
-
- return;
-
-fail:
- radeon_print(RADEON_FALLBACKS, RADEON_NORMAL,
- "Falling back to sw for glCopyTexImage2D (internalFormat %s, border %d)\n",
- _mesa_lookup_enum_by_nr(internalFormat), border);
-
- _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y,
- width, height, border);
-}
-
-void
radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level,
GLint xoffset, GLint yoffset,
GLint x, GLint y,
diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c
index 9ba98e303a7..430309392a0 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
@@ -648,18 +648,12 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_form
struct radeon_renderbuffer *rb;
radeon_texture_image *rImage;
radeonContextPtr radeon;
- r100ContextPtr rmesa;
struct radeon_framebuffer *rfb;
radeonTexObjPtr t;
uint32_t pitch_val;
- uint32_t internalFormat, format;
gl_format texFormat;
- format = GL_UNSIGNED_BYTE;
- internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? GL_RGB : GL_RGBA);
-
radeon = pDRICtx->driverPrivate;
- rmesa = pDRICtx->driverPrivate;
rfb = dPriv->driverPrivate;
texUnit = _mesa_get_current_tex_unit(radeon->glCtx);
@@ -1018,7 +1012,7 @@ static GLboolean radeon_validate_texgen( struct gl_context *ctx, GLuint unit )
static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int unit)
{
const struct gl_texture_image *firstImage;
- GLint log2Width, log2Height, log2Depth, texelBytes;
+ GLint log2Width, log2Height, texelBytes;
if ( t->bo ) {
return GL_TRUE;
@@ -1033,7 +1027,6 @@ static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int
log2Width = firstImage->WidthLog2;
log2Height = firstImage->HeightLog2;
- log2Depth = firstImage->DepthLog2;
texelBytes = _mesa_get_format_bytes(firstImage->TexFormat);
if (!t->image_override) {
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c
index ce0df32bfe4..ad7e4c146a4 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.c
@@ -787,18 +787,6 @@ static void radeon_teximage(
radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
"%s %dd: texObj %p, texImage %p, face %d, level %d\n",
__func__, dims, texObj, texImage, face, level);
- {
- struct radeon_bo *bo;
- bo = !image->mt ? image->bo : image->mt->bo;
- if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) {
- radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
- "%s Calling teximage for texture that is "
- "queued for GPU processing.\n",
- __func__);
- radeon_firevertices(rmesa);
- }
- }
-
t->validated = GL_FALSE;
@@ -820,6 +808,18 @@ static void radeon_teximage(
}
}
+ {
+ struct radeon_bo *bo;
+ bo = !image->mt ? image->bo : image->mt->bo;
+ if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) {
+ radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
+ "%s Calling teximage for texture that is "
+ "queued for GPU processing.\n",
+ __func__);
+ radeon_firevertices(rmesa);
+ }
+ }
+
/* Upload texture image; note that the spec allows pixels to be NULL */
if (compressed) {
pixels = _mesa_validate_pbo_compressed_teximage(
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h
index 538a07fbba8..6fc06d967dd 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.h
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.h
@@ -126,11 +126,6 @@ void radeonGetCompressedTexImage(struct gl_context *ctx, GLenum target, GLint le
struct gl_texture_object *texObj,
struct gl_texture_image *texImage);
-void radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level,
- GLenum internalFormat,
- GLint x, GLint y, GLsizei width, GLsizei height,
- GLint border);
-
void radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level,
GLint xoffset, GLint yoffset,
GLint x, GLint y,