summaryrefslogtreecommitdiffstats
path: root/src/mesa
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/drivers/common/meta.c3
-rw-r--r--src/mesa/drivers/dri/i965/Makefile1
-rw-r--r--src/mesa/drivers/dri/i965/brw_cc.c231
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c10
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h28
-rw-r--r--src/mesa/drivers/dri/i965/brw_curbe.c12
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw.c8
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c24
-rw-r--r--src/mesa/drivers/dri/i965/brw_misc_state.c62
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.c16
-rw-r--r--src/mesa/drivers/dri/i965/brw_sf_state.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.c49
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h37
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_batch.c49
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_cache.c73
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_dump.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c14
-rw-r--r--src/mesa/drivers/dri/i965/brw_structs.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_surface_state.c190
-rw-r--r--src/mesa/drivers/dri/i965/brw_vtbl.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_sampler_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c520
-rw-r--r--src/mesa/drivers/dri/i965/gen6_scissor_state.c3
-rw-r--r--src/mesa/drivers/dri/i965/gen6_sf_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/gen6_viewport_state.c37
-rw-r--r--src/mesa/drivers/dri/i965/gen6_wm_state.c3
-rw-r--r--src/mesa/drivers/dri/intel/intel_batchbuffer.c7
-rw-r--r--src/mesa/drivers/dri/intel/intel_batchbuffer.h4
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.c15
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.h2
-rw-r--r--src/mesa/drivers/dri/intel/intel_fbo.c9
-rw-r--r--src/mesa/drivers/dri/intel/intel_mipmap_tree.c16
-rw-r--r--src/mesa/drivers/dri/intel/intel_span.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/Makefile1
-rwxr-xr-xsrc/mesa/drivers/dri/r300/compiler/SConscript1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c9
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c17
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.h11
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c474
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h12
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c18
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h6
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.c9
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.c4
-rw-r--r--src/mesa/drivers/dri/r600/r700_fragprog.c17
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_texture.c11
-rw-r--r--src/mesa/drivers/osmesa/Makefile6
-rw-r--r--src/mesa/main/arbprogram.h4
-rw-r--r--src/mesa/main/fbobject.c53
-rw-r--r--src/mesa/main/framebuffer.c1
-rw-r--r--src/mesa/state_tracker/st_cb_texture.c2
-rw-r--r--src/mesa/swrast_setup/ss_triangle.c2
53 files changed, 1225 insertions, 875 deletions
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 629ec0ffec5..c548e104203 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -2400,6 +2400,9 @@ _mesa_meta_GenerateMipmap(GLcontext *ctx, GLenum target,
break;
}
+ /* Set MaxLevel large enough to hold the new level when we allocate it */
+ _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, dstLevel);
+
/* Create empty dest image */
if (target == GL_TEXTURE_1D) {
_mesa_TexImage1D(target, dstLevel, srcImage->InternalFormat,
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index a0039e800d2..831981558d8 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -61,6 +61,7 @@ DRIVER_SOURCES = \
brw_sf.c \
brw_sf_emit.c \
brw_sf_state.c \
+ brw_state.c \
brw_state_batch.c \
brw_state_cache.c \
brw_state_dump.c \
diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c
index c9e42a1529b..cfce5d31405 100644
--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@@ -36,7 +36,8 @@
#include "brw_util.h"
#include "main/macros.h"
-static void prepare_cc_vp( struct brw_context *brw )
+void
+brw_update_cc_vp(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
struct brw_cc_viewport ccv;
@@ -54,40 +55,9 @@ static void prepare_cc_vp( struct brw_context *brw )
}
drm_intel_bo_unreference(brw->cc.vp_bo);
- brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv),
- NULL, 0);
+ brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv));
}
-const struct brw_tracked_state brw_cc_vp = {
- .dirty = {
- .mesa = _NEW_VIEWPORT | _NEW_TRANSFORM,
- .brw = BRW_NEW_CONTEXT,
- .cache = 0
- },
- .prepare = prepare_cc_vp
-};
-
-struct brw_cc_unit_key {
- GLboolean stencil, stencil_two_side, color_blend, alpha_enabled;
-
- GLenum stencil_func[2], stencil_fail_op[2];
- GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2];
- GLubyte stencil_ref[2], stencil_write_mask[2], stencil_test_mask[2];
- GLenum logic_op;
-
- GLenum blend_eq_rgb, blend_eq_a;
- GLenum blend_src_rgb, blend_src_a;
- GLenum blend_dst_rgb, blend_dst_a;
-
- GLenum alpha_func;
- GLclampf alpha_ref;
-
- GLboolean dither;
-
- GLboolean depth_test, depth_write;
- GLenum depth_func;
-};
-
/**
* Modify blend function to force destination alpha to 1.0
*
@@ -110,136 +80,83 @@ fix_xRGB_alpha(GLenum function)
return function;
}
-static void
-cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key)
+static void prepare_cc_unit(struct brw_context *brw)
{
- GLcontext *ctx = &brw->intel.ctx;
- const unsigned back = ctx->Stencil._BackFace;
-
- memset(key, 0, sizeof(*key));
-
- key->stencil = ctx->Stencil._Enabled;
- key->stencil_two_side = ctx->Stencil._TestTwoSide;
-
- if (key->stencil) {
- key->stencil_func[0] = ctx->Stencil.Function[0];
- key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0];
- key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0];
- key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0];
- key->stencil_ref[0] = ctx->Stencil.Ref[0];
- key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0];
- key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0];
- }
- if (key->stencil_two_side) {
- key->stencil_func[1] = ctx->Stencil.Function[back];
- key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back];
- key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back];
- key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back];
- key->stencil_ref[1] = ctx->Stencil.Ref[back];
- key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back];
- key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back];
- }
-
- if (ctx->Color._LogicOpEnabled)
- key->logic_op = ctx->Color.LogicOp;
- else
- key->logic_op = GL_COPY;
-
- key->color_blend = ctx->Color.BlendEnabled;
- if (key->color_blend) {
- key->blend_eq_rgb = ctx->Color.BlendEquationRGB;
- key->blend_eq_a = ctx->Color.BlendEquationA;
- key->blend_src_rgb = ctx->Color.BlendSrcRGB;
- key->blend_dst_rgb = ctx->Color.BlendDstRGB;
- key->blend_src_a = ctx->Color.BlendSrcA;
- key->blend_dst_a = ctx->Color.BlendDstA;
-
- /* If the renderbuffer is XRGB, we have to frob the blend function to
- * force the destination alpha to 1.0. This means replacing GL_DST_ALPHA
- * with GL_ONE and GL_ONE_MINUS_DST_ALPHA with GL_ZERO.
- */
- if (ctx->DrawBuffer->Visual.alphaBits == 0) {
- key->blend_src_rgb = fix_xRGB_alpha(key->blend_src_rgb);
- key->blend_src_a = fix_xRGB_alpha(key->blend_src_a);
- key->blend_dst_rgb = fix_xRGB_alpha(key->blend_dst_rgb);
- key->blend_dst_a = fix_xRGB_alpha(key->blend_dst_a);
- }
- }
-
- key->alpha_enabled = ctx->Color.AlphaEnabled;
- if (key->alpha_enabled) {
- key->alpha_func = ctx->Color.AlphaFunc;
- key->alpha_ref = ctx->Color.AlphaRef;
- }
-
- key->dither = ctx->Color.DitherFlag;
-
- key->depth_test = ctx->Depth.Test;
- if (key->depth_test) {
- key->depth_func = ctx->Depth.Func;
- key->depth_write = ctx->Depth.Mask;
- }
+ brw_add_validated_bo(brw, brw->cc.vp_bo);
}
/**
* Creates the state cache entry for the given CC unit key.
*/
-static drm_intel_bo *
-cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
+static void upload_cc_unit(struct brw_context *brw)
{
+ GLcontext *ctx = &brw->intel.ctx;
struct brw_cc_unit_state cc;
- drm_intel_bo *bo;
+ void *map;
memset(&cc, 0, sizeof(cc));
/* _NEW_STENCIL */
- if (key->stencil) {
+ if (ctx->Stencil._Enabled) {
+ const unsigned back = ctx->Stencil._BackFace;
+
cc.cc0.stencil_enable = 1;
cc.cc0.stencil_func =
- intel_translate_compare_func(key->stencil_func[0]);
+ intel_translate_compare_func(ctx->Stencil.Function[0]);
cc.cc0.stencil_fail_op =
- intel_translate_stencil_op(key->stencil_fail_op[0]);
+ intel_translate_stencil_op(ctx->Stencil.FailFunc[0]);
cc.cc0.stencil_pass_depth_fail_op =
- intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]);
+ intel_translate_stencil_op(ctx->Stencil.ZFailFunc[0]);
cc.cc0.stencil_pass_depth_pass_op =
- intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]);
- cc.cc1.stencil_ref = key->stencil_ref[0];
- cc.cc1.stencil_write_mask = key->stencil_write_mask[0];
- cc.cc1.stencil_test_mask = key->stencil_test_mask[0];
+ intel_translate_stencil_op(ctx->Stencil.ZPassFunc[0]);
+ cc.cc1.stencil_ref = ctx->Stencil.Ref[0];
+ cc.cc1.stencil_write_mask = ctx->Stencil.WriteMask[0];
+ cc.cc1.stencil_test_mask = ctx->Stencil.ValueMask[0];
- if (key->stencil_two_side) {
+ if (ctx->Stencil._TestTwoSide) {
cc.cc0.bf_stencil_enable = 1;
cc.cc0.bf_stencil_func =
- intel_translate_compare_func(key->stencil_func[1]);
+ intel_translate_compare_func(ctx->Stencil.Function[back]);
cc.cc0.bf_stencil_fail_op =
- intel_translate_stencil_op(key->stencil_fail_op[1]);
+ intel_translate_stencil_op(ctx->Stencil.FailFunc[back]);
cc.cc0.bf_stencil_pass_depth_fail_op =
- intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]);
+ intel_translate_stencil_op(ctx->Stencil.ZFailFunc[back]);
cc.cc0.bf_stencil_pass_depth_pass_op =
- intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]);
- cc.cc1.bf_stencil_ref = key->stencil_ref[1];
- cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1];
- cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1];
+ intel_translate_stencil_op(ctx->Stencil.ZPassFunc[back]);
+ cc.cc1.bf_stencil_ref = ctx->Stencil.Ref[back];
+ cc.cc2.bf_stencil_write_mask = ctx->Stencil.WriteMask[back];
+ cc.cc2.bf_stencil_test_mask = ctx->Stencil.ValueMask[back];
}
/* Not really sure about this:
*/
- if (key->stencil_write_mask[0] ||
- (key->stencil_two_side && key->stencil_write_mask[1]))
+ if (ctx->Stencil.WriteMask[0] ||
+ (ctx->Stencil._TestTwoSide && ctx->Stencil.WriteMask[back]))
cc.cc0.stencil_write_enable = 1;
}
/* _NEW_COLOR */
- if (key->logic_op != GL_COPY) {
+ if (ctx->Color._LogicOpEnabled && ctx->Color.LogicOp != GL_COPY) {
cc.cc2.logicop_enable = 1;
- cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op);
- } else if (key->color_blend) {
- GLenum eqRGB = key->blend_eq_rgb;
- GLenum eqA = key->blend_eq_a;
- GLenum srcRGB = key->blend_src_rgb;
- GLenum dstRGB = key->blend_dst_rgb;
- GLenum srcA = key->blend_src_a;
- GLenum dstA = key->blend_dst_a;
+ cc.cc5.logicop_func = intel_translate_logic_op(ctx->Color.LogicOp);
+ } else if (ctx->Color.BlendEnabled) {
+ GLenum eqRGB = ctx->Color.BlendEquationRGB;
+ GLenum eqA = ctx->Color.BlendEquationA;
+ GLenum srcRGB = ctx->Color.BlendSrcRGB;
+ GLenum dstRGB = ctx->Color.BlendDstRGB;
+ GLenum srcA = ctx->Color.BlendSrcA;
+ GLenum dstA = ctx->Color.BlendDstA;
+
+ /* If the renderbuffer is XRGB, we have to frob the blend function to
+ * force the destination alpha to 1.0. This means replacing GL_DST_ALPHA
+ * with GL_ONE and GL_ONE_MINUS_DST_ALPHA with GL_ZERO.
+ */
+ if (ctx->DrawBuffer->Visual.alphaBits == 0) {
+ srcRGB = fix_xRGB_alpha(srcRGB);
+ srcA = fix_xRGB_alpha(srcA);
+ dstRGB = fix_xRGB_alpha(dstRGB);
+ dstA = fix_xRGB_alpha(dstA);
+ }
if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
srcRGB = dstRGB = GL_ONE;
@@ -263,25 +180,27 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
eqA != eqRGB);
}
- if (key->alpha_enabled) {
+ if (ctx->Color.AlphaEnabled) {
cc.cc3.alpha_test = 1;
- cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func);
+ cc.cc3.alpha_test_func =
+ intel_translate_compare_func(ctx->Color.AlphaFunc);
cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
- UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref);
+ UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], ctx->Color.AlphaRef);
}
- if (key->dither) {
+ if (ctx->Color.DitherFlag) {
cc.cc5.dither_enable = 1;
cc.cc6.y_dither_offset = 0;
cc.cc6.x_dither_offset = 0;
}
/* _NEW_DEPTH */
- if (key->depth_test) {
+ if (ctx->Depth.Test) {
cc.cc2.depth_test = 1;
- cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func);
- cc.cc2.depth_write_enable = key->depth_write;
+ cc.cc2.depth_test_function =
+ intel_translate_compare_func(ctx->Depth.Func);
+ cc.cc2.depth_write_enable = ctx->Depth.Mask;
}
/* CACHE_NEW_CC_VP */
@@ -290,43 +209,25 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
if (INTEL_DEBUG & DEBUG_STATS)
cc.cc5.statistics_enable = 1;
- bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT,
- key, sizeof(*key),
- &brw->cc.vp_bo, 1,
- &cc, sizeof(cc));
+ map = brw_state_batch(brw, sizeof(cc), 64,
+ &brw->cc.state_bo, &brw->cc.state_offset);
+ memcpy(map, &cc, sizeof(cc));
+ brw->state.dirty.cache |= CACHE_NEW_CC_UNIT;
/* Emit CC viewport relocation */
- drm_intel_bo_emit_reloc(bo, offsetof(struct brw_cc_unit_state, cc4),
+ drm_intel_bo_emit_reloc(brw->cc.state_bo, (brw->cc.state_offset +
+ offsetof(struct brw_cc_unit_state,
+ cc4)),
brw->cc.vp_bo, 0,
I915_GEM_DOMAIN_INSTRUCTION, 0);
-
- return bo;
-}
-
-static void prepare_cc_unit( struct brw_context *brw )
-{
- struct brw_cc_unit_key key;
-
- cc_unit_populate_key(brw, &key);
-
- drm_intel_bo_unreference(brw->cc.state_bo);
- brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_CC_UNIT,
- &key, sizeof(key),
- &brw->cc.vp_bo, 1,
- NULL);
-
- if (brw->cc.state_bo == NULL)
- brw->cc.state_bo = cc_unit_create_from_key(brw, &key);
}
const struct brw_tracked_state brw_cc_unit = {
.dirty = {
.mesa = _NEW_STENCIL | _NEW_COLOR | _NEW_DEPTH,
- .brw = 0,
+ .brw = BRW_NEW_BATCH,
.cache = CACHE_NEW_CC_VP
},
.prepare = prepare_cc_unit,
+ .emit = upload_cc_unit,
};
-
-
-
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index e688431b126..6d064b822e5 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -34,8 +34,6 @@
#include "main/api_noop.h"
#include "main/macros.h"
#include "main/simple_list.h"
-#include "program/shader_api.h"
-
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_draw.h"
@@ -54,6 +52,9 @@ static void brwInitDriverFunctions( struct dd_function_table *functions )
brwInitFragProgFuncs( functions );
brw_init_queryobj_functions(functions);
+
+ functions->Enable = brw_enable;
+ functions->DepthRange = brw_depth_range;
}
GLboolean brwCreateContext( int api,
@@ -187,6 +188,11 @@ GLboolean brwCreateContext( int api,
brw_draw_init( brw );
+ /* Now that most driver functions are hooked up, initialize some of the
+ * immediate state.
+ */
+ brw_update_cc_vp(brw);
+
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index d97634c1c60..cc4e6638e8b 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -131,6 +131,7 @@ struct brw_context;
#define BRW_NEW_WM_INPUT_DIMENSIONS 0x100
#define BRW_NEW_PSP 0x800
#define BRW_NEW_WM_SURFACES 0x1000
+#define BRW_NEW_BINDING_TABLE 0x2000
#define BRW_NEW_INDICES 0x4000
#define BRW_NEW_VERTICES 0x8000
/**
@@ -143,6 +144,8 @@ struct brw_context;
#define BRW_NEW_NR_WM_SURFACES 0x40000
#define BRW_NEW_NR_VS_SURFACES 0x80000
#define BRW_NEW_INDEX_BUFFER 0x100000
+#define BRW_NEW_VS_CONSTBUF 0x200000
+#define BRW_NEW_WM_CONSTBUF 0x200000
struct brw_state_flags {
/** State update flags signalled by mesa internals */
@@ -160,7 +163,6 @@ struct brw_state_flags {
struct brw_vertex_program {
struct gl_vertex_program program;
GLuint id;
- drm_intel_bo *const_buffer; /** Program constant buffer/surface */
GLboolean use_const_buffer;
};
@@ -172,7 +174,6 @@ struct brw_fragment_program {
GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */
GLboolean use_const_buffer;
- drm_intel_bo *const_buffer; /** Program constant buffer/surface */
/** for debugging, which texture units are referenced */
GLbitfield tex_units_used;
@@ -301,8 +302,6 @@ enum brw_cache_id {
BRW_CLIP_VP,
BRW_CLIP_UNIT,
BRW_CLIP_PROG,
- BRW_SS_SURFACE,
- BRW_SS_SURF_BIND,
BRW_MAX_CACHE
};
@@ -376,8 +375,6 @@ struct brw_tracked_state {
#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP)
#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT)
#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG)
-#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE)
-#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND)
struct brw_cached_batch_item {
struct header *header;
@@ -460,12 +457,11 @@ struct brw_context
* consisting of the vertex buffers, pipelined state pointers,
* the CURBE, the depth buffer, and a query BO.
*/
- drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + 16];
+ drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16];
int validated_bo_count;
} state;
- struct brw_cache cache; /** non-surface items */
- struct brw_cache surface_cache; /* surface items */
+ struct brw_cache cache;
struct brw_cached_batch_item *cached_batch_items;
struct {
@@ -594,10 +590,13 @@ struct brw_context
drm_intel_bo *prog_bo;
drm_intel_bo *state_bo;
+ drm_intel_bo *const_bo;
/** Binding table of pointers to surf_bo entries */
drm_intel_bo *bind_bo;
+ uint32_t bind_bo_offset;
drm_intel_bo *surf_bo[BRW_VS_MAX_SURF];
+ uint32_t surf_offset[BRW_VS_MAX_SURF];
GLuint nr_surfaces;
} vs;
@@ -649,10 +648,13 @@ struct brw_context
/** Binding table of pointers to surf_bo entries */
drm_intel_bo *bind_bo;
+ uint32_t bind_bo_offset;
drm_intel_bo *surf_bo[BRW_WM_MAX_SURF];
+ uint32_t surf_offset[BRW_WM_MAX_SURF];
drm_intel_bo *prog_bo;
drm_intel_bo *state_bo;
+ drm_intel_bo *const_bo;
} wm;
@@ -667,6 +669,7 @@ struct brw_context
drm_intel_bo *color_calc_state_bo;
drm_intel_bo *state_bo;
+ uint32_t state_offset;
} cc;
struct {
@@ -727,6 +730,9 @@ void brwInitFragProgFuncs( struct dd_function_table *functions );
*/
void brw_upload_urb_fence(struct brw_context *brw);
+/* brw_cc.c */
+void brw_update_cc_vp(struct brw_context *brw);
+
/* brw_curbe.c
*/
void brw_upload_cs_urb_state(struct brw_context *brw);
@@ -734,6 +740,10 @@ void brw_upload_cs_urb_state(struct brw_context *brw);
/* brw_disasm.c */
int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
+/* brw_state.c */
+void brw_enable(GLcontext * ctx, GLenum cap, GLboolean state);
+void brw_depth_range(GLcontext *ctx, GLclampd nearval, GLclampd farval);
+
/*======================================================================
* Inline conversion functions. These are better-typed than the
* macros used previously:
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 3d52f6f6047..8196d8ca625 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -182,8 +182,6 @@ static void prepare_constant_buffer(struct brw_context *brw)
GLcontext *ctx = &brw->intel.ctx;
const struct brw_vertex_program *vp =
brw_vertex_program_const(brw->vertex_program);
- const struct brw_fragment_program *fp =
- brw_fragment_program_const(brw->fragment_program);
const GLuint sz = brw->curbe.total_size;
const GLuint bufsz = sz * 16 * sizeof(GLfloat);
GLfloat *buf;
@@ -200,8 +198,6 @@ static void prepare_constant_buffer(struct brw_context *brw)
if (brw->curbe.wm_size) {
GLuint offset = brw->curbe.wm_start * 16;
- _mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
-
/* copy float constants */
for (i = 0; i < brw->wm.prog_data->nr_params; i++)
buf[offset + i] = *brw->wm.prog_data->param[i];
@@ -244,14 +240,6 @@ static void prepare_constant_buffer(struct brw_context *brw)
GLuint offset = brw->curbe.vs_start * 16;
GLuint nr = brw->vs.prog_data->nr_params / 4;
- if (brw->vertex_program->IsNVProgram)
- _mesa_load_tracked_matrices(ctx);
-
- /* Updates the ParamaterValues[i] pointers for all parameters of the
- * basic type of PROGRAM_STATE_VAR.
- */
- _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
-
if (vp->use_const_buffer) {
/* Load the subset of push constants that will get used when
* we also have a pull constant buffer.
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 2d3556b8054..39bf5b63fc2 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -998,7 +998,7 @@
# define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14)
# define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14)
# define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13)
-# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 12)
+# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 11)
# define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9)
# define GEN6_WM_USES_SOURCE_W (1 << 8)
# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 3e305c89686..16331cc3ac0 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -151,9 +151,6 @@ static void brw_emit_prim(struct brw_context *brw,
prim_packet.start_instance_location = 0;
prim_packet.base_vert_location = prim->basevertex;
- /* Can't wrap here, since we rely on the validated state. */
- intel->no_batch_wrap = GL_TRUE;
-
/* If we're set to always flush, do it before and after the primitive emit.
* We want to catch both missed flushes that hurt instruction/state cache
* and missed flushes of the render cache as it heads to other parts of
@@ -169,8 +166,6 @@ static void brw_emit_prim(struct brw_context *brw,
if (intel->always_flush_cache) {
intel_batchbuffer_emit_mi_flush(intel->batch);
}
-
- intel->no_batch_wrap = GL_FALSE;
}
static void brw_merge_inputs( struct brw_context *brw,
@@ -394,11 +389,14 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
}
}
+ intel->no_batch_wrap = GL_TRUE;
brw_upload_state(brw);
}
brw_emit_prim(brw, &prim[i], hw_prim);
+ intel->no_batch_wrap = GL_FALSE;
+
retval = GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 175899b0268..34dfe10cb93 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -286,6 +286,7 @@ static void brw_set_ff_sync_message(struct brw_context *brw,
GLuint response_length,
GLboolean end_of_thread)
{
+ struct intel_context *intel = &brw->intel;
brw_set_src1(insn, brw_imm_d(0));
insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
@@ -298,8 +299,12 @@ static void brw_set_ff_sync_message(struct brw_context *brw,
insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
insn->bits3.urb_gen5.msg_length = 1;
insn->bits3.urb_gen5.end_of_thread = end_of_thread;
- insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
- insn->bits2.send_gen5.end_of_thread = end_of_thread;
+ if (intel->gen >= 6) {
+ insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
+ } else {
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
+ insn->bits2.send_gen5.end_of_thread = end_of_thread;
+ }
}
static void brw_set_urb_message( struct brw_context *brw,
@@ -966,10 +971,25 @@ void brw_math_16( struct brw_compile *p,
struct brw_reg src,
GLuint precision )
{
+ struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
+ if (intel->gen >= 6) {
+ insn = next_insn(p, BRW_OPCODE_MATH);
+
+ /* Math is the same ISA format as other opcodes, except that CondModifier
+ * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
+ */
+ insn->header.destreg__conditionalmod = function;
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, src);
+ brw_set_src1(insn, brw_null_reg());
+ return;
+ }
+
/* First instruction:
*/
brw_push_insn_state(p);
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 35908ee7b69..572175f463e 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -96,18 +96,12 @@ const struct brw_tracked_state brw_drawing_rect = {
.emit = upload_drawing_rect
};
-static void prepare_binding_table_pointers(struct brw_context *brw)
-{
- brw_add_validated_bo(brw, brw->vs.bind_bo);
- brw_add_validated_bo(brw, brw->wm.bind_bo);
-}
-
/**
* Upload the binding table pointers, which point each stage's array of surface
* state pointers.
*
* The binding table pointers are relative to the surface state base address,
- * which is 0.
+ * which points at the batchbuffer containing the streamed batch state.
*/
static void upload_binding_table_pointers(struct brw_context *brw)
{
@@ -115,24 +109,20 @@ static void upload_binding_table_pointers(struct brw_context *brw)
BEGIN_BATCH(6);
OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
- if (brw->vs.bind_bo != NULL)
- OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
- else
- OUT_BATCH(0);
+ OUT_BATCH(brw->vs.bind_bo_offset);
OUT_BATCH(0); /* gs */
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
- OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */
+ OUT_BATCH(brw->wm.bind_bo_offset);
ADVANCE_BATCH();
}
const struct brw_tracked_state brw_binding_table_pointers = {
.dirty = {
.mesa = 0,
- .brw = BRW_NEW_BATCH,
- .cache = CACHE_NEW_SURF_BIND,
+ .brw = BRW_NEW_BATCH | BRW_NEW_BINDING_TABLE,
+ .cache = 0,
},
- .prepare = prepare_binding_table_pointers,
.emit = upload_binding_table_pointers,
};
@@ -141,7 +131,7 @@ const struct brw_tracked_state brw_binding_table_pointers = {
* state pointers.
*
* The binding table pointers are relative to the surface state base address,
- * which is 0.
+ * which points at the batchbuffer containing the streamed batch state.
*/
static void upload_gen6_binding_table_pointers(struct brw_context *brw)
{
@@ -153,22 +143,18 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw)
GEN6_BINDING_TABLE_MODIFY_GS |
GEN6_BINDING_TABLE_MODIFY_PS |
(4 - 2));
- if (brw->vs.bind_bo != NULL)
- OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
- else
- OUT_BATCH(0);
+ OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
OUT_BATCH(0); /* gs */
- OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */
+ OUT_BATCH(brw->wm.bind_bo_offset); /* wm/ps */
ADVANCE_BATCH();
}
const struct brw_tracked_state gen6_binding_table_pointers = {
.dirty = {
.mesa = 0,
- .brw = BRW_NEW_BATCH,
- .cache = CACHE_NEW_SURF_BIND,
+ .brw = BRW_NEW_BATCH | BRW_NEW_BINDING_TABLE,
+ .cache = 0,
},
- .prepare = prepare_binding_table_pointers,
.emit = upload_gen6_binding_table_pointers,
};
@@ -199,7 +185,8 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
- OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ brw->cc.state_offset);
ADVANCE_BATCH();
brw->state.dirty.brw |= BRW_NEW_PSP;
@@ -213,7 +200,6 @@ static void prepare_psp_urb_cbs(struct brw_context *brw)
brw_add_validated_bo(brw, brw->clip.state_bo);
brw_add_validated_bo(brw, brw->sf.state_bo);
brw_add_validated_bo(brw, brw->wm.state_bo);
- brw_add_validated_bo(brw, brw->cc.state_bo);
}
static void upload_psp_urb_cbs(struct brw_context *brw )
@@ -590,23 +576,23 @@ const struct brw_tracked_state brw_invarient_state = {
/**
* Define the base addresses which some state is referenced from.
*
- * This allows us to avoid having to emit relocations in many places for
- * cached state, and instead emit pointers inside of large, mostly-static
- * state pools. This comes at the expense of memory, and more expensive cache
- * misses.
+ * This allows us to avoid having to emit relocations for the objects,
+ * and is actually required for binding table pointers on gen6.
+ *
+ * Surface state base address covers binding table pointers and
+ * surface state objects, but not the surfaces that the surface state
+ * objects point to.
*/
static void upload_state_base_address( struct brw_context *brw )
{
struct intel_context *intel = &brw->intel;
- /* Output the structure (brw_state_base_address) directly to the
- * batchbuffer, so we can emit relocations inline.
- */
if (intel->gen >= 6) {
BEGIN_BATCH(10);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
OUT_BATCH(1); /* General state base address */
- OUT_BATCH(1); /* Surface state base address */
+ OUT_RELOC(intel->batch->buf, I915_GEM_DOMAIN_SAMPLER, 0,
+ 1); /* Surface state base address */
OUT_BATCH(1); /* Dynamic state base address */
OUT_BATCH(1); /* Indirect object base address */
OUT_BATCH(1); /* Instruction base address */
@@ -619,7 +605,8 @@ static void upload_state_base_address( struct brw_context *brw )
BEGIN_BATCH(8);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
OUT_BATCH(1); /* General state base address */
- OUT_BATCH(1); /* Surface state base address */
+ OUT_RELOC(intel->batch->buf, I915_GEM_DOMAIN_SAMPLER, 0,
+ 1); /* Surface state base address */
OUT_BATCH(1); /* Indirect object base address */
OUT_BATCH(1); /* Instruction base address */
OUT_BATCH(1); /* General state upper bound */
@@ -630,7 +617,8 @@ static void upload_state_base_address( struct brw_context *brw )
BEGIN_BATCH(6);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
OUT_BATCH(1); /* General state base address */
- OUT_BATCH(1); /* Surface state base address */
+ OUT_RELOC(intel->batch->buf, I915_GEM_DOMAIN_SAMPLER, 0,
+ 1); /* Surface state base address */
OUT_BATCH(1); /* Indirect object base address */
OUT_BATCH(1); /* General state upper bound */
OUT_BATCH(1); /* Indirect object upper bound */
@@ -641,7 +629,7 @@ static void upload_state_base_address( struct brw_context *brw )
const struct brw_tracked_state brw_state_base_address = {
.dirty = {
.mesa = 0,
- .brw = BRW_NEW_CONTEXT,
+ .brw = BRW_NEW_BATCH,
.cache = 0,
},
.emit = upload_state_base_address
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index cc9ac6d5749..aeed24d4e14 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -31,10 +31,10 @@
#include "main/imports.h"
#include "main/enums.h"
+#include "main/shaderobj.h"
#include "program/prog_parameter.h"
#include "program/program.h"
#include "program/programopt.h"
-#include "program/shader_api.h"
#include "tnl/tnl.h"
#include "brw_context.h"
@@ -95,20 +95,6 @@ static struct gl_program *brwNewProgram( GLcontext *ctx,
static void brwDeleteProgram( GLcontext *ctx,
struct gl_program *prog )
{
- if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
- struct gl_fragment_program *fp = (struct gl_fragment_program *) prog;
- struct brw_fragment_program *brw_fp = brw_fragment_program(fp);
-
- drm_intel_bo_unreference(brw_fp->const_buffer);
- }
-
- if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
- struct gl_vertex_program *vp = (struct gl_vertex_program *) prog;
- struct brw_vertex_program *brw_vp = brw_vertex_program(vp);
-
- drm_intel_bo_unreference(brw_vp->const_buffer);
- }
-
_mesa_delete_program( ctx, prog );
}
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
index a95acb4cf82..e290ca92f60 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -105,8 +105,7 @@ static void upload_sf_vp(struct brw_context *brw)
}
drm_intel_bo_unreference(brw->sf.vp_bo);
- brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, &sfv, sizeof(sfv),
- NULL, 0);
+ brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, &sfv, sizeof(sfv));
}
const struct brw_tracked_state brw_sf_vp = {
diff --git a/src/mesa/drivers/dri/i965/brw_state.c b/src/mesa/drivers/dri/i965/brw_state.c
new file mode 100644
index 00000000000..1e77e427d38
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_state.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <[email protected]>
+ *
+ */
+
+#include "brw_context.h"
+
+void
+brw_enable(GLcontext *ctx, GLenum cap, GLboolean state)
+{
+ struct brw_context *brw = brw_context(ctx);
+
+ switch (cap) {
+ case GL_DEPTH_CLAMP:
+ brw_update_cc_vp(brw);
+ break;
+ }
+}
+
+void
+brw_depth_range(GLcontext *ctx, GLclampd nearval, GLclampd farval)
+{
+ struct brw_context *brw = brw_context(ctx);
+
+ if (ctx->Transform.DepthClamp)
+ brw_update_cc_vp(brw);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 85949215e82..40eece276b7 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -48,10 +48,11 @@ brw_add_validated_bo(struct brw_context *brw, drm_intel_bo *bo)
const struct brw_tracked_state brw_blend_constant_color;
const struct brw_tracked_state brw_cc_unit;
-const struct brw_tracked_state brw_cc_vp;
const struct brw_tracked_state brw_check_fallback;
const struct brw_tracked_state brw_clip_prog;
const struct brw_tracked_state brw_clip_unit;
+const struct brw_tracked_state brw_vs_constants;
+const struct brw_tracked_state brw_wm_constants;
const struct brw_tracked_state brw_constant_buffer;
const struct brw_tracked_state brw_curbe_offsets;
const struct brw_tracked_state brw_invarient_state;
@@ -80,6 +81,7 @@ const struct brw_tracked_state brw_wm_prog;
const struct brw_tracked_state brw_wm_samplers;
const struct brw_tracked_state brw_wm_constant_surface;
const struct brw_tracked_state brw_wm_surfaces;
+const struct brw_tracked_state brw_wm_binding_table;
const struct brw_tracked_state brw_wm_unit;
const struct brw_tracked_state brw_psp_urb_cbs;
@@ -93,7 +95,6 @@ const struct brw_tracked_state brw_index_buffer;
const struct brw_tracked_state gen6_binding_table_pointers;
const struct brw_tracked_state gen6_blend_state;
const struct brw_tracked_state gen6_cc_state_pointers;
-const struct brw_tracked_state gen6_cc_vp;
const struct brw_tracked_state gen6_clip_state;
const struct brw_tracked_state gen6_clip_vp;
const struct brw_tracked_state gen6_color_calc_state;
@@ -108,20 +109,6 @@ const struct brw_tracked_state gen6_viewport_state;
const struct brw_tracked_state gen6_vs_state;
const struct brw_tracked_state gen6_wm_state;
-/**
- * Use same key for WM and VS surfaces.
- */
-struct brw_surface_key {
- GLenum target, depthmode;
- drm_intel_bo *bo;
- GLint format, internal_format;
- GLint first_level, last_level;
- GLint width, height, depth;
- GLint pitch, cpp;
- uint32_t tiling;
- GLuint offset;
-};
-
/***********************************************************************
* brw_state.c
*/
@@ -137,9 +124,7 @@ void brw_clear_validated_bos(struct brw_context *brw);
drm_intel_bo *brw_cache_data(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *data,
- GLuint size,
- drm_intel_bo **reloc_bufs,
- GLuint nr_reloc_bufs);
+ GLuint size);
drm_intel_bo *brw_upload_cache(struct brw_cache *cache,
enum brw_cache_id cache_id,
@@ -173,7 +158,6 @@ void brw_state_cache_check_size( struct brw_context *brw );
void brw_init_caches( struct brw_context *brw );
void brw_destroy_caches( struct brw_context *brw );
-void brw_state_cache_bo_delete(struct brw_cache *cache, drm_intel_bo *bo);
/***********************************************************************
* brw_state_batch.c
@@ -186,10 +170,17 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
GLuint sz );
void brw_destroy_batch_cache( struct brw_context *brw );
void brw_clear_batch_cache( struct brw_context *brw );
+void *brw_state_batch(struct brw_context *brw,
+ int size,
+ int alignment,
+ drm_intel_bo **out_bo,
+ uint32_t *out_offset);
/* brw_wm_surface_state.c */
-drm_intel_bo *
-brw_create_constant_surface( struct brw_context *brw,
- struct brw_surface_key *key );
+void brw_create_constant_surface(struct brw_context *brw,
+ drm_intel_bo *bo,
+ int width,
+ drm_intel_bo **out_bo,
+ uint32_t *out_offset);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c
index 39019412fda..be3989eb7db 100644
--- a/src/mesa/drivers/dri/i965/brw_state_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_state_batch.c
@@ -97,3 +97,52 @@ void brw_destroy_batch_cache( struct brw_context *brw )
{
brw_clear_batch_cache(brw);
}
+
+/**
+ * Allocates a block of space in the batchbuffer for indirect state.
+ *
+ * We don't want to allocate separate BOs for every bit of indirect
+ * state in the driver. It means overallocating by a significant
+ * margin (4096 bytes, even if the object is just a 20-byte surface
+ * state), and more buffers to walk and count for aperture size checking.
+ *
+ * However, due to the restrictions inposed by the aperture size
+ * checking performance hacks, we can't have the batch point at a
+ * separate indirect state buffer, because once the batch points at
+ * it, no more relocations can be added to it. So, we sneak these
+ * buffers in at the top of the batchbuffer.
+ */
+void *
+brw_state_batch(struct brw_context *brw,
+ int size,
+ int alignment,
+ drm_intel_bo **out_bo,
+ uint32_t *out_offset)
+{
+ struct intel_batchbuffer *batch = brw->intel.batch;
+ uint32_t offset;
+
+ assert(size < batch->buf->size);
+ offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
+
+ /* If allocating from the top would wrap below the batchbuffer, or
+ * if the batch's used space (plus the reserved pad) collides with our
+ * space, then flush and try again.
+ */
+ if (batch->state_batch_offset < size ||
+ offset < batch->ptr - batch->map + batch->reserved_space) {
+ intel_batchbuffer_flush(batch);
+ offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
+ }
+
+ batch->state_batch_offset = offset;
+
+ if (*out_bo != batch->buf) {
+ drm_intel_bo_unreference(*out_bo);
+ drm_intel_bo_reference(batch->buf);
+ *out_bo = batch->buf;
+ }
+
+ *out_offset = offset;
+ return batch->map + offset;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index ea81ad13417..b31d84953a1 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -310,9 +310,7 @@ drm_intel_bo *
brw_cache_data(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *data,
- GLuint data_size,
- drm_intel_bo **reloc_bufs,
- GLuint nr_reloc_bufs)
+ GLuint data_size)
{
drm_intel_bo *bo;
struct brw_cache_item *item, lookup;
@@ -321,8 +319,8 @@ brw_cache_data(struct brw_cache *cache,
lookup.cache_id = cache_id;
lookup.key = data;
lookup.key_size = data_size;
- lookup.reloc_bufs = reloc_bufs;
- lookup.nr_reloc_bufs = nr_reloc_bufs;
+ lookup.reloc_bufs = NULL;
+ lookup.nr_reloc_bufs = 0;
hash = hash_key(&lookup);
lookup.hash = hash;
@@ -335,7 +333,7 @@ brw_cache_data(struct brw_cache *cache,
bo = brw_upload_cache(cache, cache_id,
data, data_size,
- reloc_bufs, nr_reloc_bufs,
+ NULL, 0,
data, data_size);
return bo;
@@ -396,29 +394,10 @@ brw_init_non_surface_cache(struct brw_context *brw)
brw_init_cache_id(cache, "DEPTH_STENCIL_STATE", BRW_DEPTH_STENCIL_STATE);
}
-
-static void
-brw_init_surface_cache(struct brw_context *brw)
-{
- struct brw_cache *cache = &brw->surface_cache;
-
- cache->brw = brw;
-
- cache->size = 7;
- cache->n_items = 0;
- cache->items = (struct brw_cache_item **)
- calloc(1, cache->size * sizeof(struct brw_cache_item));
-
- brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE);
- brw_init_cache_id(cache, "SS_SURF_BIND", BRW_SS_SURF_BIND);
-}
-
-
void
brw_init_caches(struct brw_context *brw)
{
brw_init_non_surface_cache(brw);
- brw_init_surface_cache(brw);
}
@@ -452,56 +431,17 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
brw->state.dirty.cache |= ~0;
}
-/* Clear all entries from the cache that point to the given bo.
- *
- * This lets us release memory for reuse earlier for known-dead buffers,
- * at the cost of walking the entire hash table.
- */
-void
-brw_state_cache_bo_delete(struct brw_cache *cache, drm_intel_bo *bo)
-{
- struct brw_cache_item **prev;
- GLuint i;
-
- if (INTEL_DEBUG & DEBUG_STATE)
- printf("%s\n", __FUNCTION__);
-
- for (i = 0; i < cache->size; i++) {
- for (prev = &cache->items[i]; *prev;) {
- struct brw_cache_item *c = *prev;
-
- if (drm_intel_bo_references(c->bo, bo)) {
- int j;
-
- *prev = c->next;
-
- for (j = 0; j < c->nr_reloc_bufs; j++)
- drm_intel_bo_unreference(c->reloc_bufs[j]);
- drm_intel_bo_unreference(c->bo);
- free((void *)c->key);
- free(c);
- cache->n_items--;
- } else {
- prev = &c->next;
- }
- }
- }
-}
-
void
brw_state_cache_check_size(struct brw_context *brw)
{
if (INTEL_DEBUG & DEBUG_STATE)
printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items);
- /* un-tuned guess. We've got around 20 state objects for a total of around
- * 32k, so 1000 of them is around 1.5MB.
+ /* un-tuned guess. Each object is generally a page, so 1000 of them is 4 MB of
+ * state cache.
*/
if (brw->cache.n_items > 1000)
brw_clear_cache(brw, &brw->cache);
-
- if (brw->surface_cache.n_items > 1000)
- brw_clear_cache(brw, &brw->surface_cache);
}
@@ -528,5 +468,4 @@ void
brw_destroy_caches(struct brw_context *brw)
{
brw_destroy_cache(brw, &brw->cache);
- brw_destroy_cache(brw, &brw->surface_cache);
}
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
index cb66806ebf3..d410861bdf6 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -111,8 +111,8 @@ static void dump_wm_surface_state(struct brw_context *brw)
continue;
}
drm_intel_bo_map(surf_bo, GL_FALSE);
- surfoff = surf_bo->offset;
- surf = (struct brw_surface_state *)(surf_bo->virtual);
+ surfoff = surf_bo->offset + brw->wm.surf_offset[i];
+ surf = (struct brw_surface_state *)(surf_bo->virtual + brw->wm.surf_offset[i]);
sprintf(name, "WM SS%d", i);
state_out(name, surf, surfoff, 0, "%s %s\n",
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 49629ba2289..f92a19c2aa0 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -61,12 +61,15 @@ static const struct brw_tracked_state *gen4_atoms[] =
&brw_curbe_offsets,
&brw_recalculate_urb_fence,
- &brw_cc_vp,
&brw_cc_unit,
+ &brw_vs_constants, /* Before vs_surfaces and constant_buffer */
+ &brw_wm_constants, /* Before wm_surfaces and constant_buffer */
+
&brw_vs_surfaces, /* must do before unit */
&brw_wm_constant_surface, /* must do before wm surfaces/bind bo */
&brw_wm_surfaces, /* must do before samplers and unit */
+ &brw_wm_binding_table,
&brw_wm_samplers,
&brw_wm_unit,
@@ -113,7 +116,6 @@ const struct brw_tracked_state *gen6_atoms[] =
&gen6_clip_vp,
&gen6_sf_vp,
- &gen6_cc_vp,
/* Command packets: */
&brw_invarient_state,
@@ -126,9 +128,13 @@ const struct brw_tracked_state *gen6_atoms[] =
&gen6_depth_stencil_state, /* must do before cc unit */
&gen6_cc_state_pointers,
+ &brw_vs_constants, /* Before vs_surfaces and constant_buffer */
+ &brw_wm_constants, /* Before wm_surfaces and constant_buffer */
+
&brw_vs_surfaces, /* must do before unit */
&brw_wm_constant_surface, /* must do before wm surfaces/bind bo */
&brw_wm_surfaces, /* must do before samplers and unit */
+ &brw_wm_binding_table,
&brw_wm_samplers,
&gen6_sampler_state,
@@ -266,6 +272,8 @@ static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_CONTEXT),
DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
DEFINE_BIT(BRW_NEW_PSP),
+ DEFINE_BIT(BRW_NEW_WM_SURFACES),
+ DEFINE_BIT(BRW_NEW_BINDING_TABLE),
DEFINE_BIT(BRW_NEW_INDICES),
DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
DEFINE_BIT(BRW_NEW_VERTICES),
@@ -292,8 +300,6 @@ static struct dirty_bit_map cache_bits[] = {
DEFINE_BIT(CACHE_NEW_CLIP_VP),
DEFINE_BIT(CACHE_NEW_CLIP_UNIT),
DEFINE_BIT(CACHE_NEW_CLIP_PROG),
- DEFINE_BIT(CACHE_NEW_SURFACE),
- DEFINE_BIT(CACHE_NEW_SURF_BIND),
{0, 0, 0}
};
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index f17fe485306..2a7fa5b6997 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -278,7 +278,7 @@ struct brw_aa_line_parameters
struct header header;
struct {
- GLuint aa_coverage_scope:8;
+ GLuint aa_coverage_slope:8;
GLuint pad0:8;
GLuint aa_coverage_bias:8;
GLuint pad1:8;
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index 568c2e3b030..0250a68d292 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -42,42 +42,59 @@
* Otherwise, constants go through the CURBEs using the brw_constant_buffer
* state atom.
*/
-static drm_intel_bo *
-brw_vs_update_constant_buffer(struct brw_context *brw)
+static void
+prepare_vs_constants(struct brw_context *brw)
{
+ GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = &brw->intel;
struct brw_vertex_program *vp =
(struct brw_vertex_program *) brw->vertex_program;
const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
const int size = params->NumParameters * 4 * sizeof(GLfloat);
- drm_intel_bo *const_buffer;
int i;
- /* BRW_NEW_VERTEX_PROGRAM */
- if (!vp->use_const_buffer)
- return NULL;
-
- const_buffer = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer",
- size, 64);
-
- /* _NEW_PROGRAM_CONSTANTS */
+ if (vp->program.IsNVProgram)
+ _mesa_load_tracked_matrices(ctx);
/* Updates the ParamaterValues[i] pointers for all parameters of the
* basic type of PROGRAM_STATE_VAR.
*/
_mesa_load_state_parameters(&brw->intel.ctx, vp->program.Base.Parameters);
- drm_intel_gem_bo_map_gtt(const_buffer);
+ /* BRW_NEW_VERTEX_PROGRAM */
+ if (!vp->use_const_buffer) {
+ if (brw->vs.const_bo) {
+ drm_intel_bo_unreference(brw->vs.const_bo);
+ brw->vs.const_bo = NULL;
+ brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
+ }
+ return;
+ }
+
+ /* _NEW_PROGRAM_CONSTANTS */
+ drm_intel_bo_unreference(brw->vs.const_bo);
+ brw->vs.const_bo = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer",
+ size, 64);
+
+ drm_intel_gem_bo_map_gtt(brw->vs.const_bo);
for (i = 0; i < params->NumParameters; i++) {
- memcpy(const_buffer->virtual + i * 4 * sizeof(float),
+ memcpy(brw->vs.const_bo->virtual + i * 4 * sizeof(float),
params->ParameterValues[i],
4 * sizeof(float));
}
- drm_intel_gem_bo_unmap_gtt(const_buffer);
-
- return const_buffer;
+ drm_intel_gem_bo_unmap_gtt(brw->vs.const_bo);
+ brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
}
+const struct brw_tracked_state brw_vs_constants = {
+ .dirty = {
+ .mesa = (_NEW_PROGRAM_CONSTANTS),
+ .brw = (BRW_NEW_VERTEX_PROGRAM),
+ .cache = 0
+ },
+ .prepare = prepare_vs_constants,
+};
+
/**
* Update the surface state for a VS constant buffer.
*
@@ -88,101 +105,41 @@ brw_update_vs_constant_surface( GLcontext *ctx,
GLuint surf)
{
struct brw_context *brw = brw_context(ctx);
- struct brw_surface_key key;
struct brw_vertex_program *vp =
(struct brw_vertex_program *) brw->vertex_program;
const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
assert(surf == 0);
- /* If we're in this state update atom, we need to update VS constants, so
- * free the old buffer and create a new one for the new contents.
- */
- drm_intel_bo_unreference(vp->const_buffer);
- vp->const_buffer = brw_vs_update_constant_buffer(brw);
-
/* If there's no constant buffer, then no surface BO is needed to point at
* it.
*/
- if (vp->const_buffer == NULL) {
+ if (brw->vs.const_bo == NULL) {
drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
brw->vs.surf_bo[surf] = NULL;
return;
}
- memset(&key, 0, sizeof(key));
-
- key.format = MESA_FORMAT_RGBA_FLOAT32;
- key.internal_format = GL_RGBA;
- key.bo = vp->const_buffer;
- key.depthmode = GL_NONE;
- key.pitch = params->NumParameters;
- key.width = params->NumParameters;
- key.height = 1;
- key.depth = 1;
- key.cpp = 16;
-
- /*
- printf("%s:\n", __FUNCTION__);
- printf(" width %d height %d depth %d cpp %d pitch %d\n",
- key.width, key.height, key.depth, key.cpp, key.pitch);
- */
-
- drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
- brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
- BRW_SS_SURFACE,
- &key, sizeof(key),
- &key.bo, 1,
- NULL);
- if (brw->vs.surf_bo[surf] == NULL) {
- brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key);
- }
+ brw_create_constant_surface(brw, brw->vs.const_bo, params->NumParameters,
+ &brw->vs.surf_bo[surf],
+ &brw->vs.surf_offset[surf]);
}
-/**
- * Constructs the binding table for the VS surface state.
- */
-static drm_intel_bo *
-brw_vs_get_binding_table(struct brw_context *brw)
+static void
+prepare_vs_surfaces(struct brw_context *brw)
{
- drm_intel_bo *bind_bo;
-
- bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
- NULL, 0,
- brw->vs.surf_bo, BRW_VS_MAX_SURF,
- NULL);
-
- if (bind_bo == NULL) {
- GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint);
- uint32_t data[BRW_VS_MAX_SURF];
- int i;
-
- for (i = 0; i < BRW_VS_MAX_SURF; i++)
- if (brw->vs.surf_bo[i])
- data[i] = brw->vs.surf_bo[i]->offset;
- else
- data[i] = 0;
-
- bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
- NULL, 0,
- brw->vs.surf_bo, BRW_VS_MAX_SURF,
- data, data_size);
-
- /* Emit binding table relocations to surface state */
- for (i = 0; i < BRW_VS_MAX_SURF; i++) {
- if (brw->vs.surf_bo[i] != NULL) {
- /* The presumed offsets were set in the data values for
- * brw_upload_cache.
- */
- drm_intel_bo_emit_reloc(bind_bo, i * 4,
- brw->vs.surf_bo[i], 0,
- I915_GEM_DOMAIN_INSTRUCTION, 0);
- }
- }
+ int nr_surfaces = 0;
+
+ if (brw->vs.const_bo) {
+ brw_add_validated_bo(brw, brw->vs.const_bo);
+ nr_surfaces = 1;
}
- return bind_bo;
+ if (brw->vs.nr_surfaces != nr_surfaces) {
+ brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
+ brw->vs.nr_surfaces = nr_surfaces;
+ }
}
/**
@@ -192,43 +149,50 @@ brw_vs_get_binding_table(struct brw_context *brw)
* to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and
* CACHE_NEW_SURF_BIND for the binding table upload.
*/
-static void prepare_vs_surfaces(struct brw_context *brw )
+static void upload_vs_surfaces(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
+ uint32_t *bind;
int i;
- int nr_surfaces = 0;
-
- brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER);
- for (i = 0; i < BRW_VS_MAX_SURF; i++) {
- if (brw->vs.surf_bo[i] != NULL) {
- nr_surfaces = i + 1;
+ /* BRW_NEW_NR_VS_SURFACES */
+ if (brw->vs.nr_surfaces == 0) {
+ if (brw->vs.bind_bo) {
+ drm_intel_bo_unreference(brw->vs.bind_bo);
+ brw->vs.bind_bo = NULL;
+ brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE;
}
+ return;
}
- if (brw->vs.nr_surfaces != nr_surfaces) {
- brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
- brw->vs.nr_surfaces = nr_surfaces;
- }
+ brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER);
- /* Note that we don't end up updating the bind_bo if we don't have a
- * surface to be pointing at. This should be relatively harmless, as it
- * just slightly increases our working set size.
+ /* Might want to calculate nr_surfaces first, to avoid taking up so much
+ * space for the binding table. (once we have vs samplers)
*/
- if (brw->vs.nr_surfaces != 0) {
- drm_intel_bo_unreference(brw->vs.bind_bo);
- brw->vs.bind_bo = brw_vs_get_binding_table(brw);
+ bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_VS_MAX_SURF,
+ 32, &brw->vs.bind_bo, &brw->vs.bind_bo_offset);
+
+ for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+ /* BRW_NEW_VS_CONSTBUF */
+ if (brw->vs.surf_bo[i]) {
+ bind[i] = brw->vs.surf_offset[i];
+ } else {
+ bind[i] = 0;
+ }
}
+
+ brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE;
}
const struct brw_tracked_state brw_vs_surfaces = {
.dirty = {
- .mesa = (_NEW_PROGRAM_CONSTANTS),
- .brw = (BRW_NEW_VERTEX_PROGRAM),
+ .mesa = 0,
+ .brw = (BRW_NEW_VS_CONSTBUF |
+ BRW_NEW_NR_VS_SURFACES |
+ BRW_NEW_BATCH),
.cache = 0
},
.prepare = prepare_vs_surfaces,
+ .emit = upload_vs_surfaces,
};
-
-
-
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index a02e958c5e6..14227a51332 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -83,6 +83,7 @@ static void brw_destroy_context( struct intel_context *intel )
dri_bo_release(&brw->vs.prog_bo);
dri_bo_release(&brw->vs.state_bo);
dri_bo_release(&brw->vs.bind_bo);
+ dri_bo_release(&brw->vs.const_bo);
dri_bo_release(&brw->gs.prog_bo);
dri_bo_release(&brw->gs.state_bo);
dri_bo_release(&brw->clip.prog_bo);
@@ -99,6 +100,7 @@ static void brw_destroy_context( struct intel_context *intel )
dri_bo_release(&brw->wm.sampler_bo);
dri_bo_release(&brw->wm.prog_bo);
dri_bo_release(&brw->wm.state_bo);
+ dri_bo_release(&brw->wm.const_bo);
dri_bo_release(&brw->cc.prog_bo);
dri_bo_release(&brw->cc.state_bo);
dri_bo_release(&brw->cc.vp_bo);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index 9fbabdc2852..1fc802cfa65 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -74,7 +74,7 @@ static drm_intel_bo *upload_default_color( struct brw_context *brw,
COPY_4V(sdc.color, color);
return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR,
- &sdc, sizeof(sdc), NULL, 0);
+ &sdc, sizeof(sdc));
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index c7b61240e75..17b016b569b 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -196,36 +196,40 @@ brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
}
}
-static drm_intel_bo *
-brw_create_texture_surface( struct brw_context *brw,
- struct brw_surface_key *key )
+static void
+brw_update_texture_surface( GLcontext *ctx, GLuint unit )
{
+ struct brw_context *brw = brw_context(ctx);
+ struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+ struct intel_texture_object *intelObj = intel_texture_object(tObj);
+ struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
+ const GLuint surf_index = SURF_INDEX_TEXTURE(unit);
struct brw_surface_state surf;
- drm_intel_bo *bo;
+ void *map;
memset(&surf, 0, sizeof(surf));
surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
- surf.ss0.surface_type = translate_tex_target(key->target);
- surf.ss0.surface_format = translate_tex_format(key->format,
- key->internal_format,
- key->depthmode);
+ surf.ss0.surface_type = translate_tex_target(tObj->Target);
+ surf.ss0.surface_format = translate_tex_format(firstImage->TexFormat,
+ firstImage->InternalFormat,
+ tObj->DepthMode);
/* This is ok for all textures with channel width 8bit or less:
*/
/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
- surf.ss1.base_addr = key->bo->offset; /* reloc */
+ surf.ss1.base_addr = intelObj->mt->region->buffer->offset; /* reloc */
- surf.ss2.mip_count = key->last_level - key->first_level;
- surf.ss2.width = key->width - 1;
- surf.ss2.height = key->height - 1;
- brw_set_surface_tiling(&surf, key->tiling);
- surf.ss3.pitch = (key->pitch * key->cpp) - 1;
- surf.ss3.depth = key->depth - 1;
+ surf.ss2.mip_count = intelObj->lastLevel - intelObj->firstLevel;
+ surf.ss2.width = firstImage->Width - 1;
+ surf.ss2.height = firstImage->Height - 1;
+ brw_set_surface_tiling(&surf, intelObj->mt->region->tiling);
+ surf.ss3.pitch = (intelObj->mt->region->pitch * intelObj->mt->cpp) - 1;
+ surf.ss3.depth = firstImage->Depth - 1;
surf.ss4.min_lod = 0;
- if (key->target == GL_TEXTURE_CUBE_MAP) {
+ if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
surf.ss0.cube_pos_x = 1;
surf.ss0.cube_pos_y = 1;
surf.ss0.cube_pos_z = 1;
@@ -234,71 +238,33 @@ brw_create_texture_surface( struct brw_context *brw,
surf.ss0.cube_neg_z = 1;
}
- bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
- key, sizeof(*key),
- &key->bo, 1,
- &surf, sizeof(surf));
+ map = brw_state_batch(brw, sizeof(surf), 32,
+ &brw->wm.surf_bo[surf_index],
+ &brw->wm.surf_offset[surf_index]);
+ memcpy(map, &surf, sizeof(surf));
/* Emit relocation to surface contents */
- drm_intel_bo_emit_reloc(bo, offsetof(struct brw_surface_state, ss1),
- key->bo, 0,
+ drm_intel_bo_emit_reloc(brw->wm.surf_bo[surf_index],
+ brw->wm.surf_offset[surf_index] +
+ offsetof(struct brw_surface_state, ss1),
+ intelObj->mt->region->buffer, 0,
I915_GEM_DOMAIN_SAMPLER, 0);
-
- return bo;
-}
-
-static void
-brw_update_texture_surface( GLcontext *ctx, GLuint unit )
-{
- struct brw_context *brw = brw_context(ctx);
- struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
- struct intel_texture_object *intelObj = intel_texture_object(tObj);
- struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
- struct brw_surface_key key;
- const GLuint surf = SURF_INDEX_TEXTURE(unit);
-
- memset(&key, 0, sizeof(key));
-
- key.format = firstImage->TexFormat;
- key.internal_format = firstImage->InternalFormat;
- key.pitch = intelObj->mt->region->pitch;
- key.depth = firstImage->Depth;
- key.bo = intelObj->mt->region->buffer;
- key.offset = 0;
-
- key.target = tObj->Target;
- key.depthmode = tObj->DepthMode;
- key.first_level = intelObj->firstLevel;
- key.last_level = intelObj->lastLevel;
- key.width = firstImage->Width;
- key.height = firstImage->Height;
- key.cpp = intelObj->mt->cpp;
- key.tiling = intelObj->mt->region->tiling;
-
- drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
- brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
- BRW_SS_SURFACE,
- &key, sizeof(key),
- &key.bo, 1,
- NULL);
- if (brw->wm.surf_bo[surf] == NULL) {
- brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key);
- }
}
-
-
/**
* Create the constant buffer surface. Vertex/fragment shader constants will be
* read from this buffer with Data Port Read instructions/messages.
*/
-drm_intel_bo *
-brw_create_constant_surface( struct brw_context *brw,
- struct brw_surface_key *key )
+void
+brw_create_constant_surface(struct brw_context *brw,
+ drm_intel_bo *bo,
+ int width,
+ drm_intel_bo **out_bo,
+ uint32_t *out_offset)
{
- const GLint w = key->width - 1;
+ const GLint w = width - 1;
struct brw_surface_state surf;
- drm_intel_bo *bo;
+ void *map;
memset(&surf, 0, sizeof(surf));
@@ -306,29 +272,26 @@ brw_create_constant_surface( struct brw_context *brw,
surf.ss0.surface_type = BRW_SURFACE_BUFFER;
surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
- assert(key->bo);
- surf.ss1.base_addr = key->bo->offset; /* reloc */
+ assert(bo);
+ surf.ss1.base_addr = bo->offset; /* reloc */
surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */
surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */
surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */
- surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */
- brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
-
- bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
- key, sizeof(*key),
- &key->bo, 1,
- &surf, sizeof(surf));
+ surf.ss3.pitch = (width * 16) - 1; /* ignored?? */
+ brw_set_surface_tiling(&surf, I915_TILING_NONE); /* tiling now allowed */
+
+ map = brw_state_batch(brw, sizeof(surf), 32, out_bo, out_offset);
+ memcpy(map, &surf, sizeof(surf));
/* Emit relocation to surface contents. Section 5.1.1 of the gen4
* bspec ("Data Cache") says that the data cache does not exist as
* a separate cache and is just the sampler cache.
*/
- drm_intel_bo_emit_reloc(bo, offsetof(struct brw_surface_state, ss1),
- key->bo, 0,
+ drm_intel_bo_emit_reloc(*out_bo, (*out_offset +
+ offsetof(struct brw_surface_state, ss1)),
+ bo, 0,
I915_GEM_DOMAIN_SAMPLER, 0);
-
- return bo;
}
/* Creates a new WM constant buffer reflecting the current fragment program's
@@ -337,89 +300,45 @@ brw_create_constant_surface( struct brw_context *brw,
* Otherwise, constants go through the CURBEs using the brw_constant_buffer
* state atom.
*/
-static drm_intel_bo *
-brw_wm_update_constant_buffer(struct brw_context *brw)
+static void
+prepare_wm_constants(struct brw_context *brw)
{
+ GLcontext *ctx = &brw->intel.ctx;
struct intel_context *intel = &brw->intel;
struct brw_fragment_program *fp =
(struct brw_fragment_program *) brw->fragment_program;
const struct gl_program_parameter_list *params = fp->program.Base.Parameters;
const int size = params->NumParameters * 4 * sizeof(GLfloat);
- drm_intel_bo *const_buffer;
-
- /* BRW_NEW_FRAGMENT_PROGRAM */
- if (!fp->use_const_buffer)
- return NULL;
-
- const_buffer = drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer",
- size, 64);
- /* _NEW_PROGRAM_CONSTANTS */
- drm_intel_bo_subdata(const_buffer, 0, size, params->ParameterValues);
-
- return const_buffer;
-}
+ _mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
-/**
- * Update the surface state for a WM constant buffer.
- * The constant buffer will be (re)allocated here if needed.
- */
-static void
-brw_update_wm_constant_surface( GLcontext *ctx,
- GLuint surf)
-{
- struct brw_context *brw = brw_context(ctx);
- struct brw_surface_key key;
- struct brw_fragment_program *fp =
- (struct brw_fragment_program *) brw->fragment_program;
- const struct gl_program_parameter_list *params =
- fp->program.Base.Parameters;
-
- /* If we're in this state update atom, we need to update WM constants, so
- * free the old buffer and create a new one for the new contents.
- */
- drm_intel_bo_unreference(fp->const_buffer);
- fp->const_buffer = brw_wm_update_constant_buffer(brw);
-
- /* If there's no constant buffer, then no surface BO is needed to point at
- * it.
- */
- if (fp->const_buffer == NULL) {
- drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
- brw->wm.surf_bo[surf] = NULL;
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ if (!fp->use_const_buffer) {
+ if (brw->wm.const_bo) {
+ drm_intel_bo_unreference(brw->wm.const_bo);
+ brw->wm.const_bo = NULL;
+ brw->state.dirty.brw |= BRW_NEW_WM_CONSTBUF;
+ }
return;
}
- memset(&key, 0, sizeof(key));
+ drm_intel_bo_unreference(brw->wm.const_bo);
+ brw->wm.const_bo = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer",
+ size, 64);
- key.format = MESA_FORMAT_RGBA_FLOAT32;
- key.internal_format = GL_RGBA;
- key.bo = fp->const_buffer;
- key.depthmode = GL_NONE;
- key.pitch = params->NumParameters;
- key.width = params->NumParameters;
- key.height = 1;
- key.depth = 1;
- key.cpp = 16;
-
- /*
- printf("%s:\n", __FUNCTION__);
- printf(" width %d height %d depth %d cpp %d pitch %d\n",
- key.width, key.height, key.depth, key.cpp, key.pitch);
- */
-
- drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
- brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
- BRW_SS_SURFACE,
- &key, sizeof(key),
- &key.bo, 1,
- NULL);
- if (brw->wm.surf_bo[surf] == NULL) {
- brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key);
- }
- brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+ /* _NEW_PROGRAM_CONSTANTS */
+ drm_intel_bo_subdata(brw->wm.const_bo, 0, size, params->ParameterValues);
}
+const struct brw_tracked_state brw_wm_constants = {
+ .dirty = {
+ .mesa = (_NEW_PROGRAM_CONSTANTS),
+ .brw = (BRW_NEW_FRAGMENT_PROGRAM),
+ .cache = 0
+ },
+ .prepare = prepare_wm_constants,
+};
+
/**
* Updates surface / buffer for fragment shader constant buffer, if
* one is required.
@@ -428,20 +347,18 @@ brw_update_wm_constant_surface( GLcontext *ctx,
* BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for
* inclusion in the binding table.
*/
-static void prepare_wm_constant_surface(struct brw_context *brw )
+static void upload_wm_constant_surface(struct brw_context *brw )
{
- GLcontext *ctx = &brw->intel.ctx;
+ GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
struct brw_fragment_program *fp =
(struct brw_fragment_program *) brw->fragment_program;
- GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
-
- drm_intel_bo_unreference(fp->const_buffer);
- fp->const_buffer = brw_wm_update_constant_buffer(brw);
+ const struct gl_program_parameter_list *params =
+ fp->program.Base.Parameters;
/* If there's no constant buffer, then no surface BO is needed to point at
* it.
*/
- if (fp->const_buffer == 0) {
+ if (brw->wm.const_bo == 0) {
if (brw->wm.surf_bo[surf] != NULL) {
drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
brw->wm.surf_bo[surf] = NULL;
@@ -450,16 +367,20 @@ static void prepare_wm_constant_surface(struct brw_context *brw )
return;
}
- brw_update_wm_constant_surface(ctx, surf);
+ brw_create_constant_surface(brw, brw->wm.const_bo, params->NumParameters,
+ &brw->wm.surf_bo[surf],
+ &brw->wm.surf_offset[surf]);
+ brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
}
const struct brw_tracked_state brw_wm_constant_surface = {
.dirty = {
- .mesa = (_NEW_PROGRAM_CONSTANTS),
- .brw = (BRW_NEW_FRAGMENT_PROGRAM),
+ .mesa = 0,
+ .brw = (BRW_NEW_WM_CONSTBUF |
+ BRW_NEW_BATCH),
.cache = 0
},
- .prepare = prepare_wm_constant_surface,
+ .emit = upload_wm_constant_surface,
};
@@ -488,6 +409,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
uint32_t draw_x;
uint32_t draw_y;
} key;
+ struct brw_surface_state surf;
+ void *map;
memset(&key, 0, sizeof(key));
@@ -554,137 +477,123 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
(ctx->Color.BlendEnabled & (1 << unit)));
}
- drm_intel_bo_unreference(brw->wm.surf_bo[unit]);
- brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache,
- BRW_SS_SURFACE,
- &key, sizeof(key),
- &region_bo, 1,
- NULL);
-
- if (brw->wm.surf_bo[unit] == NULL) {
- struct brw_surface_state surf;
-
- memset(&surf, 0, sizeof(surf));
+ memset(&surf, 0, sizeof(surf));
- surf.ss0.surface_format = key.surface_format;
- surf.ss0.surface_type = key.surface_type;
- if (key.tiling == I915_TILING_NONE) {
- surf.ss1.base_addr = (key.draw_x + key.draw_y * key.pitch) * key.cpp;
+ surf.ss0.surface_format = key.surface_format;
+ surf.ss0.surface_type = key.surface_type;
+ if (key.tiling == I915_TILING_NONE) {
+ surf.ss1.base_addr = (key.draw_x + key.draw_y * key.pitch) * key.cpp;
+ } else {
+ uint32_t tile_base, tile_x, tile_y;
+ uint32_t pitch = key.pitch * key.cpp;
+
+ if (key.tiling == I915_TILING_X) {
+ tile_x = key.draw_x % (512 / key.cpp);
+ tile_y = key.draw_y % 8;
+ tile_base = ((key.draw_y / 8) * (8 * pitch));
+ tile_base += (key.draw_x - tile_x) / (512 / key.cpp) * 4096;
} else {
- uint32_t tile_base, tile_x, tile_y;
- uint32_t pitch = key.pitch * key.cpp;
-
- if (key.tiling == I915_TILING_X) {
- tile_x = key.draw_x % (512 / key.cpp);
- tile_y = key.draw_y % 8;
- tile_base = ((key.draw_y / 8) * (8 * pitch));
- tile_base += (key.draw_x - tile_x) / (512 / key.cpp) * 4096;
- } else {
- /* Y */
- tile_x = key.draw_x % (128 / key.cpp);
- tile_y = key.draw_y % 32;
- tile_base = ((key.draw_y / 32) * (32 * pitch));
- tile_base += (key.draw_x - tile_x) / (128 / key.cpp) * 4096;
- }
- assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
- assert(tile_x % 4 == 0);
- assert(tile_y % 2 == 0);
- /* Note that the low bits of these fields are missing, so
- * there's the possibility of getting in trouble.
- */
- surf.ss1.base_addr = tile_base;
- surf.ss5.x_offset = tile_x / 4;
- surf.ss5.y_offset = tile_y / 2;
- }
- if (region_bo != NULL)
- surf.ss1.base_addr += region_bo->offset; /* reloc */
-
- surf.ss2.width = key.width - 1;
- surf.ss2.height = key.height - 1;
- brw_set_surface_tiling(&surf, key.tiling);
- surf.ss3.pitch = (key.pitch * key.cpp) - 1;
-
- if (intel->gen < 6) {
- /* _NEW_COLOR */
- surf.ss0.color_blend = key.color_blend;
- surf.ss0.writedisable_red = !key.color_mask[0];
- surf.ss0.writedisable_green = !key.color_mask[1];
- surf.ss0.writedisable_blue = !key.color_mask[2];
- surf.ss0.writedisable_alpha = !key.color_mask[3];
+ /* Y */
+ tile_x = key.draw_x % (128 / key.cpp);
+ tile_y = key.draw_y % 32;
+ tile_base = ((key.draw_y / 32) * (32 * pitch));
+ tile_base += (key.draw_x - tile_x) / (128 / key.cpp) * 4096;
}
+ assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
+ assert(tile_x % 4 == 0);
+ assert(tile_y % 2 == 0);
+ /* Note that the low bits of these fields are missing, so
+ * there's the possibility of getting in trouble.
+ */
+ surf.ss1.base_addr = tile_base;
+ surf.ss5.x_offset = tile_x / 4;
+ surf.ss5.y_offset = tile_y / 2;
+ }
+ if (region_bo != NULL)
+ surf.ss1.base_addr += region_bo->offset; /* reloc */
- /* Key size will never match key size for textures, so we're safe. */
- brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache,
- BRW_SS_SURFACE,
- &key, sizeof(key),
- &region_bo, 1,
- &surf, sizeof(surf));
- if (region_bo != NULL) {
- /* We might sample from it, and we might render to it, so flag
- * them both. We might be able to figure out from other state
- * a more restrictive relocation to emit.
- */
- drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit],
- offsetof(struct brw_surface_state, ss1),
- region_bo,
- surf.ss1.base_addr - region_bo->offset,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER);
- }
+ surf.ss2.width = key.width - 1;
+ surf.ss2.height = key.height - 1;
+ brw_set_surface_tiling(&surf, key.tiling);
+ surf.ss3.pitch = (key.pitch * key.cpp) - 1;
+
+ if (intel->gen < 6) {
+ /* _NEW_COLOR */
+ surf.ss0.color_blend = key.color_blend;
+ surf.ss0.writedisable_red = !key.color_mask[0];
+ surf.ss0.writedisable_green = !key.color_mask[1];
+ surf.ss0.writedisable_blue = !key.color_mask[2];
+ surf.ss0.writedisable_alpha = !key.color_mask[3];
}
-}
+ map = brw_state_batch(brw, sizeof(surf), 32,
+ &brw->wm.surf_bo[unit],
+ &brw->wm.surf_offset[unit]);
+ memcpy(map, &surf, sizeof(surf));
+
+ if (region_bo != NULL) {
+ drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit],
+ brw->wm.surf_offset[unit] +
+ offsetof(struct brw_surface_state, ss1),
+ region_bo,
+ surf.ss1.base_addr - region_bo->offset,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER);
+ }
+}
-/**
- * Constructs the binding table for the WM surface state, which maps unit
- * numbers to surface state objects.
- */
-static drm_intel_bo *
-brw_wm_get_binding_table(struct brw_context *brw)
+static void
+prepare_wm_surfaces(struct brw_context *brw)
{
- drm_intel_bo *bind_bo;
-
- assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
-
- bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
- NULL, 0,
- brw->wm.surf_bo, brw->wm.nr_surfaces,
- NULL);
-
- if (bind_bo == NULL) {
- GLuint data_size = brw->wm.nr_surfaces * sizeof(GLuint);
- uint32_t data[BRW_WM_MAX_SURF];
- int i;
-
- for (i = 0; i < brw->wm.nr_surfaces; i++)
- if (brw->wm.surf_bo[i])
- data[i] = brw->wm.surf_bo[i]->offset;
- else
- data[i] = 0;
-
- bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
- NULL, 0,
- brw->wm.surf_bo, brw->wm.nr_surfaces,
- data, data_size);
-
- /* Emit binding table relocations to surface state */
- for (i = 0; i < BRW_WM_MAX_SURF; i++) {
- if (brw->wm.surf_bo[i] != NULL) {
- drm_intel_bo_emit_reloc(bind_bo, i * sizeof(GLuint),
- brw->wm.surf_bo[i], 0,
- I915_GEM_DOMAIN_INSTRUCTION, 0);
- }
+ GLcontext *ctx = &brw->intel.ctx;
+ int i;
+ int nr_surfaces = 0;
+
+ if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
+ for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+ struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+ struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+ struct intel_region *region = irb ? irb->region : NULL;
+
+ brw_add_validated_bo(brw, region->buffer);
+ nr_surfaces = SURF_INDEX_DRAW(i) + 1;
+ }
+ }
+
+ if (brw->wm.const_bo) {
+ brw_add_validated_bo(brw, brw->wm.const_bo);
+ nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1;
+ }
+
+ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+ const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
+ struct gl_texture_object *tObj = texUnit->_Current;
+ struct intel_texture_object *intelObj = intel_texture_object(tObj);
+
+ if (texUnit->_ReallyEnabled) {
+ brw_add_validated_bo(brw, intelObj->mt->region->buffer);
+ nr_surfaces = SURF_INDEX_TEXTURE(i) + 1;
}
}
- return bind_bo;
+ /* Have to update this in our prepare, since the unit's prepare
+ * relies on it.
+ */
+ if (brw->wm.nr_surfaces != nr_surfaces) {
+ brw->wm.nr_surfaces = nr_surfaces;
+ brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
+ }
}
-static void prepare_wm_surfaces(struct brw_context *brw )
+/**
+ * Constructs the set of surface state objects pointed to by the
+ * binding table.
+ */
+static void
+upload_wm_surfaces(struct brw_context *brw)
{
GLcontext *ctx = &brw->intel.ctx;
GLuint i;
- int old_nr_surfaces;
/* _NEW_BUFFERS | _NEW_COLOR */
/* Update surfaces for drawing buffers */
@@ -698,32 +607,21 @@ static void prepare_wm_surfaces(struct brw_context *brw )
brw_update_renderbuffer_surface(brw, NULL, 0);
}
- old_nr_surfaces = brw->wm.nr_surfaces;
- brw->wm.nr_surfaces = BRW_MAX_DRAW_BUFFERS;
-
- if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL)
- brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1;
-
/* Update surfaces for textures */
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
const GLuint surf = SURF_INDEX_TEXTURE(i);
- /* _NEW_TEXTURE, BRW_NEW_TEXDATA */
+ /* _NEW_TEXTURE */
if (texUnit->_ReallyEnabled) {
brw_update_texture_surface(ctx, i);
- brw->wm.nr_surfaces = surf + 1;
} else {
drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
brw->wm.surf_bo[surf] = NULL;
}
}
- drm_intel_bo_unreference(brw->wm.bind_bo);
- brw->wm.bind_bo = brw_wm_get_binding_table(brw);
-
- if (brw->wm.nr_surfaces != old_nr_surfaces)
- brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
+ brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
}
const struct brw_tracked_state brw_wm_surfaces = {
@@ -731,12 +629,48 @@ const struct brw_tracked_state brw_wm_surfaces = {
.mesa = (_NEW_COLOR |
_NEW_TEXTURE |
_NEW_BUFFERS),
- .brw = (BRW_NEW_CONTEXT |
- BRW_NEW_WM_SURFACES),
+ .brw = (BRW_NEW_BATCH),
.cache = 0
},
.prepare = prepare_wm_surfaces,
+ .emit = upload_wm_surfaces,
};
+/**
+ * Constructs the binding table for the WM surface state, which maps unit
+ * numbers to surface state objects.
+ */
+static void
+brw_wm_upload_binding_table(struct brw_context *brw)
+{
+ uint32_t *bind;
+ int i;
+
+ /* Might want to calculate nr_surfaces first, to avoid taking up so much
+ * space for the binding table.
+ */
+ bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_WM_MAX_SURF,
+ 32, &brw->wm.bind_bo, &brw->wm.bind_bo_offset);
+
+ for (i = 0; i < BRW_WM_MAX_SURF; i++) {
+ /* BRW_NEW_WM_SURFACES */
+ bind[i] = brw->wm.surf_offset[i];
+ if (brw->wm.surf_bo[i]) {
+ bind[i] = brw->wm.surf_offset[i];
+ } else {
+ bind[i] = 0;
+ }
+ }
+ brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE;
+}
+const struct brw_tracked_state brw_wm_binding_table = {
+ .dirty = {
+ .mesa = 0,
+ .brw = (BRW_NEW_BATCH |
+ BRW_NEW_WM_SURFACES),
+ .cache = 0
+ },
+ .emit = brw_wm_upload_binding_table,
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c
index 2e21e5f7335..34a9dc234c2 100644
--- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c
@@ -63,8 +63,7 @@ prepare_scissor_state(struct brw_context *brw)
drm_intel_bo_unreference(brw->sf.state_bo);
brw->sf.state_bo = brw_cache_data(&brw->cache, BRW_SF_UNIT,
- &scissor, sizeof(scissor),
- NULL, 0);
+ &scissor, sizeof(scissor));
}
const struct brw_tracked_state gen6_scissor_state = {
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index 8d96b44f1dc..51940efb443 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -87,7 +87,7 @@ upload_sf_state(struct brw_context *brw)
if (ctx->Polygon.CullFlag) {
switch (ctx->Polygon.CullFaceMode) {
case GL_FRONT:
- dw3 |= GEN6_SF_CULL_BOTH;
+ dw3 |= GEN6_SF_CULL_FRONT;
break;
case GL_BACK:
dw3 |= GEN6_SF_CULL_BACK;
diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c
index 0c2aa4206c6..301c68e7f9e 100644
--- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c
@@ -29,7 +29,6 @@
#include "brw_state.h"
#include "brw_defines.h"
#include "intel_batchbuffer.h"
-#include "main/macros.h"
/* The clip VP defines the guardband region where expensive clipping is skipped
* and fragments are allowed to be generated and clipped out cheaply by the SF.
@@ -51,8 +50,7 @@ prepare_clip_vp(struct brw_context *brw)
drm_intel_bo_unreference(brw->clip.vp_bo);
brw->clip.vp_bo = brw_cache_data(&brw->cache, BRW_CLIP_VP,
- &vp, sizeof(vp),
- NULL, 0);
+ &vp, sizeof(vp));
}
const struct brw_tracked_state gen6_clip_vp = {
@@ -95,8 +93,7 @@ prepare_sf_vp(struct brw_context *brw)
drm_intel_bo_unreference(brw->sf.vp_bo);
brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP,
- &sfv, sizeof(sfv),
- NULL, 0);
+ &sfv, sizeof(sfv));
}
const struct brw_tracked_state gen6_sf_vp = {
@@ -108,36 +105,6 @@ const struct brw_tracked_state gen6_sf_vp = {
.prepare = prepare_sf_vp,
};
-static void
-prepare_cc_vp(struct brw_context *brw)
-{
- GLcontext *ctx = &brw->intel.ctx;
- struct brw_cc_viewport ccv;
-
- /* _NEW_TRANSOFORM */
- if (ctx->Transform.DepthClamp) {
- /* _NEW_VIEWPORT */
- ccv.min_depth = MIN2(ctx->Viewport.Near, ctx->Viewport.Far);
- ccv.max_depth = MAX2(ctx->Viewport.Near, ctx->Viewport.Far);
- } else {
- ccv.min_depth = 0.0;
- ccv.max_depth = 1.0;
- }
-
- drm_intel_bo_unreference(brw->cc.vp_bo);
- brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv),
- NULL, 0);
-}
-
-const struct brw_tracked_state gen6_cc_vp = {
- .dirty = {
- .mesa = _NEW_VIEWPORT | _NEW_TRANSFORM,
- .brw = 0,
- .cache = 0,
- },
- .prepare = prepare_cc_vp,
-};
-
static void prepare_viewport_state_pointers(struct brw_context *brw)
{
brw_add_validated_bo(brw, brw->sf.state_bo);
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 325f6b43d30..863c85449d9 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -98,7 +98,8 @@ upload_wm_state(struct brw_context *brw)
/* CACHE_NEW_SAMPLER */
dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT;
- dw4 |= (1 << GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
+ dw4 |= (brw->wm.prog_data->first_curbe_grf <<
+ GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT;
dw5 |= GEN6_WM_DISPATCH_ENABLE;
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index 1116bccd8e7..698445c5268 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -49,6 +49,7 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch)
batch->ptr = batch->map;
batch->reserved_space = BATCH_RESERVED;
batch->dirty_state = ~0;
+ batch->state_batch_offset = batch->size;
}
struct intel_batchbuffer *
@@ -84,6 +85,12 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used)
int x_off = 0, y_off = 0;
drm_intel_bo_subdata(batch->buf, 0, used, batch->buffer);
+ if (batch->state_batch_offset != batch->size) {
+ drm_intel_bo_subdata(batch->buf,
+ batch->state_batch_offset,
+ batch->size - batch->state_batch_offset,
+ batch->buffer + batch->state_batch_offset);
+ }
batch->ptr = NULL;
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
index f4ac1825cd1..ae53f455117 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
@@ -23,6 +23,7 @@ struct intel_batchbuffer
GLubyte *ptr;
GLuint size;
+ uint32_t state_batch_offset;
#ifdef DEBUG
/** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */
@@ -92,7 +93,8 @@ static INLINE uint32_t float_as_int(float f)
static INLINE GLint
intel_batchbuffer_space(struct intel_batchbuffer *batch)
{
- return (batch->size - batch->reserved_space) - (batch->ptr - batch->map);
+ return (batch->state_batch_offset - batch->reserved_space) -
+ (batch->ptr - batch->map);
}
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index dec47974f2a..5f2035d79c9 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -827,8 +827,6 @@ intelDestroyContext(__DRIcontext * driContextPriv)
assert(intel); /* should never be null */
if (intel) {
- GLboolean release_texture_heaps;
-
INTEL_FIREVERTICES(intel);
_mesa_meta_free(&intel->ctx);
@@ -837,7 +835,6 @@ intelDestroyContext(__DRIcontext * driContextPriv)
intel->vtbl.destroy(intel);
- release_texture_heaps = (intel->ctx.Shared->RefCount == 1);
_swsetup_DestroyContext(&intel->ctx);
_tnl_DestroyContext(&intel->ctx);
_vbo_DestroyContext(&intel->ctx);
@@ -855,18 +852,6 @@ intelDestroyContext(__DRIcontext * driContextPriv)
drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
intel->first_post_swapbuffers_batch = NULL;
- if (release_texture_heaps) {
- /* Nothing is currently done here to free texture heaps;
- * but we're not using the texture heap utilities, so I
- * rather think we shouldn't. I've taken a look, and can't
- * find any private texture data hanging around anywhere, but
- * I'm not yet certain there isn't any at all...
- */
- /* if (INTEL_DEBUG & DEBUG_TEXTURE)
- fprintf(stderr, "do something to free texture heaps\n");
- */
- }
-
driDestroyOptionCache(&intel->optionCache);
/* free the Mesa context */
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index 14ff4a96950..c7ac2de01e6 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -261,6 +261,8 @@ extern char *__progname;
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
#define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1))
+#define ROUND_DOWN_TO(value, alignment) (ALIGN(value - alignment - 1, \
+ alignment))
#define IS_POWER_OF_TWO(val) (((val) & (val - 1)) == 0)
static INLINE uint32_t
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 8f61f1f5b24..4a83886fc16 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -42,9 +42,6 @@
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
-#ifndef I915
-#include "brw_state.h"
-#endif
#define FILE_DEBUG_FLAG DEBUG_FBO
@@ -296,12 +293,6 @@ intel_renderbuffer_set_region(struct intel_context *intel,
old = rb->region;
rb->region = NULL;
intel_region_reference(&rb->region, region);
-#ifndef I915
- if (old) {
- brw_state_cache_bo_delete(&brw_context(&intel->ctx)->surface_cache,
- old->buffer);
- }
-#endif
intel_region_release(&old);
}
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 71ef7a8e39b..39ac0205fa1 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -29,9 +29,6 @@
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
#include "intel_tex_layout.h"
-#ifndef I915
-#include "brw_state.h"
-#endif
#include "main/enums.h"
#define FILE_DEBUG_FLAG DEBUG_MIPTREE
@@ -203,19 +200,6 @@ intel_miptree_release(struct intel_context *intel,
DBG("%s deleting %p\n", __FUNCTION__, *mt);
-#ifndef I915
- /* Free up cached binding tables holding a reference on our buffer, to
- * avoid excessive memory consumption.
- *
- * This isn't as aggressive as we could be, as we'd like to do
- * it from any time we free the last ref on a region. But intel_region.c
- * is context-agnostic. Perhaps our constant state cache should be, as
- * well.
- */
- brw_state_cache_bo_delete(&brw_context(&intel->ctx)->surface_cache,
- (*mt)->region->buffer);
-#endif
-
intel_region_release(&((*mt)->region));
for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c
index c30552c5a79..fb840c1020d 100644
--- a/src/mesa/drivers/dri/intel/intel_span.c
+++ b/src/mesa/drivers/dri/intel/intel_span.c
@@ -257,6 +257,8 @@ intelSpanRenderStart(GLcontext * ctx)
for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
if (ctx->Texture.Unit[i]._ReallyEnabled) {
struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current;
+
+ intel_finalize_mipmap_tree(intel, i);
intel_tex_map_images(intel, intel_texture_object(texObj));
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
index 34d22b45591..ff3801dc676 100644
--- a/src/mesa/drivers/dri/r300/compiler/Makefile
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -9,6 +9,7 @@ C_SOURCES = \
radeon_code.c \
radeon_compiler.c \
radeon_emulate_branches.c \
+ radeon_emulate_loops.c \
radeon_program.c \
radeon_program_print.c \
radeon_opcodes.c \
diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript
index 663926e3216..50d9cdb7f2d 100755
--- a/src/mesa/drivers/dri/r300/compiler/SConscript
+++ b/src/mesa/drivers/dri/r300/compiler/SConscript
@@ -23,6 +23,7 @@ r300compiler = env.ConvenienceLibrary(
'radeon_pair_regalloc.c',
'radeon_optimize.c',
'radeon_emulate_branches.c',
+ 'radeon_emulate_loops.c',
'radeon_dataflow.c',
'radeon_dataflow_deadcode.c',
'radeon_dataflow_swizzles.c',
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 7f3b88ed759..bbdfa0d56f9 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -26,6 +26,7 @@
#include "radeon_dataflow.h"
#include "radeon_emulate_branches.h"
+#include "radeon_emulate_loops.h"
#include "radeon_program_alu.h"
#include "radeon_program_tex.h"
#include "r300_fragprog.h"
@@ -103,6 +104,14 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
/* XXX Ideally this should be done only for r3xx, but since
* we don't have branching support for r5xx, we use the emulation
* on all chipsets. */
+
+ if (c->Base.is_r500) {
+ rc_emulate_loops(&c->Base, R500_PFS_MAX_INST);
+ } else {
+ rc_emulate_loops(&c->Base, R300_PFS_MAX_ALU_INST);
+ }
+ debug_program_log(c, "after emulate loops");
+
rc_emulate_branches(&c->Base);
debug_program_log(c, "after emulate branches");
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index 507b2e532fe..e984797e2d3 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -30,6 +30,7 @@
#include "radeon_program_alu.h"
#include "radeon_swizzle.h"
#include "radeon_emulate_branches.h"
+#include "radeon_emulate_loops.h"
/*
* Take an already-setup and valid source then swizzle it appropriately to
@@ -348,7 +349,8 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
if (!valid_dst(compiler->code, &vpi->DstReg))
continue;
- if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
+ if (compiler->code->length >= R500_VS_MAX_ALU_DWORDS ||
+ (compiler->code->length >= R300_VS_MAX_ALU_DWORDS && !compiler->Base.is_r500)) {
rc_error(&compiler->Base, "Vertex program has too many instructions\n");
return;
}
@@ -404,7 +406,7 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
{
struct rc_instruction *inst;
unsigned int num_orig_temps = 0;
- char hwtemps[VSF_MAX_FRAGMENT_TEMPS];
+ char hwtemps[R300_VS_MAX_TEMPS];
struct temporary_allocation * ta;
unsigned int i, j;
@@ -463,11 +465,11 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
unsigned int orig = inst->U.I.DstReg.Index;
if (!ta[orig].Allocated) {
- for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) {
+ for(j = 0; j < R300_VS_MAX_TEMPS; ++j) {
if (!hwtemps[j])
break;
}
- if (j >= VSF_MAX_FRAGMENT_TEMPS) {
+ if (j >= R300_VS_MAX_TEMPS) {
fprintf(stderr, "Out of hw temporaries\n");
} else {
ta[orig].Allocated = 1;
@@ -600,6 +602,13 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
/* XXX Ideally this should be done only for r3xx, but since
* we don't have branching support for r5xx, we use the emulation
* on all chipsets. */
+ if (compiler->Base.is_r500){
+ rc_emulate_loops(&compiler->Base, R500_VS_MAX_ALU);
+ } else {
+ rc_emulate_loops(&compiler->Base, R300_VS_MAX_ALU);
+ }
+ debug_program_log(compiler, "after emulate loops");
+
rc_emulate_branches(&compiler->Base);
debug_program_log(compiler, "after emulate branches");
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index 1979e7e4e49..d03689763bc 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -235,8 +235,11 @@ struct rX00_fragment_program_code {
};
-#define VSF_MAX_FRAGMENT_LENGTH (255*4)
-#define VSF_MAX_FRAGMENT_TEMPS (14)
+#define R300_VS_MAX_ALU 256
+#define R300_VS_MAX_ALU_DWORDS (R300_VS_MAX_ALU * 4)
+#define R500_VS_MAX_ALU 1024
+#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4)
+#define R300_VS_MAX_TEMPS 32
#define VSF_MAX_INPUTS 32
#define VSF_MAX_OUTPUTS 32
@@ -244,8 +247,8 @@ struct rX00_fragment_program_code {
struct r300_vertex_program_code {
int length;
union {
- uint32_t d[VSF_MAX_FRAGMENT_LENGTH];
- float f[VSF_MAX_FRAGMENT_LENGTH];
+ uint32_t d[R500_VS_MAX_ALU_DWORDS];
+ float f[R500_VS_MAX_ALU_DWORDS];
} body;
int pos_end;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
new file mode 100644
index 00000000000..4c5d29f4217
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
@@ -0,0 +1,474 @@
+/*
+ * Copyright 2010 Tom Stellard <[email protected]>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ */
+
+#include "radeon_emulate_loops.h"
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+struct emulate_loop_state {
+ struct radeon_compiler * C;
+ struct loop_info * Loops;
+ unsigned int LoopCount;
+ unsigned int LoopReserved;
+};
+
+struct loop_info {
+ struct rc_instruction * BeginLoop;
+ struct rc_instruction * Cond;
+ struct rc_instruction * If;
+ struct rc_instruction * Brk;
+ struct rc_instruction * EndIf;
+ struct rc_instruction * EndLoop;
+};
+
+struct const_value {
+
+ struct radeon_compiler * C;
+ struct rc_src_register * Src;
+ float Value;
+ int HasValue;
+};
+
+struct count_inst {
+ struct radeon_compiler * C;
+ int Index;
+ rc_swizzle Swz;
+ float Amount;
+ int Unknown;
+};
+
+static float get_constant_value(struct radeon_compiler * c,
+ struct rc_src_register * src,
+ int chan)
+{
+ float base = 1.0f;
+ int swz = GET_SWZ(src->Swizzle, chan);
+ if(swz >= 4 || src->Index >= c->Program.Constants.Count ){
+ rc_error(c, "get_constant_value: Can't find a value.\n");
+ return 0.0f;
+ }
+ if(GET_BIT(src->Negate, chan)){
+ base = -1.0f;
+ }
+ return base *
+ c->Program.Constants.Constants[src->Index].u.Immediate[swz];
+}
+
+static int src_reg_is_immediate(struct rc_src_register * src,
+ struct radeon_compiler * c)
+{
+ return src->File == RC_FILE_CONSTANT &&
+ c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE;
+}
+
+static unsigned int loop_count_instructions(struct loop_info * loop)
+{
+ unsigned int count = 0;
+ struct rc_instruction * inst = loop->BeginLoop->Next;
+ while(inst != loop->EndLoop){
+ count++;
+ inst = inst->Next;
+ }
+ return count;
+}
+
+static unsigned int loop_calc_iterations(struct loop_info * loop,
+ unsigned int loop_count, unsigned int max_instructions)
+{
+ unsigned int icount = loop_count_instructions(loop);
+ return max_instructions / (loop_count * icount);
+}
+
+static void loop_unroll(struct emulate_loop_state * s,
+ struct loop_info *loop, unsigned int iterations)
+{
+ unsigned int i;
+ struct rc_instruction * ptr;
+ struct rc_instruction * first = loop->BeginLoop->Next;
+ struct rc_instruction * last = loop->EndLoop->Prev;
+ struct rc_instruction * append_to = last;
+ rc_remove_instruction(loop->BeginLoop);
+ rc_remove_instruction(loop->EndLoop);
+ for( i = 1; i < iterations; i++){
+ for(ptr = first; ptr != last->Next; ptr = ptr->Next){
+ struct rc_instruction *new = rc_alloc_instruction(s->C);
+ memcpy(new, ptr, sizeof(struct rc_instruction));
+ rc_insert_instruction(append_to, new);
+ append_to = new;
+ }
+ }
+}
+
+
+static void update_const_value(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ struct const_value * value = data;
+ if(value->Src->File != file ||
+ value->Src->Index != index ||
+ !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){
+ return;
+ }
+ switch(inst->U.I.Opcode){
+ case RC_OPCODE_MOV:
+ if(!src_reg_is_immediate(&inst->U.I.SrcReg[0], value->C)){
+ return;
+ }
+ value->HasValue = 1;
+ value->Value =
+ get_constant_value(value->C, &inst->U.I.SrcReg[0], 0);
+ break;
+ }
+}
+
+static void get_incr_amount(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ struct count_inst * count_inst = data;
+ int amnt_src_index;
+ const struct rc_opcode_info * opcode;
+ float amount;
+
+ if(file != RC_FILE_TEMPORARY ||
+ count_inst->Index != index ||
+ (1 << GET_SWZ(count_inst->Swz,0) != mask)){
+ return;
+ }
+ /* Find the index of the counter register. */
+ opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ if(opcode->NumSrcRegs != 2){
+ count_inst->Unknown = 1;
+ return;
+ }
+ if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY &&
+ inst->U.I.SrcReg[0].Index == count_inst->Index &&
+ inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){
+ amnt_src_index = 1;
+ } else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY &&
+ inst->U.I.SrcReg[1].Index == count_inst->Index &&
+ inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){
+ amnt_src_index = 0;
+ }
+ else{
+ count_inst->Unknown = 1;
+ return;
+ }
+ if(src_reg_is_immediate(&inst->U.I.SrcReg[amnt_src_index],
+ count_inst->C)){
+ amount = get_constant_value(count_inst->C,
+ &inst->U.I.SrcReg[amnt_src_index], 0);
+ }
+ else{
+ count_inst->Unknown = 1 ;
+ return;
+ }
+ switch(inst->U.I.Opcode){
+ case RC_OPCODE_ADD:
+ count_inst->Amount += amount;
+ break;
+ case RC_OPCODE_SUB:
+ if(amnt_src_index == 0){
+ count_inst->Unknown = 0;
+ return;
+ }
+ count_inst->Amount -= amount;
+ break;
+ default:
+ count_inst->Unknown = 1;
+ return;
+ }
+
+}
+
+static int transform_const_loop(struct emulate_loop_state * s,
+ struct loop_info * loop,
+ struct rc_instruction * cond)
+{
+ int end_loops = 1;
+ int iterations;
+ struct count_inst count_inst;
+ float limit_value;
+ struct rc_src_register * counter;
+ struct rc_src_register * limit;
+ struct const_value counter_value;
+ struct rc_instruction * inst;
+
+ /* Find the counter and the upper limit */
+
+ if(src_reg_is_immediate(&cond->U.I.SrcReg[0], s->C)){
+ limit = &cond->U.I.SrcReg[0];
+ counter = &cond->U.I.SrcReg[1];
+ }
+ else if(src_reg_is_immediate(&cond->U.I.SrcReg[1], s->C)){
+ limit = &cond->U.I.SrcReg[1];
+ counter = &cond->U.I.SrcReg[0];
+ }
+ else{
+ DBG("No constant limit.\n");
+ return 0;
+ }
+
+ /* Find the initial value of the counter */
+ counter_value.Src = counter;
+ counter_value.Value = 0.0f;
+ counter_value.HasValue = 0;
+ counter_value.C = s->C;
+ for(inst = s->C->Program.Instructions.Next; inst != loop->BeginLoop;
+ inst = inst->Next){
+ rc_for_all_writes_mask(inst, update_const_value, &counter_value);
+ }
+ if(!counter_value.HasValue){
+ DBG("Initial counter value cannot be determined.\n");
+ return 0;
+ }
+ DBG("Initial counter value is %f\n", counter_value.Value);
+ /* Determine how the counter is modified each loop */
+ count_inst.C = s->C;
+ count_inst.Index = counter->Index;
+ count_inst.Swz = counter->Swizzle;
+ count_inst.Amount = 0.0f;
+ count_inst.Unknown = 0;
+ for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){
+ switch(inst->U.I.Opcode){
+ /* XXX In the future we might want to try to unroll nested
+ * loops here.*/
+ case RC_OPCODE_BGNLOOP:
+ end_loops++;
+ break;
+ case RC_OPCODE_ENDLOOP:
+ loop->EndLoop = inst;
+ end_loops--;
+ break;
+ /* XXX Check if the counter is modified within an if statement.
+ */
+ case RC_OPCODE_IF:
+ break;
+ default:
+ rc_for_all_writes_mask(inst, get_incr_amount, &count_inst);
+ if(count_inst.Unknown){
+ return 0;
+ }
+ break;
+ }
+ }
+ /* Infinite loop */
+ if(count_inst.Amount == 0.0f){
+ return 0;
+ }
+ DBG("Counter is increased by %f each iteration.\n", count_inst.Amount);
+ /* Calculate the number of iterations of this loop. Keeping this
+ * simple, since we only support increment and decrement loops.
+ */
+ limit_value = get_constant_value(s->C, limit, 0);
+ iterations = (int) ((limit_value - counter_value.Value) /
+ count_inst.Amount);
+
+ DBG("Loop will have %d iterations.\n", iterations);
+
+ /* Prepare loop for unrolling */
+ rc_remove_instruction(loop->Cond);
+ rc_remove_instruction(loop->If);
+ rc_remove_instruction(loop->Brk);
+ rc_remove_instruction(loop->EndIf);
+
+ loop_unroll(s, loop, iterations);
+ loop->EndLoop = NULL;
+ return 1;
+}
+
+/**
+ * This function prepares a loop to be unrolled by converting it into an if
+ * statement. Here is an outline of the conversion process:
+ * BGNLOOP; -> BGNLOOP;
+ * <Additional conditional code> -> <Additional conditional code>
+ * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];
+ * IF temp[0]; -> IF temp[0];
+ * BRK; ->
+ * ENDIF; -> <Loop Body>
+ * <Loop Body> -> ENDIF;
+ * ENDLOOP; -> ENDLOOP
+ *
+ * @param inst A pointer to a BGNLOOP instruction.
+ * @return If the loop can be unrolled, a pointer to the first instruction of
+ * the unrolled loop.
+ * Otherwise, A pointer to the ENDLOOP instruction.
+ * Null if there is an error.
+ */
+static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
+ struct rc_instruction * inst)
+{
+ struct loop_info *loop;
+ struct rc_instruction * ptr;
+
+ memory_pool_array_reserve(&s->C->Pool, struct loop_info,
+ s->Loops, s->LoopCount, s->LoopReserved, 1);
+
+ loop = &s->Loops[s->LoopCount++];
+ memset(loop, 0, sizeof(struct loop_info));
+ if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){
+ rc_error(s->C, "expected BGNLOOP\n", __FUNCTION__);
+ return NULL;
+ }
+ loop->BeginLoop = inst;
+
+ for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next){
+ switch(ptr->U.I.Opcode){
+ case RC_OPCODE_BGNLOOP:
+ /* Nested loop */
+ ptr = transform_loop(s, ptr);
+ if(!ptr){
+ return NULL;
+ }
+ break;
+ case RC_OPCODE_BRK:
+ loop->Brk = ptr;
+ if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF){
+ rc_error(s->C,
+ "%s: expected ENDIF\n",__FUNCTION__);
+ return NULL;
+ }
+ loop->EndIf = ptr->Next;
+ if(ptr->Prev->U.I.Opcode != RC_OPCODE_IF){
+ rc_error(s->C,
+ "%s: expected IF\n", __FUNCTION__);
+ return NULL;
+ }
+ loop->If = ptr->Prev;
+ switch(loop->If->Prev->U.I.Opcode){
+ case RC_OPCODE_SLT:
+ case RC_OPCODE_SGE:
+ case RC_OPCODE_SGT:
+ case RC_OPCODE_SLE:
+ case RC_OPCODE_SEQ:
+ case RC_OPCODE_SNE:
+ break;
+ default:
+ rc_error(s->C, "%s expected conditional\n",
+ __FUNCTION__);
+ return NULL;
+ }
+ loop->Cond = loop->If->Prev;
+ ptr = loop->EndIf;
+ break;
+ case RC_OPCODE_ENDLOOP:
+ loop->EndLoop = ptr;
+ break;
+ }
+ }
+ /* Reverse the conditional instruction */
+ switch(loop->Cond->U.I.Opcode){
+ case RC_OPCODE_SGE:
+ loop->Cond->U.I.Opcode = RC_OPCODE_SLT;
+ break;
+ case RC_OPCODE_SLT:
+ loop->Cond->U.I.Opcode = RC_OPCODE_SGE;
+ break;
+ case RC_OPCODE_SLE:
+ loop->Cond->U.I.Opcode = RC_OPCODE_SGT;
+ break;
+ case RC_OPCODE_SGT:
+ loop->Cond->U.I.Opcode = RC_OPCODE_SLE;
+ break;
+ case RC_OPCODE_SEQ:
+ loop->Cond->U.I.Opcode = RC_OPCODE_SNE;
+ break;
+ case RC_OPCODE_SNE:
+ loop->Cond->U.I.Opcode = RC_OPCODE_SEQ;
+ break;
+ default:
+ rc_error(s->C, "loop->Cond is not a conditional.\n");
+ return NULL;
+ }
+
+ /* Check if the number of loops is known at compile time. */
+ if(transform_const_loop(s, loop, ptr)){
+ return loop->BeginLoop->Next;
+ }
+
+ /* Prepare the loop to be unrolled */
+ rc_remove_instruction(loop->Brk);
+ rc_remove_instruction(loop->EndIf);
+ rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf);
+ return loop->EndLoop;
+}
+
+static void rc_transform_loops(struct emulate_loop_state * s)
+{
+ struct rc_instruction * ptr = s->C->Program.Instructions.Next;
+ while(ptr != &s->C->Program.Instructions) {
+ if(ptr->Type == RC_INSTRUCTION_NORMAL &&
+ ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
+ ptr = transform_loop(s, ptr);
+ if(!ptr){
+ return;
+ }
+ }
+ ptr = ptr->Next;
+ }
+}
+
+static void rc_unroll_loops(struct emulate_loop_state *s,
+ unsigned int max_instructions)
+{
+ int i;
+ /* Iterate backwards of the list of loops so that loops that nested
+ * loops are unrolled first.
+ */
+ for( i = s->LoopCount - 1; i >= 0; i-- ){
+ if(!s->Loops[i].EndLoop){
+ continue;
+ }
+ unsigned int iterations = loop_calc_iterations(&s->Loops[i],
+ s->LoopCount, max_instructions);
+ loop_unroll(s, &s->Loops[i], iterations);
+ }
+}
+
+void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions)
+{
+ struct emulate_loop_state s;
+
+ memset(&s, 0, sizeof(struct emulate_loop_state));
+ s.C = c;
+
+ /* We may need to move these two operations to r3xx_(vert|frag)prog.c
+ * and run the optimization passes between them in order to increase
+ * the number of unrolls we can do for each loop.
+ */
+ rc_transform_loops(&s);
+
+ rc_unroll_loops(&s, max_instructions);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
new file mode 100644
index 00000000000..ddcf1c0fabe
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
@@ -0,0 +1,12 @@
+
+
+#ifndef RADEON_EMULATE_LOOPS_H
+#define RADEON_EMULATE_LOOPS_H
+
+#define MAX_ITERATIONS 8
+
+struct radeon_compiler;
+
+void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions);
+
+#endif /* RADEON_EMULATE_LOOPS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index d593b3e81ae..1dc16855dc1 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -368,6 +368,24 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.NumSrcRegs = 0
},
{
+ .Opcode = RC_OPCODE_BGNLOOP,
+ .Name = "BGNLOOP",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0
+ },
+ {
+ .Opcode = RC_OPCODE_BRK,
+ .Name = "BRK",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0
+ },
+ {
+ .Opcode = RC_OPCODE_ENDLOOP,
+ .Name = "ENDLOOP",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0,
+ },
+ {
.Opcode = RC_OPCODE_REPL_ALPHA,
.Name = "REPL_ALPHA",
.HasDstReg = 1
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
index 87a2e23084c..91c82ac0890 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -180,6 +180,12 @@ typedef enum {
/** branch instruction: has no effect */
RC_OPCODE_ENDIF,
+
+ RC_OPCODE_BGNLOOP,
+
+ RC_OPCODE_BRK,
+
+ RC_OPCODE_ENDLOOP,
/** special instruction, used in R300-R500 fragment program pair instructions
* indicates that the result of the alpha operation shall be replicated
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index 6992ca59dbf..e4b302bbad9 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -376,13 +376,12 @@ static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
ctx->Const.MaxDrawBuffers = 1;
ctx->Const.MaxColorAttachments = 1;
- /* currently bogus data */
if (r300->options.hw_tcl_enabled) {
- ctx->Const.VertexProgram.MaxNativeInstructions = VSF_MAX_FRAGMENT_LENGTH / 4;
- ctx->Const.VertexProgram.MaxNativeAluInstructions = VSF_MAX_FRAGMENT_LENGTH / 4;
- ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */
+ ctx->Const.VertexProgram.MaxNativeInstructions = 255;
+ ctx->Const.VertexProgram.MaxNativeAluInstructions = 255;
+ ctx->Const.VertexProgram.MaxNativeAttribs = 16;
ctx->Const.VertexProgram.MaxNativeTemps = 32;
- ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */
+ ctx->Const.VertexProgram.MaxNativeParameters = 256;
ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
}
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index 61133e686f1..88d6b06df56 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -6159,7 +6159,7 @@ GLboolean callPreSub(r700_AssemblerBase* pAsm,
}
if(uNumValidSrc > 0)
{
- prelude_cf_ptr = pAsm->cf_current_alu_clause_ptr;
+ prelude_cf_ptr = (R700ControlFlowGenericClause*) pAsm->cf_current_alu_clause_ptr;
pAsm->alu_x_opcode = SQ_CF_INST_ALU;
}
@@ -6279,7 +6279,7 @@ GLboolean callPreSub(r700_AssemblerBase* pAsm,
next_ins(pAsm);
- pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = pAsm->cf_current_alu_clause_ptr;
+ pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = (R700ControlFlowGenericClause*) pAsm->cf_current_alu_clause_ptr;
pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr;
pAsm->alu_x_opcode = SQ_CF_INST_ALU;
}
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
index 5a90f729e68..aab1a7947ab 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.c
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -563,11 +563,15 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
/* see if we need any point_sprite replacements, also increase num_interp
* as there's no vp output for them */
- for (i = FRAG_ATTRIB_TEX0; i<= FRAG_ATTRIB_TEX7; i++)
+ if (ctx->Point.PointSprite)
{
- if(ctx->Point.CoordReplace[i - FRAG_ATTRIB_TEX0] == GL_TRUE) {
- ui++;
- point_sprite = GL_TRUE;
+ for (i = FRAG_ATTRIB_TEX0; i<= FRAG_ATTRIB_TEX7; i++)
+ {
+ if (ctx->Point.CoordReplace[i - FRAG_ATTRIB_TEX0] == GL_TRUE)
+ {
+ ui++;
+ point_sprite = GL_TRUE;
+ }
}
}
@@ -670,8 +674,9 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
for(i=0; i<8; i++)
{
+ GLboolean coord_replace = ctx->Point.PointSprite && ctx->Point.CoordReplace[i];
unBit = 1 << (VERT_RESULT_TEX0 + i);
- if((OutputsWritten & unBit) || (ctx->Point.CoordReplace[i] == GL_TRUE))
+ if ((OutputsWritten & unBit) || coord_replace)
{
ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i];
SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
@@ -679,7 +684,7 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
SEMANTIC_shift, SEMANTIC_mask);
CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
/* ARB_point_sprite */
- if(ctx->Point.CoordReplace[i] == GL_TRUE)
+ if (coord_replace)
{
SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit);
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c
index bcac125baf4..d2b190e42e0 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.c
@@ -593,12 +593,7 @@ static int image_matches_texture_obj(struct gl_texture_object *texObj,
if (!baseImage)
return 0;
- /* Check image level against object BaseLevel, but not MaxLevel. MaxLevel is not
- * the highest level that can be assigned to the miptree.
- */
- const unsigned maxLevel = texObj->BaseLevel + baseImage->MaxLog2;
- if (level < texObj->BaseLevel || level > maxLevel
- || level > RADEON_MIPTREE_MAX_TEXTURE_LEVELS)
+ if (level < texObj->BaseLevel || level > texObj->MaxLevel)
return 0;
const unsigned levelDiff = level - texObj->BaseLevel;
@@ -620,7 +615,9 @@ static void teximage_assign_miptree(radeonContextPtr rmesa,
radeonTexObj *t = radeon_tex_obj(texObj);
radeon_texture_image* image = get_radeon_texture_image(texImage);
- /* check image for dimension and level compatibility with texture */
+ /* Since miptree holds only images for levels <BaseLevel..MaxLevel>
+ * don't allocate the miptree if the teximage won't fit.
+ */
if (!image_matches_texture_obj(texObj, texImage, level))
return;
diff --git a/src/mesa/drivers/osmesa/Makefile b/src/mesa/drivers/osmesa/Makefile
index ea49a896590..c6b4a040851 100644
--- a/src/mesa/drivers/osmesa/Makefile
+++ b/src/mesa/drivers/osmesa/Makefile
@@ -20,17 +20,11 @@ INCLUDE_DIRS = \
-I$(TOP)/src/mesa \
-I$(TOP)/src/mesa/main
-# Standalone osmesa needs to be linked with core Mesa APIs
-ifeq ($(DRIVER_DIRS), osmesa)
CORE_MESA = \
$(TOP)/src/mesa/libmesa.a \
$(TOP)/src/mapi/glapi/libglapi.a \
$(TOP)/src/glsl/cl/libglslcl.a \
$(TOP)/src/glsl/pp/libglslpp.a
-else
-CORE_MESA =
-endif
-
.c.o:
$(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@
diff --git a/src/mesa/main/arbprogram.h b/src/mesa/main/arbprogram.h
index df16513e398..787ffd62f4b 100644
--- a/src/mesa/main/arbprogram.h
+++ b/src/mesa/main/arbprogram.h
@@ -27,6 +27,10 @@
#define ARBPROGRAM_H
+#include "compiler.h"
+#include "glheader.h"
+
+
extern void GLAPIENTRY
_mesa_BindProgram(GLenum target, GLuint id);
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 1a2e9b1da6f..48b9904642a 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -147,6 +147,8 @@ invalidate_framebuffer(struct gl_framebuffer *fb)
/**
* Given a GL_*_ATTACHMENTn token, return a pointer to the corresponding
* gl_renderbuffer_attachment object.
+ * This function is only used for user-created FB objects, not the
+ * default / window-system FB object.
* If \p attachment is GL_DEPTH_STENCIL_ATTACHMENT, return a pointer to
* the depth buffer attachment point.
*/
@@ -156,6 +158,8 @@ _mesa_get_attachment(GLcontext *ctx, struct gl_framebuffer *fb,
{
GLuint i;
+ assert(fb->Name > 0);
+
switch (attachment) {
case GL_COLOR_ATTACHMENT0_EXT:
case GL_COLOR_ATTACHMENT1_EXT:
@@ -195,6 +199,45 @@ _mesa_get_attachment(GLcontext *ctx, struct gl_framebuffer *fb,
/**
+ * As above, but only used for getting attachments of the default /
+ * window-system framebuffer (not user-created framebuffer objects).
+ */
+static struct gl_renderbuffer_attachment *
+_mesa_get_fb0_attachment(GLcontext *ctx, struct gl_framebuffer *fb,
+ GLenum attachment)
+{
+ assert(fb->Name == 0);
+
+ switch (attachment) {
+ case GL_FRONT_LEFT:
+ return &fb->Attachment[BUFFER_FRONT_LEFT];
+ case GL_FRONT_RIGHT:
+ return &fb->Attachment[BUFFER_FRONT_RIGHT];
+ case GL_BACK_LEFT:
+ return &fb->Attachment[BUFFER_BACK_LEFT];
+ case GL_BACK_RIGHT:
+ return &fb->Attachment[BUFFER_BACK_RIGHT];
+ case GL_AUX0:
+ if (fb->Visual.numAuxBuffers == 1) {
+ return &fb->Attachment[BUFFER_AUX0];
+ }
+ return NULL;
+ case GL_DEPTH_BUFFER:
+ /* fall-through / new in GL 3.0 */
+ case GL_DEPTH_ATTACHMENT_EXT:
+ return &fb->Attachment[BUFFER_DEPTH];
+ case GL_STENCIL_BUFFER:
+ /* fall-through / new in GL 3.0 */
+ case GL_STENCIL_ATTACHMENT_EXT:
+ return &fb->Attachment[BUFFER_STENCIL];
+ default:
+ return NULL;
+ }
+}
+
+
+
+/**
* Remove any texture or renderbuffer attached to the given attachment
* point. Update reference counts, etc.
*/
@@ -1878,12 +1921,14 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
}
if (buffer->Name == 0) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetFramebufferAttachmentParameterivEXT");
- return;
+ /* the default / window-system FBO */
+ att = _mesa_get_fb0_attachment(ctx, buffer, attachment);
+ }
+ else {
+ /* user-created framebuffer FBO */
+ att = _mesa_get_attachment(ctx, buffer, attachment);
}
- att = _mesa_get_attachment(ctx, buffer, attachment);
if (att == NULL) {
_mesa_error(ctx, GL_INVALID_ENUM,
"glGetFramebufferAttachmentParameterivEXT(attachment)");
diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c
index 01f84180af7..56558cfcc1e 100644
--- a/src/mesa/main/framebuffer.c
+++ b/src/mesa/main/framebuffer.c
@@ -879,6 +879,7 @@ _mesa_source_buffer_exists(GLcontext *ctx, GLenum format)
return GL_FALSE;
}
ASSERT(_mesa_get_format_bits(ctx->ReadBuffer->_ColorReadBuffer->Format, GL_RED_BITS) > 0 ||
+ _mesa_get_format_bits(ctx->ReadBuffer->_ColorReadBuffer->Format, GL_ALPHA_BITS) > 0 ||
_mesa_get_format_bits(ctx->ReadBuffer->_ColorReadBuffer->Format, GL_INDEX_BITS) > 0);
break;
case GL_DEPTH:
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 2101b9bc18d..8f7ebeed976 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1242,8 +1242,6 @@ st_CompressedTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
assert(xoffset % util_format_get_blockwidth(pformat) == 0);
assert(yoffset % util_format_get_blockheight(pformat) == 0);
- assert(width % util_format_get_blockwidth(pformat) == 0);
- assert(height % util_format_get_blockheight(pformat) == 0);
for (y = 0; y < height; y += util_format_get_blockheight(pformat)) {
/* don't need to adjust for xoffset and yoffset as st_texture_image_map does that */
diff --git a/src/mesa/swrast_setup/ss_triangle.c b/src/mesa/swrast_setup/ss_triangle.c
index bad0d819460..f22bc52f0a8 100644
--- a/src/mesa/swrast_setup/ss_triangle.c
+++ b/src/mesa/swrast_setup/ss_triangle.c
@@ -159,7 +159,7 @@ static void _swsetup_render_tri(GLcontext *ctx,
}
#define SS_COLOR(a,b) UNCLAMPED_FLOAT_TO_RGBA_CHAN(a,b)
-#define SS_SPEC(a,b) UNCLAMPED_FLOAT_TO_RGB_CHAN(a,b)
+#define SS_SPEC(a,b) COPY_4V(a,b)
#define SS_IND(a,b) (a = b)
#define IND (0)