aboutsummaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r--src/mesa/drivers/dri/i915/i830_context.h8
-rw-r--r--src/mesa/drivers/dri/i915/i830_vtbl.c32
-rw-r--r--src/mesa/drivers/dri/i915/i915_context.h8
-rw-r--r--src/mesa/drivers/dri/i915/i915_vtbl.c30
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_line.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_tri.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h22
-rw-r--r--src/mesa/drivers/dri/i965/brw_curbe.c8
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h7
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw.c55
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw_upload.c20
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h8
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c24
-rw-r--r--src/mesa/drivers/dri/i965/brw_misc_state.c88
-rw-r--r--src/mesa/drivers/dri/i965/brw_queryobj.c8
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h19
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_batch.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c52
-rw-r--r--src/mesa/drivers/dri/i965/brw_structs.h4
-rw-r--r--src/mesa/drivers/dri/i965/brw_urb.c10
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c11
-rw-r--r--src/mesa/drivers/dri/i965/brw_vtbl.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.h4
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_glsl.c739
-rw-r--r--src/mesa/drivers/dri/intel/intel_batchbuffer.c30
-rw-r--r--src/mesa/drivers/dri/intel/intel_batchbuffer.h12
-rw-r--r--src/mesa/drivers/dri/intel/intel_blit.c65
-rw-r--r--src/mesa/drivers/dri/intel/intel_buffers.c173
-rw-r--r--src/mesa/drivers/dri/intel/intel_buffers.h7
-rw-r--r--src/mesa/drivers/dri/intel/intel_chipset.h6
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.c35
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.h18
-rw-r--r--src/mesa/drivers/dri/intel/intel_pixel_bitmap.c6
-rw-r--r--src/mesa/drivers/dri/intel/intel_pixel_draw.c53
-rw-r--r--src/mesa/drivers/dri/intel/intel_reg.h5
-rw-r--r--src/mesa/drivers/dri/intel/intel_regions.c43
-rw-r--r--src/mesa/drivers/dri/intel/intel_span.c141
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_screen.c2
40 files changed, 1288 insertions, 475 deletions
diff --git a/src/mesa/drivers/dri/i915/i830_context.h b/src/mesa/drivers/dri/i915/i830_context.h
index a298c1407de..1bdb32049d7 100644
--- a/src/mesa/drivers/dri/i915/i830_context.h
+++ b/src/mesa/drivers/dri/i915/i830_context.h
@@ -57,7 +57,13 @@
#define I830_DESTREG_SR0 7
#define I830_DESTREG_SR1 8
#define I830_DESTREG_SR2 9
-#define I830_DEST_SETUP_SIZE 10
+#define I830_DESTREG_DRAWRECT0 10
+#define I830_DESTREG_DRAWRECT1 11
+#define I830_DESTREG_DRAWRECT2 12
+#define I830_DESTREG_DRAWRECT3 13
+#define I830_DESTREG_DRAWRECT4 14
+#define I830_DESTREG_DRAWRECT5 15
+#define I830_DEST_SETUP_SIZE 16
#define I830_CTXREG_STATE1 0
#define I830_CTXREG_STATE2 1
diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c
index 773a8b4dd01..3b3ff2bceda 100644
--- a/src/mesa/drivers/dri/i915/i830_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i830_vtbl.c
@@ -449,7 +449,8 @@ i830_emit_state(struct intel_context *intel)
aper_array[aper_count++] = intel->batch->buf;
if (dirty & I830_UPLOAD_BUFFERS) {
aper_array[aper_count++] = state->draw_region->buffer;
- aper_array[aper_count++] = state->depth_region->buffer;
+ if (state->depth_region)
+ aper_array[aper_count++] = state->depth_region->buffer;
}
for (i = 0; i < I830_TEX_UNITS; i++)
@@ -512,6 +513,16 @@ i830_emit_state(struct intel_context *intel)
OUT_BATCH(state->Buffer[I830_DESTREG_SR0]);
OUT_BATCH(state->Buffer[I830_DESTREG_SR1]);
OUT_BATCH(state->Buffer[I830_DESTREG_SR2]);
+
+ if (intel->constant_cliprect) {
+ assert(state->Buffer[I830_DESTREG_DRAWRECT0] != MI_NOOP);
+ OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT0]);
+ OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT1]);
+ OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT2]);
+ OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT3]);
+ OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT4]);
+ OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT5]);
+ }
ADVANCE_BATCH();
}
@@ -591,6 +602,7 @@ i830_state_draw_region(struct intel_context *intel,
struct intel_region *depth_region)
{
struct i830_context *i830 = i830_context(&intel->ctx);
+ GLcontext *ctx = &intel->ctx;
GLuint value;
ASSERT(state == &i830->state || state == &i830->meta);
@@ -643,6 +655,24 @@ i830_state_draw_region(struct intel_context *intel,
}
state->Buffer[I830_DESTREG_DV1] = value;
+ if (intel->constant_cliprect) {
+ state->Buffer[I830_DESTREG_DRAWRECT0] = _3DSTATE_DRAWRECT_INFO;
+ state->Buffer[I830_DESTREG_DRAWRECT1] = 0;
+ state->Buffer[I830_DESTREG_DRAWRECT2] = 0; /* xmin, ymin */
+ state->Buffer[I830_DESTREG_DRAWRECT3] =
+ (ctx->DrawBuffer->Width & 0xffff) |
+ (ctx->DrawBuffer->Height << 16);
+ state->Buffer[I830_DESTREG_DRAWRECT4] = 0; /* xoff, yoff */
+ state->Buffer[I830_DESTREG_DRAWRECT5] = 0;
+ } else {
+ state->Buffer[I830_DESTREG_DRAWRECT0] = MI_NOOP;
+ state->Buffer[I830_DESTREG_DRAWRECT1] = MI_NOOP;
+ state->Buffer[I830_DESTREG_DRAWRECT2] = MI_NOOP;
+ state->Buffer[I830_DESTREG_DRAWRECT3] = MI_NOOP;
+ state->Buffer[I830_DESTREG_DRAWRECT4] = MI_NOOP;
+ state->Buffer[I830_DESTREG_DRAWRECT5] = MI_NOOP;
+ }
+
I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS);
diff --git a/src/mesa/drivers/dri/i915/i915_context.h b/src/mesa/drivers/dri/i915/i915_context.h
index a2376e50e15..87bbf5f9271 100644
--- a/src/mesa/drivers/dri/i915/i915_context.h
+++ b/src/mesa/drivers/dri/i915/i915_context.h
@@ -65,7 +65,13 @@
#define I915_DESTREG_SR0 9
#define I915_DESTREG_SR1 10
#define I915_DESTREG_SR2 11
-#define I915_DEST_SETUP_SIZE 12
+#define I915_DESTREG_DRAWRECT0 12
+#define I915_DESTREG_DRAWRECT1 13
+#define I915_DESTREG_DRAWRECT2 14
+#define I915_DESTREG_DRAWRECT3 15
+#define I915_DESTREG_DRAWRECT4 16
+#define I915_DESTREG_DRAWRECT5 17
+#define I915_DEST_SETUP_SIZE 18
#define I915_CTXREG_STATE4 0
#define I915_CTXREG_LI 1
diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c
index 7431a9cf76d..e79c955d64d 100644
--- a/src/mesa/drivers/dri/i915/i915_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i915_vtbl.c
@@ -399,6 +399,17 @@ i915_emit_state(struct intel_context *intel)
OUT_BATCH(state->Buffer[I915_DESTREG_SR0]);
OUT_BATCH(state->Buffer[I915_DESTREG_SR1]);
OUT_BATCH(state->Buffer[I915_DESTREG_SR2]);
+
+ if (intel->constant_cliprect) {
+ assert(state->Buffer[I915_DESTREG_DRAWRECT0] != MI_NOOP);
+ OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT0]);
+ OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT1]);
+ OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT2]);
+ OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT3]);
+ OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT4]);
+ OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT5]);
+ }
+
ADVANCE_BATCH();
}
@@ -521,6 +532,7 @@ i915_state_draw_region(struct intel_context *intel,
struct intel_region *depth_region)
{
struct i915_context *i915 = i915_context(&intel->ctx);
+ GLcontext *ctx = &intel->ctx;
GLuint value;
ASSERT(state == &i915->state || state == &i915->meta);
@@ -573,6 +585,24 @@ i915_state_draw_region(struct intel_context *intel,
}
state->Buffer[I915_DESTREG_DV1] = value;
+ if (intel->constant_cliprect) {
+ state->Buffer[I915_DESTREG_DRAWRECT0] = _3DSTATE_DRAWRECT_INFO;
+ state->Buffer[I915_DESTREG_DRAWRECT1] = 0;
+ state->Buffer[I915_DESTREG_DRAWRECT2] = 0; /* xmin, ymin */
+ state->Buffer[I915_DESTREG_DRAWRECT3] =
+ (ctx->DrawBuffer->Width & 0xffff) |
+ (ctx->DrawBuffer->Height << 16);
+ state->Buffer[I915_DESTREG_DRAWRECT4] = 0; /* xoff, yoff */
+ state->Buffer[I915_DESTREG_DRAWRECT5] = 0;
+ } else {
+ state->Buffer[I915_DESTREG_DRAWRECT0] = MI_NOOP;
+ state->Buffer[I915_DESTREG_DRAWRECT1] = MI_NOOP;
+ state->Buffer[I915_DESTREG_DRAWRECT2] = MI_NOOP;
+ state->Buffer[I915_DESTREG_DRAWRECT3] = MI_NOOP;
+ state->Buffer[I915_DESTREG_DRAWRECT4] = MI_NOOP;
+ state->Buffer[I915_DESTREG_DRAWRECT5] = MI_NOOP;
+ }
+
I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS);
}
diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c
index c87e5b9a120..c45d48dff8e 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_line.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_line.c
@@ -148,7 +148,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
brw_clip_init_clipmask(c);
/* -ve rhw workaround */
- if (!(BRW_IS_GM45(p->brw) || BRW_IS_G4X(p->brw))) {
+ if (!BRW_IS_G4X(p->brw)) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c
index 82d1e873577..740c7cbd109 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_state.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_state.c
@@ -102,7 +102,7 @@ clip_unit_create_from_key(struct brw_context *brw,
clip.clip5.api_mode = BRW_CLIP_API_OGL;
clip.clip5.clip_mode = key->clip_mode;
- if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw))
+ if (BRW_IS_G4X(brw))
clip.clip5.negative_w_clip_test = 1;
clip.clip6.clipper_viewport_state_ptr = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c
index 8459b59b460..1dbba37fe7e 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_tri.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c
@@ -526,7 +526,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
/* if -ve rhw workaround bit is set,
do cliptest */
- if (!(BRW_IS_GM45(p->brw) || BRW_IS_G4X(p->brw))) {
+ if (!BRW_IS_G4X(p->brw)) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 474158b484b..e2bc08a6cb7 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -39,6 +39,7 @@
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_draw.h"
+#include "brw_state.h"
#include "brw_vs.h"
#include "intel_tex.h"
#include "intel_blit.h"
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 1c6a0dede0b..e3904be977f 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -433,7 +433,6 @@ struct brw_context
GLuint primitive;
GLboolean emit_state_always;
- GLboolean wrap;
GLboolean tmp_fallback;
GLboolean no_batch_wrap;
@@ -445,6 +444,19 @@ struct brw_context
GLuint nr_draw_regions;
struct intel_region *draw_regions[MAX_DRAW_BUFFERS];
struct intel_region *depth_region;
+
+ /**
+ * List of buffers accumulated in brw_validate_state to receive
+ * dri_bo_check_aperture treatment before exec, so we can know if we
+ * should flush the batch and try again before emitting primitives.
+ *
+ * This can be a fixed number as we only have a limited number of
+ * objects referenced from the batchbuffer in a primitive emit,
+ * consisting of the vertex buffers, pipelined state pointers,
+ * the CURBE, the depth buffer, and a query BO.
+ */
+ dri_bo *validated_bos[VERT_ATTRIB_MAX + 16];
+ int validated_bo_count;
} state;
struct brw_state_pointers attribs;
@@ -680,14 +692,6 @@ void brw_emit_query_begin(struct brw_context *brw);
void brw_emit_query_end(struct brw_context *brw);
/*======================================================================
- * brw_state.c
- */
-void brw_validate_state( struct brw_context *brw );
-void brw_init_state( struct brw_context *brw );
-void brw_destroy_state( struct brw_context *brw );
-
-
-/*======================================================================
* brw_state_dump.c
*/
void brw_debug_batch(struct intel_context *intel);
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 7cddd3a7dee..c7bac7b0c52 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -307,6 +307,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf);
}
+ brw_add_validated_bo(brw, brw->curbe.curbe_bo);
/* Because this provokes an action (ie copy the constants into the
* URB), it shouldn't be shortcircuited if identical to the
@@ -328,13 +329,6 @@ static void emit_constant_buffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
GLuint sz = brw->curbe.total_size;
- dri_bo *aper_array[] = {
- brw->intel.batch->buf,
- brw->curbe.curbe_bo,
- };
-
- if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array)))
- intel_batchbuffer_flush(intel->batch);
BEGIN_BATCH(2, IGNORE_CLIPRECTS);
if (sz == 0) {
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 0593e8d5f5f..39c32255f8b 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -798,10 +798,9 @@
#include "intel_chipset.h"
-#define BRW_IS_GM45(brw) (IS_GM45_GM((brw)->intel.intelScreen->deviceID))
#define BRW_IS_G4X(brw) (IS_G4X((brw)->intel.intelScreen->deviceID))
-#define CMD_PIPELINE_SELECT(brw) ((BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965)
-#define CMD_VF_STATISTICS(brw) ((BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965)
-#define URB_SIZES(brw) ((BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) ? 384 : 256) /* 512 bit unit */
+#define CMD_PIPELINE_SELECT(brw) (BRW_IS_G4X(brw) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965)
+#define CMD_VF_STATISTICS(brw) (BRW_IS_G4X(brw) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965)
+#define URB_SIZES(brw) (BRW_IS_G4X(brw) ? 384 : 256) /* 512 bit units */
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 6c71b4abcf0..d87b8f8a848 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -256,6 +256,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
struct intel_context *intel = intel_context(ctx);
struct brw_context *brw = brw_context(ctx);
GLboolean retval = GL_FALSE;
+ GLboolean warn = GL_FALSE;
GLuint i;
if (ctx->NewState)
@@ -282,30 +283,25 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
LOCK_HARDWARE(intel);
- if (brw->intel.numClipRects == 0) {
+ if (!intel->constant_cliprect && intel->driDrawable->numClipRects == 0) {
UNLOCK_HARDWARE(intel);
return GL_TRUE;
}
+ /* Flush the batch if it's approaching full, so that we don't wrap while
+ * we've got validated state that needs to be in the same batch as the
+ * primitives. This fraction is just a guess (minimal full state plus
+ * a primitive is around 512 bytes), and would be better if we had
+ * an upper bound of how much we might emit in a single
+ * brw_try_draw_prims().
+ */
+ intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4,
+ LOOP_CLIPRECTS);
{
- /* Flush the batch if it's approaching full, so that we don't wrap while
- * we've got validated state that needs to be in the same batch as the
- * primitives. This fraction is just a guess (minimal full state plus
- * a primitive is around 512 bytes), and would be better if we had
- * an upper bound of how much we might emit in a single
- * brw_try_draw_prims().
- */
- if (intel->batch->ptr - intel->batch->map > intel->batch->size * 3 / 4
- /* brw_emit_prim may change the cliprect_mode to LOOP_CLIPRECTS */
- || intel->batch->cliprect_mode != LOOP_CLIPRECTS)
- intel_batchbuffer_flush(intel->batch);
-
/* Set the first primitive early, ahead of validate_state:
*/
brw_set_prim(brw, prim[0].mode);
- /* XXX: Need to separate validate and upload of state.
- */
brw_validate_state( brw );
/* Various fallback checks:
@@ -316,6 +312,31 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
if (check_fallbacks( brw, prim, nr_prims ))
goto out;
+ /* Check that we can fit our state in with our existing batchbuffer, or
+ * flush otherwise.
+ */
+ if (dri_bufmgr_check_aperture_space(brw->state.validated_bos,
+ brw->state.validated_bo_count)) {
+ static GLboolean warned;
+ intel_batchbuffer_flush(intel->batch);
+
+ /* Validate the state after we flushed the batch (which would have
+ * changed the set of dirty state). If we still fail to
+ * check_aperture, warn of what's happening, but attempt to continue
+ * on since it may succeed anyway, and the user would probably rather
+ * see a failure and a warning than a fallback.
+ */
+ brw_validate_state(brw);
+ if (!warned &&
+ dri_bufmgr_check_aperture_space(brw->state.validated_bos,
+ brw->state.validated_bo_count)) {
+ warn = GL_TRUE;
+ warned = GL_TRUE;
+ }
+ }
+
+ brw_upload_state(brw);
+
for (i = 0; i < nr_prims; i++) {
brw_emit_prim(brw, &prim[i]);
}
@@ -326,6 +347,10 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
out:
UNLOCK_HARDWARE(intel);
+ if (warn)
+ fprintf(stderr, "i965: Single primitive emit potentially exceeded "
+ "available aperture space\n");
+
if (!retval)
DBG("%s failed\n", __FUNCTION__);
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 7b88b5eaa1e..4080c5e3228 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -250,10 +250,10 @@ static void get_space( struct brw_context *brw,
wrap_buffers(brw, size);
}
+ assert(*bo_return == NULL);
dri_bo_reference(brw->vb.upload.bo);
*bo_return = brw->vb.upload.bo;
*offset_return = brw->vb.upload.offset;
-
brw->vb.upload.offset += size;
}
@@ -359,6 +359,14 @@ static void brw_prepare_vertices(struct brw_context *brw)
input->offset = (unsigned long)input->glarray->Ptr;
input->stride = input->glarray->StrideB;
} else {
+ if (input->bo != NULL) {
+ /* Already-uploaded vertex data is present from a previous
+ * prepare_vertices, but we had to re-validate state due to
+ * check_aperture failing and a new batch being produced.
+ */
+ continue;
+ }
+
/* Queue the buffer object up to be uploaded in the next pass,
* when we've decided if we're doing interleaved or not.
*/
@@ -417,6 +425,12 @@ static void brw_prepare_vertices(struct brw_context *brw)
}
brw_prepare_query_begin(brw);
+
+ for (i = 0; i < nr_enabled; i++) {
+ struct brw_vertex_element *input = enabled[i];
+
+ brw_add_validated_bo(brw, input->bo);
+ }
}
static void brw_emit_vertices(struct brw_context *brw)
@@ -512,7 +526,7 @@ static void brw_prepare_indices(struct brw_context *brw)
struct intel_context *intel = &brw->intel;
const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
GLuint ib_size;
- dri_bo *bo;
+ dri_bo *bo = NULL;
struct gl_buffer_object *bufferobj;
GLuint offset;
@@ -561,6 +575,8 @@ static void brw_prepare_indices(struct brw_context *brw)
dri_bo_unreference(brw->ib.bo);
brw->ib.bo = bo;
brw->ib.offset = offset;
+
+ brw_add_validated_bo(brw, brw->ib.bo);
}
static void brw_emit_indices(struct brw_context *brw)
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 207b8b7ca38..49b422ee2ff 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -65,7 +65,7 @@ struct brw_reg
GLuint abs:1; /* source only */
GLuint vstride:4; /* source only */
GLuint width:3; /* src only, align1 only */
- GLuint hstride:2; /* src only, align1 only */
+ GLuint hstride:2; /* align1 only */
GLuint address_mode:1; /* relative addressing, hopefully! */
GLuint pad0:1;
@@ -432,6 +432,12 @@ static INLINE struct brw_reg brw_uw8_grf( GLuint nr,
return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
+static INLINE struct brw_reg brw_uw16_grf( GLuint nr,
+ GLuint subnr )
+{
+ return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
static INLINE struct brw_reg brw_null_reg( void )
{
return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 0bfbec9d140..ce4cf46cfa6 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -64,7 +64,9 @@ static void brw_set_dest( struct brw_instruction *insn,
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits1.da1.dest_subreg_nr = dest.subnr;
- insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
+ if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+ dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+ insn->bits1.da1.dest_horiz_stride = dest.hstride;
}
else {
insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
@@ -78,7 +80,9 @@ static void brw_set_dest( struct brw_instruction *insn,
*/
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
- insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
+ if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+ dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+ insn->bits1.ia1.dest_horiz_stride = dest.hstride;
}
else {
insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
@@ -329,14 +333,14 @@ static void brw_set_sampler_message(struct brw_context *brw,
{
brw_set_src1(insn, brw_imm_d(0));
- if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) {
- insn->bits3.sampler_gm45_g4x.binding_table_index = binding_table_index;
- insn->bits3.sampler_gm45_g4x.sampler = sampler;
- insn->bits3.sampler_gm45_g4x.msg_type = msg_type;
- insn->bits3.sampler_gm45_g4x.response_length = response_length;
- insn->bits3.sampler_gm45_g4x.msg_length = msg_length;
- insn->bits3.sampler_gm45_g4x.end_of_thread = eot;
- insn->bits3.sampler_gm45_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
+ if (BRW_IS_G4X(brw)) {
+ insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
+ insn->bits3.sampler_g4x.sampler = sampler;
+ insn->bits3.sampler_g4x.msg_type = msg_type;
+ insn->bits3.sampler_g4x.response_length = response_length;
+ insn->bits3.sampler_g4x.msg_length = msg_length;
+ insn->bits3.sampler_g4x.end_of_thread = eot;
+ insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
} else {
insn->bits3.sampler.binding_table_index = binding_table_index;
insn->bits3.sampler.sampler = sampler;
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 487c638ce21..627705fa9ba 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -71,6 +71,38 @@ const struct brw_tracked_state brw_blend_constant_color = {
.emit = upload_blend_constant_color
};
+/* Constant single cliprect for framebuffer object or DRI2 drawing */
+static void upload_drawing_rect(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ GLcontext *ctx = &intel->ctx;
+
+ if (!intel->constant_cliprect)
+ return;
+
+ BEGIN_BATCH(4, NO_LOOP_CLIPRECTS);
+ OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965);
+ OUT_BATCH(0); /* xmin, ymin */
+ OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
+ ((ctx->DrawBuffer->Height - 1) << 16));
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state brw_drawing_rect = {
+ .dirty = {
+ .mesa = _NEW_BUFFERS,
+ .brw = 0,
+ .cache = 0
+ },
+ .emit = upload_drawing_rect
+};
+
+static void prepare_binding_table_pointers(struct brw_context *brw)
+{
+ brw_add_validated_bo(brw, brw->wm.bind_bo);
+}
+
/**
* Upload the binding table pointers, which point each stage's array of surface
* state pointers.
@@ -81,13 +113,6 @@ const struct brw_tracked_state brw_blend_constant_color = {
static void upload_binding_table_pointers(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
- dri_bo *aper_array[] = {
- intel->batch->buf,
- brw->wm.bind_bo,
- };
-
- if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array)))
- intel_batchbuffer_flush(intel->batch);
BEGIN_BATCH(6, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
@@ -107,6 +132,7 @@ const struct brw_tracked_state brw_binding_table_pointers = {
.brw = BRW_NEW_BATCH,
.cache = CACHE_NEW_SURF_BIND,
},
+ .prepare = prepare_binding_table_pointers,
.emit = upload_binding_table_pointers,
};
@@ -140,21 +166,18 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
brw->state.dirty.brw |= BRW_NEW_PSP;
}
-static void upload_psp_urb_cbs(struct brw_context *brw )
+
+static void prepare_psp_urb_cbs(struct brw_context *brw)
{
- struct intel_context *intel = &brw->intel;
- dri_bo *aper_array[] = {
- intel->batch->buf,
- brw->vs.state_bo,
- brw->gs.state_bo,
- brw->clip.state_bo,
- brw->wm.state_bo,
- brw->cc.state_bo,
- };
-
- if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array)))
- intel_batchbuffer_flush(intel->batch);
+ brw_add_validated_bo(brw, brw->vs.state_bo);
+ brw_add_validated_bo(brw, brw->gs.state_bo);
+ brw_add_validated_bo(brw, brw->clip.state_bo);
+ brw_add_validated_bo(brw, brw->wm.state_bo);
+ brw_add_validated_bo(brw, brw->cc.state_bo);
+}
+static void upload_psp_urb_cbs(struct brw_context *brw )
+{
upload_pipelined_state_pointers(brw);
brw_upload_urb_fence(brw);
brw_upload_constant_buffer_state(brw);
@@ -172,14 +195,23 @@ const struct brw_tracked_state brw_psp_urb_cbs = {
CACHE_NEW_WM_UNIT |
CACHE_NEW_CC_UNIT)
},
+ .prepare = prepare_psp_urb_cbs,
.emit = upload_psp_urb_cbs,
};
+static void prepare_depthbuffer(struct brw_context *brw)
+{
+ struct intel_region *region = brw->state.depth_region;
+
+ if (region != NULL)
+ brw_add_validated_bo(brw, region->buffer);
+}
+
static void emit_depthbuffer(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
struct intel_region *region = brw->state.depth_region;
- unsigned int len = (BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) ? sizeof(struct brw_depthbuffer_gm45_g4x) / 4 : sizeof(struct brw_depthbuffer) / 4;
+ unsigned int len = BRW_IS_G4X(brw) ? 6 : 5;
if (region == NULL) {
BEGIN_BATCH(len, IGNORE_CLIPRECTS);
@@ -190,16 +222,12 @@ static void emit_depthbuffer(struct brw_context *brw)
OUT_BATCH(0);
OUT_BATCH(0);
- if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw))
+ if (BRW_IS_G4X(brw))
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
unsigned int format;
- dri_bo *aper_array[] = {
- intel->batch->buf,
- region->buffer
- };
switch (region->cpp) {
case 2:
@@ -216,9 +244,6 @@ static void emit_depthbuffer(struct brw_context *brw)
return;
}
- if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array)))
- intel_batchbuffer_flush(intel->batch);
-
BEGIN_BATCH(len, IGNORE_CLIPRECTS);
OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
OUT_BATCH(((region->pitch * region->cpp) - 1) |
@@ -234,7 +259,7 @@ static void emit_depthbuffer(struct brw_context *brw)
((region->height - 1) << 19));
OUT_BATCH(0);
- if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw))
+ if (BRW_IS_G4X(brw))
OUT_BATCH(0);
ADVANCE_BATCH();
@@ -247,6 +272,7 @@ const struct brw_tracked_state brw_depthbuffer = {
.brw = BRW_NEW_DEPTH_BUFFER | BRW_NEW_BATCH,
.cache = 0,
},
+ .prepare = prepare_depthbuffer,
.emit = emit_depthbuffer,
};
@@ -318,7 +344,7 @@ static void upload_aa_line_parameters(struct brw_context *brw)
{
struct brw_aa_line_parameters balp;
- if (!(BRW_IS_GM45(brw) || BRW_IS_G4X(brw)))
+ if (!BRW_IS_G4X(brw))
return;
/* use legacy aa line coverage computation */
diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c
index a1a1353dee7..cb9169e2eef 100644
--- a/src/mesa/drivers/dri/i965/brw_queryobj.c
+++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
@@ -42,6 +42,7 @@
#include "main/imports.h"
#include "brw_context.h"
+#include "brw_state.h"
#include "intel_batchbuffer.h"
#include "intel_reg.h"
@@ -163,10 +164,6 @@ void
brw_prepare_query_begin(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
- dri_bo *aper_array[] = {
- intel->batch->buf,
- brw->query.bo,
- };
/* Skip if we're not doing any queries. */
if (is_empty_list(&brw->query.active_head))
@@ -182,8 +179,7 @@ brw_prepare_query_begin(struct brw_context *brw)
brw->query.index = 0;
}
- if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array)))
- intel_batchbuffer_flush(intel->batch);
+ brw_add_validated_bo(brw, brw->query.bo);
}
/** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. */
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 4c04036ef08..bb22c03eeb6 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -35,6 +35,16 @@
#include "brw_context.h"
+static inline void
+brw_add_validated_bo(struct brw_context *brw, dri_bo *bo)
+{
+ assert(brw->state.validated_bo_count < ARRAY_SIZE(brw->state.validated_bos));
+
+ if (bo != NULL) {
+ dri_bo_reference(bo);
+ brw->state.validated_bos[brw->state.validated_bo_count++] = bo;
+ }
+};
const struct brw_tracked_state brw_blend_constant_color;
const struct brw_tracked_state brw_cc_unit;
@@ -79,10 +89,19 @@ const struct brw_tracked_state brw_pipe_control;
const struct brw_tracked_state brw_clear_surface_cache;
const struct brw_tracked_state brw_clear_batch_cache;
+const struct brw_tracked_state brw_drawing_rect;
const struct brw_tracked_state brw_indices;
const struct brw_tracked_state brw_vertices;
/***********************************************************************
+ * brw_state.c
+ */
+void brw_validate_state(struct brw_context *brw);
+void brw_upload_state(struct brw_context *brw);
+void brw_init_state(struct brw_context *brw);
+void brw_destroy_state(struct brw_context *brw);
+
+/***********************************************************************
* brw_state_cache.c
*/
dri_bo *brw_cache_data(struct brw_cache *cache,
diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c
index 94ef9248686..dc87859f3f5 100644
--- a/src/mesa/drivers/dri/i965/brw_state_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_state_batch.c
@@ -97,8 +97,6 @@ void brw_clear_batch_cache_flush( struct brw_context *brw )
{
clear_batch_cache(brw);
- brw->wrap = 0;
-
/* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */
brw->state.dirty.mesa |= ~0;
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index b6a52843a81..7a642bd2a8f 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -45,7 +45,6 @@ const struct brw_tracked_state *atoms[] =
{
&brw_check_fallback,
- &brw_active_vertprog,
&brw_wm_input_sizes,
&brw_vs_prog,
&brw_gs_prog,
@@ -99,6 +98,7 @@ const struct brw_tracked_state *atoms[] =
&brw_psp_urb_cbs,
#endif
+ &brw_drawing_rect,
&brw_indices,
&brw_vertices,
@@ -168,6 +168,18 @@ static void xor_states( struct brw_state_flags *result,
result->cache = a->cache ^ b->cache;
}
+static void
+brw_clear_validated_bos(struct brw_context *brw)
+{
+ int i;
+
+ /* Clear the last round of validated bos */
+ for (i = 0; i < brw->state.validated_bo_count; i++) {
+ dri_bo_unreference(brw->state.validated_bos[i]);
+ brw->state.validated_bos[i] = NULL;
+ }
+ brw->state.validated_bo_count = 0;
+}
/***********************************************************************
* Emit all state:
@@ -176,14 +188,14 @@ void brw_validate_state( struct brw_context *brw )
{
struct intel_context *intel = &brw->intel;
struct brw_state_flags *state = &brw->state.dirty;
- GLuint i, count, pass = 0;
- dri_bo *last_batch_bo = NULL;
+ GLuint i;
+
+ brw_clear_validated_bos(brw);
state->mesa |= brw->intel.NewGLState;
brw->intel.NewGLState = 0;
- if (brw->wrap)
- state->brw |= BRW_NEW_CONTEXT;
+ brw_add_validated_bo(brw, intel->batch->buf);
if (brw->emit_state_always) {
state->mesa |= ~0;
@@ -199,6 +211,10 @@ void brw_validate_state( struct brw_context *brw )
brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
}
+ if (brw->vertex_program != brw->attribs.VertexProgram->_Current) {
+ brw->vertex_program = brw->attribs.VertexProgram->_Current;
+ brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+ }
if (state->mesa == 0 &&
state->cache == 0 &&
@@ -210,8 +226,6 @@ void brw_validate_state( struct brw_context *brw )
brw->intel.Fallback = 0;
- count = 0;
-
/* do prepare stage for all atoms */
for (i = 0; i < Elements(atoms); i++) {
const struct brw_tracked_state *atom = brw->state.atoms[i];
@@ -225,19 +239,15 @@ void brw_validate_state( struct brw_context *brw )
}
}
}
+}
- if (brw->intel.Fallback)
- return;
- /* We're about to try to set up a coherent state in the batchbuffer for
- * the emission of primitives. If we exceed the aperture size in any of the
- * emit() calls, we need to go back to square 1 and try setting up again.
- */
-got_flushed:
- dri_bo_unreference(last_batch_bo);
- last_batch_bo = intel->batch->buf;
- dri_bo_reference(last_batch_bo);
- assert(pass++ <= 2);
+void brw_upload_state(struct brw_context *brw)
+{
+ struct brw_state_flags *state = &brw->state.dirty;
+ int i;
+
+ brw_clear_validated_bos(brw);
if (INTEL_DEBUG) {
/* Debug version which enforces various sanity checks on the
@@ -262,8 +272,6 @@ got_flushed:
if (check_state(state, &atom->dirty)) {
if (atom->emit) {
atom->emit( brw );
- if (intel->batch->buf != last_batch_bo)
- goto got_flushed;
}
}
@@ -288,15 +296,11 @@ got_flushed:
if (check_state(state, &atom->dirty)) {
if (atom->emit) {
atom->emit( brw );
- if (intel->batch->buf != last_batch_bo)
- goto got_flushed;
}
}
}
}
- dri_bo_unreference(last_batch_bo);
-
if (!brw->intel.Fallback)
memset(state, 0, sizeof(*state));
}
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index ec865c925a7..4e577d0f6a8 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -175,7 +175,7 @@ struct brw_depthbuffer
} dword4;
};
-struct brw_depthbuffer_gm45_g4x
+struct brw_depthbuffer_g4x
{
union header_union header;
@@ -1405,7 +1405,7 @@ struct brw_instruction
GLuint msg_target:4;
GLuint pad1:3;
GLuint end_of_thread:1;
- } sampler_gm45_g4x;
+ } sampler_g4x;
struct brw_urb_immediate urb;
diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c
index 1116ade0a47..1a004176de1 100644
--- a/src/mesa/drivers/dri/i965/brw_urb.c
+++ b/src/mesa/drivers/dri/i965/brw_urb.c
@@ -92,9 +92,9 @@ static void recalculate_urb_fence( struct brw_context *brw )
if (brw->urb.vsize < vsize ||
brw->urb.sfsize < sfsize ||
brw->urb.csize < csize ||
- (brw->urb.constrained && (brw->urb.vsize > brw->urb.vsize ||
- brw->urb.sfsize > brw->urb.sfsize ||
- brw->urb.csize > brw->urb.csize))) {
+ (brw->urb.constrained && (brw->urb.vsize > vsize ||
+ brw->urb.sfsize > sfsize ||
+ brw->urb.csize > csize))) {
brw->urb.csize = csize;
@@ -114,6 +114,10 @@ static void recalculate_urb_fence( struct brw_context *brw )
brw->urb.nr_sf_entries = limits[SF].min_nr_entries;
brw->urb.nr_cs_entries = limits[CS].min_nr_entries;
+ /* Mark us as operating with constrained nr_entries, so that next
+ * time we recalculate we'll resize the fences in the hope of
+ * escaping constrained mode and getting back to normal performance.
+ */
brw->urb.constrained = 1;
if (!check_urb_layout(brw)) {
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 9de05408ba9..25b4ee85cb0 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -818,8 +818,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
}
- /* Build ndc coords? TODO: Shortcircuit when w is known to be one.
- */
+ /* Build ndc coords */
if (!c->key.know_w_is_one) {
ndc = get_tmp(c);
emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
@@ -829,12 +828,12 @@ static void emit_vertex_write( struct brw_vs_compile *c)
ndc = pos;
}
- /* This includes the workaround for -ve rhw, so is no longer an
- * optional step:
+ /* Update the header for point size, user clipping flags, and -ve rhw
+ * workaround.
*/
if ((c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) ||
c->key.nr_userclip ||
- !c->key.know_w_is_one)
+ (!BRW_IS_G4X(p->brw) && !c->key.know_w_is_one))
{
struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
GLuint i;
@@ -867,7 +866,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
* Later, clipping will detect ucp[6] and ensure the primitive is
* clipped against all fixed planes.
*/
- if (!(BRW_IS_GM45(p->brw) || BRW_IS_G4X(p->brw)) && !c->key.know_w_is_one) {
+ if (!BRW_IS_G4X(p->brw) && !c->key.know_w_is_one) {
brw_CMP(p,
vec8(brw_null_reg()),
BRW_CONDITIONAL_L,
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index a64e437860f..2d4c81274e6 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -62,7 +62,6 @@ dri_bo_release(dri_bo **bo)
*/
static void brw_destroy_context( struct intel_context *intel )
{
- GLcontext *ctx = &intel->ctx;
struct brw_context *brw = brw_context(&intel->ctx);
int i;
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 297617ee2dc..896390c17b8 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -157,6 +157,7 @@ struct brw_wm_instruction {
#define BRW_WM_MAX_PARAM 256
#define BRW_WM_MAX_CONST 256
#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS
+#define BRW_WM_MAX_SUBROUTINE 16
@@ -246,7 +247,10 @@ struct brw_wm_compile {
struct brw_reg stack;
struct brw_reg emit_mask_reg;
GLuint reg_index;
+ GLuint tmp_regs[BRW_WM_MAX_GRF];
GLuint tmp_index;
+ GLuint tmp_max;
+ GLuint subroutines[BRW_WM_MAX_SUBROUTINE];
};
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 4d5e11f4b6f..cb728190f5c 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -4,6 +4,10 @@
#include "brw_eu.h"
#include "brw_wm.h"
+enum _subroutine {
+ SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4
+};
+
/* Only guess, need a flag in gl_fragment_program later */
GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
{
@@ -12,13 +16,17 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
struct prog_instruction *inst = &fp->Base.Instructions[i];
switch (inst->Opcode) {
case OPCODE_IF:
- case OPCODE_INT:
+ case OPCODE_TRUNC:
case OPCODE_ENDIF:
case OPCODE_CAL:
case OPCODE_BRK:
case OPCODE_RET:
case OPCODE_DDX:
case OPCODE_DDY:
+ case OPCODE_NOISE1:
+ case OPCODE_NOISE2:
+ case OPCODE_NOISE3:
+ case OPCODE_NOISE4:
case OPCODE_BGNLOOP:
return GL_TRUE;
default:
@@ -47,13 +55,26 @@ static int get_scalar_dst_index(struct prog_instruction *inst)
static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
{
struct brw_reg reg;
- reg = brw_vec8_grf(c->tmp_index--, 0);
+ if(c->tmp_index == c->tmp_max)
+ c->tmp_regs[ c->tmp_max++ ] = c->reg_index++;
+
+ reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0);
return reg;
}
-static void release_tmps(struct brw_wm_compile *c)
+static int mark_tmps(struct brw_wm_compile *c)
+{
+ return c->tmp_index;
+}
+
+static struct brw_reg lookup_tmp( struct brw_wm_compile *c, int index )
+{
+ return brw_vec8_grf( c->tmp_regs[ index ], 0 );
+}
+
+static void release_tmps(struct brw_wm_compile *c, int mark)
{
- c->tmp_index = 127;
+ c->tmp_index = mark;
}
static struct brw_reg
@@ -155,6 +176,68 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c,
src->NegateBase, src->Abs);
}
+/* Subroutines are minimal support for resusable instruction sequences.
+ They are implemented as simply as possible to minimise overhead: there
+ is no explicit support for communication between the caller and callee
+ other than saving the return address in a temporary register, nor is
+ there any automatic local storage. This implies that great care is
+ required before attempting reentrancy or any kind of nested
+ subroutine invocations. */
+static void invoke_subroutine( struct brw_wm_compile *c,
+ enum _subroutine subroutine,
+ void (*emit)( struct brw_wm_compile * ) )
+{
+ struct brw_compile *p = &c->func;
+
+ assert( subroutine < BRW_WM_MAX_SUBROUTINE );
+
+ if( c->subroutines[ subroutine ] ) {
+ /* subroutine previously emitted: reuse existing instructions */
+
+ int mark = mark_tmps( c );
+ struct brw_reg return_address = retype( alloc_tmp( c ),
+ BRW_REGISTER_TYPE_UD );
+ int here = p->nr_insn;
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
+
+ brw_ADD( p, brw_ip_reg(), brw_ip_reg(),
+ brw_imm_d( ( c->subroutines[ subroutine ] -
+ here - 1 ) << 4 ) );
+ brw_pop_insn_state(p);
+
+ release_tmps( c, mark );
+ } else {
+ /* previously unused subroutine: emit, and mark for later reuse */
+
+ int mark = mark_tmps( c );
+ struct brw_reg return_address = retype( alloc_tmp( c ),
+ BRW_REGISTER_TYPE_UD );
+ struct brw_instruction *calc;
+ int base = p->nr_insn;
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ calc = brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 0 ) );
+ brw_pop_insn_state(p);
+
+ c->subroutines[ subroutine ] = p->nr_insn;
+
+ emit( c );
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV( p, brw_ip_reg(), return_address );
+ brw_pop_insn_state(p);
+
+ brw_set_src1( calc, brw_imm_ud( ( p->nr_insn - base ) << 4 ) );
+
+ release_tmps( c, mark );
+ }
+}
+
static void emit_abs( struct brw_wm_compile *c,
struct prog_instruction *inst)
{
@@ -172,7 +255,7 @@ static void emit_abs( struct brw_wm_compile *c,
brw_set_saturate(p, 0);
}
-static void emit_int( struct brw_wm_compile *c,
+static void emit_trunc( struct brw_wm_compile *c,
struct prog_instruction *inst)
{
int i;
@@ -778,6 +861,7 @@ static void emit_lrp(struct brw_wm_compile *c,
GLuint mask = inst->DstReg.WriteMask;
struct brw_reg dst, tmp1, tmp2, src0, src1, src2;
int i;
+ int mark = mark_tmps(c);
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
dst = get_dst_reg(c, inst, i, 1);
@@ -804,7 +888,7 @@ static void emit_lrp(struct brw_wm_compile *c,
brw_MAC(p, dst, src0, tmp1);
brw_set_saturate(p, 0);
}
- release_tmps(c);
+ release_tmps(c, mark);
}
}
@@ -957,6 +1041,633 @@ static void emit_ddy(struct brw_wm_compile *c,
brw_set_saturate(p, 0);
}
+static __inline struct brw_reg high_words( struct brw_reg reg )
+{
+ return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ),
+ 0, 8, 2 );
+}
+
+static __inline struct brw_reg low_words( struct brw_reg reg )
+{
+ return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 );
+}
+
+static __inline struct brw_reg even_bytes( struct brw_reg reg )
+{
+ return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 );
+}
+
+static __inline struct brw_reg odd_bytes( struct brw_reg reg )
+{
+ return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ),
+ 0, 16, 2 );
+}
+
+/* One-, two- and three-dimensional Perlin noise, similar to the description
+ in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */
+static void noise1_sub( struct brw_wm_compile *c ) {
+
+ struct brw_compile *p = &c->func;
+ struct brw_reg param,
+ x0, x1, /* gradients at each end */
+ t, tmp[ 2 ], /* float temporaries */
+ itmp[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
+ int i;
+ int mark = mark_tmps( c );
+
+ x0 = alloc_tmp( c );
+ x1 = alloc_tmp( c );
+ t = alloc_tmp( c );
+ tmp[ 0 ] = alloc_tmp( c );
+ tmp[ 1 ] = alloc_tmp( c );
+ itmp[ 0 ] = retype( tmp[ 0 ], BRW_REGISTER_TYPE_UD );
+ itmp[ 1 ] = retype( tmp[ 1 ], BRW_REGISTER_TYPE_UD );
+ itmp[ 2 ] = retype( x0, BRW_REGISTER_TYPE_UD );
+ itmp[ 3 ] = retype( x1, BRW_REGISTER_TYPE_UD );
+ itmp[ 4 ] = retype( t, BRW_REGISTER_TYPE_UD );
+
+ param = lookup_tmp( c, mark - 2 );
+
+ brw_set_access_mode( p, BRW_ALIGN_1 );
+
+ brw_MOV( p, itmp[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
+
+ /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
+ be hashed. Also compute the remainder (offset within the unit
+ length), interleaved to reduce register dependency penalties. */
+ brw_RNDD( p, itmp[ 0 ], param );
+ brw_FRC( p, param, param );
+ brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 1 ) );
+ brw_MOV( p, itmp[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
+ brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
+
+ /* We're now ready to perform the hashing. The two hashes are
+ interleaved for performance. The hash function used is
+ designed to rapidly achieve avalanche and require only 32x16
+ bit multiplication, and 16-bit swizzles (which we get for
+ free). We can't use immediate operands in the multiplies,
+ because immediates are permitted only in src1 and the 16-bit
+ factor is permitted only in src0. */
+ for( i = 0; i < 2; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 2 ], itmp[ i ] );
+ for( i = 0; i < 2; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+ for( i = 0; i < 2; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 3 ], itmp[ i ] );
+ for( i = 0; i < 2; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+ for( i = 0; i < 2; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] );
+ for( i = 0; i < 2; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+
+ /* Now we want to initialise the two gradients based on the
+ hashes. Format conversion from signed integer to float leaves
+ everything scaled too high by a factor of pow( 2, 31 ), but
+ we correct for that right at the end. */
+ brw_ADD( p, t, param, brw_imm_f( -1.0 ) );
+ brw_MOV( p, x0, retype( tmp[ 0 ], BRW_REGISTER_TYPE_D ) );
+ brw_MOV( p, x1, retype( tmp[ 1 ], BRW_REGISTER_TYPE_D ) );
+
+ brw_MUL( p, x0, x0, param );
+ brw_MUL( p, x1, x1, t );
+
+ /* We interpolate between the gradients using the polynomial
+ 6t^5 - 15t^4 + 10t^3 (Perlin). */
+ brw_MUL( p, tmp[ 0 ], param, brw_imm_f( 6.0 ) );
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) );
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) );
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
+ brw_ADD( p, x1, x1, negate( x0 ) ); /* unrelated work to fill the
+ pipeline */
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
+ brw_MUL( p, param, tmp[ 0 ], param );
+ brw_MUL( p, x1, x1, param );
+ brw_ADD( p, x0, x0, x1 );
+ /* scale by pow( 2, -30 ), to compensate for the format conversion
+ above and an extra factor of 2 so that a single gradient covers
+ the [-1,1] range */
+ brw_MUL( p, param, x0, brw_imm_f( 0.000000000931322574615478515625 ) );
+
+ release_tmps( c, mark );
+}
+
+static void emit_noise1( struct brw_wm_compile *c,
+ struct prog_instruction *inst )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src, param, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+ int mark = mark_tmps( c );
+
+ assert( mark == 0 );
+
+ src = get_src_reg( c, inst->SrcReg, 0, 1 );
+
+ param = alloc_tmp( c );
+
+ brw_MOV( p, param, src );
+
+ invoke_subroutine( c, SUB_NOISE1, noise1_sub );
+
+ /* Fill in the result: */
+ brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ brw_MOV( p, dst, param );
+ }
+ }
+ if( inst->SaturateMode == SATURATE_ZERO_ONE )
+ brw_set_saturate( p, 0 );
+
+ release_tmps( c, mark );
+}
+
+static void noise2_sub( struct brw_wm_compile *c ) {
+
+ struct brw_compile *p = &c->func;
+ struct brw_reg param0, param1,
+ x0y0, x0y1, x1y0, x1y1, /* gradients at each corner */
+ t, tmp[ 4 ], /* float temporaries */
+ itmp[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
+ int i;
+ int mark = mark_tmps( c );
+
+ x0y0 = alloc_tmp( c );
+ x0y1 = alloc_tmp( c );
+ x1y0 = alloc_tmp( c );
+ x1y1 = alloc_tmp( c );
+ t = alloc_tmp( c );
+ for( i = 0; i < 4; i++ ) {
+ tmp[ i ] = alloc_tmp( c );
+ itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );
+ }
+ itmp[ 4 ] = retype( x0y0, BRW_REGISTER_TYPE_UD );
+ itmp[ 5 ] = retype( x0y1, BRW_REGISTER_TYPE_UD );
+ itmp[ 6 ] = retype( x1y0, BRW_REGISTER_TYPE_UD );
+
+ param0 = lookup_tmp( c, mark - 3 );
+ param1 = lookup_tmp( c, mark - 2 );
+
+ brw_set_access_mode( p, BRW_ALIGN_1 );
+
+ /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
+ be hashed. Also compute the remainders (offsets within the unit
+ square), interleaved to reduce register dependency penalties. */
+ brw_RNDD( p, itmp[ 0 ], param0 );
+ brw_RNDD( p, itmp[ 1 ], param1 );
+ brw_FRC( p, param0, param0 );
+ brw_FRC( p, param1, param1 );
+ brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
+ brw_ADD( p, high_words( itmp[ 0 ] ), high_words( itmp[ 0 ] ),
+ low_words( itmp[ 1 ] ) );
+ brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
+ brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
+ brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 0x10000 ) );
+ brw_ADD( p, itmp[ 2 ], itmp[ 0 ], brw_imm_ud( 0x1 ) );
+ brw_ADD( p, itmp[ 3 ], itmp[ 0 ], brw_imm_ud( 0x10001 ) );
+
+ /* We're now ready to perform the hashing. The four hashes are
+ interleaved for performance. The hash function used is
+ designed to rapidly achieve avalanche and require only 32x16
+ bit multiplication, and 16-bit swizzles (which we get for
+ free). We can't use immediate operands in the multiplies,
+ because immediates are permitted only in src1 and the 16-bit
+ factor is permitted only in src0. */
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 5 ], itmp[ i ] );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, itmp[ i ], itmp[ 6 ], itmp[ i ] );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+ high_words( itmp[ i ] ) );
+
+ /* Now we want to initialise the four gradients based on the
+ hashes. Format conversion from signed integer to float leaves
+ everything scaled too high by a factor of pow( 2, 15 ), but
+ we correct for that right at the end. */
+ brw_ADD( p, t, param0, brw_imm_f( -1.0 ) );
+ brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) );
+ brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );
+ brw_MOV( p, x1y0, low_words( tmp[ 2 ] ) );
+ brw_MOV( p, x1y1, low_words( tmp[ 3 ] ) );
+
+ brw_MOV( p, tmp[ 0 ], high_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 1 ], high_words( tmp[ 1 ] ) );
+ brw_MOV( p, tmp[ 2 ], high_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 3 ], high_words( tmp[ 3 ] ) );
+
+ brw_MUL( p, x1y0, x1y0, t );
+ brw_MUL( p, x1y1, x1y1, t );
+ brw_ADD( p, t, param1, brw_imm_f( -1.0 ) );
+ brw_MUL( p, x0y0, x0y0, param0 );
+ brw_MUL( p, x0y1, x0y1, param0 );
+
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param1 );
+ brw_MUL( p, tmp[ 2 ], tmp[ 2 ], param1 );
+ brw_MUL( p, tmp[ 1 ], tmp[ 1 ], t );
+ brw_MUL( p, tmp[ 3 ], tmp[ 3 ], t );
+
+ brw_ADD( p, x0y0, x0y0, tmp[ 0 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 2 ] );
+ brw_ADD( p, x0y1, x0y1, tmp[ 1 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 3 ] );
+
+ /* We interpolate between the gradients using the polynomial
+ 6t^5 - 15t^4 + 10t^3 (Perlin). */
+ brw_MUL( p, tmp[ 0 ], param0, brw_imm_f( 6.0 ) );
+ brw_MUL( p, tmp[ 1 ], param1, brw_imm_f( 6.0 ) );
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) );
+ brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( -15.0 ) );
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
+ brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
+ brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work to fill the
+ pipeline */
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) );
+ brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( 10.0 ) );
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
+ brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
+ brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work to fill the
+ pipeline */
+ brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
+ brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
+ brw_MUL( p, param0, tmp[ 0 ], param0 );
+ brw_MUL( p, param1, tmp[ 1 ], param1 );
+
+ /* Here we interpolate in the y dimension... */
+ brw_MUL( p, x0y1, x0y1, param1 );
+ brw_MUL( p, x1y1, x1y1, param1 );
+ brw_ADD( p, x0y0, x0y0, x0y1 );
+ brw_ADD( p, x1y0, x1y0, x1y1 );
+
+ /* And now in x. There are horrible register dependencies here,
+ but we have nothing else to do. */
+ brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+ brw_MUL( p, x1y0, x1y0, param0 );
+ brw_ADD( p, x0y0, x0y0, x1y0 );
+
+ /* scale by pow( 2, -15 ), as described above */
+ brw_MUL( p, param0, x0y0, brw_imm_f( 0.000030517578125 ) );
+
+ release_tmps( c, mark );
+}
+
+static void emit_noise2( struct brw_wm_compile *c,
+ struct prog_instruction *inst )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, src1, param0, param1, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+ int mark = mark_tmps( c );
+
+ assert( mark == 0 );
+
+ src0 = get_src_reg( c, inst->SrcReg, 0, 1 );
+ src1 = get_src_reg( c, inst->SrcReg, 1, 1 );
+
+ param0 = alloc_tmp( c );
+ param1 = alloc_tmp( c );
+
+ brw_MOV( p, param0, src0 );
+ brw_MOV( p, param1, src1 );
+
+ invoke_subroutine( c, SUB_NOISE2, noise2_sub );
+
+ /* Fill in the result: */
+ brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ brw_MOV( p, dst, param0 );
+ }
+ }
+ if( inst->SaturateMode == SATURATE_ZERO_ONE )
+ brw_set_saturate( p, 0 );
+
+ release_tmps( c, mark );
+}
+
+/* The three-dimensional case is much like the one- and two- versions above,
+ but since the number of corners is rapidly growing we now pack 16 16-bit
+ hashes into each register to extract more parallelism from the EUs. */
+static void noise3_sub( struct brw_wm_compile *c ) {
+
+ struct brw_compile *p = &c->func;
+ struct brw_reg param0, param1, param2,
+ x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */
+ xi, yi, zi, /* interpolation coefficients */
+ t, tmp[ 8 ], /* float temporaries */
+ itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
+ wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
+ int i;
+ int mark = mark_tmps( c );
+
+ x0y0 = alloc_tmp( c );
+ x0y1 = alloc_tmp( c );
+ x1y0 = alloc_tmp( c );
+ x1y1 = alloc_tmp( c );
+ xi = alloc_tmp( c );
+ yi = alloc_tmp( c );
+ zi = alloc_tmp( c );
+ t = alloc_tmp( c );
+ for( i = 0; i < 8; i++ ) {
+ tmp[ i ] = alloc_tmp( c );
+ itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );
+ wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 );
+ }
+
+ param0 = lookup_tmp( c, mark - 4 );
+ param1 = lookup_tmp( c, mark - 3 );
+ param2 = lookup_tmp( c, mark - 2 );
+
+ brw_set_access_mode( p, BRW_ALIGN_1 );
+
+ /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
+ be hashed. Also compute the remainders (offsets within the unit
+ cube), interleaved to reduce register dependency penalties. */
+ brw_RNDD( p, itmp[ 0 ], param0 );
+ brw_RNDD( p, itmp[ 1 ], param1 );
+ brw_RNDD( p, itmp[ 2 ], param2 );
+ brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBC8F ) ); /* constant used later */
+ brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0xD0BD ) ); /* constant used later */
+ brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0x9B93 ) ); /* constant used later */
+ brw_FRC( p, param0, param0 );
+ brw_FRC( p, param1, param1 );
+ brw_FRC( p, param2, param2 );
+ /* Since we now have only 16 bits of precision in the hash, we must
+ be more careful about thorough mixing to maintain entropy as we
+ squash the input vector into a small scalar. */
+ brw_MUL( p, brw_acc_reg(), itmp[ 4 ], itmp[ 0 ] );
+ brw_MAC( p, brw_acc_reg(), itmp[ 5 ], itmp[ 1 ] );
+ brw_MAC( p, itmp[ 0 ], itmp[ 6 ], itmp[ 2 ] );
+ brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ),
+ brw_imm_uw( 0xBC8F ) );
+
+ /* Temporarily disable the execution mask while we work with ExecSize=16
+ channels (the mask is set for ExecSize=8 and is probably incorrect).
+ Although this might cause execution of unwanted channels, the code
+ writes only to temporary registers and has no side effects, so
+ disabling the mask is harmless. */
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) );
+ brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) );
+ brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) );
+
+ /* We're now ready to perform the hashing. The eight hashes are
+ interleaved for performance. The hash function used is
+ designed to rapidly achieve avalanche and require only 16x16
+ bit multiplication, and 8-bit swizzles (which we get for
+ free). */
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
+ odd_bytes( wtmp[ i ] ) );
+ for( i = 0; i < 4; i++ )
+ brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) );
+ for( i = 0; i < 4; i++ )
+ brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
+ odd_bytes( wtmp[ i ] ) );
+ brw_pop_insn_state( p );
+
+ /* Now we want to initialise the four rear gradients based on the
+ hashes. Format conversion from signed integer to float leaves
+ everything scaled too high by a factor of pow( 2, 15 ), but
+ we correct for that right at the end. */
+ /* x component */
+ brw_ADD( p, t, param0, brw_imm_f( -1.0 ) );
+ brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) );
+ brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );
+ brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) );
+ brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) );
+ brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, x1y0, x1y0, t );
+ brw_MUL( p, x1y1, x1y1, t );
+ brw_ADD( p, t, param1, brw_imm_f( -1.0 ) );
+ brw_MUL( p, x0y0, x0y0, param0 );
+ brw_MUL( p, x0y1, x0y1, param0 );
+
+ /* y component */
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) );
+ brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+ brw_ADD( p, t, param0, brw_imm_f( -1.0 ) );
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 );
+
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+
+ /* z component */
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param2 );
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param2 );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param2 );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param2 );
+
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+ /* We interpolate between the gradients using the polynomial
+ 6t^5 - 15t^4 + 10t^3 (Perlin). */
+ brw_MUL( p, xi, param0, brw_imm_f( 6.0 ) );
+ brw_MUL( p, yi, param1, brw_imm_f( 6.0 ) );
+ brw_MUL( p, zi, param2, brw_imm_f( 6.0 ) );
+ brw_ADD( p, xi, xi, brw_imm_f( -15.0 ) );
+ brw_ADD( p, yi, yi, brw_imm_f( -15.0 ) );
+ brw_ADD( p, zi, zi, brw_imm_f( -15.0 ) );
+ brw_MUL( p, xi, xi, param0 );
+ brw_MUL( p, yi, yi, param1 );
+ brw_MUL( p, zi, zi, param2 );
+ brw_ADD( p, xi, xi, brw_imm_f( 10.0 ) );
+ brw_ADD( p, yi, yi, brw_imm_f( 10.0 ) );
+ brw_ADD( p, zi, zi, brw_imm_f( 10.0 ) );
+ brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work */
+ brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work */
+ brw_MUL( p, xi, xi, param0 );
+ brw_MUL( p, yi, yi, param1 );
+ brw_MUL( p, zi, zi, param2 );
+ brw_MUL( p, xi, xi, param0 );
+ brw_MUL( p, yi, yi, param1 );
+ brw_MUL( p, zi, zi, param2 );
+ brw_MUL( p, xi, xi, param0 );
+ brw_MUL( p, yi, yi, param1 );
+ brw_MUL( p, zi, zi, param2 );
+
+ /* Here we interpolate in the y dimension... */
+ brw_MUL( p, x0y1, x0y1, yi );
+ brw_MUL( p, x1y1, x1y1, yi );
+ brw_ADD( p, x0y0, x0y0, x0y1 );
+ brw_ADD( p, x1y0, x1y0, x1y1 );
+
+ /* And now in x. Leave the result in tmp[ 0 ] (see below)... */
+ brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+ brw_MUL( p, x1y0, x1y0, xi );
+ brw_ADD( p, tmp[ 0 ], x0y0, x1y0 );
+
+ /* Now do the same thing for the front four gradients... */
+ /* x component */
+ brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) );
+ brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) );
+ brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) );
+ brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) );
+ brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, x1y0, x1y0, t );
+ brw_MUL( p, x1y1, x1y1, t );
+ brw_ADD( p, t, param1, brw_imm_f( -1.0 ) );
+ brw_MUL( p, x0y0, x0y0, param0 );
+ brw_MUL( p, x0y1, x0y1, param0 );
+
+ /* y component */
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+
+ brw_push_insn_state( p );
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) );
+ brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) );
+ brw_pop_insn_state( p );
+
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+ brw_ADD( p, t, param2, brw_imm_f( -1.0 ) );
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 );
+
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+
+ /* z component */
+ brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+ brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+ brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+
+ brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
+ brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+ brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
+ brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+
+ brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+ brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+ brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+ /* The interpolation coefficients are still around from last time, so
+ again interpolate in the y dimension... */
+ brw_ADD( p, x0y1, x0y1, negate( x0y0 ) );
+ brw_ADD( p, x1y1, x1y1, negate( x1y0 ) );
+ brw_MUL( p, x0y1, x0y1, yi );
+ brw_MUL( p, x1y1, x1y1, yi );
+ brw_ADD( p, x0y0, x0y0, x0y1 );
+ brw_ADD( p, x1y0, x1y0, x1y1 );
+
+ /* And now in x. The rear face is in tmp[ 0 ] (see above), so this
+ time put the front face in tmp[ 1 ] and we're nearly there... */
+ brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+ brw_MUL( p, x1y0, x1y0, xi );
+ brw_ADD( p, tmp[ 1 ], x0y0, x1y0 );
+
+ /* The final interpolation, in the z dimension: */
+ brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) );
+ brw_MUL( p, tmp[ 1 ], tmp[ 1 ], zi );
+ brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] );
+
+ /* scale by pow( 2, -15 ), as described above */
+ brw_MUL( p, param0, tmp[ 0 ], brw_imm_f( 0.000030517578125 ) );
+
+ release_tmps( c, mark );
+}
+
+static void emit_noise3( struct brw_wm_compile *c,
+ struct prog_instruction *inst )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg src0, src1, src2, param0, param1, param2, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+ int mark = mark_tmps( c );
+
+ assert( mark == 0 );
+
+ src0 = get_src_reg( c, inst->SrcReg, 0, 1 );
+ src1 = get_src_reg( c, inst->SrcReg, 1, 1 );
+ src2 = get_src_reg( c, inst->SrcReg, 2, 1 );
+
+ param0 = alloc_tmp( c );
+ param1 = alloc_tmp( c );
+ param2 = alloc_tmp( c );
+
+ brw_MOV( p, param0, src0 );
+ brw_MOV( p, param1, src1 );
+ brw_MOV( p, param2, src2 );
+
+ invoke_subroutine( c, SUB_NOISE3, noise3_sub );
+
+ /* Fill in the result: */
+ brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
+ for (i = 0 ; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i, 1);
+ brw_MOV( p, dst, param0 );
+ }
+ }
+ if( inst->SaturateMode == SATURATE_ZERO_ONE )
+ brw_set_saturate( p, 0 );
+
+ release_tmps( c, mark );
+}
+
static void emit_wpos_xy(struct brw_wm_compile *c,
struct prog_instruction *inst)
{
@@ -1201,8 +1912,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
case OPCODE_LRP:
emit_lrp(c, inst);
break;
- case OPCODE_INT:
- emit_int(c, inst);
+ case OPCODE_TRUNC:
+ emit_trunc(c, inst);
break;
case OPCODE_MOV:
emit_mov(c, inst);
@@ -1276,6 +1987,17 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
case OPCODE_MAD:
emit_mad(c, inst);
break;
+ case OPCODE_NOISE1:
+ emit_noise1(c, inst);
+ break;
+ case OPCODE_NOISE2:
+ emit_noise2(c, inst);
+ break;
+ case OPCODE_NOISE3:
+ emit_noise3(c, inst);
+ break;
+ /* case OPCODE_NOISE4: */
+ /* not yet implemented */
case OPCODE_TEX:
emit_tex(c, inst);
break;
@@ -1368,7 +2090,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
brw_wm_pass_fp(c);
- c->tmp_index = 127;
brw_wm_emit_glsl(brw, c);
c->prog_data.total_grf = c->reg_index;
c->prog_data.total_scratch = 0;
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index 7dbc646370d..c9b88b0ae1f 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -30,6 +30,7 @@
#include "intel_decode.h"
#include "intel_reg.h"
#include "intel_bufmgr.h"
+#include "intel_buffers.h"
/* Relocations in kernel space:
* - pass dma buffer seperately
@@ -133,6 +134,9 @@ do_flush_locked(struct intel_batchbuffer *batch,
{
struct intel_context *intel = batch->intel;
int ret = 0;
+ unsigned int num_cliprects = 0;
+ struct drm_clip_rect *cliprects = NULL;
+ int x_off = 0, y_off = 0;
if (batch->buffer)
dri_bo_subdata (batch->buf, 0, used, batch->buffer);
@@ -142,23 +146,21 @@ do_flush_locked(struct intel_batchbuffer *batch,
batch->map = NULL;
batch->ptr = NULL;
- /* Throw away non-effective packets. Won't work once we have
- * hardware contexts which would preserve statechanges beyond a
- * single buffer.
- */
- if (!(intel->numClipRects == 0 &&
- batch->cliprect_mode == LOOP_CLIPRECTS) || intel->no_hw) {
- dri_bo_exec(batch->buf, used,
- intel->pClipRects,
- batch->cliprect_mode != LOOP_CLIPRECTS ?
- 0 : intel->numClipRects,
- (((GLuint) intel->drawX) & 0xffff) |
- (((GLuint) intel->drawY) << 16));
+ if (batch->cliprect_mode == LOOP_CLIPRECTS) {
+ intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
+ }
+ /* Dispatch the batchbuffer, if it has some effect (nonzero cliprects).
+ * Can't short-circuit like this once we have hardware contexts, but we
+ * should always be in DRI2 mode by then anyway.
+ */
+ if ((batch->cliprect_mode != LOOP_CLIPRECTS ||
+ num_cliprects != 0) && !intel->no_hw) {
+ dri_bo_exec(batch->buf, used, cliprects, num_cliprects,
+ (x_off & 0xffff) | (y_off << 16));
}
- if (intel->numClipRects == 0 &&
- batch->cliprect_mode == LOOP_CLIPRECTS) {
+ if (batch->cliprect_mode == LOOP_CLIPRECTS && num_cliprects == 0) {
if (allow_unlock) {
/* If we are not doing any actual user-visible rendering,
* do a sched_yield to keep the app from pegging the cpu while
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
index 1f8096b32ec..8129996979f 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
@@ -19,6 +19,9 @@ enum cliprect_mode {
/**
* Batchbuffer contents require looping over per cliprect at batch submit
* time.
+ *
+ * This will be upgraded to NO_LOOP_CLIPRECTS when there's a single
+ * constant cliprect, as in DRI2 or FBO rendering.
*/
LOOP_CLIPRECTS,
/**
@@ -29,8 +32,10 @@ enum cliprect_mode {
/**
* Batchbuffer contents contain drawing that already handles cliprects, such
* as 2D drawing to front/back/depth that doesn't respect DRAWING_RECTANGLE.
+ *
* Equivalent behavior to NO_LOOP_CLIPRECTS, but may not persist in batch
- * outside of LOCK/UNLOCK.
+ * outside of LOCK/UNLOCK. This is upgraded to just NO_LOOP_CLIPRECTS when
+ * there's a constant cliprect, as in DRI2 or FBO rendering.
*/
REFERENCES_CLIPRECTS
};
@@ -115,6 +120,11 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
if (intel_batchbuffer_space(batch) < sz)
intel_batchbuffer_flush(batch);
+ if ((cliprect_mode == LOOP_CLIPRECTS ||
+ cliprect_mode == REFERENCES_CLIPRECTS) &&
+ batch->intel->constant_cliprect)
+ cliprect_mode = NO_LOOP_CLIPRECTS;
+
if (cliprect_mode != IGNORE_CLIPRECTS) {
if (batch->cliprect_mode == IGNORE_CLIPRECTS) {
batch->cliprect_mode = cliprect_mode;
diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c
index 2917401e023..e1046f4a5de 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.c
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@@ -272,24 +272,53 @@ intelEmitCopyBlit(struct intel_context *intel,
GLshort w, GLshort h,
GLenum logic_op)
{
- GLuint CMD, BR13;
+ GLuint CMD, BR13, pass = 0;
int dst_y2 = dst_y + h;
int dst_x2 = dst_x + w;
dri_bo *aper_array[3];
BATCH_LOCALS;
/* do space/cliprects check before going any further */
- intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS);
- again:
- aper_array[0] = intel->batch->buf;
- aper_array[1] = dst_buffer;
- aper_array[2] = src_buffer;
-
- if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
- intel_batchbuffer_flush(intel->batch);
- goto again;
+ do {
+ aper_array[0] = intel->batch->buf;
+ aper_array[1] = dst_buffer;
+ aper_array[2] = src_buffer;
+
+ if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
+ intel_batchbuffer_flush(intel->batch);
+ pass++;
+ } else
+ break;
+ } while (pass < 2);
+
+ if (pass >= 2) {
+ GLboolean locked = GL_FALSE;
+ if (!intel->locked) {
+ LOCK_HARDWARE(intel);
+ locked = GL_TRUE;
+ }
+
+ dri_bo_map(dst_buffer, GL_TRUE);
+ dri_bo_map(src_buffer, GL_FALSE);
+ _mesa_copy_rect((GLubyte *)dst_buffer->virtual + dst_offset,
+ cpp,
+ dst_pitch,
+ dst_x, dst_y,
+ w, h,
+ (GLubyte *)src_buffer->virtual + src_offset,
+ src_pitch,
+ src_x, src_y);
+
+ dri_bo_unmap(src_buffer);
+ dri_bo_unmap(dst_buffer);
+
+ if (locked)
+ UNLOCK_HARDWARE(intel);
+
+ return;
}
+ intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS);
DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
__FUNCTION__,
src_buffer, src_pitch, src_offset, src_x, src_y,
@@ -393,6 +422,9 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
struct gl_framebuffer *fb = ctx->DrawBuffer;
GLuint clear_depth;
GLbitfield skipBuffers = 0;
+ unsigned int num_cliprects;
+ struct drm_clip_rect *cliprects;
+ int x_off, y_off;
BATCH_LOCALS;
/*
@@ -417,7 +449,8 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
intelFlush(&intel->ctx);
LOCK_HARDWARE(intel);
- if (intel->numClipRects) {
+ intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
+ if (num_cliprects) {
GLint cx, cy, cw, ch;
drm_clip_rect_t clear;
int i;
@@ -432,15 +465,15 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
/* clearing a window */
/* flip top to bottom */
- clear.x1 = cx + intel->drawX;
+ clear.x1 = cx + x_off;
clear.y1 = intel->driDrawable->y + intel->driDrawable->h - cy - ch;
clear.x2 = clear.x1 + cw;
clear.y2 = clear.y1 + ch;
}
else {
/* clearing FBO */
- assert(intel->numClipRects == 1);
- assert(intel->pClipRects == &intel->fboRect);
+ assert(num_cliprects == 1);
+ assert(cliprects == &intel->fboRect);
clear.x1 = cx;
clear.y1 = cy;
clear.x2 = clear.x1 + cw;
@@ -448,8 +481,8 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
/* no change to mask */
}
- for (i = 0; i < intel->numClipRects; i++) {
- const drm_clip_rect_t *box = &intel->pClipRects[i];
+ for (i = 0; i < num_cliprects; i++) {
+ const drm_clip_rect_t *box = &cliprects[i];
drm_clip_rect_t b;
GLuint buf;
GLuint clearMask = mask; /* use copy, since we modify it below */
diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c
index f5eaf765f38..f8f009c6a30 100644
--- a/src/mesa/drivers/dri/intel/intel_buffers.c
+++ b/src/mesa/drivers/dri/intel/intel_buffers.c
@@ -123,99 +123,40 @@ intel_readbuf_region(struct intel_context *intel)
return NULL;
}
-
-
-/**
- * Update the following fields for rendering to a user-created FBO:
- * intel->numClipRects
- * intel->pClipRects
- * intel->drawX
- * intel->drawY
- */
-static void
-intelSetRenderbufferClipRects(struct intel_context *intel)
-{
- /* If the batch contents require looping over cliprects, flush them before
- * we go changing which cliprects get referenced when that happens.
- */
- if (intel->batch->cliprect_mode == LOOP_CLIPRECTS &&
- (intel->fboRect.x2 != intel->ctx.DrawBuffer->Width ||
- intel->fboRect.x2 != intel->ctx.DrawBuffer->Height))
- intel_batchbuffer_flush(intel->batch);
-
- assert(intel->ctx.DrawBuffer->Width > 0);
- assert(intel->ctx.DrawBuffer->Height > 0);
- intel->fboRect.x1 = 0;
- intel->fboRect.y1 = 0;
- intel->fboRect.x2 = intel->ctx.DrawBuffer->Width;
- intel->fboRect.y2 = intel->ctx.DrawBuffer->Height;
- intel->numClipRects = 1;
- intel->pClipRects = &intel->fboRect;
- intel->drawX = 0;
- intel->drawY = 0;
-}
-
-
-/**
- * As above, but for rendering to front buffer of a window.
- * \sa intelSetRenderbufferClipRects
- */
-static void
-intelSetFrontClipRects(struct intel_context *intel)
-{
- __DRIdrawablePrivate *dPriv = intel->driDrawable;
-
- if (!dPriv)
- return;
-
- /* If the batch contents require looping over cliprects, flush them before
- * we go changing which cliprects get referenced when that happens.
- */
- if (intel->batch->cliprect_mode == LOOP_CLIPRECTS &&
- intel->pClipRects != dPriv->pClipRects)
- intel_batchbuffer_flush(intel->batch);
- intel->numClipRects = dPriv->numClipRects;
- intel->pClipRects = dPriv->pClipRects;
- intel->drawX = dPriv->x;
- intel->drawY = dPriv->y;
-}
-
-
-/**
- * As above, but for rendering to back buffer of a window.
- */
-static void
-intelSetBackClipRects(struct intel_context *intel)
+void
+intel_get_cliprects(struct intel_context *intel,
+ struct drm_clip_rect **cliprects,
+ unsigned int *num_cliprects,
+ int *x_off, int *y_off)
{
__DRIdrawablePrivate *dPriv = intel->driDrawable;
- struct intel_framebuffer *intel_fb;
-
- if (!dPriv)
- return;
-
- intel_fb = dPriv->driverPrivate;
+ struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
- if (intel_fb->pf_active || dPriv->numBackClipRects == 0) {
+ if (intel->constant_cliprect) {
+ /* FBO or DRI2 rendering, which can just use the fb's size. */
+ intel->fboRect.x1 = 0;
+ intel->fboRect.y1 = 0;
+ intel->fboRect.x2 = intel->ctx.DrawBuffer->Width;
+ intel->fboRect.y2 = intel->ctx.DrawBuffer->Height;
+
+ *cliprects = &intel->fboRect;
+ *num_cliprects = 1;
+ *x_off = 0;
+ *y_off = 0;
+ } else if (intel->front_cliprects ||
+ intel_fb->pf_active || dPriv->numBackClipRects == 0) {
/* use the front clip rects */
- if (intel->batch->cliprect_mode == LOOP_CLIPRECTS &&
- intel->pClipRects != dPriv->pClipRects)
- intel_batchbuffer_flush(intel->batch);
-
- intel->numClipRects = dPriv->numClipRects;
- intel->pClipRects = dPriv->pClipRects;
- intel->drawX = dPriv->x;
- intel->drawY = dPriv->y;
+ *cliprects = dPriv->pClipRects;
+ *num_cliprects = dPriv->numClipRects;
+ *x_off = dPriv->x;
+ *y_off = dPriv->y;
}
else {
/* use the back clip rects */
- if (intel->batch->cliprect_mode == LOOP_CLIPRECTS &&
- intel->pClipRects != dPriv->pBackClipRects)
- intel_batchbuffer_flush(intel->batch);
-
- intel->numClipRects = dPriv->numBackClipRects;
- intel->pClipRects = dPriv->pBackClipRects;
- intel->drawX = dPriv->backX;
- intel->drawY = dPriv->backY;
+ *num_cliprects = dPriv->numBackClipRects;
+ *cliprects = dPriv->pBackClipRects;
+ *x_off = dPriv->backX;
+ *y_off = dPriv->backY;
}
}
@@ -300,29 +241,6 @@ intelWindowMoved(struct intel_context *intel)
__DRIdrawablePrivate *dPriv = intel->driDrawable;
struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
- if (!intel->ctx.DrawBuffer) {
- /* when would this happen? -BP */
- intelSetFrontClipRects(intel);
- }
- else if (intel->ctx.DrawBuffer->Name != 0) {
- /* drawing to user-created FBO - do nothing */
- /* Cliprects would be set from intelDrawBuffer() */
- }
- else {
- /* drawing to a window */
- switch (intel_fb->Base._ColorDrawBufferIndexes[0]) {
- case BUFFER_FRONT_LEFT:
- intelSetFrontClipRects(intel);
- break;
- case BUFFER_BACK_LEFT:
- intelSetBackClipRects(intel);
- break;
- default:
- intelSetFrontClipRects(intel);
- }
-
- }
-
if (!intel->intelScreen->driScrnPriv->dri2.enabled &&
intel->intelScreen->driScrnPriv->ddx_version.minor >= 7) {
volatile struct drm_i915_sarea *sarea = intel->sarea;
@@ -894,7 +812,6 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
struct intel_context *intel = intel_context(ctx);
struct intel_region *colorRegions[MAX_DRAW_BUFFERS], *depthRegion = NULL;
struct intel_renderbuffer *irbDepth = NULL, *irbStencil = NULL;
- int front = 0; /* drawing to front color buffer? */
if (!fb) {
/* this can happen during the initial context initialization */
@@ -927,52 +844,44 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
*/
if (fb->_NumColorDrawBuffers == 0) {
/* writing to 0 */
- FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE);
colorRegions[0] = NULL;
-
- if (fb->Name != 0)
- intelSetRenderbufferClipRects(intel);
+ intel->constant_cliprect = GL_TRUE;
} else if (fb->_NumColorDrawBuffers > 1) {
int i;
struct intel_renderbuffer *irb;
- FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE);
- if (fb->Name != 0)
- intelSetRenderbufferClipRects(intel);
for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]);
- colorRegions[i] = (irb && irb->region) ? irb->region : NULL;
+ colorRegions[i] = irb ? irb->region : NULL;
}
+ intel->constant_cliprect = GL_TRUE;
}
else {
- /* draw to exactly one color buffer */
- /*_mesa_debug(ctx, "Hardware rendering\n");*/
- FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE);
- if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
- front = 1;
- }
-
- /*
- * Get the intel_renderbuffer for the colorbuffer we're drawing into.
- * And set up cliprects.
+ /* Get the intel_renderbuffer for the single colorbuffer we're drawing
+ * into, and set up cliprects if it's .
*/
if (fb->Name == 0) {
+ intel->constant_cliprect = intel->driScreen->dri2.enabled;
/* drawing to window system buffer */
- if (front) {
- intelSetFrontClipRects(intel);
+ if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
+ if (!intel->constant_cliprect && !intel->front_cliprects)
+ intel_batchbuffer_flush(intel->batch);
+ intel->front_cliprects = GL_TRUE;
colorRegions[0] = intel_get_rb_region(fb, BUFFER_FRONT_LEFT);
}
else {
- intelSetBackClipRects(intel);
+ if (!intel->constant_cliprect && intel->front_cliprects)
+ intel_batchbuffer_flush(intel->batch);
+ intel->front_cliprects = GL_FALSE;
colorRegions[0]= intel_get_rb_region(fb, BUFFER_BACK_LEFT);
}
}
else {
/* drawing to user-created FBO */
struct intel_renderbuffer *irb;
- intelSetRenderbufferClipRects(intel);
irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]);
colorRegions[0] = (irb && irb->region) ? irb->region : NULL;
+ intel->constant_cliprect = GL_TRUE;
}
}
diff --git a/src/mesa/drivers/dri/intel/intel_buffers.h b/src/mesa/drivers/dri/intel/intel_buffers.h
index a669a854317..e5afb37dd1a 100644
--- a/src/mesa/drivers/dri/intel/intel_buffers.h
+++ b/src/mesa/drivers/dri/intel/intel_buffers.h
@@ -29,6 +29,8 @@
#ifndef INTEL_BUFFERS_H
#define INTEL_BUFFERS_H
+#include "dri_util.h"
+#include "drm.h"
struct intel_context;
struct intel_framebuffer;
@@ -53,4 +55,9 @@ extern void intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb);
extern void intelInitBufferFuncs(struct dd_function_table *functions);
+void intel_get_cliprects(struct intel_context *intel,
+ struct drm_clip_rect **cliprects,
+ unsigned int *num_cliprects,
+ int *x_off, int *y_off);
+
#endif /* INTEL_BUFFERS_H */
diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h
index 170efd060ae..d1b4941601e 100644
--- a/src/mesa/drivers/dri/intel/intel_chipset.h
+++ b/src/mesa/drivers/dri/intel/intel_chipset.h
@@ -68,11 +68,12 @@
devid == PCI_CHIP_I965_GME || \
devid == PCI_CHIP_GM45_GM)
-#define IS_GM45_GM(devid) (devid == PCI_CHIP_GM45_GM)
-#define IS_G4X(devid) (devid == PCI_CHIP_IGD_E_G || \
+#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
devid == PCI_CHIP_Q45_G || \
devid == PCI_CHIP_G45_G || \
devid == PCI_CHIP_G41_G)
+#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM)
+#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid))
#define IS_915(devid) (devid == PCI_CHIP_I915_G || \
devid == PCI_CHIP_E7221_G || \
@@ -91,7 +92,6 @@
devid == PCI_CHIP_I965_GM || \
devid == PCI_CHIP_I965_GME || \
devid == PCI_CHIP_I946_GZ || \
- IS_GM45_GM(devid) || \
IS_G4X(devid))
#define IS_9XX(devid) (IS_915(devid) || \
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 2b3a9b9d371..9ac18e69609 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -588,9 +588,6 @@ intelInitContext(struct intel_context *intel,
intel->driFd = sPriv->fd;
intel->driHwLock = sPriv->lock;
- intel->width = intelScreen->width;
- intel->height = intelScreen->height;
-
driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache,
intel->driScreen->myNum,
IS_965(intelScreen->deviceID) ? "i965" : "i915");
@@ -932,38 +929,6 @@ intelContendedLock(struct intel_context *intel, GLuint flags)
sarea->ctxOwner, intel->hHWContext);
}
- if (sarea->width != intel->width || sarea->height != intel->height) {
- int numClipRects = intel->numClipRects;
-
- /*
- * FIXME: Really only need to do this when drawing to a
- * common back- or front buffer.
- */
-
- /*
- * This will essentially drop the outstanding batchbuffer on
- * the floor.
- */
- intel->numClipRects = 0;
-
- if (intel->Fallback)
- _swrast_flush(&intel->ctx);
-
- if (!IS_965(intel->intelScreen->deviceID))
- INTEL_FIREVERTICES(intel);
-
- if (intel->batch->map != intel->batch->ptr)
- intel_batchbuffer_flush(intel->batch);
-
- intel->numClipRects = numClipRects;
-
- /* force window update */
- intel->lastStamp = 0;
-
- intel->width = sarea->width;
- intel->height = sarea->height;
- }
-
/* Drawable changed?
*/
if (dPriv && intel->lastStamp != dPriv->lastStamp) {
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index 554159ac441..3938af4c72c 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -235,10 +235,18 @@ struct intel_context
/* These refer to the current drawing buffer:
*/
- int drawX, drawY; /**< origin of drawing area within region */
- GLuint numClipRects; /**< cliprects for drawing */
- drm_clip_rect_t *pClipRects;
struct gl_texture_object *frame_buffer_texobj;
+ /**
+ * Set to true if a single constant cliprect should be used in the
+ * batchbuffer. Otherwise, cliprects must be calculated at batchbuffer
+ * flush time while the lock is held.
+ */
+ GLboolean constant_cliprect;
+ /**
+ * In !constant_cliprect mode, set to true if the front cliprects should be
+ * used instead of back.
+ */
+ GLboolean front_cliprects;
drm_clip_rect_t fboRect; /**< cliprect for FBO rendering */
int perf_boxes;
@@ -271,10 +279,6 @@ struct intel_context
*/
driOptionCache optionCache;
- /* Last seen width/height of the screen */
- int width;
- int height;
-
int64_t swap_ust;
int64_t swap_missed_ust;
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index 89635198931..0565197ea08 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -191,11 +191,7 @@ do_blit_bitmap( GLcontext *ctx,
color8888 = INTEL_PACKCOLOR8888(ubcolor[0], ubcolor[1], ubcolor[2], ubcolor[3]);
color565 = INTEL_PACKCOLOR565(ubcolor[0], ubcolor[1], ubcolor[2]);
- /* Does zoom apply to bitmaps?
- */
- if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F) ||
- ctx->Pixel.ZoomX != 1.0F ||
- ctx->Pixel.ZoomY != 1.0F)
+ if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F))
return GL_FALSE;
LOCK_HARDWARE(intel);
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
index 50518a68792..8ebbc95a1d0 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
@@ -75,19 +75,21 @@ intel_texture_drawpixels(GLcontext * ctx,
/* We're going to mess with texturing with no regard to existing texture
* state, so if there is some set up we have to bail.
*/
- if (ctx->Texture._EnabledUnits != 0)
+ if (ctx->Texture._EnabledUnits != 0) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "glDrawPixels() fallback: texturing enabled\n");
return GL_FALSE;
+ }
/* Can't do textured DrawPixels with a fragment program, unless we were
* to generate a new program that sampled our texture and put the results
* in the fragment color before the user's program started.
*/
- if (ctx->FragmentProgram.Enabled)
- return GL_FALSE;
-
- /* Don't even want to think about it */
- if (format == GL_COLOR_INDEX)
+ if (ctx->FragmentProgram.Enabled) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "glDrawPixels() fallback: fragment program enabled\n");
return GL_FALSE;
+ }
/* We don't have a way to generate fragments with stencil values which *
* will set the resulting stencil value.
@@ -108,8 +110,12 @@ intel_texture_drawpixels(GLcontext * ctx,
* the color buffer, and sample the texture values into the fragment depth
* in a program.
*/
- if (format == GL_DEPTH_COMPONENT)
+ if (format == GL_DEPTH_COMPONENT) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr,
+ "glDrawPixels() fallback: format == GL_DEPTH_COMPONENT\n");
return GL_FALSE;
+ }
_mesa_PushAttrib(GL_ENABLE_BIT | GL_TRANSFORM_BIT | GL_TEXTURE_BIT |
GL_CURRENT_BIT);
@@ -141,22 +147,27 @@ intel_texture_drawpixels(GLcontext * ctx,
_mesa_PushMatrix();
_mesa_LoadIdentity();
+ /* Create the vertex buffer based on the current raster pos. The x and y
+ * we're handed are ctx->Current.RasterPos[0,1] rounded to integers.
+ * We also apply the depth. However, the W component is already multiplied
+ * into ctx->Current.RasterPos[0,1,2] and we can ignore it at this point.
+ */
vertices[0][0] = x;
vertices[0][1] = y;
vertices[0][2] = ctx->Current.RasterPos[2];
- vertices[0][3] = ctx->Current.RasterPos[3];
+ vertices[0][3] = 1.0;
vertices[1][0] = x + width * ctx->Pixel.ZoomX;
vertices[1][1] = y;
vertices[1][2] = ctx->Current.RasterPos[2];
- vertices[1][3] = ctx->Current.RasterPos[3];
+ vertices[1][3] = 1.0;
vertices[2][0] = x + width * ctx->Pixel.ZoomX;
vertices[2][1] = y + height * ctx->Pixel.ZoomY;
vertices[2][2] = ctx->Current.RasterPos[2];
- vertices[2][3] = ctx->Current.RasterPos[3];
+ vertices[2][3] = 1.0;
vertices[3][0] = x;
vertices[3][1] = y + height * ctx->Pixel.ZoomY;
vertices[3][2] = ctx->Current.RasterPos[2];
- vertices[3][3] = ctx->Current.RasterPos[3];
+ vertices[3][3] = 1.0;
texcoords[0][0] = 0.0;
texcoords[0][1] = 0.0;
@@ -212,8 +223,12 @@ intel_stencil_drawpixels(GLcontext * ctx,
return GL_TRUE;
/* Can't do a per-bit writemask while treating stencil as rgba data. */
- if ((ctx->Stencil.WriteMask[0] & 0xff) != 0xff)
+ if ((ctx->Stencil.WriteMask[0] & 0xff) != 0xff) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "glDrawPixels(STENCIL_INDEX) fallback: "
+ "stencil mask enabled\n");
return GL_FALSE;
+ }
/* We use FBOs for our wrapping of the depthbuffer into a color
* destination.
@@ -224,21 +239,29 @@ intel_stencil_drawpixels(GLcontext * ctx,
/* We're going to mess with texturing with no regard to existing texture
* state, so if there is some set up we have to bail.
*/
- if (ctx->Texture._EnabledUnits != 0)
+ if (ctx->Texture._EnabledUnits != 0) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "glDrawPixels(STENCIL_INDEX) fallback: "
+ "texturing enabled\n");
return GL_FALSE;
+ }
/* Can't do textured DrawPixels with a fragment program, unless we were
* to generate a new program that sampled our texture and put the results
* in the fragment color before the user's program started.
*/
- if (ctx->FragmentProgram.Enabled)
+ if (ctx->FragmentProgram.Enabled) {
+ if (INTEL_DEBUG & DEBUG_FALLBACKS)
+ fprintf(stderr, "glDrawPixels(STENCIL_INDEX) fallback: "
+ "fragment program enabled\n");
return GL_FALSE;
+ }
/* Check that we can load in a texture this big. */
if (width > (1 << (ctx->Const.MaxTextureLevels - 1)) ||
height > (1 << (ctx->Const.MaxTextureLevels - 1))) {
if (INTEL_DEBUG & DEBUG_FALLBACKS)
- fprintf(stderr, "glDrawPixels(STENCIL_IDNEX) fallback: "
+ fprintf(stderr, "glDrawPixels(STENCIL_INDEX) fallback: "
"bitmap too large (%dx%d)\n",
width, height);
return GL_FALSE;
diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h
index c21f4080935..81a7386e429 100644
--- a/src/mesa/drivers/dri/intel/intel_reg.h
+++ b/src/mesa/drivers/dri/intel/intel_reg.h
@@ -29,6 +29,8 @@
#define CMD_2D (0x2 << 29)
#define CMD_3D (0x3 << 29)
+#define MI_NOOP (CMD_MI | 0)
+
#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23)
#define MI_FLUSH (CMD_MI | (4 << 23))
@@ -44,6 +46,9 @@
#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 (CMD_3D | (0x1d<<24) | (0x04<<16))
#define I1_LOAD_S(n) (1<<(4+n))
+#define _3DSTATE_DRAWRECT_INFO (CMD_3D | (0x1d<<24) | (0x80<<16) | 0x3)
+#define _3DSTATE_DRAWRECT_INFO_I965 (CMD_3D | (3 << 27) | (1 << 24) | 0x2)
+
/** @{
*
* PIPE_CONTROL operation, a combination MI_FLUSH and register write with
diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c
index 45faf64c713..8dbcc3050ee 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.c
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@@ -79,30 +79,6 @@ intel_region_unmap(struct intel_context *intel, struct intel_region *region)
}
}
-static int
-intel_set_region_tiling_gem(struct intel_context *intel,
- struct intel_region *region,
- uint32_t bo_handle)
-{
- struct drm_i915_gem_get_tiling get_tiling;
- int ret;
-
- memset(&get_tiling, 0, sizeof(get_tiling));
-
- get_tiling.handle = bo_handle;
- ret = ioctl(intel->driFd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
- if (ret != 0) {
- fprintf(stderr, "Failed to get tiling state for region: %s\n",
- strerror(errno));
- return ret;
- }
-
- region->tiling = get_tiling.tiling_mode;
- region->bit_6_swizzle = get_tiling.swizzle_mode;
-
- return 0;
-}
-
static struct intel_region *
intel_region_alloc_internal(struct intel_context *intel,
GLuint cpp,
@@ -151,6 +127,7 @@ intel_region_alloc_for_handle(struct intel_context *intel,
{
struct intel_region *region;
dri_bo *buffer;
+ int ret;
buffer = intel_bo_gem_create_from_name(intel->bufmgr, name, handle);
@@ -159,7 +136,14 @@ intel_region_alloc_for_handle(struct intel_context *intel,
if (region == NULL)
return region;
- intel_set_region_tiling_gem(intel, region, handle);
+ ret = dri_bo_get_tiling(region->buffer, &region->tiling,
+ &region->bit_6_swizzle);
+ if (ret != 0) {
+ fprintf(stderr, "Couldn't get tiling of buffer %d (%s): %s\n",
+ handle, name, strerror(-ret));
+ intel_region_release(&region);
+ return NULL;
+ }
return region;
}
@@ -489,7 +473,14 @@ intel_recreate_static(struct intel_context *intel,
name,
region_desc->bo_handle);
- intel_set_region_tiling_gem(intel, region, region_desc->bo_handle);
+ ret = dri_bo_get_tiling(region->buffer, &region->tiling,
+ &region->bit_6_swizzle);
+ if (ret != 0) {
+ fprintf(stderr, "Couldn't get tiling of buffer %d (%s): %s\n",
+ region_desc->bo_handle, name, strerror(-ret));
+ intel_region_release(&region);
+ return NULL;
+ }
} else {
if (region->classic_map != NULL) {
drmUnmap(region->classic_map,
diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c
index 8e2b4456f81..8f4e681ffea 100644
--- a/src/mesa/drivers/dri/intel/intel_span.c
+++ b/src/mesa/drivers/dri/intel/intel_span.c
@@ -30,6 +30,7 @@
#include "main/mtypes.h"
#include "main/colormac.h"
+#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_screen.h"
#include "intel_span.h"
@@ -131,12 +132,8 @@ pwrite_8(struct intel_renderbuffer *irb, uint32_t offset, uint8_t val)
}
static uint32_t no_tile_swizzle(struct intel_renderbuffer *irb,
- struct intel_context *intel,
int x, int y)
{
- x += intel->drawX;
- y += intel->drawY;
-
return (y * irb->region->pitch + x) * irb->region->cpp;
}
@@ -145,7 +142,6 @@ static uint32_t no_tile_swizzle(struct intel_renderbuffer *irb,
*/
static uint32_t x_tile_swizzle(struct intel_renderbuffer *irb,
- struct intel_context *intel,
int x, int y)
{
int tile_stride;
@@ -155,9 +151,6 @@ static uint32_t x_tile_swizzle(struct intel_renderbuffer *irb,
int tile_off, tile_base;
tile_stride = (irb->pfPitch * irb->region->cpp) << 3;
-
- x += intel->drawX;
- y += intel->drawY;
xbyte = x * irb->region->cpp;
@@ -204,7 +197,6 @@ static uint32_t x_tile_swizzle(struct intel_renderbuffer *irb,
}
static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
- struct intel_context *intel,
int x, int y)
{
int tile_stride;
@@ -214,9 +206,6 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
int tile_off, tile_base;
tile_stride = (irb->pfPitch * irb->region->cpp) << 5;
-
- x += intel->drawX;
- y += intel->drawY;
xbyte = x * irb->region->cpp;
@@ -268,8 +257,12 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
struct intel_renderbuffer *irb = intel_renderbuffer(rb); \
const GLint yScale = irb->RenderToTexture ? 1 : -1; \
const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; \
+ unsigned int num_cliprects; \
+ struct drm_clip_rect *cliprects; \
+ int x_off, y_off; \
GLuint p; \
- (void) p;
+ (void) p; \
+ intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
/* XXX FBO: this is identical to the macro in spantmp2.h except we get
* the cliprect info from the context, not the driDrawable.
@@ -277,12 +270,12 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
*/
#define HW_CLIPLOOP() \
do { \
- int _nc = intel->numClipRects; \
+ int _nc = num_cliprects; \
while ( _nc-- ) { \
- int minx = intel->pClipRects[_nc].x1 - intel->drawX; \
- int miny = intel->pClipRects[_nc].y1 - intel->drawY; \
- int maxx = intel->pClipRects[_nc].x2 - intel->drawX; \
- int maxy = intel->pClipRects[_nc].y2 - intel->drawY;
+ int minx = cliprects[_nc].x1 - x_off; \
+ int miny = cliprects[_nc].y1 - y_off; \
+ int maxx = cliprects[_nc].x2 - x_off; \
+ int maxy = cliprects[_nc].y2 - y_off;
#if 0
}}
@@ -295,6 +288,11 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
#define HW_UNLOCK()
+/* Convenience macros to avoid typing the swizzle argument over and over */
+#define NO_TILE(_X, _Y) no_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off)
+#define X_TILE(_X, _Y) x_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off)
+#define Y_TILE(_X, _Y) y_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off)
+
/* 16 bit, RGB565 color spanline and pixel functions
*/
#define SPANTMP_PIXEL_FMT GL_RGB
@@ -302,8 +300,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
#define TAG(x) intel##x##_RGB565
#define TAG2(x,y) intel##x##_RGB565##y
-#define GET_VALUE(X, Y) pread_16(irb, no_tile_swizzle(irb, intel, X, Y))
-#define PUT_VALUE(X, Y, V) pwrite_16(irb, no_tile_swizzle(irb, intel, X, Y), V)
+#define GET_VALUE(X, Y) pread_16(irb, NO_TILE(X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_16(irb, NO_TILE(X, Y), V)
#include "spantmp2.h"
/* 32 bit, ARGB8888 color spanline and pixel functions
@@ -313,8 +311,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
#define TAG(x) intel##x##_ARGB8888
#define TAG2(x,y) intel##x##_ARGB8888##y
-#define GET_VALUE(X, Y) pread_32(irb, no_tile_swizzle(irb, intel, X, Y))
-#define PUT_VALUE(X, Y, V) pwrite_32(irb, no_tile_swizzle(irb, intel, X, Y), V)
+#define GET_VALUE(X, Y) pread_32(irb, NO_TILE(X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_32(irb, NO_TILE(X, Y), V)
#include "spantmp2.h"
/* 32 bit, xRGB8888 color spanline and pixel functions
@@ -324,8 +322,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
#define TAG(x) intel##x##_xRGB8888
#define TAG2(x,y) intel##x##_xRGB8888##y
-#define GET_VALUE(X, Y) pread_xrgb8888(irb, no_tile_swizzle(irb, intel, X, Y))
-#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, no_tile_swizzle(irb, intel, X, Y), V)
+#define GET_VALUE(X, Y) pread_xrgb8888(irb, NO_TILE(X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, NO_TILE(X, Y), V)
#include "spantmp2.h"
/* 16 bit RGB565 color tile spanline and pixel functions
@@ -336,8 +334,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
#define TAG(x) intel_XTile_##x##_RGB565
#define TAG2(x,y) intel_XTile_##x##_RGB565##y
-#define GET_VALUE(X, Y) pread_16(irb, x_tile_swizzle(irb, intel, X, Y))
-#define PUT_VALUE(X, Y, V) pwrite_16(irb, x_tile_swizzle(irb, intel, X, Y), V)
+#define GET_VALUE(X, Y) pread_16(irb, X_TILE(X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_16(irb, X_TILE(X, Y), V)
#include "spantmp2.h"
#define SPANTMP_PIXEL_FMT GL_RGB
@@ -345,8 +343,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
#define TAG(x) intel_YTile_##x##_RGB565
#define TAG2(x,y) intel_YTile_##x##_RGB565##y
-#define GET_VALUE(X, Y) pread_16(irb, y_tile_swizzle(irb, intel, X, Y))
-#define PUT_VALUE(X, Y, V) pwrite_16(irb, y_tile_swizzle(irb, intel, X, Y), V)
+#define GET_VALUE(X, Y) pread_16(irb, Y_TILE(X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_16(irb, Y_TILE(X, Y), V)
#include "spantmp2.h"
/* 32 bit ARGB888 color tile spanline and pixel functions
@@ -357,8 +355,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
#define TAG(x) intel_XTile_##x##_ARGB8888
#define TAG2(x,y) intel_XTile_##x##_ARGB8888##y
-#define GET_VALUE(X, Y) pread_32(irb, x_tile_swizzle(irb, intel, X, Y))
-#define PUT_VALUE(X, Y, V) pwrite_32(irb, x_tile_swizzle(irb, intel, X, Y), V)
+#define GET_VALUE(X, Y) pread_32(irb, X_TILE(X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_32(irb, X_TILE(X, Y), V)
#include "spantmp2.h"
#define SPANTMP_PIXEL_FMT GL_BGRA
@@ -366,8 +364,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
#define TAG(x) intel_YTile_##x##_ARGB8888
#define TAG2(x,y) intel_YTile_##x##_ARGB8888##y
-#define GET_VALUE(X, Y) pread_32(irb, y_tile_swizzle(irb, intel, X, Y))
-#define PUT_VALUE(X, Y, V) pwrite_32(irb, y_tile_swizzle(irb, intel, X, Y), V)
+#define GET_VALUE(X, Y) pread_32(irb, Y_TILE(X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_32(irb, Y_TILE(X, Y), V)
#include "spantmp2.h"
/* 32 bit xRGB888 color tile spanline and pixel functions
@@ -378,8 +376,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
#define TAG(x) intel_XTile_##x##_xRGB8888
#define TAG2(x,y) intel_XTile_##x##_xRGB8888##y
-#define GET_VALUE(X, Y) pread_xrgb8888(irb, x_tile_swizzle(irb, intel, X, Y))
-#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, x_tile_swizzle(irb, intel, X, Y), V)
+#define GET_VALUE(X, Y) pread_xrgb8888(irb, X_TILE(X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, X_TILE(X, Y), V)
#include "spantmp2.h"
#define SPANTMP_PIXEL_FMT GL_BGRA
@@ -387,15 +385,19 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
#define TAG(x) intel_YTile_##x##_xRGB8888
#define TAG2(x,y) intel_YTile_##x##_xRGB8888##y
-#define GET_VALUE(X, Y) pread_xrgb8888(irb, y_tile_swizzle(irb, intel, X, Y))
-#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, y_tile_swizzle(irb, intel, X, Y), V)
+#define GET_VALUE(X, Y) pread_xrgb8888(irb, Y_TILE(X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, Y_TILE(X, Y), V)
#include "spantmp2.h"
#define LOCAL_DEPTH_VARS \
struct intel_context *intel = intel_context(ctx); \
struct intel_renderbuffer *irb = intel_renderbuffer(rb); \
const GLint yScale = irb->RenderToTexture ? 1 : -1; \
- const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1;
+ const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; \
+ unsigned int num_cliprects; \
+ struct drm_clip_rect *cliprects; \
+ int x_off, y_off; \
+ intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
@@ -404,10 +406,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
** 16-bit depthbuffer functions.
**/
#define VALUE_TYPE GLushort
-#define WRITE_DEPTH(_x, _y, d) \
- pwrite_16(irb, no_tile_swizzle(irb, intel, _x, _y), d)
-#define READ_DEPTH(d, _x, _y) \
- d = pread_16(irb, no_tile_swizzle(irb, intel, _x, _y))
+#define WRITE_DEPTH(_x, _y, d) pwrite_16(irb, NO_TILE(_x, _y), d)
+#define READ_DEPTH(d, _x, _y) d = pread_16(irb, NO_TILE(_x, _y))
#define TAG(x) intel##x##_z16
#include "depthtmp.h"
@@ -416,10 +416,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
** 16-bit x tile depthbuffer functions.
**/
#define VALUE_TYPE GLushort
-#define WRITE_DEPTH(_x, _y, d) \
- pwrite_16(irb, x_tile_swizzle(irb, intel, _x, _y), d)
-#define READ_DEPTH(d, _x, _y) \
- d = pread_16(irb, x_tile_swizzle(irb, intel, _x, _y))
+#define WRITE_DEPTH(_x, _y, d) pwrite_16(irb, X_TILE(_x, _y), d)
+#define READ_DEPTH(d, _x, _y) d = pread_16(irb, X_TILE(_x, _y))
#define TAG(x) intel_XTile_##x##_z16
#include "depthtmp.h"
@@ -427,10 +425,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
** 16-bit y tile depthbuffer functions.
**/
#define VALUE_TYPE GLushort
-#define WRITE_DEPTH(_x, _y, d) \
- pwrite_16(irb, y_tile_swizzle(irb, intel, _x, _y), d)
-#define READ_DEPTH(d, _x, _y) \
- d = pread_16(irb, y_tile_swizzle(irb, intel, _x, _y))
+#define WRITE_DEPTH(_x, _y, d) pwrite_16(irb, Y_TILE(_x, _y), d)
+#define READ_DEPTH(d, _x, _y) d = pread_16(irb, Y_TILE(_x, _y))
#define TAG(x) intel_YTile_##x##_z16
#include "depthtmp.h"
@@ -445,12 +441,11 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
/* Change ZZZS -> SZZZ */
#define WRITE_DEPTH(_x, _y, d) \
- pwrite_32(irb, no_tile_swizzle(irb, intel, _x, _y), \
- ((d) >> 8) | ((d) << 24))
+ pwrite_32(irb, NO_TILE(_x, _y), ((d) >> 8) | ((d) << 24))
/* Change SZZZ -> ZZZS */
#define READ_DEPTH( d, _x, _y ) { \
- GLuint tmp = pread_32(irb, no_tile_swizzle(irb, intel, _x, _y)); \
+ GLuint tmp = pread_32(irb, NO_TILE(_x, _y)); \
d = (tmp << 8) | (tmp >> 24); \
}
@@ -468,12 +463,11 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
/* Change ZZZS -> SZZZ */
#define WRITE_DEPTH(_x, _y, d) \
- pwrite_32(irb, x_tile_swizzle(irb, intel, _x, _y), \
- ((d) >> 8) | ((d) << 24)) \
+ pwrite_32(irb, X_TILE(_x, _y), ((d) >> 8) | ((d) << 24))
/* Change SZZZ -> ZZZS */
#define READ_DEPTH( d, _x, _y ) { \
- GLuint tmp = pread_32(irb, x_tile_swizzle(irb, intel, _x, _y)); \
+ GLuint tmp = pread_32(irb, X_TILE(_x, _y)); \
d = (tmp << 8) | (tmp >> 24); \
}
@@ -490,12 +484,11 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
/* Change ZZZS -> SZZZ */
#define WRITE_DEPTH(_x, _y, d) \
- pwrite_32(irb, y_tile_swizzle(irb, intel, _x, _y), \
- ((d) >> 8) | ((d) << 24))
+ pwrite_32(irb, Y_TILE(_x, _y), ((d) >> 8) | ((d) << 24))
/* Change SZZZ -> ZZZS */
#define READ_DEPTH( d, _x, _y ) { \
- GLuint tmp = pread_32(irb, y_tile_swizzle(irb, intel, _x, _y)); \
+ GLuint tmp = pread_32(irb, Y_TILE(_x, _y)); \
d = (tmp << 8) | (tmp >> 24); \
}
@@ -506,36 +499,24 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
/**
** 8-bit stencil function (XXX FBO: This is obsolete)
**/
-#define WRITE_STENCIL(_x, _y, d) \
- pwrite_8(irb, no_tile_swizzle(irb, intel, _x, _y) + 3, d)
-
-#define READ_STENCIL(d, _x, _y) \
- d = pread_8(irb, no_tile_swizzle(irb, intel, _x, _y) + 3);
-
+#define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, NO_TILE(_x, _y) + 3, d)
+#define READ_STENCIL(d, _x, _y) d = pread_8(irb, NO_TILE(_x, _y) + 3);
#define TAG(x) intel##x##_z24_s8
#include "stenciltmp.h"
/**
** 8-bit x-tile stencil function (XXX FBO: This is obsolete)
**/
-#define WRITE_STENCIL(_x, _y, d) \
- pwrite_8(irb, x_tile_swizzle(irb, intel, _x, _y) + 3, d)
-
-#define READ_STENCIL(d, _x, _y) \
- d = pread_8(irb, x_tile_swizzle(irb, intel, _x, _y) + 3);
-
+#define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, X_TILE(_x, _y) + 3, d)
+#define READ_STENCIL(d, _x, _y) d = pread_8(irb, X_TILE(_x, _y) + 3);
#define TAG(x) intel_XTile_##x##_z24_s8
#include "stenciltmp.h"
/**
** 8-bit y-tile stencil function (XXX FBO: This is obsolete)
**/
-#define WRITE_STENCIL(_x, _y, d) \
- pwrite_8(irb, y_tile_swizzle(irb, intel, _x, _y) + 3, d)
-
-#define READ_STENCIL(d, _x, _y) \
- d = pread_8(irb, y_tile_swizzle(irb, intel, _x, _y) + 3)
-
+#define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, Y_TILE(_x, _y) + 3, d)
+#define READ_STENCIL(d, _x, _y) d = pread_8(irb, Y_TILE(_x, _y) + 3)
#define TAG(x) intel_YTile_##x##_z24_s8
#include "stenciltmp.h"
@@ -602,14 +583,10 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map)
if (tex) {
/* render to texture */
ASSERT(att->Renderbuffer);
- if (map) {
- struct gl_texture_image *texImg;
- texImg = tex->Image[att->CubeMapFace][att->TextureLevel];
+ if (map)
intel_tex_map_images(intel, intel_texture_object(tex));
- }
- else {
+ else
intel_tex_unmap_images(intel, intel_texture_object(tex));
- }
}
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
index 05107dd2ada..5f32dd575e3 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -900,7 +900,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
screen->depthHasSurface = (sPriv->ddx_version.major > 4) ||
/* these chips don't use tiled z without hyperz. So always pretend
we have set up a surface which will cause linear reads/writes */
- ((screen->chip_family & RADEON_CLASS_R100) &&
+ (IS_R100_CLASS(screen) &&
!(screen->chip_flags & RADEON_CHIPSET_TCL));
if ( dri_priv->textureSize == 0 ) {