diff options
Diffstat (limited to 'src/mesa/drivers/dri')
40 files changed, 1288 insertions, 475 deletions
diff --git a/src/mesa/drivers/dri/i915/i830_context.h b/src/mesa/drivers/dri/i915/i830_context.h index a298c1407de..1bdb32049d7 100644 --- a/src/mesa/drivers/dri/i915/i830_context.h +++ b/src/mesa/drivers/dri/i915/i830_context.h @@ -57,7 +57,13 @@ #define I830_DESTREG_SR0 7 #define I830_DESTREG_SR1 8 #define I830_DESTREG_SR2 9 -#define I830_DEST_SETUP_SIZE 10 +#define I830_DESTREG_DRAWRECT0 10 +#define I830_DESTREG_DRAWRECT1 11 +#define I830_DESTREG_DRAWRECT2 12 +#define I830_DESTREG_DRAWRECT3 13 +#define I830_DESTREG_DRAWRECT4 14 +#define I830_DESTREG_DRAWRECT5 15 +#define I830_DEST_SETUP_SIZE 16 #define I830_CTXREG_STATE1 0 #define I830_CTXREG_STATE2 1 diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 773a8b4dd01..3b3ff2bceda 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -449,7 +449,8 @@ i830_emit_state(struct intel_context *intel) aper_array[aper_count++] = intel->batch->buf; if (dirty & I830_UPLOAD_BUFFERS) { aper_array[aper_count++] = state->draw_region->buffer; - aper_array[aper_count++] = state->depth_region->buffer; + if (state->depth_region) + aper_array[aper_count++] = state->depth_region->buffer; } for (i = 0; i < I830_TEX_UNITS; i++) @@ -512,6 +513,16 @@ i830_emit_state(struct intel_context *intel) OUT_BATCH(state->Buffer[I830_DESTREG_SR0]); OUT_BATCH(state->Buffer[I830_DESTREG_SR1]); OUT_BATCH(state->Buffer[I830_DESTREG_SR2]); + + if (intel->constant_cliprect) { + assert(state->Buffer[I830_DESTREG_DRAWRECT0] != MI_NOOP); + OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT0]); + OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT1]); + OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT2]); + OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT3]); + OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT4]); + OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT5]); + } ADVANCE_BATCH(); } @@ -591,6 +602,7 @@ i830_state_draw_region(struct intel_context *intel, struct intel_region *depth_region) 
{ struct i830_context *i830 = i830_context(&intel->ctx); + GLcontext *ctx = &intel->ctx; GLuint value; ASSERT(state == &i830->state || state == &i830->meta); @@ -643,6 +655,24 @@ i830_state_draw_region(struct intel_context *intel, } state->Buffer[I830_DESTREG_DV1] = value; + if (intel->constant_cliprect) { + state->Buffer[I830_DESTREG_DRAWRECT0] = _3DSTATE_DRAWRECT_INFO; + state->Buffer[I830_DESTREG_DRAWRECT1] = 0; + state->Buffer[I830_DESTREG_DRAWRECT2] = 0; /* xmin, ymin */ + state->Buffer[I830_DESTREG_DRAWRECT3] = + (ctx->DrawBuffer->Width & 0xffff) | + (ctx->DrawBuffer->Height << 16); + state->Buffer[I830_DESTREG_DRAWRECT4] = 0; /* xoff, yoff */ + state->Buffer[I830_DESTREG_DRAWRECT5] = 0; + } else { + state->Buffer[I830_DESTREG_DRAWRECT0] = MI_NOOP; + state->Buffer[I830_DESTREG_DRAWRECT1] = MI_NOOP; + state->Buffer[I830_DESTREG_DRAWRECT2] = MI_NOOP; + state->Buffer[I830_DESTREG_DRAWRECT3] = MI_NOOP; + state->Buffer[I830_DESTREG_DRAWRECT4] = MI_NOOP; + state->Buffer[I830_DESTREG_DRAWRECT5] = MI_NOOP; + } + I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS); diff --git a/src/mesa/drivers/dri/i915/i915_context.h b/src/mesa/drivers/dri/i915/i915_context.h index a2376e50e15..87bbf5f9271 100644 --- a/src/mesa/drivers/dri/i915/i915_context.h +++ b/src/mesa/drivers/dri/i915/i915_context.h @@ -65,7 +65,13 @@ #define I915_DESTREG_SR0 9 #define I915_DESTREG_SR1 10 #define I915_DESTREG_SR2 11 -#define I915_DEST_SETUP_SIZE 12 +#define I915_DESTREG_DRAWRECT0 12 +#define I915_DESTREG_DRAWRECT1 13 +#define I915_DESTREG_DRAWRECT2 14 +#define I915_DESTREG_DRAWRECT3 15 +#define I915_DESTREG_DRAWRECT4 16 +#define I915_DESTREG_DRAWRECT5 17 +#define I915_DEST_SETUP_SIZE 18 #define I915_CTXREG_STATE4 0 #define I915_CTXREG_LI 1 diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 7431a9cf76d..e79c955d64d 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -399,6 +399,17 @@ i915_emit_state(struct 
intel_context *intel) OUT_BATCH(state->Buffer[I915_DESTREG_SR0]); OUT_BATCH(state->Buffer[I915_DESTREG_SR1]); OUT_BATCH(state->Buffer[I915_DESTREG_SR2]); + + if (intel->constant_cliprect) { + assert(state->Buffer[I915_DESTREG_DRAWRECT0] != MI_NOOP); + OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT0]); + OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT1]); + OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT2]); + OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT3]); + OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT4]); + OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT5]); + } + ADVANCE_BATCH(); } @@ -521,6 +532,7 @@ i915_state_draw_region(struct intel_context *intel, struct intel_region *depth_region) { struct i915_context *i915 = i915_context(&intel->ctx); + GLcontext *ctx = &intel->ctx; GLuint value; ASSERT(state == &i915->state || state == &i915->meta); @@ -573,6 +585,24 @@ i915_state_draw_region(struct intel_context *intel, } state->Buffer[I915_DESTREG_DV1] = value; + if (intel->constant_cliprect) { + state->Buffer[I915_DESTREG_DRAWRECT0] = _3DSTATE_DRAWRECT_INFO; + state->Buffer[I915_DESTREG_DRAWRECT1] = 0; + state->Buffer[I915_DESTREG_DRAWRECT2] = 0; /* xmin, ymin */ + state->Buffer[I915_DESTREG_DRAWRECT3] = + (ctx->DrawBuffer->Width & 0xffff) | + (ctx->DrawBuffer->Height << 16); + state->Buffer[I915_DESTREG_DRAWRECT4] = 0; /* xoff, yoff */ + state->Buffer[I915_DESTREG_DRAWRECT5] = 0; + } else { + state->Buffer[I915_DESTREG_DRAWRECT0] = MI_NOOP; + state->Buffer[I915_DESTREG_DRAWRECT1] = MI_NOOP; + state->Buffer[I915_DESTREG_DRAWRECT2] = MI_NOOP; + state->Buffer[I915_DESTREG_DRAWRECT3] = MI_NOOP; + state->Buffer[I915_DESTREG_DRAWRECT4] = MI_NOOP; + state->Buffer[I915_DESTREG_DRAWRECT5] = MI_NOOP; + } + I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS); } diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c index c87e5b9a120..c45d48dff8e 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_line.c +++ b/src/mesa/drivers/dri/i965/brw_clip_line.c 
@@ -148,7 +148,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_clip_init_clipmask(c); /* -ve rhw workaround */ - if (!(BRW_IS_GM45(p->brw) || BRW_IS_G4X(p->brw))) { + if (!BRW_IS_G4X(p->brw)) { brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 82d1e873577..740c7cbd109 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -102,7 +102,7 @@ clip_unit_create_from_key(struct brw_context *brw, clip.clip5.api_mode = BRW_CLIP_API_OGL; clip.clip5.clip_mode = key->clip_mode; - if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) + if (BRW_IS_G4X(brw)) clip.clip5.negative_w_clip_test = 1; clip.clip6.clipper_viewport_state_ptr = 0; diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c index 8459b59b460..1dbba37fe7e 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_tri.c +++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c @@ -526,7 +526,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c ) /* if -ve rhw workaround bit is set, do cliptest */ - if (!(BRW_IS_GM45(p->brw) || BRW_IS_G4X(p->brw))) { + if (!BRW_IS_G4X(p->brw)) { brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 474158b484b..e2bc08a6cb7 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -39,6 +39,7 @@ #include "brw_context.h" #include "brw_defines.h" #include "brw_draw.h" +#include "brw_state.h" #include "brw_vs.h" #include "intel_tex.h" #include "intel_blit.h" diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 1c6a0dede0b..e3904be977f 100644 --- 
a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -433,7 +433,6 @@ struct brw_context GLuint primitive; GLboolean emit_state_always; - GLboolean wrap; GLboolean tmp_fallback; GLboolean no_batch_wrap; @@ -445,6 +444,19 @@ struct brw_context GLuint nr_draw_regions; struct intel_region *draw_regions[MAX_DRAW_BUFFERS]; struct intel_region *depth_region; + + /** + * List of buffers accumulated in brw_validate_state to receive + * dri_bo_check_aperture treatment before exec, so we can know if we + * should flush the batch and try again before emitting primitives. + * + * This can be a fixed number as we only have a limited number of + * objects referenced from the batchbuffer in a primitive emit, + * consisting of the vertex buffers, pipelined state pointers, + * the CURBE, the depth buffer, and a query BO. + */ + dri_bo *validated_bos[VERT_ATTRIB_MAX + 16]; + int validated_bo_count; } state; struct brw_state_pointers attribs; @@ -680,14 +692,6 @@ void brw_emit_query_begin(struct brw_context *brw); void brw_emit_query_end(struct brw_context *brw); /*====================================================================== - * brw_state.c - */ -void brw_validate_state( struct brw_context *brw ); -void brw_init_state( struct brw_context *brw ); -void brw_destroy_state( struct brw_context *brw ); - - -/*====================================================================== * brw_state_dump.c */ void brw_debug_batch(struct intel_context *intel); diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 7cddd3a7dee..c7bac7b0c52 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -307,6 +307,7 @@ static void prepare_constant_buffer(struct brw_context *brw) dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf); } + brw_add_validated_bo(brw, brw->curbe.curbe_bo); /* Because this provokes an action (ie copy the constants into the * URB), it 
shouldn't be shortcircuited if identical to the @@ -328,13 +329,6 @@ static void emit_constant_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; GLuint sz = brw->curbe.total_size; - dri_bo *aper_array[] = { - brw->intel.batch->buf, - brw->curbe.curbe_bo, - }; - - if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) - intel_batchbuffer_flush(intel->batch); BEGIN_BATCH(2, IGNORE_CLIPRECTS); if (sz == 0) { diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 0593e8d5f5f..39c32255f8b 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -798,10 +798,9 @@ #include "intel_chipset.h" -#define BRW_IS_GM45(brw) (IS_GM45_GM((brw)->intel.intelScreen->deviceID)) #define BRW_IS_G4X(brw) (IS_G4X((brw)->intel.intelScreen->deviceID)) -#define CMD_PIPELINE_SELECT(brw) ((BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965) -#define CMD_VF_STATISTICS(brw) ((BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965) -#define URB_SIZES(brw) ((BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) ? 384 : 256) /* 512 bit unit */ +#define CMD_PIPELINE_SELECT(brw) (BRW_IS_G4X(brw) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965) +#define CMD_VF_STATISTICS(brw) (BRW_IS_G4X(brw) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965) +#define URB_SIZES(brw) (BRW_IS_G4X(brw) ? 
384 : 256) /* 512 bit units */ #endif diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 6c71b4abcf0..d87b8f8a848 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -256,6 +256,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, struct intel_context *intel = intel_context(ctx); struct brw_context *brw = brw_context(ctx); GLboolean retval = GL_FALSE; + GLboolean warn = GL_FALSE; GLuint i; if (ctx->NewState) @@ -282,30 +283,25 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, LOCK_HARDWARE(intel); - if (brw->intel.numClipRects == 0) { + if (!intel->constant_cliprect && intel->driDrawable->numClipRects == 0) { UNLOCK_HARDWARE(intel); return GL_TRUE; } + /* Flush the batch if it's approaching full, so that we don't wrap while + * we've got validated state that needs to be in the same batch as the + * primitives. This fraction is just a guess (minimal full state plus + * a primitive is around 512 bytes), and would be better if we had + * an upper bound of how much we might emit in a single + * brw_try_draw_prims(). + */ + intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4, + LOOP_CLIPRECTS); { - /* Flush the batch if it's approaching full, so that we don't wrap while - * we've got validated state that needs to be in the same batch as the - * primitives. This fraction is just a guess (minimal full state plus - * a primitive is around 512 bytes), and would be better if we had - * an upper bound of how much we might emit in a single - * brw_try_draw_prims(). - */ - if (intel->batch->ptr - intel->batch->map > intel->batch->size * 3 / 4 - /* brw_emit_prim may change the cliprect_mode to LOOP_CLIPRECTS */ - || intel->batch->cliprect_mode != LOOP_CLIPRECTS) - intel_batchbuffer_flush(intel->batch); - /* Set the first primitive early, ahead of validate_state: */ brw_set_prim(brw, prim[0].mode); - /* XXX: Need to separate validate and upload of state. 
- */ brw_validate_state( brw ); /* Various fallback checks: @@ -316,6 +312,31 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, if (check_fallbacks( brw, prim, nr_prims )) goto out; + /* Check that we can fit our state in with our existing batchbuffer, or + * flush otherwise. + */ + if (dri_bufmgr_check_aperture_space(brw->state.validated_bos, + brw->state.validated_bo_count)) { + static GLboolean warned; + intel_batchbuffer_flush(intel->batch); + + /* Validate the state after we flushed the batch (which would have + * changed the set of dirty state). If we still fail to + * check_aperture, warn of what's happening, but attempt to continue + * on since it may succeed anyway, and the user would probably rather + * see a failure and a warning than a fallback. + */ + brw_validate_state(brw); + if (!warned && + dri_bufmgr_check_aperture_space(brw->state.validated_bos, + brw->state.validated_bo_count)) { + warn = GL_TRUE; + warned = GL_TRUE; + } + } + + brw_upload_state(brw); + for (i = 0; i < nr_prims; i++) { brw_emit_prim(brw, &prim[i]); } @@ -326,6 +347,10 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, out: UNLOCK_HARDWARE(intel); + if (warn) + fprintf(stderr, "i965: Single primitive emit potentially exceeded " + "available aperture space\n"); + if (!retval) DBG("%s failed\n", __FUNCTION__); diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 7b88b5eaa1e..4080c5e3228 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -250,10 +250,10 @@ static void get_space( struct brw_context *brw, wrap_buffers(brw, size); } + assert(*bo_return == NULL); dri_bo_reference(brw->vb.upload.bo); *bo_return = brw->vb.upload.bo; *offset_return = brw->vb.upload.offset; - brw->vb.upload.offset += size; } @@ -359,6 +359,14 @@ static void brw_prepare_vertices(struct brw_context *brw) input->offset = (unsigned long)input->glarray->Ptr; input->stride = 
input->glarray->StrideB; } else { + if (input->bo != NULL) { + /* Already-uploaded vertex data is present from a previous + * prepare_vertices, but we had to re-validate state due to + * check_aperture failing and a new batch being produced. + */ + continue; + } + /* Queue the buffer object up to be uploaded in the next pass, * when we've decided if we're doing interleaved or not. */ @@ -417,6 +425,12 @@ static void brw_prepare_vertices(struct brw_context *brw) } brw_prepare_query_begin(brw); + + for (i = 0; i < nr_enabled; i++) { + struct brw_vertex_element *input = enabled[i]; + + brw_add_validated_bo(brw, input->bo); + } } static void brw_emit_vertices(struct brw_context *brw) @@ -512,7 +526,7 @@ static void brw_prepare_indices(struct brw_context *brw) struct intel_context *intel = &brw->intel; const struct _mesa_index_buffer *index_buffer = brw->ib.ib; GLuint ib_size; - dri_bo *bo; + dri_bo *bo = NULL; struct gl_buffer_object *bufferobj; GLuint offset; @@ -561,6 +575,8 @@ static void brw_prepare_indices(struct brw_context *brw) dri_bo_unreference(brw->ib.bo); brw->ib.bo = bo; brw->ib.offset = offset; + + brw_add_validated_bo(brw, brw->ib.bo); } static void brw_emit_indices(struct brw_context *brw) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 207b8b7ca38..49b422ee2ff 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -65,7 +65,7 @@ struct brw_reg GLuint abs:1; /* source only */ GLuint vstride:4; /* source only */ GLuint width:3; /* src only, align1 only */ - GLuint hstride:2; /* src only, align1 only */ + GLuint hstride:2; /* align1 only */ GLuint address_mode:1; /* relative addressing, hopefully! 
*/ GLuint pad0:1; @@ -432,6 +432,12 @@ static INLINE struct brw_reg brw_uw8_grf( GLuint nr, return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } +static INLINE struct brw_reg brw_uw16_grf( GLuint nr, + GLuint subnr ) +{ + return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + static INLINE struct brw_reg brw_null_reg( void ) { return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 0bfbec9d140..ce4cf46cfa6 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -64,7 +64,9 @@ static void brw_set_dest( struct brw_instruction *insn, if (insn->header.access_mode == BRW_ALIGN_1) { insn->bits1.da1.dest_subreg_nr = dest.subnr; - insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.da1.dest_horiz_stride = dest.hstride; } else { insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; @@ -78,7 +80,9 @@ static void brw_set_dest( struct brw_instruction *insn, */ if (insn->header.access_mode == BRW_ALIGN_1) { insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; - insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.ia1.dest_horiz_stride = dest.hstride; } else { insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; @@ -329,14 +333,14 @@ static void brw_set_sampler_message(struct brw_context *brw, { brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) { - insn->bits3.sampler_gm45_g4x.binding_table_index = binding_table_index; - insn->bits3.sampler_gm45_g4x.sampler = sampler; - insn->bits3.sampler_gm45_g4x.msg_type = msg_type; - insn->bits3.sampler_gm45_g4x.response_length = response_length; - insn->bits3.sampler_gm45_g4x.msg_length = msg_length; - 
insn->bits3.sampler_gm45_g4x.end_of_thread = eot; - insn->bits3.sampler_gm45_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; + if (BRW_IS_G4X(brw)) { + insn->bits3.sampler_g4x.binding_table_index = binding_table_index; + insn->bits3.sampler_g4x.sampler = sampler; + insn->bits3.sampler_g4x.msg_type = msg_type; + insn->bits3.sampler_g4x.response_length = response_length; + insn->bits3.sampler_g4x.msg_length = msg_length; + insn->bits3.sampler_g4x.end_of_thread = eot; + insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; } else { insn->bits3.sampler.binding_table_index = binding_table_index; insn->bits3.sampler.sampler = sampler; diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 487c638ce21..627705fa9ba 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -71,6 +71,38 @@ const struct brw_tracked_state brw_blend_constant_color = { .emit = upload_blend_constant_color }; +/* Constant single cliprect for framebuffer object or DRI2 drawing */ +static void upload_drawing_rect(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; + + if (!intel->constant_cliprect) + return; + + BEGIN_BATCH(4, NO_LOOP_CLIPRECTS); + OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965); + OUT_BATCH(0); /* xmin, ymin */ + OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) | + ((ctx->DrawBuffer->Height - 1) << 16)); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +const struct brw_tracked_state brw_drawing_rect = { + .dirty = { + .mesa = _NEW_BUFFERS, + .brw = 0, + .cache = 0 + }, + .emit = upload_drawing_rect +}; + +static void prepare_binding_table_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->wm.bind_bo); +} + /** * Upload the binding table pointers, which point each stage's array of surface * state pointers. 
@@ -81,13 +113,6 @@ const struct brw_tracked_state brw_blend_constant_color = { static void upload_binding_table_pointers(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - dri_bo *aper_array[] = { - intel->batch->buf, - brw->wm.bind_bo, - }; - - if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) - intel_batchbuffer_flush(intel->batch); BEGIN_BATCH(6, IGNORE_CLIPRECTS); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); @@ -107,6 +132,7 @@ const struct brw_tracked_state brw_binding_table_pointers = { .brw = BRW_NEW_BATCH, .cache = CACHE_NEW_SURF_BIND, }, + .prepare = prepare_binding_table_pointers, .emit = upload_binding_table_pointers, }; @@ -140,21 +166,18 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) brw->state.dirty.brw |= BRW_NEW_PSP; } -static void upload_psp_urb_cbs(struct brw_context *brw ) + +static void prepare_psp_urb_cbs(struct brw_context *brw) { - struct intel_context *intel = &brw->intel; - dri_bo *aper_array[] = { - intel->batch->buf, - brw->vs.state_bo, - brw->gs.state_bo, - brw->clip.state_bo, - brw->wm.state_bo, - brw->cc.state_bo, - }; - - if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) - intel_batchbuffer_flush(intel->batch); + brw_add_validated_bo(brw, brw->vs.state_bo); + brw_add_validated_bo(brw, brw->gs.state_bo); + brw_add_validated_bo(brw, brw->clip.state_bo); + brw_add_validated_bo(brw, brw->wm.state_bo); + brw_add_validated_bo(brw, brw->cc.state_bo); +} +static void upload_psp_urb_cbs(struct brw_context *brw ) +{ upload_pipelined_state_pointers(brw); brw_upload_urb_fence(brw); brw_upload_constant_buffer_state(brw); @@ -172,14 +195,23 @@ const struct brw_tracked_state brw_psp_urb_cbs = { CACHE_NEW_WM_UNIT | CACHE_NEW_CC_UNIT) }, + .prepare = prepare_psp_urb_cbs, .emit = upload_psp_urb_cbs, }; +static void prepare_depthbuffer(struct brw_context *brw) +{ + struct intel_region *region = brw->state.depth_region; + + if (region != NULL) + 
brw_add_validated_bo(brw, region->buffer); +} + static void emit_depthbuffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct intel_region *region = brw->state.depth_region; - unsigned int len = (BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) ? sizeof(struct brw_depthbuffer_gm45_g4x) / 4 : sizeof(struct brw_depthbuffer) / 4; + unsigned int len = BRW_IS_G4X(brw) ? 6 : 5; if (region == NULL) { BEGIN_BATCH(len, IGNORE_CLIPRECTS); @@ -190,16 +222,12 @@ static void emit_depthbuffer(struct brw_context *brw) OUT_BATCH(0); OUT_BATCH(0); - if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) + if (BRW_IS_G4X(brw)) OUT_BATCH(0); ADVANCE_BATCH(); } else { unsigned int format; - dri_bo *aper_array[] = { - intel->batch->buf, - region->buffer - }; switch (region->cpp) { case 2: @@ -216,9 +244,6 @@ static void emit_depthbuffer(struct brw_context *brw) return; } - if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) - intel_batchbuffer_flush(intel->batch); - BEGIN_BATCH(len, IGNORE_CLIPRECTS); OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH(((region->pitch * region->cpp) - 1) | @@ -234,7 +259,7 @@ static void emit_depthbuffer(struct brw_context *brw) ((region->height - 1) << 19)); OUT_BATCH(0); - if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) + if (BRW_IS_G4X(brw)) OUT_BATCH(0); ADVANCE_BATCH(); @@ -247,6 +272,7 @@ const struct brw_tracked_state brw_depthbuffer = { .brw = BRW_NEW_DEPTH_BUFFER | BRW_NEW_BATCH, .cache = 0, }, + .prepare = prepare_depthbuffer, .emit = emit_depthbuffer, }; @@ -318,7 +344,7 @@ static void upload_aa_line_parameters(struct brw_context *brw) { struct brw_aa_line_parameters balp; - if (!(BRW_IS_GM45(brw) || BRW_IS_G4X(brw))) + if (!BRW_IS_G4X(brw)) return; /* use legacy aa line coverage computation */ diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index a1a1353dee7..cb9169e2eef 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c 
@@ -42,6 +42,7 @@ #include "main/imports.h" #include "brw_context.h" +#include "brw_state.h" #include "intel_batchbuffer.h" #include "intel_reg.h" @@ -163,10 +164,6 @@ void brw_prepare_query_begin(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - dri_bo *aper_array[] = { - intel->batch->buf, - brw->query.bo, - }; /* Skip if we're not doing any queries. */ if (is_empty_list(&brw->query.active_head)) @@ -182,8 +179,7 @@ brw_prepare_query_begin(struct brw_context *brw) brw->query.index = 0; } - if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) - intel_batchbuffer_flush(intel->batch); + brw_add_validated_bo(brw, brw->query.bo); } /** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. */ diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 4c04036ef08..bb22c03eeb6 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -35,6 +35,16 @@ #include "brw_context.h" +static inline void +brw_add_validated_bo(struct brw_context *brw, dri_bo *bo) +{ + assert(brw->state.validated_bo_count < ARRAY_SIZE(brw->state.validated_bos)); + + if (bo != NULL) { + dri_bo_reference(bo); + brw->state.validated_bos[brw->state.validated_bo_count++] = bo; + } +}; const struct brw_tracked_state brw_blend_constant_color; const struct brw_tracked_state brw_cc_unit; @@ -79,10 +89,19 @@ const struct brw_tracked_state brw_pipe_control; const struct brw_tracked_state brw_clear_surface_cache; const struct brw_tracked_state brw_clear_batch_cache; +const struct brw_tracked_state brw_drawing_rect; const struct brw_tracked_state brw_indices; const struct brw_tracked_state brw_vertices; /*********************************************************************** + * brw_state.c + */ +void brw_validate_state(struct brw_context *brw); +void brw_upload_state(struct brw_context *brw); +void brw_init_state(struct brw_context *brw); +void brw_destroy_state(struct 
brw_context *brw); + +/*********************************************************************** * brw_state_cache.c */ dri_bo *brw_cache_data(struct brw_cache *cache, diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index 94ef9248686..dc87859f3f5 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -97,8 +97,6 @@ void brw_clear_batch_cache_flush( struct brw_context *brw ) { clear_batch_cache(brw); - brw->wrap = 0; - /* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */ brw->state.dirty.mesa |= ~0; diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index b6a52843a81..7a642bd2a8f 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -45,7 +45,6 @@ const struct brw_tracked_state *atoms[] = { &brw_check_fallback, - &brw_active_vertprog, &brw_wm_input_sizes, &brw_vs_prog, &brw_gs_prog, @@ -99,6 +98,7 @@ const struct brw_tracked_state *atoms[] = &brw_psp_urb_cbs, #endif + &brw_drawing_rect, &brw_indices, &brw_vertices, @@ -168,6 +168,18 @@ static void xor_states( struct brw_state_flags *result, result->cache = a->cache ^ b->cache; } +static void +brw_clear_validated_bos(struct brw_context *brw) +{ + int i; + + /* Clear the last round of validated bos */ + for (i = 0; i < brw->state.validated_bo_count; i++) { + dri_bo_unreference(brw->state.validated_bos[i]); + brw->state.validated_bos[i] = NULL; + } + brw->state.validated_bo_count = 0; +} /*********************************************************************** * Emit all state: @@ -176,14 +188,14 @@ void brw_validate_state( struct brw_context *brw ) { struct intel_context *intel = &brw->intel; struct brw_state_flags *state = &brw->state.dirty; - GLuint i, count, pass = 0; - dri_bo *last_batch_bo = NULL; + GLuint i; + + brw_clear_validated_bos(brw); state->mesa |= brw->intel.NewGLState; 
brw->intel.NewGLState = 0; - if (brw->wrap) - state->brw |= BRW_NEW_CONTEXT; + brw_add_validated_bo(brw, intel->batch->buf); if (brw->emit_state_always) { state->mesa |= ~0; @@ -199,6 +211,10 @@ void brw_validate_state( struct brw_context *brw ) brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; } + if (brw->vertex_program != brw->attribs.VertexProgram->_Current) { + brw->vertex_program = brw->attribs.VertexProgram->_Current; + brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; + } if (state->mesa == 0 && state->cache == 0 && @@ -210,8 +226,6 @@ void brw_validate_state( struct brw_context *brw ) brw->intel.Fallback = 0; - count = 0; - /* do prepare stage for all atoms */ for (i = 0; i < Elements(atoms); i++) { const struct brw_tracked_state *atom = brw->state.atoms[i]; @@ -225,19 +239,15 @@ void brw_validate_state( struct brw_context *brw ) } } } +} - if (brw->intel.Fallback) - return; - /* We're about to try to set up a coherent state in the batchbuffer for - * the emission of primitives. If we exceed the aperture size in any of the - * emit() calls, we need to go back to square 1 and try setting up again. 
- */ -got_flushed: - dri_bo_unreference(last_batch_bo); - last_batch_bo = intel->batch->buf; - dri_bo_reference(last_batch_bo); - assert(pass++ <= 2); +void brw_upload_state(struct brw_context *brw) +{ + struct brw_state_flags *state = &brw->state.dirty; + int i; + + brw_clear_validated_bos(brw); if (INTEL_DEBUG) { /* Debug version which enforces various sanity checks on the @@ -262,8 +272,6 @@ got_flushed: if (check_state(state, &atom->dirty)) { if (atom->emit) { atom->emit( brw ); - if (intel->batch->buf != last_batch_bo) - goto got_flushed; } } @@ -288,15 +296,11 @@ got_flushed: if (check_state(state, &atom->dirty)) { if (atom->emit) { atom->emit( brw ); - if (intel->batch->buf != last_batch_bo) - goto got_flushed; } } } } - dri_bo_unreference(last_batch_bo); - if (!brw->intel.Fallback) memset(state, 0, sizeof(*state)); } diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index ec865c925a7..4e577d0f6a8 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -175,7 +175,7 @@ struct brw_depthbuffer } dword4; }; -struct brw_depthbuffer_gm45_g4x +struct brw_depthbuffer_g4x { union header_union header; @@ -1405,7 +1405,7 @@ struct brw_instruction GLuint msg_target:4; GLuint pad1:3; GLuint end_of_thread:1; - } sampler_gm45_g4x; + } sampler_g4x; struct brw_urb_immediate urb; diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c index 1116ade0a47..1a004176de1 100644 --- a/src/mesa/drivers/dri/i965/brw_urb.c +++ b/src/mesa/drivers/dri/i965/brw_urb.c @@ -92,9 +92,9 @@ static void recalculate_urb_fence( struct brw_context *brw ) if (brw->urb.vsize < vsize || brw->urb.sfsize < sfsize || brw->urb.csize < csize || - (brw->urb.constrained && (brw->urb.vsize > brw->urb.vsize || - brw->urb.sfsize > brw->urb.sfsize || - brw->urb.csize > brw->urb.csize))) { + (brw->urb.constrained && (brw->urb.vsize > vsize || + brw->urb.sfsize > sfsize || + brw->urb.csize > 
csize))) { brw->urb.csize = csize; @@ -114,6 +114,10 @@ static void recalculate_urb_fence( struct brw_context *brw ) brw->urb.nr_sf_entries = limits[SF].min_nr_entries; brw->urb.nr_cs_entries = limits[CS].min_nr_entries; + /* Mark us as operating with constrained nr_entries, so that next + * time we recalculate we'll resize the fences in the hope of + * escaping constrained mode and getting back to normal performance. + */ brw->urb.constrained = 1; if (!check_urb_layout(brw)) { diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 9de05408ba9..25b4ee85cb0 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -818,8 +818,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) } - /* Build ndc coords? TODO: Shortcircuit when w is known to be one. - */ + /* Build ndc coords */ if (!c->key.know_w_is_one) { ndc = get_tmp(c); emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); @@ -829,12 +828,12 @@ static void emit_vertex_write( struct brw_vs_compile *c) ndc = pos; } - /* This includes the workaround for -ve rhw, so is no longer an - * optional step: + /* Update the header for point size, user clipping flags, and -ve rhw + * workaround. */ if ((c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) || c->key.nr_userclip || - !c->key.know_w_is_one) + (!BRW_IS_G4X(p->brw) && !c->key.know_w_is_one)) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); GLuint i; @@ -867,7 +866,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) * Later, clipping will detect ucp[6] and ensure the primitive is * clipped against all fixed planes. 
*/ - if (!(BRW_IS_GM45(p->brw) || BRW_IS_G4X(p->brw)) && !c->key.know_w_is_one) { + if (!BRW_IS_G4X(p->brw) && !c->key.know_w_is_one) { brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_L, diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index a64e437860f..2d4c81274e6 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -62,7 +62,6 @@ dri_bo_release(dri_bo **bo) */ static void brw_destroy_context( struct intel_context *intel ) { - GLcontext *ctx = &intel->ctx; struct brw_context *brw = brw_context(&intel->ctx); int i; diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 297617ee2dc..896390c17b8 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -157,6 +157,7 @@ struct brw_wm_instruction { #define BRW_WM_MAX_PARAM 256 #define BRW_WM_MAX_CONST 256 #define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS +#define BRW_WM_MAX_SUBROUTINE 16 @@ -246,7 +247,10 @@ struct brw_wm_compile { struct brw_reg stack; struct brw_reg emit_mask_reg; GLuint reg_index; + GLuint tmp_regs[BRW_WM_MAX_GRF]; GLuint tmp_index; + GLuint tmp_max; + GLuint subroutines[BRW_WM_MAX_SUBROUTINE]; }; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 4d5e11f4b6f..cb728190f5c 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -4,6 +4,10 @@ #include "brw_eu.h" #include "brw_wm.h" +enum _subroutine { + SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4 +}; + /* Only guess, need a flag in gl_fragment_program later */ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) { @@ -12,13 +16,17 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) struct prog_instruction *inst = &fp->Base.Instructions[i]; switch (inst->Opcode) { case OPCODE_IF: - case OPCODE_INT: + case OPCODE_TRUNC: case OPCODE_ENDIF: case OPCODE_CAL: case 
OPCODE_BRK: case OPCODE_RET: case OPCODE_DDX: case OPCODE_DDY: + case OPCODE_NOISE1: + case OPCODE_NOISE2: + case OPCODE_NOISE3: + case OPCODE_NOISE4: case OPCODE_BGNLOOP: return GL_TRUE; default: @@ -47,13 +55,26 @@ static int get_scalar_dst_index(struct prog_instruction *inst) static struct brw_reg alloc_tmp(struct brw_wm_compile *c) { struct brw_reg reg; - reg = brw_vec8_grf(c->tmp_index--, 0); + if(c->tmp_index == c->tmp_max) + c->tmp_regs[ c->tmp_max++ ] = c->reg_index++; + + reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0); return reg; } -static void release_tmps(struct brw_wm_compile *c) +static int mark_tmps(struct brw_wm_compile *c) +{ + return c->tmp_index; +} + +static struct brw_reg lookup_tmp( struct brw_wm_compile *c, int index ) +{ + return brw_vec8_grf( c->tmp_regs[ index ], 0 ); +} + +static void release_tmps(struct brw_wm_compile *c, int mark) { - c->tmp_index = 127; + c->tmp_index = mark; } static struct brw_reg @@ -155,6 +176,68 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, src->NegateBase, src->Abs); } +/* Subroutines are minimal support for resusable instruction sequences. + They are implemented as simply as possible to minimise overhead: there + is no explicit support for communication between the caller and callee + other than saving the return address in a temporary register, nor is + there any automatic local storage. This implies that great care is + required before attempting reentrancy or any kind of nested + subroutine invocations. 
*/ +static void invoke_subroutine( struct brw_wm_compile *c, + enum _subroutine subroutine, + void (*emit)( struct brw_wm_compile * ) ) +{ + struct brw_compile *p = &c->func; + + assert( subroutine < BRW_WM_MAX_SUBROUTINE ); + + if( c->subroutines[ subroutine ] ) { + /* subroutine previously emitted: reuse existing instructions */ + + int mark = mark_tmps( c ); + struct brw_reg return_address = retype( alloc_tmp( c ), + BRW_REGISTER_TYPE_UD ); + int here = p->nr_insn; + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 2 << 4 ) ); + + brw_ADD( p, brw_ip_reg(), brw_ip_reg(), + brw_imm_d( ( c->subroutines[ subroutine ] - + here - 1 ) << 4 ) ); + brw_pop_insn_state(p); + + release_tmps( c, mark ); + } else { + /* previously unused subroutine: emit, and mark for later reuse */ + + int mark = mark_tmps( c ); + struct brw_reg return_address = retype( alloc_tmp( c ), + BRW_REGISTER_TYPE_UD ); + struct brw_instruction *calc; + int base = p->nr_insn; + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + calc = brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 0 ) ); + brw_pop_insn_state(p); + + c->subroutines[ subroutine ] = p->nr_insn; + + emit( c ); + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV( p, brw_ip_reg(), return_address ); + brw_pop_insn_state(p); + + brw_set_src1( calc, brw_imm_ud( ( p->nr_insn - base ) << 4 ) ); + + release_tmps( c, mark ); + } +} + static void emit_abs( struct brw_wm_compile *c, struct prog_instruction *inst) { @@ -172,7 +255,7 @@ static void emit_abs( struct brw_wm_compile *c, brw_set_saturate(p, 0); } -static void emit_int( struct brw_wm_compile *c, +static void emit_trunc( struct brw_wm_compile *c, struct prog_instruction *inst) { int i; @@ -778,6 +861,7 @@ static void emit_lrp(struct brw_wm_compile *c, GLuint mask = inst->DstReg.WriteMask; struct brw_reg dst, tmp1, tmp2, src0, src1, src2; int i; + int 
mark = mark_tmps(c); for (i = 0; i < 4; i++) { if (mask & (1<<i)) { dst = get_dst_reg(c, inst, i, 1); @@ -804,7 +888,7 @@ static void emit_lrp(struct brw_wm_compile *c, brw_MAC(p, dst, src0, tmp1); brw_set_saturate(p, 0); } - release_tmps(c); + release_tmps(c, mark); } } @@ -957,6 +1041,633 @@ static void emit_ddy(struct brw_wm_compile *c, brw_set_saturate(p, 0); } +static __inline struct brw_reg high_words( struct brw_reg reg ) +{ + return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ), + 0, 8, 2 ); +} + +static __inline struct brw_reg low_words( struct brw_reg reg ) +{ + return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 ); +} + +static __inline struct brw_reg even_bytes( struct brw_reg reg ) +{ + return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 ); +} + +static __inline struct brw_reg odd_bytes( struct brw_reg reg ) +{ + return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ), + 0, 16, 2 ); +} + +/* One-, two- and three-dimensional Perlin noise, similar to the description + in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. 
*/ +static void noise1_sub( struct brw_wm_compile *c ) { + + struct brw_compile *p = &c->func; + struct brw_reg param, + x0, x1, /* gradients at each end */ + t, tmp[ 2 ], /* float temporaries */ + itmp[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */ + int i; + int mark = mark_tmps( c ); + + x0 = alloc_tmp( c ); + x1 = alloc_tmp( c ); + t = alloc_tmp( c ); + tmp[ 0 ] = alloc_tmp( c ); + tmp[ 1 ] = alloc_tmp( c ); + itmp[ 0 ] = retype( tmp[ 0 ], BRW_REGISTER_TYPE_UD ); + itmp[ 1 ] = retype( tmp[ 1 ], BRW_REGISTER_TYPE_UD ); + itmp[ 2 ] = retype( x0, BRW_REGISTER_TYPE_UD ); + itmp[ 3 ] = retype( x1, BRW_REGISTER_TYPE_UD ); + itmp[ 4 ] = retype( t, BRW_REGISTER_TYPE_UD ); + + param = lookup_tmp( c, mark - 2 ); + + brw_set_access_mode( p, BRW_ALIGN_1 ); + + brw_MOV( p, itmp[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */ + + /* Arrange the two end coordinates into scalars (itmp0/itmp1) to + be hashed. Also compute the remainder (offset within the unit + length), interleaved to reduce register dependency penalties. */ + brw_RNDD( p, itmp[ 0 ], param ); + brw_FRC( p, param, param ); + brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 1 ) ); + brw_MOV( p, itmp[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */ + brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */ + + /* We're now ready to perform the hashing. The two hashes are + interleaved for performance. The hash function used is + designed to rapidly achieve avalanche and require only 32x16 + bit multiplication, and 16-bit swizzles (which we get for + free). We can't use immediate operands in the multiplies, + because immediates are permitted only in src1 and the 16-bit + factor is permitted only in src0. 
*/ + for( i = 0; i < 2; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 2 ], itmp[ i ] ); + for( i = 0; i < 2; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + for( i = 0; i < 2; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 3 ], itmp[ i ] ); + for( i = 0; i < 2; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + for( i = 0; i < 2; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] ); + for( i = 0; i < 2; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + + /* Now we want to initialise the two gradients based on the + hashes. Format conversion from signed integer to float leaves + everything scaled too high by a factor of pow( 2, 31 ), but + we correct for that right at the end. */ + brw_ADD( p, t, param, brw_imm_f( -1.0 ) ); + brw_MOV( p, x0, retype( tmp[ 0 ], BRW_REGISTER_TYPE_D ) ); + brw_MOV( p, x1, retype( tmp[ 1 ], BRW_REGISTER_TYPE_D ) ); + + brw_MUL( p, x0, x0, param ); + brw_MUL( p, x1, x1, t ); + + /* We interpolate between the gradients using the polynomial + 6t^5 - 15t^4 + 10t^3 (Perlin). 
*/ + brw_MUL( p, tmp[ 0 ], param, brw_imm_f( 6.0 ) ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) ); + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) ); + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); + brw_ADD( p, x1, x1, negate( x0 ) ); /* unrelated work to fill the + pipeline */ + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); + brw_MUL( p, param, tmp[ 0 ], param ); + brw_MUL( p, x1, x1, param ); + brw_ADD( p, x0, x0, x1 ); + /* scale by pow( 2, -30 ), to compensate for the format conversion + above and an extra factor of 2 so that a single gradient covers + the [-1,1] range */ + brw_MUL( p, param, x0, brw_imm_f( 0.000000000931322574615478515625 ) ); + + release_tmps( c, mark ); +} + +static void emit_noise1( struct brw_wm_compile *c, + struct prog_instruction *inst ) +{ + struct brw_compile *p = &c->func; + struct brw_reg src, param, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + int mark = mark_tmps( c ); + + assert( mark == 0 ); + + src = get_src_reg( c, inst->SrcReg, 0, 1 ); + + param = alloc_tmp( c ); + + brw_MOV( p, param, src ); + + invoke_subroutine( c, SUB_NOISE1, noise1_sub ); + + /* Fill in the result: */ + brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_MOV( p, dst, param ); + } + } + if( inst->SaturateMode == SATURATE_ZERO_ONE ) + brw_set_saturate( p, 0 ); + + release_tmps( c, mark ); +} + +static void noise2_sub( struct brw_wm_compile *c ) { + + struct brw_compile *p = &c->func; + struct brw_reg param0, param1, + x0y0, x0y1, x1y0, x1y1, /* gradients at each corner */ + t, tmp[ 4 ], /* float temporaries */ + itmp[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */ + int i; + int mark = mark_tmps( c ); + + x0y0 = alloc_tmp( c ); + x0y1 = alloc_tmp( c ); + x1y0 = alloc_tmp( c ); + x1y1 = alloc_tmp( c ); + t = alloc_tmp( c ); + for( i = 0; i < 4; i++ ) { + tmp[ i ] = alloc_tmp( 
c ); + itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); + } + itmp[ 4 ] = retype( x0y0, BRW_REGISTER_TYPE_UD ); + itmp[ 5 ] = retype( x0y1, BRW_REGISTER_TYPE_UD ); + itmp[ 6 ] = retype( x1y0, BRW_REGISTER_TYPE_UD ); + + param0 = lookup_tmp( c, mark - 3 ); + param1 = lookup_tmp( c, mark - 2 ); + + brw_set_access_mode( p, BRW_ALIGN_1 ); + + /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to + be hashed. Also compute the remainders (offsets within the unit + square), interleaved to reduce register dependency penalties. */ + brw_RNDD( p, itmp[ 0 ], param0 ); + brw_RNDD( p, itmp[ 1 ], param1 ); + brw_FRC( p, param0, param0 ); + brw_FRC( p, param1, param1 ); + brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */ + brw_ADD( p, high_words( itmp[ 0 ] ), high_words( itmp[ 0 ] ), + low_words( itmp[ 1 ] ) ); + brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */ + brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */ + brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 0x10000 ) ); + brw_ADD( p, itmp[ 2 ], itmp[ 0 ], brw_imm_ud( 0x1 ) ); + brw_ADD( p, itmp[ 3 ], itmp[ 0 ], brw_imm_ud( 0x10001 ) ); + + /* We're now ready to perform the hashing. The four hashes are + interleaved for performance. The hash function used is + designed to rapidly achieve avalanche and require only 32x16 + bit multiplication, and 16-bit swizzles (which we get for + free). We can't use immediate operands in the multiplies, + because immediates are permitted only in src1 and the 16-bit + factor is permitted only in src0. 
*/ + for( i = 0; i < 4; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + for( i = 0; i < 4; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 5 ], itmp[ i ] ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + for( i = 0; i < 4; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 6 ], itmp[ i ] ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + + /* Now we want to initialise the four gradients based on the + hashes. Format conversion from signed integer to float leaves + everything scaled too high by a factor of pow( 2, 15 ), but + we correct for that right at the end. */ + brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); + brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); + brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); + brw_MOV( p, x1y0, low_words( tmp[ 2 ] ) ); + brw_MOV( p, x1y1, low_words( tmp[ 3 ] ) ); + + brw_MOV( p, tmp[ 0 ], high_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 1 ], high_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 2 ], high_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 3 ], high_words( tmp[ 3 ] ) ); + + brw_MUL( p, x1y0, x1y0, t ); + brw_MUL( p, x1y1, x1y1, t ); + brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); + brw_MUL( p, x0y0, x0y0, param0 ); + brw_MUL( p, x0y1, x0y1, param0 ); + + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param1 ); + brw_MUL( p, tmp[ 2 ], tmp[ 2 ], param1 ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], t ); + brw_MUL( p, tmp[ 3 ], tmp[ 3 ], t ); + + brw_ADD( p, x0y0, x0y0, tmp[ 0 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 2 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 1 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 3 ] ); + + /* We interpolate between the gradients using the polynomial + 6t^5 - 15t^4 + 10t^3 (Perlin). 
*/ + brw_MUL( p, tmp[ 0 ], param0, brw_imm_f( 6.0 ) ); + brw_MUL( p, tmp[ 1 ], param1, brw_imm_f( 6.0 ) ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) ); + brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( -15.0 ) ); + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); + brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work to fill the + pipeline */ + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) ); + brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( 10.0 ) ); + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); + brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work to fill the + pipeline */ + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); + brw_MUL( p, param0, tmp[ 0 ], param0 ); + brw_MUL( p, param1, tmp[ 1 ], param1 ); + + /* Here we interpolate in the y dimension... */ + brw_MUL( p, x0y1, x0y1, param1 ); + brw_MUL( p, x1y1, x1y1, param1 ); + brw_ADD( p, x0y0, x0y0, x0y1 ); + brw_ADD( p, x1y0, x1y0, x1y1 ); + + /* And now in x. There are horrible register dependencies here, + but we have nothing else to do. 
*/ + brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); + brw_MUL( p, x1y0, x1y0, param0 ); + brw_ADD( p, x0y0, x0y0, x1y0 ); + + /* scale by pow( 2, -15 ), as described above */ + brw_MUL( p, param0, x0y0, brw_imm_f( 0.000030517578125 ) ); + + release_tmps( c, mark ); +} + +static void emit_noise2( struct brw_wm_compile *c, + struct prog_instruction *inst ) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, param0, param1, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + int mark = mark_tmps( c ); + + assert( mark == 0 ); + + src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); + src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); + + param0 = alloc_tmp( c ); + param1 = alloc_tmp( c ); + + brw_MOV( p, param0, src0 ); + brw_MOV( p, param1, src1 ); + + invoke_subroutine( c, SUB_NOISE2, noise2_sub ); + + /* Fill in the result: */ + brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_MOV( p, dst, param0 ); + } + } + if( inst->SaturateMode == SATURATE_ZERO_ONE ) + brw_set_saturate( p, 0 ); + + release_tmps( c, mark ); +} + +/* The three-dimensional case is much like the one- and two- versions above, + but since the number of corners is rapidly growing we now pack 16 16-bit + hashes into each register to extract more parallelism from the EUs. 
*/ +static void noise3_sub( struct brw_wm_compile *c ) { + + struct brw_compile *p = &c->func; + struct brw_reg param0, param1, param2, + x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */ + xi, yi, zi, /* interpolation coefficients */ + t, tmp[ 8 ], /* float temporaries */ + itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */ + wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */ + int i; + int mark = mark_tmps( c ); + + x0y0 = alloc_tmp( c ); + x0y1 = alloc_tmp( c ); + x1y0 = alloc_tmp( c ); + x1y1 = alloc_tmp( c ); + xi = alloc_tmp( c ); + yi = alloc_tmp( c ); + zi = alloc_tmp( c ); + t = alloc_tmp( c ); + for( i = 0; i < 8; i++ ) { + tmp[ i ] = alloc_tmp( c ); + itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); + wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 ); + } + + param0 = lookup_tmp( c, mark - 4 ); + param1 = lookup_tmp( c, mark - 3 ); + param2 = lookup_tmp( c, mark - 2 ); + + brw_set_access_mode( p, BRW_ALIGN_1 ); + + /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to + be hashed. Also compute the remainders (offsets within the unit + cube), interleaved to reduce register dependency penalties. */ + brw_RNDD( p, itmp[ 0 ], param0 ); + brw_RNDD( p, itmp[ 1 ], param1 ); + brw_RNDD( p, itmp[ 2 ], param2 ); + brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBC8F ) ); /* constant used later */ + brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0xD0BD ) ); /* constant used later */ + brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0x9B93 ) ); /* constant used later */ + brw_FRC( p, param0, param0 ); + brw_FRC( p, param1, param1 ); + brw_FRC( p, param2, param2 ); + /* Since we now have only 16 bits of precision in the hash, we must + be more careful about thorough mixing to maintain entropy as we + squash the input vector into a small scalar. 
*/ + brw_MUL( p, brw_acc_reg(), itmp[ 4 ], itmp[ 0 ] ); + brw_MAC( p, brw_acc_reg(), itmp[ 5 ], itmp[ 1 ] ); + brw_MAC( p, itmp[ 0 ], itmp[ 6 ], itmp[ 2 ] ); + brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ), + brw_imm_uw( 0xBC8F ) ); + + /* Temporarily disable the execution mask while we work with ExecSize=16 + channels (the mask is set for ExecSize=8 and is probably incorrect). + Although this might cause execution of unwanted channels, the code + writes only to temporary registers and has no side effects, so + disabling the mask is harmless. */ + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) ); + brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) ); + brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) ); + + /* We're now ready to perform the hashing. The eight hashes are + interleaved for performance. The hash function used is + designed to rapidly achieve avalanche and require only 16x16 + bit multiplication, and 8-bit swizzles (which we get for + free). */ + for( i = 0; i < 4; i++ ) + brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), + odd_bytes( wtmp[ i ] ) ); + for( i = 0; i < 4; i++ ) + brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), + odd_bytes( wtmp[ i ] ) ); + brw_pop_insn_state( p ); + + /* Now we want to initialise the four rear gradients based on the + hashes. Format conversion from signed integer to float leaves + everything scaled too high by a factor of pow( 2, 15 ), but + we correct for that right at the end. 
*/ + /* x component */ + brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); + brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); + brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); + brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) ); + brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) ); + brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, x1y0, x1y0, t ); + brw_MUL( p, x1y1, x1y1, t ); + brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); + brw_MUL( p, x0y0, x0y0, param0 ); + brw_MUL( p, x0y1, x0y1, param0 ); + + /* y component */ + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) ); + brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 ); + + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + + /* z component */ + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param2 ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param2 ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param2 ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param2 ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] 
); + + /* We interpolate between the gradients using the polynomial + 6t^5 - 15t^4 + 10t^3 (Perlin). */ + brw_MUL( p, xi, param0, brw_imm_f( 6.0 ) ); + brw_MUL( p, yi, param1, brw_imm_f( 6.0 ) ); + brw_MUL( p, zi, param2, brw_imm_f( 6.0 ) ); + brw_ADD( p, xi, xi, brw_imm_f( -15.0 ) ); + brw_ADD( p, yi, yi, brw_imm_f( -15.0 ) ); + brw_ADD( p, zi, zi, brw_imm_f( -15.0 ) ); + brw_MUL( p, xi, xi, param0 ); + brw_MUL( p, yi, yi, param1 ); + brw_MUL( p, zi, zi, param2 ); + brw_ADD( p, xi, xi, brw_imm_f( 10.0 ) ); + brw_ADD( p, yi, yi, brw_imm_f( 10.0 ) ); + brw_ADD( p, zi, zi, brw_imm_f( 10.0 ) ); + brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work */ + brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work */ + brw_MUL( p, xi, xi, param0 ); + brw_MUL( p, yi, yi, param1 ); + brw_MUL( p, zi, zi, param2 ); + brw_MUL( p, xi, xi, param0 ); + brw_MUL( p, yi, yi, param1 ); + brw_MUL( p, zi, zi, param2 ); + brw_MUL( p, xi, xi, param0 ); + brw_MUL( p, yi, yi, param1 ); + brw_MUL( p, zi, zi, param2 ); + + /* Here we interpolate in the y dimension... */ + brw_MUL( p, x0y1, x0y1, yi ); + brw_MUL( p, x1y1, x1y1, yi ); + brw_ADD( p, x0y0, x0y0, x0y1 ); + brw_ADD( p, x1y0, x1y0, x1y1 ); + + /* And now in x. Leave the result in tmp[ 0 ] (see below)... */ + brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); + brw_MUL( p, x1y0, x1y0, xi ); + brw_ADD( p, tmp[ 0 ], x0y0, x1y0 ); + + /* Now do the same thing for the front four gradients... 
*/ + /* x component */ + brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) ); + brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) ); + brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) ); + brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) ); + brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, x1y0, x1y0, t ); + brw_MUL( p, x1y1, x1y1, t ); + brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); + brw_MUL( p, x0y0, x0y0, param0 ); + brw_MUL( p, x0y1, x0y1, param0 ); + + /* y component */ + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) ); + brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + brw_ADD( p, t, param2, brw_imm_f( -1.0 ) ); + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 ); + + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + + /* z component */ + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + + /* The interpolation coefficients are still around from last 
time, so + again interpolate in the y dimension... */ + brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); + brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); + brw_MUL( p, x0y1, x0y1, yi ); + brw_MUL( p, x1y1, x1y1, yi ); + brw_ADD( p, x0y0, x0y0, x0y1 ); + brw_ADD( p, x1y0, x1y0, x1y1 ); + + /* And now in x. The rear face is in tmp[ 0 ] (see above), so this + time put the front face in tmp[ 1 ] and we're nearly there... */ + brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); + brw_MUL( p, x1y0, x1y0, xi ); + brw_ADD( p, tmp[ 1 ], x0y0, x1y0 ); + + /* The final interpolation, in the z dimension: */ + brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], zi ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] ); + + /* scale by pow( 2, -15 ), as described above */ + brw_MUL( p, param0, tmp[ 0 ], brw_imm_f( 0.000030517578125 ) ); + + release_tmps( c, mark ); +} + +static void emit_noise3( struct brw_wm_compile *c, + struct prog_instruction *inst ) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, src2, param0, param1, param2, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + int mark = mark_tmps( c ); + + assert( mark == 0 ); + + src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); + src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); + src2 = get_src_reg( c, inst->SrcReg, 2, 1 ); + + param0 = alloc_tmp( c ); + param1 = alloc_tmp( c ); + param2 = alloc_tmp( c ); + + brw_MOV( p, param0, src0 ); + brw_MOV( p, param1, src1 ); + brw_MOV( p, param2, src2 ); + + invoke_subroutine( c, SUB_NOISE3, noise3_sub ); + + /* Fill in the result: */ + brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_MOV( p, dst, param0 ); + } + } + if( inst->SaturateMode == SATURATE_ZERO_ONE ) + brw_set_saturate( p, 0 ); + + release_tmps( c, mark ); +} + static void emit_wpos_xy(struct brw_wm_compile *c, struct prog_instruction *inst) { @@ -1201,8 +1912,8 @@ static void 
brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_LRP: emit_lrp(c, inst); break; - case OPCODE_INT: - emit_int(c, inst); + case OPCODE_TRUNC: + emit_trunc(c, inst); break; case OPCODE_MOV: emit_mov(c, inst); @@ -1276,6 +1987,17 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_MAD: emit_mad(c, inst); break; + case OPCODE_NOISE1: + emit_noise1(c, inst); + break; + case OPCODE_NOISE2: + emit_noise2(c, inst); + break; + case OPCODE_NOISE3: + emit_noise3(c, inst); + break; + /* case OPCODE_NOISE4: */ + /* not yet implemented */ case OPCODE_TEX: emit_tex(c, inst); break; @@ -1368,7 +2090,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) { brw_wm_pass_fp(c); - c->tmp_index = 127; brw_wm_emit_glsl(brw, c); c->prog_data.total_grf = c->reg_index; c->prog_data.total_scratch = 0; diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 7dbc646370d..c9b88b0ae1f 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -30,6 +30,7 @@ #include "intel_decode.h" #include "intel_reg.h" #include "intel_bufmgr.h" +#include "intel_buffers.h" /* Relocations in kernel space: * - pass dma buffer seperately @@ -133,6 +134,9 @@ do_flush_locked(struct intel_batchbuffer *batch, { struct intel_context *intel = batch->intel; int ret = 0; + unsigned int num_cliprects = 0; + struct drm_clip_rect *cliprects = NULL; + int x_off = 0, y_off = 0; if (batch->buffer) dri_bo_subdata (batch->buf, 0, used, batch->buffer); @@ -142,23 +146,21 @@ do_flush_locked(struct intel_batchbuffer *batch, batch->map = NULL; batch->ptr = NULL; - /* Throw away non-effective packets. Won't work once we have - * hardware contexts which would preserve statechanges beyond a - * single buffer. 
- */ - if (!(intel->numClipRects == 0 && - batch->cliprect_mode == LOOP_CLIPRECTS) || intel->no_hw) { - dri_bo_exec(batch->buf, used, - intel->pClipRects, - batch->cliprect_mode != LOOP_CLIPRECTS ? - 0 : intel->numClipRects, - (((GLuint) intel->drawX) & 0xffff) | - (((GLuint) intel->drawY) << 16)); + if (batch->cliprect_mode == LOOP_CLIPRECTS) { + intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off); + } + /* Dispatch the batchbuffer, if it has some effect (nonzero cliprects). + * Can't short-circuit like this once we have hardware contexts, but we + * should always be in DRI2 mode by then anyway. + */ + if ((batch->cliprect_mode != LOOP_CLIPRECTS || + num_cliprects != 0) && !intel->no_hw) { + dri_bo_exec(batch->buf, used, cliprects, num_cliprects, + (x_off & 0xffff) | (y_off << 16)); } - if (intel->numClipRects == 0 && - batch->cliprect_mode == LOOP_CLIPRECTS) { + if (batch->cliprect_mode == LOOP_CLIPRECTS && num_cliprects == 0) { if (allow_unlock) { /* If we are not doing any actual user-visible rendering, * do a sched_yield to keep the app from pegging the cpu while diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index 1f8096b32ec..8129996979f 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -19,6 +19,9 @@ enum cliprect_mode { /** * Batchbuffer contents require looping over per cliprect at batch submit * time. + * + * This will be upgraded to NO_LOOP_CLIPRECTS when there's a single + * constant cliprect, as in DRI2 or FBO rendering. */ LOOP_CLIPRECTS, /** @@ -29,8 +32,10 @@ enum cliprect_mode { /** * Batchbuffer contents contain drawing that already handles cliprects, such * as 2D drawing to front/back/depth that doesn't respect DRAWING_RECTANGLE. + * * Equivalent behavior to NO_LOOP_CLIPRECTS, but may not persist in batch - * outside of LOCK/UNLOCK. + * outside of LOCK/UNLOCK. 
This is upgraded to just NO_LOOP_CLIPRECTS when + * there's a constant cliprect, as in DRI2 or FBO rendering. */ REFERENCES_CLIPRECTS }; @@ -115,6 +120,11 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, if (intel_batchbuffer_space(batch) < sz) intel_batchbuffer_flush(batch); + if ((cliprect_mode == LOOP_CLIPRECTS || + cliprect_mode == REFERENCES_CLIPRECTS) && + batch->intel->constant_cliprect) + cliprect_mode = NO_LOOP_CLIPRECTS; + if (cliprect_mode != IGNORE_CLIPRECTS) { if (batch->cliprect_mode == IGNORE_CLIPRECTS) { batch->cliprect_mode = cliprect_mode; diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 2917401e023..e1046f4a5de 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -272,24 +272,53 @@ intelEmitCopyBlit(struct intel_context *intel, GLshort w, GLshort h, GLenum logic_op) { - GLuint CMD, BR13; + GLuint CMD, BR13, pass = 0; int dst_y2 = dst_y + h; int dst_x2 = dst_x + w; dri_bo *aper_array[3]; BATCH_LOCALS; /* do space/cliprects check before going any further */ - intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS); - again: - aper_array[0] = intel->batch->buf; - aper_array[1] = dst_buffer; - aper_array[2] = src_buffer; - - if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) { - intel_batchbuffer_flush(intel->batch); - goto again; + do { + aper_array[0] = intel->batch->buf; + aper_array[1] = dst_buffer; + aper_array[2] = src_buffer; + + if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) { + intel_batchbuffer_flush(intel->batch); + pass++; + } else + break; + } while (pass < 2); + + if (pass >= 2) { + GLboolean locked = GL_FALSE; + if (!intel->locked) { + LOCK_HARDWARE(intel); + locked = GL_TRUE; + } + + dri_bo_map(dst_buffer, GL_TRUE); + dri_bo_map(src_buffer, GL_FALSE); + _mesa_copy_rect((GLubyte *)dst_buffer->virtual + dst_offset, + cpp, + dst_pitch, + dst_x, dst_y, + w, h, + (GLubyte 
*)src_buffer->virtual + src_offset, + src_pitch, + src_x, src_y); + + dri_bo_unmap(src_buffer); + dri_bo_unmap(dst_buffer); + + if (locked) + UNLOCK_HARDWARE(intel); + + return; } + intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS); DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", __FUNCTION__, src_buffer, src_pitch, src_offset, src_x, src_y, @@ -393,6 +422,9 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) struct gl_framebuffer *fb = ctx->DrawBuffer; GLuint clear_depth; GLbitfield skipBuffers = 0; + unsigned int num_cliprects; + struct drm_clip_rect *cliprects; + int x_off, y_off; BATCH_LOCALS; /* @@ -417,7 +449,8 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) intelFlush(&intel->ctx); LOCK_HARDWARE(intel); - if (intel->numClipRects) { + intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off); + if (num_cliprects) { GLint cx, cy, cw, ch; drm_clip_rect_t clear; int i; @@ -432,15 +465,15 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) /* clearing a window */ /* flip top to bottom */ - clear.x1 = cx + intel->drawX; + clear.x1 = cx + x_off; clear.y1 = intel->driDrawable->y + intel->driDrawable->h - cy - ch; clear.x2 = clear.x1 + cw; clear.y2 = clear.y1 + ch; } else { /* clearing FBO */ - assert(intel->numClipRects == 1); - assert(intel->pClipRects == &intel->fboRect); + assert(num_cliprects == 1); + assert(cliprects == &intel->fboRect); clear.x1 = cx; clear.y1 = cy; clear.x2 = clear.x1 + cw; @@ -448,8 +481,8 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) /* no change to mask */ } - for (i = 0; i < intel->numClipRects; i++) { - const drm_clip_rect_t *box = &intel->pClipRects[i]; + for (i = 0; i < num_cliprects; i++) { + const drm_clip_rect_t *box = &cliprects[i]; drm_clip_rect_t b; GLuint buf; GLuint clearMask = mask; /* use copy, since we modify it below */ diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c index 
f5eaf765f38..f8f009c6a30 100644 --- a/src/mesa/drivers/dri/intel/intel_buffers.c +++ b/src/mesa/drivers/dri/intel/intel_buffers.c @@ -123,99 +123,40 @@ intel_readbuf_region(struct intel_context *intel) return NULL; } - - -/** - * Update the following fields for rendering to a user-created FBO: - * intel->numClipRects - * intel->pClipRects - * intel->drawX - * intel->drawY - */ -static void -intelSetRenderbufferClipRects(struct intel_context *intel) -{ - /* If the batch contents require looping over cliprects, flush them before - * we go changing which cliprects get referenced when that happens. - */ - if (intel->batch->cliprect_mode == LOOP_CLIPRECTS && - (intel->fboRect.x2 != intel->ctx.DrawBuffer->Width || - intel->fboRect.x2 != intel->ctx.DrawBuffer->Height)) - intel_batchbuffer_flush(intel->batch); - - assert(intel->ctx.DrawBuffer->Width > 0); - assert(intel->ctx.DrawBuffer->Height > 0); - intel->fboRect.x1 = 0; - intel->fboRect.y1 = 0; - intel->fboRect.x2 = intel->ctx.DrawBuffer->Width; - intel->fboRect.y2 = intel->ctx.DrawBuffer->Height; - intel->numClipRects = 1; - intel->pClipRects = &intel->fboRect; - intel->drawX = 0; - intel->drawY = 0; -} - - -/** - * As above, but for rendering to front buffer of a window. - * \sa intelSetRenderbufferClipRects - */ -static void -intelSetFrontClipRects(struct intel_context *intel) -{ - __DRIdrawablePrivate *dPriv = intel->driDrawable; - - if (!dPriv) - return; - - /* If the batch contents require looping over cliprects, flush them before - * we go changing which cliprects get referenced when that happens. - */ - if (intel->batch->cliprect_mode == LOOP_CLIPRECTS && - intel->pClipRects != dPriv->pClipRects) - intel_batchbuffer_flush(intel->batch); - intel->numClipRects = dPriv->numClipRects; - intel->pClipRects = dPriv->pClipRects; - intel->drawX = dPriv->x; - intel->drawY = dPriv->y; -} - - -/** - * As above, but for rendering to back buffer of a window. 
- */ -static void -intelSetBackClipRects(struct intel_context *intel) +void +intel_get_cliprects(struct intel_context *intel, + struct drm_clip_rect **cliprects, + unsigned int *num_cliprects, + int *x_off, int *y_off) { __DRIdrawablePrivate *dPriv = intel->driDrawable; - struct intel_framebuffer *intel_fb; - - if (!dPriv) - return; - - intel_fb = dPriv->driverPrivate; + struct intel_framebuffer *intel_fb = dPriv->driverPrivate; - if (intel_fb->pf_active || dPriv->numBackClipRects == 0) { + if (intel->constant_cliprect) { + /* FBO or DRI2 rendering, which can just use the fb's size. */ + intel->fboRect.x1 = 0; + intel->fboRect.y1 = 0; + intel->fboRect.x2 = intel->ctx.DrawBuffer->Width; + intel->fboRect.y2 = intel->ctx.DrawBuffer->Height; + + *cliprects = &intel->fboRect; + *num_cliprects = 1; + *x_off = 0; + *y_off = 0; + } else if (intel->front_cliprects || + intel_fb->pf_active || dPriv->numBackClipRects == 0) { /* use the front clip rects */ - if (intel->batch->cliprect_mode == LOOP_CLIPRECTS && - intel->pClipRects != dPriv->pClipRects) - intel_batchbuffer_flush(intel->batch); - - intel->numClipRects = dPriv->numClipRects; - intel->pClipRects = dPriv->pClipRects; - intel->drawX = dPriv->x; - intel->drawY = dPriv->y; + *cliprects = dPriv->pClipRects; + *num_cliprects = dPriv->numClipRects; + *x_off = dPriv->x; + *y_off = dPriv->y; } else { /* use the back clip rects */ - if (intel->batch->cliprect_mode == LOOP_CLIPRECTS && - intel->pClipRects != dPriv->pBackClipRects) - intel_batchbuffer_flush(intel->batch); - - intel->numClipRects = dPriv->numBackClipRects; - intel->pClipRects = dPriv->pBackClipRects; - intel->drawX = dPriv->backX; - intel->drawY = dPriv->backY; + *num_cliprects = dPriv->numBackClipRects; + *cliprects = dPriv->pBackClipRects; + *x_off = dPriv->backX; + *y_off = dPriv->backY; } } @@ -300,29 +241,6 @@ intelWindowMoved(struct intel_context *intel) __DRIdrawablePrivate *dPriv = intel->driDrawable; struct intel_framebuffer *intel_fb = 
dPriv->driverPrivate; - if (!intel->ctx.DrawBuffer) { - /* when would this happen? -BP */ - intelSetFrontClipRects(intel); - } - else if (intel->ctx.DrawBuffer->Name != 0) { - /* drawing to user-created FBO - do nothing */ - /* Cliprects would be set from intelDrawBuffer() */ - } - else { - /* drawing to a window */ - switch (intel_fb->Base._ColorDrawBufferIndexes[0]) { - case BUFFER_FRONT_LEFT: - intelSetFrontClipRects(intel); - break; - case BUFFER_BACK_LEFT: - intelSetBackClipRects(intel); - break; - default: - intelSetFrontClipRects(intel); - } - - } - if (!intel->intelScreen->driScrnPriv->dri2.enabled && intel->intelScreen->driScrnPriv->ddx_version.minor >= 7) { volatile struct drm_i915_sarea *sarea = intel->sarea; @@ -894,7 +812,6 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) struct intel_context *intel = intel_context(ctx); struct intel_region *colorRegions[MAX_DRAW_BUFFERS], *depthRegion = NULL; struct intel_renderbuffer *irbDepth = NULL, *irbStencil = NULL; - int front = 0; /* drawing to front color buffer? */ if (!fb) { /* this can happen during the initial context initialization */ @@ -927,52 +844,44 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) */ if (fb->_NumColorDrawBuffers == 0) { /* writing to 0 */ - FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE); colorRegions[0] = NULL; - - if (fb->Name != 0) - intelSetRenderbufferClipRects(intel); + intel->constant_cliprect = GL_TRUE; } else if (fb->_NumColorDrawBuffers > 1) { int i; struct intel_renderbuffer *irb; - FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE); - if (fb->Name != 0) - intelSetRenderbufferClipRects(intel); for (i = 0; i < fb->_NumColorDrawBuffers; i++) { irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]); - colorRegions[i] = (irb && irb->region) ? irb->region : NULL; + colorRegions[i] = irb ? 
irb->region : NULL; } + intel->constant_cliprect = GL_TRUE; } else { - /* draw to exactly one color buffer */ - /*_mesa_debug(ctx, "Hardware rendering\n");*/ - FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE); - if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { - front = 1; - } - - /* - * Get the intel_renderbuffer for the colorbuffer we're drawing into. - * And set up cliprects. + /* Get the intel_renderbuffer for the single colorbuffer we're drawing + * into, and set up cliprects if it's a window system buffer. */ if (fb->Name == 0) { + intel->constant_cliprect = intel->driScreen->dri2.enabled; /* drawing to window system buffer */ - if (front) { - intelSetFrontClipRects(intel); + if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { + if (!intel->constant_cliprect && !intel->front_cliprects) + intel_batchbuffer_flush(intel->batch); + intel->front_cliprects = GL_TRUE; colorRegions[0] = intel_get_rb_region(fb, BUFFER_FRONT_LEFT); } else { - intelSetBackClipRects(intel); + if (!intel->constant_cliprect && intel->front_cliprects) + intel_batchbuffer_flush(intel->batch); + intel->front_cliprects = GL_FALSE; colorRegions[0]= intel_get_rb_region(fb, BUFFER_BACK_LEFT); } } else { /* drawing to user-created FBO */ struct intel_renderbuffer *irb; - intelSetRenderbufferClipRects(intel); irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]); colorRegions[0] = (irb && irb->region) ? 
irb->region : NULL; + intel->constant_cliprect = GL_TRUE; } } diff --git a/src/mesa/drivers/dri/intel/intel_buffers.h b/src/mesa/drivers/dri/intel/intel_buffers.h index a669a854317..e5afb37dd1a 100644 --- a/src/mesa/drivers/dri/intel/intel_buffers.h +++ b/src/mesa/drivers/dri/intel/intel_buffers.h @@ -29,6 +29,8 @@ #ifndef INTEL_BUFFERS_H #define INTEL_BUFFERS_H +#include "dri_util.h" +#include "drm.h" struct intel_context; struct intel_framebuffer; @@ -53,4 +55,9 @@ extern void intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb); extern void intelInitBufferFuncs(struct dd_function_table *functions); +void intel_get_cliprects(struct intel_context *intel, + struct drm_clip_rect **cliprects, + unsigned int *num_cliprects, + int *x_off, int *y_off); + #endif /* INTEL_BUFFERS_H */ diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h index 170efd060ae..d1b4941601e 100644 --- a/src/mesa/drivers/dri/intel/intel_chipset.h +++ b/src/mesa/drivers/dri/intel/intel_chipset.h @@ -68,11 +68,12 @@ devid == PCI_CHIP_I965_GME || \ devid == PCI_CHIP_GM45_GM) -#define IS_GM45_GM(devid) (devid == PCI_CHIP_GM45_GM) -#define IS_G4X(devid) (devid == PCI_CHIP_IGD_E_G || \ +#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ devid == PCI_CHIP_Q45_G || \ devid == PCI_CHIP_G45_G || \ devid == PCI_CHIP_G41_G) +#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) +#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) #define IS_915(devid) (devid == PCI_CHIP_I915_G || \ devid == PCI_CHIP_E7221_G || \ @@ -91,7 +92,6 @@ devid == PCI_CHIP_I965_GM || \ devid == PCI_CHIP_I965_GME || \ devid == PCI_CHIP_I946_GZ || \ - IS_GM45_GM(devid) || \ IS_G4X(devid)) #define IS_9XX(devid) (IS_915(devid) || \ diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 2b3a9b9d371..9ac18e69609 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -588,9 
+588,6 @@ intelInitContext(struct intel_context *intel, intel->driFd = sPriv->fd; intel->driHwLock = sPriv->lock; - intel->width = intelScreen->width; - intel->height = intelScreen->height; - driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache, intel->driScreen->myNum, IS_965(intelScreen->deviceID) ? "i965" : "i915"); @@ -932,38 +929,6 @@ intelContendedLock(struct intel_context *intel, GLuint flags) sarea->ctxOwner, intel->hHWContext); } - if (sarea->width != intel->width || sarea->height != intel->height) { - int numClipRects = intel->numClipRects; - - /* - * FIXME: Really only need to do this when drawing to a - * common back- or front buffer. - */ - - /* - * This will essentially drop the outstanding batchbuffer on - * the floor. - */ - intel->numClipRects = 0; - - if (intel->Fallback) - _swrast_flush(&intel->ctx); - - if (!IS_965(intel->intelScreen->deviceID)) - INTEL_FIREVERTICES(intel); - - if (intel->batch->map != intel->batch->ptr) - intel_batchbuffer_flush(intel->batch); - - intel->numClipRects = numClipRects; - - /* force window update */ - intel->lastStamp = 0; - - intel->width = sarea->width; - intel->height = sarea->height; - } - /* Drawable changed? */ if (dPriv && intel->lastStamp != dPriv->lastStamp) { diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 554159ac441..3938af4c72c 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -235,10 +235,18 @@ struct intel_context /* These refer to the current drawing buffer: */ - int drawX, drawY; /**< origin of drawing area within region */ - GLuint numClipRects; /**< cliprects for drawing */ - drm_clip_rect_t *pClipRects; struct gl_texture_object *frame_buffer_texobj; + /** + * Set to true if a single constant cliprect should be used in the + * batchbuffer. Otherwise, cliprects must be calculated at batchbuffer + * flush time while the lock is held. 
+ */ + GLboolean constant_cliprect; + /** + * In !constant_cliprect mode, set to true if the front cliprects should be + * used instead of back. + */ + GLboolean front_cliprects; drm_clip_rect_t fboRect; /**< cliprect for FBO rendering */ int perf_boxes; @@ -271,10 +279,6 @@ struct intel_context */ driOptionCache optionCache; - /* Last seen width/height of the screen */ - int width; - int height; - int64_t swap_ust; int64_t swap_missed_ust; diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index 89635198931..0565197ea08 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -191,11 +191,7 @@ do_blit_bitmap( GLcontext *ctx, color8888 = INTEL_PACKCOLOR8888(ubcolor[0], ubcolor[1], ubcolor[2], ubcolor[3]); color565 = INTEL_PACKCOLOR565(ubcolor[0], ubcolor[1], ubcolor[2]); - /* Does zoom apply to bitmaps? - */ - if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F) || - ctx->Pixel.ZoomX != 1.0F || - ctx->Pixel.ZoomY != 1.0F) + if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F)) return GL_FALSE; LOCK_HARDWARE(intel); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c index 50518a68792..8ebbc95a1d0 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c @@ -75,19 +75,21 @@ intel_texture_drawpixels(GLcontext * ctx, /* We're going to mess with texturing with no regard to existing texture * state, so if there is some set up we have to bail. 
*/ - if (ctx->Texture._EnabledUnits != 0) + if (ctx->Texture._EnabledUnits != 0) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "glDrawPixels() fallback: texturing enabled\n"); return GL_FALSE; + } /* Can't do textured DrawPixels with a fragment program, unless we were * to generate a new program that sampled our texture and put the results * in the fragment color before the user's program started. */ - if (ctx->FragmentProgram.Enabled) - return GL_FALSE; - - /* Don't even want to think about it */ - if (format == GL_COLOR_INDEX) + if (ctx->FragmentProgram.Enabled) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "glDrawPixels() fallback: fragment program enabled\n"); return GL_FALSE; + } /* We don't have a way to generate fragments with stencil values which * * will set the resulting stencil value. @@ -108,8 +110,12 @@ intel_texture_drawpixels(GLcontext * ctx, * the color buffer, and sample the texture values into the fragment depth * in a program. */ - if (format == GL_DEPTH_COMPONENT) + if (format == GL_DEPTH_COMPONENT) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, + "glDrawPixels() fallback: format == GL_DEPTH_COMPONENT\n"); return GL_FALSE; + } _mesa_PushAttrib(GL_ENABLE_BIT | GL_TRANSFORM_BIT | GL_TEXTURE_BIT | GL_CURRENT_BIT); @@ -141,22 +147,27 @@ intel_texture_drawpixels(GLcontext * ctx, _mesa_PushMatrix(); _mesa_LoadIdentity(); + /* Create the vertex buffer based on the current raster pos. The x and y + * we're handed are ctx->Current.RasterPos[0,1] rounded to integers. + * We also apply the depth. However, the W component is already multiplied + * into ctx->Current.RasterPos[0,1,2] and we can ignore it at this point. 
+ */ vertices[0][0] = x; vertices[0][1] = y; vertices[0][2] = ctx->Current.RasterPos[2]; - vertices[0][3] = ctx->Current.RasterPos[3]; + vertices[0][3] = 1.0; vertices[1][0] = x + width * ctx->Pixel.ZoomX; vertices[1][1] = y; vertices[1][2] = ctx->Current.RasterPos[2]; - vertices[1][3] = ctx->Current.RasterPos[3]; + vertices[1][3] = 1.0; vertices[2][0] = x + width * ctx->Pixel.ZoomX; vertices[2][1] = y + height * ctx->Pixel.ZoomY; vertices[2][2] = ctx->Current.RasterPos[2]; - vertices[2][3] = ctx->Current.RasterPos[3]; + vertices[2][3] = 1.0; vertices[3][0] = x; vertices[3][1] = y + height * ctx->Pixel.ZoomY; vertices[3][2] = ctx->Current.RasterPos[2]; - vertices[3][3] = ctx->Current.RasterPos[3]; + vertices[3][3] = 1.0; texcoords[0][0] = 0.0; texcoords[0][1] = 0.0; @@ -212,8 +223,12 @@ intel_stencil_drawpixels(GLcontext * ctx, return GL_TRUE; /* Can't do a per-bit writemask while treating stencil as rgba data. */ - if ((ctx->Stencil.WriteMask[0] & 0xff) != 0xff) + if ((ctx->Stencil.WriteMask[0] & 0xff) != 0xff) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "glDrawPixels(STENCIL_INDEX) fallback: " + "stencil mask enabled\n"); return GL_FALSE; + } /* We use FBOs for our wrapping of the depthbuffer into a color * destination. @@ -224,21 +239,29 @@ intel_stencil_drawpixels(GLcontext * ctx, /* We're going to mess with texturing with no regard to existing texture * state, so if there is some set up we have to bail. */ - if (ctx->Texture._EnabledUnits != 0) + if (ctx->Texture._EnabledUnits != 0) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "glDrawPixels(STENCIL_INDEX) fallback: " + "texturing enabled\n"); return GL_FALSE; + } /* Can't do textured DrawPixels with a fragment program, unless we were * to generate a new program that sampled our texture and put the results * in the fragment color before the user's program started. 
*/ - if (ctx->FragmentProgram.Enabled) + if (ctx->FragmentProgram.Enabled) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "glDrawPixels(STENCIL_INDEX) fallback: " + "fragment program enabled\n"); return GL_FALSE; + } /* Check that we can load in a texture this big. */ if (width > (1 << (ctx->Const.MaxTextureLevels - 1)) || height > (1 << (ctx->Const.MaxTextureLevels - 1))) { if (INTEL_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "glDrawPixels(STENCIL_IDNEX) fallback: " + fprintf(stderr, "glDrawPixels(STENCIL_INDEX) fallback: " "bitmap too large (%dx%d)\n", width, height); return GL_FALSE; diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h index c21f4080935..81a7386e429 100644 --- a/src/mesa/drivers/dri/intel/intel_reg.h +++ b/src/mesa/drivers/dri/intel/intel_reg.h @@ -29,6 +29,8 @@ #define CMD_2D (0x2 << 29) #define CMD_3D (0x3 << 29) +#define MI_NOOP (CMD_MI | 0) + #define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23) #define MI_FLUSH (CMD_MI | (4 << 23)) @@ -44,6 +46,9 @@ #define _3DSTATE_LOAD_STATE_IMMEDIATE_1 (CMD_3D | (0x1d<<24) | (0x04<<16)) #define I1_LOAD_S(n) (1<<(4+n)) +#define _3DSTATE_DRAWRECT_INFO (CMD_3D | (0x1d<<24) | (0x80<<16) | 0x3) +#define _3DSTATE_DRAWRECT_INFO_I965 (CMD_3D | (3 << 27) | (1 << 24) | 0x2) + /** @{ * * PIPE_CONTROL operation, a combination MI_FLUSH and register write with diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index 45faf64c713..8dbcc3050ee 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -79,30 +79,6 @@ intel_region_unmap(struct intel_context *intel, struct intel_region *region) } } -static int -intel_set_region_tiling_gem(struct intel_context *intel, - struct intel_region *region, - uint32_t bo_handle) -{ - struct drm_i915_gem_get_tiling get_tiling; - int ret; - - memset(&get_tiling, 0, sizeof(get_tiling)); - - get_tiling.handle = bo_handle; - ret = 
ioctl(intel->driFd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling); - if (ret != 0) { - fprintf(stderr, "Failed to get tiling state for region: %s\n", - strerror(errno)); - return ret; - } - - region->tiling = get_tiling.tiling_mode; - region->bit_6_swizzle = get_tiling.swizzle_mode; - - return 0; -} - static struct intel_region * intel_region_alloc_internal(struct intel_context *intel, GLuint cpp, @@ -151,6 +127,7 @@ intel_region_alloc_for_handle(struct intel_context *intel, { struct intel_region *region; dri_bo *buffer; + int ret; buffer = intel_bo_gem_create_from_name(intel->bufmgr, name, handle); @@ -159,7 +136,14 @@ intel_region_alloc_for_handle(struct intel_context *intel, if (region == NULL) return region; - intel_set_region_tiling_gem(intel, region, handle); + ret = dri_bo_get_tiling(region->buffer, &region->tiling, + &region->bit_6_swizzle); + if (ret != 0) { + fprintf(stderr, "Couldn't get tiling of buffer %d (%s): %s\n", + handle, name, strerror(-ret)); + intel_region_release(&region); + return NULL; + } return region; } @@ -489,7 +473,14 @@ intel_recreate_static(struct intel_context *intel, name, region_desc->bo_handle); - intel_set_region_tiling_gem(intel, region, region_desc->bo_handle); + ret = dri_bo_get_tiling(region->buffer, &region->tiling, + &region->bit_6_swizzle); + if (ret != 0) { + fprintf(stderr, "Couldn't get tiling of buffer %d (%s): %s\n", + region_desc->bo_handle, name, strerror(-ret)); + intel_region_release(&region); + return NULL; + } } else { if (region->classic_map != NULL) { drmUnmap(region->classic_map, diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 8e2b4456f81..8f4e681ffea 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -30,6 +30,7 @@ #include "main/mtypes.h" #include "main/colormac.h" +#include "intel_buffers.h" #include "intel_fbo.h" #include "intel_screen.h" #include "intel_span.h" @@ -131,12 +132,8 @@ pwrite_8(struct intel_renderbuffer 
*irb, uint32_t offset, uint8_t val) } static uint32_t no_tile_swizzle(struct intel_renderbuffer *irb, - struct intel_context *intel, int x, int y) { - x += intel->drawX; - y += intel->drawY; - return (y * irb->region->pitch + x) * irb->region->cpp; } @@ -145,7 +142,6 @@ static uint32_t no_tile_swizzle(struct intel_renderbuffer *irb, */ static uint32_t x_tile_swizzle(struct intel_renderbuffer *irb, - struct intel_context *intel, int x, int y) { int tile_stride; @@ -155,9 +151,6 @@ static uint32_t x_tile_swizzle(struct intel_renderbuffer *irb, int tile_off, tile_base; tile_stride = (irb->pfPitch * irb->region->cpp) << 3; - - x += intel->drawX; - y += intel->drawY; xbyte = x * irb->region->cpp; @@ -204,7 +197,6 @@ static uint32_t x_tile_swizzle(struct intel_renderbuffer *irb, } static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, - struct intel_context *intel, int x, int y) { int tile_stride; @@ -214,9 +206,6 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, int tile_off, tile_base; tile_stride = (irb->pfPitch * irb->region->cpp) << 5; - - x += intel->drawX; - y += intel->drawY; xbyte = x * irb->region->cpp; @@ -268,8 +257,12 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ const GLint yScale = irb->RenderToTexture ? 1 : -1; \ const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; \ + unsigned int num_cliprects; \ + struct drm_clip_rect *cliprects; \ + int x_off, y_off; \ GLuint p; \ - (void) p; + (void) p; \ + intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off); /* XXX FBO: this is identical to the macro in spantmp2.h except we get * the cliprect info from the context, not the driDrawable. 
@@ -277,12 +270,12 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, */ #define HW_CLIPLOOP() \ do { \ - int _nc = intel->numClipRects; \ + int _nc = num_cliprects; \ while ( _nc-- ) { \ - int minx = intel->pClipRects[_nc].x1 - intel->drawX; \ - int miny = intel->pClipRects[_nc].y1 - intel->drawY; \ - int maxx = intel->pClipRects[_nc].x2 - intel->drawX; \ - int maxy = intel->pClipRects[_nc].y2 - intel->drawY; + int minx = cliprects[_nc].x1 - x_off; \ + int miny = cliprects[_nc].y1 - y_off; \ + int maxx = cliprects[_nc].x2 - x_off; \ + int maxy = cliprects[_nc].y2 - y_off; #if 0 }} @@ -295,6 +288,11 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define HW_UNLOCK() +/* Convenience macros to avoid typing the swizzle argument over and over */ +#define NO_TILE(_X, _Y) no_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off) +#define X_TILE(_X, _Y) x_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off) +#define Y_TILE(_X, _Y) y_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off) + /* 16 bit, RGB565 color spanline and pixel functions */ #define SPANTMP_PIXEL_FMT GL_RGB @@ -302,8 +300,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define TAG(x) intel##x##_RGB565 #define TAG2(x,y) intel##x##_RGB565##y -#define GET_VALUE(X, Y) pread_16(irb, no_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_16(irb, no_tile_swizzle(irb, intel, X, Y), V) +#define GET_VALUE(X, Y) pread_16(irb, NO_TILE(X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_16(irb, NO_TILE(X, Y), V) #include "spantmp2.h" /* 32 bit, ARGB8888 color spanline and pixel functions @@ -313,8 +311,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define TAG(x) intel##x##_ARGB8888 #define TAG2(x,y) intel##x##_ARGB8888##y -#define GET_VALUE(X, Y) pread_32(irb, no_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_32(irb, no_tile_swizzle(irb, intel, X, Y), V) +#define GET_VALUE(X, Y) pread_32(irb, NO_TILE(X, Y)) +#define PUT_VALUE(X, Y, V) 
pwrite_32(irb, NO_TILE(X, Y), V) #include "spantmp2.h" /* 32 bit, xRGB8888 color spanline and pixel functions @@ -324,8 +322,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define TAG(x) intel##x##_xRGB8888 #define TAG2(x,y) intel##x##_xRGB8888##y -#define GET_VALUE(X, Y) pread_xrgb8888(irb, no_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, no_tile_swizzle(irb, intel, X, Y), V) +#define GET_VALUE(X, Y) pread_xrgb8888(irb, NO_TILE(X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, NO_TILE(X, Y), V) #include "spantmp2.h" /* 16 bit RGB565 color tile spanline and pixel functions @@ -336,8 +334,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define TAG(x) intel_XTile_##x##_RGB565 #define TAG2(x,y) intel_XTile_##x##_RGB565##y -#define GET_VALUE(X, Y) pread_16(irb, x_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_16(irb, x_tile_swizzle(irb, intel, X, Y), V) +#define GET_VALUE(X, Y) pread_16(irb, X_TILE(X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_16(irb, X_TILE(X, Y), V) #include "spantmp2.h" #define SPANTMP_PIXEL_FMT GL_RGB @@ -345,8 +343,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define TAG(x) intel_YTile_##x##_RGB565 #define TAG2(x,y) intel_YTile_##x##_RGB565##y -#define GET_VALUE(X, Y) pread_16(irb, y_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_16(irb, y_tile_swizzle(irb, intel, X, Y), V) +#define GET_VALUE(X, Y) pread_16(irb, Y_TILE(X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_16(irb, Y_TILE(X, Y), V) #include "spantmp2.h" /* 32 bit ARGB888 color tile spanline and pixel functions @@ -357,8 +355,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define TAG(x) intel_XTile_##x##_ARGB8888 #define TAG2(x,y) intel_XTile_##x##_ARGB8888##y -#define GET_VALUE(X, Y) pread_32(irb, x_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_32(irb, x_tile_swizzle(irb, intel, X, Y), V) +#define GET_VALUE(X, Y) 
pread_32(irb, X_TILE(X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_32(irb, X_TILE(X, Y), V) #include "spantmp2.h" #define SPANTMP_PIXEL_FMT GL_BGRA @@ -366,8 +364,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define TAG(x) intel_YTile_##x##_ARGB8888 #define TAG2(x,y) intel_YTile_##x##_ARGB8888##y -#define GET_VALUE(X, Y) pread_32(irb, y_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_32(irb, y_tile_swizzle(irb, intel, X, Y), V) +#define GET_VALUE(X, Y) pread_32(irb, Y_TILE(X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_32(irb, Y_TILE(X, Y), V) #include "spantmp2.h" /* 32 bit xRGB888 color tile spanline and pixel functions @@ -378,8 +376,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define TAG(x) intel_XTile_##x##_xRGB8888 #define TAG2(x,y) intel_XTile_##x##_xRGB8888##y -#define GET_VALUE(X, Y) pread_xrgb8888(irb, x_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, x_tile_swizzle(irb, intel, X, Y), V) +#define GET_VALUE(X, Y) pread_xrgb8888(irb, X_TILE(X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, X_TILE(X, Y), V) #include "spantmp2.h" #define SPANTMP_PIXEL_FMT GL_BGRA @@ -387,15 +385,19 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define TAG(x) intel_YTile_##x##_xRGB8888 #define TAG2(x,y) intel_YTile_##x##_xRGB8888##y -#define GET_VALUE(X, Y) pread_xrgb8888(irb, y_tile_swizzle(irb, intel, X, Y)) -#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, y_tile_swizzle(irb, intel, X, Y), V) +#define GET_VALUE(X, Y) pread_xrgb8888(irb, Y_TILE(X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, Y_TILE(X, Y), V) #include "spantmp2.h" #define LOCAL_DEPTH_VARS \ struct intel_context *intel = intel_context(ctx); \ struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ const GLint yScale = irb->RenderToTexture ? 1 : -1; \ - const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; + const GLint yBias = irb->RenderToTexture ? 
0 : irb->Base.Height - 1; \ + unsigned int num_cliprects; \ + struct drm_clip_rect *cliprects; \ + int x_off, y_off; \ + intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off); #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS @@ -404,10 +406,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, ** 16-bit depthbuffer functions. **/ #define VALUE_TYPE GLushort -#define WRITE_DEPTH(_x, _y, d) \ - pwrite_16(irb, no_tile_swizzle(irb, intel, _x, _y), d) -#define READ_DEPTH(d, _x, _y) \ - d = pread_16(irb, no_tile_swizzle(irb, intel, _x, _y)) +#define WRITE_DEPTH(_x, _y, d) pwrite_16(irb, NO_TILE(_x, _y), d) +#define READ_DEPTH(d, _x, _y) d = pread_16(irb, NO_TILE(_x, _y)) #define TAG(x) intel##x##_z16 #include "depthtmp.h" @@ -416,10 +416,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, ** 16-bit x tile depthbuffer functions. **/ #define VALUE_TYPE GLushort -#define WRITE_DEPTH(_x, _y, d) \ - pwrite_16(irb, x_tile_swizzle(irb, intel, _x, _y), d) -#define READ_DEPTH(d, _x, _y) \ - d = pread_16(irb, x_tile_swizzle(irb, intel, _x, _y)) +#define WRITE_DEPTH(_x, _y, d) pwrite_16(irb, X_TILE(_x, _y), d) +#define READ_DEPTH(d, _x, _y) d = pread_16(irb, X_TILE(_x, _y)) #define TAG(x) intel_XTile_##x##_z16 #include "depthtmp.h" @@ -427,10 +425,8 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, ** 16-bit y tile depthbuffer functions. 
**/ #define VALUE_TYPE GLushort -#define WRITE_DEPTH(_x, _y, d) \ - pwrite_16(irb, y_tile_swizzle(irb, intel, _x, _y), d) -#define READ_DEPTH(d, _x, _y) \ - d = pread_16(irb, y_tile_swizzle(irb, intel, _x, _y)) +#define WRITE_DEPTH(_x, _y, d) pwrite_16(irb, Y_TILE(_x, _y), d) +#define READ_DEPTH(d, _x, _y) d = pread_16(irb, Y_TILE(_x, _y)) #define TAG(x) intel_YTile_##x##_z16 #include "depthtmp.h" @@ -445,12 +441,11 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, /* Change ZZZS -> SZZZ */ #define WRITE_DEPTH(_x, _y, d) \ - pwrite_32(irb, no_tile_swizzle(irb, intel, _x, _y), \ - ((d) >> 8) | ((d) << 24)) + pwrite_32(irb, NO_TILE(_x, _y), ((d) >> 8) | ((d) << 24)) /* Change SZZZ -> ZZZS */ #define READ_DEPTH( d, _x, _y ) { \ - GLuint tmp = pread_32(irb, no_tile_swizzle(irb, intel, _x, _y)); \ + GLuint tmp = pread_32(irb, NO_TILE(_x, _y)); \ d = (tmp << 8) | (tmp >> 24); \ } @@ -468,12 +463,11 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, /* Change ZZZS -> SZZZ */ #define WRITE_DEPTH(_x, _y, d) \ - pwrite_32(irb, x_tile_swizzle(irb, intel, _x, _y), \ - ((d) >> 8) | ((d) << 24)) \ + pwrite_32(irb, X_TILE(_x, _y), ((d) >> 8) | ((d) << 24)) /* Change SZZZ -> ZZZS */ #define READ_DEPTH( d, _x, _y ) { \ - GLuint tmp = pread_32(irb, x_tile_swizzle(irb, intel, _x, _y)); \ + GLuint tmp = pread_32(irb, X_TILE(_x, _y)); \ d = (tmp << 8) | (tmp >> 24); \ } @@ -490,12 +484,11 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, /* Change ZZZS -> SZZZ */ #define WRITE_DEPTH(_x, _y, d) \ - pwrite_32(irb, y_tile_swizzle(irb, intel, _x, _y), \ - ((d) >> 8) | ((d) << 24)) + pwrite_32(irb, Y_TILE(_x, _y), ((d) >> 8) | ((d) << 24)) /* Change SZZZ -> ZZZS */ #define READ_DEPTH( d, _x, _y ) { \ - GLuint tmp = pread_32(irb, y_tile_swizzle(irb, intel, _x, _y)); \ + GLuint tmp = pread_32(irb, Y_TILE(_x, _y)); \ d = (tmp << 8) | (tmp >> 24); \ } @@ -506,36 +499,24 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, /** ** 8-bit 
stencil function (XXX FBO: This is obsolete) **/ -#define WRITE_STENCIL(_x, _y, d) \ - pwrite_8(irb, no_tile_swizzle(irb, intel, _x, _y) + 3, d) - -#define READ_STENCIL(d, _x, _y) \ - d = pread_8(irb, no_tile_swizzle(irb, intel, _x, _y) + 3); - +#define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, NO_TILE(_x, _y) + 3, d) +#define READ_STENCIL(d, _x, _y) d = pread_8(irb, NO_TILE(_x, _y) + 3); #define TAG(x) intel##x##_z24_s8 #include "stenciltmp.h" /** ** 8-bit x-tile stencil function (XXX FBO: This is obsolete) **/ -#define WRITE_STENCIL(_x, _y, d) \ - pwrite_8(irb, x_tile_swizzle(irb, intel, _x, _y) + 3, d) - -#define READ_STENCIL(d, _x, _y) \ - d = pread_8(irb, x_tile_swizzle(irb, intel, _x, _y) + 3); - +#define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, X_TILE(_x, _y) + 3, d) +#define READ_STENCIL(d, _x, _y) d = pread_8(irb, X_TILE(_x, _y) + 3); #define TAG(x) intel_XTile_##x##_z24_s8 #include "stenciltmp.h" /** ** 8-bit y-tile stencil function (XXX FBO: This is obsolete) **/ -#define WRITE_STENCIL(_x, _y, d) \ - pwrite_8(irb, y_tile_swizzle(irb, intel, _x, _y) + 3, d) - -#define READ_STENCIL(d, _x, _y) \ - d = pread_8(irb, y_tile_swizzle(irb, intel, _x, _y) + 3) - +#define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, Y_TILE(_x, _y) + 3, d) +#define READ_STENCIL(d, _x, _y) d = pread_8(irb, Y_TILE(_x, _y) + 3) #define TAG(x) intel_YTile_##x##_z24_s8 #include "stenciltmp.h" @@ -602,14 +583,10 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) if (tex) { /* render to texture */ ASSERT(att->Renderbuffer); - if (map) { - struct gl_texture_image *texImg; - texImg = tex->Image[att->CubeMapFace][att->TextureLevel]; + if (map) intel_tex_map_images(intel, intel_texture_object(tex)); - } - else { + else intel_tex_unmap_images(intel, intel_texture_object(tex)); - } } } diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 05107dd2ada..5f32dd575e3 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ 
b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -900,7 +900,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->depthHasSurface = (sPriv->ddx_version.major > 4) || /* these chips don't use tiled z without hyperz. So always pretend we have set up a surface which will cause linear reads/writes */ - ((screen->chip_family & RADEON_CLASS_R100) && + (IS_R100_CLASS(screen) && !(screen->chip_flags & RADEON_CHIPSET_TCL)); if ( dri_priv->textureSize == 0 ) { |