diff options
Diffstat (limited to 'src/mesa/drivers')
59 files changed, 1298 insertions, 567 deletions
diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 44adaf86828..276da41f4e4 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -34,6 +34,7 @@ #include "main/renderbuffer.h" #include "main/texcompress.h" #include "main/texformat.h" +#include "main/texgetimage.h" #include "main/teximage.h" #include "main/texobj.h" #include "main/texstore.h" diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index ae790554055..38c2e7b00d1 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -163,21 +163,18 @@ static int driBindContext(__DRIcontext *pcp, { __DRIscreenPrivate *psp = pcp->driScreenPriv; - /* - ** Assume error checking is done properly in glXMakeCurrent before - ** calling driBindContext. - */ - - if (pcp == NULL || pdp == None || prp == None) - return GL_FALSE; - /* Bind the drawable to the context */ - pcp->driDrawablePriv = pdp; - pcp->driReadablePriv = prp; - pdp->driContextPriv = pcp; - pdp->refcount++; - if ( pdp != prp ) { - prp->refcount++; + + if (pcp) { + pcp->driDrawablePriv = pdp; + pcp->driReadablePriv = prp; + if (pdp) { + pdp->driContextPriv = pcp; + pdp->refcount++; + } + if ( prp && pdp != prp ) { + prp->refcount++; + } } /* @@ -186,23 +183,21 @@ static int driBindContext(__DRIcontext *pcp, */ if (!psp->dri2.enabled) { - if (!pdp->pStamp || *pdp->pStamp != pdp->lastStamp) { + if (pdp && !pdp->pStamp) { DRM_SPINLOCK(&psp->pSAREA->drawable_lock, psp->drawLockID); __driUtilUpdateDrawableInfo(pdp); DRM_SPINUNLOCK(&psp->pSAREA->drawable_lock, psp->drawLockID); } - - if ((pdp != prp) && (!prp->pStamp || *prp->pStamp != prp->lastStamp)) { + if (prp && pdp != prp && !prp->pStamp) { DRM_SPINLOCK(&psp->pSAREA->drawable_lock, psp->drawLockID); __driUtilUpdateDrawableInfo(prp); DRM_SPINUNLOCK(&psp->pSAREA->drawable_lock, psp->drawLockID); - } + } } /* Call device-specific MakeCurrent */ - (*psp->DriverAPI.MakeCurrent)(pcp, pdp, prp); - return GL_TRUE; + return (*psp->DriverAPI.MakeCurrent)(pcp, pdp, prp); } /*@}*/ diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c index c9acd81be74..66f277c10b2 100644 --- a/src/mesa/drivers/dri/common/utils.c +++ b/src/mesa/drivers/dri/common/utils.c @@ -481,7 +481,7 @@ driCreateConfigs(GLenum fb_format, GLenum fb_type, const uint8_t * depth_bits, const uint8_t * stencil_bits, unsigned num_depth_stencil_bits, const GLenum * db_modes, unsigned num_db_modes, - const u_int8_t * msaa_samples, unsigned num_msaa_modes) + const uint8_t * msaa_samples, unsigned num_msaa_modes) { static const uint8_t bits_table[4][4] = { /* R G B A */ diff --git a/src/mesa/drivers/dri/gamma/gamma_tex.c b/src/mesa/drivers/dri/gamma/gamma_tex.c index ca33c1740ff..97797d47882 100644 --- a/src/mesa/drivers/dri/gamma/gamma_tex.c +++ b/src/mesa/drivers/dri/gamma/gamma_tex.c @@ -107,9 +107,14 @@ static void gammaSetTexFilter(gammaContextPtr gmesa, static void gammaSetTexBorderColor(gammaContextPtr gmesa, gammaTextureObjectPtr t, - GLubyte color[4]) + const GLfloat color[4]) { - t->TextureBorderColor = PACK_COLOR_8888(color[0], color[1], color[2], color[3]); + GLubyte c[4]; + CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]); + CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]); + CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]); + CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]); + t->TextureBorderColor = PACK_COLOR_8888(c[0], c[1], c[2], c[3]); } @@ -143,7 +148,7 @@ static void gammaTexParameter( GLcontext *ctx, GLenum target, break; case GL_TEXTURE_BORDER_COLOR: - gammaSetTexBorderColor( gmesa, t, tObj->_BorderChan ); + gammaSetTexBorderColor( gmesa, t, tObj->BorderColor ); break; case GL_TEXTURE_BASE_LEVEL: @@ -347,7 +352,7 @@ static void gammaBindTexture( GLcontext *ctx, GLenum target, gammaSetTexWrapping( t, tObj->WrapS, tObj->WrapT ); gammaSetTexFilter( gmesa, t, tObj->MinFilter, tObj->MagFilter, bias ); - gammaSetTexBorderColor( gmesa, t, tObj->_BorderChan ); + gammaSetTexBorderColor( gmesa, t, tObj->BorderColor ); } } diff --git a/src/mesa/drivers/dri/i810/i810tex.c b/src/mesa/drivers/dri/i810/i810tex.c index ba4e6b5b0b1..cd6e1a8e6e8 100644 --- a/src/mesa/drivers/dri/i810/i810tex.c +++ b/src/mesa/drivers/dri/i810/i810tex.c @@ -162,7 +162,7 @@ static void i810SetTexFilter(i810ContextPtr imesa, static void -i810SetTexBorderColor( i810TextureObjectPtr t, GLubyte color[4] ) +i810SetTexBorderColor( i810TextureObjectPtr t, const GLfloat color[4] ) { /* Need a fallback. */ @@ -211,7 +211,7 @@ i810AllocTexObj( GLcontext *ctx, struct gl_texture_object *texObj ) i810SetTexWrapping( t, texObj->WrapS, texObj->WrapT ); /*i830SetTexMaxAnisotropy( t, texObj->MaxAnisotropy );*/ i810SetTexFilter( imesa, t, texObj->MinFilter, texObj->MagFilter, bias ); - i810SetTexBorderColor( t, texObj->_BorderChan ); + i810SetTexBorderColor( t, texObj->BorderColor ); } return t; @@ -252,7 +252,7 @@ static void i810TexParameter( GLcontext *ctx, GLenum target, break; case GL_TEXTURE_BORDER_COLOR: - i810SetTexBorderColor( t, tObj->_BorderChan ); + i810SetTexBorderColor( t, tObj->BorderColor ); break; case GL_TEXTURE_BASE_LEVEL: diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c index df43b779a79..753c25b57ed 100644 --- a/src/mesa/drivers/dri/i915/i830_texstate.c +++ b/src/mesa/drivers/dri/i915/i830_texstate.c @@ -122,6 +122,7 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) struct gl_texture_image *firstImage; GLuint *state = i830->state.Tex[unit], format, pitch; GLint lodbias; + GLubyte border[4]; memset(state, 0, sizeof(state)); @@ -294,11 +295,16 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) (ws))); } + /* convert border color from float to ubyte */ + CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->BorderColor[0]); + CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->BorderColor[1]); + CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->BorderColor[2]); + CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->BorderColor[3]); - state[I830_TEXREG_TM0S4] = INTEL_PACKCOLOR8888(tObj->_BorderChan[0], - tObj->_BorderChan[1], - tObj->_BorderChan[2], - tObj->_BorderChan[3]); + state[I830_TEXREG_TM0S4] = INTEL_PACKCOLOR8888(border[0], + border[1], + border[2], + border[3]); I830_ACTIVESTATE(i830, I830_UPLOAD_TEX(unit), GL_TRUE); diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index 6d25f8dd8ef..43f65392b56 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -133,6 +133,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) struct gl_texture_image *firstImage; GLuint *state = i915->state.Tex[unit], format, pitch; GLint lodbias; + GLubyte border[4]; memset(state, 0, sizeof(state)); @@ -318,21 +319,26 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) state[I915_TEXREG_SS3] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT); } + /* convert border color from float to ubyte */ + CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->BorderColor[0]); + CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->BorderColor[1]); + CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->BorderColor[2]); + CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->BorderColor[3]); if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { /* GL specs that border color for depth textures is taken from the * R channel, while the hardware uses A. Spam R into all the channels * for safety. */ - state[I915_TEXREG_SS4] = INTEL_PACKCOLOR8888(tObj->_BorderChan[0], - tObj->_BorderChan[0], - tObj->_BorderChan[0], - tObj->_BorderChan[0]); + state[I915_TEXREG_SS4] = INTEL_PACKCOLOR8888(border[0], + border[0], + border[0], + border[0]); } else { - state[I915_TEXREG_SS4] = INTEL_PACKCOLOR8888(tObj->_BorderChan[0], - tObj->_BorderChan[1], - tObj->_BorderChan[2], - tObj->_BorderChan[3]); + state[I915_TEXREG_SS4] = INTEL_PACKCOLOR8888(border[0], + border[1], + border[2], + border[3]); } diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 48ed4325bef..01e07c967fa 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -167,6 +167,9 @@ struct brw_fragment_program { struct gl_fragment_program program; GLuint id; /**< serial no. to identify frag progs, never re-used */ GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ + + /** Program constant buffer/surface */ + dri_bo *const_buffer; }; @@ -238,8 +241,16 @@ struct brw_vs_ouput_sizes { }; +/** Number of texture sampler units */ #define BRW_MAX_TEX_UNIT 16 -#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + MAX_DRAW_BUFFERS + +/** + * Size of our surface binding table. + * This contains pointers to the drawing surfaces and current texture + * objects and shader constant buffer (+1). + */ +#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) + enum brw_cache_id { BRW_CC_VP, @@ -513,8 +524,8 @@ struct brw_context /* BRW_NEW_CURBE_OFFSETS: */ struct { - GLuint wm_start; - GLuint wm_size; + GLuint wm_start; /**< pos of first wm const in CURBE buffer */ + GLuint wm_size; /**< number of float[4] consts, multiple of 16 */ GLuint clip_start; GLuint clip_size; GLuint vs_start; @@ -588,7 +599,7 @@ struct brw_context GLuint nr_surfaces; GLuint max_threads; - dri_bo *scratch_buffer; + dri_bo *scratch_bo; GLuint sampler_count; dri_bo *sampler_bo; diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 545dedd34ba..a6bfb7507e7 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -38,6 +38,7 @@ #include "shader/prog_parameter.h" #include "shader/prog_statevars.h" #include "intel_batchbuffer.h" +#include "intel_regions.h" #include "brw_context.h" #include "brw_defines.h" #include "brw_state.h" @@ -251,6 +252,7 @@ static void prepare_constant_buffer(struct brw_context *brw) _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); + /* XXX just use a memcpy here */ for (i = 0; i < nr; i++) { const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i]; buf[offset + i * 4 + 0] = value[0]; @@ -330,11 +332,39 @@ static void prepare_constant_buffer(struct brw_context *brw) } +/** + * Vertex/fragment shader constants are stored in a pseudo 1D texture. + * This function updates the constants in that buffer. + */ +static void +update_texture_constant_buffer(struct brw_context *brw) +{ + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + const struct gl_program_parameter_list *params = fp->program.Base.Parameters; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + + assert(fp->const_buffer); + assert(fp->const_buffer->size >= size); + + /* copy constants into the buffer */ + if (size > 0) { + GLubyte *map; + dri_bo_map(fp->const_buffer, GL_TRUE); + map = fp->const_buffer->virtual; + memcpy(map, params->ParameterValues, size); + dri_bo_unmap(fp->const_buffer); + } +} + + static void emit_constant_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; GLuint sz = brw->curbe.total_size; + update_texture_constant_buffer(brw); + BEGIN_BATCH(2, IGNORE_CLIPRECTS); if (sz == 0) { OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2)); diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 590b064c7ef..98fc909c2ad 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -225,6 +225,24 @@ #define BRW_RASTRULE_UPPER_LEFT 0 #define BRW_RASTRULE_UPPER_RIGHT 1 +/* These are listed as "Reserved, but not seen as useful" + * in Intel documentation (page 212, "Point Rasterization Rule", + * section 7.4 "SF Pipeline State Summary", of document + * "IntelĀ® 965 Express Chipset Family and IntelĀ® G35 Express + * Chipset Graphics Controller Programmer's Reference Manual, + * Volume 2: 3D/Media", Revision 1.0b as of January 2008, + * available at + * http://intellinuxgraphics.org/documentation.html + * at the time of this writing). + * + * These appear to be supported on at least some + * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT + * is useful when using OpenGL to render to a FBO + * (which has the pixel coordinate Y orientation inverted + * with respect to the normal OpenGL pixel coordinate system). + */ +#define BRW_RASTRULE_LOWER_LEFT 2 +#define BRW_RASTRULE_LOWER_RIGHT 3 #define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 #define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 @@ -349,9 +367,10 @@ #define BRW_SURFACEFORMAT_L8A8_UNORM 0x114 #define BRW_SURFACEFORMAT_I16_FLOAT 0x115 #define BRW_SURFACEFORMAT_L16_FLOAT 0x116 -#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 -#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 -#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A +#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 +#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB 0x118 +#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A #define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B #define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C #define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D @@ -368,6 +387,7 @@ #define BRW_SURFACEFORMAT_A4P4_UNORM 0x148 #define BRW_SURFACEFORMAT_R8_SSCALED 0x149 #define BRW_SURFACEFORMAT_R8_USCALED 0x14A +#define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C #define BRW_SURFACEFORMAT_R1_UINT 0x181 #define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 #define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 02998d59571..b91b20bec6f 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -277,6 +277,7 @@ copy_array_to_vbo_array( struct brw_context *brw, struct brw_vertex_element *element, GLuint dst_stride) { + struct intel_context *intel = &brw->intel; GLuint size = element->count * dst_stride; get_space(brw, size, &element->bo, &element->offset); @@ -289,29 +290,52 @@ copy_array_to_vbo_array( struct brw_context *brw, } if (dst_stride == element->glarray->StrideB) { - dri_bo_subdata(element->bo, - element->offset, - size, - element->glarray->Ptr); + if (intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(element->bo); + memcpy((char *)element->bo->virtual + element->offset, + element->glarray->Ptr, size); + drm_intel_gem_bo_unmap_gtt(element->bo); + } else { + dri_bo_subdata(element->bo, + element->offset, + size, + element->glarray->Ptr); + } } else { - void *data; char *dest; const unsigned char *src = element->glarray->Ptr; int i; - data = _mesa_malloc(dst_stride * element->count); - dest = data; - for (i = 0; i < element->count; i++) { - memcpy(dest, src, dst_stride); - src += element->glarray->StrideB; - dest += dst_stride; - } + if (intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(element->bo); + dest = element->bo->virtual; + dest += element->offset; - dri_bo_subdata(element->bo, - element->offset, - size, - data); - _mesa_free(data); + for (i = 0; i < element->count; i++) { + memcpy(dest, src, dst_stride); + src += element->glarray->StrideB; + dest += dst_stride; + } + + drm_intel_gem_bo_unmap_gtt(element->bo); + } else { + void *data; + + data = _mesa_malloc(dst_stride * element->count); + dest = data; + for (i = 0; i < element->count; i++) { + memcpy(dest, src, dst_stride); + src += element->glarray->StrideB; + dest += dst_stride; + } + + dri_bo_subdata(element->bo, + element->offset, + size, + data); + + _mesa_free(data); + } } } @@ -563,7 +587,13 @@ static void brw_prepare_indices(struct brw_context *brw) /* Straight upload */ - dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); + if (intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(bo); + memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); + drm_intel_gem_bo_unmap_gtt(bo); + } else { + dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); + } } else { offset = (GLuint) (unsigned long) index_buffer->ptr; diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index eb99c21711e..d05f2e6c410 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -730,6 +730,13 @@ static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset return ptr; } +/** Do two brw_regs refer to the same register? */ +static INLINE GLboolean +brw_same_reg(struct brw_reg r1, struct brw_reg r2) +{ + return r1.file == r2.file && r1.nr == r2.nr; +} + static INLINE struct brw_instruction *current_insn( struct brw_compile *p) { return &p->store[p->nr_insn]; @@ -851,6 +858,13 @@ void brw_dp_READ_16( struct brw_compile *p, GLuint msg_reg_nr, GLuint scratch_offset ); +void brw_dp_READ_4( struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + GLboolean relAddr, + GLuint scratch_offset, + GLuint bind_table_index ); + void brw_dp_WRITE_16( struct brw_compile *p, struct brw_reg src, GLuint msg_reg_nr, diff --git a/src/mesa/drivers/dri/i965/brw_eu_debug.c b/src/mesa/drivers/dri/i965/brw_eu_debug.c index 91dbbd5af62..29f3f6d02fa 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_debug.c +++ b/src/mesa/drivers/dri/i965/brw_eu_debug.c @@ -65,6 +65,7 @@ void brw_print_reg( struct brw_reg hwreg ) hwreg.width == BRW_WIDTH_8 && hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && hwreg.type == BRW_REGISTER_TYPE_F) { + /* vector register */ _mesa_printf("vec%d", hwreg.nr); } else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && @@ -72,8 +73,12 @@ void brw_print_reg( struct brw_reg hwreg ) hwreg.width == BRW_WIDTH_1 && hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && hwreg.type == BRW_REGISTER_TYPE_F) { + /* "scalar" register */ _mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); } + else if (hwreg.file == BRW_IMMEDIATE_VALUE) { + _mesa_printf("imm %f", hwreg.dw1.f); + } else { _mesa_printf("%s%d.%d<%d;%d,%d>:%s", file[hwreg.file], diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 6dce1ca48e8..21ce8369db4 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -320,14 +320,15 @@ static void brw_set_dp_read_message( struct brw_instruction *insn, { brw_set_src1(insn, brw_imm_d(0)); - insn->bits3.dp_read.binding_table_index = binding_table_index; - insn->bits3.dp_read.msg_control = msg_control; - insn->bits3.dp_read.msg_type = msg_type; - insn->bits3.dp_read.target_cache = target_cache; - insn->bits3.dp_read.response_length = response_length; - insn->bits3.dp_read.msg_length = msg_length; - insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; - insn->bits3.dp_read.end_of_thread = end_of_thread; + insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ + insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ + insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ + insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ + insn->bits3.dp_read.response_length = response_length; /*16:19*/ + insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ + insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ + insn->bits3.dp_read.pad1 = 0; /*28:30*/ + insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/ } static void brw_set_sampler_message(struct brw_context *brw, @@ -770,7 +771,7 @@ void brw_CMP(struct brw_compile *p, * Helpers for the various SEND message types: */ -/* Invert 8 values +/** Extended math function, float[8]. */ void brw_math( struct brw_compile *p, struct brw_reg dest, @@ -802,7 +803,9 @@ void brw_math( struct brw_compile *p, data_type); } -/* Use 2 send instructions to invert 16 elements +/** + * Extended math function, float[16]. + * Use 2 send instructions. */ void brw_math_16( struct brw_compile *p, struct brw_reg dest, @@ -855,8 +858,11 @@ void brw_math_16( struct brw_compile *p, } - - +/** + * Write block of 16 dwords/floats to the data port Render Cache scratch buffer. + * Scratch offset should be a multiple of 64. + * Used for register spilling. + */ void brw_dp_WRITE_16( struct brw_compile *p, struct brw_reg src, GLuint msg_reg_nr, @@ -867,6 +873,7 @@ void brw_dp_WRITE_16( struct brw_compile *p, brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_compression_control(p, BRW_COMPRESSION_NONE); + /* set message header global offset field (reg 0, element 2) */ brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), brw_imm_d(scratch_offset)); @@ -887,7 +894,7 @@ void brw_dp_WRITE_16( struct brw_compile *p, brw_set_src0(insn, src); brw_set_dp_write_message(insn, - 255, /* bti */ + 255, /* binding table index (255=stateless) */ BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ msg_length, @@ -895,10 +902,14 @@ void brw_dp_WRITE_16( struct brw_compile *p, 0, /* response_length */ 0); /* eot */ } - } +/** + * Read block of 16 dwords/floats from the data port Render Cache scratch buffer. + * Scratch offset should be a multiple of 64. + * Used for register spilling. + */ void brw_dp_READ_16( struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, @@ -909,6 +920,7 @@ void brw_dp_READ_16( struct brw_compile *p, brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); + /* set message header global offset field (reg 0, element 2) */ brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), brw_imm_d(scratch_offset)); @@ -927,10 +939,10 @@ void brw_dp_READ_16( struct brw_compile *p, brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); brw_set_dp_read_message(insn, - 255, /* bti */ - 3, /* msg_control */ + 255, /* binding table index (255=stateless) */ + 3, /* msg_control (3 means 4 Owords) */ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ - 1, /* target cache */ + 1, /* target cache (render/scratch) */ 1, /* msg_length */ 2, /* response_length */ 0); /* eot */ @@ -938,6 +950,56 @@ void brw_dp_READ_16( struct brw_compile *p, } +/** + * Read a float[4] vector from the data port Data Cache (const buffer). + * Scratch offset should be a multiple of 16. + * Used for fetching shader constants. + * If relAddr is true, we'll do an indirect fetch using the address register. + */ +void brw_dp_READ_4( struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + GLboolean relAddr, + GLuint scratch_offset, + GLuint bind_table_index ) +{ + { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + /* set message header global offset field (reg 0, element 2) */ + brw_MOV(p, + retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD), + brw_imm_d(scratch_offset)); + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + /* cast dest to a uword[8] vector */ + dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); + + brw_set_dest(insn, dest); + brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); + + brw_set_dp_read_message(insn, + bind_table_index, /* binding table index (255=stateless) */ + 0, /* msg_control (0 means 1 Oword) */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 0, /* source cache = data cache */ + 1, /* msg_length */ + 1, /* response_length (1 Oword) */ + 0); /* eot */ + } +} + + void brw_fb_WRITE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, @@ -966,7 +1028,11 @@ void brw_fb_WRITE(struct brw_compile *p, } - +/** + * Texture sample instruction. + * Note: the msg_type plus msg_length values determine exactly what kind + * of sampling operation is performed. See volume 4, page 161 of docs. + */ void brw_SAMPLE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, @@ -1061,7 +1127,7 @@ void brw_SAMPLE(struct brw_compile *p, /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } */ brw_push_insn_state(p); - brw_set_compression_control(p, GL_FALSE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, reg, reg); brw_pop_insn_state(p); } diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index d90bd820386..457bc2fc7f4 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -111,6 +111,8 @@ static void brwProgramStringNotify( GLcontext *ctx, struct gl_program *prog ) { struct brw_context *brw = brw_context(ctx); + struct intel_context *intel = &brw->intel; + if (target == GL_FRAGMENT_PROGRAM_ARB) { struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; struct brw_fragment_program *newFP = brw_fragment_program(fprog); @@ -126,6 +128,24 @@ static void brwProgramStringNotify( GLcontext *ctx, brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; newFP->id = brw->program_id++; newFP->isGLSL = brw_wm_is_glsl(fprog); + + /* alloc constant buffer/surface */ + { + const struct gl_program_parameter_list *params = prog->Parameters; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + + /* free old const buffer if too small */ + if (newFP->const_buffer && newFP->const_buffer->size < size) { + dri_bo_unreference(newFP->const_buffer); + newFP->const_buffer = NULL; + } + + if (!newFP->const_buffer) { + newFP->const_buffer = drm_intel_bo_alloc(intel->bufmgr, + "fp_const_buffer", + size, 64); + } + } } else if (target == GL_VERTEX_PROGRAM_ARB) { struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog; diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index ffdb0ae6df8..862835f157a 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -59,37 +59,6 @@ static GLboolean have_attr(struct brw_sf_compile *c, return (c->key.attrs & (1<<attr)) ? 1 : 0; } -/** - * Sets VERT_RESULT_FOGC.Y for gl_FrontFacing - * - * This is currently executed if the fragment program uses VERT_RESULT_FOGC - * at all, but this could be eliminated with a scan of the FP contents. - */ -static void -do_front_facing( struct brw_sf_compile *c ) -{ - struct brw_compile *p = &c->func; - int i; - - if (!have_attr(c, VERT_RESULT_FOGC)) - return; - - brw_push_insn_state(p); - brw_CMP(p, brw_null_reg(), - c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L, - c->det, brw_imm_f(0)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - for (i = 0; i < 3; i++) { - struct brw_reg fogc = get_vert_attr(c, c->vert[i],FRAG_ATTRIB_FOGC); - brw_MOV(p, get_element(fogc, 1), brw_imm_f(0)); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, get_element(fogc, 1), brw_imm_f(1)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } - brw_pop_insn_state(p); -} - - /*********************************************************************** * Twoside lighting */ @@ -384,7 +353,6 @@ void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate) invert_det(c); copy_z_inv_w(c); - do_front_facing(c); if (c->key.do_twoside_color) do_twoside_color(c); diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 93a9686f718..fc4eddda0a5 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -231,7 +231,33 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.sf6.line_width = 0; /* _NEW_POINT */ - sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; /* opengl conventions */ + key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; + if (!key->render_to_fbo) { + /* Rendering to an OpenGL window */ + sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; + } + else { + /* If rendering to an FBO, the pixel coordinate system is + * inverted with respect to the normal OpenGL coordinate + * system, so BRW_RASTRULE_LOWER_RIGHT is correct. + * But this value is listed as "Reserved, but not seen as useful" + * in Intel documentation (page 212, "Point Rasterization Rule", + * section 7.4 "SF Pipeline State Summary", of document + * "IntelĀ® 965 Express Chipset Family and IntelĀ® G35 Express + * Chipset Graphics Controller Programmer's Reference Manual, + * Volume 2: 3D/Media", Revision 1.0b as of January 2008, + * available at + * http://intellinuxgraphics.org/documentation.html + * at the time of this writing). + * + * It does work on at least some devices, if not all; + * if devices that don't support it can be identified, + * the likely failure case is that points are rasterized + * incorrectly, which is no worse than occurs without + * the value, so we're using it here. + */ + sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT; + } /* XXX clamp max depends on AA vs. non-AA */ sf.sf7.sprite_point = key->point_sprite; diff --git a/src/mesa/drivers/dri/i965/brw_tex.c b/src/mesa/drivers/dri/i965/brw_tex.c index ef99e9c1aea..71bff166dda 100644 --- a/src/mesa/drivers/dri/i965/brw_tex.c +++ b/src/mesa/drivers/dri/i965/brw_tex.c @@ -32,21 +32,12 @@ #include "main/glheader.h" #include "main/mtypes.h" -#include "main/imports.h" -#include "main/simple_list.h" -#include "main/enums.h" -#include "main/image.h" #include "main/teximage.h" -#include "main/texstore.h" -#include "main/texformat.h" - -#include "texmem.h" #include "intel_context.h" #include "intel_regions.h" #include "intel_tex.h" #include "brw_context.h" -#include "brw_defines.h" void brw_FrameBufferTexInit( struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 3807dff9919..0d6c6ab9a8a 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -49,7 +49,8 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* r0 -- reserved as usual */ - c->r0 = brw_vec8_grf(reg, 0); reg++; + c->r0 = brw_vec8_grf(reg, 0); + reg++; /* User clip planes from curbe: */ @@ -60,7 +61,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* Deal with curbe alignment: */ - reg += ((6+c->key.nr_userclip+3)/4)*2; + reg += ((6 + c->key.nr_userclip + 3) / 4) * 2; } /* Vertex program parameters from curbe: @@ -69,7 +70,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) for (i = 0; i < nr_params; i++) { c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); } - reg += (nr_params+1)/2; + reg += (nr_params + 1) / 2; c->prog_data.curb_read_length = reg - 1; @@ -77,7 +78,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) */ c->nr_inputs = 0; for (i = 0; i < VERT_ATTRIB_MAX; i++) { - if (c->prog_data.inputs_read & (1<<i)) { + if (c->prog_data.inputs_read & (1 << i)) { c->nr_inputs++; c->regs[PROGRAM_INPUT][i] = brw_vec8_grf(reg, 0); reg++; @@ -91,7 +92,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) c->first_output = reg; mrf = 4; for (i = 0; i < VERT_RESULT_MAX; i++) { - if (c->prog_data.outputs_written & (1<<i)) { + if (c->prog_data.outputs_written & (1 << i)) { c->nr_outputs++; if (i == VERT_RESULT_HPOS) { c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); @@ -133,16 +134,15 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) } for (i = 0; i < 128; i++) { - if (c->output_regs[i].used_in_src) { - c->output_regs[i].reg = brw_vec8_grf(reg, 0); - reg++; - } + if (c->output_regs[i].used_in_src) { + c->output_regs[i].reg = brw_vec8_grf(reg, 0); + reg++; + } } c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); reg += 2; - - + /* Some opcodes need an internal temporary: */ c->first_tmp = reg; @@ -152,9 +152,9 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) * urb_read_length is the number of registers read from *each* * vertex urb, so is half the amount: */ - c->prog_data.urb_read_length = (c->nr_inputs+1)/2; + c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2; - c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4; + c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4; c->prog_data.total_grf = reg; if (INTEL_DEBUG & DEBUG_VS) { @@ -187,6 +187,10 @@ static void release_tmps( struct brw_vs_compile *c ) } +/** + * If an instruction uses a temp reg both as a src and the dest, we + * sometimes need to allocate an intermediate temporary. + */ static void unalias1( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, @@ -206,6 +210,10 @@ static void unalias1( struct brw_vs_compile *c, } } +/** + * \sa unalias2 + * Checkes if 2-operand instruction needs an intermediate temporary. + */ static void unalias2( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, @@ -228,6 +236,10 @@ static void unalias2( struct brw_vs_compile *c, } } +/** + * \sa unalias2 + * Checkes if 3-operand instruction needs an intermediate temporary. + */ static void unalias3( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, @@ -981,7 +993,7 @@ post_vs_emit( struct brw_vs_compile *c, } -/* Emit the fragment program instructions here. +/* Emit the vertex program instructions here. */ void brw_vs_emit(struct brw_vs_compile *c ) { @@ -1038,7 +1050,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) struct prog_src_register *src = &inst->SrcReg[i]; index = src->Index; file = src->File; - if (file == PROGRAM_OUTPUT&&c->output_regs[index].used_in_src) + if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) args[i] = c->output_regs[index].reg; else args[i] = get_arg(c, src); diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 1645ca0b70e..90d74c2885c 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -40,6 +40,8 @@ GLuint brw_wm_nr_args( GLuint opcode ) { switch (opcode) { + case WM_FRONTFACING: + return 0; case WM_PIXELXY: case WM_CINTERP: case WM_WPOSXY: @@ -103,12 +105,17 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) brw_wm_pass1(c); /* Register allocation. + * Divide by two because we operate on 16 pixels at a time and require + * two GRF entries for each logical shader register. */ c->grf_limit = BRW_WM_MAX_GRF / 2; brw_wm_pass2(c); + /* how many general-purpose registers are used */ c->prog_data.total_grf = c->max_wm_grf; + + /* Scratch space is used for register spilling */ if (c->last_scratch) { c->prog_data.total_scratch = c->last_scratch + 0x40; } diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 7f0e5702f2e..d0ab3bdc65f 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -172,7 +172,8 @@ struct brw_wm_instruction { #define WM_CINTERP (MAX_OPCODE + 5) #define WM_WPOSXY (MAX_OPCODE + 6) #define WM_FB_WRITE (MAX_OPCODE + 7) -#define MAX_WM_OPCODE (MAX_OPCODE + 8) +#define WM_FRONTFACING (MAX_OPCODE + 8) +#define MAX_WM_OPCODE (MAX_OPCODE + 9) #define PROGRAM_PAYLOAD (PROGRAM_FILE_MAX) #define PAYLOAD_DEPTH (FRAG_ATTRIB_MAX) @@ -241,8 +242,8 @@ struct brw_wm_compile { /** Mapping from Mesa registers to hardware registers */ struct { - GLboolean inited; - struct brw_reg reg; + GLboolean inited; + struct brw_reg reg; } wm_regs[PROGRAM_PAYLOAD+1][256][4]; struct brw_reg stack; @@ -252,6 +253,14 @@ struct brw_wm_compile { GLuint tmp_index; GLuint tmp_max; GLuint subroutines[BRW_WM_MAX_SUBROUTINE]; + + /** using a real constant buffer? */ + GLboolean use_const_buffer; + /** we may need up to 3 constants per instruction (if use_const_buffer) */ + struct { + GLint index; + struct brw_reg reg; + } current_const[3]; }; diff --git a/src/mesa/drivers/dri/i965/brw_wm_debug.c b/src/mesa/drivers/dri/i965/brw_wm_debug.c index 8f07f89ebc5..220821087c1 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_debug.c +++ b/src/mesa/drivers/dri/i965/brw_wm_debug.c @@ -130,6 +130,9 @@ void brw_wm_print_insn( struct brw_wm_compile *c, case WM_FB_WRITE: _mesa_printf(" = FB_WRITE"); break; + case WM_FRONTFACING: + _mesa_printf(" = FRONTFACING"); + break; default: _mesa_printf(" = %s", _mesa_opcode_string(inst->opcode)); break; diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index f2dca9caa6c..d65b1332c61 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -254,6 +254,34 @@ static void emit_cinterp( struct brw_compile *p, } } +/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */ +static void emit_frontfacing( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask ) +{ + struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); + GLuint i; + + if (!(mask & WRITEMASK_XYZW)) + return; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_MOV(p, dst[i], brw_imm_f(0.0)); + } + } + + /* bit 31 is "primitive is back face", so checking < (1 << 31) gives + * us front face + */ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31)); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_MOV(p, dst[i], brw_imm_f(1.0)); + } + } + brw_set_predicate_control_flag_value(p, 0xff); +} static void emit_alu1( struct brw_compile *p, struct brw_instruction *(*func)(struct brw_compile *, @@ -996,8 +1024,8 @@ static void emit_fb_write( struct brw_wm_compile *c, } -/* Post-fragment-program processing. Send the results to the - * framebuffer. +/** + * Move a GPR to scratch memory. */ static void emit_spill( struct brw_wm_compile *c, struct brw_reg reg, @@ -1021,6 +1049,9 @@ static void emit_spill( struct brw_wm_compile *c, } +/** + * Load a GPR from scratch memory. + */ static void emit_unspill( struct brw_wm_compile *c, struct brw_reg reg, GLuint slot ) @@ -1041,13 +1072,14 @@ static void emit_unspill( struct brw_wm_compile *c, brw_dp_READ_16(p, retype(vec16(reg), BRW_REGISTER_TYPE_UW), - 1, + 1, slot); } /** - * Retrieve upto 4 GEN4 register pairs for the given wm reg: + * Retrieve up to 4 GEN4 register pairs for the given wm reg: + * Args with unspill_reg != 0 will be loaded from scratch memory. */ static void get_argument_regs( struct brw_wm_compile *c, struct brw_wm_ref *arg[], @@ -1057,13 +1089,12 @@ static void get_argument_regs( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (arg[i]) { - - if (arg[i]->unspill_reg) - emit_unspill(c, + if (arg[i]->unspill_reg) + emit_unspill(c, brw_vec8_grf(arg[i]->unspill_reg, 0), arg[i]->value->spill_slot); - regs[i] = arg[i]->hw_reg; + regs[i] = arg[i]->hw_reg; } else { regs[i] = brw_null_reg(); @@ -1072,6 +1103,9 @@ static void get_argument_regs( struct brw_wm_compile *c, } +/** + * For values that have a spill_slot!=0, write those regs to scratch memory. + */ static void spill_values( struct brw_wm_compile *c, struct brw_wm_value *values, GLuint nr ) @@ -1160,6 +1194,10 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot); break; + case WM_FRONTFACING: + emit_frontfacing(p, dst, dst_flags); + break; + /* Straightforward arithmetic: */ case OPCODE_ADD: diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 533be3858e9..a7f5f1b9a28 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -58,7 +58,8 @@ static const char *wm_opcode_strings[] = { "PINTERP", "CINTERP", "WPOSXY", - "FB_WRITE" + "FB_WRITE", + "FRONTFACING", }; #if 0 @@ -313,18 +314,13 @@ static void emit_interp( struct brw_wm_compile *c, struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); struct prog_src_register deltas = get_delta_xy(c); - struct prog_src_register arg2; - GLuint opcode; - + /* Need to use PINTERP on attributes which have been * multiplied by 1/W in the SF program, and LINTERP on those * which have not: */ switch (idx) { case FRAG_ATTRIB_WPOS: - opcode = WM_LINTERP; - arg2 = src_undef(); - /* Have to treat wpos.xy specially: */ emit_op(c, @@ -345,7 +341,7 @@ static void emit_interp( struct brw_wm_compile *c, 0, interp, deltas, - arg2); + src_undef()); break; case FRAG_ATTRIB_COL0: case FRAG_ATTRIB_COL1: @@ -368,6 +364,56 @@ static void emit_interp( struct brw_wm_compile *c, src_undef()); } break; + case FRAG_ATTRIB_FOGC: + /* The FOGC input is really special. When a program uses glFogFragCoord, + * the results returned are supposed to be (f,0,0,1). But for Mesa GLSL, + * the glFrontFacing and glPointCoord values are also stashed in FOGC. + * So, write the interpolated fog value to X, then either 0, 1, or the + * stashed values to Y, Z, W. Note that this means that + * glFogFragCoord.yzw can be wrong in those cases! + */ + + /* Interpolate the fog coordinate */ + emit_op(c, + WM_PINTERP, + dst_mask(dst, WRITEMASK_X), + 0, + interp, + deltas, + get_pixel_w(c)); + + /* Move the front facing value into FOGC.y if it's needed. */ + if (c->fp->program.UsesFrontFacing) { + emit_op(c, + WM_FRONTFACING, + dst_mask(dst, WRITEMASK_Y), + 0, + src_undef(), + src_undef(), + src_undef()); + } else { + emit_op(c, + OPCODE_MOV, + dst_mask(dst, WRITEMASK_Y), + 0, + src_swizzle1(interp, SWIZZLE_ZERO), + src_undef(), + src_undef()); + } + + /* Should do the PointCoord thing here. */ + emit_op(c, + OPCODE_MOV, + dst_mask(dst, WRITEMASK_ZW), + 0, + src_swizzle(interp, + SWIZZLE_ZERO, + SWIZZLE_ZERO, + SWIZZLE_ZERO, + SWIZZLE_ONE), + src_undef(), + src_undef()); + break; default: emit_op(c, WM_PINTERP, diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 4cf092226cf..575cd45d572 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -192,28 +192,42 @@ static void prealloc_reg(struct brw_wm_compile *c) /* constants */ { const int nr_params = c->fp->program.Base.Parameters->NumParameters; - const struct gl_program_parameter_list *plist = - c->fp->program.Base.Parameters; - int index = 0; - - /* number of float constants */ - c->prog_data.nr_params = 4 * nr_params; - - /* loop over program constants (float[4]) */ - for (i = 0; i < nr_params; i++) { - /* loop over XYZW channels */ - for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(c->reg_index + index / 8, index % 8); - /* Save pointer to parameter/constant value. - * Constants will be copied in prepare_constant_buffer() - */ - c->prog_data.param[index] = &plist->ParameterValues[i][j]; - set_reg(c, PROGRAM_STATE_VAR, i, j, reg); - } - } - /* number of constant regs used (each reg is float[8]) */ - c->nr_creg = 2 * ((4 * nr_params + 15) / 16); - c->reg_index += c->nr_creg; + + /* use a real constant buffer, or just use a section of the GRF? */ + c->use_const_buffer = GL_FALSE; /* (nr_params > 8);*/ + + if (c->use_const_buffer) { + /* We'll use a real constant buffer and fetch constants from + * it with a dataport read message. + */ + + /* number of float constants in CURBE */ + c->prog_data.nr_params = 0; + } + else { + const struct gl_program_parameter_list *plist = + c->fp->program.Base.Parameters; + int index = 0; + + /* number of float constants in CURBE */ + c->prog_data.nr_params = 4 * nr_params; + + /* loop over program constants (float[4]) */ + for (i = 0; i < nr_params; i++) { + /* loop over XYZW channels */ + for (j = 0; j < 4; j++, index++) { + reg = brw_vec1_grf(c->reg_index + index / 8, index % 8); + /* Save pointer to parameter/constant value. + * Constants will be copied in prepare_constant_buffer() + */ + c->prog_data.param[index] = &plist->ParameterValues[i][j]; + set_reg(c, PROGRAM_STATE_VAR, i, j, reg); + } + } + /* number of constant regs used (each reg is float[8]) */ + c->nr_creg = 2 * ((4 * nr_params + 15) / 16); + c->reg_index += c->nr_creg; + } } /* fragment shader inputs */ @@ -234,6 +248,81 @@ static void prealloc_reg(struct brw_wm_compile *c) c->reg_index++; c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); c->reg_index += 2; + + /* An instruction may reference up to three constants. + * They'll be found in these registers. + * XXX alloc these on demand! + */ + if (c->use_const_buffer) { + c->current_const[0].reg = alloc_tmp(c); + c->current_const[1].reg = alloc_tmp(c); + c->current_const[2].reg = alloc_tmp(c); + } + /* + printf("USE CONST BUFFER? %d\n", c->use_const_buffer); + printf("AFTER PRE_ALLOC, reg_index = %d\n", c->reg_index); + */ +} + + +/** + * Check if any of the instruction's src registers are constants, uniforms, + * or statevars. If so, fetch any constants that we don't already have in + * the three GRF slots. + */ +static void fetch_constants(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint i; + + /* loop over instruction src regs */ + for (i = 0; i < 3; i++) { + const struct prog_src_register *src = &inst->SrcReg[i]; + if (src->File == PROGRAM_STATE_VAR || + src->File == PROGRAM_CONSTANT || + src->File == PROGRAM_UNIFORM) { + if (c->current_const[i].index != src->Index) { + + c->current_const[i].index = src->Index; + /*c->current_const[i].reg = alloc_tmp(c);*/ + + /* + printf(" fetch const[%d] for arg %d into reg %d\n", + src->Index, i, c->current_const[i].reg.nr); + */ + + /* need to fetch the constant now */ + brw_dp_READ_4(p, + c->current_const[i].reg, /* writeback dest */ + 1, /* msg_reg */ + src->RelAddr, /* relative indexing? */ + 16 * src->Index, /* byte offset */ + BRW_WM_MAX_SURF - 1 /* binding table index */ + ); + +#if 0 + /* dependency stall */ + { + int response_length = 1; + int mark = mark_tmps( c ); + struct brw_reg src = c->current_const[i].reg; + struct brw_reg tmp = alloc_tmp(c); + + /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } + */ + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, tmp, src); + brw_MOV(p, src, tmp); + brw_pop_insn_state(p); + + release_tmps( c, mark ); + } +#endif + } + } + } } @@ -241,24 +330,112 @@ static void prealloc_reg(struct brw_wm_compile *c) * Convert Mesa dst register to brw register. */ static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - struct prog_instruction *inst, int component, int nr) + const struct prog_instruction *inst, + GLuint component) { + const int nr = 1; return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr, 0, 0); } +static struct brw_reg +get_src_reg_const(struct brw_wm_compile *c, + const struct prog_instruction *inst, + GLuint srcRegIndex, GLuint component) +{ + /* We should have already fetched the constant from the constant + * buffer in fetch_constants(). Now we just have to return a + * register description that extracts the needed component and + * smears it across all eight vector components. + */ + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + struct brw_reg const_reg; + + assert(component < 4); + assert(srcRegIndex < 3); + assert(c->current_const[srcRegIndex].index != -1); + const_reg = c->current_const[srcRegIndex].reg; + + /* extract desired float from the const_reg, and smear */ + const_reg = stride(const_reg, 0, 1, 0); + const_reg.subnr = component * 4; + + if (src->NegateBase) + const_reg = negate(const_reg); + if (src->Abs) + const_reg = brw_abs(const_reg); + + /* + printf(" form const[%d] for arg %d, comp %d, reg %d\n", + c->current_const[srcRegIndex].index, + srcRegIndex, + component, + const_reg.nr); + */ + + return const_reg; +} + + /** * Convert Mesa src register to brw register. */ static struct brw_reg get_src_reg(struct brw_wm_compile *c, - struct prog_src_register *src, int index, int nr) -{ - int component = GET_SWZ(src->Swizzle, index); - return get_reg(c, src->File, src->Index, component, nr, - src->NegateBase, src->Abs); + const struct prog_instruction *inst, + GLuint srcRegIndex, GLuint channel) +{ + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + const GLuint nr = 1; + const GLuint component = GET_SWZ(src->Swizzle, channel); + + if (c->use_const_buffer && + (src->File == PROGRAM_STATE_VAR || + src->File == PROGRAM_CONSTANT || + src->File == PROGRAM_UNIFORM)) { + return get_src_reg_const(c, inst, srcRegIndex, component); + } + else { + /* other type of source register */ + return get_reg(c, src->File, src->Index, component, nr, + src->NegateBase, src->Abs); + } +} + + +/** + * Same as \sa get_src_reg() but if the register is a literal, emit + * a brw_reg encoding the literal. + * Note that a brw instruction only allows one src operand to be a literal. + * For instructions with more than one operand, only the second can be a + * literal. This means that we treat some literals as constants/uniforms + * (which why PROGRAM_CONSTANT is checked in fetch_constants()). + * + */ +static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, + const struct prog_instruction *inst, + GLuint srcRegIndex, GLuint channel) +{ + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + if (src->File == PROGRAM_CONSTANT) { + /* a literal */ + const int component = GET_SWZ(src->Swizzle, channel); + const GLfloat *param = + c->fp->program.Base.Parameters->ParameterValues[src->Index]; + GLfloat value = param[component]; + if (src->NegateBase) + value = -value; + if (src->Abs) + value = FABSF(value); + /*printf(" form imm reg %f\n", value);*/ + return brw_imm_f(value); + } + else { + return get_src_reg(c, inst, srcRegIndex, channel); + } } + /** * Subroutines are minimal support for resusable instruction sequences. * They are implemented as simply as possible to minimise overhead: there @@ -332,8 +509,8 @@ static void emit_abs( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (inst->DstReg.WriteMask & (1<<i)) { struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i, 1); - src = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src = get_src_reg(c, inst, 0, i); brw_MOV(p, dst, brw_abs(src)); } } @@ -350,8 +527,8 @@ static void emit_trunc( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i, 1) ; - src = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src = get_src_reg(c, inst, 0, i); brw_RNDZ(p, dst, src); } } @@ -368,8 +545,8 @@ static void emit_mov( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i, 1); - src = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src = get_src_reg_imm(c, inst, 0, i); brw_MOV(p, dst, src); } } @@ -386,8 +563,8 @@ static void emit_pixel_xy(struct brw_wm_compile *c, struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; - dst0 = get_dst_reg(c, inst, 0, 1); - dst1 = get_dst_reg(c, inst, 1, 1); + dst0 = get_dst_reg(c, inst, 0); + dst1 = get_dst_reg(c, inst, 1); /* Calculate pixel centers by adding 1 or 0 to each of the * micro-tile coordinates passed in r1. */ @@ -414,10 +591,10 @@ static void emit_delta_xy(struct brw_wm_compile *c, struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; - dst0 = get_dst_reg(c, inst, 0, 1); - dst1 = get_dst_reg(c, inst, 1, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src1 = get_src_reg(c, &inst->SrcReg[0], 1, 1); + dst0 = get_dst_reg(c, inst, 0); + dst1 = get_dst_reg(c, inst, 1); + src0 = get_src_reg(c, inst, 0, 0); + src1 = get_src_reg(c, inst, 0, 1); /* Calc delta X,Y by subtracting origin in r1 from the pixel * centers. */ @@ -482,7 +659,7 @@ static void emit_fb_write(struct brw_wm_compile *c, brw_push_insn_state(p); for (channel = 0; channel < 4; channel++) { - src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1); + src0 = get_src_reg(c, inst, 0, channel); /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ brw_MOV(p, brw_message_reg(nr + channel), src0); @@ -493,11 +670,11 @@ static void emit_fb_write(struct brw_wm_compile *c, if (c->key.source_depth_to_render_target) { if (c->key.computes_depth) { - src0 = get_src_reg(c, &inst->SrcReg[2], 2, 1); + src0 = get_src_reg(c, inst, 2, 2); brw_MOV(p, brw_message_reg(nr), src0); } else { - src0 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + src0 = get_src_reg(c, inst, 1, 1); brw_MOV(p, brw_message_reg(nr), src0); } @@ -524,7 +701,7 @@ static void emit_fb_write(struct brw_wm_compile *c, else #endif { - struct brw_reg src = get_src_reg(c, &inst->SrcReg[1], 1, 1); + struct brw_reg src = get_src_reg(c, inst, 1, 1); brw_MOV(p, brw_message_reg(nr), src); } nr += 2; @@ -544,10 +721,10 @@ static void emit_pixel_w( struct brw_wm_compile *c, struct brw_reg dst, src0, delta0, delta1; struct brw_reg interp3; - dst = get_dst_reg(c, inst, 3, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + dst = get_dst_reg(c, inst, 3); + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); interp3 = brw_vec1_grf(src0.nr+1, 4); /* Calc 1/w - just linterp wpos[3] optimized by putting the @@ -575,9 +752,9 @@ static void emit_linterp(struct brw_wm_compile *c, struct brw_reg src0; GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); @@ -587,7 +764,7 @@ static void emit_linterp(struct brw_wm_compile *c, for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_LINE(p, brw_null_reg(), interp[i], delta0); brw_MAC(p, dst, suboffset(interp[i],1), delta1); } @@ -604,7 +781,7 @@ static void emit_cinterp(struct brw_wm_compile *c, struct brw_reg dst, src0; GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + src0 = get_src_reg(c, inst, 0, 0); nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); @@ -614,7 +791,7 @@ static void emit_cinterp(struct brw_wm_compile *c, for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV(p, dst, suboffset(interp[i],3)); } } @@ -631,10 +808,10 @@ static void emit_pinterp(struct brw_wm_compile *c, struct brw_reg src0, w; GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - w = get_src_reg(c, &inst->SrcReg[2], 3, 1); + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); + w = get_src_reg(c, inst, 2, 3); nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); @@ -644,7 +821,7 @@ static void emit_pinterp(struct brw_wm_compile *c, for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_LINE(p, brw_null_reg(), interp[i], delta0); brw_MAC(p, dst, suboffset(interp[i],1), delta1); @@ -653,6 +830,36 @@ static void emit_pinterp(struct brw_wm_compile *c, } } +/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */ +static void emit_frontfacing(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); + struct brw_reg dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_MOV(p, dst, brw_imm_f(0.0)); + } + } + + /* bit 31 is "primitive is back face", so checking < (1 << 31) gives + * us front face + */ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31)); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_MOV(p, dst, brw_imm_f(1.0)); + } + } + brw_set_predicate_control_flag_value(p, 0xff); +} + static void emit_xpd(struct brw_wm_compile *c, struct prog_instruction *inst) { @@ -664,12 +871,12 @@ static void emit_xpd(struct brw_wm_compile *c, GLuint i1 = (i+1)%3; if (mask & (1<<i)) { struct brw_reg src0, src1, dst; - dst = get_dst_reg(c, inst, i, 1); - src0 = negate(get_src_reg(c, &inst->SrcReg[0], i2, 1)); - src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1); + dst = get_dst_reg(c, inst, i); + src0 = negate(get_src_reg(c, inst, 0, i2)); + src1 = get_src_reg_imm(c, inst, 1, i1); brw_MUL(p, brw_null_reg(), src0, src1); - src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1); + src0 = get_src_reg(c, inst, 0, i1); + src1 = get_src_reg_imm(c, inst, 1, i2); brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); brw_MAC(p, dst, src0, src1); brw_set_saturate(p, 0); @@ -685,11 +892,11 @@ static void emit_dp3(struct brw_wm_compile *c, int i; struct brw_compile *p = &c->func; for (i = 0; i < 3; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); @@ -704,10 +911,10 @@ static void emit_dp4(struct brw_wm_compile *c, int i; struct brw_compile *p = &c->func; for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, brw_null_reg(), src0[2], src1[2]); @@ -723,10 +930,10 @@ static void emit_dph(struct brw_wm_compile *c, int i; struct brw_compile *p = &c->func; for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, dst, src0[2], src1[2]); @@ -751,7 +958,7 @@ static void emit_math1(struct brw_wm_compile *c, tmp = alloc_tmp(c); /* Get first component of source register */ - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + src0 = get_src_reg(c, inst, 0, 0); /* tmp = func(src0) */ brw_MOV(p, brw_message_reg(2), src0); @@ -769,7 +976,7 @@ static void emit_math1(struct brw_wm_compile *c, /* replicate tmp value across enabled dest channels */ for (i = 0; i < 4; i++) { if (inst->DstReg.WriteMask & (1 << i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV(p, dst, tmp); } } @@ -823,15 +1030,28 @@ static void emit_add(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_ADD(p, dst, src0, src1); } } brw_set_saturate(p, 0); } +static void emit_arl(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, addr_reg; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, 0); + src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */ + brw_MOV(p, addr_reg, src0); + brw_set_saturate(p, 0); +} + static void emit_sub(struct brw_wm_compile *c, struct prog_instruction *inst) { @@ -842,9 +1062,9 @@ static void emit_sub(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_ADD(p, dst, src0, negate(src1)); } } @@ -861,9 +1081,9 @@ static void emit_mul(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_MUL(p, dst, src0, src1); } } @@ -880,8 +1100,8 @@ static void emit_frc(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg_imm(c, inst, 0, i); brw_FRC(p, dst, src0); } } @@ -899,68 +1119,61 @@ static void emit_flr(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg_imm(c, inst, 0, i); brw_RNDD(p, dst, src0); } } brw_set_saturate(p, 0); } -static void emit_max(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg src0, src1, dst; - int i; - brw_push_insn_state(p); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MOV(p, dst, src0); - brw_set_saturate(p, 0); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, src1); - brw_set_saturate(p, 0); - brw_set_predicate_control_flag_value(p, 0xff); - } - } - brw_pop_insn_state(p); -} -static void emit_min(struct brw_wm_compile *c, - struct prog_instruction *inst) +static void emit_min_max(struct brw_wm_compile *c, + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg src0, src1, dst; + const GLuint mask = inst->DstReg.WriteMask; + const int mark = mark_tmps(c); int i; brw_push_insn_state(p); for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + struct brw_reg real_dst = get_dst_reg(c, inst, i); + struct brw_reg src0 = get_src_reg(c, inst, 0, i); + struct brw_reg src1 = get_src_reg(c, inst, 1, i); + struct brw_reg dst; + /* if dst==src0 or dst==src1 we need to use a temp reg */ + GLboolean use_temp = brw_same_reg(dst, src0) || + brw_same_reg(dst, src1); + if (use_temp) + dst = alloc_tmp(c); + else + dst = real_dst; + + /* + printf(" Min/max: dst %d src0 %d src1 %d\n", + dst.nr, src0.nr, src1.nr); + */ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); brw_MOV(p, dst, src0); brw_set_saturate(p, 0); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); + if (inst->Opcode == OPCODE_MIN) + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); + else + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, src1, src0); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); brw_MOV(p, dst, src1); brw_set_saturate(p, 0); brw_set_predicate_control_flag_value(p, 0xff); + if (use_temp) + brw_MOV(p, real_dst, dst); } } brw_pop_insn_state(p); + release_tmps(c, mark); } static void emit_pow(struct brw_wm_compile *c, @@ -968,9 +1181,9 @@ static void emit_pow(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; struct brw_reg dst, src0, src1; - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], 0, 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + src0 = get_src_reg_imm(c, inst, 0, 0); + src1 = get_src_reg_imm(c, inst, 1, 0); brw_MOV(p, brw_message_reg(2), src0); brw_MOV(p, brw_message_reg(3), src1); @@ -995,10 +1208,10 @@ static void emit_lrp(struct brw_wm_compile *c, int mark = mark_tmps(c); for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + src1 = get_src_reg_imm(c, inst, 1, i); if (src1.nr == dst.nr) { tmp1 = alloc_tmp(c); @@ -1006,7 +1219,7 @@ static void emit_lrp(struct brw_wm_compile *c, } else tmp1 = src1; - src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); + src2 = get_src_reg(c, inst, 2, i); if (src2.nr == dst.nr) { tmp2 = alloc_tmp(c); brw_MOV(p, tmp2, src2); @@ -1048,10 +1261,10 @@ static void emit_mad(struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); + src2 = get_src_reg_imm(c, inst, 2, i); brw_MUL(p, dst, src0, src1); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); @@ -1071,9 +1284,9 @@ static void emit_sop(struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_push_insn_state(p); brw_CMP(p, brw_null_reg(), cond, src0, src1); brw_set_predicate_control(p, BRW_PREDICATE_NONE); @@ -1130,8 +1343,8 @@ static void emit_ddx(struct brw_wm_compile *c, struct brw_reg dst; struct brw_reg src0, w; GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - w = get_src_reg(c, &inst->SrcReg[1], 3, 1); + src0 = get_src_reg(c, inst, 0, 0); + w = get_src_reg(c, inst, 1, 3); nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -1140,7 +1353,7 @@ static void emit_ddx(struct brw_wm_compile *c, brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV(p, dst, interp[i]); brw_MUL(p, dst, dst, w); } @@ -1158,9 +1371,9 @@ static void emit_ddy(struct brw_wm_compile *c, struct brw_reg src0, w; GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + src0 = get_src_reg(c, inst, 0, 0); nr = src0.nr; - w = get_src_reg(c, &inst->SrcReg[1], 3, 1); + w = get_src_reg(c, inst, 1, 3); interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); interp[2] = brw_vec1_grf(nr+1, 0); @@ -1168,7 +1381,7 @@ static void emit_ddy(struct brw_wm_compile *c, brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV(p, dst, suboffset(interp[i], 1)); brw_MUL(p, dst, dst, w); } @@ -1302,7 +1515,7 @@ static void emit_noise1( struct brw_wm_compile *c, assert( mark == 0 ); - src = get_src_reg( c, inst->SrcReg, 0, 1 ); + src = get_src_reg( c, inst, 0, 0 ); param = alloc_tmp( c ); @@ -1314,7 +1527,7 @@ static void emit_noise1( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV( p, dst, param ); } } @@ -1472,8 +1685,8 @@ static void emit_noise2( struct brw_wm_compile *c, assert( mark == 0 ); - src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); - src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); + src0 = get_src_reg( c, inst, 0, 0 ); + src1 = get_src_reg( c, inst, 0, 1 ); param0 = alloc_tmp( c ); param1 = alloc_tmp( c ); @@ -1487,7 +1700,7 @@ static void emit_noise2( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV( p, dst, param0 ); } } @@ -1775,9 +1988,9 @@ static void emit_noise3( struct brw_wm_compile *c, assert( mark == 0 ); - src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); - src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); - src2 = get_src_reg( c, inst->SrcReg, 2, 1 ); + src0 = get_src_reg( c, inst, 0, 0 ); + src1 = get_src_reg( c, inst, 0, 1 ); + src2 = get_src_reg( c, inst, 0, 2 ); param0 = alloc_tmp( c ); param1 = alloc_tmp( c ); @@ -1793,7 +2006,7 @@ static void emit_noise3( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV( p, dst, param0 ); } } @@ -2198,10 +2411,10 @@ static void emit_noise4( struct brw_wm_compile *c, assert( mark == 0 ); - src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); - src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); - src2 = get_src_reg( c, inst->SrcReg, 2, 1 ); - src3 = get_src_reg( c, inst->SrcReg, 3, 1 ); + src0 = get_src_reg( c, inst, 0, 0 ); + src1 = get_src_reg( c, inst, 0, 1 ); + src2 = get_src_reg( c, inst, 0, 2 ); + src3 = get_src_reg( c, inst, 0, 3 ); param0 = alloc_tmp( c ); param1 = alloc_tmp( c ); @@ -2219,7 +2432,7 @@ static void emit_noise4( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV( p, dst, param0 ); } } @@ -2236,11 +2449,11 @@ static void emit_wpos_xy(struct brw_wm_compile *c, GLuint mask = inst->DstReg.WriteMask; struct brw_reg src0[2], dst[2]; - dst[0] = get_dst_reg(c, inst, 0, 1); - dst[1] = get_dst_reg(c, inst, 1, 1); + dst[0] = get_dst_reg(c, inst, 0); + dst[1] = get_dst_reg(c, inst, 1); - src0[0] = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src0[1] = get_src_reg(c, &inst->SrcReg[0], 1, 1); + src0[0] = get_src_reg(c, inst, 0, 0); + src0[1] = get_src_reg(c, inst, 0, 1); /* Calculate the pixel offset from window bottom left into destination * X and Y channels. @@ -2263,7 +2476,7 @@ static void emit_wpos_xy(struct brw_wm_compile *c, } /* TODO - BIAS on SIMD8 not workind yet... + BIAS on SIMD8 not working yet... */ static void emit_txb(struct brw_wm_compile *c, struct prog_instruction *inst) @@ -2271,19 +2484,20 @@ static void emit_txb(struct brw_wm_compile *c, struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - GLuint i; + payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); + for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i, 1); + dst[i] = get_dst_reg(c, inst, i); for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); + src[i] = get_src_reg(c, inst, 0, i); switch (inst->TexSrcTarget) { case TEXTURE_1D_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */ + brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */ + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); /* r coord */ break; case TEXTURE_2D_INDEX: case TEXTURE_RECT_INDEX: @@ -2297,28 +2511,28 @@ static void emit_txb(struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(4), src[2]); break; } - brw_MOV(p, brw_message_reg(5), src[3]); - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(5), src[3]); /* bias */ + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */ brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), - 1, - retype(payload_reg, BRW_REGISTER_TYPE_UW), - unit + MAX_DRAW_BUFFERS, /* surface */ - unit, /* sampler */ - inst->DstReg.WriteMask, - BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, - 4, - 4, - 0); + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ + 1, /* msg_reg_nr */ + retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ + unit + MAX_DRAW_BUFFERS, /* surface */ + unit, /* sampler */ + inst->DstReg.WriteMask, /* writemask */ + BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, /* msg_type */ + 4, /* response_length */ + 4, /* msg_length */ + 0); /* eot */ } + static void emit_tex(struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - GLuint msg_len; GLuint i, nr; GLuint emit; @@ -2327,10 +2541,9 @@ static void emit_tex(struct brw_wm_compile *c, payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i, 1); + dst[i] = get_dst_reg(c, inst, i); for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - + src[i] = get_src_reg(c, inst, 0, i); switch (inst->TexSrcTarget) { case TEXTURE_1D_INDEX: @@ -2349,6 +2562,7 @@ static void emit_tex(struct brw_wm_compile *c, } msg_len = 1; + /* move/load S, T, R coords */ for (i = 0; i < nr; i++) { static const GLuint swz[4] = {0,1,2,2}; if (emit & (1<<i)) @@ -2359,26 +2573,27 @@ static void emit_tex(struct brw_wm_compile *c, } if (shadow) { - brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(6), src[2]); + brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */ + brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */ } brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), - 1, - retype(payload_reg, BRW_REGISTER_TYPE_UW), - unit + MAX_DRAW_BUFFERS, /* surface */ - unit, /* sampler */ - inst->DstReg.WriteMask, - BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, - 4, - shadow ? 6 : 4, - 0); + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ + 1, /* msg_reg_nr */ + retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ + unit + MAX_DRAW_BUFFERS, /* surface */ + unit, /* sampler */ + inst->DstReg.WriteMask, /* writemask */ + BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, /* msg_type */ + 4, /* response_length */ + shadow ? 6 : 4, /* msg_length */ + 0); /* eot */ if (shadow) brw_MOV(p, dst[3], brw_imm_f(1.0)); } + /** * Resolve subroutine calls after code emit is done. */ @@ -2410,6 +2625,15 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) else brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); + /* + _mesa_printf("Inst %d: ", i); + _mesa_print_instruction(inst); + */ + + /* fetch any constants that this instruction needs */ + if (c->use_const_buffer) + fetch_constants(c, inst); + switch (inst->Opcode) { case WM_PIXELXY: emit_pixel_xy(c, inst); @@ -2435,12 +2659,18 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case WM_FB_WRITE: emit_fb_write(c, inst); break; + case WM_FRONTFACING: + emit_frontfacing(c, inst); + break; case OPCODE_ABS: emit_abs(c, inst); break; case OPCODE_ADD: emit_add(c, inst); break; + case OPCODE_ARL: + emit_arl(c, inst); + break; case OPCODE_SUB: emit_sub(c, inst); break; @@ -2489,11 +2719,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_LG2: emit_lg2(c, inst); break; - case OPCODE_MAX: - emit_max(c, inst); - break; case OPCODE_MIN: - emit_min(c, inst); + case OPCODE_MAX: + emit_min_max(c, inst); break; case OPCODE_DDX: emit_ddx(c, inst); diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c index cf031899dd2..ab9aa2f10d0 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c @@ -268,6 +268,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) break; case OPCODE_DST: + case WM_FRONTFACING: default: break; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 68a9296a713..1fc9f013727 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -217,6 +217,7 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */ } + /** Sets up the cache key for sampler state for all texture units */ static void brw_wm_sampler_populate_key(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 63fc8a004fd..58fa6aaf8f9 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -113,7 +113,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /* temporary sanity check assertion */ ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp)); - /* XXX: This needs a flag to indicate when it changes. */ + /* _NEW_DEPTH */ key->stats_wm = intel->stats_wm; /* _NEW_LINE */ @@ -125,6 +125,9 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) key->offset_factor = ctx->Polygon.OffsetFactor; } +/** + * Setup wm hardware state. See page 225 of Volume 2 + */ static dri_bo * wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, dri_bo **reloc_bufs) @@ -142,7 +145,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, if (key->total_scratch != 0) { wm.thread2.scratch_space_base_pointer = - brw->wm.scratch_buffer->offset >> 10; /* reloc */ + brw->wm.scratch_bo->offset >> 10; /* reloc */ wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1; } else { wm.thread2.scratch_space_base_pointer = 0; @@ -151,9 +154,9 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg; wm.thread3.urb_entry_read_length = key->urb_entry_read_length; + wm.thread3.urb_entry_read_offset = 0; wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length; wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - wm.thread3.urb_entry_read_offset = 0; wm.wm4.sampler_count = (key->sampler_count + 1) / 4; if (brw->wm.sampler_bo != NULL) { @@ -220,7 +223,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, 0, 0, wm.thread2.per_thread_scratch_space, offsetof(struct brw_wm_unit_state, thread2), - brw->wm.scratch_buffer); + brw->wm.scratch_bo); } /* Emit sampler state relocation */ @@ -251,20 +254,20 @@ static void upload_wm_unit( struct brw_context *brw ) if (key.total_scratch) { GLuint total = key.total_scratch * key.max_threads; - if (brw->wm.scratch_buffer && total > brw->wm.scratch_buffer->size) { - dri_bo_unreference(brw->wm.scratch_buffer); - brw->wm.scratch_buffer = NULL; + if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) { + dri_bo_unreference(brw->wm.scratch_bo); + brw->wm.scratch_bo = NULL; } - if (brw->wm.scratch_buffer == NULL) { - brw->wm.scratch_buffer = dri_bo_alloc(intel->bufmgr, - "wm scratch", - total, - 4096); + if (brw->wm.scratch_bo == NULL) { + brw->wm.scratch_bo = dri_bo_alloc(intel->bufmgr, + "wm scratch", + total, + 4096); } } reloc_bufs[0] = brw->wm.prog_bo; - reloc_bufs[1] = brw->wm.scratch_buffer; + reloc_bufs[1] = brw->wm.scratch_bo; reloc_bufs[2] = brw->wm.sampler_bo; dri_bo_unreference(brw->wm.state_bo); @@ -282,7 +285,8 @@ const struct brw_tracked_state brw_wm_unit = { .mesa = (_NEW_POLYGON | _NEW_POLYGONSTIPPLE | _NEW_LINE | - _NEW_COLOR), + _NEW_COLOR | + _NEW_DEPTH), .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_CURBE_OFFSETS | diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index e6113eff87e..e7d55d5dbd1 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -33,6 +33,7 @@ #include "main/mtypes.h" #include "main/texformat.h" #include "main/texstore.h" +#include "shader/prog_parameter.h" #include "intel_mipmap_tree.h" #include "intel_batchbuffer.h" @@ -140,8 +141,15 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, case MESA_FORMAT_RGBA_DXT5: return BRW_SURFACEFORMAT_BC3_UNORM; - case MESA_FORMAT_SRGBA8: - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB; + case MESA_FORMAT_SARGB8: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB; + + case MESA_FORMAT_SLA8: + return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB; + + case MESA_FORMAT_SL8: + return BRW_SURFACEFORMAT_L8_UNORM_SRGB; + case MESA_FORMAT_SRGB_DXT1: return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; @@ -159,6 +167,9 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, case MESA_FORMAT_DUDV8: return BRW_SURFACEFORMAT_R8G8_SNORM; + case MESA_FORMAT_SIGNED_RGBA8888_REV: + return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; + default: assert(0); return 0; @@ -277,6 +288,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; struct brw_wm_surface_key key; + const GLuint j = MAX_DRAW_BUFFERS + unit; memset(&key, 0, sizeof(key)); @@ -303,16 +315,111 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.cpp = intelObj->mt->cpp; key.tiling = intelObj->mt->region->tiling; - dri_bo_unreference(brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS]); - brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); - if (brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] == NULL) { - brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_create_texture_surface(brw, &key); + dri_bo_unreference(brw->wm.surf_bo[j]); + brw->wm.surf_bo[j] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->wm.surf_bo[j] == NULL) { + brw->wm.surf_bo[j] = brw_create_texture_surface(brw, &key); + } +} + + + +/** + * Create the constant buffer surface. Fragment shader constanst will be + * read from this buffer with Data Port Read instructions/messages. + */ +static dri_bo * +brw_create_constant_surface( struct brw_context *brw, + struct brw_wm_surface_key *key ) +{ + const GLint w = key->width - 1; + struct brw_surface_state surf; + dri_bo *bo; + + memset(&surf, 0, sizeof(surf)); + + surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + surf.ss0.surface_type = BRW_SURFACE_BUFFER; + surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + + /* This is ok for all textures with channel width 8bit or less: + */ + assert(key->bo); + if (key->bo) + surf.ss1.base_addr = key->bo->offset; /* reloc */ + else + surf.ss1.base_addr = key->offset; + + surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ + surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ + surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */ + surf.ss3.pitch = (key->pitch * key->cpp) - 1; + brw_set_surface_tiling(&surf, key->tiling); + + bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + key, sizeof(*key), + &key->bo, key->bo ? 1 : 0, + &surf, sizeof(surf), + NULL, NULL); + + if (key->bo) { + /* Emit relocation to surface contents */ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_SAMPLER, 0, + 0, + offsetof(struct brw_surface_state, ss1), + key->bo); } + + return bo; } + +/** + * Update the constant buffer surface. + */ +static void +brw_update_constant_surface( GLcontext *ctx, + const struct brw_fragment_program *fp ) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_wm_surface_key key; + const GLuint j = BRW_WM_MAX_SURF - 1; + const GLuint numParams = fp->program.Base.Parameters->NumParameters; + + memset(&key, 0, sizeof(key)); + + key.format = MESA_FORMAT_RGBA_FLOAT32; + key.internal_format = GL_RGBA; + key.bo = fp->const_buffer; + + key.depthmode = GL_NONE; + key.pitch = numParams; + key.width = numParams; + key.height = 1; + key.depth = 1; + key.cpp = 16; + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + dri_bo_unreference(brw->wm.surf_bo[j]); + brw->wm.surf_bo[j] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->wm.surf_bo[j] == NULL) { + brw->wm.surf_bo[j] = brw_create_constant_surface(brw, &key); + } +} + + /** * Sets up a surface state structure to point at the given region. * While it is only used for the front/back buffer currently, it should be @@ -467,7 +574,8 @@ static void prepare_wm_surfaces(struct brw_context *brw ) GLuint i; int old_nr_surfaces; - if (brw->state.nr_color_regions > 1) { + /* Update surfaces for drawing buffers */ + if (brw->state.nr_color_regions > 1) { for (i = 0; i < brw->state.nr_color_regions; i++) { brw_update_region_surface(brw, brw->state.color_regions[i], i, GL_FALSE); @@ -479,27 +587,41 @@ static void prepare_wm_surfaces(struct brw_context *brw ) old_nr_surfaces = brw->wm.nr_surfaces; brw->wm.nr_surfaces = MAX_DRAW_BUFFERS; + /* Update surfaces for textures */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; + const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; + const GLuint j = MAX_DRAW_BUFFERS + i; /* _NEW_TEXTURE, BRW_NEW_TEXDATA */ - if(texUnit->_ReallyEnabled) { + if (texUnit->_ReallyEnabled) { if (texUnit->_Current == intel->frame_buffer_texobj) { - dri_bo_unreference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); - brw->wm.surf_bo[i+MAX_DRAW_BUFFERS] = brw->wm.surf_bo[0]; - dri_bo_reference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); - brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1; + /* render to texture */ + dri_bo_unreference(brw->wm.surf_bo[j]); + brw->wm.surf_bo[j] = brw->wm.surf_bo[0]; + dri_bo_reference(brw->wm.surf_bo[j]); + brw->wm.nr_surfaces = j + 1; } else { + /* regular texture */ brw_update_texture_surface(ctx, i); - brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1; + brw->wm.nr_surfaces = j + 1; } } else { - dri_bo_unreference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); - brw->wm.surf_bo[i+MAX_DRAW_BUFFERS] = NULL; + dri_bo_unreference(brw->wm.surf_bo[j]); + brw->wm.surf_bo[j] = NULL; } + } + + /* Update surface for fragment shader constant buffer */ + { + const GLuint j = BRW_WM_MAX_SURF - 1; + const struct brw_fragment_program *fp = + brw_fragment_program_const(brw->fragment_program); + brw_update_constant_surface(ctx, fp); + brw->wm.nr_surfaces = j + 1; } + dri_bo_unreference(brw->wm.bind_bo); brw->wm.bind_bo = brw_wm_get_binding_table(brw); diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 4e033082b4d..4ae9b118a3d 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -98,11 +98,11 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, ASSERT(src->cpp == dst->cpp); if (cpp == 2) { - BR13 = (0xCC << 16) | (1 << 24); + BR13 = (0xCC << 16) | BR13_565; CMD = XY_SRC_COPY_BLT_CMD; } else { - BR13 = (0xCC << 16) | (1 << 24) | (1 << 25); + BR13 = (0xCC << 16) | BR13_8888; CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; } @@ -194,13 +194,15 @@ intelEmitFillBlit(struct intel_context *intel, switch (cpp) { case 1: + BR13 = (0xF0 << 16); + CMD = XY_COLOR_BLT_CMD; + break; case 2: - case 3: - BR13 = (0xF0 << 16) | (1 << 24); + BR13 = (0xF0 << 16) | BR13_565; CMD = XY_COLOR_BLT_CMD; break; case 4: - BR13 = (0xF0 << 16) | (1 << 24) | (1 << 25); + BR13 = (0xF0 << 16) | BR13_8888; CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; break; default: @@ -335,12 +337,11 @@ intelEmitCopyBlit(struct intel_context *intel, CMD = XY_SRC_COPY_BLT_CMD; break; case 2: - case 3: - BR13 |= (1 << 24); + BR13 |= BR13_565; CMD = XY_SRC_COPY_BLT_CMD; break; case 4: - BR13 |= (1 << 24) | (1 << 25); + BR13 |= BR13_8888; CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; break; default: @@ -510,7 +511,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) /* Setup the blit command */ if (cpp == 4) { - BR13 |= (1 << 24) | (1 << 25); + BR13 |= BR13_8888; if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) { if (clearMask & BUFFER_BIT_DEPTH) CMD |= XY_BLT_WRITE_RGB; @@ -523,8 +524,8 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) } } else { - ASSERT(cpp == 2 || cpp == 0); - BR13 |= (1 << 24); + ASSERT(cpp == 2); + BR13 |= BR13_565; } #ifndef I915 diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 888bb3f18f6..a664e749360 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -307,9 +307,11 @@ intel_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h) if (!driContext->driScreenPriv->dri2.enabled) return; - intel_update_renderbuffers(driContext, driContext->driDrawablePriv); - if (driContext->driDrawablePriv != driContext->driReadablePriv) - intel_update_renderbuffers(driContext, driContext->driReadablePriv); + if (!intel->internal_viewport_call) { + intel_update_renderbuffers(driContext, driContext->driDrawablePriv); + if (driContext->driDrawablePriv != driContext->driReadablePriv) + intel_update_renderbuffers(driContext, driContext->driReadablePriv); + } old_viewport = ctx->Driver.Viewport; ctx->Driver.Viewport = NULL; diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index e520ecf220d..d635f3f50dc 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -180,6 +180,7 @@ struct intel_context struct intel_region *front_region; struct intel_region *back_region; struct intel_region *depth_region; + GLboolean internal_viewport_call; /** * This value indicates that the kernel memory manager is being used diff --git a/src/mesa/drivers/dri/intel/intel_decode.c b/src/mesa/drivers/dri/intel/intel_decode.c index f2e2e619358..f04638206d5 100644 --- a/src/mesa/drivers/dri/intel/intel_decode.c +++ b/src/mesa/drivers/dri/intel/intel_decode.c @@ -1513,7 +1513,7 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures) for (i = 1; i < len;) { instr_out(data, hw_offset, i, "buffer %d: %svalid, type 0x%04x, " - "src offset 0x%04xd bytes\n", + "src offset 0x%04x bytes\n", data[i] >> 27, data[i] & (1 << 26) ? "" : "in", (data[i] >> 16) & 0x1ff, diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index 8dd0b2461bd..9ec1b4ec2f4 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -132,6 +132,7 @@ static const struct dri_extension brw_extensions[] = { { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions }, { "GL_ARB_shadow", NULL }, + { "GL_MESA_texture_signed_rgba", NULL }, { "GL_ARB_texture_non_power_of_two", NULL }, { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions }, { "GL_EXT_shadow_funcs", NULL }, diff --git a/src/mesa/drivers/dri/intel/intel_pixel.c b/src/mesa/drivers/dri/intel/intel_pixel.c index 7041ff389ac..fc0ac0b79c0 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel.c +++ b/src/mesa/drivers/dri/intel/intel_pixel.c @@ -184,7 +184,9 @@ intel_meta_set_passthrough_transform(struct intel_context *intel) intel->meta.saved_vp_height = ctx->Viewport.Height; intel->meta.saved_matrix_mode = ctx->Transform.MatrixMode; + intel->internal_viewport_call = GL_TRUE; _mesa_Viewport(0, 0, ctx->DrawBuffer->Width, ctx->DrawBuffer->Height); + intel->internal_viewport_call = GL_FALSE; _mesa_MatrixMode(GL_PROJECTION); _mesa_PushMatrix(); @@ -206,8 +208,10 @@ intel_meta_restore_transform(struct intel_context *intel) _mesa_MatrixMode(intel->meta.saved_matrix_mode); + intel->internal_viewport_call = GL_TRUE; _mesa_Viewport(intel->meta.saved_vp_x, intel->meta.saved_vp_y, intel->meta.saved_vp_width, intel->meta.saved_vp_height); + intel->internal_viewport_call = GL_FALSE; } /** diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index d20ea151877..65e62947ef6 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -563,6 +563,7 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen) GLboolean gem_supported; struct drm_i915_getparam gp; __DRIscreenPrivate *spriv = intelScreen->driScrnPriv; + int num_fences; intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL; @@ -613,8 +614,10 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen) &intelScreen->sarea->last_dispatch); } - /* XXX bufmgr should be per-screen, not per-context */ - intelScreen->ttm = intelScreen->ttm; + if (intel_get_param(spriv, I915_PARAM_NUM_FENCES_AVAIL, &num_fences)) + intelScreen->kernel_exec_fencing = !!num_fences; + else + intelScreen->kernel_exec_fencing = GL_FALSE; return GL_TRUE; } diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h index e1036de4db8..a9b9e109a6a 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.h +++ b/src/mesa/drivers/dri/intel/intel_screen.h @@ -79,6 +79,7 @@ typedef struct GLboolean no_vbo; int ttm; dri_bufmgr *bufmgr; + GLboolean kernel_exec_fencing; /** * Configuration cache with default values for all contexts diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c index 8732354e7a5..3322a711307 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_format.c +++ b/src/mesa/drivers/dri/intel/intel_tex_format.c @@ -1,5 +1,6 @@ #include "intel_context.h" #include "intel_tex.h" +#include "intel_chipset.h" #include "main/texformat.h" #include "main/enums.h" @@ -160,24 +161,36 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, case GL_SRGB8_EXT: case GL_SRGB_ALPHA_EXT: case GL_SRGB8_ALPHA8_EXT: - case GL_SLUMINANCE_EXT: - case GL_SLUMINANCE8_EXT: - case GL_SLUMINANCE_ALPHA_EXT: - case GL_SLUMINANCE8_ALPHA8_EXT: case GL_COMPRESSED_SRGB_EXT: case GL_COMPRESSED_SRGB_ALPHA_EXT: case GL_COMPRESSED_SLUMINANCE_EXT: case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT: - return &_mesa_texformat_srgba8; + return &_mesa_texformat_sargb8; + case GL_SLUMINANCE_EXT: + case GL_SLUMINANCE8_EXT: + if (IS_G4X(intel->intelScreen->deviceID)) + return &_mesa_texformat_sl8; + else + return &_mesa_texformat_sargb8; + case GL_SLUMINANCE_ALPHA_EXT: + case GL_SLUMINANCE8_ALPHA8_EXT: + if (IS_G4X(intel->intelScreen->deviceID)) + return &_mesa_texformat_sla8; + else + return &_mesa_texformat_sargb8; case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT: case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: return &_mesa_texformat_srgb_dxt1; + /* i915 could also do this */ case GL_DUDV_ATI: case GL_DU8DV8_ATI: return &_mesa_texformat_dudv8; + case GL_RGBA_SNORM: + case GL_RGBA8_SNORM: + return &_mesa_texformat_signed_rgba8888_rev; #endif default: diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index e902187637d..71561cf85cd 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -12,6 +12,7 @@ #include "main/simple_list.h" #include "main/texcompress.h" #include "main/texformat.h" +#include "main/texgetimage.h" #include "main/texobj.h" #include "main/texstore.h" #include "main/teximage.h" @@ -482,12 +483,13 @@ intelTexImage(GLcontext * ctx, LOCK_HARDWARE(intel); if (intelImage->mt) { - texImage->Data = intel_miptree_image_map(intel, - intelImage->mt, - intelImage->face, - intelImage->level, - &dstRowStride, - intelImage->base.ImageOffsets); + if (pixels) + texImage->Data = intel_miptree_image_map(intel, + intelImage->mt, + intelImage->face, + intelImage->level, + &dstRowStride, + intelImage->base.ImageOffsets); texImage->RowStride = dstRowStride / intelImage->mt->cpp; } else { @@ -537,17 +539,18 @@ intelTexImage(GLcontext * ctx, format, type, pixels, unpack)) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage"); } - } - /* GL_SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - intel_generate_mipmap(ctx, target, texObj); + /* GL_SGIS_generate_mipmap */ + if (level == texObj->BaseLevel && texObj->GenerateMipmap) { + intel_generate_mipmap(ctx, target, texObj); + } } _mesa_unmap_teximage_pbo(ctx, unpack); if (intelImage->mt) { - intel_miptree_image_unmap(intel, intelImage->mt); + if (pixels) + intel_miptree_image_unmap(intel, intelImage->mt); texImage->Data = NULL; } diff --git a/src/mesa/drivers/dri/mach64/mach64_tex.c b/src/mesa/drivers/dri/mach64/mach64_tex.c index 9fe267eafd5..225d23179e1 100644 --- a/src/mesa/drivers/dri/mach64/mach64_tex.c +++ b/src/mesa/drivers/dri/mach64/mach64_tex.c @@ -99,7 +99,7 @@ static void mach64SetTexFilter( mach64TexObjPtr t, } } -static void mach64SetTexBorderColor( mach64TexObjPtr t, GLubyte c[4] ) +static void mach64SetTexBorderColor( mach64TexObjPtr t, const GLfloat c[4] ) { #if 0 GLuint border = mach64PackColor( 4, c[0], c[1], c[2], c[3] ); @@ -131,7 +131,7 @@ mach64AllocTexObj( struct gl_texture_object *texObj ) mach64SetTexWrap( t, texObj->WrapS, texObj->WrapT ); mach64SetTexFilter( t, texObj->MinFilter, texObj->MagFilter ); - mach64SetTexBorderColor( t, texObj->_BorderChan ); + mach64SetTexBorderColor( t, texObj->BorderColor ); return t; } @@ -471,7 +471,7 @@ static void mach64DDTexParameter( GLcontext *ctx, GLenum target, case GL_TEXTURE_BORDER_COLOR: if ( t->base.bound ) FLUSH_BATCH( mmesa ); - mach64SetTexBorderColor( t, tObj->_BorderChan ); + mach64SetTexBorderColor( t, tObj->BorderColor ); break; case GL_TEXTURE_BASE_LEVEL: diff --git a/src/mesa/drivers/dri/mga/mgatex.c b/src/mesa/drivers/dri/mga/mgatex.c index 2392622b902..33eb0be4496 100644 --- a/src/mesa/drivers/dri/mga/mgatex.c +++ b/src/mesa/drivers/dri/mga/mgatex.c @@ -153,10 +153,14 @@ mgaSetTexFilter( mgaTextureObjectPtr t, GLenum minf, GLenum magf ) t->setup.texfilter |= val; } -static void mgaSetTexBorderColor(mgaTextureObjectPtr t, GLubyte color[4]) +static void mgaSetTexBorderColor(mgaTextureObjectPtr t, const GLfloat color[4]) { - t->setup.texbordercol = PACK_COLOR_8888(color[3], color[0], - color[1], color[2] ); + GLubyte c[4]; + CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]); + CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]); + CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]); + CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]); + t->setup.texbordercol = PACK_COLOR_8888(c[3], c[0], c[1], c[2] ); } @@ -329,7 +333,7 @@ mgaAllocTexObj( struct gl_texture_object *tObj ) mgaSetTexWrapping( t, tObj->WrapS, tObj->WrapT ); mgaSetTexFilter( t, tObj->MinFilter, tObj->MagFilter ); - mgaSetTexBorderColor( t, tObj->_BorderChan ); + mgaSetTexBorderColor( t, tObj->BorderColor ); } return( t ); @@ -458,7 +462,7 @@ mgaTexParameter( GLcontext *ctx, GLenum target, case GL_TEXTURE_BORDER_COLOR: FLUSH_BATCH(mmesa); - mgaSetTexBorderColor(t, tObj->_BorderChan); + mgaSetTexBorderColor(t, tObj->BorderColor); break; case GL_TEXTURE_BASE_LEVEL: diff --git a/src/mesa/drivers/dri/r128/r128_tex.c b/src/mesa/drivers/dri/r128/r128_tex.c index 3fc9c06cfa2..0920270d7b6 100644 --- a/src/mesa/drivers/dri/r128/r128_tex.c +++ b/src/mesa/drivers/dri/r128/r128_tex.c @@ -135,8 +135,13 @@ static void r128SetTexFilter( r128TexObjPtr t, GLenum minf, GLenum magf ) } } -static void r128SetTexBorderColor( r128TexObjPtr t, GLubyte c[4] ) +static void r128SetTexBorderColor( r128TexObjPtr t, const GLfloat color[4] ) { + GLubyte c[4]; + CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]); + CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]); + CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]); + CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]); t->setup.tex_border_color = r128PackColor( 4, c[0], c[1], c[2], c[3] ); } @@ -165,7 +170,7 @@ static r128TexObjPtr r128AllocTexObj( struct gl_texture_object *texObj ) r128SetTexWrap( t, texObj->WrapS, texObj->WrapT ); r128SetTexFilter( t, texObj->MinFilter, texObj->MagFilter ); - r128SetTexBorderColor( t, texObj->_BorderChan ); + r128SetTexBorderColor( t, texObj->BorderColor ); } return t; @@ -531,7 +536,7 @@ static void r128TexParameter( GLcontext *ctx, GLenum target, case GL_TEXTURE_BORDER_COLOR: if ( t->base.bound ) FLUSH_BATCH( rmesa ); - r128SetTexBorderColor( t, tObj->_BorderChan ); + r128SetTexBorderColor( t, tObj->BorderColor ); break; case GL_TEXTURE_BASE_LEVEL: diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index 1b9724d6429..ebf389efe26 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -1117,7 +1117,7 @@ void r200UpdateMaterial( GLcontext *ctx ) * _VP_inf_norm * _h_inf_norm * _Position - * _NormDirection + * _NormSpotDirection * _ModelViewInvScale * _NeedEyeCoords * _EyeZDir @@ -1174,9 +1174,9 @@ static void update_light( GLcontext *ctx ) fcmd[LIT_DIRECTION_W] = 0; } else { COPY_4V( &fcmd[LIT_POSITION_X], l->_Position ); - fcmd[LIT_DIRECTION_X] = -l->_NormDirection[0]; - fcmd[LIT_DIRECTION_Y] = -l->_NormDirection[1]; - fcmd[LIT_DIRECTION_Z] = -l->_NormDirection[2]; + fcmd[LIT_DIRECTION_X] = -l->_NormSpotDirection[0]; + fcmd[LIT_DIRECTION_Y] = -l->_NormSpotDirection[1]; + fcmd[LIT_DIRECTION_Z] = -l->_NormSpotDirection[2]; fcmd[LIT_DIRECTION_W] = 0; } diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c index fc2caabb5ac..9f791579158 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.c +++ b/src/mesa/drivers/dri/r200/r200_tex.c @@ -270,15 +270,16 @@ static void r200SetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf ) } } -static void r200SetTexBorderColor( radeonTexObjPtr t, GLubyte c[4] ) +static void r200SetTexBorderColor( radeonTexObjPtr t, const GLfloat color[4] ) { + GLubyte c[4]; + CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]); + CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]); + CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]); + CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]); t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] ); } - - - - static void r200TexEnv( GLcontext *ctx, GLenum target, GLenum pname, const GLfloat *param ) { @@ -378,7 +379,7 @@ static void r200TexParameter( GLcontext *ctx, GLenum target, break; case GL_TEXTURE_BORDER_COLOR: - r200SetTexBorderColor( t, texObj->_BorderChan ); + r200SetTexBorderColor( t, texObj->BorderColor ); break; case GL_TEXTURE_BASE_LEVEL: @@ -481,7 +482,7 @@ static struct gl_texture_object *r200NewTextureObject(GLcontext * ctx, r200SetTexWrap( t, t->base.WrapS, t->base.WrapT, t->base.WrapR ); r200SetTexMaxAnisotropy( t, t->base.MaxAnisotropy ); r200SetTexFilter(t, t->base.MinFilter, t->base.MagFilter); - r200SetTexBorderColor(t, t->base._BorderChan); + r200SetTexBorderColor(t, t->base.BorderColor); return &t->base; } diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 2dd2c6a4df1..c575c9ac496 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -214,6 +214,7 @@ static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) struct radeon_renderbuffer *rrb; uint32_t cbpitch; uint32_t offset = r300->radeon.state.color.draw_offset; + uint32_t dw = 6; rrb = radeon_get_colorbuffer(&r300->radeon); if (!rrb || !rrb->bo) { @@ -230,11 +231,16 @@ static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) cbpitch |= R300_COLOR_TILE_ENABLE; - BEGIN_BATCH_NO_AUTOSTATE(8); + if (r300->radeon.radeonScreen->kernel_mm) + dw += 2; + BEGIN_BATCH_NO_AUTOSTATE(dw); OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1); OUT_BATCH_RELOC(offset, rrb->bo, offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1); - OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); + if (!r300->radeon.radeonScreen->kernel_mm) + OUT_BATCH(cbpitch); + else + OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); END_BATCH(); if (r300->radeon.radeonScreen->driScreen->dri2.enabled) { if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 06db7ab8ff4..5f279d6629e 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -345,10 +345,10 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, ctx->Const.MaxTextureMaxAnisotropy = 16.0; ctx->Const.MaxTextureLodBias = 16.0; - if (screen->chip_family >= CHIP_FAMILY_RV515) { + if (screen->chip_family >= CHIP_FAMILY_RV515) ctx->Const.MaxTextureLevels = 13; - ctx->Const.MaxTextureRectSize = 4096; - } + else + ctx->Const.MaxTextureLevels = 12; ctx->Const.MinPointSize = 1.0; ctx->Const.MinPointSizeAA = 1.0; diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 86e68e35fc6..602f86ba669 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -76,8 +76,6 @@ typedef struct r300_context *r300ContextPtr; -/************ DMA BUFFERS **************/ - /* The blit width for texture uploads */ #define R300_BLIT_WIDTH_BYTES 1024 diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index bcf88038759..4fd6ba9b91a 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -191,7 +191,7 @@ GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten) fprintf(stderr, "\tout of free texcoords to write fog coord\n"); _mesa_exit(-1); } - ret |= 4 << (3 * first_free_texcoord); + ret |= 1 << (3 * first_free_texcoord); } return ret; diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 6796d36d4cd..46c3df70991 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1510,9 +1510,9 @@ static void r300SetupRSUnit(GLcontext * ctx) /* with TCL we always seem to route 4 components */ if (hw_tcl_on) - count = 4; + count = 4; else - count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; + count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; switch(count) { case 4: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3); break; @@ -1530,12 +1530,22 @@ static void r300SetupRSUnit(GLcontext * ctx) ++fp_reg; } + if (InputsRead & FRAG_BIT_WPOS) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count); + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_WPOS; + rs_tex_count += 4; + ++tex_ip; + ++fp_reg; + } + if (InputsRead & FRAG_BIT_FOGC) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count); + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(R300_RS_SEL_K0) | R300_RS_SEL_R(R300_RS_SEL_K0); + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_Q(R300_RS_SEL_K1) | R300_RS_TEX_PTR(rs_tex_count); r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); InputsRead &= ~FRAG_BIT_FOGC; - rs_tex_count += 4; + rs_tex_count += 1; ++tex_ip; ++fp_reg; } else { @@ -1543,16 +1553,6 @@ static void r300SetupRSUnit(GLcontext * ctx) } } - if (InputsRead & FRAG_BIT_WPOS) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count); - r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); - InputsRead &= ~FRAG_BIT_WPOS; - rs_tex_count += 4; - ++tex_ip; - ++fp_reg; - } - InputsRead &= ~FRAG_BIT_WPOS; - /* Setup default color if no color or tex was set */ if (rs_tex_count == 0 && col_ip == 0) { r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(0) | R300_RS_COL_FMT(R300_RS_COL_FMT_0001); @@ -1560,10 +1560,10 @@ static void r300SetupRSUnit(GLcontext * ctx) } high_rr = (col_ip > tex_ip) ? col_ip : tex_ip; - r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; + r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; r300->hw.rc.cmd[2] |= high_rr - 1; - r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, high_rr); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, high_rr); if (InputsRead) WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index d8a68f7fc5e..03c1521de7e 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -63,7 +63,7 @@ do { \ ++num_attrs; \ } while (0) -static void r300SwtclVAPSetup(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten) +static void r300SwtclVAPSetup(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten, GLuint vap_out_fmt_1) { r300ContextPtr rmesa = R300_CONTEXT( ctx ); TNLcontext *tnl = TNL_CONTEXT(ctx); @@ -139,7 +139,12 @@ static void r300SwtclVAPSetup(GLcontext *ctx, GLuint InputsRead, GLuint OutputsW rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten); - rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten); + /** + * Can't use r300VAPOutputCntl1 function because it assumes + * that all texture coords have 4 components and that's the case + * for HW TCL path, but not for SW TCL. + */ + rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_out_fmt_1; vte = rmesa->hw.vte.cmd[1]; vte &= ~(R300_VTX_XY_FMT | R300_VTX_Z_FMT | R300_VTX_W0_FMT); @@ -166,7 +171,7 @@ static void r300SetVertexFormat( GLcontext *ctx ) r300ContextPtr rmesa = R300_CONTEXT( ctx ); TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &tnl->vb; - int fog_id = -1; + int first_free_tex = 0, vap_out_fmt_1 = 0; GLuint InputsRead = 0; GLuint OutputsWritten = 0; int num_attrs = 0; @@ -217,34 +222,8 @@ static void r300SetVertexFormat( GLcontext *ctx ) ADD_ATTR(VERT_ATTRIB_POINT_SIZE, EMIT_1F, SWTCL_OVM_POINT_SIZE, swiz, MASK_X); } - if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_FOG)) { - /* find first free tex coord slot */ - if (RENDERINPUTS_TEST_RANGE(tnl->render_inputs_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { - int i; - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - if (!RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX(i) )) { - fog_id = i; - break; - } - } - } else { - fog_id = 0; - } - - if (fog_id == -1) { - fprintf(stderr, "\tout of free texcoords to do fog\n"); - _mesa_exit(-1); - } - - InputsRead |= 1 << VERT_ATTRIB_FOG; - OutputsWritten |= 1 << VERT_RESULT_FOGC; - GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO); - EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F ); - ADD_ATTR(VERT_ATTRIB_FOG, EMIT_1F, SWTCL_OVM_TEX(fog_id), swiz, MASK_X); - } - if (RENDERINPUTS_TEST_RANGE(tnl->render_inputs_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { - int i; + int i, size; GLuint swiz, mask, format; for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX(i) )) { @@ -254,16 +233,19 @@ static void r300SetVertexFormat( GLcontext *ctx ) format = EMIT_2F; swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO); mask = MASK_X | MASK_Y; + size = 2; break; case 3: format = EMIT_3F; swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); mask = MASK_X | MASK_Y | MASK_Z; + size = 3; break; case 4: format = EMIT_4F; swiz = SWIZZLE_XYZW; mask = MASK_XYZW; + size = 4; break; default: continue; @@ -272,42 +254,43 @@ static void r300SetVertexFormat( GLcontext *ctx ) OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); EMIT_ATTR(_TNL_ATTRIB_TEX(i), format); ADD_ATTR(VERT_ATTRIB_TEX0 + i, format, SWTCL_OVM_TEX(i), swiz, mask); + vap_out_fmt_1 |= size << (i * 3); + ++first_free_tex; } } } /* RS can't put fragment position on the pixel stack, so stuff it in texcoord if needed */ if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POS) && (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_WPOS)) { - int first_free_tex = -1; - if (fog_id >= 0) { - first_free_tex = fog_id+1; - } else { - if (RENDERINPUTS_TEST_RANGE(tnl->render_inputs_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { - int i; - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - if (!RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX(i) )) { - first_free_tex = i; - break; - } - } - } else { - first_free_tex = 0; - } - } - - if (first_free_tex == -1) { + if (first_free_tex >= ctx->Const.MaxTextureUnits) { fprintf(stderr, "\tout of free texcoords to write w pos\n"); _mesa_exit(-1); } InputsRead |= 1 << (VERT_ATTRIB_TEX0 + first_free_tex); OutputsWritten |= 1 << (VERT_RESULT_TEX0 + first_free_tex); - EMIT_ATTR( _TNL_ATTRIB_TEX(first_free_tex), EMIT_4F ); - ADD_ATTR(VERT_ATTRIB_TEX0 + first_free_tex, EMIT_4F, SWTCL_OVM_TEX(first_free_tex), SWIZZLE_XYZW, MASK_XYZW); + EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F ); + ADD_ATTR(VERT_ATTRIB_POS, EMIT_4F, SWTCL_OVM_TEX(first_free_tex), SWIZZLE_XYZW, MASK_XYZW); + vap_out_fmt_1 |= 4 << (first_free_tex * 3); + ++first_free_tex; + } + + if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_FOG)) { + if (first_free_tex >= ctx->Const.MaxTextureUnits) { + fprintf(stderr, "\tout of free texcoords to write fog coordinate\n"); + _mesa_exit(-1); + } + + InputsRead |= 1 << VERT_ATTRIB_FOG; + OutputsWritten |= 1 << VERT_RESULT_FOGC; + GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO); + EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F ); + ADD_ATTR(VERT_ATTRIB_FOG, EMIT_1F, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_X); + vap_out_fmt_1 |= 1 << (first_free_tex * 3); } R300_NEWPRIM(rmesa); - r300SwtclVAPSetup(ctx, InputsRead, OutputsWritten); + r300SwtclVAPSetup(ctx, InputsRead, OutputsWritten, vap_out_fmt_1); rmesa->radeon.swtcl.vertex_size = _tnl_install_attrs( ctx, @@ -460,7 +443,7 @@ do { \ #define LOCAL_VARS(n) \ r300ContextPtr rmesa = R300_CONTEXT(ctx); \ - GLuint color[n] = { 0, }, spec[n] = { 0, }; \ + GLuint color[n] = { 0, }, spec[n] = { 0, }; \ GLuint coloroffset = rmesa->swtcl.coloroffset; \ GLuint specoffset = rmesa->swtcl.specoffset; \ (void) color; (void) spec; (void) coloroffset; (void) specoffset; diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 0cbb2bcf3f1..0af5bb4f469 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -176,8 +176,13 @@ static void r300SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloa } } -static void r300SetTexBorderColor(radeonTexObjPtr t, GLubyte c[4]) +static void r300SetTexBorderColor(radeonTexObjPtr t, const GLfloat color[4]) { + GLubyte c[4]; + CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]); + CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]); + CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]); + CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]); t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]); } @@ -211,7 +216,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, break; case GL_TEXTURE_BORDER_COLOR: - r300SetTexBorderColor(t, texObj->_BorderChan); + r300SetTexBorderColor(t, texObj->BorderColor); break; case GL_TEXTURE_BASE_LEVEL: @@ -308,7 +313,7 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, /* Initialize hardware state */ r300UpdateTexWrap(t); r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy); - r300SetTexBorderColor(t, t->base._BorderChan); + r300SetTexBorderColor(t, t->base.BorderColor); return &t->base; } diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h index 9921d350e30..6f1a0b45358 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h +++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h @@ -5,6 +5,17 @@ #define RADEON_PARAM_DEVICE_ID 16 #endif +#ifndef RADEON_INFO_DEVICE_ID +#define RADEON_INFO_DEVICE_ID 0 +#endif +#ifndef RADEON_INFO_NUM_GB_PIPES +#define RADEON_INFO_NUM_GB_PIPES 0 +#endif + +#ifndef DRM_RADEON_INFO +#define DRM_RADEON_INFO 0x1 +#endif + #ifdef HAVE_LIBDRM_RADEON #include "radeon_bo.h" @@ -27,12 +38,16 @@ #define DRM_RADEON_GEM_INFO 0x1c struct drm_radeon_gem_info { - uint64_t gart_start; uint64_t gart_size; - uint64_t vram_start; uint64_t vram_size; uint64_t vram_visible; }; + +struct drm_radeon_info { + uint32_t request; + uint32_t pad; + uint32_t value; +}; #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_lighting.c b/src/mesa/drivers/dri/radeon/radeon_lighting.c index 6d9ccfa24d6..ac3b94e4a68 100644 --- a/src/mesa/drivers/dri/radeon/radeon_lighting.c +++ b/src/mesa/drivers/dri/radeon/radeon_lighting.c @@ -246,7 +246,7 @@ void radeonUpdateMaterial( GLcontext *ctx ) * _VP_inf_norm * _h_inf_norm * _Position - * _NormDirection + * _NormSpotDirection * _ModelViewInvScale * _NeedEyeCoords * _EyeZDir @@ -308,9 +308,9 @@ void radeonUpdateLighting( GLcontext *ctx ) fcmd[LIT_DIRECTION_W] = 0; } else { COPY_4V( &fcmd[LIT_POSITION_X], l->_Position ); - fcmd[LIT_DIRECTION_X] = -l->_NormDirection[0]; - fcmd[LIT_DIRECTION_Y] = -l->_NormDirection[1]; - fcmd[LIT_DIRECTION_Z] = -l->_NormDirection[2]; + fcmd[LIT_DIRECTION_X] = -l->_NormSpotDirection[0]; + fcmd[LIT_DIRECTION_Y] = -l->_NormSpotDirection[1]; + fcmd[LIT_DIRECTION_Z] = -l->_NormSpotDirection[2]; fcmd[LIT_DIRECTION_W] = 0; } diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 08a24a2c2b2..df0128c3782 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -287,8 +287,6 @@ radeonFillInModes( __DRIscreenPrivate *psp, __GLcontextModes *m; unsigned depth_buffer_factor; unsigned back_buffer_factor; - GLenum fb_format; - GLenum fb_type; int i; /* Right now GLX_SWAP_COPY_OML isn't supported, but it would be easy @@ -319,20 +317,27 @@ radeonFillInModes( __DRIscreenPrivate *psp, depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 2 : 1; back_buffer_factor = (have_back_buffer) ? 2 : 1; - if ( pixel_bits == 16 ) { - fb_format = GL_RGB; - fb_type = GL_UNSIGNED_SHORT_5_6_5; - } - else { - fb_format = GL_BGRA; - fb_type = GL_UNSIGNED_INT_8_8_8_8_REV; - } + if (pixel_bits == 16) { + __DRIconfig **configs_a8r8g8b8; + __DRIconfig **configs_r5g6b5; + + configs_r5g6b5 = driCreateConfigs(GL_RGB, GL_UNSIGNED_SHORT_5_6_5, + depth_bits_array, stencil_bits_array, + depth_buffer_factor, back_buffer_modes, + back_buffer_factor, msaa_samples_array, + 1); + configs_a8r8g8b8 = driCreateConfigs(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, + depth_bits_array, stencil_bits_array, + 1, back_buffer_modes, 1, + msaa_samples_array, 1); + configs = driConcatConfigs(configs_r5g6b5, configs_a8r8g8b8); + } else + configs = driCreateConfigs(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, + depth_bits_array, stencil_bits_array, + depth_buffer_factor, + back_buffer_modes, back_buffer_factor, + msaa_samples_array, 1); - configs = driCreateConfigs(fb_format, fb_type, - depth_bits_array, stencil_bits_array, - depth_buffer_factor, - back_buffer_modes, back_buffer_factor, - msaa_samples_array, 1); if (configs == NULL) { fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", __func__, __LINE__ ); diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c index 80cfdaa2bff..d9a7ef60612 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -873,7 +873,7 @@ void radeonUpdateMaterial( GLcontext *ctx ) * _VP_inf_norm * _h_inf_norm * _Position - * _NormDirection + * _NormSpotDirection * _ModelViewInvScale * _NeedEyeCoords * _EyeZDir @@ -934,9 +934,9 @@ static void update_light( GLcontext *ctx ) fcmd[LIT_DIRECTION_W] = 0; } else { COPY_4V( &fcmd[LIT_POSITION_X], l->_Position ); - fcmd[LIT_DIRECTION_X] = -l->_NormDirection[0]; - fcmd[LIT_DIRECTION_Y] = -l->_NormDirection[1]; - fcmd[LIT_DIRECTION_Z] = -l->_NormDirection[2]; + fcmd[LIT_DIRECTION_X] = -l->_NormSpotDirection[0]; + fcmd[LIT_DIRECTION_Y] = -l->_NormSpotDirection[1]; + fcmd[LIT_DIRECTION_Z] = -l->_NormSpotDirection[2]; fcmd[LIT_DIRECTION_W] = 0; } diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c index 21509c6e5e7..2549d5cb5cb 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex.c @@ -243,8 +243,13 @@ static void radeonSetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf ) } } -static void radeonSetTexBorderColor( radeonTexObjPtr t, GLubyte c[4] ) +static void radeonSetTexBorderColor( radeonTexObjPtr t, const GLfloat color[4] ) { + GLubyte c[4]; + CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]); + CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]); + CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]); + CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]); t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] ); } @@ -339,7 +344,7 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target, break; case GL_TEXTURE_BORDER_COLOR: - radeonSetTexBorderColor( t, texObj->_BorderChan ); + radeonSetTexBorderColor( t, texObj->BorderColor ); break; case GL_TEXTURE_BASE_LEVEL: @@ -439,7 +444,7 @@ radeonNewTextureObject( GLcontext *ctx, GLuint name, GLenum target ) radeonSetTexWrap( t, t->base.WrapS, t->base.WrapT ); radeonSetTexMaxAnisotropy( t, t->base.MaxAnisotropy ); radeonSetTexFilter( t, t->base.MinFilter, t->base.MagFilter ); - radeonSetTexBorderColor( t, t->base._BorderChan ); + radeonSetTexBorderColor( t, t->base.BorderColor ); return &t->base; } diff --git a/src/mesa/drivers/dri/s3v/s3v_tex.c b/src/mesa/drivers/dri/s3v/s3v_tex.c index db660263638..9b92519862a 100644 --- a/src/mesa/drivers/dri/s3v/s3v_tex.c +++ b/src/mesa/drivers/dri/s3v/s3v_tex.c @@ -132,8 +132,14 @@ static void s3vSetTexFilter(s3vContextPtr vmesa, static void s3vSetTexBorderColor(s3vContextPtr vmesa, s3vTextureObjectPtr t, - GLubyte color[4]) + const GLfloat color[4]) { + GLubyte c[4]; + CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]); + CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]); + CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]); + CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]); + #if TEX_DEBUG_ON static unsigned int times=0; DEBUG_TEX(("*** s3vSetTexBorderColor: #%i ***\n", ++times)); @@ -143,8 +149,7 @@ static void s3vSetTexBorderColor(s3vContextPtr vmesa, /* switch(t0 ... t->TextureColorMode) */ /* case TEX_COL_ARGB1555: */ - t->TextureBorderColor = - S3VIRGEPACKCOLOR555(color[0], color[1], color[2], color[3]); + t->TextureBorderColor = S3VIRGEPACKCOLOR555(c[0], c[1], c[2], c[3]); DEBUG(("TextureBorderColor = 0x%x\n", t->TextureBorderColor)); @@ -182,7 +187,7 @@ static void s3vTexParameter( GLcontext *ctx, GLenum target, break; case GL_TEXTURE_BORDER_COLOR: - s3vSetTexBorderColor( vmesa, t, tObj->_BorderChan ); + s3vSetTexBorderColor( vmesa, t, tObj->BorderColor ); break; case GL_TEXTURE_BASE_LEVEL: diff --git a/src/mesa/drivers/dri/savage/savagetex.c b/src/mesa/drivers/dri/savage/savagetex.c index a3bebfa8cf7..fe239e1b05f 100644 --- a/src/mesa/drivers/dri/savage/savagetex.c +++ b/src/mesa/drivers/dri/savage/savagetex.c @@ -474,7 +474,7 @@ static void savageSetTexFilter(savageTexObjPtr t, GLenum minf, GLenum magf) /* Need a fallback ? */ -static void savageSetTexBorderColor(savageTexObjPtr t, GLubyte color[4]) +static void savageSetTexBorderColor(savageTexObjPtr t, const GLfloat color[4]) { /* t->Setup[SAVAGE_TEXREG_TEXBORDERCOL] = */ /*t->setup.borderColor = SAVAGEPACKCOLOR8888(color[0],color[1],color[2],color[3]); */ @@ -512,7 +512,7 @@ savageAllocTexObj( struct gl_texture_object *texObj ) savageSetTexWrapping(t,texObj->WrapS,texObj->WrapT); savageSetTexFilter(t,texObj->MinFilter,texObj->MagFilter); - savageSetTexBorderColor(t,texObj->_BorderChan); + savageSetTexBorderColor(t,texObj->BorderColor); } return t; @@ -2018,7 +2018,7 @@ static void savageTexParameter( GLcontext *ctx, GLenum target, break; case GL_TEXTURE_BORDER_COLOR: - savageSetTexBorderColor(t,tObj->_BorderChan); + savageSetTexBorderColor(t,tObj->BorderColor); break; default: diff --git a/src/mesa/drivers/dri/sis/sis_texstate.c b/src/mesa/drivers/dri/sis/sis_texstate.c index 63f23fc0149..46417ce414c 100644 --- a/src/mesa/drivers/dri/sis/sis_texstate.c +++ b/src/mesa/drivers/dri/sis/sis_texstate.c @@ -456,11 +456,16 @@ sis_set_texobj_parm( GLcontext *ctx, struct gl_texture_object *texObj, break; } - current->texture[hw_unit].hwTextureBorderColor = - ((GLuint) texObj->_BorderChan[3] << 24) + - ((GLuint) texObj->_BorderChan[0] << 16) + - ((GLuint) texObj->_BorderChan[1] << 8) + - ((GLuint) texObj->_BorderChan[2]); + { + GLubyte c[4]; + CLAMPED_FLOAT_TO_UBYTE(c[0], texObj->BorderColor[0]); + CLAMPED_FLOAT_TO_UBYTE(c[1], texObj->BorderColor[1]); + CLAMPED_FLOAT_TO_UBYTE(c[2], texObj->BorderColor[2]); + CLAMPED_FLOAT_TO_UBYTE(c[3], texObj->BorderColor[3]); + + current->texture[hw_unit].hwTextureBorderColor = + PACK_COLOR_8888(c[3], c[0], c[1], c[2]); + } if (current->texture[hw_unit].hwTextureBorderColor != prev->texture[hw_unit].hwTextureBorderColor) |