diff options
author | Alex Deucher <[email protected]> | 2009-06-12 12:09:34 -0400 |
---|---|---|
committer | Alex Deucher <[email protected]> | 2009-06-12 12:09:34 -0400 |
commit | 1036ef2bf468611d37b5df06fc4424f2002e3837 (patch) | |
tree | f0859a6d903c2570a0a00c918da88139f8f7d065 /src/mesa/drivers | |
parent | 917f8bc1a85e61311cef6478127b387df70fba14 (diff) | |
parent | 1cd0afffc9edbcac690f8ab436aecfced26b0aba (diff) |
Merge master and fix conflicts
Diffstat (limited to 'src/mesa/drivers')
150 files changed, 6182 insertions, 4201 deletions
diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 276da41f4e4..f31a2a25bf3 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -231,12 +231,16 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->BeginQuery = _mesa_begin_query; driver->EndQuery = _mesa_end_query; driver->WaitQuery = _mesa_wait_query; + driver->CheckQuery = _mesa_check_query; /* APPLE_vertex_array_object */ driver->NewArrayObject = _mesa_new_array_object; driver->DeleteArrayObject = _mesa_delete_array_object; driver->BindArrayObject = NULL; + /* GL_ARB_copy_buffer */ + driver->CopyBufferSubData = _mesa_copy_buffer_subdata; + /* T&L stuff */ driver->NeedValidate = GL_FALSE; driver->ValidateTnlModule = NULL; diff --git a/src/mesa/drivers/dri/Makefile.template b/src/mesa/drivers/dri/Makefile.template index 2fa36bab3f9..bd38e3be47c 100644 --- a/src/mesa/drivers/dri/Makefile.template +++ b/src/mesa/drivers/dri/Makefile.template @@ -72,10 +72,11 @@ $(TOP)/$(LIB_DIR)/$(LIBNAME): $(LIBNAME) depend: $(C_SOURCES) $(ASM_SOURCES) $(SYMLINKS) - rm -f depend - touch depend - $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) \ - $(ASM_SOURCES) + @ echo "running $(MKDEP)" + @ rm -f depend + @ touch depend + @ $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) \ + $(ASM_SOURCES) > /dev/null 2>/dev/null # Emacs tags @@ -91,7 +92,7 @@ clean: install: $(LIBNAME) $(INSTALL) -d $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR) - $(INSTALL) -m 755 $(LIBNAME) $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR) + $(MINSTALL) -m 755 $(LIBNAME) $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR) -include depend diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index 38c2e7b00d1..ae0e61e515b 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -37,6 +37,9 @@ typedef GLboolean ( * PFNGLXGETMSCRATEOMLPROC) (__DRIdrawable *drawable, int32_t *numerator, int32_t *denominator); #endif +static void dri_get_drawable(__DRIdrawable *pdp); +static void dri_put_drawable(__DRIdrawable *pdp); + /** * This is just a token extension used to signal that the driver * supports setting a read drawable. @@ -59,7 +62,7 @@ __driUtilMessage(const char *f, ...) va_list args; if (getenv("LIBGL_DEBUG")) { - fprintf(stderr, "libGL error: \n"); + fprintf(stderr, "libGL: "); va_start(args, f); vfprintf(stderr, f, args); va_end(args); @@ -127,7 +130,7 @@ static int driUnbindContext(__DRIcontext *pcp) return GL_FALSE; } - pdp->refcount--; + dri_put_drawable(pdp); if (prp != pdp) { if (prp->refcount == 0) { @@ -135,7 +138,7 @@ static int driUnbindContext(__DRIcontext *pcp) return GL_FALSE; } - prp->refcount--; + dri_put_drawable(prp); } @@ -170,10 +173,10 @@ static int driBindContext(__DRIcontext *pcp, pcp->driReadablePriv = prp; if (pdp) { pdp->driContextPriv = pcp; - pdp->refcount++; + dri_get_drawable(pdp); } if ( prp && pdp != prp ) { - prp->refcount++; + dri_get_drawable(prp); } } @@ -311,12 +314,12 @@ static void driSwapBuffers(__DRIdrawable *dPriv) __DRIscreen *psp = dPriv->driScreenPriv; drm_clip_rect_t *rects; int i; - - if (!dPriv->numClipRects) - return; psp->DriverAPI.SwapBuffers(dPriv); + if (!dPriv->numClipRects) + return; + rects = _mesa_malloc(sizeof(*rects) * dPriv->numClipRects); if (!rects) @@ -430,7 +433,7 @@ driCreateNewDrawable(__DRIscreen *psp, const __DRIconfig *config, pdp->loaderPrivate = data; pdp->hHWDrawable = hwDrawable; - pdp->refcount = 0; + pdp->refcount = 1; pdp->pStamp = NULL; pdp->lastStamp = 0; pdp->index = 0; @@ -483,12 +486,19 @@ dri2CreateNewDrawable(__DRIscreen *screen, return pdraw; } - -static void -driDestroyDrawable(__DRIdrawable *pdp) +static void dri_get_drawable(__DRIdrawable *pdp) +{ + pdp->refcount++; +} + +static void dri_put_drawable(__DRIdrawable *pdp) { __DRIscreenPrivate *psp; + pdp->refcount--; + if (pdp->refcount) + return; + if (pdp) { psp = pdp->driScreenPriv; (*psp->DriverAPI.DestroyBuffer)(pdp); @@ -504,6 +514,12 @@ driDestroyDrawable(__DRIdrawable *pdp) } } +static void +driDestroyDrawable(__DRIdrawable *pdp) +{ + dri_put_drawable(pdp); +} + /*@}*/ diff --git a/src/mesa/drivers/dri/common/extension_helper.h b/src/mesa/drivers/dri/common/extension_helper.h index 8dcaaee3079..f5e35e41618 100644 --- a/src/mesa/drivers/dri/common/extension_helper.h +++ b/src/mesa/drivers/dri/common/extension_helper.h @@ -1763,6 +1763,13 @@ static const char DeleteFencesNV_names[] = ""; #endif +#if defined(need_GL_SGIX_polynomial_ffd) +static const char DeformationMap3dSGIX_names[] = + "iddiiddiiddiip\0" /* Parameter signature */ + "glDeformationMap3dSGIX\0" + ""; +#endif + #if defined(need_GL_VERSION_2_0) static const char IsShader_names[] = "i\0" /* Parameter signature */ @@ -4396,6 +4403,13 @@ static const char WindowPos3ivMESA_names[] = ""; #endif +#if defined(need_GL_ARB_copy_buffer) +static const char CopyBufferSubData_names[] = + "iiiii\0" /* Parameter signature */ + "glCopyBufferSubData\0" + ""; +#endif + #if defined(need_GL_VERSION_1_5) || defined(need_GL_ARB_vertex_buffer_object) static const char IsBufferARB_names[] = "i\0" /* Parameter signature */ @@ -4560,13 +4574,6 @@ static const char Minmax_names[] = ""; #endif -#if defined(need_GL_SGIX_polynomial_ffd) -static const char DeformationMap3dSGIX_names[] = - "iddiiddiiddiip\0" /* Parameter signature */ - "glDeformationMap3dSGIX\0" - ""; -#endif - #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_fog_coord) static const char FogCoorddvEXT_names[] = "p\0" /* Parameter signature */ @@ -4939,6 +4946,13 @@ static const struct dri_extension_function GL_APPLE_vertex_array_object_function }; #endif +#if defined(need_GL_ARB_copy_buffer) +static const struct dri_extension_function GL_ARB_copy_buffer_functions[] = { + { CopyBufferSubData_names, CopyBufferSubData_remap_index, -1 }, + { NULL, 0, 0 } +}; +#endif + #if defined(need_GL_ARB_draw_buffers) static const struct dri_extension_function GL_ARB_draw_buffers_functions[] = { { DrawBuffersARB_names, DrawBuffersARB_remap_index, -1 }, @@ -6055,9 +6069,9 @@ static const struct dri_extension_function GL_SGIX_pixel_texture_functions[] = { #if defined(need_GL_SGIX_polynomial_ffd) static const struct dri_extension_function GL_SGIX_polynomial_ffd_functions[] = { { LoadIdentityDeformationMapSGIX_names, LoadIdentityDeformationMapSGIX_remap_index, -1 }, + { DeformationMap3dSGIX_names, DeformationMap3dSGIX_remap_index, -1 }, { DeformSGIX_names, DeformSGIX_remap_index, -1 }, { DeformationMap3fSGIX_names, DeformationMap3fSGIX_remap_index, -1 }, - { DeformationMap3dSGIX_names, DeformationMap3dSGIX_remap_index, -1 }, { NULL, 0, 0 } }; #endif diff --git a/src/mesa/drivers/dri/common/spantmp2.h b/src/mesa/drivers/dri/common/spantmp2.h index f2868cb58a6..89c815722f6 100644 --- a/src/mesa/drivers/dri/common/spantmp2.h +++ b/src/mesa/drivers/dri/common/spantmp2.h @@ -82,6 +82,71 @@ rgba[3] = 0xff; \ } while (0) +#elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_4_4_4_4_REV) + +/** + ** GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV + **/ + +#ifndef GET_VALUE +#ifndef GET_PTR +#define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch) +#endif + +#define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y)) +#define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v) +#endif /* GET_VALUE */ + +#define INIT_MONO_PIXEL(p, color) \ + p = PACK_COLOR_4444(color[3], color[0], color[1], color[2]) + +#define WRITE_RGBA( _x, _y, r, g, b, a ) \ + PUT_VALUE(_x, _y, PACK_COLOR_4444(a, r, g, b)) \ + +#define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p) + +#define READ_RGBA( rgba, _x, _y ) \ + do { \ + GLushort p = GET_VALUE(_x, _y); \ + rgba[0] = ((p >> 8) & 0xf) * 0x11; \ + rgba[1] = ((p >> 4) & 0xf) * 0x11; \ + rgba[2] = ((p >> 0) & 0xf) * 0x11; \ + rgba[3] = ((p >> 12) & 0xf) * 0x11; \ + } while (0) + + +#elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_1_5_5_5_REV) + +/** + ** GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV + **/ + +#ifndef GET_VALUE +#ifndef GET_PTR +#define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch) +#endif + +#define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y)) +#define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v) +#endif /* GET_VALUE */ + +#define INIT_MONO_PIXEL(p, color) \ + p = PACK_COLOR_1555(color[3], color[0], color[1], color[2]) + +#define WRITE_RGBA( _x, _y, r, g, b, a ) \ + PUT_VALUE(_x, _y, PACK_COLOR_1555(a, r, g, b)) \ + +#define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p) + +#define READ_RGBA( rgba, _x, _y ) \ + do { \ + GLushort p = GET_VALUE(_x, _y); \ + rgba[0] = ((p >> 7) & 0xf8) * 255 / 0xf8; \ + rgba[1] = ((p >> 2) & 0xf8) * 255 / 0xf8; \ + rgba[2] = ((p << 3) & 0xf8) * 255 / 0xf8; \ + rgba[3] = ((p >> 15) & 0x1) * 0xff; \ + } while (0) + #elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV) /** diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile index 954a7e2af1f..beaf9a4b129 100644 --- a/src/mesa/drivers/dri/i915/Makefile +++ b/src/mesa/drivers/dri/i915/Makefile @@ -11,7 +11,6 @@ DRIVER_SOURCES = \ i830_metaops.c \ i830_state.c \ i830_texblend.c \ - i830_tex.c \ i830_texstate.c \ i830_vtbl.c \ intel_render.c \ @@ -20,6 +19,7 @@ DRIVER_SOURCES = \ intel_batchbuffer.c \ intel_clear.c \ intel_extensions.c \ + intel_generatemipmap.c \ intel_mipmap_tree.c \ intel_tex_layout.c \ intel_tex_image.c \ @@ -36,7 +36,6 @@ DRIVER_SOURCES = \ intel_buffers.c \ intel_blit.c \ intel_swapbuffers.c \ - i915_tex.c \ i915_tex_layout.c \ i915_texstate.c \ i915_context.c \ diff --git a/src/mesa/drivers/dri/i915/i830_context.c b/src/mesa/drivers/dri/i915/i830_context.c index 9c540cb2bb7..840946f908d 100644 --- a/src/mesa/drivers/dri/i915/i830_context.c +++ b/src/mesa/drivers/dri/i915/i830_context.c @@ -47,7 +47,6 @@ i830InitDriverFunctions(struct dd_function_table *functions) { intelInitDriverFunctions(functions); i830InitStateFuncs(functions); - i830InitTextureFuncs(functions); } extern const struct tnl_pipeline_stage *intel_pipeline[]; @@ -99,6 +98,8 @@ i830CreateContext(const __GLcontextModes * mesaVis, ctx->Const.MaxTextureRectSize = (1 << 11); ctx->Const.MaxTextureUnits = I830_TEX_UNITS; + ctx->Const.MaxTextureMaxAnisotropy = 2.0; + ctx->Const.MaxDrawBuffers = 1; _tnl_init_vertices(ctx, ctx->Const.MaxArrayLockSize + 12, diff --git a/src/mesa/drivers/dri/i915/i830_reg.h b/src/mesa/drivers/dri/i915/i830_reg.h index d210c2d08e4..db16871001d 100644 --- a/src/mesa/drivers/dri/i915/i830_reg.h +++ b/src/mesa/drivers/dri/i915/i830_reg.h @@ -48,19 +48,6 @@ #define AA_LINE_ENABLE ((1<<1) | 1) #define AA_LINE_DISABLE (1<<1) -#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) -/* Dword 1 */ -#define BUF_3D_ID_COLOR_BACK (0x3<<24) -#define BUF_3D_ID_DEPTH (0x7<<24) -#define BUF_3D_USE_FENCE (1<<23) -#define BUF_3D_TILED_SURFACE (1<<22) -#define BUF_3D_TILE_WALK_X 0 -#define BUF_3D_TILE_WALK_Y (1<<21) -#define BUF_3D_PITCH(x) (((x)/4)<<2) -/* Dword 2 */ -#define BUF_3D_ADDR(x) ((x) & ~0x3) - - #define _3DSTATE_COLOR_FACTOR_CMD (CMD_3D | (0x1d<<24) | (0x1<<16)) #define _3DSTATE_COLOR_FACTOR_N_CMD(stage) (CMD_3D | (0x1d<<24) | \ diff --git a/src/mesa/drivers/dri/i915/i830_state.c b/src/mesa/drivers/dri/i915/i830_state.c index d9cad0c4bf8..8ef6c9144f1 100644 --- a/src/mesa/drivers/dri/i915/i830_state.c +++ b/src/mesa/drivers/dri/i915/i830_state.c @@ -39,6 +39,7 @@ #include "intel_screen.h" #include "intel_batchbuffer.h" #include "intel_fbo.h" +#include "intel_buffers.h" #include "i830_context.h" #include "i830_reg.h" @@ -446,6 +447,24 @@ i830DepthMask(GLcontext * ctx, GLboolean flag) i830->state.Ctx[I830_CTXREG_ENABLES_2] |= DISABLE_DEPTH_WRITE; } +/** Called from ctx->Driver.Viewport() */ +static void +i830Viewport(GLcontext * ctx, + GLint x, GLint y, GLsizei width, GLsizei height) +{ + intelCalcViewport(ctx); + + intel_viewport(ctx, x, y, width, height); +} + + +/** Called from ctx->Driver.DepthRange() */ +static void +i830DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval) +{ + intelCalcViewport(ctx); +} + /* ============================================================= * Polygon stipple * @@ -1064,6 +1083,8 @@ i830InitStateFuncs(struct dd_function_table *functions) functions->StencilFuncSeparate = i830StencilFuncSeparate; functions->StencilMaskSeparate = i830StencilMaskSeparate; functions->StencilOpSeparate = i830StencilOpSeparate; + functions->DepthRange = i830DepthRange; + functions->Viewport = i830Viewport; } void diff --git a/src/mesa/drivers/dri/i915/i830_tex.c b/src/mesa/drivers/dri/i915/i830_tex.c deleted file mode 100644 index 34ac42a78e0..00000000000 --- a/src/mesa/drivers/dri/i915/i830_tex.c +++ /dev/null @@ -1,100 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "main/glheader.h" -#include "main/mtypes.h" -#include "main/imports.h" -#include "main/simple_list.h" -#include "main/enums.h" -#include "main/image.h" -#include "main/mm.h" -#include "main/texstore.h" -#include "main/texformat.h" -#include "swrast/swrast.h" - -#include "texmem.h" - -#include "i830_context.h" -#include "i830_reg.h" - - - -static void -i830TexEnv(GLcontext * ctx, GLenum target, - GLenum pname, const GLfloat * param) -{ - - switch (pname) { - case GL_TEXTURE_ENV_COLOR: - case GL_TEXTURE_ENV_MODE: - case GL_COMBINE_RGB: - case GL_COMBINE_ALPHA: - case GL_SOURCE0_RGB: - case GL_SOURCE1_RGB: - case GL_SOURCE2_RGB: - case GL_SOURCE0_ALPHA: - case GL_SOURCE1_ALPHA: - case GL_SOURCE2_ALPHA: - case GL_OPERAND0_RGB: - case GL_OPERAND1_RGB: - case GL_OPERAND2_RGB: - case GL_OPERAND0_ALPHA: - case GL_OPERAND1_ALPHA: - case GL_OPERAND2_ALPHA: - case GL_RGB_SCALE: - case GL_ALPHA_SCALE: - break; - - case GL_TEXTURE_LOD_BIAS:{ - struct i830_context *i830 = i830_context(ctx); - GLuint unit = ctx->Texture.CurrentUnit; - int b = (int) ((*param) * 16.0); - if (b > 63) - b = 63; - if (b < -64) - b = -64; - I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit)); - i830->lodbias_tm0s3[unit] = - ((b << TM0S3_LOD_BIAS_SHIFT) & TM0S3_LOD_BIAS_MASK); - break; - } - - default: - break; - } -} - - - - -void -i830InitTextureFuncs(struct dd_function_table *functions) -{ -/* - functions->TexEnv = i830TexEnv; -*/ -} diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c index 753c25b57ed..6f998fa6f77 100644 --- a/src/mesa/drivers/dri/i915/i830_texstate.c +++ b/src/mesa/drivers/dri/i915/i830_texstate.c @@ -174,14 +174,16 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) state[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 | (LOAD_TEXTURE_MAP0 << unit) | 4); -/* state[I830_TEXREG_TM0S0] = (TM0S0_USE_FENCE | */ -/* t->intel.TextureOffset); */ - - state[I830_TEXREG_TM0S1] = (((firstImage->Height - 1) << TM0S1_HEIGHT_SHIFT) | ((firstImage->Width - 1) << TM0S1_WIDTH_SHIFT) | format); + if (intelObj->mt->region->tiling != I915_TILING_NONE) { + state[I830_TEXREG_TM0S1] |= TM0S1_TILED_SURFACE; + if (intelObj->mt->region->tiling == I915_TILING_Y) + state[I830_TEXREG_TM0S1] |= TM0S1_TILE_WALK; + } + state[I830_TEXREG_TM0S2] = ((((pitch / 4) - 1) << TM0S2_PITCH_SHIFT) | TM0S2_CUBE_FACE_ENA_MASK); diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 1a949210789..cbee9f9efe9 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -26,12 +26,14 @@ **************************************************************************/ #include "glapi/glapi.h" +#include "main/texformat.h" #include "i830_context.h" #include "i830_reg.h" #include "intel_batchbuffer.h" #include "intel_regions.h" #include "intel_tris.h" +#include "intel_fbo.h" #include "tnl/t_context.h" #include "tnl/t_vertex.h" @@ -550,7 +552,7 @@ i830_emit_state(struct intel_context *intel) if (state->tex_buffer[i]) { OUT_RELOC(state->tex_buffer[i], I915_GEM_DOMAIN_SAMPLER, 0, - state->tex_offset[i] | TM0S0_USE_FENCE); + state->tex_offset[i]); } else if (state == &i830->meta) { assert(i == 0); @@ -614,6 +616,8 @@ i830_state_draw_region(struct intel_context *intel, { struct i830_context *i830 = i830_context(&intel->ctx); GLcontext *ctx = &intel->ctx; + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); GLuint value; ASSERT(state == &i830->state || state == &i830->meta); @@ -630,34 +634,38 @@ i830_state_draw_region(struct intel_context *intel, /* * Set stride/cpp values */ - if (color_region) { - state->Buffer[I830_DESTREG_CBUFADDR0] = _3DSTATE_BUF_INFO_CMD; - state->Buffer[I830_DESTREG_CBUFADDR1] = - (BUF_3D_ID_COLOR_BACK | - BUF_3D_PITCH(color_region->pitch * color_region->cpp) | - BUF_3D_USE_FENCE); - } + i915_set_buf_info_for_region(&state->Buffer[I830_DESTREG_CBUFADDR0], + color_region, BUF_3D_ID_COLOR_BACK); - if (depth_region) { - state->Buffer[I830_DESTREG_DBUFADDR0] = _3DSTATE_BUF_INFO_CMD; - state->Buffer[I830_DESTREG_DBUFADDR1] = - (BUF_3D_ID_DEPTH | - BUF_3D_PITCH(depth_region->pitch * depth_region->cpp) | - BUF_3D_USE_FENCE); - } + i915_set_buf_info_for_region(&state->Buffer[I830_DESTREG_DBUFADDR0], + depth_region, BUF_3D_ID_DEPTH); /* * Compute/set I830_DESTREG_DV1 value */ value = (DSTORG_HORT_BIAS(0x8) | /* .5 */ DSTORG_VERT_BIAS(0x8) | DEPTH_IS_Z); /* .5 */ - - if (color_region && color_region->cpp == 4) { - value |= DV_PF_8888; - } - else { - value |= DV_PF_565; + + if (irb != NULL) { + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_ARGB8888: + value |= DV_PF_8888; + break; + case MESA_FORMAT_RGB565: + value |= DV_PF_565; + break; + case MESA_FORMAT_ARGB1555: + value |= DV_PF_1555; + break; + case MESA_FORMAT_ARGB4444: + value |= DV_PF_4444; + break; + default: + _mesa_problem(ctx, "Bad renderbuffer format: %d\n", + irb->texformat->MesaFormat); + } } + if (depth_region && depth_region->cpp == 4) { value |= DEPTH_FRMT_24_FIXED_8_OTHER; } @@ -700,26 +708,6 @@ i830_set_draw_region(struct intel_context *intel, i830_state_draw_region(intel, &i830->state, color_regions[0], depth_region); } -#if 0 -static void -i830_update_color_z_regions(intelContextPtr intel, - const intelRegion * colorRegion, - const intelRegion * depthRegion) -{ - i830ContextPtr i830 = I830_CONTEXT(intel); - - i830->state.Buffer[I830_DESTREG_CBUFADDR1] = - (BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(colorRegion->pitch) | - BUF_3D_USE_FENCE); - i830->state.Buffer[I830_DESTREG_CBUFADDR2] = colorRegion->offset; - - i830->state.Buffer[I830_DESTREG_DBUFADDR1] = - (BUF_3D_ID_DEPTH | BUF_3D_PITCH(depthRegion->pitch) | BUF_3D_USE_FENCE); - i830->state.Buffer[I830_DESTREG_DBUFADDR2] = depthRegion->offset; -} -#endif - - /* This isn't really handled at the moment. */ static void diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index 7549029a1be..367d2a3b648 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -73,7 +73,7 @@ i915InvalidateState(GLcontext * ctx, GLuint new_state) p->params_uptodate = 0; } - if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM)) + if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) i915_update_fog(ctx); } @@ -83,7 +83,6 @@ i915InitDriverFunctions(struct dd_function_table *functions) { intelInitDriverFunctions(functions); i915InitStateFunctions(functions); - i915InitTextureFuncs(functions); i915InitFragProgFuncs(functions); functions->UpdateState = i915InvalidateState; } @@ -146,6 +145,8 @@ i915CreateContext(const __GLcontextModes * mesaVis, ctx->Const.MaxTextureRectSize = (1 << 11); ctx->Const.MaxTextureUnits = I915_TEX_UNITS; + ctx->Const.MaxTextureMaxAnisotropy = 4.0; + /* GL_ARB_fragment_program limits - don't think Mesa actually * validates programs against these, and in any case one ARB * instruction can translate to more than one HW instruction, so diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 52f09a4b1b2..2db10c60e99 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -162,12 +162,12 @@ src_vector(struct i915_fragment_program *p, GET_SWZ(source->Swizzle, 1), GET_SWZ(source->Swizzle, 2), GET_SWZ(source->Swizzle, 3)); - if (source->NegateBase) + if (source->Negate) src = negate(src, - GET_BIT(source->NegateBase, 0), - GET_BIT(source->NegateBase, 1), - GET_BIT(source->NegateBase, 2), - GET_BIT(source->NegateBase, 3)); + GET_BIT(source->Negate, 0), + GET_BIT(source->Negate, 1), + GET_BIT(source->Negate, 2), + GET_BIT(source->Negate, 3)); return src; } @@ -323,7 +323,8 @@ upload_program(struct i915_fragment_program *p) p->ctx->FragmentProgram._Current; const struct prog_instruction *inst = program->Base.Instructions; -/* _mesa_debug_fp_inst(program->Base.NumInstructions, inst); */ + if (INTEL_DEBUG & DEBUG_WM) + _mesa_print_program(&program->Base); /* Is this a parse-failed program? Ensure a valid program is * loaded, as the flagging of an error isn't sufficient to stop @@ -1049,9 +1050,6 @@ i915ProgramStringNotify(GLcontext * ctx, _mesa_append_fog_code(ctx, &p->FragProg); p->FragProg.FogOption = GL_NONE; } - - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_print_program(prog); } _tnl_program_string(ctx, target, prog); diff --git a/src/mesa/drivers/dri/i915/i915_reg.h b/src/mesa/drivers/dri/i915/i915_reg.h index 8891e11c6fd..84db58ea950 100644 --- a/src/mesa/drivers/dri/i915/i915_reg.h +++ b/src/mesa/drivers/dri/i915/i915_reg.h @@ -93,20 +93,6 @@ /* 3DSTATE_BIN_CONTROL p141 */ -/* p143 */ -#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) -/* Dword 1 */ -#define BUF_3D_ID_COLOR_BACK (0x3<<24) -#define BUF_3D_ID_DEPTH (0x7<<24) -#define BUF_3D_USE_FENCE (1<<23) -#define BUF_3D_TILED_SURFACE (1<<22) -#define BUF_3D_TILE_WALK_X 0 -#define BUF_3D_TILE_WALK_Y (1<<21) -#define BUF_3D_PITCH(x) (((x)/4)<<2) -/* Dword 2 */ -#define BUF_3D_ADDR(x) ((x) & ~0x3) - - /* 3DSTATE_CHROMA_KEY */ /* 3DSTATE_CLEAR_PARAMETERS, p150 */ @@ -155,6 +141,7 @@ /* p161 */ #define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) /* Dword 1 */ +#define CLASSIC_EARLY_DEPTH (1<<31) #define TEX_DEFAULT_COLOR_OGL (0<<30) #define TEX_DEFAULT_COLOR_D3D (1<<30) #define ZR_EARLY_DEPTH (1<<29) diff --git a/src/mesa/drivers/dri/i915/i915_tex.c b/src/mesa/drivers/dri/i915/i915_tex.c deleted file mode 100644 index e38d8fe79d1..00000000000 --- a/src/mesa/drivers/dri/i915/i915_tex.c +++ /dev/null @@ -1,78 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "main/glheader.h" -#include "main/mtypes.h" -#include "main/imports.h" -#include "main/simple_list.h" -#include "main/enums.h" -#include "main/image.h" -#include "main/mm.h" -#include "main/texstore.h" -#include "main/texformat.h" -#include "swrast/swrast.h" - -#include "texmem.h" - -#include "i915_context.h" -#include "i915_reg.h" - - - -static void -i915TexEnv(GLcontext * ctx, GLenum target, - GLenum pname, const GLfloat * param) -{ - struct i915_context *i915 = I915_CONTEXT(ctx); - - switch (pname) { - case GL_TEXTURE_LOD_BIAS:{ - GLuint unit = ctx->Texture.CurrentUnit; - GLint b = (int) ((*param) * 16.0); - if (b > 255) - b = 255; - if (b < -256) - b = -256; - I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit)); - i915->lodbias_ss2[unit] = - ((b << SS2_LOD_BIAS_SHIFT) & SS2_LOD_BIAS_MASK); - break; - } - - default: - break; - } -} - - -void -i915InitTextureFuncs(struct dd_function_table *functions) -{ -/* - functions->TexEnv = i915TexEnv; -*/ -} diff --git a/src/mesa/drivers/dri/i915/i915_tex_layout.c b/src/mesa/drivers/dri/i915/i915_tex_layout.c index d44a2f47b37..d9588e5b56d 100644 --- a/src/mesa/drivers/dri/i915/i915_tex_layout.c +++ b/src/mesa/drivers/dri/i915/i915_tex_layout.c @@ -55,6 +55,17 @@ static GLint step_offsets[6][2] = { [FACE_NEG_Z] = {-1, 1}, }; + +static GLint bottom_offsets[6] = { + [FACE_POS_X] = 16 + 0 * 8, + [FACE_POS_Y] = 16 + 1 * 8, + [FACE_POS_Z] = 16 + 2 * 8, + [FACE_NEG_X] = 16 + 3 * 8, + [FACE_NEG_Y] = 16 + 4 * 8, + [FACE_NEG_Z] = 16 + 5 * 8, +}; + + /** * Cube texture map layout for i830M-GM915. * @@ -101,7 +112,8 @@ static GLint step_offsets[6][2] = { */ static void i915_miptree_layout_cube(struct intel_context *intel, - struct intel_mipmap_tree * mt) + struct intel_mipmap_tree * mt, + uint32_t tiling) { const GLuint dim = mt->width0; GLuint face; @@ -111,7 +123,7 @@ i915_miptree_layout_cube(struct intel_context *intel, assert(lvlWidth == lvlHeight); /* cubemap images are square */ /* double pitch for cube layouts */ - mt->pitch = intel_miptree_pitch_align (intel, mt, dim * 2); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, dim * 2); mt->total_height = dim * 4; for (level = mt->first_level; level <= mt->last_level; level++) { @@ -145,7 +157,8 @@ i915_miptree_layout_cube(struct intel_context *intel, static void i915_miptree_layout_3d(struct intel_context *intel, - struct intel_mipmap_tree * mt) + struct intel_mipmap_tree * mt, + uint32_t tiling) { GLuint width = mt->width0; GLuint height = mt->height0; @@ -154,7 +167,7 @@ i915_miptree_layout_3d(struct intel_context *intel, GLint level; /* Calculate the size of a single slice. */ - mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0); /* XXX: hardware expects/requires 9 levels at minimum. */ for (level = mt->first_level; level <= MAX2(8, mt->last_level); level++) { @@ -189,14 +202,15 @@ i915_miptree_layout_3d(struct intel_context *intel, static void i915_miptree_layout_2d(struct intel_context *intel, - struct intel_mipmap_tree * mt) + struct intel_mipmap_tree * mt, + uint32_t tiling) { GLuint width = mt->width0; GLuint height = mt->height0; GLuint img_height; GLint level; - mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0); mt->total_height = 0; for (level = mt->first_level; level <= mt->last_level; level++) { @@ -217,19 +231,20 @@ i915_miptree_layout_2d(struct intel_context *intel, } GLboolean -i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt) +i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt, + uint32_t tiling) { switch (mt->target) { case GL_TEXTURE_CUBE_MAP: - i915_miptree_layout_cube(intel, mt); + i915_miptree_layout_cube(intel, mt, tiling); break; case GL_TEXTURE_3D: - i915_miptree_layout_3d(intel, mt); + i915_miptree_layout_3d(intel, mt, tiling); break; case GL_TEXTURE_1D: case GL_TEXTURE_2D: case GL_TEXTURE_RECTANGLE_ARB: - i915_miptree_layout_2d(intel, mt); + i915_miptree_layout_2d(intel, mt, tiling); break; default: _mesa_problem(NULL, "Unexpected tex target in i915_miptree_layout()"); @@ -297,7 +312,7 @@ i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt) * +---+ +---+ +---+ +---+ +---+ +---+ * * The bottom row continues with the remaining 2x2 then the 1x1 mip contents - * in order, with each of them aligned to a 4x4 block boundary. Thus, for + * in order, with each of them aligned to a 8x8 block boundary. Thus, for * 32x32 cube maps and smaller, the bottom row layout is going to dictate the * pitch of the tree. For a tree with 4x4 images, the pitch is at least * 14 * 8 = 112 texels, for 2x2 it is at least 12 * 8 texels, and for 1x1 @@ -306,7 +321,8 @@ i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt) static void i945_miptree_layout_cube(struct intel_context *intel, - struct intel_mipmap_tree * mt) + struct intel_mipmap_tree * mt, + uint32_t tiling) { const GLuint dim = mt->width0; GLuint face; @@ -320,9 +336,9 @@ i945_miptree_layout_cube(struct intel_context *intel, * or the final row of 4x4, 2x2 and 1x1 faces below this. */ if (dim > 32) - mt->pitch = intel_miptree_pitch_align (intel, mt, dim * 2); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, dim * 2); else - mt->pitch = intel_miptree_pitch_align (intel, mt, 14 * 8); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, 14 * 8); if (dim >= 4) mt->total_height = dim * 4 + 4; @@ -375,10 +391,11 @@ i945_miptree_layout_cube(struct intel_context *intel, x = (face - 4) * 8; break; } + break; case 2: y = mt->total_height - 4; - x = 16 + face * 8; + x = bottom_offsets[face]; break; case 1: @@ -396,7 +413,8 @@ i945_miptree_layout_cube(struct intel_context *intel, static void i945_miptree_layout_3d(struct intel_context *intel, - struct intel_mipmap_tree * mt) + struct intel_mipmap_tree * mt, + uint32_t tiling) { GLuint width = mt->width0; GLuint height = mt->height0; @@ -405,7 +423,7 @@ i945_miptree_layout_3d(struct intel_context *intel, GLuint pack_y_pitch; GLuint level; - mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0); mt->total_height = 0; pack_y_pitch = MAX2(mt->height0, 2); @@ -450,19 +468,23 @@ i945_miptree_layout_3d(struct intel_context *intel, } GLboolean -i945_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt) +i945_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt, + uint32_t tiling) { switch (mt->target) { case GL_TEXTURE_CUBE_MAP: - i945_miptree_layout_cube(intel, mt); + if (mt->compressed) + i945_miptree_layout_cube(intel, mt, tiling); + else + i915_miptree_layout_cube(intel, mt, tiling); break; case GL_TEXTURE_3D: - i945_miptree_layout_3d(intel, mt); + i945_miptree_layout_3d(intel, mt, tiling); break; case GL_TEXTURE_1D: case GL_TEXTURE_2D: case GL_TEXTURE_RECTANGLE_ARB: - i945_miptree_layout_2d(intel, mt); + i945_miptree_layout_2d(intel, mt, tiling); break; default: _mesa_problem(NULL, "Unexpected tex target in i945_miptree_layout()"); diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index 43f65392b56..32d4b30cf9a 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -132,7 +132,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage; GLuint *state = i915->state.Tex[unit], format, pitch; - GLint lodbias; + GLint lodbias, aniso = 0; GLubyte border[4]; memset(state, 0, sizeof(state)); @@ -185,8 +185,13 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) state[I915_TEXREG_MS3] = (((firstImage->Height - 1) << MS3_HEIGHT_SHIFT) | - ((firstImage->Width - 1) << MS3_WIDTH_SHIFT) | format | - MS3_USE_FENCE_REGS); + ((firstImage->Width - 1) << MS3_WIDTH_SHIFT) | format); + + if (intelObj->mt->region->tiling != I915_TILING_NONE) { + state[I915_TEXREG_MS3] |= MS3_TILED_SURFACE; + if (intelObj->mt->region->tiling == I915_TILING_Y) + state[I915_TEXREG_MS3] |= MS3_TILE_WALK; + } state[I915_TEXREG_MS4] = ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) | MS4_CUBE_FACE_ENA_MASK | @@ -230,6 +235,10 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) if (tObj->MaxAnisotropy > 1.0) { minFilt = FILTER_ANISOTROPIC; magFilt = FILTER_ANISOTROPIC; + if (tObj->MaxAnisotropy > 2.0) + aniso = SS2_MAX_ANISO_4; + else + aniso = SS2_MAX_ANISO_2; } else { switch (tObj->MagFilter) { @@ -275,7 +284,8 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) state[I915_TEXREG_SS2] |= ((minFilt << SS2_MIN_FILTER_SHIFT) | (mipFilt << SS2_MIP_FILTER_SHIFT) | - (magFilt << SS2_MAG_FILTER_SHIFT)); + (magFilt << SS2_MAG_FILTER_SHIFT) | + aniso); } { diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 3f6d282d342..2fca247af19 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -32,6 +32,7 @@ #include "main/imports.h" #include "main/macros.h" #include "main/colormac.h" +#include "main/texformat.h" #include "tnl/t_context.h" #include "tnl/t_vertex.h" @@ -40,6 +41,8 @@ #include "intel_tex.h" #include "intel_regions.h" #include "intel_tris.h" +#include "intel_fbo.h" +#include "intel_chipset.h" #include "i915_reg.h" #include "i915_context.h" @@ -527,6 +530,23 @@ i915_destroy_context(struct intel_context *intel) _tnl_free_vertices(&intel->ctx); } +void +i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region, + uint32_t buffer_id) +{ + state[0] = _3DSTATE_BUF_INFO_CMD; + state[1] = buffer_id; + + if (region != NULL) { + state[1] |= BUF_3D_PITCH(region->pitch * region->cpp); + + if (region->tiling != I915_TILING_NONE) { + state[1] |= BUF_3D_TILED_SURFACE; + if (region->tiling == I915_TILING_Y) + state[1] |= BUF_3D_TILE_WALK_Y; + } + } +} /** * Set the drawing regions for the color and depth/stencil buffers. @@ -542,6 +562,8 @@ i915_state_draw_region(struct intel_context *intel, { struct i915_context *i915 = i915_context(&intel->ctx); GLcontext *ctx = &intel->ctx; + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); GLuint value; ASSERT(state == &i915->state || state == &i915->meta); @@ -558,21 +580,11 @@ i915_state_draw_region(struct intel_context *intel, /* * Set stride/cpp values */ - if (color_region) { - state->Buffer[I915_DESTREG_CBUFADDR0] = _3DSTATE_BUF_INFO_CMD; - state->Buffer[I915_DESTREG_CBUFADDR1] = - (BUF_3D_ID_COLOR_BACK | - BUF_3D_PITCH(color_region->pitch * color_region->cpp) | - BUF_3D_USE_FENCE); - } + i915_set_buf_info_for_region(&state->Buffer[I915_DESTREG_CBUFADDR0], + color_region, BUF_3D_ID_COLOR_BACK); - if (depth_region) { - state->Buffer[I915_DESTREG_DBUFADDR0] = _3DSTATE_BUF_INFO_CMD; - state->Buffer[I915_DESTREG_DBUFADDR1] = - (BUF_3D_ID_DEPTH | - BUF_3D_PITCH(depth_region->pitch * depth_region->cpp) | - BUF_3D_USE_FENCE); - } + i915_set_buf_info_for_region(&state->Buffer[I915_DESTREG_DBUFADDR0], + depth_region, BUF_3D_ID_DEPTH); /* * Compute/set I915_DESTREG_DV1 value @@ -580,12 +592,34 @@ i915_state_draw_region(struct intel_context *intel, value = (DSTORG_HORT_BIAS(0x8) | /* .5 */ DSTORG_VERT_BIAS(0x8) | /* .5 */ LOD_PRECLAMP_OGL | TEX_DEFAULT_COLOR_OGL); - if (color_region && color_region->cpp == 4) { - value |= DV_PF_8888; - } - else { - value |= (DITHER_FULL_ALWAYS | DV_PF_565); + if (irb != NULL) { + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_ARGB8888: + value |= DV_PF_8888; + break; + case MESA_FORMAT_RGB565: + value |= DV_PF_565 | DITHER_FULL_ALWAYS; + break; + case MESA_FORMAT_ARGB1555: + value |= DV_PF_1555 | DITHER_FULL_ALWAYS; + break; + case MESA_FORMAT_ARGB4444: + value |= DV_PF_4444 | DITHER_FULL_ALWAYS; + break; + default: + _mesa_problem(ctx, "Bad renderbuffer format: %d\n", + irb->texformat->MesaFormat); + } } + + /* This isn't quite safe, thus being hidden behind an option. When changing + * the value of this bit, the pipeline needs to be MI_FLUSHed. And it + * can only be set when a depth buffer is already defined. + */ + if (IS_945(intel->intelScreen->deviceID) && intel->use_early_z && + depth_region->tiling != I915_TILING_NONE) + value |= CLASSIC_EARLY_DEPTH; + if (depth_region && depth_region->cpp == 4) { value |= DEPTH_FRMT_24_FIXED_8_OTHER; } diff --git a/src/mesa/drivers/dri/i915/intel_generatemipmap.c b/src/mesa/drivers/dri/i915/intel_generatemipmap.c new file mode 120000 index 00000000000..4c6b37ada01 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_generatemipmap.c @@ -0,0 +1 @@ +../intel/intel_generatemipmap.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 2934414d99a..9712c387254 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -14,6 +14,7 @@ DRIVER_SOURCES = \ intel_decode.c \ intel_extensions.c \ intel_fbo.c \ + intel_generatemipmap.c \ intel_mipmap_tree.c \ intel_regions.c \ intel_screen.c \ @@ -69,6 +70,7 @@ DRIVER_SOURCES = \ brw_vs_constval.c \ brw_vs_emit.c \ brw_vs_state.c \ + brw_vs_surface_state.c \ brw_vtbl.c \ brw_wm.c \ brw_wm_debug.c \ diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index d96ff293102..4dbe551d832 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -118,6 +118,8 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, ctx->Const.MaxCubeTextureLevels = 12; ctx->Const.MaxTextureRectSize = (1<<12); + ctx->Const.MaxTextureMaxAnisotropy = 16.0; + /* if conformance mode is set, swrast can handle any size AA point */ ctx->Const.MaxPointSizeAA = 255.0; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 01e07c967fa..873fc8ffff6 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -131,6 +131,7 @@ struct brw_context; #define BRW_NEW_WM_INPUT_DIMENSIONS 0x100 #define BRW_NEW_INPUT_VARYING 0x200 #define BRW_NEW_PSP 0x800 +#define BRW_NEW_WM_SURFACES 0x1000 #define BRW_NEW_FENCE 0x2000 #define BRW_NEW_INDICES 0x4000 #define BRW_NEW_VERTICES 0x8000 @@ -141,7 +142,8 @@ struct brw_context; #define BRW_NEW_BATCH 0x10000 /** brw->depth_region updated */ #define BRW_NEW_DEPTH_BUFFER 0x20000 -#define BRW_NEW_NR_SURFACES 0x40000 +#define BRW_NEW_NR_WM_SURFACES 0x40000 +#define BRW_NEW_NR_VS_SURFACES 0x80000 struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -159,6 +161,8 @@ struct brw_state_flags { struct brw_vertex_program { struct gl_vertex_program program; GLuint id; + dri_bo *const_buffer; /** Program constant buffer/surface */ + GLboolean use_const_buffer; }; @@ -168,8 +172,8 @@ struct brw_fragment_program { GLuint id; /**< serial no. to identify frag progs, never re-used */ GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ - /** Program constant buffer/surface */ - dri_bo *const_buffer; + dri_bo *const_buffer; /** Program constant buffer/surface */ + GLboolean use_const_buffer; }; @@ -186,7 +190,7 @@ struct brw_wm_prog_data { GLuint total_grf; GLuint total_scratch; - GLuint nr_params; + GLuint nr_params; /**< number of float params/constants */ GLboolean error; /* Pointer to tracked values (only valid once @@ -225,6 +229,7 @@ struct brw_vs_prog_data { GLuint urb_read_length; GLuint total_grf; GLuint outputs_written; + GLuint nr_params; /**< number of float params/constants */ GLuint inputs_read; @@ -241,16 +246,38 @@ struct brw_vs_ouput_sizes { }; +/** Number of general purpose registers (VS, WM, etc) */ +#define BRW_MAX_GRF 128 + /** Number of texture sampler units */ #define BRW_MAX_TEX_UNIT 16 /** - * Size of our surface binding table. + * Size of our surface binding table for the WM. * This contains pointers to the drawing surfaces and current texture - * objects and shader constant buffer (+1). + * objects and shader constant buffers (+2). */ #define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) +/** + * Helpers to convert drawing buffers, textures and constant buffers + * to surface binding table indexes, for WM. + */ +#define SURF_INDEX_DRAW(d) (d) +#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS) +#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 1 + (t)) + +/** + * Size of surface binding table for the VS. + * Only one constant buffer for now. + */ +#define BRW_VS_MAX_SURF 1 + +/** + * Only a VS constant buffer + */ +#define SURF_INDEX_VERT_CONST_BUFFER 0 + enum brw_cache_id { BRW_CC_VP, @@ -427,8 +454,6 @@ struct brw_context struct { struct brw_state_flags dirty; - struct brw_tracked_state **atoms; - GLuint nr_atoms; GLuint nr_color_regions; struct intel_region *color_regions[MAX_DRAW_BUFFERS]; @@ -448,7 +473,8 @@ struct brw_context int validated_bo_count; } state; - struct brw_cache cache; + struct brw_cache cache; /** non-surface items */ + struct brw_cache surface_cache; /* surface items */ struct brw_cached_batch_item *cached_batch_items; struct { @@ -532,11 +558,6 @@ struct brw_context GLuint vs_size; GLuint total_size; - /* Dynamic tracker which changes to reflect the state referenced - * by active fp and vp program parameters: - */ - struct brw_tracked_state tracked_state; - dri_bo *curbe_bo; /** Offset within curbe_bo of space for current curbe entry */ GLuint curbe_offset; @@ -557,6 +578,11 @@ struct brw_context dri_bo *prog_bo; dri_bo *state_bo; + + /** Binding table of pointers to surf_bo entries */ + dri_bo *bind_bo; + dri_bo *surf_bo[BRW_VS_MAX_SURF]; + GLuint nr_surfaces; } vs; struct { diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index a6bfb7507e7..a1a6c53d0e0 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -36,6 +36,7 @@ #include "main/macros.h" #include "main/enums.h" #include "shader/prog_parameter.h" +#include "shader/prog_print.h" #include "shader/prog_statevars.h" #include "intel_batchbuffer.h" #include "intel_regions.h" @@ -45,17 +46,21 @@ #include "brw_util.h" -/* Partition the CURBE between the various users of constant values: +/** + * Partition the CURBE between the various users of constant values: + * Note that vertex and fragment shaders can now fetch constants out + * of constant buffers. We no longer allocatea block of the GRF for + * constants. That greatly reduces the demand for space in the CURBE. + * Some of the comments within are dated... */ static void calculate_curbe_offsets( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; /* CACHE_NEW_WM_PROG */ - GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; + const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; /* BRW_NEW_VERTEX_PROGRAM */ - const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); - GLuint nr_vp_regs = (vp->program.Base.Parameters->NumParameters * 4 + 15) / 16; + const GLuint nr_vp_regs = (brw->vs.prog_data->nr_params + 15) / 16; GLuint nr_clip_regs = 0; GLuint total_regs; @@ -184,13 +189,6 @@ static void prepare_constant_buffer(struct brw_context *brw) GLfloat *buf; GLuint i; - /* Update our own dependency flags. This works because this - * function will also be called whenever fp or vp changes. - */ - brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION); - brw->curbe.tracked_state.dirty.mesa |= vp->program.Base.Parameters->StateFlags; - brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags; - if (sz == 0) { if (brw->curbe.last_buf) { free(brw->curbe.last_buf); @@ -248,7 +246,7 @@ static void prepare_constant_buffer(struct brw_context *brw) /* vertex shader constants */ if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; - GLuint nr = vp->program.Base.Parameters->NumParameters; + GLuint nr = brw->vs.prog_data->nr_params / 4; _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); @@ -331,40 +329,11 @@ static void prepare_constant_buffer(struct brw_context *brw) */ } - -/** - * Vertex/fragment shader constants are stored in a pseudo 1D texture. - * This function updates the constants in that buffer. - */ -static void -update_texture_constant_buffer(struct brw_context *brw) -{ - struct brw_fragment_program *fp = - (struct brw_fragment_program *) brw->fragment_program; - const struct gl_program_parameter_list *params = fp->program.Base.Parameters; - const int size = params->NumParameters * 4 * sizeof(GLfloat); - - assert(fp->const_buffer); - assert(fp->const_buffer->size >= size); - - /* copy constants into the buffer */ - if (size > 0) { - GLubyte *map; - dri_bo_map(fp->const_buffer, GL_TRUE); - map = fp->const_buffer->virtual; - memcpy(map, params->ParameterValues, size); - dri_bo_unmap(fp->const_buffer); - } -} - - static void emit_constant_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; GLuint sz = brw->curbe.total_size; - update_texture_constant_buffer(brw); - BEGIN_BATCH(2, IGNORE_CLIPRECTS); if (sz == 0) { OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2)); @@ -386,7 +355,7 @@ static void emit_constant_buffer(struct brw_context *brw) */ const struct brw_tracked_state brw_constant_buffer = { .dirty = { - .mesa = (_NEW_TRANSFORM|_NEW_PROJECTION), /* plus fp and vp flags */ + .mesa = _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_VERTEX_PROGRAM | BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index b91b20bec6f..1b8bcc14ec0 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -343,7 +343,7 @@ static void brw_prepare_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); - GLuint tmp = brw->vs.prog_data->inputs_read; + GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; GLuint i; const unsigned char *ptr = NULL; GLuint interleave = 0; @@ -362,11 +362,11 @@ static void brw_prepare_vertices(struct brw_context *brw) _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); /* Accumulate the list of enabled arrays. */ - while (tmp) { - GLuint i = _mesa_ffsll(tmp)-1; + while (vs_inputs) { + GLuint i = _mesa_ffsll(vs_inputs) - 1; struct brw_vertex_element *input = &brw->vb.inputs[i]; - tmp &= ~(1<<i); + vs_inputs &= ~(1 << i); enabled[nr_enabled++] = input; } @@ -477,17 +477,17 @@ static void brw_emit_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); - GLuint tmp = brw->vs.prog_data->inputs_read; + GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; GLuint i; GLuint nr_enabled = 0; /* Accumulate the list of enabled arrays. */ - while (tmp) { - i = _mesa_ffsll(tmp)-1; + while (vs_inputs) { + i = _mesa_ffsll(vs_inputs) - 1; struct brw_vertex_element *input = &brw->vb.inputs[i]; - tmp &= ~(1<<i); + vs_inputs &= ~(1 << i); enabled[nr_enabled++] = input; } diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index d05f2e6c410..bc7756ceab4 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -97,7 +97,7 @@ struct brw_glsl_call; #define BRW_EU_MAX_INSN_STACK 5 -#define BRW_EU_MAX_INSN 4000 +#define BRW_EU_MAX_INSN 10000 struct brw_compile { struct brw_instruction store[BRW_EU_MAX_INSN]; @@ -862,9 +862,17 @@ void brw_dp_READ_4( struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, GLboolean relAddr, - GLuint scratch_offset, + GLuint location, GLuint bind_table_index ); +void brw_dp_READ_4_vs( struct brw_compile *p, + struct brw_reg dest, + GLuint oword, + GLboolean relAddr, + struct brw_reg addrReg, + GLuint location, + GLuint bind_table_index ); + void brw_dp_WRITE_16( struct brw_compile *p, struct brw_reg src, GLuint msg_reg_nr, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 21ce8369db4..60ea44f7a96 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -952,7 +952,7 @@ void brw_dp_READ_16( struct brw_compile *p, /** * Read a float[4] vector from the data port Data Cache (const buffer). - * Scratch offset should be a multiple of 16. + * Location (in buffer) should be a multiple of 16. * Used for fetching shader constants. * If relAddr is true, we'll do an indirect fetch using the address register. */ @@ -960,7 +960,7 @@ void brw_dp_READ_4( struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, GLboolean relAddr, - GLuint scratch_offset, + GLuint location, GLuint bind_table_index ) { { @@ -969,18 +969,20 @@ void brw_dp_READ_4( struct brw_compile *p, brw_set_mask_control(p, BRW_MASK_DISABLE); /* set message header global offset field (reg 0, element 2) */ + /* Note that grf[0] will be copied to mrf[1] implicitly by the SEND instr */ brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD), - brw_imm_d(scratch_offset)); + brw_imm_d(location)); brw_pop_insn_state(p); } { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - insn->header.predicate_control = 0; /* XXX */ + insn->header.predicate_control = BRW_PREDICATE_NONE; insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; /* cast dest to a uword[8] vector */ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); @@ -989,7 +991,7 @@ void brw_dp_READ_4( struct brw_compile *p, brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); brw_set_dp_read_message(insn, - bind_table_index, /* binding table index (255=stateless) */ + bind_table_index, 0, /* msg_control (0 means 1 Oword) */ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 0, /* source cache = data cache */ @@ -1000,6 +1002,78 @@ void brw_dp_READ_4( struct brw_compile *p, } +/** + * Read float[4] constant(s) from VS constant buffer. + * For relative addressing, two float[4] constants will be read into 'dest'. + * Otherwise, one float[4] constant will be read into the lower half of 'dest'. + */ +void brw_dp_READ_4_vs(struct brw_compile *p, + struct brw_reg dest, + GLuint oword, + GLboolean relAddr, + struct brw_reg addrReg, + GLuint location, + GLuint bind_table_index) +{ + GLuint msg_reg_nr = 1; + + assert(oword < 2); + /* + printf("vs const read msg, location %u, msg_reg_nr %d\n", + location, msg_reg_nr); + */ + + /* Setup MRF[1] with location/offset into const buffer */ + { + struct brw_reg b; + + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + /*brw_set_access_mode(p, BRW_ALIGN_16);*/ + + /* XXX I think we're setting all the dwords of MRF[1] to 'location'. + * when the docs say only dword[2] should be set. Hmmm. But it works. + */ + b = brw_message_reg(msg_reg_nr); + b = retype(b, BRW_REGISTER_TYPE_UD); + /*b = get_element_ud(b, 2);*/ + if (relAddr) { + brw_ADD(p, b, addrReg, brw_imm_ud(location)); + } + else { + brw_MOV(p, b, brw_imm_ud(location)); + } + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; + /*insn->header.access_mode = BRW_ALIGN_16;*/ + + brw_set_dest(insn, dest); + brw_set_src0(insn, brw_null_reg()); + + brw_set_dp_read_message(insn, + bind_table_index, + oword, /* 0 = lower Oword, 1 = upper Oword */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 0, /* source cache = data cache */ + 1, /* msg_length */ + 1, /* response_length (1 Oword) */ + 0); /* eot */ + } +} + + + void brw_fb_WRITE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 5c94a49f60a..4784254bc7d 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -101,6 +101,7 @@ const struct brw_tracked_state brw_drawing_rect = { static void prepare_binding_table_pointers(struct brw_context *brw) { + brw_add_validated_bo(brw, brw->vs.bind_bo); brw_add_validated_bo(brw, brw->wm.bind_bo); } @@ -117,13 +118,14 @@ static void upload_binding_table_pointers(struct brw_context *brw) BEGIN_BATCH(6, IGNORE_CLIPRECTS); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); - OUT_BATCH(0); /* vs */ + if (brw->vs.bind_bo != NULL) + OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ + else + OUT_BATCH(0); OUT_BATCH(0); /* gs */ OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ - OUT_RELOC(brw->wm.bind_bo, - I915_GEM_DOMAIN_SAMPLER, 0, - 0); + OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */ ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 457bc2fc7f4..bac69187c19 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -95,6 +95,12 @@ static struct gl_program *brwNewProgram( GLcontext *ctx, static void brwDeleteProgram( GLcontext *ctx, struct gl_program *prog ) { + if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; + struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog); + dri_bo_unreference(brw_fprog->const_buffer); + } + _mesa_delete_program( ctx, prog ); } @@ -111,7 +117,6 @@ static void brwProgramStringNotify( GLcontext *ctx, struct gl_program *prog ) { struct brw_context *brw = brw_context(ctx); - struct intel_context *intel = &brw->intel; if (target == GL_FRAGMENT_PROGRAM_ARB) { struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; @@ -128,24 +133,6 @@ static void brwProgramStringNotify( GLcontext *ctx, brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; newFP->id = brw->program_id++; newFP->isGLSL = brw_wm_is_glsl(fprog); - - /* alloc constant buffer/surface */ - { - const struct gl_program_parameter_list *params = prog->Parameters; - const int size = params->NumParameters * 4 * sizeof(GLfloat); - - /* free old const buffer if too small */ - if (newFP->const_buffer && newFP->const_buffer->size < size) { - dri_bo_unreference(newFP->const_buffer); - newFP->const_buffer = NULL; - } - - if (!newFP->const_buffer) { - newFP->const_buffer = drm_intel_bo_alloc(intel->bufmgr, - "fp_const_buffer", - size, 64); - } - } } else if (target == GL_VERTEX_PROGRAM_ARB) { struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog; diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index fc4eddda0a5..c99918724b3 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -66,7 +66,9 @@ static void upload_sf_vp(struct brw_context *brw) sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias; sfv.viewport.m32 = v[MAT_TZ] * depth_scale; - /* _NEW_SCISSOR */ + /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT + * for DrawBuffer->_[XY]{min,max} + */ /* The scissor only needs to handle the intersection of drawable and * scissor rect. Clipping to the boundaries of static shared buffers @@ -97,7 +99,8 @@ static void upload_sf_vp(struct brw_context *brw) const struct brw_tracked_state brw_sf_vp = { .dirty = { .mesa = (_NEW_VIEWPORT | - _NEW_SCISSOR), + _NEW_SCISSOR | + _NEW_BUFFERS), .brw = 0, .cache = 0 }, @@ -147,7 +150,7 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) key->line_smooth = ctx->Line.SmoothFlag; key->point_sprite = ctx->Point.PointSprite; - key->point_size = ctx->Point.Size; + key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); key->point_attenuated = ctx->Point._Attenuated; key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 81b0a45998f..bf9f6cae55e 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -72,11 +72,13 @@ const struct brw_tracked_state brw_sf_vp; const struct brw_tracked_state brw_state_base_address; const struct brw_tracked_state brw_urb_fence; const struct brw_tracked_state brw_vertex_state; +const struct brw_tracked_state brw_vs_surfaces; const struct brw_tracked_state brw_vs_prog; const struct brw_tracked_state brw_vs_unit; const struct brw_tracked_state brw_wm_input_sizes; const struct brw_tracked_state brw_wm_prog; const struct brw_tracked_state brw_wm_samplers; +const struct brw_tracked_state brw_wm_constant_surface; const struct brw_tracked_state brw_wm_surfaces; const struct brw_tracked_state brw_wm_unit; @@ -91,6 +93,20 @@ const struct brw_tracked_state brw_drawing_rect; const struct brw_tracked_state brw_indices; const struct brw_tracked_state brw_vertices; +/** + * Use same key for WM and VS surfaces. + */ +struct brw_surface_key { + GLenum target, depthmode; + dri_bo *bo; + GLint format, internal_format; + GLint first_level, last_level; + GLint width, height, depth; + GLint pitch, cpp; + uint32_t tiling; + GLuint offset; +}; + /*********************************************************************** * brw_state.c */ @@ -135,8 +151,8 @@ dri_bo *brw_search_cache( struct brw_cache *cache, void *aux_return); void brw_state_cache_check_size( struct brw_context *brw ); -void brw_init_cache( struct brw_context *brw ); -void brw_destroy_cache( struct brw_context *brw ); +void brw_init_caches( struct brw_context *brw ); +void brw_destroy_caches( struct brw_context *brw ); /*********************************************************************** * brw_state_batch.c @@ -150,4 +166,9 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, void brw_destroy_batch_cache( struct brw_context *brw ); void brw_clear_batch_cache_flush( struct brw_context *brw ); +/* brw_wm_surface_state.c */ +dri_bo * +brw_create_constant_surface( struct brw_context *brw, + struct brw_surface_key *key ); + #endif diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index d5b51664066..e40d7a04164 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -56,9 +56,9 @@ * incorrect program is run for the other instance. */ +#include "main/imports.h" #include "brw_state.h" #include "intel_batchbuffer.h" -#include "main/imports.h" /* XXX: Fixme - have to include these to get the sizes of the prog_key * structs: @@ -69,8 +69,10 @@ #include "brw_sf.h" #include "brw_gs.h" -static GLuint hash_key( const void *key, GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs) + +static GLuint +hash_key(const void *key, GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs) { GLuint *ikey = (GLuint *)key; GLuint hash = 0, i; @@ -95,6 +97,7 @@ static GLuint hash_key( const void *key, GLuint key_size, return hash; } + /** * Marks a new buffer as being chosen for the given cache id. */ @@ -111,6 +114,7 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, cache->brw->state.dirty.cache |= 1 << cache_id; } + static struct brw_cache_item * search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, GLuint hash, const void *key, GLuint key_size, @@ -143,7 +147,8 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, } -static void rehash( struct brw_cache *cache ) +static void +rehash(struct brw_cache *cache) { struct brw_cache_item **items; struct brw_cache_item *c, *next; @@ -164,15 +169,17 @@ static void rehash( struct brw_cache *cache ) cache->size = size; } + /** * Returns the buffer object matching cache_id and key, or NULL. */ -dri_bo *brw_search_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs, - void *aux_return ) +dri_bo * +brw_search_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs, + void *aux_return) { struct brw_cache_item *item; GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); @@ -192,6 +199,7 @@ dri_bo *brw_search_cache( struct brw_cache *cache, return item->bo; } + dri_bo * brw_upload_cache( struct brw_cache *cache, enum brw_cache_id cache_id, @@ -265,7 +273,9 @@ brw_upload_cache( struct brw_cache *cache, return bo; } -/* This doesn't really work with aux data. Use search/upload instead + +/** + * This doesn't really work with aux data. Use search/upload instead */ dri_bo * brw_cache_data_sz(struct brw_cache *cache, @@ -296,6 +306,7 @@ brw_cache_data_sz(struct brw_cache *cache, return bo; } + /** * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. * @@ -319,21 +330,22 @@ enum pool_type { DW_GENERAL_STATE }; + static void -brw_init_cache_id( struct brw_context *brw, - const char *name, - enum brw_cache_id id, - GLuint key_size, - GLuint aux_size) +brw_init_cache_id(struct brw_cache *cache, + const char *name, + enum brw_cache_id id, + GLuint key_size, + GLuint aux_size) { - struct brw_cache *cache = &brw->cache; - cache->name[id] = strdup(name); cache->key_size[id] = key_size; cache->aux_size[id] = aux_size; } -void brw_init_cache( struct brw_context *brw ) + +static void +brw_init_non_surface_cache(struct brw_context *brw) { struct brw_cache *cache = &brw->cache; @@ -342,114 +354,136 @@ void brw_init_cache( struct brw_context *brw ) cache->size = 7; cache->n_items = 0; cache->items = (struct brw_cache_item **) - _mesa_calloc(cache->size * - sizeof(struct brw_cache_item)); + _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CC_VP", BRW_CC_VP, sizeof(struct brw_cc_viewport), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT, sizeof(struct brw_cc_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG, sizeof(struct brw_wm_prog_key), sizeof(struct brw_wm_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR, sizeof(struct brw_sampler_default_color), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER, 0, /* variable key/data size */ 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT, sizeof(struct brw_wm_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG, sizeof(struct brw_sf_prog_key), sizeof(struct brw_sf_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_VP", BRW_SF_VP, sizeof(struct brw_sf_viewport), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT, sizeof(struct brw_sf_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT, sizeof(struct brw_vs_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG, sizeof(struct brw_vs_prog_key), sizeof(struct brw_vs_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT, sizeof(struct brw_clip_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG, sizeof(struct brw_clip_prog_key), sizeof(struct brw_clip_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT, sizeof(struct brw_gs_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG, sizeof(struct brw_gs_prog_key), sizeof(struct brw_gs_prog_data)); +} + + +static void +brw_init_surface_cache(struct brw_context *brw) +{ + struct brw_cache *cache = &brw->surface_cache; + + cache->brw = brw; - brw_init_cache_id(brw, + cache->size = 7; + cache->n_items = 0; + cache->items = (struct brw_cache_item **) + _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); + + brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE, sizeof(struct brw_surface_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SS_SURF_BIND", BRW_SS_SURF_BIND, 0, 0); } + +void +brw_init_caches(struct brw_context *brw) +{ + brw_init_non_surface_cache(brw); + brw_init_surface_cache(brw); +} + + static void -brw_clear_cache( struct brw_context *brw ) +brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) { struct brw_cache_item *c, *next; GLuint i; @@ -457,8 +491,8 @@ brw_clear_cache( struct brw_context *brw ) if (INTEL_DEBUG & DEBUG_STATE) _mesa_printf("%s\n", __FUNCTION__); - for (i = 0; i < brw->cache.size; i++) { - for (c = brw->cache.items[i]; c; c = next) { + for (i = 0; i < cache->size; i++) { + for (c = cache->items[i]; c; c = next) { int j; next = c->next; @@ -468,10 +502,10 @@ brw_clear_cache( struct brw_context *brw ) free((void *)c->key); free(c); } - brw->cache.items[i] = NULL; + cache->items[i] = NULL; } - brw->cache.n_items = 0; + cache->n_items = 0; if (brw->curbe.last_buf) { _mesa_free(brw->curbe.last_buf); @@ -483,25 +517,46 @@ brw_clear_cache( struct brw_context *brw ) brw->state.dirty.cache |= ~0; } -void brw_state_cache_check_size( struct brw_context *brw ) + +void +brw_state_cache_check_size(struct brw_context *brw) { + if (INTEL_DEBUG & DEBUG_STATE) + _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); + /* un-tuned guess. We've got around 20 state objects for a total of around * 32k, so 1000 of them is around 1.5MB. */ if (brw->cache.n_items > 1000) - brw_clear_cache(brw); + brw_clear_cache(brw, &brw->cache); + + if (brw->surface_cache.n_items > 1000) + brw_clear_cache(brw, &brw->surface_cache); } -void brw_destroy_cache( struct brw_context *brw ) + +static void +brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) { GLuint i; - brw_clear_cache(brw); + if (INTEL_DEBUG & DEBUG_STATE) + _mesa_printf("%s\n", __FUNCTION__); + + brw_clear_cache(brw, cache); for (i = 0; i < BRW_MAX_CACHE; i++) { - dri_bo_unreference(brw->cache.last_bo[i]); - free(brw->cache.name[i]); + dri_bo_unreference(cache->last_bo[i]); + free(cache->name[i]); } - free(brw->cache.items); - brw->cache.items = NULL; - brw->cache.size = 0; + free(cache->items); + cache->items = NULL; + cache->size = 0; +} + + +void +brw_destroy_caches(struct brw_context *brw) +{ + brw_destroy_cache(brw, &brw->cache); + brw_destroy_cache(brw, &brw->surface_cache); } diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 5d332d010c2..a7132622696 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -84,6 +84,19 @@ get_965_surfacetype(unsigned int surfacetype) } } +static const char * +get_965_surface_format(unsigned int surface_format) +{ + switch (surface_format) { + case 0x000: return "r32g32b32a32_float"; + case 0x0c1: return "b8g8r8a8_unorm"; + case 0x100: return "b5g6r5_unorm"; + case 0x102: return "b5g5r5a1_unorm"; + case 0x104: return "b4g4r4a4_unorm"; + default: return "unknown"; + } +} + static void dump_wm_surface_state(struct brw_context *brw) { int i; @@ -95,7 +108,7 @@ static void dump_wm_surface_state(struct brw_context *brw) char name[20]; if (surf_bo == NULL) { - fprintf(stderr, "WM SS%d: NULL\n", i); + fprintf(stderr, " WM SS%d: NULL\n", i); continue; } dri_bo_map(surf_bo, GL_FALSE); @@ -103,8 +116,9 @@ static void dump_wm_surface_state(struct brw_context *brw) surf = (struct brw_surface_state *)(surf_bo->virtual); sprintf(name, "WM SS%d", i); - state_out(name, surf, surfoff, 0, "%s\n", - get_965_surfacetype(surf->ss0.surface_type)); + state_out(name, surf, surfoff, 0, "%s %s\n", + get_965_surfacetype(surf->ss0.surface_type), + get_965_surface_format(surf->ss0.surface_format)); state_out(name, surf, surfoff, 1, "offset\n"); state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n", surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count); diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 5de1450e612..c6dfea4743c 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -59,11 +59,12 @@ const struct brw_tracked_state *atoms[] = &brw_curbe_offsets, &brw_recalculate_urb_fence, - &brw_cc_vp, &brw_cc_unit, - &brw_wm_surfaces, /* must do before samplers */ + &brw_vs_surfaces, /* must do before unit */ + &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ + &brw_wm_surfaces, /* must do before samplers and unit */ &brw_wm_samplers, &brw_wm_unit, @@ -88,54 +89,26 @@ const struct brw_tracked_state *atoms[] = &brw_line_stipple, &brw_aa_line_parameters, - /* Ordering of the commands below is documented as fixed. - */ -#if 0 - &brw_pipelined_state_pointers, - &brw_urb_fence, - &brw_constant_buffer_state, -#else + &brw_psp_urb_cbs, -#endif &brw_drawing_rect, &brw_indices, &brw_vertices, - NULL, /* brw_constant_buffer */ + &brw_constant_buffer }; void brw_init_state( struct brw_context *brw ) { - GLuint i; - - brw_init_cache(brw); - - brw->state.atoms = _mesa_malloc(sizeof(atoms)); - brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms); - _mesa_memcpy(brw->state.atoms, atoms, sizeof(atoms)); - - /* Patch in a pointer to the dynamic state atom: - */ - for (i = 0; i < brw->state.nr_atoms; i++) - if (brw->state.atoms[i] == NULL) - brw->state.atoms[i] = &brw->curbe.tracked_state; - - _mesa_memcpy(&brw->curbe.tracked_state, - &brw_constant_buffer, - sizeof(brw_constant_buffer)); + brw_init_caches(brw); } void brw_destroy_state( struct brw_context *brw ) { - if (brw->state.atoms) { - _mesa_free(brw->state.atoms); - brw->state.atoms = NULL; - } - - brw_destroy_cache(brw); + brw_destroy_caches(brw); brw_destroy_batch_cache(brw); } @@ -218,6 +191,7 @@ static struct dirty_bit_map mesa_bits[] = { DEFINE_BIT(_NEW_MULTISAMPLE), DEFINE_BIT(_NEW_TRACK_MATRIX), DEFINE_BIT(_NEW_PROGRAM), + DEFINE_BIT(_NEW_PROGRAM_CONSTANTS), {0, 0, 0} }; @@ -336,7 +310,7 @@ void brw_validate_state( struct brw_context *brw ) /* do prepare stage for all atoms */ for (i = 0; i < Elements(atoms); i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) break; @@ -367,8 +341,8 @@ void brw_upload_state(struct brw_context *brw) _mesa_memset(&examined, 0, sizeof(examined)); prev = *state; - for (i = 0; i < brw->state.nr_atoms; i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + for (i = 0; i < Elements(atoms); i++) { + const struct brw_tracked_state *atom = atoms[i]; struct brw_state_flags generated; assert(atom->dirty.mesa || @@ -397,7 +371,7 @@ void brw_upload_state(struct brw_context *brw) } else { for (i = 0; i < Elements(atoms); i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) break; diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 51a617fcb40..5c5455813a4 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -28,7 +28,6 @@ * Authors: * Keith Whitwell <[email protected]> */ - /* Code to layout images in a mipmap tree for i965. */ @@ -40,14 +39,15 @@ #define FILE_DEBUG_FLAG DEBUG_MIPTREE -GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_tree *mt ) +GLboolean brw_miptree_layout(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t tiling) { - /* XXX: these vary depending on image format: - */ -/* GLint align_w = 4; */ + /* XXX: these vary depending on image format: */ + /* GLint align_w = 4; */ switch (mt->target) { - case GL_TEXTURE_CUBE_MAP: + case GL_TEXTURE_CUBE_MAP: case GL_TEXTURE_3D: { GLuint width = mt->width0; GLuint height = mt->height0; @@ -59,25 +59,25 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t GLuint align_w = 4; mt->total_height = 0; - + if (mt->compressed) { align_w = intel_compressed_alignment(mt->internal_format); mt->pitch = ALIGN(width, align_w); pack_y_pitch = (height + 3) / 4; } else { - mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0); - pack_y_pitch = ALIGN(mt->height0, align_h); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0); + pack_y_pitch = ALIGN(mt->height0, align_h); } pack_x_pitch = mt->pitch; pack_x_nr = 1; - for ( level = mt->first_level ; level <= mt->last_level ; level++ ) { + for (level = mt->first_level ; level <= mt->last_level ; level++) { GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6; GLint x = 0; GLint y = 0; GLint q, j; - + intel_miptree_set_level_info(mt, level, nr_images, 0, mt->total_height, width, height, depth); @@ -89,7 +89,7 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t } x = 0; - y += pack_y_pitch; + y += pack_y_pitch; } @@ -98,40 +98,40 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t height = minify(height); depth = minify(depth); - if (mt->compressed) { - pack_y_pitch = (height + 3) / 4; - - if (pack_x_pitch > ALIGN(width, align_w)) { - pack_x_pitch = ALIGN(width, align_w); - pack_x_nr <<= 1; - } - } else { - if (pack_x_pitch > 4) { - pack_x_pitch >>= 1; - pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr <= mt->pitch); - } - - if (pack_y_pitch > 2) { - pack_y_pitch >>= 1; - pack_y_pitch = ALIGN(pack_y_pitch, align_h); - } - } + if (mt->compressed) { + pack_y_pitch = (height + 3) / 4; + + if (pack_x_pitch > ALIGN(width, align_w)) { + pack_x_pitch = ALIGN(width, align_w); + pack_x_nr <<= 1; + } + } else { + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr <= mt->pitch); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + pack_y_pitch = ALIGN(pack_y_pitch, align_h); + } + } } break; } default: - i945_miptree_layout_2d(intel, mt); + i945_miptree_layout_2d(intel, mt, tiling); break; } - DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, - mt->pitch, + DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + mt->pitch, mt->total_height, mt->cpp, mt->pitch * mt->total_height * mt->cpp ); - + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 99d0e937226..1e4f66091e3 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -75,6 +75,11 @@ struct brw_vs_compile { struct brw_reg userplane[6]; + /** we may need up to 3 constants per instruction (if use_const_buffer) */ + struct { + GLint index; + struct brw_reg reg; + } current_const[3]; }; void brw_vs_emit( struct brw_vs_compile *c ); diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c index d29eb17f8cf..2637344b482 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_constval.c +++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c @@ -96,7 +96,7 @@ static GLubyte get_active( struct tracker *t, struct prog_src_register src ) { GLuint i; - GLubyte active = src.NegateBase; /* NOTE! */ + GLubyte active = src.Negate; /* NOTE! */ if (src.RelAddr) return 0xf; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 0d6c6ab9a8a..d7f75e3685e 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -38,14 +38,49 @@ #include "brw_vs.h" +static struct brw_reg get_tmp( struct brw_vs_compile *c ) +{ + struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} -/* Do things as simply as possible. Allocate and populate all regs +static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + +static void release_tmps( struct brw_vs_compile *c ) +{ + c->last_tmp = c->first_tmp; +} + + +/** + * Preallocate GRF register before code emit. + * Do things as simply as possible. Allocate and populate all regs * ahead of time. */ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { GLuint i, reg = 0, mrf; - GLuint nr_params; + + /* Determine whether to use a real constant buffer or use a block + * of GRF registers for constants. The later is faster but only + * works if everything fits in the GRF. + * XXX this heuristic/check may need some fine tuning... + */ + if (c->vp->program.Base.Parameters->NumParameters + + c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF) + c->vp->use_const_buffer = GL_TRUE; + else + c->vp->use_const_buffer = GL_FALSE; + + /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/ /* r0 -- reserved as usual */ @@ -66,13 +101,22 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* Vertex program parameters from curbe: */ - nr_params = c->vp->program.Base.Parameters->NumParameters; - for (i = 0; i < nr_params; i++) { - c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); - } - reg += (nr_params + 1) / 2; + if (c->vp->use_const_buffer) { + /* get constants from a real constant buffer */ + c->prog_data.curb_read_length = 0; + c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */ + } + else { + /* use a section of the GRF for constants */ + GLuint nr_params = c->vp->program.Base.Parameters->NumParameters; + for (i = 0; i < nr_params; i++) { + c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); + } + reg += (nr_params + 1) / 2; + c->prog_data.curb_read_length = reg - 1; - c->prog_data.curb_read_length = reg - 1; + c->prog_data.nr_params = nr_params * 4; + } /* Allocate input regs: */ @@ -133,6 +177,14 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) reg++; } + if (c->vp->use_const_buffer) { + for (i = 0; i < 3; i++) { + c->current_const[i].index = -1; + c->current_const[i].reg = brw_vec8_grf(reg, 0); + reg++; + } + } + for (i = 0; i < 128; i++) { if (c->output_regs[i].used_in_src) { c->output_regs[i].reg = brw_vec8_grf(reg, 0); @@ -165,28 +217,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) } -static struct brw_reg get_tmp( struct brw_vs_compile *c ) -{ - struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); - - if (++c->last_tmp > c->prog_data.total_grf) - c->prog_data.total_grf = c->last_tmp; - - return tmp; -} - -static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) -{ - if (tmp.nr == c->last_tmp-1) - c->last_tmp--; -} - -static void release_tmps( struct brw_vs_compile *c ) -{ - c->last_tmp = c->first_tmp; -} - - /** * If an instruction uses a temp reg both as a src and the dest, we * sometimes need to allocate an intermediate temporary. @@ -633,6 +663,8 @@ static void emit_lit_noalias( struct brw_vs_compile *c, } brw_ENDIF(p, if_insn); + + release_tmp(c, tmp); } static void emit_lrp_noalias(struct brw_vs_compile *c, @@ -673,13 +705,84 @@ static void emit_nrm( struct brw_vs_compile *c, } +static struct brw_reg +get_constant(struct brw_vs_compile *c, + const struct prog_instruction *inst, + GLuint argIndex) +{ + const struct prog_src_register *src = &inst->SrcReg[argIndex]; + struct brw_compile *p = &c->func; + struct brw_reg const_reg; + struct brw_reg const2_reg; + const GLboolean relAddr = src->RelAddr; + + assert(argIndex < 3); + + if (c->current_const[argIndex].index != src->Index || relAddr) { + struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; + + c->current_const[argIndex].index = src->Index; + +#if 0 + printf(" fetch const[%d] for arg %d into reg %d\n", + src->Index, argIndex, c->current_const[argIndex].reg.nr); +#endif + /* need to fetch the constant now */ + brw_dp_READ_4_vs(p, + c->current_const[argIndex].reg,/* writeback dest */ + 0, /* oword */ + relAddr, /* relative indexing? */ + addrReg, /* address register */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ + ); + + if (relAddr) { + /* second read */ + const2_reg = get_tmp(c); + + /* use upper half of address reg for second read */ + addrReg = stride(addrReg, 0, 4, 0); + addrReg.subnr = 16; + + brw_dp_READ_4_vs(p, + const2_reg, /* writeback dest */ + 1, /* oword */ + relAddr, /* relative indexing? */ + addrReg, /* address register */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER + ); + } + } + + const_reg = c->current_const[argIndex].reg; + + if (relAddr) { + /* merge the two Owords into the constant register */ + /* const_reg[7..4] = const2_reg[7..4] */ + brw_MOV(p, + suboffset(stride(const_reg, 0, 4, 1), 4), + suboffset(stride(const2_reg, 0, 4, 1), 4)); + release_tmp(c, const2_reg); + } + else { + /* replicate lower four floats into upper half (to get XYZWXYZW) */ + const_reg = stride(const_reg, 0, 4, 0); + const_reg.subnr = 0; + } + + return const_reg; +} + + + /* TODO: relative addressing! */ static struct brw_reg get_reg( struct brw_vs_compile *c, gl_register_file file, GLuint index ) { - switch (file) { case PROGRAM_TEMPORARY: case PROGRAM_INPUT: @@ -708,13 +811,17 @@ static struct brw_reg get_reg( struct brw_vs_compile *c, } +/** + * Indirect addressing: get reg[[arg] + offset]. + */ static struct brw_reg deref( struct brw_vs_compile *c, struct brw_reg arg, GLint offset) { struct brw_compile *p = &c->func; struct brw_reg tmp = vec4(get_tmp(c)); - struct brw_reg vp_address = retype(vec1(get_reg(c, PROGRAM_ADDRESS, 0)), BRW_REGISTER_TYPE_UW); + struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0]; + struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW); GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16; struct brw_reg indirect = brw_vec4_indirect(0,0); @@ -735,10 +842,67 @@ static struct brw_reg deref( struct brw_vs_compile *c, brw_pop_insn_state(p); } + /* NOTE: tmp not released */ return vec8(tmp); } +/** + * Get brw reg corresponding to the instruction's [argIndex] src reg. + * TODO: relative addressing! + */ +static struct brw_reg +get_src_reg( struct brw_vs_compile *c, + const struct prog_instruction *inst, + GLuint argIndex ) +{ + const GLuint file = inst->SrcReg[argIndex].File; + const GLint index = inst->SrcReg[argIndex].Index; + const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr; + + switch (file) { + case PROGRAM_TEMPORARY: + case PROGRAM_INPUT: + case PROGRAM_OUTPUT: + if (relAddr) { + return deref(c, c->regs[file][0], index); + } + else { + assert(c->regs[file][index].nr != 0); + return c->regs[file][index]; + } + + case PROGRAM_STATE_VAR: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + if (c->vp->use_const_buffer) { + return get_constant(c, inst, argIndex); + } + else if (relAddr) { + return deref(c, c->regs[PROGRAM_STATE_VAR][0], index); + } + else { + assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); + return c->regs[PROGRAM_STATE_VAR][index]; + } + case PROGRAM_ADDRESS: + assert(index == 0); + return c->regs[file][index]; + + case PROGRAM_UNDEFINED: + /* this is a normal case since we loop over all three src args */ + return brw_null_reg(); + + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_WRITE_ONLY: + default: + assert(0); + return brw_null_reg(); + } +} + + static void emit_arl( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0 ) @@ -750,30 +914,31 @@ static void emit_arl( struct brw_vs_compile *c, if (need_tmp) tmp = get_tmp(c); - brw_RNDD(p, tmp, arg0); - brw_MUL(p, dst, tmp, brw_imm_d(16)); + brw_RNDD(p, tmp, arg0); /* tmp = round(arg0) */ + brw_MUL(p, dst, tmp, brw_imm_d(16)); /* dst = tmp * 16 */ if (need_tmp) release_tmp(c, tmp); } -/* Will return mangled results for SWZ op. The emit_swz() function +/** + * Return the brw reg for the given instruction's src argument. + * Will return mangled results for SWZ op. The emit_swz() function * ignores this result and recalculates taking extended swizzles into * account. */ static struct brw_reg get_arg( struct brw_vs_compile *c, - struct prog_src_register *src ) + const struct prog_instruction *inst, + GLuint argIndex ) { + const struct prog_src_register *src = &inst->SrcReg[argIndex]; struct brw_reg reg; if (src->File == PROGRAM_UNDEFINED) return brw_null_reg(); - if (src->RelAddr) - reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index); - else - reg = get_reg(c, src->File, src->Index); + reg = get_src_reg(c, inst, argIndex); /* Convert 3-bit swizzle to 2-bit. */ @@ -784,16 +949,38 @@ static struct brw_reg get_arg( struct brw_vs_compile *c, /* Note this is ok for non-swizzle instructions: */ - reg.negate = src->NegateBase ? 1 : 0; + reg.negate = src->Negate ? 1 : 0; return reg; } +/** + * Get brw register for the given program dest register. + */ static struct brw_reg get_dst( struct brw_vs_compile *c, struct prog_dst_register dst ) { - struct brw_reg reg = get_reg(c, dst.File, dst.Index); + struct brw_reg reg; + + switch (dst.File) { + case PROGRAM_TEMPORARY: + case PROGRAM_OUTPUT: + assert(c->regs[dst.File][dst.Index].nr != 0); + reg = c->regs[dst.File][dst.Index]; + break; + case PROGRAM_ADDRESS: + assert(dst.Index == 0); + reg = c->regs[dst.File][dst.Index]; + break; + case PROGRAM_UNDEFINED: + /* we may hit this for OPCODE_END, OPCODE_KIL, etc */ + reg = brw_null_reg(); + break; + default: + assert(0); + reg = brw_null_reg(); + } reg.dw1.bits.writemask = dst.WriteMask; @@ -803,14 +990,16 @@ static struct brw_reg get_dst( struct brw_vs_compile *c, static void emit_swz( struct brw_vs_compile *c, struct brw_reg dst, - struct prog_src_register src ) + const struct prog_instruction *inst) { + const GLuint argIndex = 0; + const struct prog_src_register src = inst->SrcReg[argIndex]; struct brw_compile *p = &c->func; GLuint zeros_mask = 0; GLuint ones_mask = 0; GLuint src_mask = 0; GLubyte src_swz[4]; - GLboolean need_tmp = (src.NegateBase && + GLboolean need_tmp = (src.Negate && dst.file != BRW_GENERAL_REGISTER_FILE); struct brw_reg tmp = dst; GLuint i; @@ -844,10 +1033,7 @@ static void emit_swz( struct brw_vs_compile *c, if (src_mask) { struct brw_reg arg0; - if (src.RelAddr) - arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index); - else - arg0 = get_reg(c, src.File, src.Index); + arg0 = get_src_reg(c, inst, argIndex); arg0 = brw_swizzle(arg0, src_swz[0], src_swz[1], @@ -862,8 +1048,8 @@ static void emit_swz( struct brw_vs_compile *c, if (ones_mask) brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1)); - if (src.NegateBase) - brw_MOV(p, brw_writemask(tmp, src.NegateBase), negate(tmp)); + if (src.Negate) + brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp)); if (need_tmp) { brw_MOV(p, dst, tmp); @@ -1039,21 +1225,26 @@ void brw_vs_emit(struct brw_vs_compile *c ) for (insn = 0; insn < nr_insns; insn++) { - struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; + const struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; struct brw_reg args[3], dst; GLuint i; +#if 0 + printf("%d: ", insn); + _mesa_print_instruction(inst); +#endif + /* Get argument regs. SWZ is special and does this itself. */ if (inst->Opcode != OPCODE_SWZ) for (i = 0; i < 3; i++) { - struct prog_src_register *src = &inst->SrcReg[i]; + const struct prog_src_register *src = &inst->SrcReg[i]; index = src->Index; file = src->File; if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) args[i] = c->output_regs[index].reg; else - args[i] = get_arg(c, src); + args[i] = get_arg(c, inst, i); } /* Get dest regs. Note that it is possible for a reg to be both @@ -1181,7 +1372,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) /* The args[0] value can't be used here as it won't have * correctly encoded the full swizzle: */ - emit_swz(c, dst, inst->SrcReg[0] ); + emit_swz(c, dst, inst); break; case OPCODE_TRUNC: /* round toward zero */ diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index 1a63766ea1f..3d295388437 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -44,6 +44,8 @@ struct brw_vs_unit_key { unsigned int curbe_offset; unsigned int nr_urb_entries, urb_size; + + unsigned int nr_surfaces; }; static void @@ -62,6 +64,9 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) key->nr_urb_entries = brw->urb.nr_vs_entries; key->urb_size = brw->urb.vsize; + /* BRW_NEW_NR_VS_SURFACES */ + key->nr_surfaces = brw->vs.nr_surfaces; + /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ if (ctx->Transform.ClipPlanesEnabled) { /* Note that we read in the userclip planes as well, hence @@ -92,6 +97,8 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) * brw_urb_WRITE() results. */ vs.thread1.single_program_flow = 0; + vs.thread1.binding_table_entry_count = key->nr_surfaces; + vs.thread3.urb_entry_read_length = key->urb_entry_read_length; vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length; vs.thread3.dispatch_grf_start_reg = 1; @@ -158,6 +165,7 @@ const struct brw_tracked_state brw_vs_unit = { .dirty = { .mesa = _NEW_TRANSFORM, .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_NR_VS_SURFACES | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_VS_PROG }, diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c new file mode 100644 index 00000000000..89f47522a1c --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -0,0 +1,226 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "main/mtypes.h" +#include "main/texformat.h" +#include "main/texstore.h" +#include "shader/prog_parameter.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +/* Creates a new VS constant buffer reflecting the current VS program's + * constants, if needed by the VS program. + * + * Otherwise, constants go through the CURBEs using the brw_constant_buffer + * state atom. + */ +static drm_intel_bo * +brw_vs_update_constant_buffer(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + const struct gl_program_parameter_list *params = vp->program.Base.Parameters; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + drm_intel_bo *const_buffer; + + /* BRW_NEW_VERTEX_PROGRAM */ + if (!vp->use_const_buffer) + return NULL; + + const_buffer = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", + size, 64); + + /* _NEW_PROGRAM_CONSTANTS */ + dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); + + return const_buffer; +} + +/** + * Update the surface state for a VS constant buffer. + * + * Sets brw->vs.surf_bo[surf] and brw->vp->const_buffer. + */ +static void +brw_update_vs_constant_surface( GLcontext *ctx, + GLuint surf) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_surface_key key; + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + const struct gl_program_parameter_list *params = vp->program.Base.Parameters; + + assert(surf == 0); + + /* If we're in this state update atom, we need to update VS constants, so + * free the old buffer and create a new one for the new contents. + */ + dri_bo_unreference(vp->const_buffer); + vp->const_buffer = brw_vs_update_constant_buffer(brw); + + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (vp->const_buffer == 0) { + drm_intel_bo_unreference(brw->vs.surf_bo[surf]); + brw->vs.surf_bo[surf] = NULL; + return; + } + + memset(&key, 0, sizeof(key)); + + key.format = MESA_FORMAT_RGBA_FLOAT32; + key.internal_format = GL_RGBA; + key.bo = vp->const_buffer; + key.depthmode = GL_NONE; + key.pitch = params->NumParameters; + key.width = params->NumParameters; + key.height = 1; + key.depth = 1; + key.cpp = 16; + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + drm_intel_bo_unreference(brw->vs.surf_bo[surf]); + brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->vs.surf_bo[surf] == NULL) { + brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); + } +} + + +/** + * Constructs the binding table for the VS surface state. + */ +static dri_bo * +brw_vs_get_binding_table(struct brw_context *brw) +{ + dri_bo *bind_bo; + + bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, BRW_VS_MAX_SURF, + NULL); + + if (bind_bo == NULL) { + GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint); + uint32_t *data = malloc(data_size); + int i; + + for (i = 0; i < BRW_VS_MAX_SURF; i++) + if (brw->vs.surf_bo[i]) + data[i] = brw->vs.surf_bo[i]->offset; + else + data[i] = 0; + + bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, BRW_VS_MAX_SURF, + data, data_size, + NULL, NULL); + + /* Emit binding table relocations to surface state */ + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + if (brw->vs.surf_bo[i] != NULL) { + /* The presumed offsets were set in the data values for + * brw_upload_cache. + */ + drm_intel_bo_emit_reloc(bind_bo, i * 4, + brw->vs.surf_bo[i], 0, + I915_GEM_DOMAIN_INSTRUCTION, 0); + } + } + + free(data); + } + + return bind_bo; +} + +/** + * Vertex shader surfaces (constant buffer). + * + * This consumes the state updates for the constant buffer needing + * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and + * CACHE_NEW_SURF_BIND for the binding table upload. + */ +static void prepare_vs_surfaces(struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + int i; + int nr_surfaces = 0; + + brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER); + + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + if (brw->vs.surf_bo[i] != NULL) { + nr_surfaces = i + 1; + } + } + + if (brw->vs.nr_surfaces != nr_surfaces) { + brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; + brw->vs.nr_surfaces = nr_surfaces; + } + + /* Note that we don't end up updating the bind_bo if we don't have a + * surface to be pointing at. This should be relatively harmless, as it + * just slightly increases our working set size. + */ + if (brw->vs.nr_surfaces != 0) { + dri_bo_unreference(brw->vs.bind_bo); + brw->vs.bind_bo = brw_vs_get_binding_table(brw); + } +} + +const struct brw_tracked_state brw_vs_surfaces = { + .dirty = { + .mesa = (_NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_VERTEX_PROGRAM), + .cache = 0 + }, + .prepare = prepare_vs_surfaces, +}; + + + diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 960bbb311e3..ba03afd6c13 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -79,6 +79,7 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->curbe.curbe_bo); dri_bo_release(&brw->vs.prog_bo); dri_bo_release(&brw->vs.state_bo); + dri_bo_release(&brw->vs.bind_bo); dri_bo_release(&brw->gs.prog_bo); dri_bo_release(&brw->gs.state_bo); dri_bo_release(&brw->clip.prog_bo); diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 90d74c2885c..3e476fd3be5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -146,6 +146,13 @@ static void do_wm_prog( struct brw_context *brw, if (c == NULL) { brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); c = brw->wm.compile_data; + if (c == NULL) { + /* Ouch - big out of memory problem. Can't continue + * without triggering a segfault, no way to signal, + * so just return. + */ + return; + } } else { memset(c, 0, sizeof(*brw->wm.compile_data)); } @@ -312,6 +319,9 @@ static void brw_wm_populate_key( struct brw_context *brw, key->drawable_height = brw->intel.driDrawable->h; } + /* CACHE_NEW_VS_PROG */ + key->vp_outputs_written = brw->vs.prog_data->outputs_written & DO_SETUP_BITS; + /* The unique fragment program ID */ key->program_string_id = fp->id; } @@ -350,7 +360,7 @@ const struct brw_tracked_state brw_wm_prog = { .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_WM_INPUT_DIMENSIONS | BRW_NEW_REDUCED_PRIMITIVE), - .cache = 0 + .cache = CACHE_NEW_VS_PROG, }, .prepare = brw_prepare_wm_prog }; diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index d0ab3bdc65f..fb15c03e83d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -75,6 +75,7 @@ struct brw_wm_prog_key { GLuint program_string_id:32; GLuint origin_x, origin_y; GLuint drawable_height; + GLuint vp_outputs_written; }; @@ -240,22 +241,25 @@ struct brw_wm_compile { GLuint max_wm_grf; GLuint last_scratch; + GLuint cur_inst; /**< index of current instruction */ + + GLboolean out_of_regs; /**< ran out of GRF registers? */ + /** Mapping from Mesa registers to hardware registers */ struct { GLboolean inited; struct brw_reg reg; } wm_regs[PROGRAM_PAYLOAD+1][256][4]; + GLboolean used_grf[BRW_WM_MAX_GRF]; + GLuint first_free_grf; struct brw_reg stack; struct brw_reg emit_mask_reg; - GLuint reg_index; /**< Index of next free GRF register */ GLuint tmp_regs[BRW_WM_MAX_GRF]; GLuint tmp_index; GLuint tmp_max; GLuint subroutines[BRW_WM_MAX_SUBROUTINE]; - /** using a real constant buffer? */ - GLboolean use_const_buffer; /** we may need up to 3 constants per instruction (if use_const_buffer) */ struct { GLint index; diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index d65b1332c61..14ab9042de7 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -353,6 +353,19 @@ static void emit_mad( struct brw_compile *p, } } +static void emit_trunc( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_RNDZ(p, dst[i], arg0[i]); + } + } +} static void emit_lrp( struct brw_compile *p, const struct brw_reg *dst, @@ -742,7 +755,7 @@ static void emit_tex( struct brw_wm_compile *c, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */ + SURF_INDEX_TEXTURE(inst->tex_unit), inst->tex_unit, /* sampler */ inst->writemask, (inst->tex_shadow ? @@ -791,7 +804,7 @@ static void emit_txb( struct brw_wm_compile *c, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */ + SURF_INDEX_TEXTURE(inst->tex_unit), inst->tex_unit, /* sampler */ inst->writemask, BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, @@ -1224,6 +1237,10 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_dph(p, dst, dst_flags, args[0], args[1]); break; + case OPCODE_TRUNC: + emit_trunc(p, dst, dst_flags, args[0]); + break; + case OPCODE_LRP: emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); break; diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index a7f5f1b9a28..1798d842c79 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -80,9 +80,8 @@ static struct prog_src_register src_reg(GLuint file, GLuint idx) reg.Index = idx; reg.Swizzle = SWIZZLE_NOOP; reg.RelAddr = 0; - reg.NegateBase = 0; + reg.Negate = NEGATE_NONE; reg.Abs = 0; - reg.NegateAbs = 0; return reg; } @@ -569,7 +568,7 @@ static void precalc_dst( struct brw_wm_compile *c, src_undef(), src_undef()); /* Avoid letting negation flag of src0 affect our 1 constant. */ - swz->SrcReg[0].NegateBase &= ~NEGATE_X; + swz->SrcReg[0].Negate &= ~NEGATE_X; } if (dst.WriteMask & WRITEMASK_W) { /* dst.w = mov src1.w @@ -604,7 +603,7 @@ static void precalc_lit( struct brw_wm_compile *c, src_undef(), src_undef()); /* Avoid letting the negation flag of src0 affect our 1 constant. */ - swz->SrcReg[0].NegateBase = 0; + swz->SrcReg[0].Negate = NEGATE_NONE; } if (dst.WriteMask & WRITEMASK_YZ) { @@ -651,7 +650,7 @@ static void precalc_tex( struct brw_wm_compile *c, src0, src_undef(), src_undef()); - out->SrcReg[0].NegateBase = 0; + out->SrcReg[0].Negate = NEGATE_NONE; out->SrcReg[0].Abs = 1; /* tmp0 = MAX(coord.X, coord.Y) */ @@ -1050,14 +1049,14 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) case OPCODE_ABS: out = emit_insn(c, inst); out->Opcode = OPCODE_MOV; - out->SrcReg[0].NegateBase = 0; + out->SrcReg[0].Negate = NEGATE_NONE; out->SrcReg[0].Abs = 1; break; case OPCODE_SUB: out = emit_insn(c, inst); out->Opcode = OPCODE_ADD; - out->SrcReg[1].NegateBase ^= 0xf; + out->SrcReg[1].Negate ^= NEGATE_XYZW; break; case OPCODE_SCS: diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 575cd45d572..0e6a2f8ef09 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -1,5 +1,7 @@ #include "main/macros.h" #include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/prog_optimize.h" #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" @@ -21,7 +23,6 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) const struct prog_instruction *inst = &fp->Base.Instructions[i]; switch (inst->Opcode) { case OPCODE_IF: - case OPCODE_TRUNC: case OPCODE_ENDIF: case OPCODE_CAL: case OPCODE_BRK: @@ -42,6 +43,83 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) } + +static void +reclaim_temps(struct brw_wm_compile *c); + + +/** Mark GRF register as used. */ +static void +prealloc_grf(struct brw_wm_compile *c, int r) +{ + c->used_grf[r] = GL_TRUE; +} + + +/** Mark given GRF register as not in use. */ +static void +release_grf(struct brw_wm_compile *c, int r) +{ + /*assert(c->used_grf[r]);*/ + c->used_grf[r] = GL_FALSE; + c->first_free_grf = MIN2(c->first_free_grf, r); +} + + +/** Return index of a free GRF, mark it as used. */ +static int +alloc_grf(struct brw_wm_compile *c) +{ + GLuint r; + for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { + if (!c->used_grf[r]) { + c->used_grf[r] = GL_TRUE; + c->first_free_grf = r + 1; /* a guess */ + return r; + } + } + + /* no free temps, try to reclaim some */ + reclaim_temps(c); + c->first_free_grf = 0; + + /* try alloc again */ + for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { + if (!c->used_grf[r]) { + c->used_grf[r] = GL_TRUE; + c->first_free_grf = r + 1; /* a guess */ + return r; + } + } + + for (r = 0; r < BRW_WM_MAX_GRF; r++) { + assert(c->used_grf[r]); + } + + /* really, no free GRF regs found */ + if (!c->out_of_regs) { + /* print warning once per compilation */ + _mesa_warning(NULL, "i965: ran out of registers for fragment program"); + c->out_of_regs = GL_TRUE; + } + + return -1; +} + + +/** Return number of GRF registers used */ +static int +num_grf_used(const struct brw_wm_compile *c) +{ + int r; + for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--) + if (c->used_grf[r]) + return r + 1; + return 0; +} + + + /** * Record the mapping of a Mesa register to a hardware register. */ @@ -56,7 +134,7 @@ static void set_reg(struct brw_wm_compile *c, int file, int index, * Examine instruction's write mask to find index of first component * enabled for writing. */ -static int get_scalar_dst_index(struct prog_instruction *inst) +static int get_scalar_dst_index(const struct prog_instruction *inst) { int i; for (i = 0; i < 4; i++) @@ -68,11 +146,23 @@ static int get_scalar_dst_index(struct prog_instruction *inst) static struct brw_reg alloc_tmp(struct brw_wm_compile *c) { struct brw_reg reg; - if(c->tmp_index == c->tmp_max) - c->tmp_regs[ c->tmp_max++ ] = c->reg_index++; - + + /* if we need to allocate another temp, grow the tmp_regs[] array */ + if (c->tmp_index == c->tmp_max) { + int r = alloc_grf(c); + if (r < 0) { + /*printf("Out of temps in %s\n", __FUNCTION__);*/ + r = 50; /* XXX random register! */ + } + c->tmp_regs[ c->tmp_max++ ] = r; + } + + /* form the GRF register */ reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0); + /*printf("alloc_temp %d\n", reg.nr);*/ + assert(reg.nr < BRW_WM_MAX_GRF); return reg; + } /** @@ -130,35 +220,29 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, return brw_null_reg(); } + assert(index < 256); + assert(component < 4); + /* see if we've already allocated a HW register for this Mesa register */ if (c->wm_regs[file][index][component].inited) { - /* yes, re-use */ - reg = c->wm_regs[file][index][component].reg; + /* yes, re-use */ + reg = c->wm_regs[file][index][component].reg; } else { /* no, allocate new register */ - reg = brw_vec8_grf(c->reg_index, 0); - } + int grf = alloc_grf(c); + /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/ + if (grf < 0) { + /* totally out of temps */ + grf = 51; /* XXX random register! */ + } - /* if this is a new register allocation, record it in the table */ - if (!c->wm_regs[file][index][component].inited) { - set_reg(c, file, index, component, reg); - c->reg_index++; - } + reg = brw_vec8_grf(grf, 0); + /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/ - if (c->reg_index >= BRW_WM_MAX_GRF - 12) { - /* ran out of temporary registers! */ -#if 1 - /* This is a big hack for now. - * Return bad register index, just don't hang the GPU. - */ - _mesa_fprintf(stderr, "out of regs %d\n", c->reg_index); - c->reg_index = BRW_WM_MAX_GRF - 13; -#else - return brw_null_reg(); -#endif + set_reg(c, file, index, component, reg); } - + if (neg & (1 << component)) { reg = negate(reg); } @@ -168,6 +252,46 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, } + +/** + * This is called if we run out of GRF registers. Examine the live intervals + * of temp regs in the program and free those which won't be used again. + */ +static void +reclaim_temps(struct brw_wm_compile *c) +{ + GLint intBegin[MAX_PROGRAM_TEMPS]; + GLint intEnd[MAX_PROGRAM_TEMPS]; + int index; + + /*printf("Reclaim temps:\n");*/ + + _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns, + intBegin, intEnd); + + for (index = 0; index < MAX_PROGRAM_TEMPS; index++) { + if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) { + /* program temp[i] can be freed */ + int component; + /*printf(" temp[%d] is dead\n", index);*/ + for (component = 0; component < 4; component++) { + if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) { + int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr; + release_grf(c, r); + /* + printf(" Reclaim temp %d, reg %d at inst %d\n", + index, r, c->cur_inst); + */ + c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE; + } + } + } + } +} + + + + /** * Preallocate registers. This sets up the Mesa to hardware register * mapping for certain registers, such as constants (uniforms/state vars) @@ -177,8 +301,12 @@ static void prealloc_reg(struct brw_wm_compile *c) { int i, j; struct brw_reg reg; - int nr_interp_regs = 0; + int urb_read_length = 0; GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; + GLuint reg_index = 0; + + memset(c->used_grf, GL_FALSE, sizeof(c->used_grf)); + c->first_free_grf = 0; for (i = 0; i < 4; i++) { if (i < c->key.nr_depth_regs) @@ -187,16 +315,22 @@ static void prealloc_reg(struct brw_wm_compile *c) reg = brw_vec8_grf(0, 0); set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); } - c->reg_index += 2 * c->key.nr_depth_regs; + reg_index += 2 * c->key.nr_depth_regs; /* constants */ { - const int nr_params = c->fp->program.Base.Parameters->NumParameters; + const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters; + const GLuint nr_temps = c->fp->program.Base.NumTemporaries; /* use a real constant buffer, or just use a section of the GRF? */ - c->use_const_buffer = GL_FALSE; /* (nr_params > 8);*/ + /* XXX this heuristic may need adjustment... */ + if ((nr_params + nr_temps) * 4 + reg_index > 80) + c->fp->use_const_buffer = GL_TRUE; + else + c->fp->use_const_buffer = GL_FALSE; + /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/ - if (c->use_const_buffer) { + if (c->fp->use_const_buffer) { /* We'll use a real constant buffer and fetch constants from * it with a dataport read message. */ @@ -216,7 +350,7 @@ static void prealloc_reg(struct brw_wm_compile *c) for (i = 0; i < nr_params; i++) { /* loop over XYZW channels */ for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(c->reg_index + index / 8, index % 8); + reg = brw_vec1_grf(reg_index + index / 8, index % 8); /* Save pointer to parameter/constant value. * Constants will be copied in prepare_constant_buffer() */ @@ -226,42 +360,62 @@ static void prealloc_reg(struct brw_wm_compile *c) } /* number of constant regs used (each reg is float[8]) */ c->nr_creg = 2 * ((4 * nr_params + 15) / 16); - c->reg_index += c->nr_creg; + reg_index += c->nr_creg; } } /* fragment shader inputs */ - for (i = 0; i < FRAG_ATTRIB_MAX; i++) { - if (inputs & (1<<i)) { - nr_interp_regs++; - reg = brw_vec8_grf(c->reg_index, 0); - for (j = 0; j < 4; j++) - set_reg(c, PROGRAM_PAYLOAD, i, j, reg); - c->reg_index += 2; - } + for (i = 0; i < VERT_RESULT_MAX; i++) { + int fp_input; + + if (i >= VERT_RESULT_VAR0) + fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0; + else if (i <= VERT_RESULT_TEX7) + fp_input = i; + else + fp_input = -1; + + if (fp_input >= 0 && inputs & (1 << fp_input)) { + urb_read_length = reg_index; + reg = brw_vec8_grf(reg_index, 0); + for (j = 0; j < 4; j++) + set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); + } + if (c->key.vp_outputs_written & (1 << i)) { + reg_index += 2; + } } c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; - c->prog_data.urb_read_length = nr_interp_regs * 2; + c->prog_data.urb_read_length = urb_read_length; c->prog_data.curb_read_length = c->nr_creg; - c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index++; - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index += 2; + c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); + reg_index++; + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); + reg_index += 2; + + /* mark GRF regs [0..reg_index-1] as in-use */ + for (i = 0; i < reg_index; i++) + prealloc_grf(c, i); + + /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */ + prealloc_grf(c, 126); + prealloc_grf(c, 127); /* An instruction may reference up to three constants. * They'll be found in these registers. * XXX alloc these on demand! */ - if (c->use_const_buffer) { - c->current_const[0].reg = alloc_tmp(c); - c->current_const[1].reg = alloc_tmp(c); - c->current_const[2].reg = alloc_tmp(c); - } - /* - printf("USE CONST BUFFER? %d\n", c->use_const_buffer); - printf("AFTER PRE_ALLOC, reg_index = %d\n", c->reg_index); - */ + if (c->fp->use_const_buffer) { + for (i = 0; i < 3; i++) { + c->current_const[i].index = -1; + c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0); + } + } +#if 0 + printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer); + printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index); +#endif } @@ -282,45 +436,21 @@ static void fetch_constants(struct brw_wm_compile *c, if (src->File == PROGRAM_STATE_VAR || src->File == PROGRAM_CONSTANT || src->File == PROGRAM_UNIFORM) { - if (c->current_const[i].index != src->Index) { - - c->current_const[i].index = src->Index; - /*c->current_const[i].reg = alloc_tmp(c);*/ - - /* - printf(" fetch const[%d] for arg %d into reg %d\n", - src->Index, i, c->current_const[i].reg.nr); - */ - - /* need to fetch the constant now */ - brw_dp_READ_4(p, - c->current_const[i].reg, /* writeback dest */ - 1, /* msg_reg */ - src->RelAddr, /* relative indexing? */ - 16 * src->Index, /* byte offset */ - BRW_WM_MAX_SURF - 1 /* binding table index */ - ); + c->current_const[i].index = src->Index; #if 0 - /* dependency stall */ - { - int response_length = 1; - int mark = mark_tmps( c ); - struct brw_reg src = c->current_const[i].reg; - struct brw_reg tmp = alloc_tmp(c); - - /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } - */ - brw_push_insn_state(p); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, tmp, src); - brw_MOV(p, src, tmp); - brw_pop_insn_state(p); - - release_tmps( c, mark ); - } + printf(" fetch const[%d] for arg %d into reg %d\n", + src->Index, i, c->current_const[i].reg.nr); #endif - } + + /* need to fetch the constant now */ + brw_dp_READ_4(p, + c->current_const[i].reg, /* writeback dest */ + 1, /* msg_reg */ + src->RelAddr, /* relative indexing? */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */ + ); } } } @@ -361,18 +491,18 @@ get_src_reg_const(struct brw_wm_compile *c, const_reg = stride(const_reg, 0, 1, 0); const_reg.subnr = component * 4; - if (src->NegateBase) + if (src->Negate & (1 << component)) const_reg = negate(const_reg); if (src->Abs) const_reg = brw_abs(const_reg); - /* - printf(" form const[%d] for arg %d, comp %d, reg %d\n", +#if 0 + printf(" form const[%d].%d for arg %d, reg %d\n", c->current_const[srcRegIndex].index, - srcRegIndex, component, + srcRegIndex, const_reg.nr); - */ +#endif return const_reg; } @@ -389,7 +519,15 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, const GLuint nr = 1; const GLuint component = GET_SWZ(src->Swizzle, channel); - if (c->use_const_buffer && + /* Extended swizzle terms */ + if (component == SWIZZLE_ZERO) { + return brw_imm_f(0.0F); + } + else if (component == SWIZZLE_ONE) { + return brw_imm_f(1.0F); + } + + if (c->fp->use_const_buffer && (src->File == PROGRAM_STATE_VAR || src->File == PROGRAM_CONSTANT || src->File == PROGRAM_UNIFORM)) { @@ -398,7 +536,7 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, else { /* other type of source register */ return get_reg(c, src->File, src->Index, component, nr, - src->NegateBase, src->Abs); + src->Negate, src->Abs); } } @@ -423,11 +561,13 @@ static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, const GLfloat *param = c->fp->program.Base.Parameters->ParameterValues[src->Index]; GLfloat value = param[component]; - if (src->NegateBase) + if (src->Negate & (1 << channel)) value = -value; if (src->Abs) value = FABSF(value); - /*printf(" form imm reg %f\n", value);*/ +#if 0 + printf(" form immed value %f for chan %d\n", value, channel); +#endif return brw_imm_f(value); } else { @@ -501,7 +641,7 @@ static void invoke_subroutine( struct brw_wm_compile *c, } static void emit_abs( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -518,7 +658,7 @@ static void emit_abs( struct brw_wm_compile *c, } static void emit_trunc( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -536,7 +676,7 @@ static void emit_trunc( struct brw_wm_compile *c, } static void emit_mov( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -546,7 +686,9 @@ static void emit_mov( struct brw_wm_compile *c, if (mask & (1<<i)) { struct brw_reg src, dst; dst = get_dst_reg(c, inst, i); - src = get_src_reg_imm(c, inst, 0, i); + /* XXX some moves from immediate value don't work reliably!!! */ + /*src = get_src_reg_imm(c, inst, 0, i);*/ + src = get_src_reg(c, inst, 0, i); brw_MOV(p, dst, src); } } @@ -554,7 +696,7 @@ static void emit_mov( struct brw_wm_compile *c, } static void emit_pixel_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); @@ -584,7 +726,7 @@ static void emit_pixel_xy(struct brw_wm_compile *c, } static void emit_delta_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg dst0, dst1, src0, src1; @@ -644,7 +786,7 @@ static void fire_fb_write( struct brw_wm_compile *c, } static void emit_fb_write(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; int nr = 2; @@ -682,27 +824,26 @@ static void emit_fb_write(struct brw_wm_compile *c, } if (c->key.dest_depth_reg) { - GLuint comp = c->key.dest_depth_reg / 2; - GLuint off = c->key.dest_depth_reg % 2; + const GLuint comp = c->key.dest_depth_reg / 2; + const GLuint off = c->key.dest_depth_reg % 2; - assert(comp == 1); - assert(off == 0); -#if 0 - /* XXX do we need this code? comp always 1, off always 0, it seems */ if (off != 0) { + /* XXX this code needs review/testing */ + struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp); + struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1); + brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1)); + brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1)); /* 2nd half? */ - brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]); + brw_MOV(p, brw_message_reg(nr+1), arg1_1); brw_pop_insn_state(p); } else -#endif { - struct brw_reg src = get_src_reg(c, inst, 1, 1); - brw_MOV(p, brw_message_reg(nr), src); + struct brw_reg src = get_src_reg(c, inst, 1, 1); + brw_MOV(p, brw_message_reg(nr), src); } nr += 2; } @@ -713,7 +854,7 @@ static void emit_fb_write(struct brw_wm_compile *c, } static void emit_pixel_w( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -743,7 +884,7 @@ static void emit_pixel_w( struct brw_wm_compile *c, } static void emit_linterp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -772,7 +913,7 @@ static void emit_linterp(struct brw_wm_compile *c, } static void emit_cinterp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -798,7 +939,7 @@ static void emit_cinterp(struct brw_wm_compile *c, } static void emit_pinterp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -832,7 +973,7 @@ static void emit_pinterp(struct brw_wm_compile *c, /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */ static void emit_frontfacing(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); @@ -861,7 +1002,7 @@ static void emit_frontfacing(struct brw_wm_compile *c, } static void emit_xpd(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -886,7 +1027,7 @@ static void emit_xpd(struct brw_wm_compile *c, } static void emit_dp3(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg src0[3], src1[3], dst; int i; @@ -905,7 +1046,7 @@ static void emit_dp3(struct brw_wm_compile *c, } static void emit_dp4(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg src0[4], src1[4], dst; int i; @@ -924,7 +1065,7 @@ static void emit_dp4(struct brw_wm_compile *c, } static void emit_dph(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg src0[4], src1[4], dst; int i; @@ -948,7 +1089,7 @@ static void emit_dph(struct brw_wm_compile *c, * register's X, Y, Z and W channels (subject to writemasking of course). */ static void emit_math1(struct brw_wm_compile *c, - struct prog_instruction *inst, GLuint func) + const struct prog_instruction *inst, GLuint func) { struct brw_compile *p = &c->func; struct brw_reg src0, dst, tmp; @@ -985,43 +1126,43 @@ static void emit_math1(struct brw_wm_compile *c, } static void emit_rcp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_INV); } static void emit_rsq(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); } static void emit_sin(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); } static void emit_cos(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_COS); } static void emit_ex2(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); } static void emit_lg2(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); } static void emit_add(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; @@ -1040,7 +1181,7 @@ static void emit_add(struct brw_wm_compile *c, } static void emit_arl(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, addr_reg; @@ -1053,7 +1194,7 @@ static void emit_arl(struct brw_wm_compile *c, } static void emit_sub(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; @@ -1072,7 +1213,7 @@ static void emit_sub(struct brw_wm_compile *c, } static void emit_mul(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; @@ -1091,7 +1232,7 @@ static void emit_mul(struct brw_wm_compile *c, } static void emit_frc(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; @@ -1110,7 +1251,7 @@ static void emit_frc(struct brw_wm_compile *c, } static void emit_flr(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; @@ -1177,7 +1318,7 @@ static void emit_min_max(struct brw_wm_compile *c, } static void emit_pow(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst, src0, src1; @@ -1199,7 +1340,7 @@ static void emit_pow(struct brw_wm_compile *c, } static void emit_lrp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1252,7 +1393,7 @@ static void emit_kil(struct brw_wm_compile *c) } static void emit_mad(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1275,7 +1416,7 @@ static void emit_mad(struct brw_wm_compile *c, } static void emit_sop(struct brw_wm_compile *c, - struct prog_instruction *inst, GLuint cond) + const struct prog_instruction *inst, GLuint cond) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1299,43 +1440,43 @@ static void emit_sop(struct brw_wm_compile *c, } static void emit_slt(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_L); } static void emit_sle(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_LE); } static void emit_sgt(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_G); } static void emit_sge(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_GE); } static void emit_seq(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_EQ); } static void emit_sne(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_NEQ); } static void emit_ddx(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1362,7 +1503,7 @@ static void emit_ddx(struct brw_wm_compile *c, } static void emit_ddy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1505,7 +1646,7 @@ static void noise1_sub( struct brw_wm_compile *c ) { } static void emit_noise1( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src, param, dst; @@ -1675,7 +1816,7 @@ static void noise2_sub( struct brw_wm_compile *c ) { } static void emit_noise2( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, param0, param1, dst; @@ -1978,7 +2119,7 @@ static void noise3_sub( struct brw_wm_compile *c ) { } static void emit_noise3( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, src2, param0, param1, param2, dst; @@ -2401,7 +2542,7 @@ static void noise4_sub( struct brw_wm_compile *c ) } static void emit_noise4( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst; @@ -2443,7 +2584,7 @@ static void emit_noise4( struct brw_wm_compile *c, } static void emit_wpos_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -2479,7 +2620,7 @@ static void emit_wpos_xy(struct brw_wm_compile *c, BIAS on SIMD8 not working yet... */ static void emit_txb(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; @@ -2517,7 +2658,7 @@ static void emit_txb(struct brw_wm_compile *c, retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ 1, /* msg_reg_nr */ retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ - unit + MAX_DRAW_BUFFERS, /* surface */ + SURF_INDEX_TEXTURE(unit), unit, /* sampler */ inst->DstReg.WriteMask, /* writemask */ BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, /* msg_type */ @@ -2528,7 +2669,7 @@ static void emit_txb(struct brw_wm_compile *c, static void emit_tex(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; @@ -2581,7 +2722,7 @@ static void emit_tex(struct brw_wm_compile *c, retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ 1, /* msg_reg_nr */ retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ - unit + MAX_DRAW_BUFFERS, /* surface */ + SURF_INDEX_TEXTURE(unit), unit, /* sampler */ inst->DstReg.WriteMask, /* writemask */ BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, /* msg_type */ @@ -2612,28 +2753,31 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) struct brw_compile *p = &c->func; struct brw_indirect stack_index = brw_indirect(0, 0); - c->reg_index = 0; + c->out_of_regs = GL_FALSE; + prealloc_reg(c); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); for (i = 0; i < c->nr_fp_insns; i++) { - struct prog_instruction *inst = &c->prog_instructions[i]; + const struct prog_instruction *inst = &c->prog_instructions[i]; - if (inst->CondUpdate) - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - else - brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); + c->cur_inst = i; - /* +#if 0 _mesa_printf("Inst %d: ", i); _mesa_print_instruction(inst); - */ +#endif /* fetch any constants that this instruction needs */ - if (c->use_const_buffer) + if (c->fp->use_const_buffer) fetch_constants(c, inst); + if (inst->CondUpdate) + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + else + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); + switch (inst->Opcode) { case WM_PIXELXY: emit_pixel_xy(c, inst); @@ -2819,6 +2963,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) break; case OPCODE_BGNLOOP: + /* XXX may need to invalidate the current_constant regs */ loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8); break; case OPCODE_BRK: @@ -2849,17 +2994,13 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) _mesa_printf("unsupported IR in fragment shader %d\n", inst->Opcode); } + if (inst->CondUpdate) brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); else brw_set_predicate_control(p, BRW_PREDICATE_NONE); } post_wm_emit(c); - - if (c->reg_index >= BRW_WM_MAX_GRF) { - _mesa_problem(NULL, "Ran out of registers in brw_wm_emit_glsl()"); - /* XXX we need to do some proper error recovery here */ - } } @@ -2883,6 +3024,6 @@ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) brw_wm_print_program(c, "brw_wm_glsl_emit done"); } - c->prog_data.total_grf = c->reg_index; + c->prog_data.total_grf = num_grf_used(c); c->prog_data.total_scratch = 0; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c index bd60ac9b315..8fd067abe7d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_iz.c +++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c @@ -116,6 +116,10 @@ const struct { { C, 0, 1, 1, 1 } }; +/** + * \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES + * \param lookup bitmask of IZ_* flags + */ void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup, struct brw_wm_prog_key *key ) diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index 2debd0678a5..92142764f5d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -322,7 +322,7 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, newref->value->lastuse = newref; } - if (src.NegateBase & (1<<i)) + if (src.Negate & (1 << i)) newref->hw_reg.negate ^= 1; if (src.Abs) { diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c index ab9aa2f10d0..3436a247170 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c @@ -159,6 +159,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case OPCODE_FRC: case OPCODE_MOV: case OPCODE_SWZ: + case OPCODE_TRUNC: read0 = writemask; break; diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c index 780edbc42e6..6faea018fbc 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass2.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c @@ -69,8 +69,6 @@ static void prealloc_reg(struct brw_wm_compile *c, */ static void init_registers( struct brw_wm_compile *c ) { - struct brw_context *brw = c->func.brw; - GLuint inputs = (brw->vs.prog_data->outputs_written & DO_SETUP_BITS); GLuint nr_interp_regs = 0; GLuint i = 0; GLuint j; @@ -85,16 +83,19 @@ static void init_registers( struct brw_wm_compile *c ) prealloc_reg(c, &c->creg[j], i++); for (j = 0; j < FRAG_ATTRIB_MAX; j++) { - if (inputs & (1<<j)) { - /* index for vs output and ps input are not the same - in shader varying */ - GLuint index; - if (j > FRAG_ATTRIB_VAR0) - index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); + if (c->key.vp_outputs_written & (1<<j)) { + int fp_index; + + if (j >= VERT_RESULT_VAR0) + fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); + else if (j <= VERT_RESULT_TEX7) + fp_index = j; else - index = j; + fp_index = -1; + nr_interp_regs++; - prealloc_reg(c, &c->payload.input_interp[index], i++); + if (fp_index >= 0) + prealloc_reg(c, &c->payload.input_interp[fp_index], i++); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 1fc9f013727..3fc18ff1f3a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -152,7 +152,7 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC; if (key->max_aniso > 2.0) { - sampler->ss3.max_aniso = MAX2((key->max_aniso - 2) / 2, + sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2, BRW_ANISORATIO_16); } } @@ -178,6 +178,16 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; } + else if (key->tex_target == GL_TEXTURE_1D) { + /* There's a bug in 1D texture sampling - it actually pays + * attention to the wrap_t value, though it should not. + * Override the wrap_t value here to GL_REPEAT to keep + * any nonexistent border pixels from floating in. + */ + sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); + sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); + sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; + } else { sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 58fa6aaf8f9..67b41173fb2 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -290,7 +290,7 @@ const struct brw_tracked_state brw_wm_unit = { .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_CURBE_OFFSETS | - BRW_NEW_NR_SURFACES), + BRW_NEW_NR_WM_SURFACES), .cache = (CACHE_NEW_WM_PROG | CACHE_NEW_SAMPLER) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index e7d55d5dbd1..c49a5f6b4ec 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -38,7 +38,7 @@ #include "intel_mipmap_tree.h" #include "intel_batchbuffer.h" #include "intel_tex.h" - +#include "intel_fbo.h" #include "brw_context.h" #include "brw_state.h" @@ -176,17 +176,6 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, } } -struct brw_wm_surface_key { - GLenum target, depthmode; - dri_bo *bo; - GLint format, internal_format; - GLint first_level, last_level; - GLint width, height, depth; - GLint pitch, cpp; - uint32_t tiling; - GLuint offset; -}; - static void brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) { @@ -208,7 +197,7 @@ brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) static dri_bo * brw_create_texture_surface( struct brw_context *brw, - struct brw_wm_surface_key *key ) + struct brw_surface_key *key ) { struct brw_surface_state surf; dri_bo *bo; @@ -263,7 +252,7 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.cube_neg_z = 1; } - bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), &key->bo, key->bo ? 1 : 0, &surf, sizeof(surf), @@ -287,8 +276,8 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; - struct brw_wm_surface_key key; - const GLuint j = MAX_DRAW_BUFFERS + unit; + struct brw_surface_key key; + const GLuint surf = SURF_INDEX_TEXTURE(unit); memset(&key, 0, sizeof(key)); @@ -315,25 +304,26 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.cpp = intelObj->mt->cpp; key.tiling = intelObj->mt->region->tiling; - dri_bo_unreference(brw->wm.surf_bo[j]); - brw->wm.surf_bo[j] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); - if (brw->wm.surf_bo[j] == NULL) { - brw->wm.surf_bo[j] = brw_create_texture_surface(brw, &key); + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->wm.surf_bo[surf] == NULL) { + brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key); } } /** - * Create the constant buffer surface. Fragment shader constanst will be + * Create the constant buffer surface. Vertex/fragment shader constants will be * read from this buffer with Data Port Read instructions/messages. */ -static dri_bo * +dri_bo * brw_create_constant_surface( struct brw_context *brw, - struct brw_wm_surface_key *key ) + struct brw_surface_key *key ) { const GLint w = key->width - 1; struct brw_surface_state surf; @@ -345,8 +335,6 @@ brw_create_constant_surface( struct brw_context *brw, surf.ss0.surface_type = BRW_SURFACE_BUFFER; surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; - /* This is ok for all textures with channel width 8bit or less: - */ assert(key->bo); if (key->bo) surf.ss1.base_addr = key->bo->offset; /* reloc */ @@ -356,10 +344,10 @@ brw_create_constant_surface( struct brw_context *brw, surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */ - surf.ss3.pitch = (key->pitch * key->cpp) - 1; - brw_set_surface_tiling(&surf, key->tiling); + surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */ + brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ - bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), &key->bo, key->bo ? 1 : 0, &surf, sizeof(surf), @@ -377,28 +365,73 @@ brw_create_constant_surface( struct brw_context *brw, return bo; } +/* Creates a new WM constant buffer reflecting the current fragment program's + * constants, if needed by the fragment program. + * + * Otherwise, constants go through the CURBEs using the brw_constant_buffer + * state atom. + */ +static drm_intel_bo * +brw_wm_update_constant_buffer(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + const struct gl_program_parameter_list *params = fp->program.Base.Parameters; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + drm_intel_bo *const_buffer; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + if (!fp->use_const_buffer) + return NULL; + + const_buffer = drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", + size, 64); + + /* _NEW_PROGRAM_CONSTANTS */ + dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); + + return const_buffer; +} /** - * Update the constant buffer surface. + * Update the surface state for a WM constant buffer. + * The constant buffer will be (re)allocated here if needed. */ static void -brw_update_constant_surface( GLcontext *ctx, - const struct brw_fragment_program *fp ) +brw_update_wm_constant_surface( GLcontext *ctx, + GLuint surf) { struct brw_context *brw = brw_context(ctx); - struct brw_wm_surface_key key; - const GLuint j = BRW_WM_MAX_SURF - 1; - const GLuint numParams = fp->program.Base.Parameters->NumParameters; + struct brw_surface_key key; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + const struct gl_program_parameter_list *params = + fp->program.Base.Parameters; + + /* If we're in this state update atom, we need to update WM constants, so + * free the old buffer and create a new one for the new contents. + */ + dri_bo_unreference(fp->const_buffer); + fp->const_buffer = brw_wm_update_constant_buffer(brw); + + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (fp->const_buffer == 0) { + drm_intel_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; + return; + } memset(&key, 0, sizeof(key)); key.format = MESA_FORMAT_RGBA_FLOAT32; key.internal_format = GL_RGBA; key.bo = fp->const_buffer; - key.depthmode = GL_NONE; - key.pitch = numParams; - key.width = numParams; + key.pitch = params->NumParameters; + key.width = params->NumParameters; key.height = 1; key.depth = 1; key.cpp = 16; @@ -409,16 +442,60 @@ brw_update_constant_surface( GLcontext *ctx, key.width, key.height, key.depth, key.cpp, key.pitch); */ - dri_bo_unreference(brw->wm.surf_bo[j]); - brw->wm.surf_bo[j] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); - if (brw->wm.surf_bo[j] == NULL) { - brw->wm.surf_bo[j] = brw_create_constant_surface(brw, &key); + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->wm.surf_bo[surf] == NULL) { + brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key); } + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; } +/** + * Updates surface / buffer for fragment shader constant buffer, if + * one is required. + * + * This consumes the state updates for the constant buffer, and produces + * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for + * inclusion in the binding table. + */ +static void prepare_wm_constant_surface(struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; + + drm_intel_bo_unreference(fp->const_buffer); + fp->const_buffer = brw_wm_update_constant_buffer(brw); + + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (fp->const_buffer == 0) { + if (brw->wm.surf_bo[surf] != NULL) { + drm_intel_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + } + return; + } + + brw_update_wm_constant_surface(ctx, surf); +} + +const struct brw_tracked_state brw_wm_constant_surface = { + .dirty = { + .mesa = (_NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_FRAGMENT_PROGRAM), + .cache = 0 + }, + .prepare = prepare_wm_constant_surface, +}; + /** * Sets up a surface state structure to point at the given region. @@ -426,18 +503,22 @@ brw_update_constant_surface( GLcontext *ctx, * usable for further buffers when doing ARB_draw_buffer support. */ static void -brw_update_region_surface(struct brw_context *brw, struct intel_region *region, - unsigned int unit, GLboolean cached) +brw_update_renderbuffer_surface(struct brw_context *brw, + struct gl_renderbuffer *rb, + unsigned int unit) { GLcontext *ctx = &brw->intel.ctx; dri_bo *region_bo = NULL; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + struct intel_region *region = irb ? irb->region : NULL; struct { unsigned int surface_type; unsigned int surface_format; - unsigned int width, height, cpp; + unsigned int width, height, pitch, cpp; GLubyte color_mask[4]; GLboolean color_blend; uint32_t tiling; + uint32_t draw_offset; } key; memset(&key, 0, sizeof(key)); @@ -446,14 +527,29 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, region_bo = region->buffer; key.surface_type = BRW_SURFACE_2D; - if (region->cpp == 4) + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_ARGB8888: key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - else + break; + case MESA_FORMAT_RGB565: key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; + break; + case MESA_FORMAT_ARGB1555: + key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM; + break; + case MESA_FORMAT_ARGB4444: + key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM; + break; + default: + _mesa_problem(ctx, "Bad renderbuffer format: %d\n", + irb->texformat->MesaFormat); + } key.tiling = region->tiling; - key.width = region->pitch; /* XXX: not really! */ + key.width = region->width; key.height = region->height; + key.pitch = region->pitch; key.cpp = region->cpp; + key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */ } else { key.surface_type = BRW_SURFACE_NULL; key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; @@ -461,6 +557,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, key.width = 1; key.height = 1; key.cpp = 4; + key.draw_offset = 0; } memcpy(key.color_mask, ctx->Color.ColorMask, sizeof(key.color_mask)); @@ -468,12 +565,11 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, ctx->Color.BlendEnabled); dri_bo_unreference(brw->wm.surf_bo[unit]); - brw->wm.surf_bo[unit] = NULL; - if (cached) - brw->wm.surf_bo[unit] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - ®ion_bo, 1, - NULL); + brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + ®ion_bo, 1, + NULL); if (brw->wm.surf_bo[unit] == NULL) { struct brw_surface_state surf; @@ -482,13 +578,14 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, surf.ss0.surface_format = key.surface_format; surf.ss0.surface_type = key.surface_type; + surf.ss1.base_addr = key.draw_offset; if (region_bo != NULL) - surf.ss1.base_addr = region_bo->offset; /* reloc */ + surf.ss1.base_addr += region_bo->offset; /* reloc */ surf.ss2.width = key.width - 1; surf.ss2.height = key.height - 1; brw_set_surface_tiling(&surf, key.tiling); - surf.ss3.pitch = (key.width * key.cpp) - 1; + surf.ss3.pitch = (key.pitch * key.cpp) - 1; /* _NEW_COLOR */ surf.ss0.color_blend = key.color_blend; @@ -498,8 +595,9 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, surf.ss0.writedisable_alpha = !key.color_mask[3]; /* Key size will never match key size for textures, so we're safe. */ - brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), + brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), ®ion_bo, 1, &surf, sizeof(surf), NULL, NULL); @@ -508,12 +606,12 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, * them both. We might be able to figure out from other state * a more restrictive relocation to emit. */ - dri_bo_emit_reloc(brw->wm.surf_bo[unit], - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER, - 0, - offsetof(struct brw_surface_state, ss1), - region_bo); + drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit], + offsetof(struct brw_surface_state, ss1), + region_bo, + key.draw_offset, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER); } } } @@ -528,7 +626,9 @@ brw_wm_get_binding_table(struct brw_context *brw) { dri_bo *bind_bo; - bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, + assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); + + bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, NULL); @@ -544,7 +644,7 @@ brw_wm_get_binding_table(struct brw_context *brw) else data[i] = 0; - bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, + bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, data, data_size, @@ -574,66 +674,62 @@ static void prepare_wm_surfaces(struct brw_context *brw ) GLuint i; int old_nr_surfaces; + /* _NEW_BUFFERS */ /* Update surfaces for drawing buffers */ - if (brw->state.nr_color_regions > 1) { - for (i = 0; i < brw->state.nr_color_regions; i++) { - brw_update_region_surface(brw, brw->state.color_regions[i], i, - GL_FALSE); + if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + brw_update_renderbuffer_surface(brw, + ctx->DrawBuffer->_ColorDrawBuffers[i], + i); } } else { - brw_update_region_surface(brw, brw->state.color_regions[0], 0, GL_TRUE); + brw_update_renderbuffer_surface(brw, NULL, 0); } old_nr_surfaces = brw->wm.nr_surfaces; brw->wm.nr_surfaces = MAX_DRAW_BUFFERS; + if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL) + brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1; + /* Update surfaces for textures */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; - const GLuint j = MAX_DRAW_BUFFERS + i; + const GLuint surf = SURF_INDEX_TEXTURE(i); /* _NEW_TEXTURE, BRW_NEW_TEXDATA */ if (texUnit->_ReallyEnabled) { if (texUnit->_Current == intel->frame_buffer_texobj) { /* render to texture */ - dri_bo_unreference(brw->wm.surf_bo[j]); - brw->wm.surf_bo[j] = brw->wm.surf_bo[0]; - dri_bo_reference(brw->wm.surf_bo[j]); - brw->wm.nr_surfaces = j + 1; + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = brw->wm.surf_bo[0]; + dri_bo_reference(brw->wm.surf_bo[surf]); + brw->wm.nr_surfaces = surf + 1; } else { /* regular texture */ brw_update_texture_surface(ctx, i); - brw->wm.nr_surfaces = j + 1; + brw->wm.nr_surfaces = surf + 1; } } else { - dri_bo_unreference(brw->wm.surf_bo[j]); - brw->wm.surf_bo[j] = NULL; + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; } } - /* Update surface for fragment shader constant buffer */ - { - const GLuint j = BRW_WM_MAX_SURF - 1; - const struct brw_fragment_program *fp = - brw_fragment_program_const(brw->fragment_program); - - brw_update_constant_surface(ctx, fp); - brw->wm.nr_surfaces = j + 1; - } - - dri_bo_unreference(brw->wm.bind_bo); brw->wm.bind_bo = brw_wm_get_binding_table(brw); if (brw->wm.nr_surfaces != old_nr_surfaces) - brw->state.dirty.brw |= BRW_NEW_NR_SURFACES; + brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; } - const struct brw_tracked_state brw_wm_surfaces = { .dirty = { - .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS, - .brw = BRW_NEW_CONTEXT, + .mesa = (_NEW_COLOR | + _NEW_TEXTURE | + _NEW_BUFFERS), + .brw = (BRW_NEW_CONTEXT | + BRW_NEW_WM_SURFACES), .cache = 0 }, .prepare = prepare_wm_surfaces, diff --git a/src/mesa/drivers/dri/i965/intel_generatemipmap.c b/src/mesa/drivers/dri/i965/intel_generatemipmap.c new file mode 120000 index 00000000000..4c6b37ada01 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_generatemipmap.c @@ -0,0 +1 @@ +../intel/intel_generatemipmap.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 4ae9b118a3d..49198281316 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -32,6 +32,8 @@ #include "main/mtypes.h" #include "main/context.h" #include "main/enums.h" +#include "main/texformat.h" +#include "main/colormac.h" #include "intel_blit.h" #include "intel_buffers.h" @@ -484,10 +486,9 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) const GLbitfield bufBit = 1 << buf; if ((clearMask & bufBit) && !(bufBit & skipBuffers)) { /* OK, clear this renderbuffer */ - struct intel_region *irb_region = - intel_get_rb_region(fb, buf); + struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, buf); dri_bo *write_buffer = - intel_region_buffer(intel, irb_region, + intel_region_buffer(intel, irb->region, all ? INTEL_WRITE_FULL : INTEL_WRITE_PART); @@ -495,15 +496,13 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) GLint pitch, cpp; GLuint BR13, CMD; - ASSERT(irb_region); - - pitch = irb_region->pitch; - cpp = irb_region->cpp; + pitch = irb->region->pitch; + cpp = irb->region->cpp; DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", __FUNCTION__, - irb_region->buffer, (pitch * cpp), - irb_region->draw_offset, + irb->region->buffer, (pitch * cpp), + irb->region->draw_offset, b.x1, b.y1, b.x2 - b.x1, b.y2 - b.y1); BR13 = 0xf0 << 16; @@ -529,7 +528,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) } #ifndef I915 - if (irb_region->tiling != I915_TILING_NONE) { + if (irb->region->tiling != I915_TILING_NONE) { CMD |= XY_DST_TILED; pitch /= 4; } @@ -540,9 +539,36 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) clearVal = clear_depth; } else { - clearVal = (cpp == 4) - ? intel->ClearColor8888 : intel->ClearColor565; - } + uint8_t clear[4]; + GLclampf *color = ctx->Color.ClearColor; + + CLAMPED_FLOAT_TO_UBYTE(clear[0], color[0]); + CLAMPED_FLOAT_TO_UBYTE(clear[1], color[1]); + CLAMPED_FLOAT_TO_UBYTE(clear[2], color[2]); + CLAMPED_FLOAT_TO_UBYTE(clear[3], color[3]); + + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_ARGB8888: + clearVal = intel->ClearColor8888; + break; + case MESA_FORMAT_RGB565: + clearVal = intel->ClearColor565; + break; + case MESA_FORMAT_ARGB4444: + clearVal = PACK_COLOR_4444(clear[3], clear[0], + clear[1], clear[2]); + break; + case MESA_FORMAT_ARGB1555: + clearVal = PACK_COLOR_1555(clear[3], clear[0], + clear[1], clear[2]); + break; + default: + _mesa_problem(ctx, "Unexpected renderbuffer format: %d\n", + irb->texformat->MesaFormat); + clearVal = 0; + } + } + /* _mesa_debug(ctx, "hardware blit clear buf %d rb id %d\n", buf, irb->Base.Name); @@ -558,7 +584,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) OUT_BATCH((b.y2 << 16) | b.x2); OUT_RELOC(write_buffer, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - irb_region->draw_offset); + irb->region->draw_offset); OUT_BATCH(clearVal); ADVANCE_BATCH(); clearMask &= ~bufBit; /* turn off bit, for faster loop exit */ diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index b7c7eeb368f..23ba3b9ef67 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -35,6 +35,9 @@ #include "intel_batchbuffer.h" #include "intel_regions.h" +static GLboolean +intel_bufferobj_unmap(GLcontext * ctx, + GLenum target, struct gl_buffer_object *obj); /** Allocates a new dri_bo to store the data for the buffer object. */ static void @@ -100,8 +103,15 @@ intel_bufferobj_free(GLcontext * ctx, struct gl_buffer_object *obj) struct intel_buffer_object *intel_obj = intel_buffer_object(obj); assert(intel_obj); - assert(!obj->Pointer); /* Mesa should have unmapped it */ + /* Buffer objects are automatically unmapped when deleting according + * to the spec, but Mesa doesn't do UnmapBuffer for us at context destroy + * (though it does if you call glDeleteBuffers) + */ + if (obj->Pointer) + intel_bufferobj_unmap(ctx, 0, obj); + + _mesa_free(intel_obj->sys_buffer); if (intel_obj->region) { intel_bufferobj_release_region(intel, intel_obj); } @@ -142,7 +152,23 @@ intel_bufferobj_data(GLcontext * ctx, dri_bo_unreference(intel_obj->buffer); intel_obj->buffer = NULL; } + _mesa_free(intel_obj->sys_buffer); + intel_obj->sys_buffer = NULL; + if (size != 0) { +#ifdef I915 + /* On pre-965, stick VBOs in system memory, as we're always doing swtnl + * with their contents anyway. + */ + if (target == GL_ARRAY_BUFFER || target == GL_ELEMENT_ARRAY_BUFFER) { + intel_obj->sys_buffer = _mesa_malloc(size); + if (intel_obj->sys_buffer != NULL) { + if (data != NULL) + memcpy(intel_obj->sys_buffer, data, size); + return; + } + } +#endif intel_bufferobj_alloc_buffer(intel, intel_obj); if (data != NULL) @@ -172,7 +198,10 @@ intel_bufferobj_subdata(GLcontext * ctx, if (intel_obj->region) intel_bufferobj_cow(intel, intel_obj); - dri_bo_subdata(intel_obj->buffer, offset, size, data); + if (intel_obj->sys_buffer) + memcpy((char *)intel_obj->sys_buffer + offset, data, size); + else + dri_bo_subdata(intel_obj->buffer, offset, size, data); } @@ -204,11 +233,16 @@ intel_bufferobj_map(GLcontext * ctx, { struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + GLboolean read_only = (access == GL_READ_ONLY_ARB); + GLboolean write_only = (access == GL_WRITE_ONLY_ARB); - /* XXX: Translate access to flags arg below: - */ assert(intel_obj); + if (intel_obj->sys_buffer) { + obj->Pointer = intel_obj->sys_buffer; + return obj->Pointer; + } + if (intel_obj->region) intel_bufferobj_cow(intel, intel_obj); @@ -217,7 +251,14 @@ intel_bufferobj_map(GLcontext * ctx, return NULL; } - dri_bo_map(intel_obj->buffer, GL_TRUE); + if (write_only && intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(intel_obj->buffer); + intel_obj->mapped_gtt = GL_TRUE; + } else { + drm_intel_bo_map(intel_obj->buffer, !read_only); + intel_obj->mapped_gtt = GL_FALSE; + } + obj->Pointer = intel_obj->buffer->virtual; return obj->Pointer; } @@ -235,7 +276,11 @@ intel_bufferobj_unmap(GLcontext * ctx, assert(intel_obj); if (intel_obj->buffer != NULL) { assert(obj->Pointer); - dri_bo_unmap(intel_obj->buffer); + if (intel_obj->mapped_gtt) { + drm_intel_gem_bo_unmap_gtt(intel_obj->buffer); + } else { + drm_intel_bo_unmap(intel_obj->buffer); + } obj->Pointer = NULL; } return GL_TRUE; @@ -254,6 +299,18 @@ intel_bufferobj_buffer(struct intel_context *intel, } } + if (intel_obj->buffer == NULL) { + intel_bufferobj_alloc_buffer(intel, intel_obj); + intel_bufferobj_subdata(&intel->ctx, + GL_ARRAY_BUFFER_ARB, + 0, + intel_obj->Base.Size, + intel_obj->sys_buffer, + &intel_obj->Base); + _mesa_free(intel_obj->sys_buffer); + intel_obj->sys_buffer = NULL; + } + return intel_obj->buffer; } diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.h b/src/mesa/drivers/dri/intel/intel_buffer_objects.h index bf6dbd58f27..04310156319 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.h +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.h @@ -42,10 +42,13 @@ struct intel_buffer_object { struct gl_buffer_object Base; dri_bo *buffer; /* the low-level buffer manager's buffer handle */ + /** System memory buffer data, if not using a BO to store the data. */ + void *sys_buffer; struct intel_region *region; /* Is there a zero-copy texture associated with this (pixel) buffer object? */ + GLboolean mapped_gtt; }; diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c index 0929a2c223c..df5c3fc1766 100644 --- a/src/mesa/drivers/dri/intel/intel_buffers.c +++ b/src/mesa/drivers/dri/intel/intel_buffers.c @@ -157,7 +157,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) /* Do this here, not core Mesa, since this function is called from * many places within the driver. */ - if (ctx->NewState & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { + if (ctx->NewState & _NEW_BUFFERS) { /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */ _mesa_update_framebuffer(ctx); /* this updates the DrawBuffer's Width/Height if it's a FBO */ @@ -202,6 +202,8 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) intel_batchbuffer_flush(intel->batch); intel->front_cliprects = GL_TRUE; colorRegions[0] = intel_get_rb_region(fb, BUFFER_FRONT_LEFT); + + intel->front_buffer_dirty = GL_TRUE; } else { if (!intel->constant_cliprect && intel->front_cliprects) @@ -274,7 +276,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) ctx->Driver.Enable(ctx, GL_DEPTH_TEST, (ctx->Depth.Test && fb->Visual.depthBits > 0)); ctx->Driver.Enable(ctx, GL_STENCIL_TEST, - (ctx->Stencil._Enabled && fb->Visual.stencilBits > 0)); + (ctx->Stencil.Enabled && fb->Visual.stencilBits > 0)); } else { /* Mesa's Stencil._Enabled field is updated when @@ -319,6 +321,23 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) static void intelDrawBuffer(GLcontext * ctx, GLenum mode) { + if ((ctx->DrawBuffer != NULL) && (ctx->DrawBuffer->Name == 0)) { + struct intel_context *const intel = intel_context(ctx); + const GLboolean was_front_buffer_rendering = + intel->is_front_buffer_rendering; + + intel->is_front_buffer_rendering = (mode == GL_FRONT_LEFT) + || (mode == GL_FRONT); + + /* If we weren't front-buffer rendering before but we are now, make sure + * that the front-buffer has actually been allocated. + */ + if (!was_front_buffer_rendering && intel->is_front_buffer_rendering) { + intel_update_renderbuffers(intel->driContext, + intel->driContext->driDrawablePriv); + } + } + intel_draw_buffer(ctx, ctx->DrawBuffer); } diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index 28281b38615..309ac1923bc 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -30,6 +30,7 @@ #include "main/enums.h" #include "main/image.h" #include "main/mtypes.h" +#include "main/arrayobj.h" #include "main/attrib.h" #include "main/blend.h" #include "main/bufferobj.h" @@ -38,6 +39,7 @@ #include "main/enable.h" #include "main/macros.h" #include "main/matrix.h" +#include "main/polygon.h" #include "main/texstate.h" #include "main/shaders.h" #include "main/stencil.h" @@ -51,6 +53,7 @@ #include "intel_clear.h" #include "intel_fbo.h" #include "intel_pixel.h" +#include "intel_regions.h" #define FILE_DEBUG_FLAG DEBUG_BLIT @@ -65,6 +68,45 @@ BUFFER_BIT_COLOR6 | \ BUFFER_BIT_COLOR7) + +/** + * Per-context one-time init of things for intl_clear_tris(). + * Basically set up a private array object for vertex/color arrays. + */ +static void +init_clear(GLcontext *ctx) +{ + struct intel_context *intel = intel_context(ctx); + struct gl_array_object *arraySave = NULL; + const GLuint arrayBuffer = ctx->Array.ArrayBufferObj->Name; + const GLuint elementBuffer = ctx->Array.ElementArrayBufferObj->Name; + + /* create new array object */ + intel->clear.arrayObj = _mesa_new_array_object(ctx, ~0); + + /* save current array object, bind new one */ + _mesa_reference_array_object(ctx, &arraySave, ctx->Array.ArrayObj); + _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, intel->clear.arrayObj); + + /* one-time setup of vertex arrays (pos, color) */ + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0); + _mesa_BindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, 0); + _mesa_ColorPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), intel->clear.color); + _mesa_VertexPointer(3, GL_FLOAT, 3 * sizeof(GLfloat), intel->clear.vertices); + _mesa_Enable(GL_COLOR_ARRAY); + _mesa_Enable(GL_VERTEX_ARRAY); + + /* restore original array object */ + _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, arraySave); + _mesa_reference_array_object(ctx, &arraySave, NULL); + + /* restore original buffer objects */ + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, arrayBuffer); + _mesa_BindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, elementBuffer); +} + + + /** * Perform glClear where mask contains only color, depth, and/or stencil. * @@ -77,14 +119,16 @@ void intel_clear_tris(GLcontext *ctx, GLbitfield mask) { struct intel_context *intel = intel_context(ctx); - GLfloat vertices[4][3]; - GLfloat color[4][4]; GLfloat dst_z; struct gl_framebuffer *fb = ctx->DrawBuffer; int i; GLboolean saved_fp_enable = GL_FALSE, saved_vp_enable = GL_FALSE; GLuint saved_shader_program = 0; unsigned int saved_active_texture; + struct gl_array_object *arraySave = NULL; + + if (!intel->clear.arrayObj) + init_clear(ctx); assert((mask & ~(TRI_CLEAR_COLOR_BITS | BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL)) == 0); @@ -93,10 +137,10 @@ intel_clear_tris(GLcontext *ctx, GLbitfield mask) GL_CURRENT_BIT | GL_DEPTH_BUFFER_BIT | GL_ENABLE_BIT | + GL_POLYGON_BIT | GL_STENCIL_BUFFER_BIT | GL_TRANSFORM_BIT | GL_CURRENT_BIT); - _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT); saved_active_texture = ctx->Texture.CurrentUnit; /* Disable existing GL state we don't want to apply to a clear. */ @@ -114,6 +158,7 @@ intel_clear_tris(GLcontext *ctx, GLbitfield mask) _mesa_Disable(GL_CLIP_PLANE3); _mesa_Disable(GL_CLIP_PLANE4); _mesa_Disable(GL_CLIP_PLANE5); + _mesa_PolygonMode(GL_FRONT_AND_BACK, GL_FILL); if (ctx->Extensions.ARB_fragment_program && ctx->FragmentProgram.Enabled) { saved_fp_enable = GL_TRUE; _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB); @@ -146,13 +191,14 @@ intel_clear_tris(GLcontext *ctx, GLbitfield mask) } } + /* save current array object, bind our private one */ + _mesa_reference_array_object(ctx, &arraySave, ctx->Array.ArrayObj); + _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, intel->clear.arrayObj); + intel_meta_set_passthrough_transform(intel); for (i = 0; i < 4; i++) { - color[i][0] = ctx->Color.ClearColor[0]; - color[i][1] = ctx->Color.ClearColor[1]; - color[i][2] = ctx->Color.ClearColor[2]; - color[i][3] = ctx->Color.ClearColor[3]; + COPY_4FV(intel->clear.color[i], ctx->Color.ClearColor); } /* convert clear Z from [0,1] to NDC coord in [-1,1] */ @@ -161,23 +207,18 @@ intel_clear_tris(GLcontext *ctx, GLbitfield mask) /* Prepare the vertices, which are the same regardless of which buffer we're * drawing to. */ - vertices[0][0] = fb->_Xmin; - vertices[0][1] = fb->_Ymin; - vertices[0][2] = dst_z; - vertices[1][0] = fb->_Xmax; - vertices[1][1] = fb->_Ymin; - vertices[1][2] = dst_z; - vertices[2][0] = fb->_Xmax; - vertices[2][1] = fb->_Ymax; - vertices[2][2] = dst_z; - vertices[3][0] = fb->_Xmin; - vertices[3][1] = fb->_Ymax; - vertices[3][2] = dst_z; - - _mesa_ColorPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &color); - _mesa_VertexPointer(3, GL_FLOAT, 3 * sizeof(GLfloat), &vertices); - _mesa_Enable(GL_COLOR_ARRAY); - _mesa_Enable(GL_VERTEX_ARRAY); + intel->clear.vertices[0][0] = fb->_Xmin; + intel->clear.vertices[0][1] = fb->_Ymin; + intel->clear.vertices[0][2] = dst_z; + intel->clear.vertices[1][0] = fb->_Xmax; + intel->clear.vertices[1][1] = fb->_Ymin; + intel->clear.vertices[1][2] = dst_z; + intel->clear.vertices[2][0] = fb->_Xmax; + intel->clear.vertices[2][1] = fb->_Ymax; + intel->clear.vertices[2][2] = dst_z; + intel->clear.vertices[3][0] = fb->_Xmin; + intel->clear.vertices[3][1] = fb->_Ymax; + intel->clear.vertices[3][2] = dst_z; while (mask != 0) { GLuint this_mask = 0; @@ -215,8 +256,10 @@ intel_clear_tris(GLcontext *ctx, GLbitfield mask) /* Control writing of the stencil clear value to stencil. */ if (this_mask & BUFFER_BIT_STENCIL) { _mesa_Enable(GL_STENCIL_TEST); - _mesa_StencilOp(GL_REPLACE, GL_REPLACE, GL_REPLACE); - _mesa_StencilFuncSeparate(GL_FRONT, GL_ALWAYS, ctx->Stencil.Clear, + _mesa_StencilOpSeparate(GL_FRONT_AND_BACK, + GL_REPLACE, GL_REPLACE, GL_REPLACE); + _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS, + ctx->Stencil.Clear, ctx->Stencil.WriteMask[0]); } else { _mesa_Disable(GL_STENCIL_TEST); @@ -238,8 +281,11 @@ intel_clear_tris(GLcontext *ctx, GLbitfield mask) if (saved_shader_program) _mesa_UseProgramObjectARB(saved_shader_program); - _mesa_PopClientAttrib(); _mesa_PopAttrib(); + + /* restore current array object */ + _mesa_reference_array_object(ctx, &ctx->Array.ArrayObj, arraySave); + _mesa_reference_array_object(ctx, &arraySave, NULL); } static const char *buffer_names[] = { @@ -267,7 +313,6 @@ static const char *buffer_names[] = { static void intelClear(GLcontext *ctx, GLbitfield mask) { - struct intel_context *intel = intel_context(ctx); const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask); GLbitfield tri_mask = 0; GLbitfield blit_mask = 0; @@ -295,7 +340,7 @@ intelClear(GLcontext *ctx, GLbitfield mask) = intel_get_rb_region(fb, BUFFER_STENCIL); if (stencilRegion) { /* have hw stencil */ - if (IS_965(intel->intelScreen->deviceID) || + if (stencilRegion->tiling == I915_TILING_Y || (ctx->Stencil.WriteMask[0] & 0xff) != 0xff) { /* We have to use the 3D engine if we're clearing a partial mask * of the stencil buffer, or if we're on a 965 which has a tiled @@ -312,9 +357,10 @@ intelClear(GLcontext *ctx, GLbitfield mask) /* HW depth */ if (mask & BUFFER_BIT_DEPTH) { + const struct intel_region *irb = intel_get_rb_region(fb, BUFFER_DEPTH); + /* clear depth with whatever method is used for stencil (see above) */ - if (IS_965(intel->intelScreen->deviceID) || - tri_mask & BUFFER_BIT_STENCIL) + if (irb->tiling == I915_TILING_Y || tri_mask & BUFFER_BIT_STENCIL) tri_mask |= BUFFER_BIT_DEPTH; else blit_mask |= BUFFER_BIT_DEPTH; diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index a664e749360..f88b37d0f3a 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -28,6 +28,7 @@ #include "main/glheader.h" #include "main/context.h" +#include "main/arrayobj.h" #include "main/extensions.h" #include "main/framebuffer.h" #include "main/imports.h" @@ -173,6 +174,24 @@ intelGetString(GLcontext * ctx, GLenum name) } } +static unsigned +intel_bits_per_pixel(const struct intel_renderbuffer *rb) +{ + switch (rb->Base._ActualFormat) { + case GL_RGB5: + case GL_DEPTH_COMPONENT16: + return 16; + case GL_RGB8: + case GL_RGBA8: + case GL_DEPTH_COMPONENT24: + case GL_DEPTH24_STENCIL8_EXT: + case GL_STENCIL_INDEX8_EXT: + return 32; + default: + return 0; + } +} + void intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) { @@ -180,7 +199,7 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) struct intel_renderbuffer *rb; struct intel_region *region, *depth_region; struct intel_context *intel = context->driverPrivate; - __DRIbuffer *buffers; + __DRIbuffer *buffers = NULL; __DRIscreen *screen; int i, count; unsigned int attachments[10]; @@ -192,22 +211,63 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) screen = intel->intelScreen->driScrnPriv; - i = 0; - if (intel_fb->color_rb[0]) - attachments[i++] = __DRI_BUFFER_FRONT_LEFT; - if (intel_fb->color_rb[1]) - attachments[i++] = __DRI_BUFFER_BACK_LEFT; - if (intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH)) - attachments[i++] = __DRI_BUFFER_DEPTH; - if (intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL)) - attachments[i++] = __DRI_BUFFER_STENCIL; - - buffers = (*screen->dri2.loader->getBuffers)(drawable, - &drawable->w, - &drawable->h, - attachments, i, - &count, - drawable->loaderPrivate); + if (screen->dri2.loader + && (screen->dri2.loader->base.version > 2) + && (screen->dri2.loader->getBuffersWithFormat != NULL)) { + struct intel_renderbuffer *depth_rb; + struct intel_renderbuffer *stencil_rb; + + i = 0; + if ((intel->is_front_buffer_rendering || !intel_fb->color_rb[1]) + && intel_fb->color_rb[0]) { + attachments[i++] = __DRI_BUFFER_FRONT_LEFT; + attachments[i++] = intel_bits_per_pixel(intel_fb->color_rb[0]); + } + + if (intel_fb->color_rb[1]) { + attachments[i++] = __DRI_BUFFER_BACK_LEFT; + attachments[i++] = intel_bits_per_pixel(intel_fb->color_rb[1]); + } + + depth_rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); + stencil_rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL); + + if ((depth_rb != NULL) && (stencil_rb != NULL)) { + attachments[i++] = __DRI_BUFFER_DEPTH_STENCIL; + attachments[i++] = intel_bits_per_pixel(depth_rb); + } else if (depth_rb != NULL) { + attachments[i++] = __DRI_BUFFER_DEPTH; + attachments[i++] = intel_bits_per_pixel(depth_rb); + } else if (stencil_rb != NULL) { + attachments[i++] = __DRI_BUFFER_STENCIL; + attachments[i++] = intel_bits_per_pixel(stencil_rb); + } + + buffers = + (*screen->dri2.loader->getBuffersWithFormat)(drawable, + &drawable->w, + &drawable->h, + attachments, i / 2, + &count, + drawable->loaderPrivate); + } else if (screen->dri2.loader) { + i = 0; + if (intel_fb->color_rb[0]) + attachments[i++] = __DRI_BUFFER_FRONT_LEFT; + if (intel_fb->color_rb[1]) + attachments[i++] = __DRI_BUFFER_BACK_LEFT; + if (intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH)) + attachments[i++] = __DRI_BUFFER_DEPTH; + if (intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL)) + attachments[i++] = __DRI_BUFFER_STENCIL; + + buffers = (*screen->dri2.loader->getBuffers)(drawable, + &drawable->w, + &drawable->h, + attachments, i, + &count, + drawable->loaderPrivate); + } if (buffers == NULL) return; @@ -235,6 +295,11 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) region_name = "dri2 front buffer"; break; + case __DRI_BUFFER_FAKE_FRONT_LEFT: + rb = intel_fb->color_rb[0]; + region_name = "dri2 fake front buffer"; + break; + case __DRI_BUFFER_BACK_LEFT: rb = intel_fb->color_rb[1]; region_name = "dri2 back buffer"; @@ -245,6 +310,11 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) region_name = "dri2 depth buffer"; break; + case __DRI_BUFFER_DEPTH_STENCIL: + rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); + region_name = "dri2 depth / stencil buffer"; + break; + case __DRI_BUFFER_STENCIL: rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL); region_name = "dri2 stencil buffer"; @@ -291,6 +361,23 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) intel_renderbuffer_set_region(rb, region); intel_region_release(®ion); + + if (buffers[i].attachment == __DRI_BUFFER_DEPTH_STENCIL) { + rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL); + if (rb != NULL) { + struct intel_region *stencil_region = NULL; + + if (rb->region) { + dri_bo_flink(rb->region->buffer, &name); + if (name == buffers[i].name) + continue; + } + + intel_region_reference(&stencil_region, region); + intel_renderbuffer_set_region(rb, stencil_region); + intel_region_release(&stencil_region); + } + } } driUpdateFramebufferSize(&intel->ctx, drawable); @@ -307,7 +394,7 @@ intel_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h) if (!driContext->driScreenPriv->dri2.enabled) return; - if (!intel->internal_viewport_call) { + if (!intel->internal_viewport_call && ctx->DrawBuffer->Name == 0) { intel_update_renderbuffers(driContext, driContext->driDrawablePriv); if (driContext->driDrawablePriv != driContext->driReadablePriv) intel_update_renderbuffers(driContext, driContext->driReadablePriv); @@ -391,6 +478,27 @@ intel_flush(GLcontext *ctx, GLboolean needs_mi_flush) if (intel->batch->map != intel->batch->ptr) intel_batchbuffer_flush(intel->batch); + + if ((ctx->DrawBuffer->Name == 0) && intel->front_buffer_dirty) { + __DRIscreen *const screen = intel->intelScreen->driScrnPriv; + + if (screen->dri2.loader && + (screen->dri2.loader->base.version >= 2) + && (screen->dri2.loader->flushFrontBuffer != NULL)) { + (*screen->dri2.loader->flushFrontBuffer)(intel->driDrawable, + intel->driDrawable->loaderPrivate); + + /* Only clear the dirty bit if front-buffer rendering is no longer + * enabled. This is done so that the dirty bit can only be set in + * glDrawBuffer. Otherwise the dirty bit would have to be set at + * each of N places that do rendering. This has worse performances, + * but it is much easier to get correct. + */ + if (intel->is_front_buffer_rendering) { + intel->front_buffer_dirty = GL_FALSE; + } + } + } } void @@ -442,6 +550,9 @@ intelInitDriverFunctions(struct dd_function_table *functions) functions->CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D; intelInitTextureFuncs(functions); + intelInitTextureImageFuncs(functions); + intelInitTextureSubImageFuncs(functions); + intelInitTextureCopyImageFuncs(functions); intelInitStateFuncs(functions); intelInitClearFuncs(functions); intelInitBufferFuncs(functions); @@ -502,8 +613,6 @@ intelInitContext(struct intel_context *intel, } } - ctx->Const.MaxTextureMaxAnisotropy = 2.0; - /* This doesn't yet catch all non-conformant rendering, but it's a * start. */ @@ -544,6 +653,13 @@ intelInitContext(struct intel_context *intel, _mesa_init_point(ctx); ctx->Const.MaxColorAttachments = 4; /* XXX FBO: review this */ + if (IS_965(intelScreen->deviceID)) { + if (MAX_WIDTH > 8192) + ctx->Const.MaxRenderbufferSize = 8192; + } else { + if (MAX_WIDTH > 2048) + ctx->Const.MaxRenderbufferSize = 2048; + } /* Initialize the software rasterizer and helper modules. */ _swrast_CreateContext(ctx); @@ -611,6 +727,9 @@ intelInitContext(struct intel_context *intel, else if (driQueryOptionb(&intel->optionCache, "force_s3tc_enable")) { _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); } + intel->use_texture_tiling = driQueryOptionb(&intel->optionCache, + "texture_tiling"); + intel->use_early_z = driQueryOptionb(&intel->optionCache, "early_z"); intel->prim.primitive = ~0; @@ -650,6 +769,9 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) INTEL_FIREVERTICES(intel); + if (intel->clear.arrayObj) + _mesa_delete_array_object(&intel->ctx, intel->clear.arrayObj); + intel->vtbl.destroy(intel); release_texture_heaps = (intel->ctx.Shared->RefCount == 1); @@ -669,13 +791,64 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) intel->prim.vb_bo = NULL; if (release_texture_heaps) { - /* This share group is about to go away, free our private - * texture object data. + /* Nothing is currently done here to free texture heaps; + * but we're not using the texture heap utilities, so I + * rather think we shouldn't. I've taken a look, and can't + * find any private texture data hanging around anywhere, but + * I'm not yet certain there isn't any at all... */ - if (INTEL_DEBUG & DEBUG_TEXTURE) + /* if (INTEL_DEBUG & DEBUG_TEXTURE) fprintf(stderr, "do something to free texture heaps\n"); + */ } + /* XXX In intelMakeCurrent() below, the context's static regions are + * referenced inside the frame buffer; it's listed as a hack, + * with a comment of "XXX FBO temporary fix-ups!", but + * as long as it's there, we should release the regions here. + * The do/while loop around the block is used to allow the + * "continue" statements inside the block to exit the block, + * to avoid many layers of "if" constructs. + */ + do { + __DRIdrawablePrivate * driDrawPriv = intel->driDrawable; + struct intel_framebuffer *intel_fb; + struct intel_renderbuffer *irbDepth, *irbStencil; + if (!driDrawPriv) { + /* We're already detached from the drawable; exit this block. */ + continue; + } + intel_fb = (struct intel_framebuffer *) driDrawPriv->driverPrivate; + if (!intel_fb) { + /* The frame buffer is already gone; exit this block. */ + continue; + } + irbDepth = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); + irbStencil = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL); + + /* If the regions of the frame buffer still match the regions + * of the context, release them. If they've changed somehow, + * leave them alone. + */ + if (intel_fb->color_rb[0] && intel_fb->color_rb[0]->region == intel->front_region) { + intel_renderbuffer_set_region(intel_fb->color_rb[0], NULL); + } + if (intel_fb->color_rb[1] && intel_fb->color_rb[1]->region == intel->back_region) { + intel_renderbuffer_set_region(intel_fb->color_rb[1], NULL); + } + + if (irbDepth && irbDepth->region == intel->depth_region) { + intel_renderbuffer_set_region(irbDepth, NULL); + } + /* Usually, the stencil buffer is the same as the depth buffer; + * but they're handled separately in MakeCurrent, so we'll + * handle them separately here. + */ + if (irbStencil && irbStencil->region == intel->depth_region) { + intel_renderbuffer_set_region(irbStencil, NULL); + } + } while (0); + intel_region_release(&intel->front_region); intel_region_release(&intel->back_region); intel_region_release(&intel->depth_region); @@ -684,6 +857,8 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) /* free the Mesa context */ _mesa_free_context_data(&intel->ctx); + + } } @@ -712,7 +887,10 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv, if (driDrawPriv != driReadPriv) intel_update_renderbuffers(driContextPriv, driReadPriv); } else { - /* XXX FBO temporary fix-ups! */ + /* XXX FBO temporary fix-ups! These are released in + * intelDextroyContext(), above. Changes here should be + * reflected there. + */ /* if the renderbuffers don't have regions, init them from the context */ struct intel_renderbuffer *irbDepth = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index d635f3f50dc..7d3c80bb21c 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -48,6 +48,8 @@ #define DV_PF_555 (1<<8) #define DV_PF_565 (2<<8) #define DV_PF_8888 (3<<8) +#define DV_PF_4444 (8<<8) +#define DV_PF_1555 (9<<8) struct intel_region; struct intel_context; @@ -159,12 +161,22 @@ struct intel_context struct { struct gl_fragment_program *bitmap_fp; struct gl_vertex_program *passthrough_vp; + struct gl_buffer_object *texcoord_vbo; struct gl_fragment_program *saved_fp; GLboolean saved_fp_enable; struct gl_vertex_program *saved_vp; GLboolean saved_vp_enable; + struct gl_fragment_program *tex2d_fp; + + GLboolean saved_texcoord_enable; + struct gl_buffer_object *saved_array_vbo, *saved_texcoord_vbo; + GLenum saved_texcoord_type; + GLsizei saved_texcoord_size, saved_texcoord_stride; + const void *saved_texcoord_ptr; + int saved_active_texture; + GLint saved_vp_x, saved_vp_y; GLsizei saved_vp_width, saved_vp_height; GLenum saved_matrix_mode; @@ -213,6 +225,14 @@ struct intel_context GLuint ClearColor565; GLuint ClearColor8888; + /* info for intel_clear_tris() */ + struct + { + struct gl_array_object *arrayObj; + GLfloat vertices[4][3]; + GLfloat color[4][4]; + } clear; + /* Offsets of fields within the current vertex: */ GLuint coloroffset; @@ -262,11 +282,32 @@ struct intel_context * flush time while the lock is held. */ GLboolean constant_cliprect; + /** * In !constant_cliprect mode, set to true if the front cliprects should be * used instead of back. */ GLboolean front_cliprects; + + /** + * Set if rendering has occured to the drawable's front buffer. + * + * This is used in the DRI2 case to detect that glFlush should also copy + * the contents of the fake front buffer to the real front buffer. + */ + GLboolean front_buffer_dirty; + + /** + * Track whether front-buffer rendering is currently enabled + * + * A separate flag is used to track this in order to support MRT more + * easily. + */ + GLboolean is_front_buffer_rendering; + + GLboolean use_texture_tiling; + GLboolean use_early_z; + drm_clip_rect_t fboRect; /**< cliprect for FBO rendering */ int perf_boxes; @@ -319,6 +360,7 @@ extern char *__progname; #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) +#define IS_POWER_OF_TWO(val) (((val) & (val - 1)) == 0) #define INTEL_FIREVERTICES(intel) \ do { \ @@ -520,6 +562,9 @@ void intel_viewport(GLcontext * ctx, GLint x, GLint y, void intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable); +void i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region, + uint32_t buffer_id); + /*====================================================================== * Inline conversion functions. * These are better-typed than the macros used previously: @@ -530,4 +575,10 @@ intel_context(GLcontext * ctx) return (struct intel_context *) ctx; } +static INLINE GLboolean +is_power_of_two(uint32_t value) +{ + return (value & (value - 1)) == 0; +} + #endif diff --git a/src/mesa/drivers/dri/intel/intel_decode.c b/src/mesa/drivers/dri/intel/intel_decode.c index f04638206d5..a9dfe281cbd 100644 --- a/src/mesa/drivers/dri/intel/intel_decode.c +++ b/src/mesa/drivers/dri/intel/intel_decode.c @@ -800,6 +800,7 @@ static int decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i830) { unsigned int len, i, c, opcode, word, map, sampler, instr; + char *format; struct { uint32_t opcode; @@ -1001,6 +1002,35 @@ decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i (*failures)++; } return len; + case 0x85: + len = (data[0] & 0x0000000f) + 2; + + if (len != 2) + fprintf(out, "Bad count in 3DSTATE_DEST_BUFFER_VARIABLES\n"); + if (count < 2) + BUFFER_FAIL(count, len, "3DSTATE_DEST_BUFFER_VARIABLES"); + + instr_out(data, hw_offset, 0, + "3DSTATE_DEST_BUFFER_VARIABLES\n"); + + switch ((data[1] >> 8) & 0xf) { + case 0x0: format = "g8"; break; + case 0x1: format = "x1r5g5b5"; break; + case 0x2: format = "r5g6b5"; break; + case 0x3: format = "a8r8g8b8"; break; + case 0x4: format = "ycrcb_swapy"; break; + case 0x5: format = "ycrcb_normal"; break; + case 0x6: format = "ycrcb_swapuv"; break; + case 0x7: format = "ycrcb_swapuvy"; break; + case 0x8: format = "a4r4g4b4"; break; + case 0x9: format = "a1r5g5b5"; break; + case 0xa: format = "a2r10g10b10"; break; + default: format = "BAD"; break; + } + instr_out(data, hw_offset, 1, "%s format, early Z %sabled\n", + format, + (data[1] & (1 << 31)) ? "en" : "dis"); + return len; } for (opcode = 0; opcode < sizeof(opcodes_3d_1d) / sizeof(opcodes_3d_1d[0]); diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index 9ec1b4ec2f4..1e8b1878abe 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -48,6 +48,7 @@ #define need_GL_EXT_point_parameters #define need_GL_EXT_secondary_color #define need_GL_EXT_stencil_two_side +#define need_GL_APPLE_vertex_array_object #define need_GL_ATI_separate_stencil #define need_GL_ATI_envmap_bumpmap #define need_GL_NV_point_sprite @@ -95,6 +96,7 @@ static const struct dri_extension card_extensions[] = { { "GL_EXT_texture_lod_bias", NULL }, { "GL_3DFX_texture_compression_FXT1", NULL }, { "GL_APPLE_client_storage", NULL }, + { "GL_APPLE_vertex_array_object", GL_APPLE_vertex_array_object_functions}, { "GL_MESA_pack_invert", NULL }, { "GL_MESA_ycbcr_texture", NULL }, { "GL_NV_blend_square", NULL }, diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index a401f730ba2..0ea413aee1d 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -119,6 +119,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->RedBits = 5; rb->GreenBits = 6; rb->BlueBits = 5; + irb->texformat = &_mesa_texformat_rgb565; cpp = 2; break; case GL_RGB: @@ -132,6 +133,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->GreenBits = 8; rb->BlueBits = 8; rb->AlphaBits = 0; + irb->texformat = &_mesa_texformat_argb8888; /* XXX: Need xrgb8888 */ cpp = 4; break; case GL_RGBA: @@ -148,6 +150,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->GreenBits = 8; rb->BlueBits = 8; rb->AlphaBits = 8; + irb->texformat = &_mesa_texformat_argb8888; cpp = 4; break; case GL_STENCIL_INDEX: @@ -160,12 +163,14 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->DataType = GL_UNSIGNED_INT_24_8_EXT; rb->StencilBits = 8; cpp = 4; + irb->texformat = &_mesa_texformat_s8_z24; break; case GL_DEPTH_COMPONENT16: rb->_ActualFormat = GL_DEPTH_COMPONENT16; rb->DataType = GL_UNSIGNED_SHORT; rb->DepthBits = 16; cpp = 2; + irb->texformat = &_mesa_texformat_z16; break; case GL_DEPTH_COMPONENT: case GL_DEPTH_COMPONENT24: @@ -174,6 +179,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->DataType = GL_UNSIGNED_INT_24_8_EXT; rb->DepthBits = 24; cpp = 4; + irb->texformat = &_mesa_texformat_s8_z24; break; case GL_DEPTH_STENCIL_EXT: case GL_DEPTH24_STENCIL8_EXT: @@ -182,6 +188,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->DepthBits = 24; rb->StencilBits = 8; cpp = 4; + irb->texformat = &_mesa_texformat_s8_z24; break; default: _mesa_problem(ctx, @@ -210,7 +217,8 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width, height, pitch); - irb->region = intel_region_alloc(intel, cpp, width, height, pitch, + irb->region = intel_region_alloc(intel, I915_TILING_NONE, + cpp, width, height, pitch, GL_TRUE); if (!irb->region) return GL_FALSE; /* out of memory? */ @@ -322,6 +330,7 @@ intel_create_renderbuffer(GLenum intFormat) irb->Base.GreenBits = 6; irb->Base.BlueBits = 5; irb->Base.DataType = GL_UNSIGNED_BYTE; + irb->texformat = &_mesa_texformat_rgb565; break; case GL_RGB8: irb->Base._ActualFormat = GL_RGB8; @@ -331,6 +340,7 @@ intel_create_renderbuffer(GLenum intFormat) irb->Base.BlueBits = 8; irb->Base.AlphaBits = 0; irb->Base.DataType = GL_UNSIGNED_BYTE; + irb->texformat = &_mesa_texformat_argb8888; /* XXX: Need xrgb8888 */ break; case GL_RGBA8: irb->Base._ActualFormat = GL_RGBA8; @@ -340,24 +350,28 @@ intel_create_renderbuffer(GLenum intFormat) irb->Base.BlueBits = 8; irb->Base.AlphaBits = 8; irb->Base.DataType = GL_UNSIGNED_BYTE; + irb->texformat = &_mesa_texformat_argb8888; break; case GL_STENCIL_INDEX8_EXT: irb->Base._ActualFormat = GL_STENCIL_INDEX8_EXT; irb->Base._BaseFormat = GL_STENCIL_INDEX; irb->Base.StencilBits = 8; irb->Base.DataType = GL_UNSIGNED_BYTE; + irb->texformat = &_mesa_texformat_s8_z24; break; case GL_DEPTH_COMPONENT16: irb->Base._ActualFormat = GL_DEPTH_COMPONENT16; irb->Base._BaseFormat = GL_DEPTH_COMPONENT; irb->Base.DepthBits = 16; irb->Base.DataType = GL_UNSIGNED_SHORT; + irb->texformat = &_mesa_texformat_z16; break; case GL_DEPTH_COMPONENT24: irb->Base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; irb->Base._BaseFormat = GL_DEPTH_COMPONENT; irb->Base.DepthBits = 24; irb->Base.DataType = GL_UNSIGNED_INT; + irb->texformat = &_mesa_texformat_s8_z24; break; case GL_DEPTH24_STENCIL8_EXT: irb->Base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; @@ -365,6 +379,7 @@ intel_create_renderbuffer(GLenum intFormat) irb->Base.DepthBits = 24; irb->Base.StencilBits = 8; irb->Base.DataType = GL_UNSIGNED_INT_24_8_EXT; + irb->texformat = &_mesa_texformat_s8_z24; break; default: _mesa_problem(NULL, @@ -449,6 +464,8 @@ static GLboolean intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, struct gl_texture_image *texImage) { + irb->texformat = texImage->TexFormat; + if (texImage->TexFormat == &_mesa_texformat_argb8888) { irb->Base._ActualFormat = GL_RGBA8; irb->Base._BaseFormat = GL_RGBA; @@ -458,9 +475,21 @@ intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, else if (texImage->TexFormat == &_mesa_texformat_rgb565) { irb->Base._ActualFormat = GL_RGB5; irb->Base._BaseFormat = GL_RGB; - irb->Base.DataType = GL_UNSIGNED_SHORT; + irb->Base.DataType = GL_UNSIGNED_BYTE; DBG("Render to RGB5 texture OK\n"); } + else if (texImage->TexFormat == &_mesa_texformat_argb1555) { + irb->Base._ActualFormat = GL_RGB5_A1; + irb->Base._BaseFormat = GL_RGBA; + irb->Base.DataType = GL_UNSIGNED_BYTE; + DBG("Render to ARGB1555 texture OK\n"); + } + else if (texImage->TexFormat == &_mesa_texformat_argb4444) { + irb->Base._ActualFormat = GL_RGBA4; + irb->Base._BaseFormat = GL_RGBA; + irb->Base.DataType = GL_UNSIGNED_BYTE; + DBG("Render to ARGB4444 texture OK\n"); + } else if (texImage->TexFormat == &_mesa_texformat_z16) { irb->Base._ActualFormat = GL_DEPTH_COMPONENT16; irb->Base._BaseFormat = GL_DEPTH_COMPONENT; @@ -546,9 +575,10 @@ intel_render_texture(GLcontext * ctx, ASSERT(newImage); - if (newImage->Border != 0) { - /* Fallback on drawing to a texture with a border, which won't have a - * miptree. + intel_image = intel_texture_image(newImage); + if (!intel_image->mt) { + /* Fallback on drawing to a texture that doesn't have a miptree + * (has a border, width/height 0, etc.) */ _mesa_reference_renderbuffer(&att->Renderbuffer, NULL); _mesa_render_texture(ctx, fb, att); @@ -579,7 +609,6 @@ intel_render_texture(GLcontext * ctx, irb->Base.RefCount); /* point the renderbufer's region to the texture image region */ - intel_image = intel_texture_image(newImage); if (irb->region != intel_image->mt->region) { if (irb->region) intel_region_release(&irb->region); @@ -631,11 +660,11 @@ intel_finish_render_texture(GLcontext * ctx, static void intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) { - struct intel_context *intel = intel_context(ctx); const struct intel_renderbuffer *depthRb = intel_get_renderbuffer(fb, BUFFER_DEPTH); const struct intel_renderbuffer *stencilRb = intel_get_renderbuffer(fb, BUFFER_STENCIL); + int i; if (stencilRb && stencilRb != depthRb) { /* we only support combined depth/stencil buffers, not separate @@ -644,32 +673,26 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; } - /* check that texture color buffers are a format we can render into */ - { - const struct gl_texture_format *supportedFormat; - GLuint i; + for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); - /* The texture format we can render into seems to depend on the - * screen depth. There currently seems to be a problem when - * rendering into a rgb565 texture when the screen is abgr8888. - */ + if (rb == NULL) + continue; + + if (irb == NULL) { + fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; + continue; + } - if (intel->ctx.Visual.rgbBits >= 24) - supportedFormat = &_mesa_texformat_argb8888; - else - supportedFormat = &_mesa_texformat_rgb565; - - for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { - const struct gl_texture_object *texObj = - fb->Attachment[BUFFER_COLOR0 + i].Texture; - if (texObj) { - const struct gl_texture_image *texImg = - texObj->Image[0][texObj->BaseLevel]; - if (texImg && texImg->TexFormat != supportedFormat) { - fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; - break; - } - } + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_RGB565: + case MESA_FORMAT_ARGB1555: + case MESA_FORMAT_ARGB4444: + break; + default: + fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; } } } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index 7226ee026f6..f0665af482e 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -61,6 +61,8 @@ struct intel_renderbuffer struct gl_renderbuffer Base; struct intel_region *region; + const struct gl_texture_format *texformat; + GLuint vbl_pending; /**< vblank sequence number of pending flip */ uint8_t *span_cache; diff --git a/src/mesa/drivers/dri/intel/intel_generatemipmap.c b/src/mesa/drivers/dri/intel/intel_generatemipmap.c new file mode 100644 index 00000000000..1060fbd9e59 --- /dev/null +++ b/src/mesa/drivers/dri/intel/intel_generatemipmap.c @@ -0,0 +1,283 @@ +/* + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <[email protected]> + * + */ + +#include "main/glheader.h" +#include "main/enums.h" +#include "main/image.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "main/bufferobj.h" +#include "main/teximage.h" +#include "main/texenv.h" +#include "main/texobj.h" +#include "main/texstate.h" +#include "main/texparam.h" +#include "main/varray.h" +#include "main/attrib.h" +#include "main/enable.h" +#include "main/buffers.h" +#include "main/fbobject.h" +#include "main/framebuffer.h" +#include "main/renderbuffer.h" +#include "main/depth.h" +#include "main/hash.h" +#include "main/mipmap.h" +#include "main/blend.h" +#include "glapi/dispatch.h" +#include "swrast/swrast.h" + +#include "intel_screen.h" +#include "intel_context.h" +#include "intel_batchbuffer.h" +#include "intel_pixel.h" +#include "intel_tex.h" +#include "intel_mipmap_tree.h" + +static const char *intel_fp_tex2d = + "!!ARBfp1.0\n" + "TEX result.color, fragment.texcoord[0], texture[0], 2D;\n" + "END\n"; + +static GLboolean +intel_generate_mipmap_level(GLcontext *ctx, GLuint tex_name, + int level, int width, int height) +{ + struct intel_context *intel = intel_context(ctx); + GLfloat vertices[4][2]; + GLint status; + + /* Set to source from the previous level */ + _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, level - 1); + _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, level - 1); + + /* Set to draw into the current level */ + _mesa_FramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, + GL_COLOR_ATTACHMENT0_EXT, + GL_TEXTURE_2D, + tex_name, + level); + /* Choose to render to the color attachment. */ + _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0_EXT); + + status = _mesa_CheckFramebufferStatusEXT (GL_FRAMEBUFFER_EXT); + if (status != GL_FRAMEBUFFER_COMPLETE_EXT) + return GL_FALSE; + + intel_meta_set_passthrough_transform(intel); + + /* XXX: Doing it right would involve setting up the transformation to do + * 0-1 mapping or something, and not changing the vertex data. + */ + vertices[0][0] = 0; + vertices[0][1] = 0; + vertices[1][0] = width; + vertices[1][1] = 0; + vertices[2][0] = width; + vertices[2][1] = height; + vertices[3][0] = 0; + vertices[3][1] = height; + + _mesa_VertexPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &vertices); + _mesa_Enable(GL_VERTEX_ARRAY); + intel_meta_set_default_texrect(intel); + + CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4)); + + intel_meta_restore_texcoords(intel); + intel_meta_restore_transform(intel); + + return GL_TRUE; +} + +static GLboolean +intel_generate_mipmap_2d(GLcontext *ctx, + GLenum target, + struct gl_texture_object *texObj) +{ + struct intel_context *intel = intel_context(ctx); + GLint old_active_texture; + int level, max_levels, start_level, end_level; + GLuint fb_name; + GLboolean success = GL_FALSE; + struct gl_framebuffer *saved_fbo = NULL; + + _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT | + GL_CURRENT_BIT | GL_COLOR_BUFFER_BIT | + GL_DEPTH_BUFFER_BIT); + _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT); + old_active_texture = ctx->Texture.CurrentUnit; + _mesa_reference_framebuffer(&saved_fbo, ctx->DrawBuffer); + + _mesa_Disable(GL_POLYGON_STIPPLE); + _mesa_Disable(GL_DEPTH_TEST); + _mesa_Disable(GL_STENCIL_TEST); + _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + _mesa_DepthMask(GL_FALSE); + + /* Bind the given texture to GL_TEXTURE_2D with linear filtering for our + * minification. + */ + _mesa_ActiveTextureARB(GL_TEXTURE0_ARB); + _mesa_Enable(GL_TEXTURE_2D); + _mesa_BindTexture(GL_TEXTURE_2D, texObj->Name); + _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, + GL_LINEAR_MIPMAP_NEAREST); + _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + + /* Bind the new renderbuffer to the color attachment point. */ + _mesa_GenFramebuffersEXT(1, &fb_name); + _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb_name); + + intel_meta_set_fragment_program(intel, &intel->meta.tex2d_fp, + intel_fp_tex2d); + intel_meta_set_passthrough_vertex_program(intel); + + max_levels = _mesa_max_texture_levels(ctx, texObj->Target); + start_level = texObj->BaseLevel; + end_level = texObj->MaxLevel; + + /* Loop generating level+1 from level. */ + for (level = start_level; level < end_level && level < max_levels - 1; level++) { + const struct gl_texture_image *srcImage; + int width, height; + + srcImage = _mesa_select_tex_image(ctx, texObj, target, level); + if (srcImage->Border != 0) + goto fail; + + width = srcImage->Width / 2; + if (width < 1) + width = 1; + height = srcImage->Height / 2; + if (height < 1) + height = 1; + + if (width == srcImage->Width && + height == srcImage->Height) { + /* Neither _mesa_max_texture_levels nor texObj->MaxLevel are the + * maximum texture level for the object, so break out when we've gone + * over the edge. + */ + break; + } + + /* Make sure that there's space allocated for the target level. + * We could skip this if there's already space allocated and save some + * time. + */ + _mesa_TexImage2D(GL_TEXTURE_2D, level + 1, srcImage->InternalFormat, + width, height, 0, + GL_RGBA, GL_UNSIGNED_INT, NULL); + + if (!intel_generate_mipmap_level(ctx, texObj->Name, level + 1, + width, height)) + goto fail; + } + + success = GL_TRUE; + +fail: + intel_meta_restore_fragment_program(intel); + intel_meta_restore_vertex_program(intel); + + _mesa_DeleteFramebuffersEXT(1, &fb_name); + _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture); + if (saved_fbo) + _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, saved_fbo->Name); + _mesa_reference_framebuffer(&saved_fbo, NULL); + _mesa_PopClientAttrib(); + _mesa_PopAttrib(); + + return success; +} + + +/** + * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap + * level). + * + * The texture object's miptree must be mapped. + * + * It would be really nice if this was just called by Mesa whenever mipmaps + * needed to be regenerated, rather than us having to remember to do so in + * each texture image modification path. + * + * This function should also include an accelerated path. + */ +void +intel_generate_mipmap(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_object *intelObj = intel_texture_object(texObj); + GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; + int face, i; + + /* HW path */ + if (target == GL_TEXTURE_2D && + ctx->Extensions.EXT_framebuffer_object && + ctx->Extensions.ARB_fragment_program && + ctx->Extensions.ARB_vertex_program) { + GLboolean success; + + /* We'll be accessing this texture using GL entrypoints, which should + * be resilient against other access to this texture. + */ + _mesa_unlock_texture(ctx, texObj); + success = intel_generate_mipmap_2d(ctx, target, texObj); + _mesa_lock_texture(ctx, texObj); + + if (success) + return; + } + + /* SW path */ + intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel); + _mesa_generate_mipmap(ctx, target, texObj); + intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel); + + /* Update the level information in our private data in the new images, since + * it didn't get set as part of a normal TexImage path. + */ + for (face = 0; face < nr_faces; face++) { + for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) { + struct intel_texture_image *intelImage; + + intelImage = intel_texture_image(texObj->Image[face][i]); + if (intelImage == NULL) + break; + + intelImage->level = i; + intelImage->face = face; + /* Unreference the miptree to signal that the new Data is a bare + * pointer from mesa. + */ + intel_miptree_release(intel, &intelImage->mt); + } + } +} diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index 6e1e034e53d..0d34f28311e 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -57,14 +57,16 @@ intel_miptree_create_internal(struct intel_context *intel, GLuint last_level, GLuint width0, GLuint height0, - GLuint depth0, GLuint cpp, GLuint compress_byte) + GLuint depth0, GLuint cpp, GLuint compress_byte, + uint32_t tiling) { GLboolean ok; struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); - DBG("%s target %s format %s level %d..%d\n", __FUNCTION__, + DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), - _mesa_lookup_enum_by_nr(internal_format), first_level, last_level); + _mesa_lookup_enum_by_nr(internal_format), + first_level, last_level, mt); mt->target = target_to_target(target); mt->internal_format = internal_format; @@ -80,15 +82,16 @@ intel_miptree_create_internal(struct intel_context *intel, #ifdef I915 if (IS_945(intel->intelScreen->deviceID)) - ok = i945_miptree_layout(intel, mt); + ok = i945_miptree_layout(intel, mt, tiling); else - ok = i915_miptree_layout(intel, mt); + ok = i915_miptree_layout(intel, mt, tiling); #else - ok = brw_miptree_layout(intel, mt); + ok = brw_miptree_layout(intel, mt, tiling); #endif if (!ok) { free(mt); + DBG("%s not okay - returning NULL\n", __FUNCTION__); return NULL; } @@ -107,10 +110,18 @@ intel_miptree_create(struct intel_context *intel, GLboolean expect_accelerated_upload) { struct intel_mipmap_tree *mt; + uint32_t tiling; + + if (intel->use_texture_tiling && compress_byte == 0 && + intel->intelScreen->kernel_exec_fencing) + tiling = I915_TILING_X; + else + tiling = I915_TILING_NONE; mt = intel_miptree_create_internal(intel, target, internal_format, first_level, last_level, width0, - height0, depth0, cpp, compress_byte); + height0, depth0, cpp, compress_byte, + tiling); /* * pitch == 0 || height == 0 indicates the null texture */ @@ -118,6 +129,7 @@ intel_miptree_create(struct intel_context *intel, return NULL; mt->region = intel_region_alloc(intel, + tiling, mt->cpp, mt->pitch, mt->total_height, @@ -147,7 +159,8 @@ intel_miptree_create_for_region(struct intel_context *intel, mt = intel_miptree_create_internal(intel, target, internal_format, first_level, last_level, region->width, region->height, 1, - region->cpp, compress_byte); + region->cpp, compress_byte, + I915_TILING_NONE); if (!mt) return mt; #if 0 @@ -185,6 +198,7 @@ intel_miptree_create_for_region(struct intel_context *intel, int intel_miptree_pitch_align (struct intel_context *intel, struct intel_mipmap_tree *mt, + uint32_t tiling, int pitch) { #ifdef I915 @@ -205,6 +219,11 @@ int intel_miptree_pitch_align (struct intel_context *intel, pitch_align = 4; } + if (tiling == I915_TILING_X) + pitch_align = 512; + else if (tiling == I915_TILING_Y) + pitch_align = 128; + pitch = ALIGN(pitch * mt->cpp, pitch_align); #ifdef I915 diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h index 4060b9df78f..3af9966827f 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h @@ -148,6 +148,7 @@ intel_miptree_create_for_region(struct intel_context *intel, int intel_miptree_pitch_align (struct intel_context *intel, struct intel_mipmap_tree *mt, + uint32_t tiling, int pitch); void intel_miptree_reference(struct intel_mipmap_tree **dst, @@ -218,10 +219,13 @@ void intel_miptree_image_copy(struct intel_context *intel, /* i915_mipmap_tree.c: */ GLboolean i915_miptree_layout(struct intel_context *intel, - struct intel_mipmap_tree *mt); + struct intel_mipmap_tree *mt, + uint32_t tiling); GLboolean i945_miptree_layout(struct intel_context *intel, - struct intel_mipmap_tree *mt); + struct intel_mipmap_tree *mt, + uint32_t tiling); GLboolean brw_miptree_layout(struct intel_context *intel, - struct intel_mipmap_tree *mt); + struct intel_mipmap_tree *mt, + uint32_t tiling); #endif diff --git a/src/mesa/drivers/dri/intel/intel_pixel.c b/src/mesa/drivers/dri/intel/intel_pixel.c index fc0ac0b79c0..36a684b3b85 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel.c +++ b/src/mesa/drivers/dri/intel/intel_pixel.c @@ -27,9 +27,12 @@ #include "main/enums.h" #include "main/state.h" +#include "main/bufferobj.h" #include "main/context.h" #include "main/enable.h" #include "main/matrix.h" +#include "main/texstate.h" +#include "main/varray.h" #include "main/viewport.h" #include "swrast/swrast.h" #include "shader/arbprogram.h" @@ -334,6 +337,85 @@ intel_meta_restore_fragment_program(struct intel_context *intel) _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB); } +static const float default_texcoords[4][2] = { { 0.0, 0.0 }, + { 1.0, 0.0 }, + { 1.0, 1.0 }, + { 0.0, 1.0 } }; + +void +intel_meta_set_default_texrect(struct intel_context *intel) +{ + GLcontext *ctx = &intel->ctx; + struct gl_client_array *old_texcoord_array; + + intel->meta.saved_active_texture = ctx->Texture.CurrentUnit; + if (intel->meta.saved_array_vbo == NULL) { + _mesa_reference_buffer_object(ctx, &intel->meta.saved_array_vbo, + ctx->Array.ArrayBufferObj); + } + + old_texcoord_array = &ctx->Array.ArrayObj->TexCoord[0]; + intel->meta.saved_texcoord_type = old_texcoord_array->Type; + intel->meta.saved_texcoord_size = old_texcoord_array->Size; + intel->meta.saved_texcoord_stride = old_texcoord_array->Stride; + intel->meta.saved_texcoord_enable = old_texcoord_array->Enabled; + intel->meta.saved_texcoord_ptr = old_texcoord_array->Ptr; + _mesa_reference_buffer_object(ctx, &intel->meta.saved_texcoord_vbo, + old_texcoord_array->BufferObj); + + _mesa_ClientActiveTextureARB(GL_TEXTURE0); + + if (intel->meta.texcoord_vbo == NULL) { + GLuint vbo_name; + + _mesa_GenBuffersARB(1, &vbo_name); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, vbo_name); + _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(default_texcoords), + default_texcoords, GL_STATIC_DRAW_ARB); + _mesa_reference_buffer_object(ctx, &intel->meta.texcoord_vbo, + ctx->Array.ArrayBufferObj); + } else { + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, + intel->meta.texcoord_vbo->Name); + } + _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), NULL); + + _mesa_Enable(GL_TEXTURE_COORD_ARRAY); +} + +void +intel_meta_restore_texcoords(struct intel_context *intel) +{ + GLcontext *ctx = &intel->ctx; + + /* Restore the old TexCoordPointer */ + if (intel->meta.saved_texcoord_vbo) { + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, + intel->meta.saved_texcoord_vbo->Name); + _mesa_reference_buffer_object(ctx, &intel->meta.saved_texcoord_vbo, NULL); + } else { + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0); + } + + _mesa_TexCoordPointer(intel->meta.saved_texcoord_size, + intel->meta.saved_texcoord_type, + intel->meta.saved_texcoord_stride, + intel->meta.saved_texcoord_ptr); + if (!intel->meta.saved_texcoord_enable) + _mesa_Disable(GL_TEXTURE_COORD_ARRAY); + + _mesa_ClientActiveTextureARB(GL_TEXTURE0 + + intel->meta.saved_active_texture); + + if (intel->meta.saved_array_vbo) { + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, + intel->meta.saved_array_vbo->Name); + _mesa_reference_buffer_object(ctx, &intel->meta.saved_array_vbo, NULL); + } else { + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0); + } +} + void intelInitPixelFuncs(struct dd_function_table *functions) { @@ -355,5 +437,7 @@ intel_free_pixel_state(struct intel_context *intel) _mesa_reference_vertprog(ctx, &intel->meta.passthrough_vp, NULL); _mesa_reference_fragprog(ctx, &intel->meta.bitmap_fp, NULL); + _mesa_reference_fragprog(ctx, &intel->meta.tex2d_fp, NULL); + _mesa_reference_buffer_object(ctx, &intel->meta.texcoord_vbo, NULL); } diff --git a/src/mesa/drivers/dri/intel/intel_pixel.h b/src/mesa/drivers/dri/intel/intel_pixel.h index cb41fa182cb..6acf0813c8c 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel.h +++ b/src/mesa/drivers/dri/intel/intel_pixel.h @@ -40,6 +40,9 @@ void intel_meta_set_fragment_program(struct intel_context *intel, const char *prog_string); void intel_meta_restore_fragment_program(struct intel_context *intel); void intel_free_pixel_state(struct intel_context *intel); +void intel_meta_set_default_texrect(struct intel_context *intel); +void intel_meta_set_default_texrect(struct intel_context *intel); +void intel_meta_restore_texcoords(struct intel_context *intel); GLboolean intel_check_blit_fragment_ops(GLcontext * ctx, GLboolean src_alpha_is_one); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index 1db7f5594e9..80d3239189c 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -360,7 +360,6 @@ intel_texture_bitmap(GLcontext * ctx, "END\n"; GLuint texname; GLfloat vertices[4][4]; - GLfloat texcoords[4][2]; GLint old_active_texture; GLubyte *unpacked_bitmap; GLubyte *a8_bitmap; @@ -401,6 +400,14 @@ intel_texture_bitmap(GLcontext * ctx, return GL_FALSE; } + if (!ctx->Extensions.ARB_texture_non_power_of_two && + (!is_power_of_two(width) || !is_power_of_two(height))) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, + "glBitmap() fallback: NPOT texture\n"); + return GL_FALSE; + } + /* Check that we can load in a texture this big. */ if (width > (1 << (ctx->Const.MaxTextureLevels - 1)) || height > (1 << (ctx->Const.MaxTextureLevels - 1))) { @@ -485,22 +492,12 @@ intel_texture_bitmap(GLcontext * ctx, vertices[3][2] = dst_z; vertices[3][3] = 1.0; - texcoords[0][0] = 0.0; - texcoords[0][1] = 0.0; - texcoords[1][0] = 1.0; - texcoords[1][1] = 0.0; - texcoords[2][0] = 1.0; - texcoords[2][1] = 1.0; - texcoords[3][0] = 0.0; - texcoords[3][1] = 1.0; - _mesa_VertexPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &vertices); - _mesa_ClientActiveTextureARB(GL_TEXTURE0); - _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords); _mesa_Enable(GL_VERTEX_ARRAY); - _mesa_Enable(GL_TEXTURE_COORD_ARRAY); + intel_meta_set_default_texrect(intel); CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4)); + intel_meta_restore_texcoords(intel); intel_meta_restore_transform(intel); intel_meta_restore_fragment_program(intel); intel_meta_restore_vertex_program(intel); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c index e8d5ac8569d..46d27f1a93a 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c @@ -70,7 +70,6 @@ intel_texture_drawpixels(GLcontext * ctx, struct intel_context *intel = intel_context(ctx); GLuint texname; GLfloat vertices[4][4]; - GLfloat texcoords[4][2]; GLfloat z; GLint old_active_texture; GLenum internalFormat; @@ -97,7 +96,7 @@ intel_texture_drawpixels(GLcontext * ctx, /* We don't have a way to generate fragments with stencil values which * will set the resulting stencil value. */ - if (format == GL_STENCIL_INDEX) + if (format == GL_STENCIL_INDEX || format == GL_DEPTH_STENCIL) return GL_FALSE; /* Check that we can load in a texture this big. */ @@ -120,6 +119,14 @@ intel_texture_drawpixels(GLcontext * ctx, return GL_FALSE; } + if (!ctx->Extensions.ARB_texture_non_power_of_two && + (!is_power_of_two(width) || !is_power_of_two(height))) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, + "glDrawPixels() fallback: NPOT texture\n"); + return GL_FALSE; + } + _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT | GL_CURRENT_BIT); _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT); @@ -169,22 +176,13 @@ intel_texture_drawpixels(GLcontext * ctx, vertices[3][2] = z; vertices[3][3] = 1.0; - texcoords[0][0] = 0.0; - texcoords[0][1] = 0.0; - texcoords[1][0] = 1.0; - texcoords[1][1] = 0.0; - texcoords[2][0] = 1.0; - texcoords[2][1] = 1.0; - texcoords[3][0] = 0.0; - texcoords[3][1] = 1.0; - _mesa_VertexPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &vertices); - _mesa_ClientActiveTextureARB(GL_TEXTURE0); - _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords); _mesa_Enable(GL_VERTEX_ARRAY); - _mesa_Enable(GL_TEXTURE_COORD_ARRAY); + intel_meta_set_default_texrect(intel); + CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4)); + intel_meta_restore_texcoords(intel); intel_meta_restore_transform(intel); _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture); @@ -208,7 +206,6 @@ intel_stencil_drawpixels(GLcontext * ctx, struct intel_context *intel = intel_context(ctx); GLuint texname, rb_name, fb_name, old_fb_name; GLfloat vertices[4][2]; - GLfloat texcoords[4][2]; struct intel_renderbuffer *irb; struct intel_renderbuffer *depth_irb; struct gl_renderbuffer *rb; @@ -273,6 +270,14 @@ intel_stencil_drawpixels(GLcontext * ctx, return GL_FALSE; } + if (!ctx->Extensions.ARB_texture_non_power_of_two && + (!is_power_of_two(width) || !is_power_of_two(height))) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, + "glDrawPixels(GL_STENCIL_INDEX) fallback: NPOT texture\n"); + return GL_FALSE; + } + _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT | GL_CURRENT_BIT | GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT); @@ -343,7 +348,6 @@ intel_stencil_drawpixels(GLcontext * ctx, _mesa_free(stencil_pixels); intel_meta_set_passthrough_transform(intel); - vertices[0][0] = x; vertices[0][1] = y; vertices[1][0] = x + width * ctx->Pixel.ZoomX; @@ -353,22 +357,13 @@ intel_stencil_drawpixels(GLcontext * ctx, vertices[3][0] = x; vertices[3][1] = y + height * ctx->Pixel.ZoomY; - texcoords[0][0] = 0.0; - texcoords[0][1] = 0.0; - texcoords[1][0] = 1.0; - texcoords[1][1] = 0.0; - texcoords[2][0] = 1.0; - texcoords[2][1] = 1.0; - texcoords[3][0] = 0.0; - texcoords[3][1] = 1.0; - _mesa_VertexPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &vertices); - _mesa_ClientActiveTextureARB(GL_TEXTURE0); - _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords); _mesa_Enable(GL_VERTEX_ARRAY); - _mesa_Enable(GL_TEXTURE_COORD_ARRAY); + intel_meta_set_default_texrect(intel); + CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4)); + intel_meta_restore_texcoords(intel); intel_meta_restore_transform(intel); _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture); diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h index 57ac8f0cc14..d19f1bae34c 100644 --- a/src/mesa/drivers/dri/intel/intel_reg.h +++ b/src/mesa/drivers/dri/intel/intel_reg.h @@ -189,6 +189,19 @@ #define S7_DEPTH_OFFSET_CONST_MASK ~0 +/* p143 */ +#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) +/* Dword 1 */ +#define BUF_3D_ID_COLOR_BACK (0x3<<24) +#define BUF_3D_ID_DEPTH (0x7<<24) +#define BUF_3D_USE_FENCE (1<<23) +#define BUF_3D_TILED_SURFACE (1<<22) +#define BUF_3D_TILE_WALK_X 0 +#define BUF_3D_TILE_WALK_Y (1<<21) +#define BUF_3D_PITCH(x) (((x)/4)<<2) +/* Dword 2 */ +#define BUF_3D_ADDR(x) ((x) & ~0x3) + /* Primitive dispatch on 830-945 */ #define _3DPRIMITIVE (CMD_3D | (0x1f << 24)) #define PRIM_INDIRECT (1<<23) diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index 0aa5b8c02c9..49bcb3c1ddd 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -52,17 +52,74 @@ #define FILE_DEBUG_FLAG DEBUG_REGION +/* This should be set to the maximum backtrace size desired. + * Set it to 0 to disable backtrace debugging. + */ +#define DEBUG_BACKTRACE_SIZE 0 + +#if DEBUG_BACKTRACE_SIZE == 0 +/* Use the standard debug output */ +#define _DBG(...) DBG(__VA_ARGS__) +#else +/* Use backtracing debug output */ +#define _DBG(...) {debug_backtrace(); DBG(__VA_ARGS__);} + +/* Backtracing debug support */ +#include <execinfo.h> + +static void +debug_backtrace(void) +{ + void *trace[DEBUG_BACKTRACE_SIZE]; + char **strings = NULL; + int traceSize; + register int i; + + traceSize = backtrace(trace, DEBUG_BACKTRACE_SIZE); + strings = backtrace_symbols(trace, traceSize); + if (strings == NULL) { + DBG("no backtrace:"); + return; + } + + /* Spit out all the strings with a colon separator. Ignore + * the first, since we don't really care about the call + * to debug_backtrace() itself. Skip until the final "/" in + * the trace to avoid really long lines. + */ + for (i = 1; i < traceSize; i++) { + char *p = strings[i], *slash = strings[i]; + while (*p) { + if (*p++ == '/') { + slash = p; + } + } + + DBG("%s:", slash); + } + + /* Free up the memory, and we're done */ + free(strings); +} + +#endif + + + /* XXX: Thread safety? */ GLubyte * intel_region_map(struct intel_context *intel, struct intel_region *region) { - DBG("%s\n", __FUNCTION__); + _DBG("%s %p\n", __FUNCTION__, region); if (!region->map_refcount++) { if (region->pbo) intel_region_cow(intel, region); - dri_bo_map(region->buffer, GL_TRUE); + if (intel->intelScreen->kernel_exec_fencing) + drm_intel_gem_bo_map_gtt(region->buffer); + else + dri_bo_map(region->buffer, GL_TRUE); region->map = region->buffer->virtual; } @@ -72,9 +129,12 @@ intel_region_map(struct intel_context *intel, struct intel_region *region) void intel_region_unmap(struct intel_context *intel, struct intel_region *region) { - DBG("%s\n", __FUNCTION__); + _DBG("%s %p\n", __FUNCTION__, region); if (!--region->map_refcount) { - dri_bo_unmap(region->buffer); + if (intel->intelScreen->kernel_exec_fencing) + drm_intel_gem_bo_unmap_gtt(region->buffer); + else + dri_bo_unmap(region->buffer); region->map = NULL; } } @@ -87,10 +147,10 @@ intel_region_alloc_internal(struct intel_context *intel, { struct intel_region *region; - DBG("%s\n", __FUNCTION__); - - if (buffer == NULL) + if (buffer == NULL) { + _DBG("%s <-- NULL\n", __FUNCTION__); return NULL; + } region = calloc(sizeof(*region), 1); region->cpp = cpp; @@ -104,15 +164,18 @@ intel_region_alloc_internal(struct intel_context *intel, region->tiling = I915_TILING_NONE; region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE; + _DBG("%s <-- %p\n", __FUNCTION__, region); return region; } struct intel_region * intel_region_alloc(struct intel_context *intel, + uint32_t tiling, GLuint cpp, GLuint width, GLuint height, GLuint pitch, GLboolean expect_accelerated_upload) { dri_bo *buffer; + struct intel_region *region; if (expect_accelerated_upload) { buffer = drm_intel_bo_alloc_for_render(intel->bufmgr, "region", @@ -122,7 +185,16 @@ intel_region_alloc(struct intel_context *intel, pitch * cpp * height, 64); } - return intel_region_alloc_internal(intel, cpp, width, height, pitch, buffer); + region = intel_region_alloc_internal(intel, cpp, width, height, + pitch, buffer); + + if (tiling != I915_TILING_NONE) { + assert(((pitch * cpp) & 511) == 0); + drm_intel_bo_set_tiling(buffer, &tiling, pitch * cpp); + drm_intel_bo_get_tiling(buffer, ®ion->tiling, ®ion->bit_6_swizzle); + } + + return region; } struct intel_region * @@ -158,7 +230,7 @@ void intel_region_reference(struct intel_region **dst, struct intel_region *src) { if (src) - DBG("%s %p %d\n", __FUNCTION__, src, src->refcount); + _DBG("%s %p %d\n", __FUNCTION__, src, src->refcount); assert(*dst == NULL); if (src) { @@ -172,10 +244,12 @@ intel_region_release(struct intel_region **region_handle) { struct intel_region *region = *region_handle; - if (region == NULL) + if (region == NULL) { + _DBG("%s NULL\n", __FUNCTION__); return; + } - DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1); + _DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1); ASSERT(region->refcount > 0); region->refcount--; @@ -251,7 +325,7 @@ intel_region_data(struct intel_context *intel, { GLboolean locked = GL_FALSE; - DBG("%s\n", __FUNCTION__); + _DBG("%s\n", __FUNCTION__); if (intel == NULL) return; @@ -293,7 +367,7 @@ intel_region_copy(struct intel_context *intel, GLuint src_offset, GLuint srcx, GLuint srcy, GLuint width, GLuint height) { - DBG("%s\n", __FUNCTION__); + _DBG("%s\n", __FUNCTION__); if (intel == NULL) return; @@ -326,7 +400,7 @@ intel_region_fill(struct intel_context *intel, GLuint dstx, GLuint dsty, GLuint width, GLuint height, GLuint color) { - DBG("%s\n", __FUNCTION__); + _DBG("%s\n", __FUNCTION__); if (intel == NULL) return; @@ -356,6 +430,8 @@ intel_region_attach_pbo(struct intel_context *intel, if (region->pbo == pbo) return; + _DBG("%s %p %p\n", __FUNCTION__, region, pbo); + /* If there is already a pbo attached, break the cow tie now. * Don't call intel_region_release_pbo() as that would * unnecessarily allocate a new buffer we would have to immediately @@ -385,6 +461,7 @@ void intel_region_release_pbo(struct intel_context *intel, struct intel_region *region) { + _DBG("%s %p\n", __FUNCTION__, region); assert(region->buffer == region->pbo->buffer); region->pbo->region = NULL; region->pbo = NULL; @@ -412,7 +489,7 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region) assert(region->cpp * region->pitch * region->height == pbo->Base.Size); - DBG("%s (%d bytes)\n", __FUNCTION__, pbo->Base.Size); + _DBG("%s %p (%d bytes)\n", __FUNCTION__, region, pbo->Base.Size); /* Now blit from the texture buffer to the new buffer: */ @@ -459,6 +536,10 @@ intel_recreate_static(struct intel_context *intel, if (region == NULL) { region = calloc(sizeof(*region), 1); region->refcount = 1; + _DBG("%s creating new region %p\n", __FUNCTION__, region); + } + else { + _DBG("%s %p\n", __FUNCTION__, region); } if (intel->ctx.Visual.rgbBits == 24) diff --git a/src/mesa/drivers/dri/intel/intel_regions.h b/src/mesa/drivers/dri/intel/intel_regions.h index 45e2bf4e77a..bd3c8e7325b 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.h +++ b/src/mesa/drivers/dri/intel/intel_regions.h @@ -73,7 +73,8 @@ struct intel_region * copied by calling intel_reference_region(). */ struct intel_region *intel_region_alloc(struct intel_context *intel, - GLuint cpp, GLuint width, + uint32_t tiling, + GLuint cpp, GLuint width, GLuint height, GLuint pitch, GLboolean expect_accelerated_upload); diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 65e62947ef6..8da96ede644 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -49,6 +49,10 @@ #include "i915_drm.h" #include "i830_dri.h" +#define DRI_CONF_TEXTURE_TILING(def) \ + DRI_CONF_OPT_BEGIN(texture_tiling, bool, def) \ + DRI_CONF_DESC(en, "Enable texture tiling") \ + DRI_CONF_OPT_END \ PUBLIC const char __driConfigOptions[] = DRI_CONF_BEGIN @@ -64,6 +68,17 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects") DRI_CONF_DESC_END DRI_CONF_OPT_END + +#ifdef I915 + DRI_CONF_TEXTURE_TILING(false) +#else + DRI_CONF_TEXTURE_TILING(true) +#endif + + DRI_CONF_OPT_BEGIN(early_z, bool, false) + DRI_CONF_DESC(en, "Enable early Z in classic mode (unstable, 945-only).") + DRI_CONF_OPT_END + DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY DRI_CONF_FORCE_S3TC_ENABLE(false) @@ -76,7 +91,7 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_SECTION_END DRI_CONF_END; -const GLuint __driNConfigOptions = 8; +const GLuint __driNConfigOptions = 10; #ifdef USE_NEW_INTERFACE static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; @@ -236,7 +251,7 @@ intel_get_param(__DRIscreenPrivate *psp, int param, int *value) ret = drmCommandWriteRead(psp->fd, DRM_I915_GETPARAM, &gp, sizeof(gp)); if (ret) { - fprintf(stderr, "drm_i915_getparam: %d\n", ret); + _mesa_warning(NULL, "drm_i915_getparam: %d", ret); return GL_FALSE; } diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index c3a873f1abd..34b78ebc1ab 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -29,6 +29,7 @@ #include "main/macros.h" #include "main/mtypes.h" #include "main/colormac.h" +#include "main/texformat.h" #include "intel_buffers.h" #include "intel_fbo.h" @@ -313,6 +314,22 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define INTEL_TAG(x) x##_RGB565 #include "intel_spantmp.h" +/* a4r4g4b4 color span and pixel functions */ +#define INTEL_PIXEL_FMT GL_BGRA +#define INTEL_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV +#define INTEL_READ_VALUE(offset) pread_16(irb, offset) +#define INTEL_WRITE_VALUE(offset, v) pwrite_16(irb, offset, v) +#define INTEL_TAG(x) x##_ARGB4444 +#include "intel_spantmp.h" + +/* a1r5g5b5 color span and pixel functions */ +#define INTEL_PIXEL_FMT GL_BGRA +#define INTEL_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV +#define INTEL_READ_VALUE(offset) pread_16(irb, offset) +#define INTEL_WRITE_VALUE(offset, v) pwrite_16(irb, offset, v) +#define INTEL_TAG(x) x##_ARGB1555 +#include "intel_spantmp.h" + /* a8r8g8b8 color span and pixel functions */ #define INTEL_PIXEL_FMT GL_BGRA #define INTEL_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV @@ -561,8 +578,8 @@ intel_set_span_functions(struct intel_context *intel, else tiling = I915_TILING_NONE; - if (rb->_ActualFormat == GL_RGB5) { - /* 565 RGB */ + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_RGB565: switch (tiling) { case I915_TILING_NONE: default: @@ -575,38 +592,67 @@ intel_set_span_functions(struct intel_context *intel, intel_YTile_InitPointers_RGB565(rb); break; } - } - else if (rb->_ActualFormat == GL_RGB8) { - /* 8888 RGBx */ + break; + case MESA_FORMAT_ARGB4444: switch (tiling) { case I915_TILING_NONE: default: - intelInitPointers_xRGB8888(rb); + intelInitPointers_ARGB4444(rb); break; case I915_TILING_X: - intel_XTile_InitPointers_xRGB8888(rb); + intel_XTile_InitPointers_ARGB4444(rb); break; case I915_TILING_Y: - intel_YTile_InitPointers_xRGB8888(rb); + intel_YTile_InitPointers_ARGB4444(rb); break; } - } - else if (rb->_ActualFormat == GL_RGBA8) { - /* 8888 RGBA */ + break; + case MESA_FORMAT_ARGB1555: switch (tiling) { case I915_TILING_NONE: default: - intelInitPointers_ARGB8888(rb); + intelInitPointers_ARGB1555(rb); break; case I915_TILING_X: - intel_XTile_InitPointers_ARGB8888(rb); + intel_XTile_InitPointers_ARGB1555(rb); break; case I915_TILING_Y: - intel_YTile_InitPointers_ARGB8888(rb); + intel_YTile_InitPointers_ARGB1555(rb); break; } - } - else if (rb->_ActualFormat == GL_DEPTH_COMPONENT16) { + break; + case MESA_FORMAT_ARGB8888: + if (rb->AlphaBits == 0) { /* XXX: Need xRGB8888 Mesa format */ + /* 8888 RGBx */ + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitPointers_xRGB8888(rb); + break; + case I915_TILING_X: + intel_XTile_InitPointers_xRGB8888(rb); + break; + case I915_TILING_Y: + intel_YTile_InitPointers_xRGB8888(rb); + break; + } + } else { + /* 8888 RGBA */ + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitPointers_ARGB8888(rb); + break; + case I915_TILING_X: + intel_XTile_InitPointers_ARGB8888(rb); + break; + case I915_TILING_Y: + intel_YTile_InitPointers_ARGB8888(rb); + break; + } + } + break; + case MESA_FORMAT_Z16: switch (tiling) { case I915_TILING_NONE: default: @@ -619,51 +665,57 @@ intel_set_span_functions(struct intel_context *intel, intel_YTile_InitDepthPointers_z16(rb); break; } - } - else if (rb->_ActualFormat == GL_DEPTH_COMPONENT24) { - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitDepthPointers_z24(rb); - break; - case I915_TILING_X: - intel_XTile_InitDepthPointers_z24(rb); - break; - case I915_TILING_Y: - intel_YTile_InitDepthPointers_z24(rb); - break; - } - } - else if (rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT) { - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitDepthPointers_z24_s8(rb); - break; - case I915_TILING_X: - intel_XTile_InitDepthPointers_z24_s8(rb); - break; - case I915_TILING_Y: - intel_YTile_InitDepthPointers_z24_s8(rb); - break; - } - } - else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) { - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitStencilPointers_z24_s8(rb); - break; - case I915_TILING_X: - intel_XTile_InitStencilPointers_z24_s8(rb); - break; - case I915_TILING_Y: - intel_YTile_InitStencilPointers_z24_s8(rb); - break; + break; + case MESA_FORMAT_S8_Z24: + /* There are a few different ways SW asks us to access the S8Z24 data: + * Z24 depth-only depth reads + * S8Z24 depth reads + * S8Z24 stencil reads. + */ + if (rb->_ActualFormat == GL_DEPTH_COMPONENT24) { + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitDepthPointers_z24(rb); + break; + case I915_TILING_X: + intel_XTile_InitDepthPointers_z24(rb); + break; + case I915_TILING_Y: + intel_YTile_InitDepthPointers_z24(rb); + break; + } + } else if (rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT) { + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitDepthPointers_z24_s8(rb); + break; + case I915_TILING_X: + intel_XTile_InitDepthPointers_z24_s8(rb); + break; + case I915_TILING_Y: + intel_YTile_InitDepthPointers_z24_s8(rb); + break; + } + } else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) { + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitStencilPointers_z24_s8(rb); + break; + case I915_TILING_X: + intel_XTile_InitStencilPointers_z24_s8(rb); + break; + case I915_TILING_Y: + intel_YTile_InitStencilPointers_z24_s8(rb); + break; + } } - } - else { + break; + default: _mesa_problem(NULL, - "Unexpected _ActualFormat in intelSetSpanFunctions"); + "Unexpected MesaFormat in intelSetSpanFunctions"); + break; } } diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c index ae0994b183a..df63f29a42c 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.c +++ b/src/mesa/drivers/dri/intel/intel_tex.c @@ -158,81 +158,11 @@ timed_memcpy(void *dest, const void *src, size_t n) } #endif /* DO_DEBUG */ -/** - * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap - * level). - * - * The texture object's miptree must be mapped. - * - * It would be really nice if this was just called by Mesa whenever mipmaps - * needed to be regenerated, rather than us having to remember to do so in - * each texture image modification path. - * - * This function should also include an accelerated path. - */ -void -intel_generate_mipmap(GLcontext *ctx, GLenum target, - struct gl_texture_object *texObj) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_texture_object *intelObj = intel_texture_object(texObj); - GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; - int face, i; - - _mesa_generate_mipmap(ctx, target, texObj); - - /* Update the level information in our private data in the new images, since - * it didn't get set as part of a normal TexImage path. - */ - for (face = 0; face < nr_faces; face++) { - for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) { - struct intel_texture_image *intelImage; - - intelImage = intel_texture_image(texObj->Image[face][i]); - if (intelImage == NULL) - break; - - intelImage->level = i; - intelImage->face = face; - /* Unreference the miptree to signal that the new Data is a bare - * pointer from mesa. - */ - intel_miptree_release(intel, &intelImage->mt); - } - } -} - -static void intelGenerateMipmap(GLcontext *ctx, GLenum target, struct gl_texture_object *texObj) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_texture_object *intelObj = intel_texture_object(texObj); - - intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel); - intel_generate_mipmap(ctx, target, texObj); - intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel); -} - void intelInitTextureFuncs(struct dd_function_table *functions) { functions->ChooseTextureFormat = intelChooseTextureFormat; - functions->TexImage1D = intelTexImage1D; - functions->TexImage2D = intelTexImage2D; - functions->TexImage3D = intelTexImage3D; - functions->TexSubImage1D = intelTexSubImage1D; - functions->TexSubImage2D = intelTexSubImage2D; - functions->TexSubImage3D = intelTexSubImage3D; - functions->CopyTexImage1D = intelCopyTexImage1D; - functions->CopyTexImage2D = intelCopyTexImage2D; - functions->CopyTexSubImage1D = intelCopyTexSubImage1D; - functions->CopyTexSubImage2D = intelCopyTexSubImage2D; - functions->GetTexImage = intelGetTexImage; - functions->GenerateMipmap = intelGenerateMipmap; - - /* compressed texture functions */ - functions->CompressedTexImage2D = intelCompressedTexImage2D; - functions->CompressedTexSubImage2D = intelCompressedTexSubImage2D; - functions->GetCompressedTexImage = intelGetCompressedTexImage; + functions->GenerateMipmap = intel_generate_mipmap; functions->NewTextureObject = intelNewTextureObject; functions->NewTextureImage = intelNewTextureImage; diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h index f5372d82fb2..471aa2a240b 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.h +++ b/src/mesa/drivers/dri/intel/intel_tex.h @@ -35,116 +35,17 @@ void intelInitTextureFuncs(struct dd_function_table *functions); +void intelInitTextureImageFuncs(struct dd_function_table *functions); + +void intelInitTextureSubImageFuncs(struct dd_function_table *functions); + +void intelInitTextureCopyImageFuncs(struct dd_function_table *functions); + const struct gl_texture_format *intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, GLenum format, GLenum type); - -void intelTexImage3D(GLcontext * ctx, - GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint depth, - GLint border, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelTexSubImage3D(GLcontext * ctx, - GLenum target, - GLint level, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth, - GLenum format, GLenum type, - const GLvoid * pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelTexImage2D(GLcontext * ctx, - GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint border, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelTexSubImage2D(GLcontext * ctx, - GLenum target, - GLint level, - GLint xoffset, GLint yoffset, - GLsizei width, GLsizei height, - GLenum format, GLenum type, - const GLvoid * pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelTexImage1D(GLcontext * ctx, - GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint border, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelTexSubImage1D(GLcontext * ctx, - GLenum target, - GLint level, - GLint xoffset, - GLsizei width, - GLenum format, GLenum type, - const GLvoid * pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLint border); - -void intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border); - -void intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level, - GLint xoffset, GLint x, GLint y, GLsizei width); - -void intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, - GLint xoffset, GLint yoffset, - GLint x, GLint y, GLsizei width, GLsizei height); - -void intelGetTexImage(GLcontext * ctx, GLenum target, GLint level, - GLenum format, GLenum type, GLvoid * pixels, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint border, - GLsizei imageSize, const GLvoid *data, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ); - -void intelCompressedTexSubImage2D(GLcontext * ctx, - GLenum target, - GLint level, - GLint xoffset, GLint yoffset, - GLsizei width, GLsizei height, - GLenum format, GLsizei imageSize, - const GLvoid * pixels, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level, - GLvoid *pixels, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - void intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname, unsigned long long offset, GLint depth, GLuint pitch); void intelSetTexBuffer(__DRIcontext *pDRICtx, diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 08437aa0e2b..260235b1eb1 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -118,8 +118,12 @@ do_copy_texsubimage(struct intel_context *intel, dstx += x - orig_x; dsty += y - orig_y; - /* image_offset may be non-page-aligned, but that's illegal for tiling. */ - assert(intelImage->mt->region->tiling == I915_TILING_NONE); + /* Can't blit to tiled buffers with non-tile-aligned offset. */ + if (intelImage->mt->region->tiling != I915_TILING_NONE && + (image_offset & 4095) != 0) { + UNLOCK_HARDWARE(intel); + return GL_FALSE; + } if (ctx->ReadBuffer->Name == 0) { /* reading from a window, adjust x, y */ @@ -158,17 +162,14 @@ do_copy_texsubimage(struct intel_context *intel, /* GL_SGIS_generate_mipmap */ if (intelImage->level == texObj->BaseLevel && texObj->GenerateMipmap) { - ctx->Driver.GenerateMipmap(ctx, target, texObj); + intel_generate_mipmap(ctx, target, texObj); } return GL_TRUE; } - - - -void +static void intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level, GLenum internalFormat, GLint x, GLint y, GLsizei width, GLint border) @@ -214,7 +215,8 @@ intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level, width, border); } -void + +static void intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level, GLenum internalFormat, GLint x, GLint y, GLsizei width, GLsizei height, @@ -231,6 +233,14 @@ intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level, if (border) goto fail; + /* Setup or redefine the texture object, mipmap tree and texture + * image. Don't populate yet. + */ + ctx->Driver.TexImage2D(ctx, target, level, internalFormat, + width, height, border, + GL_RGBA, CHAN_TYPE, NULL, + &ctx->DefaultPacking, texObj, texImage); + srcx = x; srcy = y; dstx = 0; @@ -241,15 +251,6 @@ intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level, &width, &height)) return; - /* Setup or redefine the texture object, mipmap tree and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, - GL_RGBA, CHAN_TYPE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - if (!do_copy_texsubimage(intel_context(ctx), target, intel_texture_image(texImage), internalFormat, 0, 0, x, y, width, height)) @@ -263,7 +264,7 @@ intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level, } -void +static void intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width) { @@ -288,8 +289,7 @@ intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level, } - -void +static void intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height) @@ -302,7 +302,6 @@ intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, _mesa_select_tex_image(ctx, texObj, target, level); GLenum internalFormat = texImage->InternalFormat; - /* Need to check texture is compatible with source format. */ @@ -317,3 +316,13 @@ intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, xoffset, yoffset, x, y, width, height); } } + + +void +intelInitTextureCopyImageFuncs(struct dd_function_table *functions) +{ + functions->CopyTexImage1D = intelCopyTexImage1D; + functions->CopyTexImage2D = intelCopyTexImage2D; + functions->CopyTexSubImage1D = intelCopyTexSubImage1D; + functions->CopyTexSubImage2D = intelCopyTexSubImage2D; +} diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 71561cf85cd..e9a3823078a 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -208,7 +208,7 @@ try_pbo_upload(struct intel_context *intel, if (!pbo || intel->ctx._ImageTransferState || unpack->SkipPixels || unpack->SkipRows) { - _mesa_printf("%s: failure 1\n", __FUNCTION__); + DBG("%s: failure 1\n", __FUNCTION__); return GL_FALSE; } @@ -248,7 +248,6 @@ try_pbo_upload(struct intel_context *intel, } - static GLboolean try_pbo_zcopy(struct intel_context *intel, struct intel_texture_image *intelImage, @@ -264,7 +263,7 @@ try_pbo_zcopy(struct intel_context *intel, if (!pbo || intel->ctx._ImageTransferState || unpack->SkipPixels || unpack->SkipRows) { - _mesa_printf("%s: failure 1\n", __FUNCTION__); + DBG("%s: failure 1\n", __FUNCTION__); return GL_FALSE; } @@ -283,7 +282,7 @@ try_pbo_zcopy(struct intel_context *intel, dst_stride = intelImage->mt->pitch; if (src_stride != dst_stride || dst_offset != 0 || src_offset != 0) { - _mesa_printf("%s: failure 2\n", __FUNCTION__); + DBG("%s: failure 2\n", __FUNCTION__); return GL_FALSE; } @@ -293,10 +292,6 @@ try_pbo_zcopy(struct intel_context *intel, } - - - - static void intelTexImage(GLcontext * ctx, GLint dims, @@ -307,7 +302,8 @@ intelTexImage(GLcontext * ctx, GLenum format, GLenum type, const void *pixels, const struct gl_pixelstore_attrib *unpack, struct gl_texture_object *texObj, - struct gl_texture_image *texImage, GLsizei imageSize, int compressed) + struct gl_texture_image *texImage, GLsizei imageSize, + GLboolean compressed) { struct intel_context *intel = intel_context(ctx); struct intel_texture_object *intelObj = intel_texture_object(texObj); @@ -315,8 +311,7 @@ intelTexImage(GLcontext * ctx, GLint postConvWidth = width; GLint postConvHeight = height; GLint texelBytes, sizeInBytes; - GLuint dstRowStride, srcRowStride = texImage->RowStride; - + GLuint dstRowStride = 0, srcRowStride = texImage->RowStride; DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), level, width, height, depth, border); @@ -464,8 +459,6 @@ intelTexImage(GLcontext * ctx, DBG("pbo upload failed\n"); } - - /* intelCopyTexImage calls this function with pixels == NULL, with * the expectation that the mipmap tree will be set up but nothing * more will be done. This is where those calls return: @@ -483,7 +476,7 @@ intelTexImage(GLcontext * ctx, LOCK_HARDWARE(intel); if (intelImage->mt) { - if (pixels) + if (pixels != NULL) texImage->Data = intel_miptree_image_map(intel, intelImage->mt, intelImage->face, @@ -509,8 +502,9 @@ intelTexImage(GLcontext * ctx, } DBG("Upload image %dx%dx%d row_len %d " - "pitch %d\n", - width, height, depth, width * texelBytes, dstRowStride); + "pitch %d pixels %d compressed %d\n", + width, height, depth, width * texelBytes, dstRowStride, + pixels ? 1 : 0, compressed); /* Copy data. Would like to know when it's ok for us to eg. use * the blitter to copy. Or, use the hardware to do the format @@ -523,7 +517,7 @@ intelTexImage(GLcontext * ctx, _mesa_copy_rect(texImage->Data, dst->cpp, dst->pitch, 0, 0, intelImage->mt->level[level].width, - intelImage->mt->level[level].height/4, + (intelImage->mt->level[level].height+3)/4, pixels, srcRowStride, 0, 0); @@ -539,25 +533,26 @@ intelTexImage(GLcontext * ctx, format, type, pixels, unpack)) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage"); } - - /* GL_SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - intel_generate_mipmap(ctx, target, texObj); - } } _mesa_unmap_teximage_pbo(ctx, unpack); if (intelImage->mt) { - if (pixels) + if (pixels != NULL) intel_miptree_image_unmap(intel, intelImage->mt); texImage->Data = NULL; } UNLOCK_HARDWARE(intel); + + /* GL_SGIS_generate_mipmap */ + if (level == texObj->BaseLevel && texObj->GenerateMipmap) { + intel_generate_mipmap(ctx, target, texObj); + } } -void + +static void intelTexImage3D(GLcontext * ctx, GLenum target, GLint level, GLint internalFormat, @@ -570,11 +565,11 @@ intelTexImage3D(GLcontext * ctx, { intelTexImage(ctx, 3, target, level, internalFormat, width, height, depth, border, - format, type, pixels, unpack, texObj, texImage, 0, 0); + format, type, pixels, unpack, texObj, texImage, 0, GL_FALSE); } -void +static void intelTexImage2D(GLcontext * ctx, GLenum target, GLint level, GLint internalFormat, @@ -586,10 +581,11 @@ intelTexImage2D(GLcontext * ctx, { intelTexImage(ctx, 2, target, level, internalFormat, width, height, 1, border, - format, type, pixels, unpack, texObj, texImage, 0, 0); + format, type, pixels, unpack, texObj, texImage, 0, GL_FALSE); } -void + +static void intelTexImage1D(GLcontext * ctx, GLenum target, GLint level, GLint internalFormat, @@ -601,21 +597,24 @@ intelTexImage1D(GLcontext * ctx, { intelTexImage(ctx, 1, target, level, internalFormat, width, 1, 1, border, - format, type, pixels, unpack, texObj, texImage, 0, 0); + format, type, pixels, unpack, texObj, texImage, 0, GL_FALSE); } -void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint border, - GLsizei imageSize, const GLvoid *data, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) + +static void +intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint border, + GLsizei imageSize, const GLvoid *data, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) { intelTexImage(ctx, 2, target, level, internalFormat, width, height, 1, border, - 0, 0, data, &ctx->Unpack, texObj, texImage, imageSize, 1); + 0, 0, data, &ctx->Unpack, texObj, texImage, imageSize, GL_TRUE); } + /** * Need to map texture image into memory before copying image data, * then unmap it. @@ -624,11 +623,17 @@ static void intel_get_tex_image(GLcontext * ctx, GLenum target, GLint level, GLenum format, GLenum type, GLvoid * pixels, struct gl_texture_object *texObj, - struct gl_texture_image *texImage, int compressed) + struct gl_texture_image *texImage, GLboolean compressed) { struct intel_context *intel = intel_context(ctx); struct intel_texture_image *intelImage = intel_texture_image(texImage); + /* If we're reading from a texture that has been rendered to, need to + * make sure rendering is complete. + * We could probably predicate this on texObj->_RenderToTexture + */ + intelFlush(ctx); + /* Map */ if (intelImage->mt) { /* Image is stored in hardware format in a buffer managed by the @@ -672,28 +677,29 @@ intel_get_tex_image(GLcontext * ctx, GLenum target, GLint level, } } -void + +static void intelGetTexImage(GLcontext * ctx, GLenum target, GLint level, GLenum format, GLenum type, GLvoid * pixels, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { intel_get_tex_image(ctx, target, level, format, type, pixels, - texObj, texImage, 0); - - + texObj, texImage, GL_FALSE); } -void + +static void intelGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level, GLvoid *pixels, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { intel_get_tex_image(ctx, target, level, 0, 0, pixels, - texObj, texImage, 1); + texObj, texImage, GL_TRUE); } + void intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname, unsigned long long offset, GLint depth, GLuint pitch) @@ -802,3 +808,16 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) */ intelSetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv); } + + +void +intelInitTextureImageFuncs(struct dd_function_table *functions) +{ + functions->TexImage1D = intelTexImage1D; + functions->TexImage2D = intelTexImage2D; + functions->TexImage3D = intelTexImage3D; + functions->GetTexImage = intelGetTexImage; + + functions->CompressedTexImage2D = intelCompressedTexImage2D; + functions->GetCompressedTexImage = intelGetCompressedTexImage; +} diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.c b/src/mesa/drivers/dri/intel/intel_tex_layout.c index e6f9a417790..2c1b722b7f5 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_layout.c +++ b/src/mesa/drivers/dri/intel/intel_tex_layout.c @@ -52,7 +52,9 @@ GLuint intel_compressed_alignment(GLenum internalFormat) return alignment; } -void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt ) +void i945_miptree_layout_2d( struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t tiling ) { GLint align_h = 2, align_w = 4; GLuint level; @@ -92,7 +94,7 @@ void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tr /* Pitch must be a whole number of dwords, even though we * express it in texels. */ - mt->pitch = intel_miptree_pitch_align (intel, mt, mt->pitch); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->pitch); mt->total_height = 0; for ( level = mt->first_level ; level <= mt->last_level ; level++ ) { diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.h b/src/mesa/drivers/dri/intel/intel_tex_layout.h index dbc90e6f9b7..7bc25b6bcb1 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_layout.h +++ b/src/mesa/drivers/dri/intel/intel_tex_layout.h @@ -38,5 +38,7 @@ static GLuint minify( GLuint d ) return MAX2(1, d>>1); } -extern void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt ); +extern void i945_miptree_layout_2d(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t tiling); extern GLuint intel_compressed_alignment(GLenum); diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c index f86de568976..1f27131dac0 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c @@ -101,11 +101,6 @@ intelTexSubimage(GLcontext * ctx, _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage"); } - /* GL_SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - intel_generate_mipmap(ctx, target, texObj); - } - _mesa_unmap_teximage_pbo(ctx, packing); if (intelImage->mt) { @@ -114,13 +109,15 @@ intelTexSubimage(GLcontext * ctx, } UNLOCK_HARDWARE(intel); -} - - + /* GL_SGIS_generate_mipmap */ + if (level == texObj->BaseLevel && texObj->GenerateMipmap) { + intel_generate_mipmap(ctx, target, texObj); + } +} -void +static void intelTexSubImage3D(GLcontext * ctx, GLenum target, GLint level, @@ -132,18 +129,15 @@ intelTexSubImage3D(GLcontext * ctx, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { - intelTexSubimage(ctx, 3, target, level, xoffset, yoffset, zoffset, width, height, depth, format, type, pixels, packing, texObj, texImage); - } - -void +static void intelTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, @@ -155,17 +149,15 @@ intelTexSubImage2D(GLcontext * ctx, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { - intelTexSubimage(ctx, 2, target, level, xoffset, yoffset, 0, width, height, 1, format, type, pixels, packing, texObj, texImage); - } -void +static void intelTexSubImage1D(GLcontext * ctx, GLenum target, GLint level, @@ -182,10 +174,9 @@ intelTexSubImage1D(GLcontext * ctx, xoffset, 0, 0, width, 1, 1, format, type, pixels, packing, texObj, texImage); - } -void +static void intelCompressedTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, @@ -199,3 +190,14 @@ intelCompressedTexSubImage2D(GLcontext * ctx, fprintf(stderr, "stubbed CompressedTexSubImage2D: %dx%d@%dx%d\n", width, height, xoffset, yoffset); } + + + +void +intelInitTextureSubImageFuncs(struct dd_function_table *functions) +{ + functions->TexSubImage1D = intelTexSubImage1D; + functions->TexSubImage2D = intelTexSubImage2D; + functions->TexSubImage3D = intelTexSubImage3D; + functions->CompressedTexSubImage2D = intelCompressedTexSubImage2D; +} diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c index 05a375e1f3b..b5cb7597d16 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c @@ -241,7 +241,7 @@ intel_tex_map_level_images(struct intel_context *intel, struct intel_texture_image *intelImage = intel_texture_image(intelObj->base.Image[face][level]); - if (intelImage->mt) { + if (intelImage && intelImage->mt) { intelImage->base.Data = intel_miptree_image_map(intel, intelImage->mt, @@ -268,7 +268,7 @@ intel_tex_unmap_level_images(struct intel_context *intel, struct intel_texture_image *intelImage = intel_texture_image(intelObj->base.Image[face][level]); - if (intelImage->mt) { + if (intelImage && intelImage->mt) { intel_miptree_image_unmap(intel, intelImage->mt); intelImage->base.Data = NULL; } diff --git a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c index 3a11a448eca..df9dd83344c 100644 --- a/src/mesa/drivers/dri/r200/r200_cmdbuf.c +++ b/src/mesa/drivers/dri/r200/r200_cmdbuf.c @@ -105,6 +105,35 @@ void r200SetUpAtomList( r200ContextPtr rmesa ) insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[1] ); } +void r200EmitScissor(r200ContextPtr rmesa) +{ + BATCH_LOCALS(&rmesa->radeon); + if (!rmesa->radeon.radeonScreen->kernel_mm) { + return; + } + if (rmesa->radeon.state.scissor.enabled) { + BEGIN_BATCH(8); + OUT_BATCH(CP_PACKET0(R200_RE_CNTL, 0)); + OUT_BATCH(R200_SCISSOR_ENABLE | rmesa->hw.set.cmd[SET_RE_CNTL]); + OUT_BATCH(CP_PACKET0(R200_RE_AUX_SCISSOR_CNTL, 0)); + OUT_BATCH(R200_SCISSOR_ENABLE_0); + OUT_BATCH(CP_PACKET0(R200_RE_SCISSOR_TL_0, 0)); + OUT_BATCH((rmesa->radeon.state.scissor.rect.y1 << 16) | + rmesa->radeon.state.scissor.rect.x1); + OUT_BATCH(CP_PACKET0(R200_RE_SCISSOR_BR_0, 0)); + OUT_BATCH(((rmesa->radeon.state.scissor.rect.y2 - 1) << 16) | + (rmesa->radeon.state.scissor.rect.x2 - 1)); + END_BATCH(); + } else { + BEGIN_BATCH(4); + OUT_BATCH(CP_PACKET0(R200_RE_CNTL, 0)); + OUT_BATCH(rmesa->hw.set.cmd[SET_RE_CNTL] & ~R200_SCISSOR_ENABLE); + OUT_BATCH(CP_PACKET0(R200_RE_AUX_SCISSOR_CNTL, 0)); + OUT_BATCH(0); + END_BATCH(); + } +} + /* Fire a section of the retained (indexed_verts) buffer as a regular * primtive. */ @@ -121,6 +150,7 @@ void r200EmitVbufPrim( r200ContextPtr rmesa, if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS)) fprintf(stderr, "%s cmd_used/4: %d prim %x nr %d\n", __FUNCTION__, rmesa->store.cmd_used/4, primitive, vertex_nr); + r200EmitScissor(rmesa); BEGIN_BATCH(3); OUT_BATCH_PACKET3_CLIP(R200_CP_CMD_3D_DRAW_VBUF_2, 0); @@ -134,9 +164,11 @@ static void r200FireEB(r200ContextPtr rmesa, int vertex_count, int type) BATCH_LOCALS(&rmesa->radeon); if (vertex_count > 0) { + r200EmitScissor(rmesa); BEGIN_BATCH(8+2); - OUT_BATCH_PACKET3(R200_CP_CMD_3D_DRAW_INDX_2, 0); + OUT_BATCH_PACKET3_CLIP(R200_CP_CMD_3D_DRAW_INDX_2, 0); OUT_BATCH(R200_VF_PRIM_WALK_IND | + R200_VF_COLOR_ORDER_RGBA | ((vertex_count + 0) << 16) | type); @@ -147,12 +179,12 @@ static void r200FireEB(r200ContextPtr rmesa, int vertex_count, int type) rmesa->radeon.tcl.elt_dma_bo, rmesa->radeon.tcl.elt_dma_offset, RADEON_GEM_DOMAIN_GTT, 0, 0); - OUT_BATCH(vertex_count/2); + OUT_BATCH((vertex_count + 1)/2); } else { OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2); OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810); OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset); - OUT_BATCH(vertex_count/2); + OUT_BATCH((vertex_count + 1)/2); radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, rmesa->radeon.tcl.elt_dma_bo, RADEON_GEM_DOMAIN_GTT, 0, 0); @@ -172,8 +204,6 @@ void r200FlushElts(GLcontext *ctx) assert( rmesa->radeon.dma.flush == r200FlushElts ); rmesa->radeon.dma.flush = NULL; - elt_used = (elt_used + 2) & ~2; - nr = elt_used / 2; radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo); @@ -209,6 +239,11 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa, rmesa->radeon.tcl.elt_dma_offset = 0; rmesa->tcl.elt_used = min_nr * 2; + radeon_validate_bo(&rmesa->radeon, rmesa->radeon.tcl.elt_dma_bo, + RADEON_GEM_DOMAIN_GTT, 0); + if (radeon_revalidate_bos(rmesa->radeon.glCtx) == GL_FALSE) + fprintf(stderr,"failure to revalidate BOs - badness\n"); + radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1); retval = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset; @@ -238,7 +273,7 @@ void r200EmitVertexAOS( r200ContextPtr rmesa, __FUNCTION__, vertex_size, offset); - BEGIN_BATCH(5); + BEGIN_BATCH(7); OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, 2); OUT_BATCH(1); OUT_BATCH(vertex_size | (vertex_size << 8)); diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index f80f0d8ac73..8924849d082 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -354,6 +354,10 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, i = driQueryOptioni( &rmesa->radeon.optionCache, "allow_large_textures"); + /* FIXME: When no memory manager is available we should set this + * to some reasonable value based on texture memory pool size */ + ctx->Const.MaxTextureLevels = 12; + ctx->Const.MaxTextureMaxAnisotropy = 16.0; /* No wide AA points. diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c index 0262aea880e..0b3398a730e 100644 --- a/src/mesa/drivers/dri/r200/r200_ioctl.c +++ b/src/mesa/drivers/dri/r200/r200_ioctl.c @@ -31,7 +31,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * Authors: * Keith Whitwell <[email protected]> */ - + #include <sched.h> #include <errno.h> @@ -66,7 +66,7 @@ static void r200UserClear(GLcontext *ctx, GLuint mask) static void r200KernelClear(GLcontext *ctx, GLuint flags) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); GLint cx, cy, cw, ch, ret; GLuint i; @@ -94,7 +94,7 @@ static void r200KernelClear(GLcontext *ctx, GLuint flags) if ( rmesa->radeon.sarea->last_clear - clear <= 25 ) { break; } - + if (rmesa->radeon.do_usleeps) { UNLOCK_HARDWARE( &rmesa->radeon ); DO_USLEEP( 1 ); @@ -190,7 +190,7 @@ static void r200KernelClear(GLcontext *ctx, GLuint flags) static void r200Clear( GLcontext *ctx, GLbitfield mask ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); GLuint flags = 0; GLuint color_mask = 0; GLuint orig_mask = mask; @@ -202,7 +202,7 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask ) { LOCK_HARDWARE( &rmesa->radeon ); UNLOCK_HARDWARE( &rmesa->radeon ); - if ( dPriv->numClipRects == 0 ) + if ( dPriv->numClipRects == 0 ) return; } @@ -236,7 +236,7 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask ) _swrast_Clear( ctx, mask ); } - if ( !flags ) + if ( !flags ) return; if (rmesa->using_hyperz) { @@ -267,7 +267,7 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask ) * device fd. */ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size, - GLfloat readfreq, GLfloat writefreq, + GLfloat readfreq, GLfloat writefreq, GLfloat priority) { GET_CURRENT_CONTEXT(ctx); @@ -277,7 +277,7 @@ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size, int ret; if (R200_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s sz %d %f/%f/%f\n", __FUNCTION__, size, readfreq, + fprintf(stderr, "%s sz %d %f/%f/%f\n", __FUNCTION__, size, readfreq, writefreq, priority); if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->radeon.radeonScreen->gartTextures.map) @@ -294,12 +294,12 @@ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size, ret = drmCommandWriteRead( rmesa->radeon.radeonScreen->driScreen->fd, DRM_RADEON_ALLOC, &alloc, sizeof(alloc)); - + if (ret) { fprintf(stderr, "%s: DRM_RADEON_ALLOC ret %d\n", __FUNCTION__, ret); return NULL; } - + { char *region_start = (char *)rmesa->radeon.radeonScreen->gartTextures.map; return (void *)(region_start + region_offset); @@ -326,7 +326,7 @@ void r200FreeMemoryMESA(__DRIscreen *screen, GLvoid *pointer) region_offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map; - if (region_offset < 0 || + if (region_offset < 0 || region_offset > rmesa->radeon.radeonScreen->gartTextures.size) { fprintf(stderr, "offset %d outside range 0..%d\n", region_offset, rmesa->radeon.radeonScreen->gartTextures.size); @@ -335,12 +335,12 @@ void r200FreeMemoryMESA(__DRIscreen *screen, GLvoid *pointer) memfree.region = RADEON_MEM_REGION_GART; memfree.region_offset = region_offset; - + ret = drmCommandWrite( rmesa->radeon.radeonScreen->driScreen->fd, DRM_RADEON_FREE, &memfree, sizeof(memfree)); - - if (ret) + + if (ret) fprintf(stderr, "%s: DRM_RADEON_FREE ret %d\n", __FUNCTION__, ret); } @@ -374,7 +374,7 @@ GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer, if (R200_DEBUG & DEBUG_IOCTL) fprintf(stderr, "r200IsGartMemory( %p ) : %d\n", pointer, valid ); - + return valid; } diff --git a/src/mesa/drivers/dri/r200/r200_pixel.c b/src/mesa/drivers/dri/r200/r200_pixel.c index 354daef07f6..654f2c6ae98 100644 --- a/src/mesa/drivers/dri/r200/r200_pixel.c +++ b/src/mesa/drivers/dri/r200/r200_pixel.c @@ -65,8 +65,8 @@ check_color( const GLcontext *ctx, GLenum type, GLenum format, return GL_FALSE; } - if ( type == GL_UNSIGNED_INT_8_8_8_8_REV && - cpp == 4 && + if ( type == GL_UNSIGNED_INT_8_8_8_8_REV && + cpp == 4 && format == GL_BGRA ) { if (R200_DEBUG & DEBUG_PIXEL) fprintf(stderr, "%s: passed 2\n", __FUNCTION__); @@ -83,7 +83,7 @@ static GLboolean check_color_per_fragment_ops( const GLcontext *ctx ) { int result; - result = (!( ctx->Color.AlphaEnabled || + result = (!( ctx->Color.AlphaEnabled || ctx->Depth.Test || ctx->Fog.Enabled || ctx->Scissor.Enabled || @@ -96,7 +96,7 @@ check_color_per_fragment_ops( const GLcontext *ctx ) ctx->Texture._EnabledUnits ) && ctx->Current.RasterPosValid); - + return result; } @@ -163,7 +163,7 @@ r200TryReadPixels( GLcontext *ctx, /* Only accelerate reading to GART buffers. */ - if ( !r200IsGartMemory(rmesa, pixels, + if ( !r200IsGartMemory(rmesa, pixels, pitch * height * rmesa->radeon.radeonScreen->cpp ) ) { if (R200_DEBUG & DEBUG_PIXEL) fprintf(stderr, "%s: dest not GART\n", __FUNCTION__); @@ -224,7 +224,7 @@ r200TryReadPixels( GLcontext *ctx, drm_clip_rect_t *box = dPriv->pClipRects; int i; - r200EmitWait( rmesa, RADEON_WAIT_3D ); + r200EmitWait( rmesa, RADEON_WAIT_3D ); y = dPriv->h - y - height; x += dPriv->x; @@ -241,7 +241,7 @@ r200TryReadPixels( GLcontext *ctx, GLint by = box[i].y1; GLint bw = box[i].x2 - bx; GLint bh = box[i].y2 - by; - + if (bx < x) bw -= x - bx, bx = x; if (by < y) bh -= y - by, by = y; if (bx + bw > x + width) bw = x + width - bx; @@ -277,9 +277,9 @@ r200ReadPixels( GLcontext *ctx, if (R200_DEBUG & DEBUG_PIXEL) fprintf(stderr, "%s\n", __FUNCTION__); - if (!r200TryReadPixels( ctx, x, y, width, height, format, type, pack, + if (!r200TryReadPixels( ctx, x, y, width, height, format, type, pack, pixels)) - _swrast_ReadPixels( ctx, x, y, width, height, format, type, pack, + _swrast_ReadPixels( ctx, x, y, width, height, format, type, pack, pixels); } @@ -293,7 +293,7 @@ static void do_draw_pix( GLcontext *ctx, GLuint planemask) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); drm_clip_rect_t *box = dPriv->pClipRects; struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorDrawBuffers[0]; driRenderbuffer *drb = (driRenderbuffer *) rb; @@ -325,7 +325,7 @@ static void do_draw_pix( GLcontext *ctx, rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ ); y -= height; /* cope with pixel zoom */ - + if (!clip_pixelrect(ctx, ctx->DrawBuffer, &x, &y, &width, &height, &size)) { @@ -409,7 +409,7 @@ r200TryDrawPixels( GLcontext *ctx, if (planemask != ~0) return GL_FALSE; /* fix me -- should be possible */ - /* Can't do conversions on GART reads/draws. + /* Can't do conversions on GART reads/draws. */ if ( !r200IsGartMemory( rmesa, pixels, size ) ) { if (R200_DEBUG & DEBUG_PIXEL) @@ -484,9 +484,9 @@ r200Bitmap( GLcontext *ctx, GLint px, GLint py, void r200InitPixelFuncs( GLcontext *ctx ) { if (!getenv("R200_NO_BLITS")) { - ctx->Driver.ReadPixels = r200ReadPixels; - ctx->Driver.DrawPixels = r200DrawPixels; - if (getenv("R200_HW_BITMAP")) + ctx->Driver.ReadPixels = r200ReadPixels; + ctx->Driver.DrawPixels = r200DrawPixels; + if (getenv("R200_HW_BITMAP")) ctx->Driver.Bitmap = r200Bitmap; } } diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index ebf389efe26..f8ebe0df57e 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -79,7 +79,7 @@ static void r200AlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref ) switch ( func ) { case GL_NEVER: - pp_misc |= R200_ALPHA_TEST_FAIL; + pp_misc |= R200_ALPHA_TEST_FAIL; break; case GL_LESS: pp_misc |= R200_ALPHA_TEST_LESS; @@ -479,7 +479,7 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param ) } } break; - case GL_FOG_COLOR: + case GL_FOG_COLOR: R200_STATECHANGE( rmesa, ctx ); UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color ); i = radeonPackColor( 4, col[0], col[1], col[2], 0 ); @@ -507,7 +507,7 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param ) if (out_0 != rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0]) { R200_STATECHANGE( rmesa, vtx ); - rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] = out_0; + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] = out_0; } break; @@ -696,7 +696,7 @@ static void r200LineStipple( GLcontext *ctx, GLint factor, GLushort pattern ) r200ContextPtr rmesa = R200_CONTEXT(ctx); R200_STATECHANGE( rmesa, lin ); - rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = + rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = ((((GLuint)factor & 0xff) << 16) | ((GLuint)pattern)); } @@ -720,10 +720,10 @@ static void r200ColorMask( GLcontext *ctx, if (!(r && g && b && a)) flag |= R200_PLANE_MASK_ENABLE; - if ( rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] != flag ) { - R200_STATECHANGE( rmesa, ctx ); - rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = flag; - } + if ( rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] != flag ) { + R200_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = flag; + } if ( rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] != mask ) { R200_STATECHANGE( rmesa, msk ); @@ -774,7 +774,7 @@ static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask ) /* FIXME: Use window x,y offsets into stipple RAM. */ stipple.mask = rmesa->state.stipple.mask; - drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE, + drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE, &stipple, sizeof(stipple) ); UNLOCK_HARDWARE( &rmesa->radeon ); } @@ -785,7 +785,7 @@ static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode ) GLboolean flag = (ctx->_TriangleCaps & DD_TRI_UNFILLED) != 0; /* Can't generally do unfilled via tcl, but some good special - * cases work. + * cases work. */ TCL_FALLBACK( ctx, R200_TCL_FALLBACK_UNFILLED, flag); if (rmesa->radeon.TclFallback) { @@ -827,34 +827,34 @@ static void r200UpdateSpecular( GLcontext *ctx ) if (ctx->Light.Enabled && ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR) { - rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT) | - (R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT)); + (R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT)); rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_0; rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_1; rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHTING_ENABLE; p |= R200_SPECULAR_ENABLE; - rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_DIFFUSE_SPECULAR_COMBINE; } else if (ctx->Light.Enabled) { - rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= - ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT)); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= + ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT)); rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_0; rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHTING_ENABLE; } else if (ctx->Fog.ColorSumEnabled ) { - rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT) | - (R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT)); + (R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT)); p |= R200_SPECULAR_ENABLE; } else { - rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= - ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT)); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= + ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT)); } if (ctx->Fog.Enabled) { - rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= - ((R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT)); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= + ((R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT)); rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_1; } @@ -865,7 +865,7 @@ static void r200UpdateSpecular( GLcontext *ctx ) /* Update vertex/render formats */ - if (rmesa->radeon.TclFallback) { + if (rmesa->radeon.TclFallback) { r200ChooseRenderState( ctx ); r200ChooseVertexState( ctx ); } @@ -877,7 +877,7 @@ static void r200UpdateSpecular( GLcontext *ctx ) */ -/* Update on colormaterial, material emmissive/ambient, +/* Update on colormaterial, material emmissive/ambient, * lightmodel.globalambient */ static void update_global_ambient( GLcontext *ctx ) @@ -891,23 +891,23 @@ static void update_global_ambient( GLcontext *ctx ) */ if ((rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] & ((3 << R200_FRONT_EMISSIVE_SOURCE_SHIFT) | - (3 << R200_FRONT_AMBIENT_SOURCE_SHIFT))) == 0) + (3 << R200_FRONT_AMBIENT_SOURCE_SHIFT))) == 0) { - COPY_3V( &fcmd[GLT_RED], + COPY_3V( &fcmd[GLT_RED], ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_EMISSION]); ACC_SCALE_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient, ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_AMBIENT]); - } + } else { COPY_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient ); } - + R200_DB_STATECHANGE(rmesa, &rmesa->hw.glt); } -/* Update on change to +/* Update on change to * - light[p].colors * - light[p].enabled */ @@ -921,10 +921,10 @@ static void update_light_colors( GLcontext *ctx, GLuint p ) r200ContextPtr rmesa = R200_CONTEXT(ctx); float *fcmd = (float *)R200_DB_STATE( lit[p] ); - COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient ); + COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient ); COPY_4V( &fcmd[LIT_DIFFUSE_RED], l->Diffuse ); COPY_4V( &fcmd[LIT_SPECULAR_RED], l->Specular ); - + R200_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] ); } } @@ -944,7 +944,7 @@ static void r200ColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) if (ctx->Light.ColorMaterialEnabled) { GLuint mask = ctx->Light.ColorMaterialBitmask; - + if (mask & MAT_BIT_FRONT_EMISSION) { light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 << R200_FRONT_EMISSIVE_SOURCE_SHIFT); @@ -960,7 +960,7 @@ static void r200ColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) else light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_AMBIENT_SOURCE_SHIFT); - + if (mask & MAT_BIT_FRONT_DIFFUSE) { light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 << R200_FRONT_DIFFUSE_SOURCE_SHIFT); @@ -968,7 +968,7 @@ static void r200ColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) else light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_DIFFUSE_SOURCE_SHIFT); - + if (mask & MAT_BIT_FRONT_SPECULAR) { light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 << R200_FRONT_SPECULAR_SOURCE_SHIFT); @@ -977,7 +977,7 @@ static void r200ColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_SPECULAR_SOURCE_SHIFT); } - + if (mask & MAT_BIT_BACK_EMISSION) { light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 << R200_BACK_EMISSIVE_SOURCE_SHIFT); @@ -1027,8 +1027,8 @@ static void r200ColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) R200_STATECHANGE( rmesa, tcl ); rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] = light_model_ctl1; } - - + + } void r200UpdateMaterial( GLcontext *ctx ) @@ -1038,7 +1038,7 @@ void r200UpdateMaterial( GLcontext *ctx ) GLfloat *fcmd = (GLfloat *)R200_DB_STATE( mtl[0] ); GLfloat *fcmd2 = (GLfloat *)R200_DB_STATE( mtl[1] ); GLuint mask = ~0; - + /* Might be possible and faster to update everything unconditionally? */ if (ctx->Light.ColorMaterialEnabled) mask &= ~ctx->Light.ColorMaterialBitmask; @@ -1124,7 +1124,7 @@ void r200UpdateMaterial( GLcontext *ctx ) * * which are calculated in light.c and are correct for the current * lighting space (model or eye), hence dependencies on _NEW_MODELVIEW - * and _MESA_NEW_NEED_EYE_COORDS. + * and _MESA_NEW_NEED_EYE_COORDS. */ static void update_light( GLcontext *ctx ) { @@ -1141,8 +1141,8 @@ static void update_light( GLcontext *ctx ) tmp &= ~R200_LIGHT_IN_MODELSPACE; else tmp |= R200_LIGHT_IN_MODELSPACE; - - if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]) + + if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]) { R200_STATECHANGE( rmesa, tcl ); rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] = tmp; @@ -1166,10 +1166,10 @@ static void update_light( GLcontext *ctx ) if (ctx->Light.Light[p].Enabled) { struct gl_light *l = &ctx->Light.Light[p]; GLfloat *fcmd = (GLfloat *)R200_DB_STATE( lit[p] ); - + if (l->EyePosition[3] == 0.0) { - COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm ); - COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm ); + COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm ); + COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm ); fcmd[LIT_POSITION_W] = 0; fcmd[LIT_DIRECTION_W] = 0; } else { @@ -1193,21 +1193,21 @@ static void r200Lightfv( GLcontext *ctx, GLenum light, GLint p = light - GL_LIGHT0; struct gl_light *l = &ctx->Light.Light[p]; GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd; - + switch (pname) { - case GL_AMBIENT: + case GL_AMBIENT: case GL_DIFFUSE: case GL_SPECULAR: update_light_colors( ctx, p ); break; - case GL_SPOT_DIRECTION: - /* picked up in update_light */ + case GL_SPOT_DIRECTION: + /* picked up in update_light */ break; case GL_POSITION: { - /* positions picked up in update_light, but can do flag here */ + /* positions picked up in update_light, but can do flag here */ GLuint flag = (p&1)? R200_LIGHT_1_IS_LOCAL : R200_LIGHT_0_IS_LOCAL; GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2; @@ -1323,7 +1323,7 @@ static void r200LightModelfv( GLcontext *ctx, GLenum pname, r200ContextPtr rmesa = R200_CONTEXT(ctx); switch (pname) { - case GL_LIGHT_MODEL_AMBIENT: + case GL_LIGHT_MODEL_AMBIENT: update_global_ambient( ctx ); break; @@ -1582,7 +1582,7 @@ static void r200ClearStencil( GLcontext *ctx, GLint s ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - rmesa->radeon.state.stencil.clear = + rmesa->radeon.state.stencil.clear = ((GLuint) (ctx->Stencil.Clear & 0xff) | (0xff << R200_STENCIL_MASK_SHIFT) | ((ctx->Stencil.WriteMask[0] & 0xff) << R200_STENCIL_WRITEMASK_SHIFT)); @@ -1607,7 +1607,7 @@ static void r200ClearStencil( GLcontext *ctx, GLint s ) void r200UpdateWindow( GLcontext *ctx ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0; GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0; const GLfloat *v = ctx->Viewport._WindowMap.m; @@ -1663,7 +1663,7 @@ static void r200DepthRange( GLcontext *ctx, GLclampd nearval, void r200UpdateViewportOffset( GLcontext *ctx ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); GLfloat xoffset = (GLfloat)dPriv->x; GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h; const GLfloat *v = ctx->Viewport._WindowMap.m; @@ -1693,8 +1693,8 @@ void r200UpdateViewportOffset( GLcontext *ctx ) R200_STIPPLE_Y_OFFSET_MASK); /* add magic offsets, then invert */ - stx = 31 - ((rmesa->radeon.dri.drawable->x - 1) & R200_STIPPLE_COORD_MASK); - sty = 31 - ((rmesa->radeon.dri.drawable->y + rmesa->radeon.dri.drawable->h - 1) + stx = 31 - ((dPriv->x - 1) & R200_STIPPLE_COORD_MASK); + sty = 31 - ((dPriv->y + dPriv->h - 1) & R200_STIPPLE_COORD_MASK); m |= ((stx << R200_STIPPLE_X_OFFSET_SHIFT) | @@ -1808,7 +1808,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) case GL_CLIP_PLANE2: case GL_CLIP_PLANE3: case GL_CLIP_PLANE4: - case GL_CLIP_PLANE5: + case GL_CLIP_PLANE5: p = cap-GL_CLIP_PLANE0; R200_STATECHANGE( rmesa, tcl ); if (state) { @@ -1860,7 +1860,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_TCL_FOG_MASK; } r200UpdateSpecular( ctx ); /* for PK_SPEC */ - if (rmesa->radeon.TclFallback) + if (rmesa->radeon.TclFallback) r200ChooseVertexState( ctx ); _mesa_allow_light_in_model( ctx, !state ); break; @@ -1875,13 +1875,13 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) case GL_LIGHT7: R200_STATECHANGE(rmesa, tcl); p = cap - GL_LIGHT0; - if (p&1) + if (p&1) flag = (R200_LIGHT_1_ENABLE | - R200_LIGHT_1_ENABLE_AMBIENT | + R200_LIGHT_1_ENABLE_AMBIENT | R200_LIGHT_1_ENABLE_SPECULAR); else flag = (R200_LIGHT_0_ENABLE | - R200_LIGHT_0_ENABLE_AMBIENT | + R200_LIGHT_0_ENABLE_AMBIENT | R200_LIGHT_0_ENABLE_SPECULAR); if (state) @@ -1889,7 +1889,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) else rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] &= ~flag; - /* + /* */ update_light_colors( ctx, p ); break; @@ -2043,7 +2043,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) case GL_TEXTURE_GEN_T: /* Picked up in r200UpdateTextureState. */ - rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE; + rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE; break; case GL_COLOR_SUM_EXT: @@ -2160,7 +2160,7 @@ void r200LightingSpaceChange( GLcontext *ctx ) r200ContextPtr rmesa = R200_CONTEXT(ctx); GLboolean tmp; - if (R200_DEBUG & DEBUG_STATE) + if (R200_DEBUG & DEBUG_STATE) fprintf(stderr, "%s %d BEFORE %x\n", __FUNCTION__, ctx->_NeedEyeCoords, rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]); @@ -2176,7 +2176,7 @@ void r200LightingSpaceChange( GLcontext *ctx ) rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_RESCALE_NORMALS; } - if (R200_DEBUG & DEBUG_STATE) + if (R200_DEBUG & DEBUG_STATE) fprintf(stderr, "%s %d AFTER %x\n", __FUNCTION__, ctx->_NeedEyeCoords, rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]); } @@ -2219,7 +2219,7 @@ static void update_texturematrix( GLcontext *ctx ) GLuint compsel = rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL]; int unit; - if (R200_DEBUG & DEBUG_STATE) + if (R200_DEBUG & DEBUG_STATE) fprintf(stderr, "%s before COMPSEL: %x\n", __FUNCTION__, rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL]); @@ -2227,7 +2227,7 @@ static void update_texturematrix( GLcontext *ctx ) rmesa->TexMatCompSel = 0; for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) { - if (!ctx->Texture.Unit[unit]._ReallyEnabled) + if (!ctx->Texture.Unit[unit]._ReallyEnabled) continue; if (ctx->TextureMatrixStack[unit].Top->type != MATRIX_IDENTITY) { @@ -2237,21 +2237,21 @@ static void update_texturematrix( GLcontext *ctx ) rmesa->TexMatCompSel |= R200_OUTPUT_TEX_0 << unit; if (rmesa->TexGenEnabled & (R200_TEXMAT_0_ENABLE << unit)) { - /* Need to preconcatenate any active texgen + /* Need to preconcatenate any active texgen * obj/eyeplane matrices: */ _math_matrix_mul_matrix( &rmesa->tmpmat, - ctx->TextureMatrixStack[unit].Top, + ctx->TextureMatrixStack[unit].Top, &rmesa->TexGenMatrix[unit] ); upload_matrix( rmesa, rmesa->tmpmat.m, R200_MTX_TEX0+unit ); - } + } else { - upload_matrix( rmesa, ctx->TextureMatrixStack[unit].Top->m, + upload_matrix( rmesa, ctx->TextureMatrixStack[unit].Top->m, R200_MTX_TEX0+unit ); } } else if (rmesa->TexGenEnabled & (R200_TEXMAT_0_ENABLE << unit)) { - upload_matrix( rmesa, rmesa->TexGenMatrix[unit].m, + upload_matrix( rmesa, rmesa->TexGenMatrix[unit].m, R200_MTX_TEX0+unit ); } } @@ -2277,7 +2277,7 @@ static GLboolean r200ValidateBuffers(GLcontext *ctx) int i; radeon_validate_reset_bos(&rmesa->radeon); - + rrb = radeon_get_colorbuffer(&rmesa->radeon); /* color buffer */ if (rrb && rrb->bo) { @@ -2295,7 +2295,7 @@ static GLboolean r200ValidateBuffers(GLcontext *ctx) for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) { radeonTexObj *t; - + if (!ctx->Texture.Unit[i]._ReallyEnabled) continue; @@ -2319,15 +2319,15 @@ GLboolean r200ValidateState( GLcontext *ctx ) r200ContextPtr rmesa = R200_CONTEXT(ctx); GLuint new_state = rmesa->radeon.NewGLState; - if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { + if (new_state & _NEW_BUFFERS) { _mesa_update_framebuffer(ctx); /* this updates the DrawBuffer's Width/Height if it's a FBO */ _mesa_update_draw_buffer_bounds(ctx); - + R200_STATECHANGE(rmesa, ctx); } - if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) { + if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) { r200UpdateTextureState( ctx ); new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */ r200UpdateLocalViewer( ctx ); @@ -2341,7 +2341,7 @@ GLboolean r200ValidateState( GLcontext *ctx ) /* Need an event driven matrix update? */ - if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) + if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) upload_matrix( rmesa, ctx->_ModelProjectMatrix.m, R200_MTX_MVP ); /* Need these for lighting (shouldn't upload otherwise) @@ -2365,11 +2365,12 @@ GLboolean r200ValidateState( GLcontext *ctx ) /* emit all active clip planes if projection matrix changes. */ if (new_state & (_NEW_PROJECTION)) { - if (ctx->Transform.ClipPlanesEnabled) + if (ctx->Transform.ClipPlanesEnabled) r200UpdateClipPlanes( ctx ); } if (new_state & (_NEW_PROGRAM| + _NEW_PROGRAM_CONSTANTS | /* need to test for pretty much anything due to possible parameter bindings */ _NEW_MODELVIEW|_NEW_PROJECTION|_NEW_TRANSFORM| _NEW_LIGHT|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX| @@ -2436,7 +2437,7 @@ static void r200WrapRunPipeline( GLcontext *ctx ) } /* Run the pipeline. - */ + */ _tnl_run_pipeline( ctx ); if (has_material) { diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c index 75262e46bd8..83920093378 100644 --- a/src/mesa/drivers/dri/r200/r200_state_init.c +++ b/src/mesa/drivers/dri/r200/r200_state_init.c @@ -295,12 +295,16 @@ VP_CHECK( tcl_vpp_size, ctx->VertexProgram.Current->Base.NumNativeParameters > 9 h.i = hdr; \ _start = h.veclinear.addr_lo | (h.veclinear.addr_hi << 8); \ _sz = h.veclinear.count * 4; \ + if (r200->radeon.radeonScreen->kernel_mm && _sz) { \ + BEGIN_BATCH_NO_AUTOSTATE(dwords); \ OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0)); \ OUT_BATCH(0); \ OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0)); \ OUT_BATCH(_start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \ OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, _sz - 1)); \ OUT_BATCH_TABLE((data), _sz); \ + END_BATCH(); \ + } \ } while(0) #define OUT_SCL(hdr, data) do { \ @@ -367,9 +371,7 @@ static void veclinear_emit(GLcontext *ctx, struct radeon_state_atom *atom) uint32_t dwords = atom->cmd_size; dwords += 4; - BEGIN_BATCH_NO_AUTOSTATE(dwords); OUT_VECLINEAR(atom->cmd[0], atom->cmd+1); - END_BATCH(); } static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom) @@ -505,6 +507,8 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) dwords += 6; if (rrb) dwords += 6; + if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) + dwords += 4; /* output the first 7 bytes of context */ BEGIN_BATCH_NO_AUTOSTATE(dwords); @@ -555,17 +559,12 @@ static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom) if (t && t->mt && !t->image_override) dwords += 2; BEGIN_BATCH_NO_AUTOSTATE(dwords); + /* is this ok even with drm older than 1.18? */ OUT_BATCH_TABLE(atom->cmd, 10); if (t && t->mt && !t->image_override) { - if ((ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) { - lvl = &t->mt->levels[0]; - OUT_BATCH_RELOC(lvl->faces[5].offset, t->mt->bo, lvl->faces[5].offset, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); - } else { - OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); - } + OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); } else if (!t) { /* workaround for old CS mechanism */ OUT_BATCH(r200->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]); @@ -593,27 +592,20 @@ static void tex_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) hastexture = 0; } - dwords += 2; if (hastexture) dwords += 2; else dwords -= 2; BEGIN_BATCH_NO_AUTOSTATE(dwords); - OUT_BATCH(CP_PACKET0(R200_PP_TXFILTER_0 + (24 * i), 7)); + OUT_BATCH(CP_PACKET0(R200_PP_TXFILTER_0 + (32 * i), 7)); OUT_BATCH_TABLE((atom->cmd + 1), 8); if (hastexture) { OUT_BATCH(CP_PACKET0(R200_PP_TXOFFSET_0 + (24 * i), 0)); if (t->mt && !t->image_override) { - if ((ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) { - lvl = &t->mt->levels[0]; - OUT_BATCH_RELOC(lvl->faces[5].offset, t->mt->bo, lvl->faces[5].offset, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); - } else { - OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); - } + OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); } else { if (t->bo) OUT_BATCH_RELOC(t->tile_bits, t->bo, 0, @@ -628,21 +620,21 @@ static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r200ContextPtr r200 = R200_CONTEXT(ctx); BATCH_LOCALS(&r200->radeon); - uint32_t dwords = atom->cmd_size; - int i = atom->idx; + uint32_t dwords = 2; + int i = atom->idx, j; radeonTexObj *t = r200->state.texture.unit[i].texobj; - GLuint size; + radeon_mipmap_level *lvl; - BEGIN_BATCH_NO_AUTOSTATE(dwords + (2 * 5)); - OUT_BATCH_TABLE(atom->cmd, 3); + BEGIN_BATCH_NO_AUTOSTATE(dwords + (4 * 5)); + OUT_BATCH_TABLE(atom->cmd, 2); if (t && !t->image_override) { - size = t->mt->totalsize / 6; - OUT_BATCH_RELOC(0, t->mt->bo, size, RADEON_GEM_DOMAIN_VRAM, 0, 0); - OUT_BATCH_RELOC(0, t->mt->bo, size * 2, RADEON_GEM_DOMAIN_VRAM, 0, 0); - OUT_BATCH_RELOC(0, t->mt->bo, size * 3, RADEON_GEM_DOMAIN_VRAM, 0, 0); - OUT_BATCH_RELOC(0, t->mt->bo, size * 4, RADEON_GEM_DOMAIN_VRAM, 0, 0); - OUT_BATCH_RELOC(0, t->mt->bo, size * 5, RADEON_GEM_DOMAIN_VRAM, 0, 0); + lvl = &t->mt->levels[0]; + for (j = 1; j <= 5; j++) { + OUT_BATCH(CP_PACKET0(R200_PP_CUBIC_OFFSET_F1_0 + (24*i) + (4 * (j-1)), 0)); + OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset, + RADEON_GEM_DOMAIN_VRAM, 0, 0); + } } END_BATCH(); } diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c index 712da980775..83e70b586d7 100644 --- a/src/mesa/drivers/dri/r200/r200_swtcl.c +++ b/src/mesa/drivers/dri/r200/r200_swtcl.c @@ -56,7 +56,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. /*********************************************************************** - * Initialization + * Initialization ***********************************************************************/ #define EMIT_ATTR( ATTR, STYLE, F0 ) \ @@ -118,7 +118,7 @@ static void r200SetVertexFormat( GLcontext *ctx ) } rmesa->swtcl.coloroffset = offset; -#if MESA_LITTLE_ENDIAN +#if MESA_LITTLE_ENDIAN EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA, (R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT) ); #else EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR, (R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT) ); @@ -129,7 +129,7 @@ static void r200SetVertexFormat( GLcontext *ctx ) if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ) || RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) { -#if MESA_LITTLE_ENDIAN +#if MESA_LITTLE_ENDIAN if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) { rmesa->swtcl.specoffset = offset; EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_RGB, (R200_VTX_PK_RGBA << R200_VTX_COLOR_1_SHIFT) ); @@ -192,7 +192,7 @@ static void r200SetVertexFormat( GLcontext *ctx ) rmesa->radeon.swtcl.vertex_size = _tnl_install_attrs( ctx, - rmesa->radeon.swtcl.vertex_attrs, + rmesa->radeon.swtcl.vertex_attrs, rmesa->radeon.swtcl.vertex_attr_count, NULL, 0 ); rmesa->radeon.swtcl.vertex_size /= 4; @@ -278,7 +278,7 @@ void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset) rmesa->radeon.dma.current, current_offset); - + r200EmitVbufPrim( rmesa, rmesa->radeon.swtcl.hw_primitive, rmesa->radeon.swtcl.numverts); @@ -338,7 +338,7 @@ static void r200ResetLineStipple( GLcontext *ctx ); r200ContextPtr rmesa = R200_CONTEXT(ctx); \ const char *r200verts = (char *)rmesa->radeon.swtcl.verts; #define VERT(x) (radeonVertex *)(r200verts + ((x) * vertsize * sizeof(int))) -#define VERTEX radeonVertex +#define VERTEX radeonVertex #define DO_DEBUG_VERTS (1 && (R200_DEBUG & DEBUG_VERTS)) #undef TAG @@ -539,7 +539,7 @@ void r200ChooseRenderState( GLcontext *ctx ) GLuint index = 0; GLuint flags = ctx->_TriangleCaps; - if (!rmesa->radeon.TclFallback || rmesa->radeon.Fallback) + if (!rmesa->radeon.TclFallback || rmesa->radeon.Fallback) return; if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R200_TWOSIDE_BIT; @@ -597,7 +597,7 @@ static void r200RenderPrimitive( GLcontext *ctx, GLenum prim ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); rmesa->radeon.swtcl.render_primitive = prim; - if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) + if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) r200RasterPrimitive( ctx, reduced_hw_prim(ctx, prim) ); } @@ -695,7 +695,7 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode ) /** * Cope with depth operations by drawing individual pixels as points. - * + * * \todo * The way the vertex state is set in this routine is hokey. It seems to * work, but it's very hackish. This whole routine is pretty hackish. If @@ -710,14 +710,14 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py, const GLubyte *bitmap ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - const GLfloat *rc = ctx->Current.RasterColor; + const GLfloat *rc = ctx->Current.RasterColor; GLint row, col; radeonVertex vert; GLuint orig_vte; GLuint h; - /* Turn off tcl. + /* Turn off tcl. */ TCL_FALLBACK( ctx, R200_TCL_FALLBACK_BITMAP, 1 ); @@ -768,7 +768,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py, R200_VPORT_Z_SCALE_ENA | R200_VPORT_X_OFFSET_ENA | R200_VPORT_Y_OFFSET_ENA | - R200_VPORT_Z_OFFSET_ENA); + R200_VPORT_Z_OFFSET_ENA); /* Turn off other stuff: Stipple?, texture?, blending?, etc. */ @@ -813,14 +813,14 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py, */ LOCK_HARDWARE( &rmesa->radeon ); UNLOCK_HARDWARE( &rmesa->radeon ); - h = rmesa->radeon.dri.drawable->h + rmesa->radeon.dri.drawable->y; - px += rmesa->radeon.dri.drawable->x; + h = radeon_get_drawable(&rmesa->radeon)->h + radeon_get_drawable(&rmesa->radeon)->y; + px += radeon_get_drawable(&rmesa->radeon)->x; /* Clipping handled by existing mechansims in r200_ioctl.c? */ for (row=0; row<height; row++) { - const GLubyte *src = (const GLubyte *) - _mesa_image_address2d(unpack, bitmap, width, height, + const GLubyte *src = (const GLubyte *) + _mesa_image_address2d(unpack, bitmap, width, height, GL_COLOR_INDEX, GL_BITMAP, row, 0 ); if (unpack->LsbFirst) { @@ -899,9 +899,9 @@ void r200InitSwtcl( GLcontext *ctx ) tnl->Driver.Render.Interp = _tnl_interp; /* FIXME: what are these numbers? */ - _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, + _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 36 * sizeof(GLfloat) ); - + rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; rmesa->radeon.swtcl.RenderIndex = ~0; rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES; diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c index 9797f77ec4f..ed1995e147c 100644 --- a/src/mesa/drivers/dri/r200/r200_texstate.c +++ b/src/mesa/drivers/dri/r200/r200_texstate.c @@ -833,7 +833,6 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo radeon_miptree_unreference(rImage->mt); rImage->mt = NULL; } - fprintf(stderr,"settexbuf %d %dx%d@%d\n", rb->pitch, rb->width, rb->height, rb->cpp); _mesa_init_teximage_fields(radeon->glCtx, target, texImage, rb->width, rb->height, 1, 0, rb->cpp); texImage->RowStride = rb->pitch / rb->cpp; @@ -851,7 +850,10 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo pitch_val = rb->pitch; switch (rb->cpp) { case 4: - t->pp_txformat = tx_table_le[MESA_FORMAT_ARGB8888].format; + if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT) + t->pp_txformat = tx_table_le[MESA_FORMAT_RGB888].format; + else + t->pp_txformat = tx_table_le[MESA_FORMAT_ARGB8888].format; t->pp_txfilter |= tx_table_le[MESA_FORMAT_ARGB8888].filter; break; case 3: @@ -1422,8 +1424,8 @@ void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d ) */ static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t) { - const struct gl_texture_image *firstImage = - t->base.Image[0][t->mt->firstLevel]; + int firstlevel = t->mt ? t->mt->firstLevel : 0; + const struct gl_texture_image *firstImage = t->base.Image[0][firstlevel]; GLint log2Width, log2Height, log2Depth, texelBytes; if ( t->bo ) { diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c index aadd1443ad9..620f29b5c6e 100644 --- a/src/mesa/drivers/dri/r200/r200_vertprog.c +++ b/src/mesa/drivers/dri/r200/r200_vertprog.c @@ -290,7 +290,7 @@ static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_regis t_swizzle(GET_SWZ(src->Swizzle, 2)), t_swizzle(GET_SWZ(src->Swizzle, 3)), t_src_class(src->File), - src->NegateBase) | (src->RelAddr << 4); + src->Negate) | (src->RelAddr << 4); } static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src) @@ -302,7 +302,7 @@ static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_sr t_swizzle(GET_SWZ(src->Swizzle, 0)), t_swizzle(GET_SWZ(src->Swizzle, 0)), t_src_class(src->File), - src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4); + src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4); } static unsigned long t_opcode(enum prog_opcode opcode) @@ -700,7 +700,7 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO, t_src_class(src[0].File), - src[0].NegateBase) | (src[0].RelAddr << 4); + src[0].Negate) | (src[0].RelAddr << 4); o_inst->src1 = UNUSED_SRC_0; o_inst->src2 = UNUSED_SRC_0; } @@ -712,12 +712,12 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte t_swizzle(GET_SWZ(src[0].Swizzle, 0)), SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, t_src_class(src[0].File), - src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), SWIZZLE_ZERO, SWIZZLE_ZERO, t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO, t_src_class(src[1].File), - src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; o_inst++; @@ -766,11 +766,11 @@ if ((o_inst - vp->instr) == 31) { o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, t_src_class(src[1].File), - src[1].NegateBase) | (src[1].RelAddr << 4); + src[1].Negate) | (src[1].RelAddr << 4); o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, t_src_class(src[1].File), - src[1].NegateBase) | (src[1].RelAddr << 4); + src[1].Negate) | (src[1].RelAddr << 4); } else { o_inst->src1 = t_src(vp, &src[1]); @@ -792,7 +792,7 @@ else { t_swizzle(GET_SWZ(src[0].Swizzle, 2)), SWIZZLE_ZERO, t_src_class(src[0].File), - src[0].NegateBase) | (src[0].RelAddr << 4); + src[0].Negate) | (src[0].RelAddr << 4); o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 0)), @@ -800,7 +800,7 @@ else { t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, t_src_class(src[1].File), - src[1].NegateBase) | (src[1].RelAddr << 4); + src[1].Negate) | (src[1].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; goto next; @@ -815,7 +815,7 @@ else { t_swizzle(GET_SWZ(src[0].Swizzle, 2)), VSF_IN_COMPONENT_ONE, t_src_class(src[0].File), - src[0].NegateBase) | (src[0].RelAddr << 4); + src[0].Negate) | (src[0].RelAddr << 4); o_inst->src1 = t_src(vp, &src[1]); o_inst->src2 = UNUSED_SRC_1; goto next; @@ -831,7 +831,7 @@ else { t_swizzle(GET_SWZ(src[1].Swizzle, 2)), t_swizzle(GET_SWZ(src[1].Swizzle, 3)), t_src_class(src[1].File), - (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; goto next; @@ -846,7 +846,7 @@ else { t_swizzle(GET_SWZ(src[0].Swizzle, 2)), t_swizzle(GET_SWZ(src[0].Swizzle, 3)), t_src_class(src[0].File), - (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; goto next; @@ -874,7 +874,7 @@ else { VSF_IN_COMPONENT_W, VSF_IN_CLASS_TMP, /* Not 100% sure about this */ - (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/); + (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/); o_inst->src2 = UNUSED_SRC_0; u_temp_i--; @@ -899,7 +899,7 @@ else { t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w t_src_class(src[0].File), - src[0].NegateBase) | (src[0].RelAddr << 4); + src[0].Negate) | (src[0].RelAddr << 4); o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z @@ -907,7 +907,7 @@ else { t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w t_src_class(src[1].File), - src[1].NegateBase) | (src[1].RelAddr << 4); + src[1].Negate) | (src[1].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; o_inst++; @@ -922,7 +922,7 @@ else { t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w t_src_class(src[1].File), - (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z @@ -930,7 +930,7 @@ else { t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w t_src_class(src[0].File), - src[0].NegateBase) | (src[0].RelAddr << 4); + src[0].Negate) | (src[0].RelAddr << 4); o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1, VSF_IN_COMPONENT_X, diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 0dff9a12731..bdb09624be4 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -37,6 +37,7 @@ RADEON_COMMON_SOURCES = \ DRIVER_SOURCES = \ radeon_screen.c \ r300_context.c \ + r300_draw.c \ r300_ioctl.c \ r300_cmdbuf.c \ r300_state.c \ @@ -48,6 +49,7 @@ DRIVER_SOURCES = \ radeon_program_pair.c \ radeon_nqssadce.c \ r300_vertprog.c \ + r300_fragprog_common.c \ r300_fragprog.c \ r300_fragprog_swizzle.c \ r300_fragprog_emit.c \ diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index c575c9ac496..0261a5b1d8a 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -55,9 +55,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_state.h" #include "radeon_reg.h" -#define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200 -# define RADEON_ONE_REG_WR (1 << 15) - /** # of dwords reserved for additional instructions that may need to be written * during flushing. */ @@ -71,7 +68,6 @@ static unsigned packet0_count(r300ContextPtr r300, uint32_t *pkt) drm_r300_cmd_header_t *t = (drm_r300_cmd_header_t*)pkt; return t->packet0.count; } - return 0; } #define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) @@ -83,7 +79,7 @@ void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom) BATCH_LOCALS(&r300->radeon); drm_r300_cmd_header_t cmd; uint32_t addr, ndw, i; - + if (!r300->radeon.radeonScreen->kernel_mm) { uint32_t dwords; dwords = (*atom->check) (ctx, atom); @@ -92,7 +88,7 @@ void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom) END_BATCH(); return; } - + cmd.u = atom->cmd[0]; addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo; ndw = cmd.vpu.count * 4; @@ -111,7 +107,7 @@ void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom) } else { BEGIN_BATCH_NO_AUTOSTATE(5 + ndw); } - OUT_BATCH_REGVAL(R300_VAP_PVS_UPLOAD_ADDRESS, addr); + OUT_BATCH_REGVAL(R300_VAP_PVS_VECTOR_INDX_REG, addr); OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, ndw-1) | RADEON_ONE_REG_WR); for (i = 0; i < ndw; i++) { OUT_BATCH(atom->cmd[i+1]); @@ -175,7 +171,7 @@ static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom) for(i = 0; i < numtmus; ++i) { radeonTexObj *t = r300->hw.textures[i]; - + if (!t) notexture = 1; } @@ -183,30 +179,75 @@ static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom) if (r300->radeon.radeonScreen->kernel_mm && notexture) { return; } - BEGIN_BATCH_NO_AUTOSTATE(4 * numtmus); for(i = 0; i < numtmus; ++i) { radeonTexObj *t = r300->hw.textures[i]; - OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); if (t && !t->image_override) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); } else if (!t) { - OUT_BATCH(r300->radeon.radeonScreen->texOffset[0]); + /* Texture unit hasn't a texture bound nothings to do */ } else { /* override cases */ if (t->bo) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); OUT_BATCH_RELOC(t->tile_bits, t->bo, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); } else if (!r300->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); OUT_BATCH(t->override_offset); - } - else - OUT_BATCH(r300->radeon.radeonScreen->texOffset[0]); + END_BATCH(); + } else { + /* Texture unit hasn't a texture bound nothings to do */ + } } } - END_BATCH(); } } +void r300_emit_scissor(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + unsigned x1, y1, x2, y2; + struct radeon_renderbuffer *rrb; + + if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { + return; + } + rrb = radeon_get_colorbuffer(&r300->radeon); + if (!rrb || !rrb->bo) { + fprintf(stderr, "no rrb\n"); + return; + } + if (r300->radeon.state.scissor.enabled) { + x1 = r300->radeon.state.scissor.rect.x1; + y1 = r300->radeon.state.scissor.rect.y1; + x2 = r300->radeon.state.scissor.rect.x2 - 1; + y2 = r300->radeon.state.scissor.rect.y2 - 1; + } else { + x1 = 0; + y1 = 0; + x2 = rrb->width - 1; + y2 = rrb->height - 1; + } + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { + x1 += R300_SCISSORS_OFFSET; + y1 += R300_SCISSORS_OFFSET; + x2 += R300_SCISSORS_OFFSET; + y2 += R300_SCISSORS_OFFSET; + } + BEGIN_BATCH_NO_AUTOSTATE(3); + OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); + OUT_BATCH((x1 << R300_SCISSORS_X_SHIFT)|(y1 << R300_SCISSORS_Y_SHIFT)); + OUT_BATCH((x2 << R300_SCISSORS_X_SHIFT)|(y2 << R300_SCISSORS_Y_SHIFT)); + END_BATCH(); +} + static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) { r300ContextPtr r300 = R300_CONTEXT(ctx); @@ -215,6 +256,7 @@ static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) uint32_t cbpitch; uint32_t offset = r300->radeon.state.color.draw_offset; uint32_t dw = 6; + int i; rrb = radeon_get_colorbuffer(&r300->radeon); if (!rrb || !rrb->bo) { @@ -247,16 +289,39 @@ static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) BEGIN_BATCH_NO_AUTOSTATE(3); OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); OUT_BATCH(0); - OUT_BATCH((rrb->width << R300_SCISSORS_X_SHIFT) | - (rrb->height << R300_SCISSORS_Y_SHIFT)); + OUT_BATCH(((rrb->width - 1) << R300_SCISSORS_X_SHIFT) | + ((rrb->height - 1) << R300_SCISSORS_Y_SHIFT)); + END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(16); + for (i = 0; i < 4; i++) { + OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2); + OUT_BATCH((0 << R300_CLIPRECT_X_SHIFT) | (0 << R300_CLIPRECT_Y_SHIFT)); + OUT_BATCH(((rrb->width - 1) << R300_CLIPRECT_X_SHIFT) | ((rrb->height - 1) << R300_CLIPRECT_Y_SHIFT)); + } + OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1); + OUT_BATCH(0xAAAA); + OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1); + OUT_BATCH(0xffffff); END_BATCH(); } else { BEGIN_BATCH_NO_AUTOSTATE(3); OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); OUT_BATCH((R300_SCISSORS_OFFSET << R300_SCISSORS_X_SHIFT) | (R300_SCISSORS_OFFSET << R300_SCISSORS_Y_SHIFT)); - OUT_BATCH(((rrb->width + R300_SCISSORS_OFFSET) << R300_SCISSORS_X_SHIFT) | - ((rrb->height + R300_SCISSORS_OFFSET) << R300_SCISSORS_Y_SHIFT)); + OUT_BATCH(((rrb->width + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_X_SHIFT) | + ((rrb->height + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_Y_SHIFT)); + END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(16); + for (i = 0; i < 4; i++) { + OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2); + OUT_BATCH((R300_SCISSORS_OFFSET << R300_CLIPRECT_X_SHIFT) | (R300_SCISSORS_OFFSET << R300_CLIPRECT_Y_SHIFT)); + OUT_BATCH(((R300_SCISSORS_OFFSET + rrb->width - 1) << R300_CLIPRECT_X_SHIFT) | + ((R300_SCISSORS_OFFSET + rrb->height - 1) << R300_CLIPRECT_Y_SHIFT)); + } + OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1); + OUT_BATCH(0xAAAA); + OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1); + OUT_BATCH(0xffffff); END_BATCH(); } } @@ -280,7 +345,7 @@ static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom) if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){ zbpitch |= R300_DEPTHMICROTILE_TILED; } - + BEGIN_BATCH_NO_AUTOSTATE(6); OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1); OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); @@ -288,6 +353,46 @@ static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom) END_BATCH(); } +static void emit_gb_misc(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH(atom->cmd[0]); + OUT_BATCH(atom->cmd[1]); + OUT_BATCH(atom->cmd[2]); + OUT_BATCH(atom->cmd[3]); + END_BATCH(); + } +} + +static void emit_threshold_misc(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + BEGIN_BATCH_NO_AUTOSTATE(3); + OUT_BATCH(atom->cmd[0]); + OUT_BATCH(atom->cmd[1]); + OUT_BATCH(atom->cmd[2]); + END_BATCH(); + } +} + +static void emit_shade_misc(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + + if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH(atom->cmd[0]); + OUT_BATCH(atom->cmd[1]); + END_BATCH(); + } +} + static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom) { r300ContextPtr r300 = R300_CONTEXT(ctx); @@ -372,12 +477,10 @@ int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom) void r300InitCmdBuf(r300ContextPtr r300) { int mtu; - int has_tcl = 1; + int has_tcl; int is_r500 = 0; - int i; - if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) - has_tcl = 0; + has_tcl = r300->options.hw_tcl_enabled; if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) is_r500 = 1; @@ -444,7 +547,10 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(gb_enable, always, 2, 0); r300->hw.gb_enable.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_ENABLE, 1); ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0); - r300->hw.gb_misc.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_MSPOS0, 5); + r300->hw.gb_misc.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_MSPOS0, 3); + r300->hw.gb_misc.emit = emit_gb_misc; + ALLOC_STATE(gb_misc2, always, R300_GB_MISC2_CMDSIZE, 0); + r300->hw.gb_misc2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x401C, 2); ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0); r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_ENABLE, 1); ALLOC_STATE(ga_point_s0, always, 5, 0); @@ -459,8 +565,11 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.lcntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_CNTL, 1); ALLOC_STATE(ga_line_stipple, always, 4, 0); r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_STIPPLE_VALUE, 3); - ALLOC_STATE(shade, always, 5, 0); - r300->hw.shade.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_ENHANCE, 4); + ALLOC_STATE(shade, always, 2, 0); + r300->hw.shade.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_ENHANCE, 1); + r300->hw.shade.emit = emit_shade_misc; + ALLOC_STATE(shade2, always, 4, 0); + r300->hw.shade2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x4278, 3); ALLOC_STATE(polygon_mode, always, 4, 0); r300->hw.polygon_mode.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POLY_MODE, 3); ALLOC_STATE(fogp, always, 3, 0); @@ -479,19 +588,12 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0); r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_COUNT, 2); if (is_r500) { - ALLOC_STATE(ri, always, R500_RI_CMDSIZE, 0); + ALLOC_STATE(ri, variable, R500_RI_CMDSIZE, 0); r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_IP_0, 16); - for (i = 0; i < 8; i++) { - r300->hw.ri.cmd[R300_RI_CMD_0 + i +1] = - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | - (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT); - } ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, 1); } else { - ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); + ALLOC_STATE(ri, variable, R300_RI_CMDSIZE, 0); r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_IP_0, 8); ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, 1); @@ -567,8 +669,9 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9); ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0); r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_AARESOLVE_CTL, 1); - ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0); - r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2); + ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0); + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2); + r300->hw.rb3d_discard_src_pixel_lte_threshold.emit = emit_threshold_misc; ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0); r300->hw.zs.cmd[R300_ZS_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_CNTL, 3); @@ -582,8 +685,8 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.zb.emit = emit_zb_offset; ALLOC_STATE(zb_depthclearvalue, always, 2, 0); r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_DEPTHCLEARVALUE, 1); - ALLOC_STATE(unk4F30, always, 3, 0); - r300->hw.unk4F30.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x4F30, 2); + ALLOC_STATE(zb_zmask, always, 3, 0); + r300->hw.zb_zmask.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_ZMASK_OFFSET, 2); ALLOC_STATE(zb_hiz_offset, always, 2, 0); r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_OFFSET, 1); ALLOC_STATE(zb_hiz_pitch, always, 2, 0); diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h index 3786813de36..53bcc0eeb49 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.h +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h @@ -39,6 +39,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_context.h" extern void r300InitCmdBuf(r300ContextPtr r300); +void r300_emit_scissor(GLcontext *ctx); void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom); int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom); diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 5f279d6629e..394521a051f 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -64,6 +64,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_ioctl.h" #include "r300_tex.h" #include "r300_emit.h" +#include "r300_render.h" #include "r300_swtcl.h" #include "radeon_bocs_wrapper.h" @@ -72,10 +73,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "utils.h" #include "xmlpool.h" /* for symbolic values of enum-type options */ -/* hw_tcl_on derives from future_hw_tcl_on when its safe to change it. */ -int future_hw_tcl_on = 1; -int hw_tcl_on = 1; - #define need_GL_VERSION_2_0 #define need_GL_ARB_point_parameters #define need_GL_ARB_vertex_program @@ -127,6 +124,7 @@ const struct dri_extension card_extensions[] = { {"GL_EXT_texture_lod_bias", NULL}, {"GL_EXT_texture_mirror_clamp", NULL}, {"GL_EXT_texture_rectangle", NULL}, + {"GL_EXT_vertex_array_bgra", NULL}, {"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions}, {"GL_ATI_texture_env_combine3", NULL}, {"GL_ATI_texture_mirror_once", NULL}, @@ -154,16 +152,8 @@ const struct dri_extension gl_20_extension[] = { {"GL_VERSION_2_0", GL_VERSION_2_0_functions }, }; - -extern struct tnl_pipeline_stage _r300_render_stage; -extern const struct tnl_pipeline_stage _r300_tcl_stage; - static const struct tnl_pipeline_stage *r300_pipeline[] = { - /* Try and go straight to t&l - */ - &_r300_tcl_stage, - /* Catch any t&l fallbacks */ &_tnl_vertex_transform_stage, @@ -172,6 +162,7 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = { &_tnl_fog_coordinate_stage, &_tnl_texgen_stage, &_tnl_texture_transform_stage, + &_tnl_point_attenuation_stage, &_tnl_vertex_program_stage, /* Try again to go to tcl? @@ -191,17 +182,6 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = { 0, }; -static void r300RunPipeline(GLcontext * ctx) -{ - _mesa_lock_context_textures(ctx); - - if (ctx->NewState) - _mesa_update_state_locked(ctx); - - _tnl_run_pipeline(ctx); - _mesa_unlock_context_textures(ctx); -} - static void r300_get_lock(radeonContextPtr rmesa) { drm_radeon_sarea_t *sarea = rmesa->sarea; @@ -211,7 +191,7 @@ static void r300_get_lock(radeonContextPtr rmesa) if (!rmesa->radeonScreen->kernel_mm) radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom); } -} +} static void r300_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa) { @@ -246,9 +226,9 @@ static void r300_vtbl_pre_emit_atoms(radeonContextPtr radeon) { r300ContextPtr r300 = (r300ContextPtr)radeon; BATCH_LOCALS(radeon); - + r300->vap_flush_needed = GL_TRUE; - + cp_wait(radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN); BEGIN_BATCH_NO_AUTOSTATE(2); OUT_BATCH_REGVAL(R300_TX_INVALTAGS, R300_TX_FLUSH); @@ -275,6 +255,112 @@ static void r300_init_vtbl(radeonContextPtr radeon) radeon->vtbl.fallback = r300_fallback; } +static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + ctx->Const.MaxTextureImageUnits = + driQueryOptioni(&r300->radeon.optionCache, "texture_image_units"); + ctx->Const.MaxTextureCoordUnits = + driQueryOptioni(&r300->radeon.optionCache, "texture_coord_units"); + ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureImageUnits, + ctx->Const.MaxTextureCoordUnits); + + ctx->Const.MaxTextureMaxAnisotropy = 16.0; + ctx->Const.MaxTextureLodBias = 16.0; + + if (screen->chip_family >= CHIP_FAMILY_RV515) + ctx->Const.MaxTextureLevels = 13; + else + ctx->Const.MaxTextureLevels = 12; + + ctx->Const.MinPointSize = 1.0; + ctx->Const.MinPointSizeAA = 1.0; + ctx->Const.MaxPointSize = R300_POINTSIZE_MAX; + ctx->Const.MaxPointSizeAA = R300_POINTSIZE_MAX; + + ctx->Const.MinLineWidth = 1.0; + ctx->Const.MinLineWidthAA = 1.0; + ctx->Const.MaxLineWidth = R300_LINESIZE_MAX; + ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX; + + ctx->Const.MaxDrawBuffers = 1; + + /* currently bogus data */ + if (r300->options.hw_tcl_enabled) { + ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; + ctx->Const.VertexProgram.MaxNativeInstructions = + VSF_MAX_FRAGMENT_LENGTH / 4; + ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ + ctx->Const.VertexProgram.MaxTemps = 32; + ctx->Const.VertexProgram.MaxNativeTemps = + /*VSF_MAX_FRAGMENT_TEMPS */ 32; + ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; + } + + if (screen->chip_family >= CHIP_FAMILY_RV515) { + ctx->Const.FragmentProgram.MaxNativeTemps = R500_PFS_NUM_TEMP_REGS; + ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ + ctx->Const.FragmentProgram.MaxNativeParameters = R500_PFS_NUM_CONST_REGS; + ctx->Const.FragmentProgram.MaxNativeAluInstructions = R500_PFS_MAX_INST; + ctx->Const.FragmentProgram.MaxNativeTexInstructions = R500_PFS_MAX_INST; + ctx->Const.FragmentProgram.MaxNativeInstructions = R500_PFS_MAX_INST; + ctx->Const.FragmentProgram.MaxNativeTexIndirections = R500_PFS_MAX_INST; + ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; + } else { + ctx->Const.FragmentProgram.MaxNativeTemps = R300_PFS_NUM_TEMP_REGS; + ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ + ctx->Const.FragmentProgram.MaxNativeParameters = R300_PFS_NUM_CONST_REGS; + ctx->Const.FragmentProgram.MaxNativeAluInstructions = R300_PFS_MAX_ALU_INST; + ctx->Const.FragmentProgram.MaxNativeTexInstructions = R300_PFS_MAX_TEX_INST; + ctx->Const.FragmentProgram.MaxNativeInstructions = R300_PFS_MAX_ALU_INST + R300_PFS_MAX_TEX_INST; + ctx->Const.FragmentProgram.MaxNativeTexIndirections = R300_PFS_MAX_TEX_INDIRECT; + ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; + } +} + +static void r300ParseOptions(r300ContextPtr r300, radeonScreenPtr screen) +{ + struct r300_options options = { 0 }; + + driParseConfigFiles(&r300->radeon.optionCache, &screen->optionCache, + screen->driScreen->myNum, "r300"); + + r300->radeon.initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache, "def_max_anisotropy"); + + options.stencil_two_side_disabled = driQueryOptionb(&r300->radeon.optionCache, "disable_stencil_two_side"); + options.s3tc_force_enabled = driQueryOptionb(&r300->radeon.optionCache, "force_s3tc_enable"); + options.s3tc_force_disabled = driQueryOptionb(&r300->radeon.optionCache, "disable_s3tc"); + + if (!(screen->chip_flags & RADEON_CHIPSET_TCL) || driQueryOptioni(&r300->radeon.optionCache, "tcl_mode") == DRI_CONF_TCL_SW) + options.hw_tcl_enabled = 0; + else + options.hw_tcl_enabled = 1; + + options.conformance_mode = !driQueryOptionb(&r300->radeon.optionCache, "disable_lowimpact_fallback"); + + r300->options = options; +} + +static void r300InitGLExtensions(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + driInitExtensions(ctx, card_extensions, GL_TRUE); + if (r300->radeon.radeonScreen->kernel_mm) + driInitExtensions(ctx, mm_extensions, GL_FALSE); + + if (r300->options.stencil_two_side_disabled) + _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side"); + + if (ctx->Mesa_DXTn && !r300->options.s3tc_force_enabled) { + _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + _mesa_enable_extension(ctx, "GL_S3_s3tc"); + } else if (r300->options.s3tc_force_disabled) { + _mesa_disable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + } +} /* Create the device specific rendering context. */ @@ -287,33 +373,19 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, struct dd_function_table functions; r300ContextPtr r300; GLcontext *ctx; - int tcl_mode; assert(glVisual); assert(driContextPriv); assert(screen); - /* Allocate the R300 context */ r300 = (r300ContextPtr) CALLOC(sizeof(*r300)); if (!r300) return GL_FALSE; - if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) - hw_tcl_on = future_hw_tcl_on = 0; + r300ParseOptions(r300, screen); r300_init_vtbl(&r300->radeon); - /* Parse configuration files. - * Do this here so that initialMaxAnisotropy is set before we create - * the default textures. - */ - driParseConfigFiles(&r300->radeon.optionCache, &screen->optionCache, - screen->driScreen->myNum, "r300"); - r300->radeon.initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache, - "def_max_anisotropy"); - /* Init default driver functions then plug in our R300-specific functions - * (the texture functions are especially important) - */ _mesa_init_driver_functions(&functions); r300InitIoctlFuncs(&functions); r300InitStateFuncs(&functions); @@ -327,46 +399,15 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, return GL_FALSE; } - /* Init r300 context data */ - /* Set the maximum texture size small enough that we can guarentee that - * all texture units can bind a maximal texture and have them both in - * texturable memory at once. - */ - ctx = r300->radeon.glCtx; - ctx->Const.MaxTextureImageUnits = - driQueryOptioni(&r300->radeon.optionCache, "texture_image_units"); - ctx->Const.MaxTextureCoordUnits = - driQueryOptioni(&r300->radeon.optionCache, "texture_coord_units"); - ctx->Const.MaxTextureUnits = - MIN2(ctx->Const.MaxTextureImageUnits, - ctx->Const.MaxTextureCoordUnits); - ctx->Const.MaxTextureMaxAnisotropy = 16.0; - ctx->Const.MaxTextureLodBias = 16.0; - - if (screen->chip_family >= CHIP_FAMILY_RV515) - ctx->Const.MaxTextureLevels = 13; - else - ctx->Const.MaxTextureLevels = 12; - - ctx->Const.MinPointSize = 1.0; - ctx->Const.MinPointSizeAA = 1.0; - ctx->Const.MaxPointSize = R300_POINTSIZE_MAX; - ctx->Const.MaxPointSizeAA = R300_POINTSIZE_MAX; + r300->fallback = 0; + if (r300->options.hw_tcl_enabled) + ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; - ctx->Const.MinLineWidth = 1.0; - ctx->Const.MinLineWidthAA = 1.0; - ctx->Const.MaxLineWidth = R300_LINESIZE_MAX; - ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX; - - /* Needs further modifications */ -#if 0 - ctx->Const.MaxArrayLockSize = - ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4); -#endif + ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; - ctx->Const.MaxDrawBuffers = 1; + r300InitConstValues(ctx, screen); /* Initialize the software rasterizer and helper modules. */ @@ -375,16 +416,12 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, _tnl_CreateContext(ctx); _swsetup_CreateContext(ctx); _swsetup_Wakeup(ctx); - _ae_create_context(ctx); /* Install the customized pipeline: */ _tnl_destroy_pipeline(ctx); _tnl_install_pipeline(ctx, r300_pipeline); - - /* Try and keep materials and vertices separate: - */ -/* _tnl_isolate_materials(ctx, GL_TRUE); */ + TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; /* Configure swrast and TNL to match hardware characteristics: */ @@ -393,80 +430,25 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, _tnl_allow_pixel_fog(ctx, GL_FALSE); _tnl_allow_vertex_fog(ctx, GL_TRUE); - /* currently bogus data */ - if (screen->chip_flags & RADEON_CHIPSET_TCL) { - ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeInstructions = - VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ - ctx->Const.VertexProgram.MaxTemps = 32; - ctx->Const.VertexProgram.MaxNativeTemps = - /*VSF_MAX_FRAGMENT_TEMPS */ 32; - ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ - ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; - } - - ctx->Const.FragmentProgram.MaxNativeTemps = PFS_NUM_TEMP_REGS; - ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ - ctx->Const.FragmentProgram.MaxNativeParameters = PFS_NUM_CONST_REGS; - ctx->Const.FragmentProgram.MaxNativeAluInstructions = PFS_MAX_ALU_INST; - ctx->Const.FragmentProgram.MaxNativeTexInstructions = PFS_MAX_TEX_INST; - ctx->Const.FragmentProgram.MaxNativeInstructions = - PFS_MAX_ALU_INST + PFS_MAX_TEX_INST; - ctx->Const.FragmentProgram.MaxNativeTexIndirections = - PFS_MAX_TEX_INDIRECT; - ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */ - ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; - ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; - - driInitExtensions(ctx, card_extensions, GL_TRUE); - if (r300->radeon.radeonScreen->kernel_mm) - driInitExtensions(ctx, mm_extensions, GL_FALSE); - - if (driQueryOptionb - (&r300->radeon.optionCache, "disable_stencil_two_side")) - _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side"); - - if (r300->radeon.glCtx->Mesa_DXTn - && !driQueryOptionb(&r300->radeon.optionCache, "disable_s3tc")) { - _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); - _mesa_enable_extension(ctx, "GL_S3_s3tc"); - } else - if (driQueryOptionb(&r300->radeon.optionCache, "force_s3tc_enable")) - { - _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + if (r300->options.hw_tcl_enabled) { + r300InitDraw(ctx); + } else { + r300InitSwtcl(ctx); } - r300->disable_lowimpact_fallback = - driQueryOptionb(&r300->radeon.optionCache, - "disable_lowimpact_fallback"); radeon_fbo_init(&r300->radeon); - radeonInitSpanFuncs( ctx ); + radeonInitSpanFuncs( ctx ); r300InitCmdBuf(r300); r300InitState(r300); - if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) - r300InitSwtcl(ctx); - - TNL_CONTEXT(ctx)->Driver.RunPipeline = r300RunPipeline; + r300InitShaderFunctions(r300); - tcl_mode = driQueryOptioni(&r300->radeon.optionCache, "tcl_mode"); - if (driQueryOptionb(&r300->radeon.optionCache, "no_rast")) { - fprintf(stderr, "disabling 3D acceleration\n"); -#if R200_MERGED - FALLBACK(&r300->radeon, RADEON_FALLBACK_DISABLE, 1); -#endif - } - if (tcl_mode == DRI_CONF_TCL_SW || - !(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) { - if (r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { - r300->radeon.radeonScreen->chip_flags &= - ~RADEON_CHIPSET_TCL; - fprintf(stderr, "Disabling HW TCL support\n"); - } - TCL_FALLBACK(r300->radeon.glCtx, - RADEON_TCL_FALLBACK_TCL_DISABLE, 1); + if (screen->chip_family == CHIP_FAMILY_RS600 || screen->chip_family == CHIP_FAMILY_RS690 || + screen->chip_family == CHIP_FAMILY_RS740) { + r300->radeon.texture_row_align = 64; } + r300InitGLExtensions(ctx); + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 602f86ba669..026c33c67cb 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -37,24 +37,19 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #ifndef __R300_CONTEXT_H__ #define __R300_CONTEXT_H__ -#include "tnl/t_vertex.h" #include "drm.h" #include "radeon_drm.h" #include "dri_util.h" -#include "texmem.h" #include "radeon_common.h" -#include "main/macros.h" #include "main/mtypes.h" -#include "main/colormac.h" +#include "shader/prog_instruction.h" struct r300_context; typedef struct r300_context r300ContextRec; typedef struct r300_context *r300ContextPtr; -#include "main/mm.h" - /* From http://gcc. gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html . I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble with other compilers ... GLUE! @@ -72,8 +67,6 @@ typedef struct r300_context *r300ContextPtr; } #include "r300_vertprog.h" -#include "r500_fragprog.h" - /* The blit width for texture uploads @@ -81,9 +74,6 @@ typedef struct r300_context *r300ContextPtr; #define R300_BLIT_WIDTH_BYTES 1024 #define R300_MAX_TEXTURE_UNITS 8 -struct r300_texture_state { - int tc_count; /* number of incoming texture coordinates from VAP */ -}; #define R300_VPT_CMD_0 0 @@ -126,9 +116,11 @@ struct r300_texture_state { #define R300_GB_MISC_MSPOS_0 1 #define R300_GB_MISC_MSPOS_1 2 #define R300_GB_MISC_TILE_CONFIG 3 -#define R300_GB_MISC_SELECT 4 -#define R300_GB_MISC_AA_CONFIG 5 -#define R300_GB_MISC_CMDSIZE 6 +#define R300_GB_MISC_CMDSIZE 4 +#define R300_GB_MISC2_CMD_0 0 +#define R300_GB_MISC2_SELECT 1 +#define R300_GB_MISC2_AA_CONFIG 2 +#define R300_GB_MISC2_CMDSIZE 3 #define R300_TXE_CMD_0 0 #define R300_TXE_ENABLE 1 @@ -303,7 +295,7 @@ struct r300_texture_state { struct r300_hw_state { struct radeon_state_atom vpt; /* viewport (1D98) */ struct radeon_state_atom vap_cntl; - struct radeon_state_atom vap_index_offset; /* 0x208c r5xx only */ + struct radeon_state_atom vap_index_offset; /* 0x208c r5xx only */ struct radeon_state_atom vof; /* VAP output format register 0x2090 */ struct radeon_state_atom vte; /* (20B0) */ struct radeon_state_atom vap_vf_max_vtx_indx; /* Maximum Vertex Indx Clamp (2134) */ @@ -317,6 +309,7 @@ struct r300_hw_state { struct radeon_state_atom pvs; /* pvs_cntl (22D0) */ struct radeon_state_atom gb_enable; /* (4008) */ struct radeon_state_atom gb_misc; /* Multisampling position shifts ? (4010) */ + struct radeon_state_atom gb_misc2; /* Multisampling position shifts ? (4010) */ struct radeon_state_atom ga_point_s0; /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */ struct radeon_state_atom ga_triangle_stipple; /* (4214) */ struct radeon_state_atom ps; /* pointsize (421C) */ @@ -324,6 +317,7 @@ struct r300_hw_state { struct radeon_state_atom lcntl; /* line control */ struct radeon_state_atom ga_line_stipple; /* (4260) */ struct radeon_state_atom shade; + struct radeon_state_atom shade2; struct radeon_state_atom polygon_mode; struct radeon_state_atom fogp; /* fog parameters (4294) */ struct radeon_state_atom ga_soft_reset; /* (429C) */ @@ -361,7 +355,7 @@ struct r300_hw_state { struct radeon_state_atom zstencil_format; struct radeon_state_atom zb; /* z buffer (4F20) */ struct radeon_state_atom zb_depthclearvalue; /* (4F28) */ - struct radeon_state_atom unk4F30; /* (4F30) */ + struct radeon_state_atom zb_zmask; /* (4F30) */ struct radeon_state_atom zb_hiz_offset; /* (4F44) */ struct radeon_state_atom zb_hiz_pitch; /* (4F54) */ @@ -405,73 +399,55 @@ struct r300_hw_state { #define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) #define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1) -struct r300_vertex_shader_fragment { - int length; - union { - GLuint d[VSF_MAX_FRAGMENT_LENGTH]; - float f[VSF_MAX_FRAGMENT_LENGTH]; - GLuint i[VSF_MAX_FRAGMENT_LENGTH]; - } body; -}; - -struct r300_vertex_shader_state { - struct r300_vertex_shader_fragment program; -}; - -extern int hw_tcl_on; - #define COLOR_IS_RGBA #define TAG(x) r300##x #include "tnl_dd/t_dd_vertex.h" #undef TAG -//#define CURRENT_VERTEX_SHADER(ctx) (ctx->VertexProgram._Current) -#define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->selected_vp) - -/* Should but doesnt work */ -//#define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->curr_vp) - -/* r300_vertex_shader_state and r300_vertex_program should probably be merged together someday. - * Keeping them them seperate for now should ensure fixed pipeline keeps functioning properly. - */ - -struct r300_vertex_program_key { - GLuint InputsRead; - GLuint OutputsWritten; - GLuint OutputsAdded; -}; - struct r300_vertex_program { struct r300_vertex_program *next; - struct r300_vertex_program_key key; - int translated; - struct r300_vertex_shader_fragment program; + struct r300_vertex_program_key { + GLuint InputsRead; + GLuint OutputsWritten; + GLuint OutputsAdded; + } key; + + struct r300_vertex_shader_hw_code { + int length; + union { + GLuint d[VSF_MAX_FRAGMENT_LENGTH]; + float f[VSF_MAX_FRAGMENT_LENGTH]; + } body; + } hw_code; + + GLboolean translated; + GLboolean error; int pos_end; int num_temporaries; /* Number of temp vars used by program */ int wpos_idx; int inputs[VERT_ATTRIB_MAX]; int outputs[VERT_RESULT_MAX]; - int native; - int ref_count; - int use_ref_count; }; struct r300_vertex_program_cont { struct gl_vertex_program mesa_program; /* Must be first */ - struct r300_vertex_shader_fragment params; struct r300_vertex_program *progs; }; -#define PFS_MAX_ALU_INST 64 -#define PFS_MAX_TEX_INST 64 -#define PFS_MAX_TEX_INDIRECT 4 -#define PFS_NUM_TEMP_REGS 32 -#define PFS_NUM_CONST_REGS 16 +#define R300_PFS_MAX_ALU_INST 64 +#define R300_PFS_MAX_TEX_INST 32 +#define R300_PFS_MAX_TEX_INDIRECT 4 +#define R300_PFS_NUM_TEMP_REGS 32 +#define R300_PFS_NUM_CONST_REGS 32 -struct r300_pfs_compile_state; +#define R500_PFS_MAX_INST 512 +#define R500_PFS_NUM_TEMP_REGS 128 +#define R500_PFS_NUM_CONST_REGS 256 +struct r300_pfs_compile_state; +struct r500_pfs_compile_state; /** * Stores state that influences the compilation of a fragment program. @@ -514,7 +490,7 @@ struct r300_fragment_program_node { struct r300_fragment_program_code { struct { int length; /**< total # of texture instructions used */ - GLuint inst[PFS_MAX_TEX_INST]; + GLuint inst[R300_PFS_MAX_TEX_INST]; } tex; struct { @@ -524,7 +500,7 @@ struct r300_fragment_program_code { GLuint inst1; GLuint inst2; GLuint inst3; - } inst[PFS_MAX_ALU_INST]; + } inst[R300_PFS_MAX_ALU_INST]; } alu; struct r300_fragment_program_node node[4]; @@ -535,53 +511,12 @@ struct r300_fragment_program_code { * Remember which program register a given hardware constant * belongs to. */ - struct prog_src_register constant[PFS_NUM_CONST_REGS]; + struct prog_src_register constant[R300_PFS_NUM_CONST_REGS]; int const_nr; int max_temp_idx; }; -/** - * Store everything about a fragment program that is needed - * to render with that program. - */ -struct r300_fragment_program { - struct gl_fragment_program mesa_program; - - GLboolean translated; - GLboolean error; - - struct r300_fragment_program_external_state state; - struct r300_fragment_program_code code; - - GLboolean WritesDepth; - GLuint optimization; -}; - -struct r500_pfs_compile_state; - -struct r500_fragment_program_external_state { - struct { - /** - * If the sampler is used as a shadow sampler, - * this field is: - * 0 - GL_LUMINANCE - * 1 - GL_INTENSITY - * 2 - GL_ALPHA - * depending on the depth texture mode. - */ - GLuint depth_texture_mode : 2; - - /** - * If the sampler is used as a shadow sampler, - * this field is (texture_compare_func - GL_NEVER). - * [e.g. if compare function is GL_LEQUAL, this field is 3] - * - * Otherwise, this field is 0. - */ - GLuint texture_compare_func : 3; - } unit[16]; -}; struct r500_fragment_program_code { struct { @@ -591,7 +526,7 @@ struct r500_fragment_program_code { GLuint inst3; GLuint inst4; GLuint inst5; - } inst[512]; + } inst[R500_PFS_MAX_INST]; int inst_offset; int inst_end; @@ -600,47 +535,41 @@ struct r500_fragment_program_code { * Remember which program register a given hardware constant * belongs to. */ - struct prog_src_register constant[PFS_NUM_CONST_REGS]; + struct prog_src_register constant[R500_PFS_NUM_CONST_REGS]; int const_nr; int max_temp_idx; }; -struct r500_fragment_program { - struct gl_fragment_program mesa_program; +/** +* Store everything about a fragment program that is needed +* to render with that program. +*/ +struct r300_fragment_program { + struct gl_fragment_program Base; - GLcontext *ctx; GLboolean translated; GLboolean error; - struct r500_fragment_program_external_state state; - struct r500_fragment_program_code code; + struct r300_fragment_program_external_state state; + union rX00_fragment_program_code { + struct r300_fragment_program_code r300; + struct r500_fragment_program_code r500; + } code; GLboolean writes_depth; - GLuint optimization; }; -#define R300_MAX_AOS_ARRAYS 16 - -#define REG_COORDS 0 -#define REG_COLOR0 1 -#define REG_TEX0 2 - -struct r300_state { - struct r300_texture_state texture; - int sw_tcl_inputs[VERT_ATTRIB_MAX]; - struct r300_vertex_shader_state vertex_shader; - - - DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for. - They are the same as tnl->render_inputs for fixed pipeline */ - +struct r300_fragment_program_compiler { + r300ContextPtr r300; + struct r300_fragment_program *fp; + union rX00_fragment_program_code *code; + struct gl_program *program; }; -#define R300_FALLBACK_NONE 0 -#define R300_FALLBACK_TCL 1 -#define R300_FALLBACK_RAST 2 +#define R300_MAX_AOS_ARRAYS 16 + /* r300_swtcl.c */ @@ -654,18 +583,44 @@ struct r300_swtcl_info { * Offset of the 3UB specular color data within a hardware (swtcl) vertex. */ GLuint specoffset; +}; - struct vertex_attribute{ - GLuint attr; - GLubyte format; - GLubyte dst_loc; - GLuint swizzle; - GLubyte write_mask; - } vert_attrs[VERT_ATTRIB_MAX]; +struct r300_vtable { + void (* SetupRSUnit)(GLcontext *ctx); + void (* SetupFragmentShaderTextures)(GLcontext *ctx, int *tmu_mappings); + GLboolean (* BuildFragmentProgramHwCode)(struct r300_fragment_program_compiler *compiler); + void (* FragmentProgramDump)(union rX00_fragment_program_code *code); + void (* SetupPixelShader)(GLcontext *ctx); +}; - GLubyte vertex_attr_count; +struct r300_vertex_buffer { + struct vertex_attribute { + /* generic */ + GLubyte element; + GLvoid *data; + GLboolean free_needed; + GLuint stride; + GLuint dwords; + GLubyte size; /* number of components */ + + /* hw specific */ + uint32_t data_type:4; + uint32_t dst_loc:5; + uint32_t _signed:1; + uint32_t normalize:1; + uint32_t swizzle:12; + uint32_t write_mask:4; + } attribs[VERT_ATTRIB_MAX]; + + GLubyte num_attribs; }; +struct r300_index_buffer { + GLvoid *ptr; + GLboolean is_32bit; + GLboolean free_needed; + GLuint count; +}; /** * \brief R300 context structure. @@ -673,10 +628,10 @@ struct r300_swtcl_info { struct r300_context { struct radeon_context radeon; /* parent class, must be first */ + struct r300_vtable vtbl; + struct r300_hw_state hw; - struct r300_state state; - struct gl_vertex_program *curr_vp; struct r300_vertex_program *selected_vp; /* Vertex buffers @@ -684,10 +639,22 @@ struct r300_context { GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; - GLboolean disable_lowimpact_fallback; - + struct r300_options { + uint32_t conformance_mode:1; + uint32_t hw_tcl_enabled:1; + uint32_t s3tc_force_enabled:1; + uint32_t s3tc_force_disabled:1; + uint32_t stencil_two_side_disabled:1; + } options; + struct r300_swtcl_info swtcl; + struct r300_vertex_buffer vbuf; + struct r300_index_buffer ind_buf; GLboolean vap_flush_needed; + + uint32_t fallback; + + DECLARE_RENDERINPUTS(render_inputs_bitset); }; #define R300_CONTEXT(ctx) ((r300ContextPtr)(ctx->DriverCtx)) @@ -703,10 +670,9 @@ extern int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program_cont *vp, float *dst); -#define RADEON_D_CAPTURE 0 -#define RADEON_D_PLAYBACK 1 -#define RADEON_D_PLAYBACK_RAW 2 -#define RADEON_D_T 3 +extern void r300InitShaderFunctions(r300ContextPtr r300); + +extern void r300InitDraw(GLcontext *ctx); #define r300PackFloat32 radeonPackFloat32 #define r300PackFloat24 radeonPackFloat24 diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c new file mode 100644 index 00000000000..cc5650fb7c8 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -0,0 +1,475 @@ +/************************************************************************** + * + * Copyright 2009 Maciej Cencora + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHOR(S) AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <stdlib.h> + +#include "main/glheader.h" +#include "main/context.h" +#include "main/state.h" +#include "main/api_validate.h" +#include "main/enums.h" + +#include "r300_reg.h" +#include "r300_context.h" +#include "r300_emit.h" +#include "r300_render.h" +#include "r300_state.h" +#include "r300_tex.h" + +#include "tnl/tnl.h" +#include "tnl/t_vp_build.h" +#include "vbo/vbo_context.h" +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" + +static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf, struct gl_buffer_object **bo, GLuint *nr_bo) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_index_buffer *ind_buf = &r300->ind_buf; + GLvoid *src_ptr; + + if (!mesa_ind_buf) { + ind_buf->ptr = NULL; + return; + } + + ind_buf->count = mesa_ind_buf->count; + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { + bo[*nr_bo] = mesa_ind_buf->obj; + (*nr_bo)++; + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + assert(mesa_ind_buf->obj->Pointer != NULL); + } + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) { + GLubyte *in = (GLubyte *)src_ptr; + GLuint *out = _mesa_malloc(sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1)); + int i; + + ind_buf->ptr = out; + + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) { + *out++ = in[i] | in[i + 1] << 16; + } + + if (i < mesa_ind_buf->count) { + *out++ = in[i]; + } + + ind_buf->free_needed = GL_TRUE; + ind_buf->is_32bit = GL_FALSE; + } else if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) { +#if MESA_BIG_ENDIAN + GLushort *in = (GLushort *)src_ptr; + GLuint *out = _mesa_malloc(sizeof(GLushort) * + ((mesa_ind_buf->count + 1) & ~1)); + int i; + + ind_buf->ptr = out; + + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) { + *out++ = in[i] | in[i + 1] << 16; + } + + if (i < mesa_ind_buf->count) { + *out++ = in[i]; + } + + ind_buf->free_needed = GL_TRUE; +#else + ind_buf->ptr = src_ptr; + ind_buf->free_needed = GL_FALSE; +#endif + ind_buf->is_32bit = GL_FALSE; + } else { + ind_buf->ptr = src_ptr; + ind_buf->free_needed = GL_FALSE; + ind_buf->is_32bit = GL_TRUE; + } +} + +static int getTypeSize(GLenum type) +{ + switch (type) { + case GL_DOUBLE: + return sizeof(GLdouble); + case GL_FLOAT: + return sizeof(GLfloat); + case GL_INT: + return sizeof(GLint); + case GL_UNSIGNED_INT: + return sizeof(GLuint); + case GL_SHORT: + return sizeof(GLshort); + case GL_UNSIGNED_SHORT: + return sizeof(GLushort); + case GL_BYTE: + return sizeof(GLbyte); + case GL_UNSIGNED_BYTE: + return sizeof(GLubyte); + default: + assert(0); + return 0; + } +} + +#define CONVERT( TYPE, MACRO ) do { \ + GLuint i, j, sz; \ + sz = input->Size; \ + if (input->Normalized) { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)src_ptr; \ + for (j = 0; j < sz; j++) { \ + *dst_ptr++ = MACRO(*in); \ + in++; \ + } \ + src_ptr += stride; \ + } \ + } else { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)src_ptr; \ + for (j = 0; j < sz; j++) { \ + *dst_ptr++ = (GLfloat)(*in); \ + in++; \ + } \ + src_ptr += stride; \ + } \ + } \ +} while (0) + +static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const struct gl_client_array *input, struct gl_buffer_object **bo, GLuint *nr_bo) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_vertex_buffer *vbuf = &r300->vbuf; + struct vertex_attribute r300_attr; + const void *src_ptr; + GLenum type; + GLuint stride; + + if (input->BufferObj->Name) { + if (!input->BufferObj->Pointer) { + bo[*nr_bo] = input->BufferObj; + (*nr_bo)++; + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + assert(input->BufferObj->Pointer != NULL); + } + + src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + } else + src_ptr = input->Ptr; + + stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB; + + if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT || +#if MESA_BIG_ENDIAN + getTypeSize(input->Type) != 4 || +#endif + stride < 4) { + if (RADEON_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "%s: Converting vertex attributes, attribute data format %x,", __FUNCTION__, input->Type); + fprintf(stderr, "stride %d, components %d\n", stride, input->Size); + } + + GLfloat *dst_ptr, *tmp; + tmp = dst_ptr = _mesa_malloc(sizeof(GLfloat) * input->Size * count); + + switch (input->Type) { + case GL_DOUBLE: + CONVERT(GLdouble, (GLfloat)); + break; + case GL_UNSIGNED_INT: + CONVERT(GLuint, UINT_TO_FLOAT); + break; + case GL_INT: + CONVERT(GLint, INT_TO_FLOAT); + break; + case GL_UNSIGNED_SHORT: + CONVERT(GLushort, USHORT_TO_FLOAT); + break; + case GL_SHORT: + CONVERT(GLshort, SHORT_TO_FLOAT); + break; + case GL_UNSIGNED_BYTE: + assert(input->Format != GL_BGRA); + CONVERT(GLubyte, UBYTE_TO_FLOAT); + break; + case GL_BYTE: + CONVERT(GLbyte, BYTE_TO_FLOAT); + break; + default: + assert(0); + break; + } + + type = GL_FLOAT; + r300_attr.free_needed = GL_TRUE; + r300_attr.data = tmp; + r300_attr.stride = sizeof(GLfloat) * input->Size; + r300_attr.dwords = input->Size; + } else { + type = input->Type; + r300_attr.free_needed = GL_FALSE; + r300_attr.data = (GLvoid *)src_ptr; + r300_attr.stride = stride; + r300_attr.dwords = (getTypeSize(type) * input->Size + 3)/ 4; + } + + r300_attr.size = input->Size; + r300_attr.element = attr; + r300_attr.dst_loc = vbuf->num_attribs; + + switch (type) { + case GL_FLOAT: + switch (input->Size) { + case 1: r300_attr.data_type = R300_DATA_TYPE_FLOAT_1; break; + case 2: r300_attr.data_type = R300_DATA_TYPE_FLOAT_2; break; + case 3: r300_attr.data_type = R300_DATA_TYPE_FLOAT_3; break; + case 4: r300_attr.data_type = R300_DATA_TYPE_FLOAT_4; break; + } + r300_attr._signed = 0; + r300_attr.normalize = 0; + break; + case GL_SHORT: + r300_attr._signed = 1; + r300_attr.normalize = input->Normalized; + switch (input->Size) { + case 1: + case 2: + r300_attr.data_type = R300_DATA_TYPE_SHORT_2; + break; + case 3: + case 4: + r300_attr.data_type = R300_DATA_TYPE_SHORT_4; + break; + } + break; + case GL_BYTE: + r300_attr._signed = 1; + r300_attr.normalize = input->Normalized; + r300_attr.data_type = R300_DATA_TYPE_BYTE; + break; + case GL_UNSIGNED_SHORT: + r300_attr._signed = 0; + r300_attr.normalize = input->Normalized; + switch (input->Size) { + case 1: + case 2: + r300_attr.data_type = R300_DATA_TYPE_SHORT_2; + break; + case 3: + case 4: + r300_attr.data_type = R300_DATA_TYPE_SHORT_4; + break; + } + break; + case GL_UNSIGNED_BYTE: + r300_attr._signed = 0; + r300_attr.normalize = input->Normalized; + if (input->Format == GL_BGRA) + r300_attr.data_type = R300_DATA_TYPE_D3DCOLOR; + else + r300_attr.data_type = R300_DATA_TYPE_BYTE; + break; + + default: + case GL_DOUBLE: + case GL_INT: + case GL_UNSIGNED_INT: + assert(0); + break; + } + + switch (input->Size) { + case 4: + r300_attr.swizzle = SWIZZLE_XYZW; + break; + case 3: + r300_attr.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); + break; + case 2: + r300_attr.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE); + break; + case 1: + r300_attr.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE); + break; + } + + r300_attr.write_mask = MASK_XYZW; + + vbuf->attribs[vbuf->num_attribs] = r300_attr; + ++vbuf->num_attribs; +} + +static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count, struct gl_buffer_object **bo, GLuint *nr_bo) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_vertex_buffer *vbuf = &r300->vbuf; + + { + int i, tmp; + + tmp = r300->selected_vp->key.InputsRead; + i = 0; + vbuf->num_attribs = 0; + while (tmp) { + /* find first enabled bit */ + while (!(tmp & 1)) { + tmp >>= 1; + ++i; + } + + r300TranslateAttrib(ctx, i, count, arrays[i], bo, nr_bo); + + tmp >>= 1; + ++i; + } + } + + r300SwitchFallback(ctx, R300_FALLBACK_AOS_LIMIT, vbuf->num_attribs > R300_MAX_AOS_ARRAYS); + if (r300->fallback) + return; + + { + int i; + + for (i = 0; i < vbuf->num_attribs; i++) { + rcommon_emit_vector(ctx, &r300->radeon.tcl.aos[i], + vbuf->attribs[i].data, vbuf->attribs[i].dwords, + vbuf->attribs[i].stride, count); + } + + r300->radeon.tcl.aos_count = vbuf->num_attribs; + } +} + +static void r300FreeData(GLcontext *ctx, struct gl_buffer_object **bo, GLuint nr_bo) +{ + { + struct r300_vertex_buffer *vbuf = &R300_CONTEXT(ctx)->vbuf; + int i; + + for (i = 0; i < vbuf->num_attribs; i++) { + if (vbuf->attribs[i].free_needed) + _mesa_free(vbuf->attribs[i].data); + } + } + + { + struct r300_index_buffer *ind_buf = &R300_CONTEXT(ctx)->ind_buf; + if (ind_buf->free_needed) + _mesa_free(ind_buf->ptr); + } + + { + int i; + + for (i = 0; i < nr_bo; ++i) { + ctx->Driver.UnmapBuffer(ctx, 0, bo[i]); + } + } +} + +static GLboolean r300TryDrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index ) +{ + struct r300_context *r300 = R300_CONTEXT(ctx); + struct gl_buffer_object *bo[VERT_ATTRIB_MAX+1]; + GLuint i, nr_bo = 0; + + if (ctx->NewState) + _mesa_update_state( ctx ); + + if (r300->options.hw_tcl_enabled) + _tnl_UpdateFixedFunctionProgram(ctx); + + r300UpdateShaders(r300); + + r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, !r300ValidateBuffers(ctx)); + + r300FixupIndexBuffer(ctx, ib, bo, &nr_bo); + + r300SetVertexFormat(ctx, arrays, max_index + 1, bo, &nr_bo); + + if (r300->fallback) + return GL_FALSE; + + r300SetupVAP(ctx, r300->selected_vp->key.InputsRead, r300->selected_vp->key.OutputsWritten); + + r300UpdateShaderStates(r300); + + r300EmitCacheFlush(r300); + radeonEmitState(&r300->radeon); + + for (i = 0; i < nr_prims; ++i) { + r300RunRenderPrimitive(ctx, prim[i].start, prim[i].start + prim[i].count, prim[i].mode); + } + + r300EmitCacheFlush(r300); + + radeonReleaseArrays(ctx, ~0); + + r300FreeData(ctx, bo, nr_bo); + + return GL_TRUE; +} + +/* TODO: rebase if number of indices in any of primitives is > 8192 for 32bit indices or 16384 for 16bit indices */ + +static void r300DrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index) +{ + GLboolean retval; + + if (min_index) { + vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims ); + return; + } + + /* Make an attempt at drawing */ + retval = r300TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + + /* If failed run tnl pipeline - it should take care of fallbacks */ + if (!retval) + _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); +} + +void r300InitDraw(GLcontext *ctx) +{ + struct vbo_context *vbo = vbo_context(ctx); + + vbo->draw_prims = r300DrawPrims; +} diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index 4fd6ba9b91a..c3817721dc4 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -31,6 +31,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * \file * * \author Keith Whitwell <[email protected]> + * \author Maciej Cencora <[email protected]> */ #include "main/glheader.h" @@ -49,74 +50,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_state.h" #include "r300_emit.h" #include "r300_ioctl.h" - - -#if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \ - SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \ - SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \ - SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \ - SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \ - SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE -#error Cannot change these! -#endif - -#define DEBUG_ALL DEBUG_VERTS - -#define DW_SIZE(x) ((inputs[tab[(x)]] << R300_DST_VEC_LOC_SHIFT) | \ - (attribptr[tab[(x)]]->size - 1) << R300_DATA_TYPE_0_SHIFT) - -GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, - int *inputs, GLint * tab, GLuint nr) -{ - GLuint i, dw; - - /* type, inputs, stop bit, size */ - for (i = 0; i < nr; i += 2) { - /* make sure input is valid, would lockup the gpu */ - assert(inputs[tab[i]] != -1); - dw = (R300_SIGNED | DW_SIZE(i)); - if (i + 1 == nr) { - dw |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT; - } else { - assert(inputs[tab[i + 1]] != -1); - dw |= (R300_SIGNED | - DW_SIZE(i + 1)) << R300_DATA_TYPE_1_SHIFT; - if (i + 2 == nr) { - dw |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT; - } - } - dst[i >> 1] = dw; - } - - return (nr + 1) >> 1; -} - -static GLuint r300VAPInputRoute1Swizzle(int swizzle[4]) -{ - return (swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | - (swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | - (swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | - (swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT); -} - -GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr) -{ - GLuint i, dw; - - for (i = 0; i < nr; i += 2) { - dw = (r300VAPInputRoute1Swizzle(swizzle[i]) | - ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | - R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE0_SHIFT; - if (i + 1 < nr) { - dw |= (r300VAPInputRoute1Swizzle(swizzle[i + 1]) | - ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | - R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT; - } - dst[i >> 1] = dw; - } - - return (nr + 1) >> 1; -} +#include "r300_render.h" +#include "r300_swtcl.h" GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead) { @@ -127,7 +62,6 @@ GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead) GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); GLuint i, vic_1 = 0; if (InputsRead & (1 << VERT_ATTRIB_POS)) @@ -139,213 +73,99 @@ GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead) if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) vic_1 |= R300_INPUT_CNTL_COLOR; - rmesa->state.texture.tc_count = 0; for (i = 0; i < ctx->Const.MaxTextureUnits; i++) if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) { - rmesa->state.texture.tc_count++; vic_1 |= R300_INPUT_CNTL_TC0 << i; } return vic_1; } -GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten) +GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads) { GLuint ret = 0; - if (OutputsWritten & (1 << VERT_RESULT_HPOS)) + if (vp_writes & (1 << VERT_RESULT_HPOS)) ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; - if (OutputsWritten & (1 << VERT_RESULT_COL0)) + if (vp_writes & (1 << VERT_RESULT_COL0) && fp_reads & FRAG_BIT_COL0) ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT; - if (OutputsWritten & (1 << VERT_RESULT_COL1)) + if (vp_writes & (1 << VERT_RESULT_COL1) && fp_reads & FRAG_BIT_COL1) ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT; - if (OutputsWritten & (1 << VERT_RESULT_BFC0) - || OutputsWritten & (1 << VERT_RESULT_BFC1)) - ret |= - R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT | - R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT | - R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; + /* Two sided lighting works only if all 4 colors are written */ + if (vp_writes & (1 << VERT_RESULT_BFC0) || vp_writes & (1 << VERT_RESULT_BFC1)) + ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT | R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT | + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT | R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; - if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) + if (vp_writes & (1 << VERT_RESULT_PSIZ)) ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; return ret; } -GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten) +GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads) { GLuint i, ret = 0, first_free_texcoord = 0; for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) { - ret |= (4 << (3 * i)); + if (vp_writes & (1 << (VERT_RESULT_TEX0 + i)) && fp_reads & FRAG_BIT_TEX(i)) { + ret |= (4 << (3 * first_free_texcoord)); ++first_free_texcoord; } } - if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { - if (first_free_texcoord > 8) { - fprintf(stderr, "\tout of free texcoords to write fog coord\n"); - _mesa_exit(-1); - } - ret |= 1 << (3 * first_free_texcoord); + if (fp_reads & FRAG_BIT_WPOS) { + ret |= (4 << (3 * first_free_texcoord)); + ++first_free_texcoord; + } + + if (vp_writes & (1 << VERT_RESULT_FOGC) && fp_reads & FRAG_BIT_FOGC) { + ret |= 4 << (3 * first_free_texcoord); + } + + if (first_free_texcoord > 8) { + fprintf(stderr, "\tout of free texcoords\n"); + _mesa_exit(-1); } return ret; } -/* Emit vertex data to GART memory - * Route inputs to the vertex processor - * This function should never return R300_FALLBACK_TCL when using software tcl. - */ -int r300EmitArrays(GLcontext * ctx) +GLboolean r300EmitArrays(GLcontext * ctx) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - GLuint nr; - GLuint count = vb->Count; - GLuint i; - GLuint InputsRead = 0, OutputsWritten = 0; - int *inputs = NULL; - int vir_inputs[VERT_ATTRIB_MAX]; - GLint tab[VERT_ATTRIB_MAX]; - int swizzle[VERT_ATTRIB_MAX][4]; - struct r300_vertex_program *prog = - (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); - - if (hw_tcl_on) { - inputs = prog->inputs; - InputsRead = prog->key.InputsRead; - OutputsWritten = prog->key.OutputsWritten; - } else { - inputs = rmesa->state.sw_tcl_inputs; - - DECLARE_RENDERINPUTS(render_inputs_bitset); - RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset); - - vb->AttribPtr[VERT_ATTRIB_POS] = vb->ClipPtr; - - assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)); - assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0); - - if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) { - InputsRead |= 1 << VERT_ATTRIB_POS; - OutputsWritten |= 1 << VERT_RESULT_HPOS; - } + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_vertex_buffer *vbuf = &r300->vbuf; + GLuint InputsRead, OutputsWritten; - if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0)) { - InputsRead |= 1 << VERT_ATTRIB_COLOR0; - OutputsWritten |= 1 << VERT_RESULT_COL0; - } + r300ChooseSwtclVertexFormat(ctx, &InputsRead, &OutputsWritten); - if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR1)) { - InputsRead |= 1 << VERT_ATTRIB_COLOR1; - OutputsWritten |= 1 << VERT_RESULT_COL1; - } + r300SwitchFallback(ctx, R300_FALLBACK_AOS_LIMIT, vbuf->num_attribs > R300_MAX_AOS_ARRAYS); + if (r300->fallback & R300_RASTER_FALLBACK_MASK) + return GL_FALSE; - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_TEX(i))) { - InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i); - OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); - } - } + { + struct vertex_buffer *mesa_vb = &TNL_CONTEXT(ctx)->vb; + GLuint attr, i; - for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { - if (InputsRead & (1 << i)) { - inputs[i] = nr++; - } else { - inputs[i] = -1; - } + for (i = 0; i < vbuf->num_attribs; i++) { + attr = vbuf->attribs[i].element; + rcommon_emit_vector(ctx, &r300->radeon.tcl.aos[i], mesa_vb->AttribPtr[attr]->data, + mesa_vb->AttribPtr[attr]->size, mesa_vb->AttribPtr[attr]->stride, mesa_vb->Count); } - /* Fixed, apply to vir0 only */ - memcpy(vir_inputs, inputs, VERT_ATTRIB_MAX * sizeof(int)); - inputs = vir_inputs; - if (InputsRead & VERT_ATTRIB_POS) - inputs[VERT_ATTRIB_POS] = 0; - if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) - inputs[VERT_ATTRIB_COLOR0] = 2; - if (InputsRead & (1 << VERT_ATTRIB_COLOR1)) - inputs[VERT_ATTRIB_COLOR1] = 3; - for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) - if (InputsRead & (1 << i)) - inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); - - RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset); - } + r300->radeon.tcl.aos_count = vbuf->num_attribs; - assert(InputsRead); - assert(OutputsWritten); - - for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { - if (InputsRead & (1 << i)) { - tab[nr++] = i; - } + /* Fill index buffer info */ + r300->ind_buf.ptr = mesa_vb->Elts; + r300->ind_buf.is_32bit = GL_TRUE; + r300->ind_buf.free_needed = GL_FALSE; } - if (nr > R300_MAX_AOS_ARRAYS) { - return R300_FALLBACK_TCL; - } - - for (i = 0; i < nr; i++) { - int ci; - - swizzle[i][0] = SWIZZLE_ZERO; - swizzle[i][1] = SWIZZLE_ZERO; - swizzle[i][2] = SWIZZLE_ZERO; - swizzle[i][3] = SWIZZLE_ONE; + r300SetupVAP(ctx, InputsRead, OutputsWritten); - for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) { - swizzle[i][ci] = ci; - } - rcommon_emit_vector(ctx, &rmesa->radeon.tcl.aos[i], - vb->AttribPtr[tab[i]]->data, - vb->AttribPtr[tab[i]]->size, - vb->AttribPtr[tab[i]]->stride, count); - } - - /* Setup INPUT_ROUTE. */ - if (rmesa->radeon.radeonScreen->kernel_mm) { - R300_STATECHANGE(rmesa, vir[0]); - rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF; - rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF; - rmesa->hw.vir[0].cmd[0] |= - (r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], - vb->AttribPtr, inputs, tab, nr) & 0x3FFF) << 16; - R300_STATECHANGE(rmesa, vir[1]); - rmesa->hw.vir[1].cmd[0] |= - (r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, - nr) & 0x3FFF) << 16; - } else { - R300_STATECHANGE(rmesa, vir[0]); - ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = - r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], - vb->AttribPtr, inputs, tab, nr); - R300_STATECHANGE(rmesa, vir[1]); - ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = - r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, - nr); - } - - /* Setup INPUT_CNTL. */ - R300_STATECHANGE(rmesa, vic); - rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); - rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); - - /* Setup OUTPUT_VTX_FMT. */ - R300_STATECHANGE(rmesa, vof); - rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = - r300VAPOutputCntl0(ctx, OutputsWritten); - rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = - r300VAPOutputCntl1(ctx, OutputsWritten); - - rmesa->radeon.tcl.aos_count = nr; - - return R300_FALLBACK_NONE; + return GL_TRUE; } void r300EmitCacheFlush(r300ContextPtr rmesa) diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index 80c22d5e9ab..2fb8b82d3a1 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -216,19 +216,16 @@ void static INLINE cp_wait(radeonContextPtr radeon, unsigned char flags) } } -extern int r300EmitArrays(GLcontext * ctx); +extern GLboolean r300EmitArrays(GLcontext * ctx); extern int r300PrimitiveType(r300ContextPtr rmesa, int prim); extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim); extern void r300EmitCacheFlush(r300ContextPtr rmesa); -extern GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, - int *inputs, GLint * tab, GLuint nr); -extern GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr); extern GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead); extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead); -extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten); -extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten); +extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads); +extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads); #endif diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 32182bb6674..55c1cfe6317 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -25,32 +25,12 @@ * */ -/** - * \file - * - * Fragment program compiler. Perform transformations on the intermediate - * representation until the program is in a form where we can translate - * it more or less directly into machine-readable form. - * - * \author Ben Skeggs <[email protected]> - * \author Jerome Glisse <[email protected]> - */ +#include "r300_fragprog.h" -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/prog_instruction.h" #include "shader/prog_parameter.h" -#include "shader/prog_print.h" #include "r300_context.h" -#include "r300_fragprog.h" #include "r300_fragprog_swizzle.h" -#include "r300_state.h" - -#include "radeon_nqssadce.h" -#include "radeon_program_alu.h" - static void reset_srcreg(struct prog_src_register* reg) { @@ -81,7 +61,7 @@ static struct prog_src_register shadow_ambient(struct gl_program *program, int t * \todo If/when r5xx uses the radeon_program architecture, this can probably * be reused. */ -static GLboolean transform_TEX( +GLboolean r300_transform_TEX( struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data) { @@ -160,6 +140,8 @@ static GLboolean transform_TEX( inst.DstReg.Index = tempreg; inst.DstReg.WriteMask = WRITEMASK_XYZW; destredirect = GL_TRUE; + } else if (inst.SaturateMode) { + destredirect = GL_TRUE; } } @@ -175,7 +157,7 @@ static GLboolean transform_TEX( inst.SrcReg[0].File = PROGRAM_TEMPORARY; inst.SrcReg[0].Index = tmpreg; } - + tgt = radeonAppendInstructions(t->Program, 1); _mesa_copy_instructions(tgt, &inst, 1); @@ -214,9 +196,9 @@ static GLboolean transform_TEX( * r < tex <=> -tex+r < 0 * r >= tex <=> not (-tex+r < 0 */ if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - tgt[1].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW; + tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; else - tgt[1].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW; + tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; tgt[2].Opcode = OPCODE_CMP; tgt[2].DstReg = orig_inst->DstReg; @@ -239,6 +221,7 @@ static GLboolean transform_TEX( tgt->Opcode = OPCODE_MOV; tgt->DstReg = orig_inst->DstReg; + tgt->SaturateMode = inst.SaturateMode; tgt->SrcReg[0].File = PROGRAM_TEMPORARY; tgt->SrcReg[0].Index = inst.DstReg.Index; } @@ -246,241 +229,10 @@ static GLboolean transform_TEX( return GL_TRUE; } - -static void update_params(r300ContextPtr r300, struct r300_fragment_program *fp) -{ - struct gl_fragment_program *mp = &fp->mesa_program; - - /* Ask Mesa nicely to fill in ParameterValues for us */ - if (mp->Base.Parameters) - _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters); -} - - -/** - * Transform the program to support fragment.position. - * - * Introduce a small fragment at the start of the program that will be - * the only code that directly reads the FRAG_ATTRIB_WPOS input. - * All other code pieces that reference that input will be rewritten - * to read from a newly allocated temporary. - * - * \todo if/when r5xx supports the radeon_program architecture, this is a - * likely candidate for code sharing. - */ -static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) -{ - GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead; - - if (!(InputsRead & FRAG_BIT_WPOS)) - return; - - static gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 - }; - struct prog_instruction *fpi; - GLuint window_index; - int i = 0; - GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); - - _mesa_insert_instructions(compiler->program, 0, 3); - fpi = compiler->program->Instructions; - - /* perspective divide */ - fpi[i].Opcode = OPCODE_RCP; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_W; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; - i++; - - fpi[i].Opcode = OPCODE_MUL; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[1].Index = tempregi; - fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; - i++; - - /* viewport transformation */ - window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); - - fpi[i].Opcode = OPCODE_MAD; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[0].Index = tempregi; - fpi[i].SrcReg[0].Swizzle = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[1].Index = window_index; - fpi[i].SrcReg[1].Swizzle = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[2].Index = window_index; - fpi[i].SrcReg[2].Swizzle = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - i++; - - for (; i < compiler->program->NumInstructions; ++i) { - int reg; - for (reg = 0; reg < 3; reg++) { - if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && - fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { - fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[reg].Index = tempregi; - } - } - } -} - - -static void nqssadce_init(struct nqssadce_state* s) -{ - s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; - s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; -} - - -static GLuint build_dtm(GLuint depthmode) -{ - switch(depthmode) { - default: - case GL_LUMINANCE: return 0; - case GL_INTENSITY: return 1; - case GL_ALPHA: return 2; - } -} - -static GLuint build_func(GLuint comparefunc) -{ - return comparefunc - GL_NEVER; -} - - -/** - * Collect all external state that is relevant for compiling the given - * fragment program. - */ -static void build_state( - r300ContextPtr r300, - struct r300_fragment_program *fp, - struct r300_fragment_program_external_state *state) -{ - int unit; - - _mesa_bzero(state, sizeof(*state)); - - for(unit = 0; unit < 16; ++unit) { - if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) { - struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; - - state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); - state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); - } - } -} - - -void r300TranslateFragmentShader(r300ContextPtr r300, - struct r300_fragment_program *fp) -{ - struct r300_fragment_program_external_state state; - - build_state(r300, fp, &state); - if (_mesa_memcmp(&fp->state, &state, sizeof(state))) { - /* TODO: cache compiled programs */ - fp->translated = GL_FALSE; - _mesa_memcpy(&fp->state, &state, sizeof(state)); - } - - if (!fp->translated) { - struct r300_fragment_program_compiler compiler; - - compiler.r300 = r300; - compiler.fp = fp; - compiler.code = &fp->code; - compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base); - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Fragment Program: Initial program:\n"); - _mesa_print_program(compiler.program); - } - - insert_WPOS_trailer(&compiler); - - struct radeon_program_transformation transformations[] = { - { &transform_TEX, &compiler }, - { &radeonTransformALU, 0 }, - { &radeonTransformTrigSimple, 0 } - }; - radeonLocalTransform( - r300->radeon.glCtx, - compiler.program, - 3, transformations); - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Fragment Program: After native rewrite:\n"); - _mesa_print_program(compiler.program); - } - - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r300FPIsNativeSwizzle, - .BuildSwizzle = &r300FPBuildSwizzle, - .RewriteDepthOut = GL_TRUE - }; - radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce); - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Compiler: after NqSSA-DCE:\n"); - _mesa_print_program(compiler.program); - } - - if (!r300FragmentProgramEmit(&compiler)) - fp->error = GL_TRUE; - - /* Subtle: Rescue any parameters that have been added during transformations */ - _mesa_free_parameter_list(fp->mesa_program.Base.Parameters); - fp->mesa_program.Base.Parameters = compiler.program->Parameters; - compiler.program->Parameters = 0; - - _mesa_reference_program(r300->radeon.glCtx, &compiler.program, NULL); - - if (!fp->error) - fp->translated = GL_TRUE; - if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) - r300FragmentProgramDump(fp, &fp->code); - r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); - } - - update_params(r300, fp); -} - /* just some random things... */ -void r300FragmentProgramDump( - struct r300_fragment_program *fp, - struct r300_fragment_program_code *code) +void r300FragmentProgramDump(union rX00_fragment_program_code *c) { + struct r300_fragment_program_code *code = &c->r300; int n, i, j; static int pc = 0; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h index 94fb554fb37..5ce6f33cee7 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.h +++ b/src/mesa/drivers/dri/r300/r300_fragprog.h @@ -33,9 +33,6 @@ #ifndef __R300_FRAGPROG_H_ #define __R300_FRAGPROG_H_ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" #include "shader/program.h" #include "shader/prog_instruction.h" @@ -105,28 +102,10 @@ #endif -struct r300_fragment_program; +extern GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); -extern void r300TranslateFragmentShader(r300ContextPtr r300, - struct r300_fragment_program *fp); +extern void r300FragmentProgramDump(union rX00_fragment_program_code *c); - -/** - * Used internally by the r300 fragment program code to store compile-time - * only data. - */ -struct r300_fragment_program_compiler { - r300ContextPtr r300; - struct r300_fragment_program *fp; - struct r300_fragment_program_code *code; - struct gl_program *program; -}; - -extern GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler); - - -extern void r300FragmentProgramDump( - struct r300_fragment_program *fp, - struct r300_fragment_program_code *code); +extern GLboolean r300_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); #endif diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c new file mode 100644 index 00000000000..abc8757ba1f --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -0,0 +1,291 @@ +/* + * Copyright (C) 2009 Maciej Cencora <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * Fragment program compiler. Perform transformations on the intermediate + * representation until the program is in a form where we can translate + * it more or less directly into machine-readable form. + * + * \author Ben Skeggs <[email protected]> + * \author Jerome Glisse <[email protected]> + */ + +#include "r300_fragprog_common.h" + +#include "shader/program.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" + +#include "r300_state.h" +#include "r300_fragprog.h" +#include "r300_fragprog_swizzle.h" +#include "r500_fragprog.h" + +#include "radeon_program.h" +#include "radeon_program_alu.h" + +static void update_params(GLcontext *ctx, struct gl_fragment_program *fp) +{ + /* Ask Mesa nicely to fill in ParameterValues for us */ + if (fp->Base.Parameters) + _mesa_load_state_parameters(ctx, fp->Base.Parameters); +} + +static void nqssadce_init(struct nqssadce_state* s) +{ + s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; + s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; +} + +/** + * Transform the program to support fragment.position. + * + * Introduce a small fragment at the start of the program that will be + * the only code that directly reads the FRAG_ATTRIB_WPOS input. + * All other code pieces that reference that input will be rewritten + * to read from a newly allocated temporary. + * + */ +static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) +{ + GLuint InputsRead = compiler->fp->Base.Base.InputsRead; + + if (!(InputsRead & FRAG_BIT_WPOS)) + return; + + static gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 + }; + struct prog_instruction *fpi; + GLuint window_index; + int i = 0; + GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); + + _mesa_insert_instructions(compiler->program, 0, 3); + fpi = compiler->program->Instructions; + + /* perspective divide */ + fpi[i].Opcode = OPCODE_RCP; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_W; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; + i++; + + fpi[i].Opcode = OPCODE_MUL; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + + fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[1].Index = tempregi; + fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; + i++; + + /* viewport transformation */ + window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); + + fpi[i].Opcode = OPCODE_MAD; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[0].Index = tempregi; + fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[1].Index = window_index; + fpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[2].Index = window_index; + fpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + i++; + + for (; i < compiler->program->NumInstructions; ++i) { + int reg; + for (reg = 0; reg < 3; reg++) { + if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && + fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { + fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[reg].Index = tempregi; + } + } + } +} + +static GLuint build_dtm(GLuint depthmode) +{ + switch(depthmode) { + default: + case GL_LUMINANCE: return 0; + case GL_INTENSITY: return 1; + case GL_ALPHA: return 2; + } +} + +static GLuint build_func(GLuint comparefunc) +{ + return comparefunc - GL_NEVER; +} + +/** + * Collect all external state that is relevant for compiling the given + * fragment program. + */ +static void build_state( + r300ContextPtr r300, + struct r300_fragment_program *fp, + struct r300_fragment_program_external_state *state) +{ + int unit; + + _mesa_bzero(state, sizeof(*state)); + + for(unit = 0; unit < 16; ++unit) { + if (fp->Base.Base.ShadowSamplers & (1 << unit)) { + struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; + + state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); + state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); + } + } +} + +void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)fp; + struct r300_fragment_program_external_state state; + + build_state(r300, r300_fp, &state); + if (_mesa_memcmp(&r300_fp->state, &state, sizeof(state))) { + /* TODO: cache compiled programs */ + r300_fp->translated = GL_FALSE; + _mesa_memcpy(&r300_fp->state, &state, sizeof(state)); + } + + if (!r300_fp->translated) { + struct r300_fragment_program_compiler compiler; + + compiler.r300 = r300; + compiler.fp = r300_fp; + compiler.code = &r300_fp->code; + compiler.program = _mesa_clone_program(ctx, &fp->Base); + + if (RADEON_DEBUG & DEBUG_PIXEL) { + fflush(stdout); + _mesa_printf("Fragment Program: Initial program:\n"); + _mesa_print_program(compiler.program); + fflush(stdout); + } + + insert_WPOS_trailer(&compiler); + + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + struct radeon_program_transformation transformations[] = { + { &r500_transform_TEX, &compiler }, + { &radeonTransformALU, 0 }, + { &radeonTransformDeriv, 0 }, + { &radeonTransformTrigScale, 0 } + }; + radeonLocalTransform(ctx, compiler.program, 4, transformations); + } else { + struct radeon_program_transformation transformations[] = { + { &r300_transform_TEX, &compiler }, + { &radeonTransformALU, 0 }, + { &radeonTransformTrigSimple, 0 } + }; + radeonLocalTransform(ctx, compiler.program, 3, transformations); + } + + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Fragment Program: After native rewrite:\n"); + _mesa_print_program(compiler.program); + fflush(stdout); + } + + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r500FPIsNativeSwizzle, + .BuildSwizzle = &r500FPBuildSwizzle, + .RewriteDepthOut = GL_TRUE + }; + radeonNqssaDce(ctx, compiler.program, &nqssadce); + } else { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r300FPIsNativeSwizzle, + .BuildSwizzle = &r300FPBuildSwizzle, + .RewriteDepthOut = GL_TRUE + }; + radeonNqssaDce(ctx, compiler.program, &nqssadce); + } + + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Compiler: after NqSSA-DCE:\n"); + _mesa_print_program(compiler.program); + fflush(stdout); + } + + if (!r300->vtbl.BuildFragmentProgramHwCode(&compiler)) + r300_fp->error = GL_TRUE; + + /* Subtle: Rescue any parameters that have been added during transformations */ + _mesa_free_parameter_list(fp->Base.Parameters); + fp->Base.Parameters = compiler.program->Parameters; + compiler.program->Parameters = 0; + + _mesa_reference_program(ctx, &compiler.program, NULL); + + r300_fp->translated = GL_TRUE; + + r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + + if (r300_fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) + r300->vtbl.FragmentProgramDump(&r300_fp->code); + } + + update_params(ctx, fp); +} diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.h b/src/mesa/drivers/dri/r300/r300_fragprog_common.h new file mode 100644 index 00000000000..85ea86fecb1 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2009 Maciej Cencora <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __R300_FRAGPROG_COMMON_H_ +#define __R300_FRAGPROG_COMMON_H_ + +#include "main/mtypes.h" + +extern void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp); + +#endif diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c index 9f0b7e35349..b75656e7ee1 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c @@ -47,7 +47,7 @@ #define PROG_CODE \ struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ - struct r300_fragment_program_code *code = c->code + struct r300_fragment_program_code *code = &c->code->r300 #define error(fmt, args...) do { \ fprintf(stderr, "%s::%s(): " fmt "\n", \ @@ -66,7 +66,7 @@ static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwind } if (*hwindex >= code->const_nr) { - if (*hwindex >= PFS_NUM_CONST_REGS) { + if (*hwindex >= R300_PFS_NUM_CONST_REGS) { error("Out of hw constants!\n"); return GL_FALSE; } @@ -138,7 +138,7 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) { PROG_CODE; - if (code->alu.length >= PFS_MAX_ALU_INST) { + if (code->alu.length >= R300_PFS_MAX_ALU_INST) { error("Too many ALU instructions"); return GL_FALSE; } @@ -201,7 +201,7 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) if (inst->Alpha.DepthWriteMask) { code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH; code->node[code->cur_node].flags |= R300_W_OUT; - c->fp->WritesDepth = GL_TRUE; + c->fp->writes_depth = GL_TRUE; } return GL_TRUE; @@ -213,7 +213,7 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) */ static GLboolean finish_node(struct r300_fragment_program_compiler *c) { - struct r300_fragment_program_code *code = c->code; + struct r300_fragment_program_code *code = &c->code->r300; struct r300_fragment_program_node *node = &code->node[code->cur_node]; if (node->alu_end < 0) { @@ -275,7 +275,7 @@ static GLboolean emit_tex(void* data, struct prog_instruction* inst) { PROG_CODE; - if (code->tex.length >= PFS_MAX_TEX_INST) { + if (code->tex.length >= R300_PFS_MAX_TEX_INST) { error("Too many TEX instructions"); return GL_FALSE; } @@ -318,16 +318,16 @@ static const struct radeon_pair_handler pair_handler = { .EmitPaired = &emit_alu, .EmitTex = &emit_tex, .BeginTexBlock = &begin_tex, - .MaxHwTemps = PFS_NUM_TEMP_REGS + .MaxHwTemps = R300_PFS_NUM_TEMP_REGS }; /** * Final compilation step: Turn the intermediate radeon_program into * machine-readable instructions. */ -GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler) +GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) { - struct r300_fragment_program_code *code = compiler->code; + struct r300_fragment_program_code *code = &compiler->code->r300; _mesa_bzero(code, sizeof(struct r300_fragment_program_code)); code->node[0].alu_end = -1; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c index a86d2bd4712..fc9d855bce6 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c @@ -92,7 +92,7 @@ static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle) GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) { if (reg.Abs) - reg.NegateBase = 0; + reg.Negate = NEGATE_NONE; if (opcode == OPCODE_KIL || opcode == OPCODE_TEX || @@ -100,7 +100,7 @@ GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) opcode == OPCODE_TXP) { int j; - if (reg.Abs || reg.NegateBase != (15*reg.NegateAbs)) + if (reg.Abs || reg.Negate) return GL_FALSE; for(j = 0; j < 4; ++j) { @@ -121,7 +121,7 @@ GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) if (GET_SWZ(reg.Swizzle, j) != SWIZZLE_NIL) relevant |= 1 << j; - if ((reg.NegateBase & relevant) && (reg.NegateBase & relevant) != relevant) + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) return GL_FALSE; if (!lookup_native_swizzle(reg.Swizzle)) @@ -137,13 +137,12 @@ GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) { if (src.Abs) - src.NegateBase = 0; + src.Negate = NEGATE_NONE; while(dst.WriteMask) { const struct swizzle_data *best_swizzle = 0; GLuint best_matchcount = 0; GLuint best_matchmask = 0; - GLboolean rgbnegate; int i, comp; for(i = 0; i < num_native_swizzles; ++i) { @@ -157,6 +156,11 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, if (swz == SWIZZLE_NIL) continue; if (swz == GET_SWZ(sd->hash, comp)) { + /* check if the negate bit of current component + * is the same for already matched components */ + if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp)))) + continue; + matchcount++; matchmask |= 1 << comp; } @@ -170,13 +174,6 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, } } - if ((src.NegateBase & best_matchmask) != 0) { - best_matchmask &= src.NegateBase; - rgbnegate = !src.NegateAbs; - } else { - rgbnegate = src.NegateAbs; - } - struct prog_instruction *inst; _mesa_insert_instructions(s->Program, s->IP, 1); @@ -185,6 +182,7 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, inst->DstReg = dst; inst->DstReg.WriteMask &= (best_matchmask | WRITEMASK_W); inst->SrcReg[0] = src; + inst->SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE; /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */ dst.WriteMask &= ~inst->DstReg.WriteMask; diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index a7f5121da74..104079b4dbe 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -79,7 +79,7 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, { BATCH_LOCALS(&r300->radeon); GLcontext *ctx = r300->radeon.glCtx; - __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon); GLuint cbpitch = 0; r300ContextPtr rmesa = r300; @@ -200,7 +200,7 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]); OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]); } - + r300EmitCacheFlush(rmesa); cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN); @@ -213,14 +213,13 @@ static void r300EmitClearState(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); BATCH_LOCALS(&r300->radeon); - __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon); int i; - int has_tcl = 1; + int has_tcl; int is_r500 = 0; GLuint vap_cntl; - if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) - has_tcl = 0; + has_tcl = r300->options.hw_tcl_enabled; if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) is_r500 = 1; @@ -448,7 +447,7 @@ static void r300EmitClearState(GLcontext * ctx) R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 | R500_ALU_RGBA_A_SWIZ_0; - + r500fp.cmd[7] = 0; emit_r500fp(ctx, &r500fp); } @@ -542,9 +541,9 @@ static void r300EmitClearState(GLcontext * ctx) } static void r300KernelClear(GLcontext *ctx, GLuint flags) -{ +{ r300ContextPtr r300 = R300_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon); struct radeon_framebuffer *rfb = dPriv->driverPrivate; struct radeon_renderbuffer *rrb; struct radeon_renderbuffer *rrbd; @@ -566,7 +565,7 @@ static void r300KernelClear(GLcontext *ctx, GLuint flags) r300ClearBuffer(r300, CLEARBUFFER_COLOR, rrb, NULL); bits = 0; } - + if (flags & BUFFER_BIT_FRONT_LEFT) { rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_FRONT_LEFT); r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd); @@ -591,7 +590,7 @@ static void r300KernelClear(GLcontext *ctx, GLuint flags) static void r300Clear(GLcontext * ctx, GLbitfield mask) { r300ContextPtr r300 = R300_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon); const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask); GLbitfield swrast_mask = 0, tri_mask = 0; int i; diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index ed552d09bbc..c22616b95f6 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -2432,6 +2432,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Z Buffer Clear Value */ #define R300_ZB_DEPTHCLEARVALUE 0x4f28 +#define R300_ZB_ZMASK_OFFSET 0x4f30 +#define R300_ZB_ZMASK_PITCH 0x4f34 +#define R300_ZB_ZMASK_WRINDEX 0x4f38 +#define R300_ZB_ZMASK_DWORD 0x4f3c +#define R300_ZB_ZMASK_RDINDEX 0x4f40 + /* Hierarchical Z Memory Offset */ #define R300_ZB_HIZ_OFFSET 0x4f44 @@ -3172,6 +3178,9 @@ enum { # define R300_W_SRC_RAS (1 << 2) +/* Packet0 field ordering to write all values to the same reg */ +#define RADEON_ONE_REG_WR (1 << 15) + /* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR. * Two parameter dwords: * 0. VAP_VTX_FMT: The first parameter is not written to hardware diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 924305dd128..1356305a212 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -50,6 +50,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * no bugs... */ +#include "r300_render.h" + #include "main/glheader.h" #include "main/state.h" #include "main/imports.h" @@ -72,8 +74,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_reg.h" #include "r300_tex.h" #include "r300_emit.h" -#include "r300_fragprog.h" -extern int future_hw_tcl_on; +#include "r300_fragprog_common.h" +#include "r300_swtcl.h" /** * \brief Convert a OpenGL primitive type into a R300 primitive type. @@ -170,16 +172,19 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) return num_verts - verts_off; } -static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts) +static void r300EmitElts(GLcontext * ctx, unsigned long n_elts) { r300ContextPtr rmesa = R300_CONTEXT(ctx); void *out; + GLuint size; + + size = ((rmesa->ind_buf.is_32bit ? 4 : 2) * n_elts + 3) & ~3; radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo, - &rmesa->radeon.tcl.elt_dma_offset, n_elts * 4, 4); + &rmesa->radeon.tcl.elt_dma_offset, size, 4); radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1); out = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset; - memcpy(out, elts, n_elts * 4); + memcpy(out, rmesa->ind_buf.ptr, size); radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo); } @@ -187,14 +192,23 @@ static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type) { BATCH_LOCALS(&rmesa->radeon); + r300_emit_scissor(rmesa->radeon.glCtx); if (vertex_count > 0) { + int size; + BEGIN_BATCH(10); OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0); - OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | - ((vertex_count + 0) << 16) | - type | + if (rmesa->ind_buf.is_32bit) { + size = vertex_count; + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + ((vertex_count + 0) << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - + } else { + size = (vertex_count + 1) >> 1; + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + ((vertex_count + 0) << 16) | type); + } + if (!rmesa->radeon.radeonScreen->kernel_mm) { OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | @@ -203,13 +217,13 @@ static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type) rmesa->radeon.tcl.elt_dma_bo, rmesa->radeon.tcl.elt_dma_offset, RADEON_GEM_DOMAIN_GTT, 0, 0); - OUT_BATCH(vertex_count); + OUT_BATCH(size); } else { OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | (R300_VAP_PORT_IDX0 >> 2)); OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset); - OUT_BATCH(vertex_count); + OUT_BATCH(size); radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, rmesa->radeon.tcl.elt_dma_bo, RADEON_GEM_DOMAIN_GTT, 0, 0); @@ -224,12 +238,12 @@ static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) uint32_t voffset; int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; int i; - + if (RADEON_DEBUG & DEBUG_VERTS) fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr, offset); - + if (!rmesa->radeon.radeonScreen->kernel_mm) { BEGIN_BATCH(sz+2+(nr * 2)); OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1); @@ -240,7 +254,7 @@ static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) (rmesa->radeon.tcl.aos[i].stride << 8) | (rmesa->radeon.tcl.aos[i + 1].components << 16) | (rmesa->radeon.tcl.aos[i + 1].stride << 24)); - + voffset = rmesa->radeon.tcl.aos[i + 0].offset + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; OUT_BATCH_RELOC(voffset, @@ -256,7 +270,7 @@ static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) RADEON_GEM_DOMAIN_GTT, 0, 0); } - + if (nr & 1) { OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) | (rmesa->radeon.tcl.aos[nr - 1].stride << 8)); @@ -280,7 +294,7 @@ static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) (rmesa->radeon.tcl.aos[i].stride << 8) | (rmesa->radeon.tcl.aos[i + 1].components << 16) | (rmesa->radeon.tcl.aos[i + 1].stride << 24)); - + voffset = rmesa->radeon.tcl.aos[i + 0].offset + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; OUT_BATCH(voffset); @@ -288,7 +302,7 @@ static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride; OUT_BATCH(voffset); } - + if (nr & 1) { OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) | (rmesa->radeon.tcl.aos[nr - 1].stride << 8)); @@ -327,18 +341,18 @@ static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type) { BATCH_LOCALS(&rmesa->radeon); + r300_emit_scissor(rmesa->radeon.glCtx); BEGIN_BATCH(3); OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type); END_BATCH(); } -static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, - int start, int end, int prim) +void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) { + r300ContextPtr rmesa = R300_CONTEXT(ctx); + BATCH_LOCALS(&rmesa->radeon); int type, num_verts; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; type = r300PrimitiveType(rmesa, prim); num_verts = r300NumVerts(rmesa, end - start, prim); @@ -350,9 +364,9 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, * This is supposed to ensure that we can get all rendering * commands into a single command buffer. */ - rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__); + rcommonEnsureCmdBufSpace(&rmesa->radeon, 128, __FUNCTION__); - if (vb->Elts) { + if (rmesa->ind_buf.ptr) { if (num_verts > 65535) { /* not implemented yet */ WARN_ONCE("Too many elts\n"); @@ -369,8 +383,14 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, * allocating the index array might actually evict the vertex * arrays. *sigh* */ - r300EmitElts(ctx, vb->Elts, num_verts); + r300EmitElts(ctx, num_verts); r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start); + if (rmesa->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGSEQ(R300_VAP_VF_MAX_VTX_INDX, 1); + OUT_BATCH(rmesa->radeon.tcl.aos[0].count); + END_BATCH(); + } r300FireEB(rmesa, num_verts, type); } else { r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start); @@ -379,8 +399,7 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, COMMIT_BATCH(); } -static GLboolean r300RunRender(GLcontext * ctx, - struct tnl_pipeline_stage *stage) +static void r300RunRender(GLcontext * ctx, struct tnl_pipeline_stage *stage) { r300ContextPtr rmesa = R300_CONTEXT(ctx); int i; @@ -391,8 +410,7 @@ static GLboolean r300RunRender(GLcontext * ctx, fprintf(stderr, "%s\n", __FUNCTION__); r300UpdateShaders(rmesa); - if (r300EmitArrays(ctx)) - return GL_TRUE; + r300EmitArrays(ctx); r300UpdateShaderStates(rmesa); @@ -403,77 +421,104 @@ static GLboolean r300RunRender(GLcontext * ctx, GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); GLuint start = vb->Primitive[i].start; GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; - r300RunRenderPrimitive(rmesa, ctx, start, end, prim); + r300RunRenderPrimitive(ctx, start, end, prim); } r300EmitCacheFlush(rmesa); radeonReleaseArrays(ctx, ~0); +} - return GL_FALSE; + +static const char *getFallbackString(uint32_t bit) +{ + switch (bit) { + case R300_FALLBACK_VERTEX_PROGRAM : + return "vertex program"; + case R300_FALLBACK_LINE_SMOOTH: + return "smooth lines"; + case R300_FALLBACK_POINT_SMOOTH: + return "smooth points"; + case R300_FALLBACK_POLYGON_SMOOTH: + return "smooth polygons"; + case R300_FALLBACK_LINE_STIPPLE: + return "line stipple"; + case R300_FALLBACK_POLYGON_STIPPLE: + return "polygon stipple"; + case R300_FALLBACK_STENCIL_TWOSIDE: + return "two-sided stencil"; + case R300_FALLBACK_RENDER_MODE: + return "render mode != GL_RENDER"; + case R300_FALLBACK_FRAGMENT_PROGRAM: + return "fragment program"; + case R300_FALLBACK_AOS_LIMIT: + return "aos limit"; + case R300_FALLBACK_INVALID_BUFFERS: + return "invalid buffers"; + default: + return "unknown"; + } } -#define FALLBACK_IF(expr) \ - do { \ - if (expr) { \ - if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \ - WARN_ONCE("Software fallback:%s\n", \ - #expr); \ - return R300_FALLBACK_RAST; \ - } \ - } while(0) - -static int r300Fallback(GLcontext * ctx) +void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) { - r300ContextPtr r300 = R300_CONTEXT(ctx); - const unsigned back = ctx->Stencil._BackFace; + TNLcontext *tnl = TNL_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + uint32_t old_fallback = rmesa->fallback; + static uint32_t fallback_warn = 0; - FALLBACK_IF(r300->radeon.Fallback); - /* Do we need to use new-style shaders? - * Also is there a better way to do this? */ - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - struct r500_fragment_program *fp = (struct r500_fragment_program *) - (char *)ctx->FragmentProgram._Current; - if (fp) { - if (!fp->translated) { - r500TranslateFragmentShader(r300, fp); - FALLBACK_IF(!fp->translated); + if (mode) { + if ((fallback_warn & bit) == 0) { + _mesa_fprintf(stderr, "WARNING! Falling back to software for %s\n", getFallbackString(bit)); + fallback_warn |= bit; + } + rmesa->fallback |= bit; + + /* update only if we change from no tcl fallbacks to some tcl fallbacks */ + if (rmesa->options.hw_tcl_enabled) { + if (((old_fallback & R300_TCL_FALLBACK_MASK) == 0) && + ((bit & R300_TCL_FALLBACK_MASK) > 0)) { + R300_STATECHANGE(rmesa, vap_cntl_status); + rmesa->hw.vap_cntl_status.cmd[1] |= R300_VAP_TCL_BYPASS; } } + + /* update only if we change from no raster fallbacks to some raster fallbacks */ + if (((old_fallback & R300_RASTER_FALLBACK_MASK) == 0) && + ((bit & R300_RASTER_FALLBACK_MASK) > 0)) { + + radeon_firevertices(&rmesa->radeon); + rmesa->radeon.swtcl.RenderIndex = ~0; + _swsetup_Wakeup( ctx ); + } } else { - struct r300_fragment_program *fp = (struct r300_fragment_program *) - (char *)ctx->FragmentProgram._Current; - if (fp) { - if (!fp->translated) { - r300TranslateFragmentShader(r300, fp); - FALLBACK_IF(!fp->translated); + rmesa->fallback &= ~bit; + + /* update only if we have disabled all tcl fallbacks */ + if (rmesa->options.hw_tcl_enabled) { + if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) { + R300_STATECHANGE(rmesa, vap_cntl_status); + rmesa->hw.vap_cntl_status.cmd[1] &= ~R300_VAP_TCL_BYPASS; } } - } - FALLBACK_IF(ctx->RenderMode != GL_RENDER); - - /* If GL_EXT_stencil_two_side is disabled, this fallback check can - * be removed. - */ - FALLBACK_IF(ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back] - || ctx->Stencil.ValueMask[0] != - ctx->Stencil.ValueMask[back] - || ctx->Stencil.WriteMask[0] != - ctx->Stencil.WriteMask[back]); - - if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite) - FALLBACK_IF(ctx->Point.PointSprite); - - if (!r300->disable_lowimpact_fallback) { - FALLBACK_IF(ctx->Polygon.StippleFlag); - FALLBACK_IF(ctx->Multisample._Enabled); - FALLBACK_IF(ctx->Line.StippleFlag); - FALLBACK_IF(ctx->Line.SmoothFlag); - FALLBACK_IF(ctx->Point.SmoothFlag); + /* update only if we have disabled all raster fallbacks */ + if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) { + _swrast_flush( ctx ); + + tnl->Driver.Render.Start = r300RenderStart; + tnl->Driver.Render.Finish = r300RenderFinish; + tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive; + tnl->Driver.Render.ResetLineStipple = r300ResetLineStipple; + tnl->Driver.Render.BuildVertices = _tnl_build_vertices; + tnl->Driver.Render.CopyPV = _tnl_copy_pv; + tnl->Driver.Render.Interp = _tnl_interp; + + _tnl_invalidate_vertex_state( ctx, ~0 ); + _tnl_invalidate_vertices( ctx, ~0 ); + } } - - return R300_FALLBACK_NONE; + } static GLboolean r300RunNonTCLRender(GLcontext * ctx, @@ -484,49 +529,15 @@ static GLboolean r300RunNonTCLRender(GLcontext * ctx, if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); - if (r300Fallback(ctx) >= R300_FALLBACK_RAST) + if (rmesa->fallback & R300_RASTER_FALLBACK_MASK) return GL_TRUE; - if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) - return GL_TRUE; - - if (!r300ValidateBuffers(ctx)) - return GL_TRUE; - - return r300RunRender(ctx, stage); -} - -static GLboolean r300RunTCLRender(GLcontext * ctx, - struct tnl_pipeline_stage *stage) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct r300_vertex_program *vp; - - hw_tcl_on = future_hw_tcl_on; - - if (RADEON_DEBUG & DEBUG_PRIMS) - fprintf(stderr, "%s\n", __FUNCTION__); - - if (hw_tcl_on == GL_FALSE) - return GL_TRUE; - - if (r300Fallback(ctx) >= R300_FALLBACK_TCL) { - hw_tcl_on = GL_FALSE; + if (rmesa->options.hw_tcl_enabled == GL_FALSE) return GL_TRUE; - } - if (!r300ValidateBuffers(ctx)) - return GL_TRUE; - - r300UpdateShaders(rmesa); - - vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); - if (vp->native == GL_FALSE) { - hw_tcl_on = GL_FALSE; - return GL_TRUE; - } + r300RunRender(ctx, stage); - return r300RunRender(ctx, stage); + return GL_FALSE; } const struct tnl_pipeline_stage _r300_render_stage = { @@ -537,12 +548,3 @@ const struct tnl_pipeline_stage _r300_render_stage = { NULL, r300RunNonTCLRender }; - -const struct tnl_pipeline_stage _r300_tcl_stage = { - "r300 Hardware Transform, Clipping and Lighting", - NULL, - NULL, - NULL, - NULL, - r300RunTCLRender -}; diff --git a/src/mesa/drivers/dri/r300/r300_render.h b/src/mesa/drivers/dri/r300/r300_render.h new file mode 100644 index 00000000000..ec785474a67 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_render.h @@ -0,0 +1,69 @@ +/* + * Copyright 2009 Maciej Cencora <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __R300_RENDER_H__ +#define __R300_RENDER_H__ + +#include "main/mtypes.h" + +#define R300_FALLBACK_VERTEX_PROGRAM (1 << 0) +#define R300_TCL_FALLBACK_MASK 0x0000ffff + +#define R300_FALLBACK_LINE_SMOOTH (1 << 16) +#define R300_FALLBACK_POINT_SMOOTH (1 << 17) +#define R300_FALLBACK_POLYGON_SMOOTH (1 << 18) +#define R300_FALLBACK_LINE_STIPPLE (1 << 19) +#define R300_FALLBACK_POLYGON_STIPPLE (1 << 20) +#define R300_FALLBACK_STENCIL_TWOSIDE (1 << 21) +#define R300_FALLBACK_RENDER_MODE (1 << 22) +#define R300_FALLBACK_FRAGMENT_PROGRAM (1 << 23) +#define R300_FALLBACK_AOS_LIMIT (1 << 30) +#define R300_FALLBACK_INVALID_BUFFERS (1 << 31) +#define R300_RASTER_FALLBACK_MASK 0xffff0000 + +#define MASK_XYZW (R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) +#define MASK_X R300_WRITE_ENA_X +#define MASK_Y R300_WRITE_ENA_Y +#define MASK_Z R300_WRITE_ENA_Z +#define MASK_W R300_WRITE_ENA_W + +#if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \ + SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \ + SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \ + SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \ + SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \ + SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE +#error Cannot change these! +#endif + +extern const struct tnl_pipeline_stage _r300_render_stage; + +extern void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode); + +extern void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim); + +#endif diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index f30fd986e0f..0133b837966 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -1,18 +1,42 @@ +/* + * Copyright 2009 Maciej Cencora <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ #include "main/glheader.h" #include "shader/program.h" #include "tnl/tnl.h" #include "r300_context.h" -#include "r300_fragprog.h" +#include "r300_fragprog_common.h" static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, GLuint id) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_vertex_program_cont *vp; - struct r300_fragment_program *r300_fp; - struct r500_fragment_program *r500_fp; + struct r300_fragment_program *fp; switch (target) { case GL_VERTEX_STATE_PROGRAM_NV: @@ -20,28 +44,12 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, vp = CALLOC_STRUCT(r300_vertex_program_cont); return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id); - case GL_FRAGMENT_PROGRAM_ARB: - if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - r500_fp = CALLOC_STRUCT(r500_fragment_program); - r500_fp->ctx = ctx; - return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program, - target, id); - } else { - r300_fp = CALLOC_STRUCT(r300_fragment_program); - return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program, - target, id); - } case GL_FRAGMENT_PROGRAM_NV: - if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - r500_fp = CALLOC_STRUCT(r500_fragment_program); - return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program, - target, id); - } else { - r300_fp = CALLOC_STRUCT(r300_fragment_program); - return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program, - target, id); - } + case GL_FRAGMENT_PROGRAM_ARB: + fp = CALLOC_STRUCT(r300_fragment_program); + return _mesa_init_fragment_program(ctx, &fp->Base, target, id); + default: _mesa_problem(ctx, "Bad target in r300NewProgram"); } @@ -57,20 +65,15 @@ static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog) static void r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_vertex_program_cont *vp = (void *)prog; struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)prog; - struct r500_fragment_program *r500_fp = (struct r500_fragment_program *)prog; switch (target) { case GL_VERTEX_PROGRAM_ARB: vp->progs = NULL; break; case GL_FRAGMENT_PROGRAM_ARB: - if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) - r500_fp->translated = GL_FALSE; - else - r300_fp->translated = GL_FALSE; + r300_fp->translated = GL_FALSE; break; } @@ -81,7 +84,14 @@ r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) static GLboolean r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) { - return GL_TRUE; + if (target == GL_FRAGMENT_PROGRAM_ARB) { + struct r300_fragment_program *fp = (struct r300_fragment_program *)prog; + if (!fp->translated) + r300TranslateFragmentShader(ctx, &fp->Base); + + return !fp->error; + } else + return GL_TRUE; } void r300InitShaderFuncs(struct dd_function_table *functions) diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 46c3df70991..c0eda977db8 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -53,20 +53,22 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "shader/prog_statevars.h" #include "vbo/vbo.h" #include "tnl/tnl.h" +#include "tnl/t_vp_build.h" #include "r300_context.h" #include "r300_ioctl.h" #include "r300_state.h" #include "r300_reg.h" #include "r300_emit.h" -#include "r300_fragprog.h" #include "r300_tex.h" +#include "r300_fragprog_common.h" +#include "r300_fragprog.h" +#include "r500_fragprog.h" +#include "r300_render.h" +#include "r300_vertprog.h" #include "drirenderbuffer.h" -extern int future_hw_tcl_on; -extern void _tnl_UpdateFixedFunctionProgram(GLcontext * ctx); - static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4]) { r300ContextPtr rmesa = R300_CONTEXT(ctx); @@ -365,7 +367,7 @@ static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) GLint *ip; /* no VAP UCP on non-TCL chipsets */ - if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + if (!rmesa->options.hw_tcl_enabled) return; p = (GLint) plane - (GLint) GL_CLIP_PLANE0; @@ -384,7 +386,7 @@ static void r300SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state) GLuint p; /* no VAP UCP on non-TCL chipsets */ - if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + if (!r300->options.hw_tcl_enabled) return; p = cap - GL_CLIP_PLANE0; @@ -450,28 +452,22 @@ static void r300SetPolygonOffsetState(GLcontext * ctx, GLboolean state) static GLboolean current_fragment_program_writes_depth(GLcontext* ctx) { - r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; - if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { - struct r300_fragment_program *fp = (struct r300_fragment_program *) - (char *)ctx->FragmentProgram._Current; - return (fp && fp->WritesDepth); - } else { - struct r500_fragment_program* fp = - (struct r500_fragment_program*)(char*) - ctx->FragmentProgram._Current; - return (fp && fp->writes_depth); - } + return (fp && fp->writes_depth); } static void r300SetEarlyZState(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); GLuint topZ = R300_ZTOP_ENABLE; + GLuint w_fmt, fgdepthsrc; if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS) topZ = R300_ZTOP_DISABLE; - if (current_fragment_program_writes_depth(ctx)) + else if (current_fragment_program_writes_depth(ctx)) + topZ = R300_ZTOP_DISABLE; + else if (ctx->FragmentProgram._Current && ctx->FragmentProgram._Current->UsesKill) topZ = R300_ZTOP_DISABLE; if (topZ != r300->hw.zstencil_format.cmd[2]) { @@ -482,6 +478,26 @@ static void r300SetEarlyZState(GLcontext * ctx) R300_STATECHANGE(r300, zstencil_format); r300->hw.zstencil_format.cmd[2] = topZ; } + + /* w_fmt value is set to get best performance + * see p.130 R5xx 3D acceleration guide v1.3 */ + if (current_fragment_program_writes_depth(ctx)) { + fgdepthsrc = R300_FG_DEPTH_SRC_SHADER; + w_fmt = R300_W_FMT_W24 | R300_W_SRC_US; + } else { + fgdepthsrc = R300_FG_DEPTH_SRC_SCAN; + w_fmt = R300_W_FMT_W0 | R300_W_SRC_US; + } + + if (w_fmt != r300->hw.us_out_fmt.cmd[5]) { + R300_STATECHANGE(r300, us_out_fmt); + r300->hw.us_out_fmt.cmd[5] = w_fmt; + } + + if (fgdepthsrc != r300->hw.fg_depth_src.cmd[1]) { + R300_STATECHANGE(r300, fg_depth_src); + r300->hw.fg_depth_src.cmd[1] = fgdepthsrc; + } } static void r300SetAlphaState(GLcontext * ctx) @@ -532,8 +548,6 @@ static void r300SetAlphaState(GLcontext * ctx) R300_STATECHANGE(r300, at); r300->hw.at.cmd[R300_AT_ALPHA_TEST] = pp_misc; r300->hw.at.cmd[R300_AT_UNKNOWN] = 0; - - r300SetEarlyZState(ctx); } static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref) @@ -581,14 +595,28 @@ static void r300SetDepthState(GLcontext * ctx) r300->hw.zs.cmd[R300_ZS_CNTL_1] |= translate_func(ctx->Depth.Func) << R300_Z_FUNC_SHIFT; } +} - r300SetEarlyZState(ctx); +static void r300CatchStencilFallback(GLcontext *ctx) +{ + const unsigned back = ctx->Stencil._BackFace; + + if (ctx->Stencil._Enabled && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back] + || ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back] + || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back])) { + r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_TRUE); + } else { + r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_FALSE); + } } static void r300SetStencilState(GLcontext * ctx, GLboolean state) { r300ContextPtr r300 = R300_CONTEXT(ctx); GLboolean hw_stencil = GL_FALSE; + + r300CatchStencilFallback(ctx); + if (ctx->DrawBuffer) { struct radeon_renderbuffer *rrbStencil = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL); @@ -604,10 +632,6 @@ static void r300SetStencilState(GLcontext * ctx, GLboolean state) r300->hw.zs.cmd[R300_ZS_CNTL_0] &= ~R300_STENCIL_ENABLE; } - } else { -#if R200_MERGED - FALLBACK(&r300->radeon, RADEON_FALLBACK_STENCIL, state); -#endif } } @@ -740,7 +764,12 @@ static void r300ColorMask(GLcontext * ctx, static void r300PointSize(GLcontext * ctx, GLfloat size) { r300ContextPtr r300 = R300_CONTEXT(ctx); - /* same size limits for AA, non-AA points */ + + /* We need to clamp to user defined range here, because + * the HW clamping happens only for per vertex point size. */ + size = CLAMP(size, ctx->Point.MinSize, ctx->Point.MaxSize); + + /* same size limits for AA, non-AA points */ size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize); R300_STATECHANGE(r300, ps); @@ -833,29 +862,33 @@ static void r300ShadeModel(GLcontext * ctx, GLenum mode) R300_STATECHANGE(rmesa, shade); rmesa->hw.shade.cmd[1] = 0x00000002; + R300_STATECHANGE(rmesa, shade2); switch (mode) { case GL_FLAT: - rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_FLAT; + rmesa->hw.shade2.cmd[1] = R300_RE_SHADE_MODEL_FLAT; break; case GL_SMOOTH: - rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_SMOOTH; + rmesa->hw.shade2.cmd[1] = R300_RE_SHADE_MODEL_SMOOTH; break; default: return; } - rmesa->hw.shade.cmd[3] = 0x00000000; - rmesa->hw.shade.cmd[4] = 0x00000000; + rmesa->hw.shade2.cmd[2] = 0x00000000; + rmesa->hw.shade2.cmd[3] = 0x00000000; } static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face, GLenum func, GLint ref, GLuint mask) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - GLuint refmask = - ((ctx->Stencil.Ref[0] & 0xff) << R300_STENCILREF_SHIFT) - | ((ctx->Stencil.ValueMask[0] & 0xff) << R300_STENCILMASK_SHIFT); - const unsigned back = ctx->Stencil._BackFace; + GLuint refmask; GLuint flag; + const unsigned back = ctx->Stencil._BackFace; + + r300CatchStencilFallback(ctx); + + refmask = ((ctx->Stencil.Ref[0] & 0xff) << R300_STENCILREF_SHIFT) + | ((ctx->Stencil.ValueMask[0] & 0xff) << R300_STENCILMASK_SHIFT); R300_STATECHANGE(rmesa, zs); rmesa->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_STENCIL_FRONT_BACK; @@ -883,6 +916,8 @@ static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) { r300ContextPtr rmesa = R300_CONTEXT(ctx); + r300CatchStencilFallback(ctx); + R300_STATECHANGE(rmesa, zs); rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &= ~(R300_STENCILREF_MASK << @@ -899,6 +934,8 @@ static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, r300ContextPtr rmesa = R300_CONTEXT(ctx); const unsigned back = ctx->Stencil._BackFace; + r300CatchStencilFallback(ctx); + R300_STATECHANGE(rmesa, zs); /* It is easier to mask what's left.. */ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= @@ -927,16 +964,10 @@ static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, * Window position and viewport transformation */ -/* - * To correctly position primitives: - */ -#define SUBPIXEL_X 0.125 -#define SUBPIXEL_Y 0.125 - static void r300UpdateWindow(GLcontext * ctx) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0; GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0; const GLfloat *v = ctx->Viewport._WindowMap.m; @@ -953,9 +984,9 @@ static void r300UpdateWindow(GLcontext * ctx) } GLfloat sx = v[MAT_SX]; - GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X; + GLfloat tx = v[MAT_TX] + xoffset; GLfloat sy = v[MAT_SY] * y_scale; - GLfloat ty = (v[MAT_TY] * y_scale) + y_bias + SUBPIXEL_Y; + GLfloat ty = (v[MAT_TY] * y_scale) + y_bias; GLfloat sz = v[MAT_SZ] * depthScale; GLfloat tz = v[MAT_TZ] * depthScale; @@ -989,13 +1020,13 @@ static void r300DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval) void r300UpdateViewportOffset(GLcontext * ctx) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = ((radeonContextPtr) rmesa)->dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); GLfloat xoffset = (GLfloat) dPriv->x; GLfloat yoffset = (GLfloat) dPriv->y + dPriv->h; const GLfloat *v = ctx->Viewport._WindowMap.m; - GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X; - GLfloat ty = (-v[MAT_TY]) + yoffset + SUBPIXEL_Y; + GLfloat tx = v[MAT_TX] + xoffset; + GLfloat ty = (-v[MAT_TY]) + yoffset; if (rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] != r300PackFloat32(tx) || rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] != r300PackFloat32(ty)) { @@ -1021,12 +1052,14 @@ r300FetchStateParameter(GLcontext * ctx, switch (state[0]) { case STATE_INTERNAL: switch (state[1]) { - case STATE_R300_WINDOW_DIMENSION: - value[0] = r300->radeon.dri.drawable->w * 0.5f; /* width*0.5 */ - value[1] = r300->radeon.dri.drawable->h * 0.5f; /* height*0.5 */ - value[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */ - value[3] = 1.0F; /* not used */ - break; + case STATE_R300_WINDOW_DIMENSION: { + __DRIdrawablePrivate * drawable = radeon_get_drawable(&r300->radeon); + value[0] = drawable->w * 0.5f; /* width*0.5 */ + value[1] = drawable->h * 0.5f; /* height*0.5 */ + value[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */ + value[3] = 1.0F; /* not used */ + break; + } case STATE_R300_TEXRECT_FACTOR:{ struct gl_texture_object *t = @@ -1066,14 +1099,14 @@ void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) struct gl_program_parameter_list *paramList; GLuint i; - if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM))) + if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))) return; fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current; if (!fp) return; - paramList = fp->mesa_program.Base.Parameters; + paramList = fp->Base.Base.Parameters; if (!paramList) return; @@ -1192,9 +1225,8 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) { r300ContextPtr r300 = R300_CONTEXT(ctx); int i; - struct r300_fragment_program *fp = (struct r300_fragment_program *) - (char *)ctx->FragmentProgram._Current; - struct r300_fragment_program_code *code = &fp->code; + struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; + struct r300_fragment_program_code *code = &fp->code.r300; R300_STATECHANGE(r300, fpt); @@ -1235,9 +1267,8 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) { int i; - struct r500_fragment_program *fp = (struct r500_fragment_program *) - (char *)ctx->FragmentProgram._Current; - struct r500_fragment_program_code *code = &fp->code; + struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; + struct r500_fragment_program_code *code = &fp->code.r500; /* find all the texture instructions and relocate the texture units */ for (i = 0; i < code->inst_end + 1; i++) { @@ -1388,11 +1419,8 @@ static void r300SetupTextures(GLcontext * ctx) r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1); - if (!fp) /* should only happenen once, just after context is created */ - return; - if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { - if (fp->mesa_program.UsesKill && last_hw_tmu < 0) { + if (fp->Base.UsesKill && last_hw_tmu < 0) { // The KILL operation requires the first texture unit // to be enabled. r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1; @@ -1400,9 +1428,8 @@ static void r300SetupTextures(GLcontext * ctx) r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 1); } - r300SetupFragmentShaderTextures(ctx, tmu_mappings); - } else - r500SetupFragmentShaderTextures(ctx, tmu_mappings); + } + r300->vtbl.SetupFragmentShaderTextures(ctx, tmu_mappings); if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "TX_ENABLE: %08x last_hw_tmu=%d\n", @@ -1421,26 +1448,21 @@ union r300_outputs_written { static void r300SetupRSUnit(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; union r300_outputs_written OutputsWritten; GLuint InputsRead; int fp_reg, high_rr; int col_ip, tex_ip; int rs_tex_count = 0; - int i, count, col_fmt; + int i, col_fmt, hw_tcl_on; + + hw_tcl_on = r300->options.hw_tcl_enabled; if (hw_tcl_on) - OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; + OutputsWritten.vp_outputs = r300->selected_vp->key.OutputsWritten; else - RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset); + RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); - if (ctx->FragmentProgram._Current) - InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; - else { - fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); - return; /* This should only ever happen once.. */ - } + InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; R300_STATECHANGE(r300, ri); R300_STATECHANGE(r300, rc); @@ -1459,15 +1481,7 @@ static void r300SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL0) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { - count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; - if (count == 4) - col_fmt = R300_RS_COL_FMT_RGBA; - else if (count == 3) - col_fmt = R300_RS_COL_FMT_RGB1; - else - col_fmt = R300_RS_COL_FMT_0001; - - r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(col_fmt); + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg); InputsRead &= ~FRAG_BIT_COL0; ++col_ip; @@ -1479,15 +1493,7 @@ static void r300SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL1) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { - count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; - if (count == 4) - col_fmt = R300_RS_COL_FMT_RGBA; - else if (count == 3) - col_fmt = R300_RS_COL_FMT_RGB1; - else - col_fmt = R300_RS_COL_FMT_0001; - - r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(col_fmt); + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg); InputsRead &= ~FRAG_BIT_COL1; ++col_ip; @@ -1497,6 +1503,7 @@ static void r300SetupRSUnit(GLcontext * ctx) } } + /* We always route 4 texcoord components */ for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { if (! ( InputsRead & FRAG_BIT_TEX(i) ) ) continue; @@ -1506,26 +1513,10 @@ static void r300SetupRSUnit(GLcontext * ctx) continue; } - int swiz; - - /* with TCL we always seem to route 4 components */ - if (hw_tcl_on) - count = 4; - else - count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; - - switch(count) { - case 4: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3); break; - case 3: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(R300_RS_SEL_K1); break; - default: - case 1: - case 2: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); break; - }; - - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz | R300_RS_TEX_PTR(rs_tex_count); + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count); r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); InputsRead &= ~(FRAG_BIT_TEX0 << i); - rs_tex_count += count; + rs_tex_count += 4; ++tex_ip; ++fp_reg; } @@ -1545,7 +1536,7 @@ static void r300SetupRSUnit(GLcontext * ctx) r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_Q(R300_RS_SEL_K1) | R300_RS_TEX_PTR(rs_tex_count); r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); InputsRead &= ~FRAG_BIT_FOGC; - rs_tex_count += 1; + rs_tex_count += 4; ++tex_ip; ++fp_reg; } else { @@ -1555,7 +1546,8 @@ static void r300SetupRSUnit(GLcontext * ctx) /* Setup default color if no color or tex was set */ if (rs_tex_count == 0 && col_ip == 0) { - r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(0) | R300_RS_COL_FMT(R300_RS_COL_FMT_0001); + r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_ADDR(0); + r300->hw.ri.cmd[R300_RI_INTERP_0] = R300_RS_COL_PTR(0) | R300_RS_COL_FMT(R300_RS_COL_FMT_0001); ++col_ip; } @@ -1564,6 +1556,7 @@ static void r300SetupRSUnit(GLcontext * ctx) r300->hw.rc.cmd[2] |= high_rr - 1; r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, high_rr); + r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_IP_0, high_rr); if (InputsRead) WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); @@ -1572,26 +1565,21 @@ static void r300SetupRSUnit(GLcontext * ctx) static void r500SetupRSUnit(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; union r300_outputs_written OutputsWritten; GLuint InputsRead; int fp_reg, high_rr; int col_ip, tex_ip; int rs_tex_count = 0; - int i, count, col_fmt; + int i, col_fmt, hw_tcl_on; + + hw_tcl_on = r300->options.hw_tcl_enabled; if (hw_tcl_on) - OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; + OutputsWritten.vp_outputs = r300->selected_vp->key.OutputsWritten; else - RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset); + RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); - if (ctx->FragmentProgram._Current) - InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; - else { - fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); - return; /* This should only ever happen once.. */ - } + InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; R300_STATECHANGE(r300, ri); R300_STATECHANGE(r300, rc); @@ -1610,15 +1598,7 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL0) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { - count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; - if (count == 4) - col_fmt = R300_RS_COL_FMT_RGBA; - else if (count == 3) - col_fmt = R300_RS_COL_FMT_RGB1; - else - col_fmt = R300_RS_COL_FMT_0001; - - r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(col_fmt); + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg); InputsRead &= ~FRAG_BIT_COL0; ++col_ip; @@ -1630,15 +1610,7 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL1) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { - count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; - if (count == 4) - col_fmt = R300_RS_COL_FMT_RGBA; - else if (count == 3) - col_fmt = R300_RS_COL_FMT_RGB1; - else - col_fmt = R300_RS_COL_FMT_0001; - - r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(col_fmt); + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg); InputsRead &= ~FRAG_BIT_COL1; ++col_ip; @@ -1648,7 +1620,7 @@ static void r500SetupRSUnit(GLcontext * ctx) } } - + /* We always route 4 texcoord components */ for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { if (! ( InputsRead & FRAG_BIT_TEX(i) ) ) continue; @@ -1658,55 +1630,37 @@ static void r500SetupRSUnit(GLcontext * ctx) continue; } - int swiz = 0; - - /* with TCL we always seem to route 4 components */ - if (hw_tcl_on) - count = 4; - else - count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; - - if (count == 4) { - swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; - swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT; - swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT; - swiz |= (rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT; - } else if (count == 3) { - swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; - swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT; - swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT; - swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; - } else if (count == 2) { - swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; - swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT; - swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; - swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; - } else if (count == 1) { - swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; - swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT; - swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; - swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; - } else { - swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT; - swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT; - swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; - swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; - } + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) | + ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) | + ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) | + ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT); - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz; r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); InputsRead &= ~(FRAG_BIT_TEX0 << i); - rs_tex_count += count; + rs_tex_count += 4; + ++tex_ip; + ++fp_reg; + } + + if (InputsRead & FRAG_BIT_WPOS) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) | + ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) | + ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) | + ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT); + + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_WPOS; + rs_tex_count += 4; ++tex_ip; ++fp_reg; } if (InputsRead & FRAG_BIT_FOGC) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) | - ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) | - ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) | - ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT); + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= (rs_tex_count << R500_RS_IP_TEX_PTR_S_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | + (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT); r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); InputsRead &= ~FRAG_BIT_FOGC; @@ -1718,87 +1672,27 @@ static void r500SetupRSUnit(GLcontext * ctx) } } - if (InputsRead & FRAG_BIT_WPOS) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) | - ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) | - ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) | - ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT); - - r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); - InputsRead &= ~FRAG_BIT_WPOS; - rs_tex_count += 4; - ++tex_ip; - ++fp_reg; - } - /* Setup default color if no color or tex was set */ if (rs_tex_count == 0 && col_ip == 0) { - r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(0) | R500_RS_COL_FMT(R300_RS_COL_FMT_0001); + r300->hw.rr.cmd[R300_RR_INST_0] = R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_ADDR(0); + r300->hw.ri.cmd[R300_RI_INTERP_0] = R500_RS_COL_PTR(0) | R500_RS_COL_FMT(R300_RS_COL_FMT_0001); ++col_ip; } high_rr = (col_ip > tex_ip) ? col_ip : tex_ip; - r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; - r300->hw.rc.cmd[2] |= 0xC0 | (high_rr - 1); + r300->hw.rc.cmd[1] = (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; + r300->hw.rc.cmd[2] = 0xC0 | (high_rr - 1); r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, high_rr); + r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_IP_0, high_rr); if (InputsRead) WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); } - - - -#define bump_vpu_count(ptr, new_count) do{\ - drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ - int _nc=(new_count)/4; \ - assert(_nc < 256); \ - if(_nc>_p->vpu.count)_p->vpu.count=_nc;\ - }while(0) - -static INLINE void r300SetupVertexProgramFragment(r300ContextPtr r300, int dest, struct r300_vertex_shader_fragment *vsf) -{ - int i; - - if (vsf->length == 0) - return; - - if (vsf->length & 0x3) { - fprintf(stderr, "VERTEX_SHADER_FRAGMENT must have length divisible by 4\n"); - _mesa_exit(-1); - } - - switch ((dest >> 8) & 0xf) { - case 0: - R300_STATECHANGE(r300, vpi); - for (i = 0; i < vsf->length; i++) - r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]); - bump_vpu_count(r300->hw.vpi.cmd, vsf->length + 4 * (dest & 0xff)); - break; - - case 2: - R300_STATECHANGE(r300, vpp); - for (i = 0; i < vsf->length; i++) - r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]); - bump_vpu_count(r300->hw.vpp.cmd, vsf->length + 4 * (dest & 0xff)); - break; - case 4: - R300_STATECHANGE(r300, vps); - for (i = 0; i < vsf->length; i++) - r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]); - bump_vpu_count(r300->hw.vps.cmd, vsf->length + 4 * (dest & 0xff)); - break; - default: - fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest); - _mesa_exit(-1); - } -} - #define MIN3(a, b, c) ((a) < (b) ? MIN2(a, c) : MIN2(b, c)) - -static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, +void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, GLuint output_count, GLuint temp_count) { int vtx_mem_size; @@ -1822,7 +1716,7 @@ static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, pvs_num_cntrls = MIN2(6, vtx_mem_size/temp_count); R300_STATECHANGE(rmesa, vap_cntl); - if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { + if (rmesa->options.hw_tcl_enabled) { rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] = (pvs_num_slots << R300_PVS_NUM_SLOTS_SHIFT) | (pvs_num_cntrls << R300_PVS_NUM_CNTLRS_SHIFT) | @@ -1852,115 +1746,6 @@ static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, } -static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) -{ - struct r300_vertex_shader_state *prog = &(rmesa->state.vertex_shader); - GLuint o_reg = 0; - GLuint i_reg = 0; - int i; - int inst_count = 0; - int param_count = 0; - int program_end = 0; - - for (i = VERT_ATTRIB_POS; i < VERT_ATTRIB_MAX; i++) { - if (rmesa->state.sw_tcl_inputs[i] != -1) { - prog->program.body.i[program_end + 0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, GL_FALSE, GL_FALSE, o_reg++, VSF_FLAG_ALL, PVS_DST_REG_OUT); - prog->program.body.i[program_end + 1] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); - prog->program.body.i[program_end + 2] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); - prog->program.body.i[program_end + 3] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); - program_end += 4; - i_reg++; - } - } - - prog->program.length = program_end; - - r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START, - &(prog->program)); - inst_count = (prog->program.length / 4) - 1; - - r300VapCntl(rmesa, i_reg, o_reg, 0); - - R300_STATECHANGE(rmesa, pvs); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = - (0 << R300_PVS_FIRST_INST_SHIFT) | - (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | - (inst_count << R300_PVS_LAST_INST_SHIFT); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = - (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | - (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = - (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); -} - -static int bit_count (int x) -{ - x = ((x & 0xaaaaaaaaU) >> 1) + (x & 0x55555555U); - x = ((x & 0xccccccccU) >> 2) + (x & 0x33333333U); - x = (x >> 16) + (x & 0xffff); - x = ((x & 0xf0f0) >> 4) + (x & 0x0f0f); - return (x >> 8) + (x & 0x00ff); -} - -static void r300SetupRealVertexProgram(r300ContextPtr rmesa) -{ - GLcontext *ctx = rmesa->radeon.glCtx; - struct r300_vertex_program *prog = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); - int inst_count = 0; - int param_count = 0; - - /* FIXME: r300SetupVertexProgramFragment */ - R300_STATECHANGE(rmesa, vpp); - param_count = - r300VertexProgUpdateParams(ctx, - (struct r300_vertex_program_cont *) - ctx->VertexProgram._Current, - (float *)&rmesa->hw.vpp. - cmd[R300_VPP_PARAM_0]); - bump_vpu_count(rmesa->hw.vpp.cmd, param_count); - param_count /= 4; - - r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START, &(prog->program)); - inst_count = (prog->program.length / 4) - 1; - - r300VapCntl(rmesa, bit_count(prog->key.InputsRead), - bit_count(prog->key.OutputsWritten), prog->num_temporaries); - - R300_STATECHANGE(rmesa, pvs); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = - (0 << R300_PVS_FIRST_INST_SHIFT) | - (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | - (inst_count << R300_PVS_LAST_INST_SHIFT); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = - (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | - (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = - (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); -} - - -static void r300SetupVertexProgram(r300ContextPtr rmesa) -{ - GLcontext *ctx = rmesa->radeon.glCtx; - - /* Reset state, in case we don't use something */ - ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0; - ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; - ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; - - /* Not sure why this doesnt work... - 0x400 area might have something to do with pixel shaders as it appears right after pfs programming. - 0x406 is set to { 0.0, 0.0, 1.0, 0.0 } most of the time but should change with smooth points and in other rare cases. */ - //setup_vertex_shader_fragment(rmesa, 0x406, &unk4); - if (hw_tcl_on && ((struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx))->translated) { - r300SetupRealVertexProgram(rmesa); - } else { - /* FIXME: This needs to be replaced by vertex shader generation code. */ - r300SetupDefaultVertexProgram(rmesa); - } - -} - /** * Enable/Disable states. * @@ -1975,14 +1760,6 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) state ? "GL_TRUE" : "GL_FALSE"); switch (cap) { - case GL_TEXTURE_1D: - case GL_TEXTURE_2D: - case GL_TEXTURE_3D: - /* empty */ - break; - case GL_FOG: - /* empty */ - break; case GL_ALPHA_TEST: r300SetAlphaState(ctx); break; @@ -2000,14 +1777,31 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) case GL_CLIP_PLANE5: r300SetClipPlaneState(ctx, cap, state); break; + case GL_CULL_FACE: + r300UpdateCulling(ctx); + break; case GL_DEPTH_TEST: r300SetDepthState(ctx); break; - case GL_STENCIL_TEST: - r300SetStencilState(ctx, state); + case GL_LINE_SMOOTH: + if (rmesa->options.conformance_mode) + r300SwitchFallback(ctx, R300_FALLBACK_LINE_SMOOTH, ctx->Line.SmoothFlag); break; - case GL_CULL_FACE: - r300UpdateCulling(ctx); + case GL_LINE_STIPPLE: + if (rmesa->options.conformance_mode) + r300SwitchFallback(ctx, R300_FALLBACK_LINE_STIPPLE, ctx->Line.StippleFlag); + break; + case GL_POINT_SMOOTH: + if (rmesa->options.conformance_mode) + r300SwitchFallback(ctx, R300_FALLBACK_POINT_SMOOTH, ctx->Point.SmoothFlag); + break; + case GL_POLYGON_SMOOTH: + if (rmesa->options.conformance_mode) + r300SwitchFallback(ctx, R300_FALLBACK_POLYGON_SMOOTH, ctx->Polygon.SmoothFlag); + break; + case GL_POLYGON_STIPPLE: + if (rmesa->options.conformance_mode) + r300SwitchFallback(ctx, R300_FALLBACK_POLYGON_STIPPLE, ctx->Polygon.StippleFlag); break; case GL_POLYGON_OFFSET_POINT: case GL_POLYGON_OFFSET_LINE: @@ -2019,6 +1813,9 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) rmesa->radeon.state.scissor.enabled = state; radeonUpdateScissor( ctx ); break; + case GL_STENCIL_TEST: + r300SetStencilState(ctx, state); + break; default: break; } @@ -2030,10 +1827,9 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) static void r300ResetHwState(r300ContextPtr r300) { GLcontext *ctx = r300->radeon.glCtx; - int has_tcl = 1; + int has_tcl; - if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) - has_tcl = 0; + has_tcl = r300->options.hw_tcl_enabled; if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "%s\n", __FUNCTION__); @@ -2138,8 +1934,8 @@ static void r300ResetHwState(r300ContextPtr r300) } /* XXX: Enable anti-aliasing? */ - r300->hw.gb_misc.cmd[R300_GB_MISC_AA_CONFIG] = GB_AA_CONFIG_AA_DISABLE; - r300->hw.gb_misc.cmd[R300_GB_MISC_SELECT] = 0; + r300->hw.gb_misc2.cmd[R300_GB_MISC2_AA_CONFIG] = GB_AA_CONFIG_AA_DISABLE; + r300->hw.gb_misc2.cmd[R300_GB_MISC2_SELECT] = 0; r300->hw.ga_point_s0.cmd[1] = r300PackFloat32(0.0); r300->hw.ga_point_s0.cmd[2] = r300PackFloat32(0.0); @@ -2210,8 +2006,8 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.rb3d_aaresolve_ctl.cmd[1] = 0; - r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000; - r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff; + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000; + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff; r300->hw.zb_depthclearvalue.cmd[1] = 0; @@ -2220,8 +2016,8 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.zstencil_format.cmd[4] = 0x00000000; r300SetEarlyZState(ctx); - r300->hw.unk4F30.cmd[1] = 0; - r300->hw.unk4F30.cmd[2] = 0; + r300->hw.zb_zmask.cmd[1] = 0; + r300->hw.zb_zmask.cmd[2] = 0; r300->hw.zb_hiz_offset.cmd[1] = 0; @@ -2241,14 +2037,20 @@ static void r300ResetHwState(r300ContextPtr r300) void r300UpdateShaders(r300ContextPtr rmesa) { GLcontext *ctx; - struct r300_vertex_program *vp; + struct r300_fragment_program *fp; int i; ctx = rmesa->radeon.glCtx; + fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; - if (rmesa->radeon.NewGLState && hw_tcl_on) { - rmesa->radeon.NewGLState = 0; + /* should only happenen once, just after context is created */ + /* TODO: shouldn't we fallback to sw here? */ + if (!fp) { + _mesa_fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); + return; + } + if (rmesa->radeon.NewGLState && rmesa->options.hw_tcl_enabled) { for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { rmesa->temp_attrib[i] = TNL_CONTEXT(ctx)->vb.AttribPtr[i]; @@ -2264,20 +2066,16 @@ void r300UpdateShaders(r300ContextPtr rmesa) } r300SelectVertexShader(rmesa); - vp = (struct r300_vertex_program *) - CURRENT_VERTEX_SHADER(ctx); - /*if (vp->translated == GL_FALSE) - r300TranslateVertexShader(vp); */ - if (vp->translated == GL_FALSE) { - fprintf(stderr, "Failing back to sw-tcl\n"); - hw_tcl_on = future_hw_tcl_on = 0; - r300ResetHwState(rmesa); - - r300UpdateStateParameters(ctx, _NEW_PROGRAM); - return; - } + r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, rmesa->selected_vp->error); } - r300UpdateStateParameters(ctx, _NEW_PROGRAM); + + if (!fp->translated || rmesa->radeon.NewGLState) + r300TranslateFragmentShader(ctx, ctx->FragmentProgram._Current); + + r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, fp->error); + + r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + rmesa->radeon.NewGLState = 0; } static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, @@ -2301,26 +2099,14 @@ static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, } -static void r300SetupPixelShader(r300ContextPtr rmesa) +static void r300SetupPixelShader(GLcontext *ctx) { - GLcontext *ctx = rmesa->radeon.glCtx; - struct r300_fragment_program *fp = (struct r300_fragment_program *) - (char *)ctx->FragmentProgram._Current; + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; struct r300_fragment_program_code *code; int i, k; - if (!fp) /* should only happenen once, just after context is created */ - return; - - r300TranslateFragmentShader(rmesa, fp); - if (!fp->translated) { - fprintf(stderr, "%s: No valid fragment shader, exiting\n", - __FUNCTION__); - return; - } - code = &fp->code; - - r300SetupTextures(ctx); + code = &fp->code.r300; R300_STATECHANGE(rmesa, fpi[0]); R300_STATECHANGE(rmesa, fpi[1]); @@ -2363,7 +2149,7 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4); for (i = 0; i < code->const_nr; i++) { const GLfloat *constant = get_fragmentprogram_constant(ctx, - &fp->mesa_program.Base, code->constant[i]); + &fp->Base.Base, code->constant[i]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]); @@ -2385,29 +2171,17 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\ } while(0) -static void r500SetupPixelShader(r300ContextPtr rmesa) +static void r500SetupPixelShader(GLcontext *ctx) { - GLcontext *ctx = rmesa->radeon.glCtx; - struct r500_fragment_program *fp = (struct r500_fragment_program *) - (char *)ctx->FragmentProgram._Current; + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; int i; struct r500_fragment_program_code *code; - if (!fp) /* should only happenen once, just after context is created */ - return; - ((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0; ((drm_r300_cmd_header_t *) rmesa->hw.r500fp_const.cmd)->r500fp.count = 0; - r500TranslateFragmentShader(rmesa, fp); - if (!fp->translated) { - fprintf(stderr, "%s: No valid fragment shader, exiting\n", - __FUNCTION__); - return; - } - code = &fp->code; - - r300SetupTextures(ctx); + code = &fp->code.r500; R300_STATECHANGE(rmesa, fp); rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = code->max_temp_idx; @@ -2437,57 +2211,96 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) R300_STATECHANGE(rmesa, r500fp_const); for (i = 0; i < code->const_nr; i++) { const GLfloat *constant = get_fragmentprogram_constant(ctx, - &fp->mesa_program.Base, code->constant[i]); + &fp->Base.Base, code->constant[i]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(constant[3]); } bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, code->const_nr * 4); +} + +void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten) +{ + r300ContextPtr rmesa = R300_CONTEXT( ctx ); + struct vertex_attribute *attrs = rmesa->vbuf.attribs; + int i, j, reg_count; + uint32_t *vir0 = &rmesa->hw.vir[0].cmd[1]; + uint32_t *vir1 = &rmesa->hw.vir[1].cmd[1]; + + for (i = 0; i < R300_VIR_CMDSIZE-1; ++i) + vir0[i] = vir1[i] = 0; + + for (i = 0, j = 0; i < rmesa->vbuf.num_attribs; ++i) { + int tmp; + + tmp = attrs[i].data_type | (attrs[i].dst_loc << R300_DST_VEC_LOC_SHIFT); + if (attrs[i]._signed) + tmp |= R300_SIGNED; + if (attrs[i].normalize) + tmp |= R300_NORMALIZE; + + if (i % 2 == 0) { + vir0[j] = tmp << R300_DATA_TYPE_0_SHIFT; + vir1[j] = attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT); + } else { + vir0[j] |= tmp << R300_DATA_TYPE_1_SHIFT; + vir1[j] |= (attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT; + ++j; + } + } + reg_count = (rmesa->vbuf.num_attribs + 1) >> 1; + if (rmesa->vbuf.num_attribs % 2 != 0) { + vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT; + } else { + vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT; + } + + R300_STATECHANGE(rmesa, vir[0]); + R300_STATECHANGE(rmesa, vir[1]); + R300_STATECHANGE(rmesa, vof); + R300_STATECHANGE(rmesa, vic); + + if (rmesa->radeon.radeonScreen->kernel_mm) { + rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF; + rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF; + rmesa->hw.vir[0].cmd[0] |= (reg_count & 0x3FFF) << 16; + rmesa->hw.vir[1].cmd[0] |= (reg_count & 0x3FFF) << 16; + } else { + ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = reg_count; + ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = reg_count; + } + + rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); + rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); + rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten, ctx->FragmentProgram._Current->Base.InputsRead); + rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten, ctx->FragmentProgram._Current->Base.InputsRead); } void r300UpdateShaderStates(r300ContextPtr rmesa) { GLcontext *ctx; ctx = rmesa->radeon.glCtx; + struct r300_fragment_program *r300_fp; - r300SetEarlyZState(ctx); + r300_fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; - /* w_fmt value is set to get best performance - * see p.130 R5xx 3D acceleration guide v1.3 */ - GLuint w_fmt, fgdepthsrc; - if (current_fragment_program_writes_depth(ctx)) { - fgdepthsrc = R300_FG_DEPTH_SRC_SHADER; - w_fmt = R300_W_FMT_W24 | R300_W_SRC_US; - } else { - fgdepthsrc = R300_FG_DEPTH_SRC_SCAN; - w_fmt = R300_W_FMT_W0 | R300_W_SRC_US; - } + /* should only happenen once, just after context is created */ + if (!r300_fp) + return; - if (w_fmt != rmesa->hw.us_out_fmt.cmd[5]) { - R300_STATECHANGE(rmesa, us_out_fmt); - rmesa->hw.us_out_fmt.cmd[5] = w_fmt; - } + r300SetEarlyZState(ctx); - if (fgdepthsrc != rmesa->hw.fg_depth_src.cmd[1]) { - R300_STATECHANGE(rmesa, fg_depth_src); - rmesa->hw.fg_depth_src.cmd[1] = fgdepthsrc; - } + r300SetupTextures(ctx); - if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) - r500SetupPixelShader(rmesa); - else - r300SetupPixelShader(rmesa); + rmesa->vtbl.SetupPixelShader(ctx); - if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) - r500SetupRSUnit(ctx); - else - r300SetupRSUnit(ctx); + rmesa->vtbl.SetupRSUnit(ctx); - if ((rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + if (rmesa->options.hw_tcl_enabled) { r300SetupVertexProgram(rmesa); - + } } /** @@ -2501,9 +2314,8 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state) _swsetup_InvalidateState(ctx, new_state); _vbo_InvalidateState(ctx, new_state); _tnl_InvalidateState(ctx, new_state); - _ae_invalidate_state(ctx, new_state); - if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { + if (new_state & _NEW_BUFFERS) { _mesa_update_framebuffer(ctx); /* this updates the DrawBuffer's Width/Height if it's a FBO */ _mesa_update_draw_buffer_bounds(ctx); @@ -2523,34 +2335,12 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state) */ void r300InitState(r300ContextPtr r300) { - memset(&(r300->state.texture), 0, sizeof(r300->state.texture)); - r300ResetHwState(r300); } static void r300RenderMode(GLcontext * ctx, GLenum mode) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); - (void)rmesa; - (void)mode; -} - -void r300UpdateClipPlanes( GLcontext *ctx ) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - GLuint p; - - for (p = 0; p < ctx->Const.MaxClipPlanes; p++) { - if (ctx->Transform.ClipPlanesEnabled & (1 << p)) { - GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p]; - - R300_STATECHANGE( rmesa, vpucp[p] ); - rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0]; - rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1]; - rmesa->hw.vpucp[p].cmd[R300_VPUCP_Z] = ip[2]; - rmesa->hw.vpucp[p].cmd[R300_VPUCP_W] = ip[3]; - } - } + r300SwitchFallback(ctx, R300_FALLBACK_RENDER_MODE, ctx->RenderMode != GL_RENDER); } /** @@ -2598,3 +2388,20 @@ void r300InitStateFuncs(struct dd_function_table *functions) functions->DrawBuffer = radeonDrawBuffer; functions->ReadBuffer = radeonReadBuffer; } + +void r300InitShaderFunctions(r300ContextPtr r300) +{ + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + r300->vtbl.SetupRSUnit = r500SetupRSUnit; + r300->vtbl.SetupPixelShader = r500SetupPixelShader; + r300->vtbl.SetupFragmentShaderTextures = r500SetupFragmentShaderTextures; + r300->vtbl.BuildFragmentProgramHwCode = r500BuildFragmentProgramHwCode; + r300->vtbl.FragmentProgramDump = r500FragmentProgramDump; + } else { + r300->vtbl.SetupRSUnit = r300SetupRSUnit; + r300->vtbl.SetupPixelShader = r300SetupPixelShader; + r300->vtbl.SetupFragmentShaderTextures = r300SetupFragmentShaderTextures; + r300->vtbl.BuildFragmentProgramHwCode = r300BuildFragmentProgramHwCode; + r300->vtbl.FragmentProgramDump = r300FragmentProgramDump; + } +} diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h index 247a20ee516..23282894202 100644 --- a/src/mesa/drivers/dri/r300/r300_state.h +++ b/src/mesa/drivers/dri/r300/r300_state.h @@ -50,16 +50,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. r300->radeon.hw.is_dirty = GL_TRUE; \ } while(0) -// r300_state.c -extern int future_hw_tcl_on; -void _tnl_UpdateFixedFunctionProgram (GLcontext * ctx); void r300UpdateViewportOffset (GLcontext * ctx); void r300UpdateDrawBuffer (GLcontext * ctx); void r300UpdateStateParameters (GLcontext * ctx, GLuint new_state); void r300UpdateShaders (r300ContextPtr rmesa); void r300UpdateShaderStates (r300ContextPtr rmesa); void r300InitState (r300ContextPtr r300); -void r300UpdateClipPlanes (GLcontext * ctx); void r300InitStateFuncs (struct dd_function_table *functions); +void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, GLuint output_count, GLuint temp_count); +void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten); #endif /* __R300_STATE_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index 03c1521de7e..ce4179208eb 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -34,9 +34,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "tnl/tnl.h" #include "tnl/t_pipeline.h" +#include "r300_state.h" #include "r300_swtcl.h" #include "r300_emit.h" #include "r300_tex.h" +#include "r300_render.h" #define EMIT_ATTR( ATTR, STYLE ) \ do { \ @@ -53,199 +55,130 @@ do { \ rmesa->radeon.swtcl.vertex_attr_count++; \ } while (0) -#define ADD_ATTR(_attr, _format, _dst_loc, _swizzle, _write_mask) \ +#define ADD_ATTR(_attr, _format, _dst_loc, _swizzle, _write_mask, _normalize) \ do { \ - attrs[num_attrs].attr = (_attr); \ - attrs[num_attrs].format = (_format); \ + attrs[num_attrs].element = (_attr); \ + attrs[num_attrs].data_type = (_format); \ attrs[num_attrs].dst_loc = (_dst_loc); \ attrs[num_attrs].swizzle = (_swizzle); \ attrs[num_attrs].write_mask = (_write_mask); \ + attrs[num_attrs]._signed = 0; \ + attrs[num_attrs].normalize = (_normalize); \ ++num_attrs; \ } while (0) -static void r300SwtclVAPSetup(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten, GLuint vap_out_fmt_1) +void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_OutputsWritten) { r300ContextPtr rmesa = R300_CONTEXT( ctx ); TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &tnl->vb; - struct vertex_attribute *attrs = rmesa->swtcl.vert_attrs; - int vte = 0; - int i, j, reg_count; - uint32_t *vir0 = &rmesa->hw.vir[0].cmd[1]; - uint32_t *vir1 = &rmesa->hw.vir[1].cmd[1]; - - for (i = 0; i < R300_VIR_CMDSIZE-1; ++i) - vir0[i] = vir1[i] = 0; - - for (i = 0, j = 0; i < rmesa->radeon.swtcl.vertex_attr_count; ++i) { - int tmp, data_format; - switch (attrs[i].format) { - case EMIT_1F: - data_format = R300_DATA_TYPE_FLOAT_1; - break; - case EMIT_2F: - data_format = R300_DATA_TYPE_FLOAT_2; - break; - case EMIT_3F: - data_format = R300_DATA_TYPE_FLOAT_3; - break; - case EMIT_4F: - data_format = R300_DATA_TYPE_FLOAT_4; - break; - case EMIT_4UB_4F_RGBA: - case EMIT_4UB_4F_ABGR: - data_format = R300_DATA_TYPE_BYTE | R300_NORMALIZE; - break; - default: - fprintf(stderr, "%s: Invalid data format type", __FUNCTION__); - _mesa_exit(-1); - break; - } - - tmp = data_format | (attrs[i].dst_loc << R300_DST_VEC_LOC_SHIFT); - if (i % 2 == 0) { - vir0[j] = tmp << R300_DATA_TYPE_0_SHIFT; - vir1[j] = attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT); - } else { - vir0[j] |= tmp << R300_DATA_TYPE_1_SHIFT; - vir1[j] |= (attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT; - ++j; - } - } - - reg_count = (rmesa->radeon.swtcl.vertex_attr_count + 1) >> 1; - if (rmesa->radeon.swtcl.vertex_attr_count % 2 != 0) { - vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT; - } else { - vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT; - } - - R300_STATECHANGE(rmesa, vir[0]); - R300_STATECHANGE(rmesa, vir[1]); - R300_STATECHANGE(rmesa, vof); - R300_STATECHANGE(rmesa, vte); - R300_STATECHANGE(rmesa, vic); - - if (rmesa->radeon.radeonScreen->kernel_mm) { - rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF; - rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF; - rmesa->hw.vir[0].cmd[0] |= (reg_count & 0x3FFF) << 16; - rmesa->hw.vir[1].cmd[0] |= (reg_count & 0x3FFF) << 16; - } else { - ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = reg_count; - ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = reg_count; - } - - rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); - rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); - rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten); - /** - * Can't use r300VAPOutputCntl1 function because it assumes - * that all texture coords have 4 components and that's the case - * for HW TCL path, but not for SW TCL. - */ - rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_out_fmt_1; - - vte = rmesa->hw.vte.cmd[1]; - vte &= ~(R300_VTX_XY_FMT | R300_VTX_Z_FMT | R300_VTX_W0_FMT); - /* Important: - */ - if ( VB->NdcPtr != NULL ) { - VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; - vte |= R300_VTX_XY_FMT | R300_VTX_Z_FMT; - } - else { - VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr; - vte |= R300_VTX_W0_FMT; - } - - assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL ); - - rmesa->hw.vte.cmd[1] = vte; - rmesa->hw.vte.cmd[2] = rmesa->radeon.swtcl.vertex_size; -} - - -static void r300SetVertexFormat( GLcontext *ctx ) -{ - r300ContextPtr rmesa = R300_CONTEXT( ctx ); - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - int first_free_tex = 0, vap_out_fmt_1 = 0; + int first_free_tex = 0; GLuint InputsRead = 0; GLuint OutputsWritten = 0; int num_attrs = 0; - struct vertex_attribute *attrs = rmesa->swtcl.vert_attrs; + GLuint fp_reads = ctx->FragmentProgram._Current->Base.InputsRead; + struct vertex_attribute *attrs = rmesa->vbuf.attribs; rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0; rmesa->radeon.swtcl.vertex_attr_count = 0; - if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POS)) { - InputsRead |= 1 << VERT_ATTRIB_POS; - OutputsWritten |= 1 << VERT_RESULT_HPOS; - EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F ); - ADD_ATTR(VERT_ATTRIB_POS, EMIT_4F, SWTCL_OVM_POS, SWIZZLE_XYZW, MASK_XYZW); - rmesa->swtcl.coloroffset = 4; - } + /* We always want non Ndc coords format */ + VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr; + + /* Always write position vector */ + InputsRead |= 1 << VERT_ATTRIB_POS; + OutputsWritten |= 1 << VERT_RESULT_HPOS; + EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F ); + ADD_ATTR(VERT_ATTRIB_POS, R300_DATA_TYPE_FLOAT_4, SWTCL_OVM_POS, SWIZZLE_XYZW, MASK_XYZW, 0); + rmesa->swtcl.coloroffset = 4; - if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR0)) { + if (fp_reads & FRAG_BIT_COL0) { InputsRead |= 1 << VERT_ATTRIB_COLOR0; OutputsWritten |= 1 << VERT_RESULT_COL0; #if MESA_LITTLE_ENDIAN EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA ); - ADD_ATTR(VERT_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW); + ADD_ATTR(VERT_ATTRIB_COLOR0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW, 1); #else EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR ); - ADD_ATTR(VERT_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW); + ADD_ATTR(VERT_ATTRIB_COLOR0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW, 1); #endif } - if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR1 )) { + if (fp_reads & FRAG_BIT_COL1) { GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); InputsRead |= 1 << VERT_ATTRIB_COLOR1; OutputsWritten |= 1 << VERT_RESULT_COL1; #if MESA_LITTLE_ENDIAN EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_RGBA ); - ADD_ATTR(VERT_ATTRIB_COLOR1, EMIT_4UB_4F_RGBA, SWTCL_OVM_COLOR1, swiz, MASK_XYZW); + ADD_ATTR(VERT_ATTRIB_COLOR1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR1, swiz, MASK_XYZW, 1); #else EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_ABGR ); - ADD_ATTR(VERT_ATTRIB_COLOR1, EMIT_4UB_4F_ABGR, SWTCL_OVM_COLOR1, swiz, MASK_XYZW); + ADD_ATTR(VERT_ATTRIB_COLOR1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR1, swiz, MASK_XYZW, 1); #endif rmesa->swtcl.specoffset = rmesa->swtcl.coloroffset + 1; } + if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) { + VB->AttribPtr[VERT_ATTRIB_GENERIC0] = VB->ColorPtr[1]; + OutputsWritten |= 1 << VERT_RESULT_BFC0; +#if MESA_LITTLE_ENDIAN + EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_RGBA ); + ADD_ATTR(VERT_ATTRIB_GENERIC0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW, 1); +#else + EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_ABGR ); + ADD_ATTR(VERT_ATTRIB_GENERIC0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW, 1); +#endif + if (fp_reads & FRAG_BIT_COL1) { + VB->AttribPtr[VERT_ATTRIB_GENERIC1] = VB->SecondaryColorPtr[1]; + GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); + OutputsWritten |= 1 << VERT_RESULT_BFC1; +#if MESA_LITTLE_ENDIAN + EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_RGBA ); + ADD_ATTR(VERT_ATTRIB_GENERIC1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR3, swiz, MASK_XYZW, 1); +#else + EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_ABGR ); + ADD_ATTR(VERT_ATTRIB_GENERIC1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR3, swiz, MASK_XYZW, 1); +#endif + } + } + if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POINTSIZE )) { GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO); InputsRead |= 1 << VERT_ATTRIB_POINT_SIZE; OutputsWritten |= 1 << VERT_RESULT_PSIZ; EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F ); - ADD_ATTR(VERT_ATTRIB_POINT_SIZE, EMIT_1F, SWTCL_OVM_POINT_SIZE, swiz, MASK_X); + ADD_ATTR(VERT_ATTRIB_POINT_SIZE, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_POINT_SIZE, swiz, MASK_X, 0); } - if (RENDERINPUTS_TEST_RANGE(tnl->render_inputs_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { - int i, size; - GLuint swiz, mask, format; + /** + * Sending only one texcoord component may lead to lock up, + * so for all textures always output 4 texcoord components to RS. + */ + { + int i; + GLuint swiz, format, hw_format; for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX(i) )) { + if (fp_reads & FRAG_BIT_TEX(i)) { switch (VB->TexCoordPtr[i]->size) { case 1: + format = EMIT_1F; + hw_format = R300_DATA_TYPE_FLOAT_1; + swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE); + break; case 2: format = EMIT_2F; - swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO); - mask = MASK_X | MASK_Y; - size = 2; + hw_format = R300_DATA_TYPE_FLOAT_2; + swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE); break; case 3: format = EMIT_3F; - swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - mask = MASK_X | MASK_Y | MASK_Z; - size = 3; + hw_format = R300_DATA_TYPE_FLOAT_3; + swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); break; case 4: format = EMIT_4F; + hw_format = R300_DATA_TYPE_FLOAT_4; swiz = SWIZZLE_XYZW; - mask = MASK_XYZW; - size = 4; break; default: continue; @@ -253,15 +186,14 @@ static void r300SetVertexFormat( GLcontext *ctx ) InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i); OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); EMIT_ATTR(_TNL_ATTRIB_TEX(i), format); - ADD_ATTR(VERT_ATTRIB_TEX0 + i, format, SWTCL_OVM_TEX(i), swiz, mask); - vap_out_fmt_1 |= size << (i * 3); + ADD_ATTR(VERT_ATTRIB_TEX0 + i, hw_format, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_XYZW, 0); ++first_free_tex; } } } /* RS can't put fragment position on the pixel stack, so stuff it in texcoord if needed */ - if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POS) && (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_WPOS)) { + if (fp_reads & FRAG_BIT_WPOS) { if (first_free_tex >= ctx->Const.MaxTextureUnits) { fprintf(stderr, "\tout of free texcoords to write w pos\n"); _mesa_exit(-1); @@ -270,12 +202,11 @@ static void r300SetVertexFormat( GLcontext *ctx ) InputsRead |= 1 << (VERT_ATTRIB_TEX0 + first_free_tex); OutputsWritten |= 1 << (VERT_RESULT_TEX0 + first_free_tex); EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F ); - ADD_ATTR(VERT_ATTRIB_POS, EMIT_4F, SWTCL_OVM_TEX(first_free_tex), SWIZZLE_XYZW, MASK_XYZW); - vap_out_fmt_1 |= 4 << (first_free_tex * 3); + ADD_ATTR(VERT_ATTRIB_POS, R300_DATA_TYPE_FLOAT_4, SWTCL_OVM_TEX(first_free_tex), SWIZZLE_XYZW, MASK_XYZW, 0); ++first_free_tex; } - if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_FOG)) { + if (fp_reads & FRAG_BIT_FOGC) { if (first_free_tex >= ctx->Const.MaxTextureUnits) { fprintf(stderr, "\tout of free texcoords to write fog coordinate\n"); _mesa_exit(-1); @@ -285,12 +216,24 @@ static void r300SetVertexFormat( GLcontext *ctx ) OutputsWritten |= 1 << VERT_RESULT_FOGC; GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO); EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F ); - ADD_ATTR(VERT_ATTRIB_FOG, EMIT_1F, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_X); - vap_out_fmt_1 |= 1 << (first_free_tex * 3); + ADD_ATTR(VERT_ATTRIB_FOG, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_XYZW, 0); } R300_NEWPRIM(rmesa); - r300SwtclVAPSetup(ctx, InputsRead, OutputsWritten, vap_out_fmt_1); + rmesa->vbuf.num_attribs = num_attrs; + *_InputsRead = InputsRead; + *_OutputsWritten = OutputsWritten; + + RENDERINPUTS_COPY(rmesa->render_inputs_bitset, tnl->render_inputs_bitset); +} + +static void r300PrepareVertices(GLcontext *ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLuint InputsRead, OutputsWritten; + + r300ChooseSwtclVertexFormat(ctx, &InputsRead, &OutputsWritten); + r300SetupVAP(ctx, InputsRead, OutputsWritten); rmesa->radeon.swtcl.vertex_size = _tnl_install_attrs( ctx, @@ -299,8 +242,6 @@ static void r300SetVertexFormat( GLcontext *ctx ) NULL, 0 ); rmesa->radeon.swtcl.vertex_size /= 4; - - RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, tnl->render_inputs_bitset); } @@ -318,7 +259,6 @@ static GLuint reduced_prim[] = { }; static void r300RasterPrimitive( GLcontext *ctx, GLuint prim ); -static void r300RenderPrimitive( GLcontext *ctx, GLenum prim ); /*********************************************************************** * Emit primitives as inline vertices * @@ -366,9 +306,8 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim ); * Build render functions from dd templates * ***********************************************************************/ -#define R300_TWOSIDE_BIT 0x01 -#define R300_UNFILLED_BIT 0x02 -#define R300_MAX_TRIFUNC 0x04 +#define R300_UNFILLED_BIT 0x01 +#define R300_MAX_TRIFUNC 0x02 static struct { tnl_points_func points; @@ -379,9 +318,9 @@ static struct { #define DO_FALLBACK 0 #define DO_UNFILLED (IND & R300_UNFILLED_BIT) -#define DO_TWOSIDE (IND & R300_TWOSIDE_BIT) +#define DO_TWOSIDE 0 #define DO_FLAT 0 -#define DO_OFFSET 0 +#define DO_OFFSET 0 #define DO_TRI 1 #define DO_QUAD 1 #define DO_LINE 1 @@ -469,26 +408,15 @@ do { \ #define TAG(x) x #include "tnl_dd/t_dd_tritmp.h" -#define IND (R300_TWOSIDE_BIT) -#define TAG(x) x##_twoside -#include "tnl_dd/t_dd_tritmp.h" - #define IND (R300_UNFILLED_BIT) #define TAG(x) x##_unfilled #include "tnl_dd/t_dd_tritmp.h" -#define IND (R300_TWOSIDE_BIT|R300_UNFILLED_BIT) -#define TAG(x) x##_twoside_unfilled -#include "tnl_dd/t_dd_tritmp.h" - - static void init_rast_tab( void ) { init(); - init_twoside(); init_unfilled(); - init_twoside_unfilled(); } /**********************************************************************/ @@ -540,7 +468,6 @@ static void r300ChooseRenderState( GLcontext *ctx ) GLuint index = 0; GLuint flags = ctx->_TriangleCaps; - if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT; if (flags & DD_TRI_UNFILLED) index |= R300_UNFILLED_BIT; if (index != rmesa->radeon.swtcl.RenderIndex) { @@ -565,12 +492,12 @@ static void r300ChooseRenderState( GLcontext *ctx ) } -static void r300RenderStart(GLcontext *ctx) +void r300RenderStart(GLcontext *ctx) { r300ContextPtr rmesa = R300_CONTEXT( ctx ); r300ChooseRenderState(ctx); - r300SetVertexFormat(ctx); + r300PrepareVertices(ctx); r300ValidateBuffers(ctx); @@ -585,7 +512,7 @@ static void r300RenderStart(GLcontext *ctx) } } -static void r300RenderFinish(GLcontext *ctx) +void r300RenderFinish(GLcontext *ctx) { } @@ -599,7 +526,7 @@ static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim ) } } -static void r300RenderPrimitive(GLcontext *ctx, GLenum prim) +void r300RenderPrimitive(GLcontext *ctx, GLenum prim) { r300ContextPtr rmesa = R300_CONTEXT(ctx); @@ -611,7 +538,7 @@ static void r300RenderPrimitive(GLcontext *ctx, GLenum prim) r300RasterPrimitive( ctx, reduced_prim[prim] ); } -static void r300ResetLineStipple(GLcontext *ctx) +void r300ResetLineStipple(GLcontext *ctx) { } @@ -647,7 +574,6 @@ void r300InitSwtcl(GLcontext *ctx) _tnl_invalidate_vertices( ctx, ~0 ); _tnl_need_projected_coords( ctx, GL_FALSE ); - r300ChooseRenderState(ctx); } void r300DestroySwtcl(GLcontext *ctx) @@ -692,6 +618,7 @@ void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset) rmesa->radeon.hw.max_state_size + (12*sizeof(int)), __FUNCTION__); radeonEmitState(&rmesa->radeon); + r300_emit_scissor(ctx); r300EmitVertexAOS(rmesa, rmesa->radeon.swtcl.vertex_size, rmesa->radeon.dma.current, diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.h b/src/mesa/drivers/dri/r300/r300_swtcl.h index ebc99c9e8a7..c271d265468 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.h +++ b/src/mesa/drivers/dri/r300/r300_swtcl.h @@ -39,24 +39,27 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "swrast/swrast.h" #include "r300_context.h" -#define MASK_XYZW (R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) -#define MASK_X R300_WRITE_ENA_X -#define MASK_Y R300_WRITE_ENA_Y -#define MASK_Z R300_WRITE_ENA_Z -#define MASK_W R300_WRITE_ENA_W - /* * Here are definitions of OVM locations of vertex attributes for non TCL hw */ #define SWTCL_OVM_POS 0 #define SWTCL_OVM_COLOR0 2 #define SWTCL_OVM_COLOR1 3 +#define SWTCL_OVM_COLOR2 4 +#define SWTCL_OVM_COLOR3 5 #define SWTCL_OVM_TEX(n) ((n) + 6) #define SWTCL_OVM_POINT_SIZE 15 +extern void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *InputsRead, GLuint *OutputsWritten); extern void r300InitSwtcl( GLcontext *ctx ); extern void r300DestroySwtcl( GLcontext *ctx ); +extern void r300RenderStart(GLcontext *ctx); +extern void r300RenderFinish(GLcontext *ctx); +extern void r300RenderPrimitive(GLcontext *ctx, GLenum prim); +extern void r300ResetLineStipple(GLcontext *ctx); + extern void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset); + #endif diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index cf4cad73d29..6d6a90aa88a 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -117,6 +117,7 @@ static const struct tx_table { _ASSIGN(INTENSITY_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, X, FL_I16)), _ASSIGN(Z16, R300_EASY_TX_FORMAT(X, X, X, X, X16)), _ASSIGN(Z24_S8, R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8)), + _ASSIGN(S8_Z24, R300_EASY_TX_FORMAT(Y, Y, Y, Y, X24_Y8)), _ASSIGN(Z32, R300_EASY_TX_FORMAT(X, X, X, X, X32)), /* *INDENT-ON* */ }; @@ -431,7 +432,6 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo radeon_miptree_unreference(rImage->mt); rImage->mt = NULL; } - fprintf(stderr,"settexbuf %dx%d@%d %d targ %x format %x\n", rb->width, rb->height, rb->cpp, rb->pitch, target, format); _mesa_init_teximage_fields(radeon->glCtx, target, texImage, rb->width, rb->height, 1, 0, rb->cpp); texImage->RowStride = rb->pitch / rb->cpp; @@ -449,7 +449,10 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo pitch_val = rb->pitch; switch (rb->cpp) { case 4: - t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); + if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT) + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); + else + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); t->pp_txfilter |= tx_table[2].filter; pitch_val /= 4; break; diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 50806575ced..c41a8fdd621 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -34,10 +34,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "shader/program.h" #include "shader/prog_instruction.h" #include "shader/prog_parameter.h" +#include "shader/prog_print.h" #include "shader/prog_statevars.h" #include "tnl/tnl.h" #include "r300_context.h" +#include "r300_state.h" /* TODO: Get rid of t_src_class call */ #define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ @@ -64,7 +66,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \ if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \ WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \ - vp->native = GL_FALSE; \ + vp->error = GL_TRUE; \ } \ u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ } while (0) @@ -214,21 +216,8 @@ static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller) static unsigned long t_src_index(struct r300_vertex_program *vp, struct prog_src_register *src) { - int i; - int max_reg = -1; - if (src->File == PROGRAM_INPUT) { - if (vp->inputs[src->Index] != -1) - return vp->inputs[src->Index]; - - for (i = 0; i < VERT_ATTRIB_MAX; i++) - if (vp->inputs[i] > max_reg) - max_reg = vp->inputs[i]; - - vp->inputs[src->Index] = max_reg + 1; - - //vp_dump_inputs(vp, __FUNCTION__); - + assert(vp->inputs[src->Index] != -1); return vp->inputs[src->Index]; } else { if (src->Index < 0) { @@ -245,7 +234,7 @@ static unsigned long t_src_index(struct r300_vertex_program *vp, static unsigned long t_src(struct r300_vertex_program *vp, struct prog_src_register *src) { - /* src->NegateBase uses the NEGATE_ flags from program_instruction.h, + /* src->Negate uses the NEGATE_ flags from program_instruction.h, * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. */ return PVS_SRC_OPERAND(t_src_index(vp, src), @@ -254,13 +243,13 @@ static unsigned long t_src(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src->Swizzle, 2)), t_swizzle(GET_SWZ(src->Swizzle, 3)), t_src_class(src->File), - src->NegateBase) | (src->RelAddr << 4); + src->Negate) | (src->RelAddr << 4); } static unsigned long t_src_scalar(struct r300_vertex_program *vp, struct prog_src_register *src) { - /* src->NegateBase uses the NEGATE_ flags from program_instruction.h, + /* src->Negate uses the NEGATE_ flags from program_instruction.h, * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. */ return PVS_SRC_OPERAND(t_src_index(vp, src), @@ -269,8 +258,7 @@ static unsigned long t_src_scalar(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src->Swizzle, 0)), t_swizzle(GET_SWZ(src->Swizzle, 0)), t_src_class(src->File), - src-> - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4); } @@ -307,7 +295,7 @@ static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[0].Swizzle, 3)), t_src_class(src[0].File), (!src[0]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[3] = 0; @@ -369,8 +357,7 @@ static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[0].Swizzle, 2)), SWIZZLE_ZERO, t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), @@ -378,8 +365,7 @@ static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[1].Swizzle, 1)), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, t_src_class(src[1].File), - src[1]. - NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + src[1].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[1].RelAddr << 4); inst[3] = __CONST(1, SWIZZLE_ZERO); @@ -422,8 +408,7 @@ static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[0].Swizzle, 2)), PVS_SRC_SELECT_FORCE_1, t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[2] = t_src(vp, &src[1]); inst[3] = __CONST(1, SWIZZLE_ZERO); @@ -519,7 +504,7 @@ static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp, PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY, /* Not 100% sure about this */ (!src[0]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE + Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE /*VSF_FLAG_ALL */ ); inst[3] = __CONST(0, SWIZZLE_ZERO); (*u_temp_i)--; @@ -564,8 +549,7 @@ static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[0].Swizzle, 0)), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[2] = __CONST(0, SWIZZLE_ZERO); inst[3] = __CONST(0, SWIZZLE_ZERO); @@ -592,24 +576,21 @@ static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp, PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); return inst; @@ -837,7 +818,7 @@ static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[1].Swizzle, 3)), t_src_class(src[1].File), (!src[1]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); inst[3] = 0; #else @@ -857,7 +838,7 @@ static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[1].Swizzle, 3)), t_src_class(src[1].File), (!src[1]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); #endif @@ -905,16 +886,14 @@ static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W t_src_class(src[1].File), - src[1]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); inst[3] = __CONST(1, SWIZZLE_ZERO); inst += 4; @@ -931,15 +910,14 @@ static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W t_src_class(src[1].File), (!src[1]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[3] = PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, @@ -954,11 +932,17 @@ static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp, static void t_inputs_outputs(struct r300_vertex_program *vp) { int i; - int cur_reg = 0; + int cur_reg; - for (i = 0; i < VERT_ATTRIB_MAX; i++) - vp->inputs[i] = -1; + cur_reg = -1; + for (i = 0; i < VERT_ATTRIB_MAX; i++) { + if (vp->key.InputsRead & (1 << i)) + vp->inputs[i] = ++cur_reg; + else + vp->inputs[i] = -1; + } + cur_reg = 0; for (i = 0; i < VERT_RESULT_MAX; i++) vp->outputs[i] = -1; @@ -972,26 +956,36 @@ static void t_inputs_outputs(struct r300_vertex_program *vp) vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; } + /* If we're writing back facing colors we need to send + * four colors to make front/back face colors selection work. + * If the vertex program doesn't write all 4 colors, lets + * pretend it does by skipping output index reg so the colors + * get written into appropriate output vectors. + */ if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) { vp->outputs[VERT_RESULT_COL0] = cur_reg++; + } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) || + vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; } if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) { - vp->outputs[VERT_RESULT_COL1] = - vp->outputs[VERT_RESULT_COL0] + 1; - cur_reg = vp->outputs[VERT_RESULT_COL1] + 1; + vp->outputs[VERT_RESULT_COL1] = cur_reg++; + } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) || + vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; } if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) { - vp->outputs[VERT_RESULT_BFC0] = - vp->outputs[VERT_RESULT_COL0] + 2; - cur_reg = vp->outputs[VERT_RESULT_BFC0] + 2; + vp->outputs[VERT_RESULT_BFC0] = cur_reg++; + } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; } if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { - vp->outputs[VERT_RESULT_BFC1] = - vp->outputs[VERT_RESULT_COL0] + 3; - cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1; + vp->outputs[VERT_RESULT_BFC1] = cur_reg++; + } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) { + cur_reg++; } for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { @@ -1018,14 +1012,13 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, struct prog_src_register src[3]; vp->pos_end = 0; /* Not supported yet */ - vp->program.length = 0; - /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */ + vp->hw_code.length = 0; vp->translated = GL_TRUE; - vp->native = GL_TRUE; + vp->error = GL_FALSE; t_inputs_outputs(vp); - for (inst = vp->program.body.i; vpi->Opcode != OPCODE_END; + for (inst = vp->hw_code.body.d; vpi->Opcode != OPCODE_END; vpi++, inst += 4) { FREE_TEMPS(); @@ -1187,38 +1180,15 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, &u_temp_i); break; default: - assert(0); + vp->error = GL_TRUE; break; } } - /* Some outputs may be artificially added, to match the inputs - of the fragment program. Blank the outputs here. */ - for (i = 0; i < VERT_RESULT_MAX; i++) { - if (vp->key.OutputsAdded & (1 << i)) { - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - vp->outputs[i], - VSF_FLAG_ALL, - PVS_DST_REG_OUT); - inst[1] = __CONST(0, SWIZZLE_ZERO); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - inst += 4; - } + vp->hw_code.length = (inst - vp->hw_code.body.d); + if (vp->hw_code.length >= VSF_MAX_FRAGMENT_LENGTH) { + vp->error = GL_TRUE; } - - vp->program.length = (inst - vp->program.body.i); - if (vp->program.length >= VSF_MAX_FRAGMENT_LENGTH) { - vp->program.length = 0; - vp->native = GL_FALSE; - } -#if 0 - fprintf(stderr, "hw program:\n"); - for (i = 0; i < vp->program.length; i++) - fprintf(stderr, "%08x\n", vp->program.body.d[i]); -#endif } /* DP4 version seems to trigger some hw peculiarity */ @@ -1397,6 +1367,49 @@ static struct r300_vertex_program *build_program(struct r300_vertex_program_key pos_as_texcoord(vp, &mesa_vp->Base); } + if (RADEON_DEBUG & DEBUG_VERTS) { + fprintf(stderr, "Vertex program after native rewrite:\n"); + _mesa_print_program(&mesa_vp->Base); + fflush(stdout); + } + + /* Some outputs may be artificially added, to match the inputs of the fragment program. + * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by + * vertex program are undefined, so just use MOV [vertex_result], CONST[0] + */ + { + int i, count = 0; + for (i = 0; i < VERT_RESULT_MAX; ++i) { + if (vp->key.OutputsAdded & (1 << i)) { + ++count; + } + } + + if (count > 0) { + struct prog_instruction *inst; + + _mesa_insert_instructions(&mesa_vp->Base, mesa_vp->Base.NumInstructions - 1, count); + inst = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions - 1 - count]; + + for (i = 0; i < VERT_RESULT_MAX; ++i) { + if (vp->key.OutputsAdded & (1 << i)) { + inst->Opcode = OPCODE_MOV; + + inst->DstReg.File = PROGRAM_OUTPUT; + inst->DstReg.Index = i; + inst->DstReg.WriteMask = WRITEMASK_XYZW; + inst->DstReg.CondMask = COND_TR; + + inst->SrcReg[0].File = PROGRAM_CONSTANT; + inst->SrcReg[0].Index = 0; + inst->SrcReg[0].Swizzle = SWIZZLE_XYZW; + + ++inst; + } + } + } + } + assert(mesa_vp->Base.NumInstructions); vp->num_temporaries = mesa_vp->Base.NumTemporaries; r300TranslateVertexShader(vp, mesa_vp->Base.Instructions); @@ -1443,7 +1456,12 @@ void r300SelectVertexShader(r300ContextPtr r300) wpos_idx = i; } - add_outputs(&wanted_key, VERT_RESULT_HPOS); + if (vpc->mesa_program.IsPositionInvariant) { + wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS); + wanted_key.OutputsWritten |= (1 << VERT_RESULT_HPOS); + } else { + add_outputs(&wanted_key, VERT_RESULT_HPOS); + } if (InputsRead & FRAG_BIT_COL0) { add_outputs(&wanted_key, VERT_RESULT_COL0); @@ -1453,27 +1471,103 @@ void r300SelectVertexShader(r300ContextPtr r300) add_outputs(&wanted_key, VERT_RESULT_COL1); } + if (InputsRead & FRAG_BIT_FOGC) { + add_outputs(&wanted_key, VERT_RESULT_FOGC); + } + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { if (InputsRead & (FRAG_BIT_TEX0 << i)) { add_outputs(&wanted_key, VERT_RESULT_TEX0 + i); } } - if (vpc->mesa_program.IsPositionInvariant) { - /* we wan't position don't we ? */ - wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS); - } - for (vp = vpc->progs; vp; vp = vp->next) if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == 0) { r300->selected_vp = vp; return; } - //_mesa_print_program(&vpc->mesa_program.Base); + + if (RADEON_DEBUG & DEBUG_VERTS) { + fprintf(stderr, "Initial vertex program:\n"); + _mesa_print_program(&vpc->mesa_program.Base); + fflush(stdout); + } vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx); vp->next = vpc->progs; vpc->progs = vp; r300->selected_vp = vp; } + +#define bump_vpu_count(ptr, new_count) do { \ + drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr)); \ + int _nc=(new_count)/4; \ + assert(_nc < 256); \ + if(_nc>_p->vpu.count)_p->vpu.count=_nc; \ + } while(0) + +static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_shader_hw_code *code) +{ + int i; + + assert((code->length > 0) && (code->length % 4 == 0)); + + switch ((dest >> 8) & 0xf) { + case 0: + R300_STATECHANGE(r300, vpi); + for (i = 0; i < code->length; i++) + r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]); + bump_vpu_count(r300->hw.vpi.cmd, code->length + 4 * (dest & 0xff)); + break; + case 2: + R300_STATECHANGE(r300, vpp); + for (i = 0; i < code->length; i++) + r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]); + bump_vpu_count(r300->hw.vpp.cmd, code->length + 4 * (dest & 0xff)); + break; + case 4: + R300_STATECHANGE(r300, vps); + for (i = 0; i < code->length; i++) + r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (code->body.d[i]); + bump_vpu_count(r300->hw.vps.cmd, code->length + 4 * (dest & 0xff)); + break; + default: + fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest); + _mesa_exit(-1); + } +} + +void r300SetupVertexProgram(r300ContextPtr rmesa) +{ + GLcontext *ctx = rmesa->radeon.glCtx; + struct r300_vertex_program *prog = rmesa->selected_vp; + int inst_count = 0; + int param_count = 0; + + /* Reset state, in case we don't use something */ + ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0; + ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; + ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; + + R300_STATECHANGE(rmesa, vpp); + param_count = r300VertexProgUpdateParams(ctx, + (struct r300_vertex_program_cont *) + ctx->VertexProgram._Current, + (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); + bump_vpu_count(rmesa->hw.vpp.cmd, param_count); + param_count /= 4; + + r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->hw_code)); + inst_count = (prog->hw_code.length / 4) - 1; + + r300VapCntl(rmesa, _mesa_bitcount(prog->key.InputsRead), + _mesa_bitcount(prog->key.OutputsWritten), prog->num_temporaries); + + R300_STATECHANGE(rmesa, pvs); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | + (inst_count << R300_PVS_LAST_INST_SHIFT); + + rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); +} diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.h b/src/mesa/drivers/dri/r300/r300_vertprog.h index 2f35f02bc84..b552e3fb1bd 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.h +++ b/src/mesa/drivers/dri/r300/r300_vertprog.h @@ -32,4 +32,6 @@ #endif +void r300SetupVertexProgram(r300ContextPtr rmesa); + #endif diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 07a2a7b17ce..4d58cf21622 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -27,10 +27,6 @@ #include "r500_fragprog.h" -#include "radeon_nqssadce.h" -#include "radeon_program_alu.h" - - static void reset_srcreg(struct prog_src_register* reg) { _mesa_bzero(reg, sizeof(*reg)); @@ -58,12 +54,12 @@ static struct prog_src_register shadow_ambient(struct gl_program *program, int t * - introduce a temporary register when write masks are needed * */ -static GLboolean transform_TEX( +GLboolean r500_transform_TEX( struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data) { - struct r500_fragment_program_compiler *compiler = - (struct r500_fragment_program_compiler*)data; + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; struct prog_instruction inst = *orig_inst; struct prog_instruction* tgt; GLboolean destredirect = GL_FALSE; @@ -156,9 +152,9 @@ static GLboolean transform_TEX( * r < tex <=> -tex+r < 0 * r >= tex <=> not (-tex+r < 0 */ if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - tgt[1].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW; + tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; else - tgt[1].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW; + tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; tgt[2].Opcode = OPCODE_CMP; tgt[2].DstReg = orig_inst->DstReg; @@ -188,121 +184,7 @@ static GLboolean transform_TEX( return GL_TRUE; } - -static void update_params(r300ContextPtr r300, struct r500_fragment_program *fp) -{ - struct gl_fragment_program *mp = &fp->mesa_program; - - /* Ask Mesa nicely to fill in ParameterValues for us */ - if (mp->Base.Parameters) - _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters); -} - - -/** - * Transform the program to support fragment.position. - * - * Introduce a small fragment at the start of the program that will be - * the only code that directly reads the FRAG_ATTRIB_WPOS input. - * All other code pieces that reference that input will be rewritten - * to read from a newly allocated temporary. - * - * \todo if/when r5xx supports the radeon_program architecture, this is a - * likely candidate for code sharing. - */ -static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler) -{ - GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead; - - if (!(InputsRead & FRAG_BIT_WPOS)) - return; - - static gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 - }; - struct prog_instruction *fpi; - GLuint window_index; - int i = 0; - GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); - - _mesa_insert_instructions(compiler->program, 0, 3); - fpi = compiler->program->Instructions; - - /* perspective divide */ - fpi[i].Opcode = OPCODE_RCP; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_W; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; - i++; - - fpi[i].Opcode = OPCODE_MUL; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[1].Index = tempregi; - fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; - i++; - - /* viewport transformation */ - window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); - - fpi[i].Opcode = OPCODE_MAD; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[0].Index = tempregi; - fpi[i].SrcReg[0].Swizzle = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[1].Index = window_index; - fpi[i].SrcReg[1].Swizzle = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[2].Index = window_index; - fpi[i].SrcReg[2].Swizzle = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - i++; - - for (; i < compiler->program->NumInstructions; ++i) { - int reg; - for (reg = 0; reg < 3; reg++) { - if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && - fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { - fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[reg].Index = tempregi; - } - } - } -} - - -static void nqssadce_init(struct nqssadce_state* s) -{ - s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; - s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; -} - -static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) +GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) { GLuint relevant; int i; @@ -314,33 +196,30 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) if (reg.Abs) return GL_FALSE; - if (reg.NegateAbs) - reg.NegateBase ^= 15; + if (opcode == OPCODE_KIL && (reg.Swizzle != SWIZZLE_NOOP || reg.Negate != NEGATE_NONE)) + return GL_FALSE; - if (opcode == OPCODE_KIL) { - if (reg.Swizzle != SWIZZLE_NOOP) - return GL_FALSE; - } else { - for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(reg.Swizzle, i); - if (swz == SWIZZLE_NIL) { - reg.NegateBase &= ~(1 << i); - continue; - } - if (swz >= 4) - return GL_FALSE; + if (reg.Negate) + reg.Negate ^= NEGATE_XYZW; + + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(reg.Swizzle, i); + if (swz == SWIZZLE_NIL) { + reg.Negate &= ~(1 << i); + continue; } + if (swz >= 4) + return GL_FALSE; } - if (reg.NegateBase) + if (reg.Negate) return GL_FALSE; return GL_TRUE; } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) { /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; * if it doesn't fit perfectly into a .xyzw case... */ - if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs - && !reg.NegateBase && !reg.NegateAbs) + if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs && !reg.Negate) return GL_TRUE; return GL_FALSE; @@ -355,7 +234,7 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO) relevant |= 1 << i; } - if ((reg.NegateBase & relevant) && ((reg.NegateBase & relevant) != relevant)) + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) return GL_FALSE; return GL_TRUE; @@ -368,8 +247,7 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) * The only thing we *cannot* do in an ALU instruction is per-component * negation. Therefore, we split the MOV into two instructions when necessary. */ -static void nqssadce_build_swizzle(struct nqssadce_state *s, - struct prog_dst_register dst, struct prog_src_register src) +void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) { struct prog_instruction *inst; GLuint negatebase[2] = { 0, 0 }; @@ -379,7 +257,7 @@ static void nqssadce_build_swizzle(struct nqssadce_state *s, GLuint swz = GET_SWZ(src.Swizzle, i); if (swz == SWIZZLE_NIL) continue; - negatebase[GET_BIT(src.NegateBase, i)] |= 1 << i; + negatebase[GET_BIT(src.Negate, i)] |= 1 << i; } _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0)); @@ -393,129 +271,12 @@ static void nqssadce_build_swizzle(struct nqssadce_state *s, inst->DstReg = dst; inst->DstReg.WriteMask = negatebase[i]; inst->SrcReg[0] = src; + inst->SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW; inst++; s->IP++; } } -static GLuint build_dtm(GLuint depthmode) -{ - switch(depthmode) { - default: - case GL_LUMINANCE: return 0; - case GL_INTENSITY: return 1; - case GL_ALPHA: return 2; - } -} - -static GLuint build_func(GLuint comparefunc) -{ - return comparefunc - GL_NEVER; -} - - -/** - * Collect all external state that is relevant for compiling the given - * fragment program. - */ -static void build_state( - r300ContextPtr r300, - struct r500_fragment_program *fp, - struct r500_fragment_program_external_state *state) -{ - int unit; - - _mesa_bzero(state, sizeof(*state)); - - for(unit = 0; unit < 16; ++unit) { - if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) { - struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; - - state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); - state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); - } - } -} - -static void dump_program(struct r500_fragment_program_code *code); - -void r500TranslateFragmentShader(r300ContextPtr r300, - struct r500_fragment_program *fp) -{ - struct r500_fragment_program_external_state state; - - build_state(r300, fp, &state); - if (_mesa_memcmp(&fp->state, &state, sizeof(state))) { - /* TODO: cache compiled programs */ - fp->translated = GL_FALSE; - _mesa_memcpy(&fp->state, &state, sizeof(state)); - } - - if (!fp->translated) { - struct r500_fragment_program_compiler compiler; - - compiler.r300 = r300; - compiler.fp = fp; - compiler.code = &fp->code; - compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base); - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Compiler: Initial program:\n"); - _mesa_print_program(compiler.program); - } - - insert_WPOS_trailer(&compiler); - - struct radeon_program_transformation transformations[] = { - { &transform_TEX, &compiler }, - { &radeonTransformALU, 0 }, - { &radeonTransformDeriv, 0 }, - { &radeonTransformTrigScale, 0 } - }; - radeonLocalTransform(r300->radeon.glCtx, compiler.program, - 4, transformations); - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Compiler: after native rewrite:\n"); - _mesa_print_program(compiler.program); - } - - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &is_native_swizzle, - .BuildSwizzle = &nqssadce_build_swizzle, - .RewriteDepthOut = GL_TRUE - }; - radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce); - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Compiler: after NqSSA-DCE:\n"); - _mesa_print_program(compiler.program); - } - - fp->translated = r500FragmentProgramEmit(&compiler); - - /* Subtle: Rescue any parameters that have been added during transformations */ - _mesa_free_parameter_list(fp->mesa_program.Base.Parameters); - fp->mesa_program.Base.Parameters = compiler.program->Parameters; - compiler.program->Parameters = 0; - - _mesa_reference_program(r300->radeon.glCtx, &compiler.program, 0); - - r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); - - if (RADEON_DEBUG & DEBUG_PIXEL) { - if (fp->translated) { - _mesa_printf("Machine-readable code:\n"); - dump_program(&fp->code); - } - } - - } - - update_params(r300, fp); - -} static char *toswiz(int swiz_val) { switch(swiz_val) { @@ -614,9 +375,9 @@ static char *to_texop(int val) return NULL; } -static void dump_program(struct r500_fragment_program_code *code) +void r500FragmentProgramDump(union rX00_fragment_program_code *c) { - + struct r500_fragment_program_code *code = &c->r500; fprintf(stderr, "R500 Fragment Program:\n--------\n"); int n; diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h index 1e45538f807..1179bf66073 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/r500_fragprog.h @@ -33,30 +33,20 @@ #ifndef __R500_FRAGPROG_H_ #define __R500_FRAGPROG_H_ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" #include "shader/prog_parameter.h" -#include "shader/prog_print.h" -#include "shader/program.h" #include "shader/prog_instruction.h" #include "r300_context.h" -#include "r300_state.h" -#include "radeon_program.h" +#include "radeon_nqssadce.h" -struct r500_fragment_program; +extern GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); -extern void r500TranslateFragmentShader(r300ContextPtr r300, - struct r500_fragment_program *fp); +extern void r500FragmentProgramDump(union rX00_fragment_program_code *c); -struct r500_fragment_program_compiler { - r300ContextPtr r300; - struct r500_fragment_program *fp; - struct r500_fragment_program_code *code; - struct gl_program *program; -}; +extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); -extern GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler); +extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src); + +extern GLboolean r500_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); #endif diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c index 4631235f0d3..30f4514897e 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c @@ -49,8 +49,8 @@ #define PROG_CODE \ - struct r500_fragment_program_compiler *c = (struct r500_fragment_program_compiler*)data; \ - struct r500_fragment_program_code *code = c->code + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ + struct r500_fragment_program_code *code = &c->code->r500 #define error(fmt, args...) do { \ fprintf(stderr, "%s::%s(): " fmt "\n", \ @@ -72,7 +72,7 @@ static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex } if (*hwindex >= code->const_nr) { - if (*hwindex >= PFS_NUM_CONST_REGS) { + if (*hwindex >= R500_PFS_NUM_CONST_REGS) { error("Out of hw constants!\n"); return GL_FALSE; } @@ -299,9 +299,9 @@ static const struct radeon_pair_handler pair_handler = { .MaxHwTemps = 128 }; -GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler) +GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) { - struct r500_fragment_program_code *code = compiler->code; + struct r500_fragment_program_code *code = &compiler->code->r500; _mesa_bzero(code, sizeof(*code)); code->max_temp_idx = 1; diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/radeon_nqssadce.c index a083c3d2436..4a2e1cba402 100644 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.c +++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.c @@ -61,12 +61,12 @@ static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_reg struct prog_src_register tmp = srcreg; int i; tmp.Swizzle = 0; - tmp.NegateBase = 0; + tmp.Negate = NEGATE_NONE; for(i = 0; i < 4; ++i) { GLuint swz = GET_SWZ(swizzle, i); if (swz < 4) { tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); - tmp.NegateBase |= GET_BIT(srcreg.NegateBase, swz) << i; + tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; } else { tmp.Swizzle |= swz << (i*3); } @@ -103,9 +103,8 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, inst->SrcReg[src].File = PROGRAM_TEMPORARY; inst->SrcReg[src].Index = dstreg.Index; inst->SrcReg[src].Swizzle = 0; - inst->SrcReg[src].NegateBase = 0; + inst->SrcReg[src].Negate = NEGATE_NONE; inst->SrcReg[src].Abs = 0; - inst->SrcReg[src].NegateAbs = 0; for(i = 0; i < 4; ++i) { if (GET_BIT(sourced, i)) inst->SrcReg[src].Swizzle |= i << (3*i); diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/radeon_program_alu.c index 1ef71e74dcf..8283723bad7 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/radeon_program_alu.c @@ -81,18 +81,6 @@ static struct prog_instruction *emit3(struct gl_program* p, return fpi; } -static void set_swizzle(struct prog_src_register *SrcReg, int coordinate, int swz) -{ - SrcReg->Swizzle &= ~(7 << (3*coordinate)); - SrcReg->Swizzle |= swz << (3*coordinate); -} - -static void set_negate_base(struct prog_src_register *SrcReg, int coordinate, int negate) -{ - SrcReg->NegateBase &= ~(1 << coordinate); - SrcReg->NegateBase |= (negate << coordinate); -} - static struct prog_dst_register dstreg(int file, int index) { struct prog_dst_register dst; @@ -156,15 +144,14 @@ static struct prog_src_register absolute(struct prog_src_register reg) { struct prog_src_register newreg = reg; newreg.Abs = 1; - newreg.NegateBase = 0; - newreg.NegateAbs = 0; + newreg.Negate = NEGATE_NONE; return newreg; } static struct prog_src_register negate(struct prog_src_register reg) { struct prog_src_register newreg = reg; - newreg.NegateAbs = !newreg.NegateAbs; + newreg.Negate = newreg.Negate ^ NEGATE_XYZW; return newreg; } @@ -189,8 +176,7 @@ static void transform_ABS(struct radeon_transform_context* t, { struct prog_src_register src = inst->SrcReg[0]; src.Abs = 1; - src.NegateBase = 0; - src.NegateAbs = 0; + src.Negate = NEGATE_NONE; emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src); } @@ -198,18 +184,9 @@ static void transform_DPH(struct radeon_transform_context* t, struct prog_instruction* inst) { struct prog_src_register src0 = inst->SrcReg[0]; - if (src0.NegateAbs) { - if (src0.Abs) { - int tempreg = radeonFindFreeTemporary(t); - emit1(t->Program, OPCODE_MOV, 0, dstreg(PROGRAM_TEMPORARY, tempreg), src0); - src0 = srcreg(src0.File, src0.Index); - } else { - src0.NegateAbs = 0; - src0.NegateBase ^= NEGATE_XYZW; - } - } - set_swizzle(&src0, 3, SWIZZLE_ONE); - set_negate_base(&src0, 3, 0); + src0.Negate &= ~NEGATE_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= SWIZZLE_ONE << (3 * 3); emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]); } @@ -649,7 +626,7 @@ GLboolean radeonTransformDeriv(struct radeon_transform_context* t, B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE); - B.NegateBase = NEGATE_XYZW; + B.Negate = NEGATE_XYZW; emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], B); diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c index 49aa90dd94a..906d36e5226 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/radeon_program_pair.c @@ -255,8 +255,7 @@ static void final_rewrite(struct pair_state *s, struct prog_instruction *inst) inst->SrcReg[2] = inst->SrcReg[1]; inst->SrcReg[1].File = PROGRAM_BUILTIN; inst->SrcReg[1].Swizzle = SWIZZLE_1111; - inst->SrcReg[1].NegateBase = 0; - inst->SrcReg[1].NegateAbs = 0; + inst->SrcReg[1].Negate = NEGATE_NONE; inst->Opcode = OPCODE_MAD; break; case OPCODE_CMP: @@ -610,6 +609,7 @@ static void emit_all_tex(struct pair_state *s) if (s->Debug) { _mesa_printf(" "); _mesa_print_instruction(inst); + fflush(stdout); } s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst); } @@ -722,7 +722,6 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_ if (pairinst->NeedRGB && !pairinst->IsTranscendent) { GLboolean srcrgb = GL_FALSE; GLboolean srcalpha = GL_FALSE; - GLuint negatebase = 0; int j; for(j = 0; j < 3; ++j) { GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); @@ -730,8 +729,6 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_ srcrgb = GL_TRUE; else if (swz < 4) srcalpha = GL_TRUE; - if (swz != SWIZZLE_NIL && GET_BIT(inst->SrcReg[i].NegateBase, j)) - negatebase = 1; } source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); if (source < 0) @@ -739,12 +736,11 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_ pair->RGB.Arg[i].Source = source; pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->RGB.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs; + pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (NEGATE_X | NEGATE_Y | NEGATE_Z)); } if (pairinst->NeedAlpha) { GLboolean srcrgb = GL_FALSE; GLboolean srcalpha = GL_FALSE; - GLuint negatebase = GET_BIT(inst->SrcReg[i].NegateBase, pairinst->IsTranscendent ? 0 : 3); GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3); if (swz < 3) srcrgb = GL_TRUE; @@ -756,7 +752,7 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_ pair->Alpha.Arg[i].Source = source; pair->Alpha.Arg[i].Swizzle = swz; pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->Alpha.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs; + pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & NEGATE_W); } } diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 9f1d1a3a448..4666518824f 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -1057,10 +1057,10 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7; pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7; - pAsm->S[fld].src.negx = pILInst->SrcReg[src].NegateBase & 0x1; - pAsm->S[fld].src.negy = (pILInst->SrcReg[src].NegateBase >> 1) & 0x1; - pAsm->S[fld].src.negz = (pILInst->SrcReg[src].NegateBase >> 2) & 0x1; - pAsm->S[fld].src.negw = (pILInst->SrcReg[src].NegateBase >> 3) & 0x1; + pAsm->S[fld].src.negx = pILInst->SrcReg[src].Negate & 0x1; + pAsm->S[fld].src.negy = (pILInst->SrcReg[src].Negate >> 1) & 0x1; + pAsm->S[fld].src.negz = (pILInst->SrcReg[src].Negate >> 2) & 0x1; + pAsm->S[fld].src.negw = (pILInst->SrcReg[src].Negate >> 3) & 0x1; return GL_TRUE; } @@ -1197,10 +1197,10 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7; pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7; - pAsm->S[0].src.negx = pILInst->SrcReg[0].NegateBase & 0x1; - pAsm->S[0].src.negy = (pILInst->SrcReg[0].NegateBase >> 1) & 0x1; - pAsm->S[0].src.negz = (pILInst->SrcReg[0].NegateBase >> 2) & 0x1; - pAsm->S[0].src.negw = (pILInst->SrcReg[0].NegateBase >> 3) & 0x1; + pAsm->S[0].src.negx = pILInst->SrcReg[0].Negate & 0x1; + pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1; + pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1; + pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1; return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_clear.c b/src/mesa/drivers/dri/r600/r700_clear.c index b62aa66eb29..f142aafd03e 100644 --- a/src/mesa/drivers/dri/r600/r700_clear.c +++ b/src/mesa/drivers/dri/r600/r700_clear.c @@ -57,7 +57,7 @@ static void r700UserClear(GLcontext *ctx, GLuint mask) void r700Clear(GLcontext * ctx, GLbitfield mask) { context_t *context = R700_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = context->radeon.dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&context->radeon); const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask); GLbitfield swrast_mask = 0, tri_mask = 0; int i; diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index 404e946f7ca..329a4aa179a 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -527,7 +527,7 @@ static void r700UpdateWindow(GLcontext * ctx, int id) //-------------------- context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); - __DRIdrawablePrivate *dPriv = context->radeon.dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&context->radeon); GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0; GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0; const GLfloat *v = ctx->Viewport._WindowMap.m; diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c index 670fa5b6047..0b0a2aa2c46 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c +++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c @@ -48,6 +48,7 @@ #include "radeon_drm.h" #include "radeon_common.h" #include "radeon_bocs_wrapper.h" +#include "radeon_macros.h" /* no seriously texmem.c is this screwed up */ struct bo_legacy_texture_object { @@ -167,6 +168,7 @@ static int legacy_free_handle(struct bo_manager_legacy *bom, uint32_t handle) static void legacy_get_current_age(struct bo_manager_legacy *boml) { drm_radeon_getparam_t gp; + unsigned char *RADEONMMIO = NULL; int r; if (IS_R300_CLASS(boml->screen)) { @@ -178,8 +180,11 @@ static void legacy_get_current_age(struct bo_manager_legacy *boml) fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, r); exit(1); } - } else - boml->current_age = boml->screen->scratch[3]; + } else { + RADEONMMIO = boml->screen->mmio.map; + boml->current_age = boml->screen->scratch[3]; + boml->current_age = INREG(RADEON_GUI_SCRATCH_REG3); + } } static int legacy_is_pending(struct radeon_bo *bo) diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index 691615938f5..60d6bbb5af9 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -62,6 +62,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/bufferobj.h" #include "main/buffers.h" #include "main/depth.h" +#include "main/polygon.h" #include "main/shaders.h" #include "main/texstate.h" #include "main/varray.h" @@ -152,7 +153,7 @@ void radeon_get_cliprects(radeonContextPtr radeon, unsigned int *num_cliprects, int *x_off, int *y_off) { - __DRIdrawablePrivate *dPriv = radeon->dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(radeon); struct radeon_framebuffer *rfb = dPriv->driverPrivate; if (radeon->constant_cliprect) { @@ -184,15 +185,15 @@ void radeon_get_cliprects(radeonContextPtr radeon, */ void radeonSetCliprects(radeonContextPtr radeon) { - __DRIdrawablePrivate *const drawable = radeon->dri.drawable; - __DRIdrawablePrivate *const readable = radeon->dri.readable; + __DRIdrawablePrivate *const drawable = radeon_get_drawable(radeon); + __DRIdrawablePrivate *const readable = radeon_get_readable(radeon); struct radeon_framebuffer *const draw_rfb = drawable->driverPrivate; struct radeon_framebuffer *const read_rfb = readable->driverPrivate; int x_off, y_off; radeon_get_cliprects(radeon, &radeon->pClipRects, &radeon->numClipRects, &x_off, &y_off); - + if ((draw_rfb->base.Width != drawable->w) || (draw_rfb->base.Height != drawable->h)) { _mesa_resize_framebuffer(radeon->glCtx, &draw_rfb->base, @@ -220,9 +221,9 @@ void radeonUpdateScissor( GLcontext *ctx ) { radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - if ( rmesa->dri.drawable ) { - __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; - + if ( radeon_get_drawable(rmesa) ) { + __DRIdrawablePrivate *dPriv = radeon_get_drawable(rmesa); + int x = ctx->Scissor.X; int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height; int w = ctx->Scissor.X + ctx->Scissor.Width - 1; @@ -424,11 +425,11 @@ void radeonCopyBuffer( __DRIdrawablePrivate *dPriv, radeonContextPtr rmesa; struct radeon_framebuffer *rfb; GLint nbox, i, ret; - + assert(dPriv); assert(dPriv->driContextPriv); assert(dPriv->driContextPriv->driverPrivate); - + rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; LOCK_HARDWARE(rmesa); @@ -505,7 +506,7 @@ static int radeonScheduleSwap(__DRIdrawablePrivate *dPriv, GLboolean *missed_tar UNLOCK_HARDWARE(rmesa); driWaitForVBlank(dPriv, missed_target); - + return 0; } @@ -539,7 +540,7 @@ static GLboolean radeonPageFlip( __DRIdrawablePrivate *dPriv ) radeon->sarea->nbox = 1; ret = drmCommandNone( radeon->dri.fd, DRM_RADEON_FLIP ); - + UNLOCK_HARDWARE(radeon); if ( ret ) { @@ -637,7 +638,7 @@ void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb) struct radeon_renderbuffer *rrbDepth = NULL, *rrbStencil = NULL, *rrbColor = NULL; uint32_t offset = 0; - + if (!fb) { /* this can happen during the initial context initialization */ @@ -649,7 +650,7 @@ void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb) radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE); return; } - + /* Do this here, note core Mesa, since this function is called from * many places within the driver. */ @@ -677,6 +678,7 @@ void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb) if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer); radeon->front_cliprects = GL_TRUE; + radeon->front_buffer_dirty = GL_TRUE; } else { rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer); radeon->front_cliprects = GL_FALSE; @@ -735,21 +737,23 @@ void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb) ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace); else ctx->NewState |= _NEW_POLYGON; - + /* * Update depth test state */ if (ctx->Driver.Enable) { ctx->Driver.Enable(ctx, GL_DEPTH_TEST, (ctx->Depth.Test && fb->Visual.depthBits > 0)); + /* Need to update the derived ctx->Stencil._Enabled first */ + _mesa_update_stencil(ctx); ctx->Driver.Enable(ctx, GL_STENCIL_TEST, (ctx->Stencil._Enabled && fb->Visual.stencilBits > 0)); } else { ctx->NewState |= (_NEW_DEPTH | _NEW_STENCIL); } - - radeon->state.depth.rrb = rrbDepth; - radeon->state.color.rrb = rrbColor; + + _mesa_reference_renderbuffer(&radeon->state.depth.rb, &rrbDepth->base); + _mesa_reference_renderbuffer(&radeon->state.color.rb, &rrbColor->base); radeon->state.color.draw_offset = offset; #if 0 @@ -758,16 +762,14 @@ void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb) ctx->Driver.Viewport(ctx, ctx->Viewport.X, ctx->Viewport.Y, ctx->Viewport.Width, ctx->Viewport.Height); } else { - + } #endif ctx->NewState |= _NEW_VIEWPORT; /* Set state we know depends on drawable parameters: */ - if (ctx->Driver.Scissor) - ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y, - ctx->Scissor.Width, ctx->Scissor.Height); + radeonUpdateScissor(ctx); radeon->NewGLState |= _NEW_SCISSOR; if (ctx->Driver.DepthRange) @@ -792,7 +794,25 @@ void radeonDrawBuffer( GLcontext *ctx, GLenum mode ) if (RADEON_DEBUG & DEBUG_DRI) fprintf(stderr, "%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr( mode )); - + + if (ctx->DrawBuffer->Name == 0) { + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + + const GLboolean was_front_buffer_rendering = + radeon->is_front_buffer_rendering; + + radeon->is_front_buffer_rendering = (mode == GL_FRONT_LEFT) || + (mode == GL_FRONT); + + /* If we weren't front-buffer rendering before but we are now, make sure + * that the front-buffer has actually been allocated. + */ + if (!was_front_buffer_rendering && radeon->is_front_buffer_rendering) { + radeon_update_renderbuffers(radeon->dri.context, + radeon->dri.context->driDrawablePriv); + } + } + radeon_draw_buffer(ctx, ctx->DrawBuffer); } @@ -814,7 +834,7 @@ void radeonReadBuffer( GLcontext *ctx, GLenum mode ) */ void radeonUpdatePageFlipping(radeonContextPtr radeon) { - struct radeon_framebuffer *rfb = radeon->dri.drawable->driverPrivate; + struct radeon_framebuffer *rfb = radeon_get_drawable(radeon)->driverPrivate; rfb->pf_active = radeon->sarea->pfState; rfb->pf_current_page = radeon->sarea->pfCurrentPage; @@ -847,24 +867,60 @@ void radeon_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei he old_viewport = ctx->Driver.Viewport; ctx->Driver.Viewport = NULL; - radeon->dri.drawable = driContext->driDrawablePriv; radeon_window_moved(radeon); radeon_draw_buffer(ctx, radeon->glCtx->DrawBuffer); ctx->Driver.Viewport = old_viewport; - - } -static void radeon_print_state_atom(radeonContextPtr radeon, struct radeon_state_atom *state ) -{ - int i; - int dwords = (*state->check)(radeon->glCtx, state); - - fprintf(stderr, "emit %s %d/%d\n", state->name, state->cmd_size, dwords); - if (RADEON_DEBUG & DEBUG_VERBOSE) - for (i = 0 ; i < dwords; i++) - fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]); +static void radeon_print_state_atom(radeonContextPtr radeon, struct radeon_state_atom *state) +{ + int i, j, reg; + int dwords = (*state->check) (radeon->glCtx, state); + drm_r300_cmd_header_t cmd; + + fprintf(stderr, " emit %s %d/%d\n", state->name, dwords, state->cmd_size); + + if (RADEON_DEBUG & DEBUG_VERBOSE) { + for (i = 0; i < dwords;) { + cmd = *((drm_r300_cmd_header_t *) &state->cmd[i]); + reg = (cmd.packet0.reghi << 8) | cmd.packet0.reglo; + fprintf(stderr, " %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n", + state->name, i, reg, cmd.packet0.count); + ++i; + for (j = 0; j < cmd.packet0.count && i < dwords; j++) { + fprintf(stderr, " %s[%d]: 0x%04x = %08x\n", + state->name, i, reg, state->cmd[i]); + reg += 4; + ++i; + } + } + } +} +static void radeon_print_state_atom_kmm(radeonContextPtr radeon, struct radeon_state_atom *state) +{ + int i, j, reg, count; + int dwords = (*state->check) (radeon->glCtx, state); + uint32_t packet0; + + fprintf(stderr, " emit %s %d/%d\n", state->name, dwords, state->cmd_size); + + if (RADEON_DEBUG & DEBUG_VERBOSE) { + for (i = 0; i < dwords;) { + packet0 = state->cmd[i]; + reg = (packet0 & 0x1FFF) << 2; + count = ((packet0 & 0x3FFF0000) >> 16) + 1; + fprintf(stderr, " %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n", + state->name, i, reg, count); + ++i; + for (j = 0; j < count && i < dwords; j++) { + fprintf(stderr, " %s[%d]: 0x%04x = %08x\n", + state->name, i, reg, state->cmd[i]); + reg += 4; + ++i; + } + } + } } static INLINE void radeonEmitAtoms(radeonContextPtr radeon, GLboolean dirty) @@ -882,7 +938,10 @@ static INLINE void radeonEmitAtoms(radeonContextPtr radeon, GLboolean dirty) dwords = (*atom->check) (radeon->glCtx, atom); if (dwords) { if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { - radeon_print_state_atom(radeon, atom); + if (radeon->radeonScreen->kernel_mm) + radeon_print_state_atom_kmm(radeon, atom); + else + radeon_print_state_atom(radeon, atom); } if (atom->emit) { (*atom->emit)(radeon->glCtx, atom); @@ -900,7 +959,7 @@ static INLINE void radeonEmitAtoms(radeonContextPtr radeon, GLboolean dirty) } } } - + COMMIT_BATCH(); } @@ -928,6 +987,7 @@ void radeon_validate_reset_bos(radeonContextPtr radeon) int i; for (i = 0; i < radeon->state.validated_bo_count; i++) { + radeon_bo_unref(radeon->state.bos[i].bo); radeon->state.bos[i].bo = NULL; radeon->state.bos[i].read_domains = 0; radeon->state.bos[i].write_domain = 0; @@ -938,6 +998,7 @@ void radeon_validate_reset_bos(radeonContextPtr radeon) void radeon_validate_bo(radeonContextPtr radeon, struct radeon_bo *bo, uint32_t read_domains, uint32_t write_domain) { + radeon_bo_ref(bo); radeon->state.bos[radeon->state.validated_bo_count].bo = bo; radeon->state.bos[radeon->state.validated_bo_count].read_domains = read_domains; radeon->state.bos[radeon->state.validated_bo_count].write_domain = write_domain; @@ -968,7 +1029,7 @@ void radeonEmitState(radeonContextPtr radeon) if (!radeon->cmdbuf.cs->cdw) { if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "Begin reemit state\n"); - + radeonEmitAtoms(radeon, GL_FALSE); } @@ -1000,9 +1061,29 @@ void radeonFlush(GLcontext *ctx) radeon->dma.flush( ctx ); radeonEmitState(radeon); - + if (radeon->cmdbuf.cs->cdw) rcommonFlushCmdBuf(radeon, __FUNCTION__); + + if ((ctx->DrawBuffer->Name == 0) && radeon->front_buffer_dirty) { + __DRIscreen *const screen = radeon->radeonScreen->driScreen; + + if (screen->dri2.loader && (screen->dri2.loader->base.version >= 2) + && (screen->dri2.loader->flushFrontBuffer != NULL)) { + __DRIdrawablePrivate * drawable = radeon_get_drawable(radeon); + (*screen->dri2.loader->flushFrontBuffer)(drawable, drawable->loaderPrivate); + + /* Only clear the dirty bit if front-buffer rendering is no longer + * enabled. This is done so that the dirty bit can only be set in + * glDrawBuffer. Otherwise the dirty bit would have to be set at + * each of N places that do rendering. This has worse performances, + * but it is much easier to get correct. + */ + if (radeon->is_front_buffer_rendering) { + radeon->front_buffer_dirty = GL_FALSE; + } + } + } } /* Make sure all commands have been sent to the hardware and have @@ -1077,7 +1158,7 @@ int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller) int ret; radeonReleaseDmaRegion(rmesa); - + LOCK_HARDWARE(rmesa); ret = rcommonFlushCmdBufLocked(rmesa, caller); UNLOCK_HARDWARE(rmesa); @@ -1139,12 +1220,12 @@ void rcommonInitCmdBuf(radeonContextPtr rmesa) rmesa->cmdbuf.cs = radeon_cs_create(rmesa->cmdbuf.csm, size); assert(rmesa->cmdbuf.cs != NULL); rmesa->cmdbuf.size = size; - + if (!rmesa->radeonScreen->kernel_mm) { radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, rmesa->radeonScreen->texSize[0]); radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, rmesa->radeonScreen->gartTextures.size); } else { - struct drm_radeon_gem_info mminfo; + struct drm_radeon_gem_info mminfo = { 0 }; if (!drmCommandWriteRead(rmesa->dri.fd, DRM_RADEON_GEM_INFO, &mminfo, sizeof(mminfo))) { @@ -1250,18 +1331,19 @@ void radeon_clear_tris(GLcontext *ctx, GLbitfield mask) unsigned int saved_active_texture; assert((mask & ~(TRI_CLEAR_COLOR_BITS | BUFFER_BIT_DEPTH | - BUFFER_BIT_STENCIL)) == 0); + BUFFER_BIT_STENCIL)) == 0); _mesa_PushAttrib(GL_COLOR_BUFFER_BIT | GL_CURRENT_BIT | GL_DEPTH_BUFFER_BIT | GL_ENABLE_BIT | + GL_POLYGON_BIT | GL_STENCIL_BUFFER_BIT | GL_TRANSFORM_BIT | GL_CURRENT_BIT); _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT); saved_active_texture = ctx->Texture.CurrentUnit; - + /* Disable existing GL state we don't want to apply to a clear. */ _mesa_Disable(GL_ALPHA_TEST); _mesa_Disable(GL_BLEND); @@ -1277,6 +1359,7 @@ void radeon_clear_tris(GLcontext *ctx, GLbitfield mask) _mesa_Disable(GL_CLIP_PLANE3); _mesa_Disable(GL_CLIP_PLANE4); _mesa_Disable(GL_CLIP_PLANE5); + _mesa_PolygonMode(GL_FRONT_AND_BACK, GL_FILL); if (ctx->Extensions.ARB_fragment_program && ctx->FragmentProgram.Enabled) { saved_fp_enable = GL_TRUE; _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB); @@ -1289,10 +1372,10 @@ void radeon_clear_tris(GLcontext *ctx, GLbitfield mask) saved_shader_program = ctx->Shader.CurrentProgram->Name; _mesa_UseProgramObjectARB(0); } - + if (ctx->Texture._EnabledUnits != 0) { int i; - + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { _mesa_ActiveTextureARB(GL_TEXTURE0 + i); _mesa_Disable(GL_TEXTURE_1D); @@ -1308,9 +1391,14 @@ void radeon_clear_tris(GLcontext *ctx, GLbitfield mask) } } } - + +#if FEATURE_ARB_vertex_buffer_object + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0); + _mesa_BindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, 0); +#endif + radeon_meta_set_passthrough_transform(rmesa); - + for (i = 0; i < 4; i++) { color[i][0] = ctx->Color.ClearColor[0]; color[i][1] = ctx->Color.ClearColor[1]; @@ -1380,7 +1468,7 @@ void radeon_clear_tris(GLcontext *ctx, GLbitfield mask) if (this_mask & BUFFER_BIT_STENCIL) { _mesa_Enable(GL_STENCIL_TEST); _mesa_StencilOp(GL_REPLACE, GL_REPLACE, GL_REPLACE); - _mesa_StencilFuncSeparate(GL_FRONT, GL_ALWAYS, ctx->Stencil.Clear, + _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS, ctx->Stencil.Clear, ctx->Stencil.WriteMask[0]); } else { _mesa_Disable(GL_STENCIL_TEST); diff --git a/src/mesa/drivers/dri/radeon/radeon_common.h b/src/mesa/drivers/dri/radeon/radeon_common.h index aa6058c6842..2cefb53fe04 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.h +++ b/src/mesa/drivers/dri/radeon/radeon_common.h @@ -82,7 +82,7 @@ static inline struct radeon_renderbuffer *radeon_get_renderbuffer(struct gl_fram static inline struct radeon_renderbuffer *radeon_get_depthbuffer(radeonContextPtr rmesa) { struct radeon_renderbuffer *rrb; - rrb = rmesa->state.depth.rrb; + rrb = radeon_renderbuffer(rmesa->state.depth.rb); if (!rrb) return NULL; @@ -93,7 +93,7 @@ static inline struct radeon_renderbuffer *radeon_get_colorbuffer(radeonContextPt { struct radeon_renderbuffer *rrb; - rrb = rmesa->state.color.rrb; + rrb = radeon_renderbuffer(rmesa->state.color.rb); if (!rrb) return NULL; return rrb; diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 1e900865f02..4bf006c7a55 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -39,6 +39,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "drirenderbuffer.h" #include "main/context.h" #include "main/framebuffer.h" +#include "main/renderbuffer.h" #include "main/state.h" #include "main/simple_list.h" #include "swrast/swrast.h" @@ -51,6 +52,40 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. int RADEON_DEBUG = (0); #endif + +static const char* get_chip_family_name(int chip_family) +{ + switch(chip_family) { + case CHIP_FAMILY_R100: return "R100"; + case CHIP_FAMILY_RV100: return "RV100"; + case CHIP_FAMILY_RS100: return "RS100"; + case CHIP_FAMILY_RV200: return "RV200"; + case CHIP_FAMILY_RS200: return "RS200"; + case CHIP_FAMILY_R200: return "R200"; + case CHIP_FAMILY_RV250: return "RV250"; + case CHIP_FAMILY_RS300: return "RS300"; + case CHIP_FAMILY_RV280: return "RV280"; + case CHIP_FAMILY_R300: return "R300"; + case CHIP_FAMILY_R350: return "R350"; + case CHIP_FAMILY_RV350: return "RV350"; + case CHIP_FAMILY_RV380: return "RV380"; + case CHIP_FAMILY_R420: return "R420"; + case CHIP_FAMILY_RV410: return "RV410"; + case CHIP_FAMILY_RS400: return "RS400"; + case CHIP_FAMILY_RS600: return "RS600"; + case CHIP_FAMILY_RS690: return "RS690"; + case CHIP_FAMILY_RS740: return "RS740"; + case CHIP_FAMILY_RV515: return "RV515"; + case CHIP_FAMILY_R520: return "R520"; + case CHIP_FAMILY_RV530: return "RV530"; + case CHIP_FAMILY_R580: return "R580"; + case CHIP_FAMILY_RV560: return "RV560"; + case CHIP_FAMILY_RV570: return "RV570"; + default: return "unknown"; + } +} + + /* Return various strings for glGetString(). */ static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name) @@ -72,18 +107,24 @@ static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name) unsigned offset; GLuint agp_mode = (radeon->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 : radeon->radeonScreen->AGPMode; - const char* chipname; + const char* chipclass; + char hardwarename[32]; if (IS_R600_CLASS(radeon->radeonScreen)) - chipname = "R600"; + chipclass = "R600"; else if (IS_R300_CLASS(radeon->radeonScreen)) - chipname = "R300"; + chipclass = "R300"; else if (IS_R200_CLASS(radeon->radeonScreen)) - chipname = "R200"; + chipclass = "R200"; else - chipname = "R100"; + chipclass = "R100"; - offset = driGetRendererString(buffer, chipname, DRIVER_DATE, + sprintf(hardwarename, "%s (%s %04X)", + chipclass, + get_chip_family_name(radeon->radeonScreen->chip_family), + radeon->radeonScreen->device_id); + + offset = driGetRendererString(buffer, hardwarename, DRIVER_DATE, agp_mode); if (IS_R600_CLASS(radeon->radeonScreen)) { @@ -152,8 +193,6 @@ GLboolean radeonInitContext(radeonContextPtr radeon, /* DRI fields */ radeon->dri.context = driContextPriv; radeon->dri.screen = sPriv; - radeon->dri.drawable = NULL; - radeon->dri.readable = NULL; radeon->dri.hwContext = driContextPriv->hHWContext; radeon->dri.hwLock = &sPriv->pSAREA->lock; radeon->dri.fd = sPriv->fd; @@ -176,13 +215,15 @@ GLboolean radeonInitContext(radeonContextPtr radeon, "IRQ's not enabled, falling back to %s: %d %d\n", radeon->do_usleeps ? "usleeps" : "busy waits", fthrottle_mode, radeon->radeonScreen->irq); - + radeon->texture_depth = driQueryOptioni (&radeon->optionCache, "texture_depth"); if (radeon->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB) radeon->texture_depth = ( glVisual->rgbBits > 16 ) ? DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16; + radeon->texture_row_align = 32; + return GL_TRUE; } @@ -225,7 +266,7 @@ void radeonDestroyContext(__DRIcontextPrivate *driContextPriv ) radeon_firevertices(radeon); _mesa_make_current(NULL, NULL, NULL); } - + assert(radeon); if (radeon) { @@ -249,14 +290,11 @@ void radeonDestroyContext(__DRIcontextPrivate *driContextPriv ) _tnl_DestroyContext( radeon->glCtx ); _vbo_DestroyContext( radeon->glCtx ); _swrast_DestroyContext( radeon->glCtx ); - - radeonDestroyBuffer(radeon->dri.drawable); - radeonDestroyBuffer(radeon->dri.readable); /* free atom list */ /* free the Mesa context */ _mesa_destroy_context(radeon->glCtx); - + /* _mesa_destroy_context() might result in calls to functions that * depend on the DriverCtx, so don't set it to NULL before. * @@ -264,7 +302,7 @@ void radeonDestroyContext(__DRIcontextPrivate *driContextPriv ) */ /* free the option cache */ driDestroyOptionCache(&radeon->optionCache); - + rcommonDestroyCmdBuf(radeon); #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) /* +r6/r7 */ @@ -405,12 +443,12 @@ radeon_make_renderbuffer_current(radeonContextPtr radeon, int size = 4096*4096*4; /* if radeon->fake */ struct radeon_renderbuffer *rb; - + if (radeon->radeonScreen->kernel_mm) { radeon_make_kernel_renderbuffer_current(radeon, draw); return; } - + if ((rb = (void *)draw->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) { if (!rb->bo) { @@ -510,12 +548,29 @@ radeon_make_renderbuffer_current(radeonContextPtr radeon, } } +static unsigned +radeon_bits_per_pixel(const struct radeon_renderbuffer *rb) +{ + switch (rb->base._ActualFormat) { + case GL_RGB5: + case GL_DEPTH_COMPONENT16: + return 16; + case GL_RGB8: + case GL_RGBA8: + case GL_DEPTH_COMPONENT24: + case GL_DEPTH24_STENCIL8_EXT: + case GL_STENCIL_INDEX8_EXT: + return 32; + default: + return 0; + } +} void radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) { unsigned int attachments[10]; - __DRIbuffer *buffers; + __DRIbuffer *buffers = NULL; __DRIscreen *screen; struct radeon_renderbuffer *rb; int i, count; @@ -526,26 +581,68 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) if (RADEON_DEBUG & DEBUG_DRI) fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable); - + draw = drawable->driverPrivate; screen = context->driScreenPriv; radeon = (radeonContextPtr) context->driverPrivate; - i = 0; - if (draw->color_rb[0]) - attachments[i++] = __DRI_BUFFER_FRONT_LEFT; - if (draw->color_rb[1]) - attachments[i++] = __DRI_BUFFER_BACK_LEFT; - if (radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH)) - attachments[i++] = __DRI_BUFFER_DEPTH; - if (radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL)) - attachments[i++] = __DRI_BUFFER_STENCIL; - - buffers = (*screen->dri2.loader->getBuffers)(drawable, - &drawable->w, - &drawable->h, - attachments, i, - &count, - drawable->loaderPrivate); + + if (screen->dri2.loader + && (screen->dri2.loader->base.version > 2) + && (screen->dri2.loader->getBuffersWithFormat != NULL)) { + struct radeon_renderbuffer *depth_rb; + struct radeon_renderbuffer *stencil_rb; + + i = 0; + if ((radeon->is_front_buffer_rendering || !draw->color_rb[1]) + && draw->color_rb[0]) { + attachments[i++] = __DRI_BUFFER_FRONT_LEFT; + attachments[i++] = radeon_bits_per_pixel(draw->color_rb[0]); + } + + if (draw->color_rb[1]) { + attachments[i++] = __DRI_BUFFER_BACK_LEFT; + attachments[i++] = radeon_bits_per_pixel(draw->color_rb[1]); + } + + depth_rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH); + stencil_rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL); + + if ((depth_rb != NULL) && (stencil_rb != NULL)) { + attachments[i++] = __DRI_BUFFER_DEPTH_STENCIL; + attachments[i++] = radeon_bits_per_pixel(depth_rb); + } else if (depth_rb != NULL) { + attachments[i++] = __DRI_BUFFER_DEPTH; + attachments[i++] = radeon_bits_per_pixel(depth_rb); + } else if (stencil_rb != NULL) { + attachments[i++] = __DRI_BUFFER_STENCIL; + attachments[i++] = radeon_bits_per_pixel(stencil_rb); + } + + buffers = (*screen->dri2.loader->getBuffersWithFormat)(drawable, + &drawable->w, + &drawable->h, + attachments, i / 2, + &count, + drawable->loaderPrivate); + } else if (screen->dri2.loader) { + i = 0; + if (draw->color_rb[0]) + attachments[i++] = __DRI_BUFFER_FRONT_LEFT; + if (draw->color_rb[1]) + attachments[i++] = __DRI_BUFFER_BACK_LEFT; + if (radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH)) + attachments[i++] = __DRI_BUFFER_DEPTH; + if (radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL)) + attachments[i++] = __DRI_BUFFER_STENCIL; + + buffers = (*screen->dri2.loader->getBuffers)(drawable, + &drawable->w, + &drawable->h, + attachments, i, + &count, + drawable->loaderPrivate); + } + if (buffers == NULL) return; @@ -570,6 +667,10 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) rb = draw->color_rb[0]; regname = "dri2 front buffer"; break; + case __DRI_BUFFER_FAKE_FRONT_LEFT: + rb = draw->color_rb[0]; + regname = "dri2 fake front buffer"; + break; case __DRI_BUFFER_BACK_LEFT: rb = draw->color_rb[1]; regname = "dri2 back buffer"; @@ -578,6 +679,10 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH); regname = "dri2 depth buffer"; break; + case __DRI_BUFFER_DEPTH_STENCIL: + rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH); + regname = "dri2 depth / stencil buffer"; + break; case __DRI_BUFFER_STENCIL: rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL); regname = "dri2 stencil buffer"; @@ -637,7 +742,7 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) fprintf(stderr, "failed to attach %s %d\n", regname, buffers[i].name); - + } } @@ -649,7 +754,24 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) radeon_renderbuffer_set_bo(rb, bo); radeon_bo_unref(bo); - + + if (buffers[i].attachment == __DRI_BUFFER_DEPTH_STENCIL) { + rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL); + if (rb != NULL) { + struct radeon_bo *stencil_bo = NULL; + + if (rb->bo) { + uint32_t name = radeon_gem_name_bo(rb->bo); + if (name == buffers[i].name) + continue; + } + + stencil_bo = bo; + radeon_bo_ref(stencil_bo); + radeon_renderbuffer_set_bo(rb, stencil_bo); + radeon_bo_unref(stencil_bo); + } + } } driUpdateFramebufferSize(radeon->glCtx, drawable); @@ -677,14 +799,14 @@ GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, drfb = driDrawPriv->driverPrivate; readfb = driReadPriv->driverPrivate; - if (driContextPriv->driScreenPriv->dri2.enabled) { + if (driContextPriv->driScreenPriv->dri2.enabled) { radeon_update_renderbuffers(driContextPriv, driDrawPriv); if (driDrawPriv != driReadPriv) radeon_update_renderbuffers(driContextPriv, driReadPriv); - radeon->state.color.rrb = - radeon_get_renderbuffer(&drfb->base, BUFFER_BACK_LEFT); - radeon->state.depth.rrb = - radeon_get_renderbuffer(&drfb->base, BUFFER_DEPTH); + _mesa_reference_renderbuffer(&radeon->state.color.rb, + &(radeon_get_renderbuffer(&drfb->base, BUFFER_BACK_LEFT)->base)); + _mesa_reference_renderbuffer(&radeon->state.depth.rb, + &(radeon_get_renderbuffer(&drfb->base, BUFFER_DEPTH)->base)); } else { radeon_make_renderbuffer_current(radeon, drfb); } @@ -692,9 +814,6 @@ GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, if (RADEON_DEBUG & DEBUG_DRI) fprintf(stderr, "%s ctx %p dfb %p rfb %p\n", __FUNCTION__, radeon->glCtx, drfb, readfb); - if (radeon->dri.readable != driReadPriv) - radeon->dri.readable = driReadPriv; - driUpdateFramebufferSize(radeon->glCtx, driDrawPriv); if (driReadPriv != driDrawPriv) driUpdateFramebufferSize(radeon->glCtx, driReadPriv); @@ -704,29 +823,25 @@ GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, _mesa_update_state(radeon->glCtx); if (radeon->glCtx->DrawBuffer == &drfb->base) { - - if (radeon->dri.drawable != driDrawPriv) { - if (driDrawPriv->swap_interval == (unsigned)-1) { - int i; - driDrawPriv->vblFlags = - (radeon->radeonScreen->irq != 0) - ? driGetDefaultVBlankFlags(&radeon-> - optionCache) - : VBLANK_FLAG_NO_IRQ; - - driDrawableInitVBlank(driDrawPriv); - drfb->vbl_waited = driDrawPriv->vblSeq; - - for (i = 0; i < 2; i++) { - if (drfb->color_rb[i]) - drfb->color_rb[i]->vbl_pending = driDrawPriv->vblSeq; - } - + if (driDrawPriv->swap_interval == (unsigned)-1) { + int i; + driDrawPriv->vblFlags = + (radeon->radeonScreen->irq != 0) + ? driGetDefaultVBlankFlags(&radeon-> + optionCache) + : VBLANK_FLAG_NO_IRQ; + + driDrawableInitVBlank(driDrawPriv); + drfb->vbl_waited = driDrawPriv->vblSeq; + + for (i = 0; i < 2; i++) { + if (drfb->color_rb[i]) + drfb->color_rb[i]->vbl_pending = driDrawPriv->vblSeq; } - radeon->dri.drawable = driDrawPriv; - - radeon_window_moved(radeon); + } + + radeon_window_moved(radeon); radeon_draw_buffer(radeon->glCtx, &drfb->base); } diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index 07e60a28a46..96bc6858765 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -117,17 +117,17 @@ struct radeon_framebuffer }; - + struct radeon_colorbuffer_state { GLuint clear; int roundEnable; - struct radeon_renderbuffer *rrb; + struct gl_renderbuffer *rb; uint32_t draw_offset; /* offset into color renderbuffer - FBOs */ }; struct radeon_depthbuffer_state { GLuint clear; - struct radeon_renderbuffer *rrb; + struct gl_renderbuffer *rb; }; struct radeon_scissor_state { @@ -346,16 +346,6 @@ struct radeon_dri_mirror { __DRIcontextPrivate *context; /* DRI context */ __DRIscreenPrivate *screen; /* DRI screen */ - /** - * DRI drawable bound to this context for drawing. - */ - __DRIdrawablePrivate *drawable; - - /** - * DRI drawable bound to this context for reading. - */ - __DRIdrawablePrivate *readable; - drm_context_t hwContext; drm_hw_lock_t *hwLock; int fd; @@ -388,7 +378,7 @@ typedef void (*radeon_line_func) (radeonContextPtr, typedef void (*radeon_point_func) (radeonContextPtr, radeonVertex *); -#define RADEON_MAX_BOS 24 +#define RADEON_MAX_BOS 32 struct radeon_state { struct radeon_colorbuffer_state color; struct radeon_depthbuffer_state depth; @@ -416,11 +406,12 @@ struct radeon_cmdbuf { struct radeon_context { GLcontext *glCtx; radeonScreenPtr radeonScreen; /* Screen private DRI data */ - + /* Texture object bookkeeping */ int texture_depth; float initialMaxAnisotropy; + uint32_t texture_row_align; struct radeon_dma dma; struct radeon_hw_state hw; @@ -457,11 +448,27 @@ struct radeon_context { driOptionCache optionCache; struct radeon_cmdbuf cmdbuf; - + drm_clip_rect_t fboRect; GLboolean constant_cliprect; /* use for FBO or DRI2 rendering */ GLboolean front_cliprects; + /** + * Set if rendering has occured to the drawable's front buffer. + * + * This is used in the DRI2 case to detect that glFlush should also copy + * the contents of the fake front buffer to the real front buffer. + */ + GLboolean front_buffer_dirty; + + /** + * Track whether front-buffer rendering is currently enabled + * + * A separate flag is used to track this in order to support MRT more + * easily. + */ + GLboolean is_front_buffer_rendering; + struct { struct gl_fragment_program *bitmap_fp; struct gl_vertex_program *passthrough_vp; @@ -490,6 +497,17 @@ struct radeon_context { #define RADEON_CONTEXT(glctx) ((radeonContextPtr)(ctx->DriverCtx)) +static inline __DRIdrawablePrivate* radeon_get_drawable(radeonContextPtr radeon) +{ + return radeon->dri.context->driDrawablePriv; +} + +static inline __DRIdrawablePrivate* radeon_get_readable(radeonContextPtr radeon) +{ + return radeon->dri.context->driReadablePriv; +} + + /** * This function takes a float and packs it into a uint32_t */ diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c index 2600c78df39..8f780c443c9 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_context.c @@ -283,6 +283,11 @@ r100CreateContext( const __GLcontextModes *glVisual, i = driQueryOptioni( &rmesa->radeon.optionCache, "allow_large_textures"); + /* FIXME: When no memory manager is available we should set this + * to some reasonable value based on texture memory pool size */ + /* FIXME: does r100 support 2048x2048 texture ? */ + ctx->Const.MaxTextureLevels = 12; + ctx->Const.MaxTextureMaxAnisotropy = 16.0; /* No wide points. diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c index b33417e93e3..a2727ef6f7f 100644 --- a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c +++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c @@ -267,14 +267,6 @@ static int cs_set_age(struct radeon_cs *cs) return 0; } -static void dump_cmdbuf(struct radeon_cs *cs) -{ - int i; - for (i = 0; i < cs->cdw; i++){ - fprintf(stderr,"%x: %08x\n", i, cs->packets[i]); - } - -} static int cs_emit(struct radeon_cs *cs) { struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm; diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c index db96269337e..2fbf89bf6de 100644 --- a/src/mesa/drivers/dri/radeon/radeon_dma.c +++ b/src/mesa/drivers/dri/radeon/radeon_dma.c @@ -164,7 +164,7 @@ void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size) { - size = MAX2(size, MAX_DMA_BUF_SZ * 16); + size = MAX2(size, MAX_DMA_BUF_SZ); if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) fprintf(stderr, "%s\n", __FUNCTION__); @@ -208,7 +208,13 @@ again_alloc: if (radeon_revalidate_bos(rmesa->glCtx) == GL_FALSE) fprintf(stderr,"failure to revalidate BOs - badness\n"); - + + if (!rmesa->dma.current) { + /* Cmd buff have been flushed in radeon_revalidate_bos */ + rmesa->dma.nr_released_bufs = 0; + goto again_alloc; + } + radeon_bo_map(rmesa->dma.current, 1); } @@ -325,6 +331,9 @@ void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ) radeonContextPtr radeon = RADEON_CONTEXT( ctx ); int i; + if (radeon->dma.flush) { + radeon->dma.flush(radeon->glCtx); + } if (radeon->tcl.elt_dma_bo) { radeon_bo_unref(radeon->tcl.elt_dma_bo); radeon->tcl.elt_dma_bo = NULL; diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c index b5fde6d3de5..01c45df2dfa 100644 --- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c +++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c @@ -35,7 +35,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include <sched.h> -#include <errno.h> +#include <errno.h> #include "main/attrib.h" #include "main/enable.h" @@ -113,8 +113,33 @@ void radeonSetUpAtomList( r100ContextPtr rmesa ) insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.glt); } +void radeonEmitScissor(r100ContextPtr rmesa) +{ + BATCH_LOCALS(&rmesa->radeon); + if (!rmesa->radeon.radeonScreen->kernel_mm) { + return; + } + if (rmesa->radeon.state.scissor.enabled) { + BEGIN_BATCH(6); + OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 0)); + OUT_BATCH(rmesa->hw.ctx.cmd[CTX_PP_CNTL] | RADEON_SCISSOR_ENABLE); + OUT_BATCH(CP_PACKET0(RADEON_RE_TOP_LEFT, 0)); + OUT_BATCH((rmesa->radeon.state.scissor.rect.y1 << 16) | + rmesa->radeon.state.scissor.rect.x1); + OUT_BATCH(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0)); + OUT_BATCH(((rmesa->radeon.state.scissor.rect.y2 - 1) << 16) | + (rmesa->radeon.state.scissor.rect.x2 - 1)); + END_BATCH(); + } else { + BEGIN_BATCH(2); + OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 0)); + OUT_BATCH(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & ~RADEON_SCISSOR_ENABLE); + END_BATCH(); + } +} + /* Fire a section of the retained (indexed_verts) buffer as a regular - * primtive. + * primtive. */ extern void radeonEmitVbufPrim( r100ContextPtr rmesa, GLuint vertex_format, @@ -124,8 +149,9 @@ extern void radeonEmitVbufPrim( r100ContextPtr rmesa, BATCH_LOCALS(&rmesa->radeon); assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND)); - + radeonEmitState(&rmesa->radeon); + radeonEmitScissor(rmesa); #if RADEON_OLD_PACKETS BEGIN_BATCH(8); @@ -135,7 +161,7 @@ extern void radeonEmitVbufPrim( r100ContextPtr rmesa, } else { OUT_BATCH(rmesa->ioctl.vertex_offset); } - + OUT_BATCH(vertex_nr); OUT_BATCH(vertex_format); OUT_BATCH(primitive | RADEON_CP_VC_CNTL_PRIM_WALK_LIST | @@ -149,10 +175,10 @@ extern void radeonEmitVbufPrim( r100ContextPtr rmesa, RADEON_GEM_DOMAIN_GTT, 0, 0); } - + END_BATCH(); - -#else + +#else BEGIN_BATCH(4); OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_DRAW_VBUF, 1); OUT_BATCH(vertex_format); @@ -173,7 +199,7 @@ void radeonFlushElts( GLcontext *ctx ) int nr; uint32_t *cmd = (uint32_t *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_start); int dwords = (rmesa->radeon.cmdbuf.cs->section_ndw - rmesa->radeon.cmdbuf.cs->section_cdw); - + if (RADEON_DEBUG & DEBUG_IOCTL) fprintf(stderr, "%s\n", __FUNCTION__); @@ -230,9 +256,10 @@ GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa, fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive); assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND)); - + radeonEmitState(&rmesa->radeon); - + radeonEmitScissor(rmesa); + rmesa->tcl.elt_cmd_start = rmesa->radeon.cmdbuf.cs->cdw; /* round up min_nr to align the state */ @@ -248,16 +275,15 @@ GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa, } OUT_BATCH(0xffff); OUT_BATCH(vertex_format); - OUT_BATCH(primitive | + OUT_BATCH(primitive | RADEON_CP_VC_CNTL_PRIM_WALK_IND | RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE); - #else BEGIN_BATCH_NO_AUTOSTATE(ELTS_BUFSZ(align_min_nr)/4); OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_DRAW_INDX, 0); OUT_BATCH(vertex_format); - OUT_BATCH(primitive | + OUT_BATCH(primitive | RADEON_CP_VC_CNTL_PRIM_WALK_IND | RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | RADEON_CP_VC_CNTL_MAOS_ENABLE | @@ -269,7 +295,7 @@ GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa, rmesa->tcl.elt_used = min_nr; retval = (GLushort *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_offset); - + if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s: header prim %x \n", __FUNCTION__, primitive); @@ -305,7 +331,7 @@ void radeonEmitVertexAOS( r100ContextPtr rmesa, #endif } - + void radeonEmitAOS( r100ContextPtr rmesa, GLuint nr, @@ -314,7 +340,7 @@ void radeonEmitAOS( r100ContextPtr rmesa, #if RADEON_OLD_PACKETS assert( nr == 1 ); rmesa->ioctl.bo = rmesa->radeon.tcl.aos[0].bo; - rmesa->ioctl.vertex_offset = + rmesa->ioctl.vertex_offset = (rmesa->radeon.tcl.aos[0].offset + offset * rmesa->radeon.tcl.aos[0].stride * 4); #else BATCH_LOCALS(&rmesa->radeon); @@ -336,7 +362,7 @@ void radeonEmitAOS( r100ContextPtr rmesa, (rmesa->radeon.tcl.aos[i].stride << 8) | (rmesa->radeon.tcl.aos[i + 1].components << 16) | (rmesa->radeon.tcl.aos[i + 1].stride << 24)); - + voffset = rmesa->radeon.tcl.aos[i + 0].offset + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; OUT_BATCH_RELOC(voffset, @@ -352,7 +378,7 @@ void radeonEmitAOS( r100ContextPtr rmesa, RADEON_GEM_DOMAIN_GTT, 0, 0); } - + if (nr & 1) { OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) | (rmesa->radeon.tcl.aos[nr - 1].stride << 8)); @@ -370,7 +396,7 @@ void radeonEmitAOS( r100ContextPtr rmesa, (rmesa->radeon.tcl.aos[i].stride << 8) | (rmesa->radeon.tcl.aos[i + 1].components << 16) | (rmesa->radeon.tcl.aos[i + 1].stride << 24)); - + voffset = rmesa->radeon.tcl.aos[i + 0].offset + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; OUT_BATCH(voffset); @@ -378,7 +404,7 @@ void radeonEmitAOS( r100ContextPtr rmesa, offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride; OUT_BATCH(voffset); } - + if (nr & 1) { OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) | (rmesa->radeon.tcl.aos[nr - 1].stride << 8)); @@ -427,7 +453,7 @@ static void radeonUserClear(GLcontext *ctx, GLuint mask) static void radeonKernelClear(GLcontext *ctx, GLuint flags) { r100ContextPtr rmesa = R100_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); drm_radeon_sarea_t *sarea = rmesa->radeon.sarea; uint32_t clear; GLint ret, i; @@ -529,7 +555,7 @@ static void radeonKernelClear(GLcontext *ctx, GLuint flags) depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1; depth_boxes[n].f[CLEAR_X2] = (float)b[n].x2; depth_boxes[n].f[CLEAR_Y2] = (float)b[n].y2; - depth_boxes[n].f[CLEAR_DEPTH] = + depth_boxes[n].f[CLEAR_DEPTH] = (float)rmesa->radeon.state.depth.clear; } @@ -548,7 +574,7 @@ static void radeonKernelClear(GLcontext *ctx, GLuint flags) static void radeonClear( GLcontext *ctx, GLbitfield mask ) { r100ContextPtr rmesa = R100_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); GLuint flags = 0; GLuint color_mask = 0; GLuint orig_mask = mask; @@ -560,11 +586,11 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask ) { LOCK_HARDWARE( &rmesa->radeon ); UNLOCK_HARDWARE( &rmesa->radeon ); - if ( dPriv->numClipRects == 0 ) + if ( dPriv->numClipRects == 0 ) return; } - - radeon_firevertices(&rmesa->radeon); + + radeon_firevertices(&rmesa->radeon); if ( mask & BUFFER_BIT_FRONT_LEFT ) { flags |= RADEON_FRONT; @@ -594,12 +620,12 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask ) _swrast_Clear( ctx, mask ); } - if ( !flags ) + if ( !flags ) return; if (rmesa->using_hyperz) { flags |= RADEON_USE_COMP_ZBUF; -/* if (rmesa->radeon.radeonScreen->chipset & RADEON_CHIPSET_TCL) +/* if (rmesa->radeon.radeonScreen->chipset & RADEON_CHIPSET_TCL) flags |= RADEON_USE_HIERZ; */ if (((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) && ((rmesa->radeon.state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) { diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c index fe19218d7ae..5774f7ebcf7 100644 --- a/src/mesa/drivers/dri/radeon/radeon_lock.c +++ b/src/mesa/drivers/dri/radeon/radeon_lock.c @@ -58,8 +58,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ void radeonGetLock(radeonContextPtr rmesa, GLuint flags) { - __DRIdrawablePrivate *const drawable = rmesa->dri.drawable; - __DRIdrawablePrivate *const readable = rmesa->dri.readable; + __DRIdrawablePrivate *const drawable = radeon_get_drawable(rmesa); + __DRIdrawablePrivate *const readable = radeon_get_readable(rmesa); __DRIscreenPrivate *sPriv = rmesa->dri.screen; assert(drawable != NULL); @@ -95,8 +95,8 @@ void radeon_lock_hardware(radeonContextPtr radeon) struct radeon_framebuffer *rfb = NULL; struct radeon_renderbuffer *rrb = NULL; - if (radeon->dri.drawable) { - rfb = radeon->dri.drawable->driverPrivate; + if (radeon_get_drawable(radeon)) { + rfb = radeon_get_drawable(radeon)->driverPrivate; if (rfb) rrb = radeon_get_renderbuffer(&rfb->base, diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c index 0a33fe4afa4..f04a07fecd2 100644 --- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c @@ -86,10 +86,11 @@ static int radeon_compressed_num_bytes(GLuint mesaFormat) * \param curOffset points to the offset at which the image is to be stored * and is updated by this function according to the size of the image. */ -static void compute_tex_image_offset(radeon_mipmap_tree *mt, +static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree *mt, GLuint face, GLuint level, GLuint* curOffset) { radeon_mipmap_level *lvl = &mt->levels[level]; + uint32_t row_align = rmesa->texture_row_align - 1; /* Find image size in bytes */ if (mt->compressed) { @@ -107,7 +108,7 @@ static void compute_tex_image_offset(radeon_mipmap_tree *mt, lvl->rowstride = (lvl->width * mt->bpp * 2 + 31) & ~31; lvl->size = lvl->rowstride * ((lvl->height + 1) / 2) * lvl->depth; } else { - lvl->rowstride = (lvl->width * mt->bpp + 31) & ~31; + lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align; lvl->size = lvl->rowstride * lvl->height * lvl->depth; } assert(lvl->size > 0); @@ -131,14 +132,40 @@ static GLuint minify(GLuint size, GLuint levels) return size; } -static void calculate_miptree_layout(radeon_mipmap_tree *mt) + +static void calculate_miptree_layout_r100(radeonContextPtr rmesa, radeon_mipmap_tree *mt) +{ + GLuint curOffset; + GLuint numLevels; + GLuint i; + GLuint face; + + numLevels = mt->lastLevel - mt->firstLevel + 1; + assert(numLevels <= rmesa->glCtx->Const.MaxTextureLevels); + + curOffset = 0; + for(face = 0; face < mt->faces; face++) { + + for(i = 0; i < numLevels; i++) { + mt->levels[i].width = minify(mt->width0, i); + mt->levels[i].height = minify(mt->height0, i); + mt->levels[i].depth = minify(mt->depth0, i); + compute_tex_image_offset(rmesa, mt, face, i, &curOffset); + } + } + + /* Note the required size in memory */ + mt->totalsize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; +} + +static void calculate_miptree_layout_r300(radeonContextPtr rmesa, radeon_mipmap_tree *mt) { GLuint curOffset; GLuint numLevels; GLuint i; numLevels = mt->lastLevel - mt->firstLevel + 1; - assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); + assert(numLevels <= rmesa->glCtx->Const.MaxTextureLevels); curOffset = 0; for(i = 0; i < numLevels; i++) { @@ -149,14 +176,13 @@ static void calculate_miptree_layout(radeon_mipmap_tree *mt) mt->levels[i].depth = minify(mt->depth0, i); for(face = 0; face < mt->faces; face++) - compute_tex_image_offset(mt, face, i, &curOffset); + compute_tex_image_offset(rmesa, mt, face, i, &curOffset); } /* Note the required size in memory */ mt->totalsize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; } - /** * Create a new mipmap tree, calculate its layout and allocate memory. */ @@ -181,7 +207,10 @@ radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj * mt->tilebits = tilebits; mt->compressed = compressed; - calculate_miptree_layout(mt); + if (rmesa->radeonScreen->chip_family >= CHIP_FAMILY_R300) + calculate_miptree_layout_r300(rmesa, mt); + else + calculate_miptree_layout_r100(rmesa, mt); #ifdef RADEON_DEBUG_BO mt->bo = radeon_bo_open(rmesa->radeonScreen->bom, diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h index 697010bc022..7ece688493d 100644 --- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h @@ -47,6 +47,8 @@ struct _radeon_mipmap_level { radeon_mipmap_image faces[6]; }; +/* store the max possible in the miptree */ +#define RADEON_MIPTREE_MAX_TEXTURE_LEVELS 13 /** * A mipmap tree contains texture images in the layout that the hardware @@ -77,7 +79,7 @@ struct _radeon_mipmap_tree { GLuint tilebits; /** RADEON_TXO_xxx_TILE */ GLuint compressed; /** MESA_FORMAT_xxx indicating a compressed format, or 0 if uncompressed */ - radeon_mipmap_level levels[RADEON_MAX_TEXTURE_LEVELS]; + radeon_mipmap_level levels[RADEON_MIPTREE_MAX_TEXTURE_LEVELS]; }; radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *t, diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index a0e1b131781..e23d53c7a1a 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -253,8 +253,8 @@ static int radeonGetParam(__DRIscreenPrivate *sPriv, int param, void *value) { int ret; - drm_radeon_getparam_t gp; - struct drm_radeon_info info; + drm_radeon_getparam_t gp = { 0 }; + struct drm_radeon_info info = { 0 }; if (sPriv->drm_version.major >= 2) { info.value = (uint64_t)value; @@ -417,6 +417,7 @@ static const __DRItexBufferExtension r600TexBufferExtension = { static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) { + screen->device_id = device_id; screen->chip_flags = 0; switch ( device_id ) { case PCI_CHIP_RADEON_LY: @@ -493,11 +494,7 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) screen->chip_family = CHIP_FAMILY_RS300; break; - /* 9500 with 1 pipe verified by: Reid Linnemann <[email protected]> */ case PCI_CHIP_R300_AD: - screen->chip_family = CHIP_FAMILY_RV350; - screen->chip_flags = RADEON_CHIPSET_TCL; - break; case PCI_CHIP_R300_AE: case PCI_CHIP_R300_AF: case PCI_CHIP_R300_AG: @@ -962,19 +959,6 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) { int ret; -#ifdef RADEON_PARAM_KERNEL_MM - ret = radeonGetParam(sPriv, RADEON_PARAM_KERNEL_MM, &screen->kernel_mm); - - if (ret && ret != -EINVAL) { - FREE( screen ); - fprintf(stderr, "drm_radeon_getparam_t (RADEON_OFFSET): %d\n", ret); - return NULL; - } - - if (ret == -EINVAL) - screen->kernel_mm = 0; -#endif - ret = radeonGetParam(sPriv, RADEON_PARAM_GART_BUFFER_OFFSET, &screen->gart_buffer_offset); @@ -1012,66 +996,63 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) if (ret == -1) return NULL; - if (!screen->kernel_mm) { - screen->mmio.handle = dri_priv->registerHandle; - screen->mmio.size = dri_priv->registerSize; - if ( drmMap( sPriv->fd, - screen->mmio.handle, - screen->mmio.size, - &screen->mmio.map ) ) { - FREE( screen ); - __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ ); - return NULL; - } + screen->mmio.handle = dri_priv->registerHandle; + screen->mmio.size = dri_priv->registerSize; + if ( drmMap( sPriv->fd, + screen->mmio.handle, + screen->mmio.size, + &screen->mmio.map ) ) { + FREE( screen ); + __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ ); + return NULL; + } + + RADEONMMIO = screen->mmio.map; + + screen->status.handle = dri_priv->statusHandle; + screen->status.size = dri_priv->statusSize; + if ( drmMap( sPriv->fd, + screen->status.handle, + screen->status.size, + &screen->status.map ) ) { + drmUnmap( screen->mmio.map, screen->mmio.size ); + FREE( screen ); + __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ ); + return NULL; + } + if (screen->chip_family < CHIP_FAMILY_R600) + screen->scratch = (__volatile__ uint32_t *) + ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET); + else + screen->scratch = (__volatile__ uint32_t *) + ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET); - RADEONMMIO = screen->mmio.map; + screen->buffers = drmMapBufs( sPriv->fd ); + if ( !screen->buffers ) { + drmUnmap( screen->status.map, screen->status.size ); + drmUnmap( screen->mmio.map, screen->mmio.size ); + FREE( screen ); + __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ ); + return NULL; + } - screen->status.handle = dri_priv->statusHandle; - screen->status.size = dri_priv->statusSize; + if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) { + screen->gartTextures.handle = dri_priv->gartTexHandle; + screen->gartTextures.size = dri_priv->gartTexMapSize; if ( drmMap( sPriv->fd, - screen->status.handle, - screen->status.size, - &screen->status.map ) ) { - drmUnmap( screen->mmio.map, screen->mmio.size ); - FREE( screen ); - __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ ); - return NULL; - } - if (screen->chip_family < CHIP_FAMILY_R600) - screen->scratch = (__volatile__ uint32_t *) - ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET); - else - screen->scratch = (__volatile__ uint32_t *) - ((GLubyte *)screen->status.map + R600_SCRATCH_REG_OFFSET); - - screen->buffers = drmMapBufs( sPriv->fd ); - if ( !screen->buffers ) { + screen->gartTextures.handle, + screen->gartTextures.size, + (drmAddressPtr)&screen->gartTextures.map ) ) { + drmUnmapBufs( screen->buffers ); drmUnmap( screen->status.map, screen->status.size ); drmUnmap( screen->mmio.map, screen->mmio.size ); FREE( screen ); - __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ ); + __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__); return NULL; - } - - if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) { - screen->gartTextures.handle = dri_priv->gartTexHandle; - screen->gartTextures.size = dri_priv->gartTexMapSize; - if ( drmMap( sPriv->fd, - screen->gartTextures.handle, - screen->gartTextures.size, - (drmAddressPtr)&screen->gartTextures.map ) ) { - drmUnmapBufs( screen->buffers ); - drmUnmap( screen->status.map, screen->status.size ); - drmUnmap( screen->mmio.map, screen->mmio.size ); - FREE( screen ); - __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__); - return NULL; - } - - screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base; - } - } + } + screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base; + } if ((screen->chip_family == CHIP_FAMILY_R350 || screen->chip_family == CHIP_FAMILY_R300) && sPriv->ddx_version.minor < 2) { @@ -1161,6 +1142,17 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) } else { screen->num_gb_pipes = temp; } + + /* pipe overrides */ + switch (dri_priv->deviceID) { + case PCI_CHIP_R300_AD: /* 9500 with 1 quadpipe verified by: Reid Linnemann <[email protected]> */ + case PCI_CHIP_RV410_5E4C: /* RV410 SE only have 1 quadpipe */ + case PCI_CHIP_RV410_5E4F: /* RV410 SE only have 1 quadpipe */ + screen->num_gb_pipes = 1; + break; + default: + break; + } } if ( sPriv->drm_version.minor >= 10 ) { @@ -1224,26 +1216,24 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->extensions[i++] = &driMediaStreamCounterExtension.base; } - if (!screen->kernel_mm) { #if !RADEON_COMMON - screen->extensions[i++] = &radeonTexOffsetExtension.base; + screen->extensions[i++] = &radeonTexOffsetExtension.base; #endif #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) - if (IS_R200_CLASS(screen)) - screen->extensions[i++] = &r200AllocateExtension.base; + if (IS_R200_CLASS(screen)) + screen->extensions[i++] = &r200AllocateExtension.base; - screen->extensions[i++] = &r200texOffsetExtension.base; + screen->extensions[i++] = &r200texOffsetExtension.base; #endif #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) - screen->extensions[i++] = &r300texOffsetExtension.base; + screen->extensions[i++] = &r300texOffsetExtension.base; #endif #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) - screen->extensions[i++] = &r600texOffsetExtension.base; + screen->extensions[i++] = &r600texOffsetExtension.base; #endif - } screen->extensions[i++] = NULL; sPriv->extensions = screen->extensions; @@ -1253,10 +1243,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA + screen->sarea_priv_offset); - if (screen->kernel_mm) - screen->bom = radeon_bo_manager_gem_ctor(sPriv->fd); - else - screen->bom = radeon_bo_manager_legacy_ctor(screen); + screen->bom = radeon_bo_manager_legacy_ctor(screen); if (screen->bom == NULL) { free(screen); return NULL; @@ -1271,7 +1258,7 @@ radeonCreateScreen2(__DRIscreenPrivate *sPriv) radeonScreenPtr screen; int i; int ret; - uint32_t device_id; + uint32_t device_id = 0; uint32_t temp = 0; /* Allocate the private area */ @@ -1294,7 +1281,15 @@ radeonCreateScreen2(__DRIscreenPrivate *sPriv) screen->kernel_mm = 1; screen->chip_flags = 0; - ret = radeonGetParam(sPriv, RADEON_PARAM_IRQ_NR, &screen->irq); + /* if we have kms we can support all of these */ + screen->drmSupportsCubeMapsR200 = 1; + screen->drmSupportsBlendColor = 1; + screen->drmSupportsTriPerf = 1; + screen->drmSupportsFragShader = 1; + screen->drmSupportsPointSprites = 1; + screen->drmSupportsCubeMapsR100 = 1; + screen->drmSupportsVertexProgram = 1; + screen->irq = 1; ret = radeonGetParam(sPriv, RADEON_PARAM_DEVICE_ID, &device_id); if (ret) { @@ -1346,6 +1341,18 @@ radeonCreateScreen2(__DRIscreenPrivate *sPriv) } else { screen->num_gb_pipes = temp; } + + /* pipe overrides */ + switch (device_id) { + case PCI_CHIP_R300_AD: /* 9500 with 1 quadpipe verified by: Reid Linnemann <[email protected]> */ + case PCI_CHIP_RV410_5E4C: /* RV410 SE only have 1 quadpipe */ + case PCI_CHIP_RV410_5E4F: /* RV410 SE only have 1 quadpipe */ + screen->num_gb_pipes = 1; + break; + default: + break; + } + } i = 0; @@ -1586,6 +1593,7 @@ static GLboolean radeonCreateContext(const __GLcontextModes * glVisual, #endif #if !RADEON_COMMON + (void)screen; return r100CreateContext(glVisual, driContextPriv, sharedContextPriv); #endif return GL_FALSE; diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h index 5194224acb7..2a2f6b1b0b8 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.h +++ b/src/mesa/drivers/dri/radeon/radeon_screen.h @@ -59,6 +59,7 @@ typedef struct radeon_screen { int chip_flags; int cpp; int card_type; + int device_id; /* PCI ID */ int AGPMode; unsigned int irq; /* IRQ number (0 means none) */ diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c index d9a7ef60612..06b8c299368 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -149,7 +149,7 @@ static void radeonBlendFuncSeparate( GLcontext *ctx, GLenum sfactorA, GLenum dfactorA ) { r100ContextPtr rmesa = R100_CONTEXT(ctx); - GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & + GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & ~(RADEON_SRC_BLEND_MASK | RADEON_DST_BLEND_MASK); GLboolean fallback = GL_FALSE; @@ -392,7 +392,7 @@ static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param ) rmesa->hw.fog.cmd[FOG_D] = d.i; } break; - case GL_FOG_COLOR: + case GL_FOG_COLOR: RADEON_STATECHANGE( rmesa, ctx ); UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color ); rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~RADEON_FOG_COLOR_MASK; @@ -495,7 +495,7 @@ static void radeonLineStipple( GLcontext *ctx, GLint factor, GLushort pattern ) r100ContextPtr rmesa = R100_CONTEXT(ctx); RADEON_STATECHANGE( rmesa, lin ); - rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = + rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = ((((GLuint)factor & 0xff) << 16) | ((GLuint)pattern)); } @@ -558,7 +558,7 @@ static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask ) /* FIXME: Use window x,y offsets into stipple RAM. */ stipple.mask = rmesa->state.stipple.mask; - drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE, + drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE, &stipple, sizeof(drm_radeon_stipple_t) ); UNLOCK_HARDWARE( &rmesa->radeon ); } @@ -569,7 +569,7 @@ static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode ) GLboolean flag = (ctx->_TriangleCaps & DD_TRI_UNFILLED) != 0; /* Can't generally do unfilled via tcl, but some good special - * cases work. + * cases work. */ TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_UNFILLED, flag); if (rmesa->radeon.TclFallback) { @@ -617,7 +617,7 @@ static void radeonUpdateSpecular( GLcontext *ctx ) rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE; rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE; p |= RADEON_SPECULAR_ENABLE; - rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_DIFFUSE_SPECULAR_COMBINE; } else if (ctx->Light.Enabled) { @@ -647,7 +647,7 @@ static void radeonUpdateSpecular( GLcontext *ctx ) RADEON_TCL_COMPUTE_SPECULAR) != 0; } } - + TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_FOGCOORDSPEC, flag); if (NEED_SECONDARY_COLOR(ctx)) { @@ -663,7 +663,7 @@ static void radeonUpdateSpecular( GLcontext *ctx ) /* Update vertex/render formats */ - if (rmesa->radeon.TclFallback) { + if (rmesa->radeon.TclFallback) { radeonChooseRenderState( ctx ); radeonChooseVertexState( ctx ); } @@ -675,7 +675,7 @@ static void radeonUpdateSpecular( GLcontext *ctx ) */ -/* Update on colormaterial, material emmissive/ambient, +/* Update on colormaterial, material emmissive/ambient, * lightmodel.globalambient */ static void update_global_ambient( GLcontext *ctx ) @@ -688,23 +688,23 @@ static void update_global_ambient( GLcontext *ctx ) */ if ((rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] & ((3 << RADEON_EMISSIVE_SOURCE_SHIFT) | - (3 << RADEON_AMBIENT_SOURCE_SHIFT))) == 0) + (3 << RADEON_AMBIENT_SOURCE_SHIFT))) == 0) { - COPY_3V( &fcmd[GLT_RED], + COPY_3V( &fcmd[GLT_RED], ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_EMISSION]); ACC_SCALE_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient, ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_AMBIENT]); - } + } else { COPY_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient ); } - + RADEON_DB_STATECHANGE(rmesa, &rmesa->hw.glt); } -/* Update on change to +/* Update on change to * - light[p].colors * - light[p].enabled */ @@ -718,10 +718,10 @@ static void update_light_colors( GLcontext *ctx, GLuint p ) r100ContextPtr rmesa = R100_CONTEXT(ctx); float *fcmd = (float *)RADEON_DB_STATE( lit[p] ); - COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient ); + COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient ); COPY_4V( &fcmd[LIT_DIFFUSE_RED], l->Diffuse ); COPY_4V( &fcmd[LIT_SPECULAR_RED], l->Specular ); - + RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] ); } } @@ -735,7 +735,7 @@ static void check_twoside_fallback( GLcontext *ctx ) if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) { if (ctx->Light.ColorMaterialEnabled && - (ctx->Light.ColorMaterialBitmask & BACK_MATERIAL_BITS) != + (ctx->Light.ColorMaterialBitmask & BACK_MATERIAL_BITS) != ((ctx->Light.ColorMaterialBitmask & FRONT_MATERIAL_BITS)<<1)) fallback = GL_TRUE; else { @@ -743,7 +743,7 @@ static void check_twoside_fallback( GLcontext *ctx ) if (memcmp( ctx->Light.Material.Attrib[i], ctx->Light.Material.Attrib[i+1], sizeof(GLfloat)*4) != 0) { - fallback = GL_TRUE; + fallback = GL_TRUE; break; } } @@ -761,8 +761,8 @@ static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) light_model_ctl1 &= ~((3 << RADEON_EMISSIVE_SOURCE_SHIFT) | (3 << RADEON_AMBIENT_SOURCE_SHIFT) | (3 << RADEON_DIFFUSE_SOURCE_SHIFT) | - (3 << RADEON_SPECULAR_SOURCE_SHIFT)); - + (3 << RADEON_SPECULAR_SOURCE_SHIFT)); + if (ctx->Light.ColorMaterialEnabled) { GLuint mask = ctx->Light.ColorMaterialBitmask; @@ -783,7 +783,7 @@ static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) light_model_ctl1 |= (RADEON_LM_SOURCE_STATE_MULT << RADEON_AMBIENT_SOURCE_SHIFT); } - + if (mask & MAT_BIT_FRONT_DIFFUSE) { light_model_ctl1 |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE << RADEON_DIFFUSE_SOURCE_SHIFT); @@ -792,7 +792,7 @@ static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) light_model_ctl1 |= (RADEON_LM_SOURCE_STATE_MULT << RADEON_DIFFUSE_SOURCE_SHIFT); } - + if (mask & MAT_BIT_FRONT_SPECULAR) { light_model_ctl1 |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE << RADEON_SPECULAR_SOURCE_SHIFT); @@ -810,10 +810,10 @@ static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) (RADEON_LM_SOURCE_STATE_MULT << RADEON_DIFFUSE_SOURCE_SHIFT) | (RADEON_LM_SOURCE_STATE_MULT << RADEON_SPECULAR_SOURCE_SHIFT); } - + if (light_model_ctl1 != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) { RADEON_STATECHANGE( rmesa, tcl ); - rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = light_model_ctl1; + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = light_model_ctl1; } } @@ -823,14 +823,14 @@ void radeonUpdateMaterial( GLcontext *ctx ) GLfloat (*mat)[4] = ctx->Light.Material.Attrib; GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( mtl ); GLuint mask = ~0; - + if (ctx->Light.ColorMaterialEnabled) mask &= ~ctx->Light.ColorMaterialBitmask; if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "%s\n", __FUNCTION__); - + if (mask & MAT_BIT_FRONT_EMISSION) { fcmd[MTL_EMMISSIVE_RED] = mat[MAT_ATTRIB_FRONT_EMISSION][0]; fcmd[MTL_EMMISSIVE_GREEN] = mat[MAT_ATTRIB_FRONT_EMISSION][1]; @@ -880,7 +880,7 @@ void radeonUpdateMaterial( GLcontext *ctx ) * * which are calculated in light.c and are correct for the current * lighting space (model or eye), hence dependencies on _NEW_MODELVIEW - * and _MESA_NEW_NEED_EYE_COORDS. + * and _MESA_NEW_NEED_EYE_COORDS. */ static void update_light( GLcontext *ctx ) { @@ -897,12 +897,12 @@ static void update_light( GLcontext *ctx ) tmp &= ~RADEON_LIGHT_IN_MODELSPACE; else tmp |= RADEON_LIGHT_IN_MODELSPACE; - + /* Leave this test disabled: (unexplained q3 lockup) (even with new packets) */ - if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) + if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) { RADEON_STATECHANGE( rmesa, tcl ); rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = tmp; @@ -926,10 +926,10 @@ static void update_light( GLcontext *ctx ) if (ctx->Light.Light[p].Enabled) { struct gl_light *l = &ctx->Light.Light[p]; GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( lit[p] ); - + if (l->EyePosition[3] == 0.0) { - COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm ); - COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm ); + COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm ); + COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm ); fcmd[LIT_POSITION_W] = 0; fcmd[LIT_DIRECTION_W] = 0; } else { @@ -953,26 +953,26 @@ static void radeonLightfv( GLcontext *ctx, GLenum light, GLint p = light - GL_LIGHT0; struct gl_light *l = &ctx->Light.Light[p]; GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd; - + switch (pname) { - case GL_AMBIENT: + case GL_AMBIENT: case GL_DIFFUSE: case GL_SPECULAR: update_light_colors( ctx, p ); break; - case GL_SPOT_DIRECTION: - /* picked up in update_light */ + case GL_SPOT_DIRECTION: + /* picked up in update_light */ break; case GL_POSITION: { - /* positions picked up in update_light, but can do flag here */ + /* positions picked up in update_light, but can do flag here */ GLuint flag; GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2; /* FIXME: Set RANGE_ATTEN only when needed */ - if (p&1) + if (p&1) flag = RADEON_LIGHT_1_IS_LOCAL; else flag = RADEON_LIGHT_0_IS_LOCAL; @@ -1064,7 +1064,7 @@ static void radeonLightfv( GLcontext *ctx, GLenum light, } } - + static void radeonLightModelfv( GLcontext *ctx, GLenum pname, @@ -1073,7 +1073,7 @@ static void radeonLightModelfv( GLcontext *ctx, GLenum pname, r100ContextPtr rmesa = R100_CONTEXT(ctx); switch (pname) { - case GL_LIGHT_MODEL_AMBIENT: + case GL_LIGHT_MODEL_AMBIENT: update_global_ambient( ctx ); break; @@ -1247,14 +1247,14 @@ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail, /* radeon 7200 have stencil bug, DEC and INC_WRAP will actually both do DEC_WRAP, and DEC_WRAP (and INVERT) will do INVERT. No way to get correct INC_WRAP and DEC, but DEC_WRAP can be fixed by using DEC and INC_WRAP at least use INC. */ - + GLuint tempRADEON_STENCIL_FAIL_DEC_WRAP; GLuint tempRADEON_STENCIL_FAIL_INC_WRAP; GLuint tempRADEON_STENCIL_ZFAIL_DEC_WRAP; GLuint tempRADEON_STENCIL_ZFAIL_INC_WRAP; GLuint tempRADEON_STENCIL_ZPASS_DEC_WRAP; GLuint tempRADEON_STENCIL_ZPASS_INC_WRAP; - + if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_BROKEN_STENCIL) { tempRADEON_STENCIL_FAIL_DEC_WRAP = RADEON_STENCIL_FAIL_DEC; tempRADEON_STENCIL_FAIL_INC_WRAP = RADEON_STENCIL_FAIL_INC; @@ -1271,7 +1271,7 @@ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail, tempRADEON_STENCIL_ZPASS_DEC_WRAP = RADEON_STENCIL_ZPASS_DEC_WRAP; tempRADEON_STENCIL_ZPASS_INC_WRAP = RADEON_STENCIL_ZPASS_INC_WRAP; } - + RADEON_STATECHANGE( rmesa, ctx ); rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~(RADEON_STENCIL_FAIL_MASK | RADEON_STENCIL_ZFAIL_MASK | @@ -1363,7 +1363,7 @@ static void radeonClearStencil( GLcontext *ctx, GLint s ) { r100ContextPtr rmesa = R100_CONTEXT(ctx); - rmesa->radeon.state.stencil.clear = + rmesa->radeon.state.stencil.clear = ((GLuint) (ctx->Stencil.Clear & 0xff) | (0xff << RADEON_STENCIL_MASK_SHIFT) | ((ctx->Stencil.WriteMask[0] & 0xff) << RADEON_STENCIL_WRITEMASK_SHIFT)); @@ -1388,7 +1388,7 @@ static void radeonClearStencil( GLcontext *ctx, GLint s ) void radeonUpdateWindow( GLcontext *ctx ) { r100ContextPtr rmesa = R100_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0; GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0; const GLfloat *v = ctx->Viewport._WindowMap.m; @@ -1443,7 +1443,7 @@ static void radeonDepthRange( GLcontext *ctx, GLclampd nearval, void radeonUpdateViewportOffset( GLcontext *ctx ) { r100ContextPtr rmesa = R100_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); GLfloat xoffset = (GLfloat)dPriv->x; GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h; const GLfloat *v = ctx->Viewport._WindowMap.m; @@ -1473,8 +1473,8 @@ void radeonUpdateViewportOffset( GLcontext *ctx ) RADEON_STIPPLE_Y_OFFSET_MASK); /* add magic offsets, then invert */ - stx = 31 - ((rmesa->radeon.dri.drawable->x - 1) & RADEON_STIPPLE_COORD_MASK); - sty = 31 - ((rmesa->radeon.dri.drawable->y + rmesa->radeon.dri.drawable->h - 1) + stx = 31 - ((dPriv->x - 1) & RADEON_STIPPLE_COORD_MASK); + sty = 31 - ((dPriv->y + dPriv->h - 1) & RADEON_STIPPLE_COORD_MASK); m |= ((stx << RADEON_STIPPLE_X_OFFSET_SHIFT) | @@ -1613,7 +1613,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) case GL_CLIP_PLANE2: case GL_CLIP_PLANE3: case GL_CLIP_PLANE4: - case GL_CLIP_PLANE5: + case GL_CLIP_PLANE5: p = cap-GL_CLIP_PLANE0; RADEON_STATECHANGE( rmesa, tcl ); if (state) { @@ -1678,13 +1678,13 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) case GL_LIGHT7: RADEON_STATECHANGE(rmesa, tcl); p = cap - GL_LIGHT0; - if (p&1) + if (p&1) flag = (RADEON_LIGHT_1_ENABLE | - RADEON_LIGHT_1_ENABLE_AMBIENT | + RADEON_LIGHT_1_ENABLE_AMBIENT | RADEON_LIGHT_1_ENABLE_SPECULAR); else flag = (RADEON_LIGHT_0_ENABLE | - RADEON_LIGHT_0_ENABLE_AMBIENT | + RADEON_LIGHT_0_ENABLE_AMBIENT | RADEON_LIGHT_0_ENABLE_SPECULAR); if (state) @@ -1692,7 +1692,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) else rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] &= ~flag; - /* + /* */ update_light_colors( ctx, p ); break; @@ -1730,7 +1730,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE; } break; - + case GL_NORMALIZE: RADEON_STATECHANGE( rmesa, tcl ); if ( state ) { @@ -1830,7 +1830,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) case GL_TEXTURE_GEN_T: /* Picked up in radeonUpdateTextureState. */ - rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE; + rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE; break; case GL_COLOR_SUM_EXT: @@ -1864,7 +1864,7 @@ static void radeonLightingSpaceChange( GLcontext *ctx ) rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_RESCALE_NORMALS; } - if (RADEON_DEBUG & DEBUG_STATE) + if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "%s %d AFTER %x\n", __FUNCTION__, ctx->_NeedEyeCoords, rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]); } @@ -2051,7 +2051,7 @@ static GLboolean r100ValidateBuffers(GLcontext *ctx) int i; radeon_validate_reset_bos(&rmesa->radeon); - + rrb = radeon_get_colorbuffer(&rmesa->radeon); /* color buffer */ if (rrb && rrb->bo) { @@ -2069,7 +2069,7 @@ static GLboolean r100ValidateBuffers(GLcontext *ctx) for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) { radeonTexObj *t; - + if (!ctx->Texture.Unit[i]._ReallyEnabled) continue; @@ -2094,7 +2094,7 @@ GLboolean radeonValidateState( GLcontext *ctx ) r100ContextPtr rmesa = R100_CONTEXT(ctx); GLuint new_state = rmesa->radeon.NewGLState; - if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { + if (new_state & _NEW_BUFFERS) { _mesa_update_framebuffer(ctx); /* this updates the DrawBuffer's Width/Height if it's a FBO */ _mesa_update_draw_buffer_bounds(ctx); @@ -2112,7 +2112,7 @@ GLboolean radeonValidateState( GLcontext *ctx ) /* Need an event driven matrix update? */ - if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) + if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) upload_matrix( rmesa, ctx->_ModelProjectMatrix.m, MODEL_PROJ ); /* Need these for lighting (shouldn't upload otherwise) @@ -2136,7 +2136,7 @@ GLboolean radeonValidateState( GLcontext *ctx ) /* emit all active clip planes if projection matrix changes. */ if (new_state & (_NEW_PROJECTION)) { - if (ctx->Transform.ClipPlanesEnabled) + if (ctx->Transform.ClipPlanesEnabled) radeonUpdateClipPlanes( ctx ); } @@ -2165,8 +2165,8 @@ static GLboolean check_material( GLcontext *ctx ) TNLcontext *tnl = TNL_CONTEXT(ctx); GLint i; - for (i = _TNL_ATTRIB_MAT_FRONT_AMBIENT; - i < _TNL_ATTRIB_MAT_BACK_INDEXES; + for (i = _TNL_ATTRIB_MAT_FRONT_AMBIENT; + i < _TNL_ATTRIB_MAT_BACK_INDEXES; i++) if (tnl->vb.AttribPtr[i] && tnl->vb.AttribPtr[i]->stride) @@ -2174,7 +2174,7 @@ static GLboolean check_material( GLcontext *ctx ) return GL_FALSE; } - + static void radeonWrapRunPipeline( GLcontext *ctx ) { @@ -2197,7 +2197,7 @@ static void radeonWrapRunPipeline( GLcontext *ctx ) } /* Run the pipeline. - */ + */ _tnl_run_pipeline( ctx ); if (has_material) { diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c index 174a7e1862f..f5d4189d669 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state_init.c +++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c @@ -448,16 +448,28 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) // } END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH(CP_PACKET0(RADEON_RE_TOP_LEFT, 0)); + OUT_BATCH(0); + OUT_BATCH(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0)); + if (rrb) { + OUT_BATCH(((rrb->width - 1) << RADEON_RE_WIDTH_SHIFT) | + ((rrb->height - 1) << RADEON_RE_HEIGHT_SHIFT)); + } else { + OUT_BATCH(0); + } + END_BATCH(); } static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom) { r100ContextPtr r100 = R100_CONTEXT(ctx); BATCH_LOCALS(&r100->radeon); - uint32_t dwords = atom->cmd_size; + uint32_t dwords = 2; int i = atom->idx, j; radeonTexObj *t = r100->state.texture.unit[i].texobj; radeon_mipmap_level *lvl; + uint32_t base_reg; if (!(ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) return; @@ -468,10 +480,17 @@ static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom) if (!t->mt) return; - BEGIN_BATCH_NO_AUTOSTATE(dwords + 10); - OUT_BATCH_TABLE(atom->cmd, 3); + switch(i) { + case 1: base_reg = RADEON_PP_CUBIC_OFFSET_T1_0; break; + case 2: base_reg = RADEON_PP_CUBIC_OFFSET_T2_0; break; + default: + case 0: base_reg = RADEON_PP_CUBIC_OFFSET_T0_0; break; + }; + BEGIN_BATCH_NO_AUTOSTATE(dwords + (5 * 4)); + OUT_BATCH_TABLE(atom->cmd, 2); lvl = &t->mt->levels[0]; for (j = 0; j < 5; j++) { + OUT_BATCH(CP_PACKET0(base_reg + (4 * (j-1)), 0)); OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset, RADEON_GEM_DOMAIN_VRAM, 0, 0); } diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index e4df33766eb..279bcd4df6a 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -722,7 +722,10 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_ pitch_val = rb->pitch; switch (rb->cpp) { case 4: - t->pp_txformat = tx_table[MESA_FORMAT_ARGB8888].format; + if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT) + t->pp_txformat = tx_table[MESA_FORMAT_RGB888].format; + else + t->pp_txformat = tx_table[MESA_FORMAT_ARGB8888].format; t->pp_txfilter |= tx_table[MESA_FORMAT_ARGB8888].filter; break; case 3: diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index 64029eeb91a..ee66fc2ea0d 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -38,6 +38,7 @@ #include "main/texstore.h" #include "main/teximage.h" #include "main/texobj.h" +#include "main/texgetimage.h" #include "xmlpool.h" /* for symbolic values of enum-type options */ @@ -611,11 +612,10 @@ static void radeon_teximage( _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage"); } - } - - /* SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - radeon_generate_mipmap(ctx, texObj->Target, texObj); + /* SGIS_generate_mipmap */ + if (level == texObj->BaseLevel && texObj->GenerateMipmap) { + radeon_generate_mipmap(ctx, texObj->Target, texObj); + } } _mesa_unmap_teximage_pbo(ctx, packing); @@ -740,12 +740,12 @@ static void radeon_texsubimage(GLcontext* ctx, int dims, int level, _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage"); } + /* GL_SGIS_generate_mipmap */ + if (level == texObj->BaseLevel && texObj->GenerateMipmap) { + radeon_generate_mipmap(ctx, texObj->Target, texObj); + } } - /* GL_SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - radeon_generate_mipmap(ctx, texObj->Target, texObj); - } radeon_teximage_unmap(image); _mesa_unmap_teximage_pbo(ctx, packing); @@ -830,7 +830,7 @@ static void migrate_image_to_miptree(radeon_mipmap_tree *mt, radeon_texture_imag * In fact, that memcpy() could be done by the hardware in many * cases, provided that we have a proper memory manager. */ - radeon_mipmap_level *srclvl = &image->mt->levels[image->mtlevel]; + radeon_mipmap_level *srclvl = &image->mt->levels[image->mtlevel-image->mt->firstLevel]; assert(srclvl->size == dstlvl->size); assert(srclvl->rowstride == dstlvl->rowstride); diff --git a/src/mesa/drivers/osmesa/Makefile b/src/mesa/drivers/osmesa/Makefile index 3b3984200aa..92d41494665 100644 --- a/src/mesa/drivers/osmesa/Makefile +++ b/src/mesa/drivers/osmesa/Makefile @@ -19,11 +19,12 @@ INCLUDE_DIRS = \ -I$(TOP)/src/mesa \ -I$(TOP)/src/mesa/main +# Standalone osmesa needs to be linked with core Mesa APIs +ifeq ($(DRIVER_DIRS), osmesa) CORE_MESA = $(TOP)/src/mesa/libmesa.a $(TOP)/src/mesa/libglapi.a - - -.PHONY: osmesa8 -.PHONY: osmesa16 +else +CORE_MESA = +endif .c.o: @@ -31,31 +32,12 @@ CORE_MESA = $(TOP)/src/mesa/libmesa.a $(TOP)/src/mesa/libglapi.a default: $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME) - @ if [ "${DRIVER_DIRS}" = "osmesa" ] ; then \ - $(MAKE) osmesa16 ; \ - else \ - $(MAKE) osmesa8 ; \ - fi - - - - -# The normal libOSMesa is used in conjuction with libGL -osmesa8: $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME) - -$(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME): $(OBJECTS) - $(MKLIB) -o $(OSMESA_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ - -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ - -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \ - -id $(INSTALL_LIB_DIR)/lib$(OSMESA_LIB).$(MESA_MAJOR).dylib \ - $(OSMESA_LIB_DEPS) $(OBJECTS) - - -# The libOSMesa16/libOSMesa32 libraries do not use libGL but rather are built -# with all the other Mesa sources (compiled with -DCHAN_BITS=16/32 -osmesa16: $(OBJECTS) $(CORE_MESA) +# libOSMesa can be used in conjuction with libGL or with all other Mesa +# sources. We can also build libOSMesa16/libOSMesa32 by setting +# -DCHAN_BITS=16/32. +$(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME): $(OBJECTS) $(CORE_MESA) $(MKLIB) -o $(OSMESA_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \ diff --git a/src/mesa/drivers/windows/gdi/wmesa.c b/src/mesa/drivers/windows/gdi/wmesa.c index d1103f31670..e1971db6933 100644 --- a/src/mesa/drivers/windows/gdi/wmesa.c +++ b/src/mesa/drivers/windows/gdi/wmesa.c @@ -1679,80 +1679,3 @@ void WMesaShareLists(WMesaContext ctx_to_share, WMesaContext ctx) _mesa_share_state(&ctx->gl_ctx, &ctx_to_share->gl_ctx); } -/* This is hopefully a temporary hack to define some needed dispatch - * table entries. Hopefully, I'll find a better solution. The - * dispatch table generation scripts ought to be making these dummy - * stubs as well. */ -#if !defined(__MINGW32__) || !defined(GL_NO_STDCALL) -void gl_dispatch_stub_543(void){} -void gl_dispatch_stub_544(void){} -void gl_dispatch_stub_545(void){} -void gl_dispatch_stub_546(void){} -void gl_dispatch_stub_547(void){} -void gl_dispatch_stub_548(void){} -void gl_dispatch_stub_549(void){} -void gl_dispatch_stub_550(void){} -void gl_dispatch_stub_551(void){} -void gl_dispatch_stub_552(void){} -void gl_dispatch_stub_553(void){} -void gl_dispatch_stub_554(void){} -void gl_dispatch_stub_555(void){} -void gl_dispatch_stub_556(void){} -void gl_dispatch_stub_557(void){} -void gl_dispatch_stub_558(void){} -void gl_dispatch_stub_559(void){} -void gl_dispatch_stub_560(void){} -void gl_dispatch_stub_561(void){} -void gl_dispatch_stub_565(void){} -void gl_dispatch_stub_566(void){} -void gl_dispatch_stub_577(void){} -void gl_dispatch_stub_578(void){} -void gl_dispatch_stub_603(void){} -void gl_dispatch_stub_645(void){} -void gl_dispatch_stub_646(void){} -void gl_dispatch_stub_647(void){} -void gl_dispatch_stub_648(void){} -void gl_dispatch_stub_649(void){} -void gl_dispatch_stub_650(void){} -void gl_dispatch_stub_651(void){} -void gl_dispatch_stub_652(void){} -void gl_dispatch_stub_653(void){} -void gl_dispatch_stub_733(void){} -void gl_dispatch_stub_734(void){} -void gl_dispatch_stub_735(void){} -void gl_dispatch_stub_736(void){} -void gl_dispatch_stub_737(void){} -void gl_dispatch_stub_738(void){} -void gl_dispatch_stub_744(void){} -void gl_dispatch_stub_745(void){} -void gl_dispatch_stub_746(void){} -void gl_dispatch_stub_760(void){} -void gl_dispatch_stub_761(void){} -void gl_dispatch_stub_763(void){} -void gl_dispatch_stub_765(void){} -void gl_dispatch_stub_766(void){} -void gl_dispatch_stub_767(void){} -void gl_dispatch_stub_768(void){} - -void gl_dispatch_stub_562(void){} -void gl_dispatch_stub_563(void){} -void gl_dispatch_stub_564(void){} -void gl_dispatch_stub_567(void){} -void gl_dispatch_stub_568(void){} -void gl_dispatch_stub_569(void){} -void gl_dispatch_stub_580(void){} -void gl_dispatch_stub_581(void){} -void gl_dispatch_stub_606(void){} -void gl_dispatch_stub_654(void){} -void gl_dispatch_stub_655(void){} -void gl_dispatch_stub_656(void){} -void gl_dispatch_stub_739(void){} -void gl_dispatch_stub_740(void){} -void gl_dispatch_stub_741(void){} -void gl_dispatch_stub_748(void){} -void gl_dispatch_stub_749(void){} -void gl_dispatch_stub_769(void){} -void gl_dispatch_stub_770(void){} -void gl_dispatch_stub_771(void){} - -#endif diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c index 305df548fa2..9a01465bdf9 100644 --- a/src/mesa/drivers/x11/xm_dd.c +++ b/src/mesa/drivers/x11/xm_dd.c @@ -912,8 +912,9 @@ xmesa_update_state( GLcontext *ctx, GLbitfield new_state ) /* * GL_DITHER, GL_READ/DRAW_BUFFER, buffer binding state, etc. effect * renderbuffer span/clear funcs. + * Check _NEW_COLOR to detect dither enable/disable. */ - if (new_state & (_NEW_COLOR | _NEW_PIXEL | _NEW_BUFFERS)) { + if (new_state & (_NEW_COLOR | _NEW_BUFFERS)) { XMesaBuffer xmbuf = XMESA_BUFFER(ctx->DrawBuffer); struct xmesa_renderbuffer *front_xrb, *back_xrb; |