diff options
Diffstat (limited to 'src/mesa/drivers')
83 files changed, 3597 insertions, 874 deletions
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c new file mode 100644 index 00000000000..e42beabc9ba --- /dev/null +++ b/src/mesa/drivers/common/meta.c @@ -0,0 +1,1249 @@ +/* + * Mesa 3-D graphics library + * Version: 7.6 + * + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * Meta operations. Some GL operations can be expressed in terms of + * other GL operations. For example, glBlitFramebuffer() can be done + * with texture mapping and glClear() can be done with polygon rendering. + * + * \author Brian Paul + */ + + +#include "main/glheader.h" +#include "main/mtypes.h" +#include "main/imports.h" +#include "main/arrayobj.h" +#include "main/blend.h" +#include "main/bufferobj.h" +#include "main/depth.h" +#include "main/enable.h" +#include "main/image.h" +#include "main/macros.h" +#include "main/matrix.h" +#include "main/polygon.h" +#include "main/scissor.h" +#include "main/shaders.h" +#include "main/stencil.h" +#include "main/texobj.h" +#include "main/texenv.h" +#include "main/teximage.h" +#include "main/texparam.h" +#include "main/texstate.h" +#include "main/varray.h" +#include "main/viewport.h" +#include "shader/program.h" +#include "swrast/swrast.h" +#include "drivers/common/meta.h" + + +/** + * State which we may save/restore across meta ops. + * XXX this may be incomplete... + */ +struct save_state +{ + GLbitfield SavedState; /**< bitmask of META_* flags */ + + /** META_ALPHA_TEST */ + GLboolean AlphaEnabled; + + /** META_BLEND */ + GLboolean BlendEnabled; + GLboolean ColorLogicOpEnabled; + + /** META_COLOR_MASK */ + GLubyte ColorMask[4]; + + /** META_DEPTH_TEST */ + struct gl_depthbuffer_attrib Depth; + + /** META_FOG */ + GLboolean Fog; + + /** META_PIXELSTORE */ + /* XXX / TO-DO */ + + /** META_RASTERIZATION */ + GLenum FrontPolygonMode, BackPolygonMode; + GLboolean PolygonOffset; + GLboolean PolygonSmooth; + GLboolean PolygonStipple; + GLboolean PolygonCull; + + /** META_SCISSOR */ + struct gl_scissor_attrib Scissor; + + /** META_SHADER */ + GLboolean VertexProgramEnabled; + struct gl_vertex_program *VertexProgram; + GLboolean FragmentProgramEnabled; + struct gl_fragment_program *FragmentProgram; + GLuint Shader; + + /** META_STENCIL_TEST */ + struct gl_stencil_attrib Stencil; + + /** META_TRANSFORM */ + GLenum MatrixMode; + GLfloat ModelviewMatrix[16]; + GLfloat ProjectionMatrix[16]; + GLfloat TextureMatrix[16]; + GLbitfield ClipPlanesEnabled; + + /** META_TEXTURE */ + GLuint ActiveUnit; + GLuint ClientActiveUnit; + /** for unit[0] only */ + struct gl_texture_object *CurrentTexture[NUM_TEXTURE_TARGETS]; + /** mask of TEXTURE_2D_BIT, etc */ + GLbitfield TexEnabled[MAX_TEXTURE_UNITS]; + GLbitfield TexGenEnabled[MAX_TEXTURE_UNITS]; + GLuint EnvMode; /* unit[0] only */ + + /** META_VERTEX */ + struct gl_array_object *ArrayObj; + struct gl_buffer_object *ArrayBufferObj; + + /** META_VIEWPORT */ + GLint ViewportX, ViewportY, ViewportW, ViewportH; + GLclampd DepthNear, DepthFar; + + /** Miscellaneous (always disabled) */ + GLboolean Lighting; +}; + + +/** + * State for glBlitFramebufer() + */ +struct blit_state +{ + GLuint TexObj; + GLsizei TexWidth, TexHeight; + GLenum TexType; + GLuint ArrayObj; + GLuint VBO; + GLfloat verts[4][4]; /** four verts of X,Y,S,T */ +}; + + +/** + * State for glClear() + */ +struct clear_state +{ + GLuint ArrayObj; + GLuint VBO; + GLfloat verts[4][7]; /** four verts of X,Y,Z,R,G,B,A */ +}; + + +/** + * State for glCopyPixels() + */ +struct copypix_state +{ + GLuint TexObj; + GLsizei TexWidth, TexHeight; + GLenum TexType; + GLuint ArrayObj; + GLuint VBO; + GLfloat verts[4][5]; /** four verts of X,Y,Z,S,T */ +}; + + +/** + * State for glDrawPixels() + */ +struct drawpix_state +{ + GLuint TexObj; + GLsizei TexWidth, TexHeight; + GLenum TexIntFormat; + GLuint ArrayObj; + GLuint VBO; + GLfloat verts[4][5]; /** four verts of X,Y,Z,S,T */ +}; + + + +/** + * All per-context meta state. + */ +struct gl_meta_state +{ + struct save_state Save; /**< state saved during meta-ops */ + + struct blit_state Blit; /**< For _mesa_meta_blit_framebuffer() */ + struct clear_state Clear; /**< For _mesa_meta_clear() */ + struct copypix_state CopyPix; /**< For _mesa_meta_copy_pixels() */ + struct drawpix_state DrawPix; /**< For _mesa_meta_draw_pixels() */ + + /* other possible meta-ops: + * glDrawPixels() + * glBitmap() + * glGenerateMipmap() + */ +}; + + +/** + * Initialize meta-ops for a context. + * To be called once during context creation. + */ +void +_mesa_meta_init(GLcontext *ctx) +{ + ASSERT(!ctx->Meta); + + ctx->Meta = CALLOC_STRUCT(gl_meta_state); +} + + +/** + * Free context meta-op state. + * To be called once during context destruction. + */ +void +_mesa_meta_free(GLcontext *ctx) +{ + struct gl_meta_state *meta = ctx->Meta; + + if (meta->Blit.TexObj) { + _mesa_DeleteTextures(1, &meta->Blit.TexObj); + _mesa_DeleteBuffersARB(1, & meta->Blit.VBO); + _mesa_DeleteVertexArraysAPPLE(1, &meta->Blit.ArrayObj); + } + + if (meta->Clear.VBO) { + _mesa_DeleteBuffersARB(1, & meta->Clear.VBO); + _mesa_DeleteVertexArraysAPPLE(1, &meta->Clear.ArrayObj); + } + + if (meta->CopyPix.TexObj) { + _mesa_DeleteTextures(1, &meta->CopyPix.TexObj); + _mesa_DeleteBuffersARB(1, & meta->CopyPix.VBO); + _mesa_DeleteVertexArraysAPPLE(1, &meta->CopyPix.ArrayObj); + } + + if (meta->DrawPix.TexObj) { + _mesa_DeleteTextures(1, &meta->DrawPix.TexObj); + _mesa_DeleteBuffersARB(1, & meta->DrawPix.VBO); + _mesa_DeleteVertexArraysAPPLE(1, &meta->DrawPix.ArrayObj); + } + + _mesa_free(ctx->Meta); + ctx->Meta = NULL; +} + + +/** + * Enter meta state. This is like a light-weight version of glPushAttrib + * but it also resets most GL state back to default values. + * + * \param state bitmask of META_* flags indicating which attribute groups + * to save and reset to their defaults + */ +static void +_mesa_meta_begin(GLcontext *ctx, GLbitfield state) +{ + struct save_state *save = &ctx->Meta->Save; + + save->SavedState = state; + + if (state & META_ALPHA_TEST) { + save->AlphaEnabled = ctx->Color.AlphaEnabled; + if (ctx->Color.AlphaEnabled) + _mesa_Disable(GL_ALPHA_TEST); + } + + if (state & META_BLEND) { + save->BlendEnabled = ctx->Color.BlendEnabled; + if (ctx->Color.BlendEnabled) + _mesa_Disable(GL_BLEND); + save->ColorLogicOpEnabled = ctx->Color.ColorLogicOpEnabled; + if (ctx->Color.ColorLogicOpEnabled) + _mesa_Disable(GL_COLOR_LOGIC_OP); + } + + if (state & META_COLOR_MASK) { + COPY_4V(save->ColorMask, ctx->Color.ColorMask); + if (!ctx->Color.ColorMask[0] || + !ctx->Color.ColorMask[1] || + !ctx->Color.ColorMask[2] || + !ctx->Color.ColorMask[3]) + _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + } + + if (state & META_DEPTH_TEST) { + save->Depth = ctx->Depth; /* struct copy */ + if (ctx->Depth.Test) + _mesa_Disable(GL_DEPTH_TEST); + } + + if (state & META_FOG) { + save->Fog = ctx->Fog.Enabled; + if (ctx->Fog.Enabled) + _mesa_set_enable(ctx, GL_FOG, GL_FALSE); + } + + if (state & META_RASTERIZATION) { + save->FrontPolygonMode = ctx->Polygon.FrontMode; + save->BackPolygonMode = ctx->Polygon.BackMode; + save->PolygonOffset = ctx->Polygon.OffsetFill; + save->PolygonSmooth = ctx->Polygon.SmoothFlag; + save->PolygonStipple = ctx->Polygon.StippleFlag; + save->PolygonCull = ctx->Polygon.CullFlag; + _mesa_PolygonMode(GL_FRONT_AND_BACK, GL_FILL); + _mesa_set_enable(ctx, GL_POLYGON_OFFSET_FILL, GL_FALSE); + _mesa_set_enable(ctx, GL_POLYGON_SMOOTH, GL_FALSE); + _mesa_set_enable(ctx, GL_POLYGON_STIPPLE, GL_FALSE); + _mesa_set_enable(ctx, GL_CULL_FACE, GL_FALSE); + } + + if (state & META_SCISSOR) { + save->Scissor = ctx->Scissor; /* struct copy */ + } + + if (state & META_SHADER) { + if (ctx->Extensions.ARB_vertex_program) { + save->VertexProgramEnabled = ctx->VertexProgram.Enabled; + save->VertexProgram = ctx->VertexProgram.Current; + _mesa_set_enable(ctx, GL_VERTEX_PROGRAM_ARB, GL_FALSE); + } + + if (ctx->Extensions.ARB_fragment_program) { + save->FragmentProgramEnabled = ctx->FragmentProgram.Enabled; + save->FragmentProgram = ctx->FragmentProgram.Current; + _mesa_set_enable(ctx, GL_FRAGMENT_PROGRAM_ARB, GL_FALSE); + } + + if (ctx->Extensions.ARB_shader_objects) { + save->Shader = ctx->Shader.CurrentProgram ? + ctx->Shader.CurrentProgram->Name : 0; + _mesa_UseProgramObjectARB(0); + } + } + + if (state & META_STENCIL_TEST) { + save->Stencil = ctx->Stencil; /* struct copy */ + if (ctx->Stencil.Enabled) + _mesa_Disable(GL_STENCIL_TEST); + /* NOTE: other stencil state not reset */ + } + + if (state & META_TEXTURE) { + GLuint u, tgt; + + save->ActiveUnit = ctx->Texture.CurrentUnit; + save->ClientActiveUnit = ctx->Array.ActiveTexture; + save->EnvMode = ctx->Texture.Unit[0].EnvMode; + + /* Disable all texture units */ + for (u = 0; u < ctx->Const.MaxTextureUnits; u++) { + save->TexEnabled[u] = ctx->Texture.Unit[u].Enabled; + save->TexGenEnabled[u] = ctx->Texture.Unit[u].TexGenEnabled; + if (ctx->Texture.Unit[u].Enabled || + ctx->Texture.Unit[u].TexGenEnabled) { + _mesa_ActiveTextureARB(GL_TEXTURE0 + u); + _mesa_set_enable(ctx, GL_TEXTURE_1D, GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_2D, GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_3D, GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_CUBE_MAP, GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_RECTANGLE, GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_GEN_S, GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_GEN_T, GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_GEN_R, GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_GEN_Q, GL_FALSE); + } + } + + /* save current texture objects for unit[0] only */ + for (tgt = 0; tgt < NUM_TEXTURE_TARGETS; tgt++) { + _mesa_reference_texobj(&save->CurrentTexture[tgt], + ctx->Texture.Unit[0].CurrentTex[tgt]); + } + + /* set defaults for unit[0] */ + _mesa_ActiveTextureARB(GL_TEXTURE0); + _mesa_ClientActiveTextureARB(GL_TEXTURE0); + _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); + } + + if (state & META_TRANSFORM) { + GLuint activeTexture = ctx->Texture.CurrentUnit; + _mesa_memcpy(save->ModelviewMatrix, ctx->ModelviewMatrixStack.Top->m, + 16 * sizeof(GLfloat)); + _mesa_memcpy(save->ProjectionMatrix, ctx->ProjectionMatrixStack.Top->m, + 16 * sizeof(GLfloat)); + _mesa_memcpy(save->TextureMatrix, ctx->TextureMatrixStack[0].Top->m, + 16 * sizeof(GLfloat)); + save->MatrixMode = ctx->Transform.MatrixMode; + /* set 1:1 vertex:pixel coordinate transform */ + _mesa_ActiveTextureARB(GL_TEXTURE0); + _mesa_MatrixMode(GL_TEXTURE); + _mesa_LoadIdentity(); + _mesa_ActiveTextureARB(GL_TEXTURE0 + activeTexture); + _mesa_MatrixMode(GL_MODELVIEW); + _mesa_LoadIdentity(); + _mesa_MatrixMode(GL_PROJECTION); + _mesa_LoadIdentity(); + _mesa_Ortho(0.0F, ctx->DrawBuffer->Width, + 0.0F, ctx->DrawBuffer->Height, + -1.0F, 1.0F); + save->ClipPlanesEnabled = ctx->Transform.ClipPlanesEnabled; + if (ctx->Transform.ClipPlanesEnabled) { + GLuint i; + for (i = 0; i < ctx->Const.MaxClipPlanes; i++) { + _mesa_set_enable(ctx, GL_CLIP_PLANE0 + i, GL_FALSE); + } + } + } + + if (state & META_VERTEX) { + /* save vertex array object state */ + _mesa_reference_array_object(ctx, &save->ArrayObj, + ctx->Array.ArrayObj); + _mesa_reference_buffer_object(ctx, &save->ArrayBufferObj, + ctx->Array.ArrayBufferObj); + /* set some default state? */ + } + + if (state & META_VIEWPORT) { + save->ViewportX = ctx->Viewport.X; + save->ViewportY = ctx->Viewport.Y; + save->ViewportW = ctx->Viewport.Width; + save->ViewportH = ctx->Viewport.Height; + _mesa_Viewport(0, 0, ctx->DrawBuffer->Width, ctx->DrawBuffer->Height); + save->DepthNear = ctx->Viewport.Near; + save->DepthFar = ctx->Viewport.Far; + _mesa_DepthRange(0.0, 1.0); + } + + /* misc */ + { + save->Lighting = ctx->Light.Enabled; + if (ctx->Light.Enabled) + _mesa_set_enable(ctx, GL_LIGHTING, GL_FALSE); + } +} + + +/** + * Leave meta state. This is like a light-weight version of glPopAttrib(). + */ +static void +_mesa_meta_end(GLcontext *ctx) +{ + struct save_state *save = &ctx->Meta->Save; + const GLbitfield state = save->SavedState; + + if (state & META_ALPHA_TEST) { + if (ctx->Color.AlphaEnabled != save->AlphaEnabled) + _mesa_set_enable(ctx, GL_ALPHA_TEST, save->AlphaEnabled); + } + + if (state & META_BLEND) { + if (ctx->Color.BlendEnabled != save->BlendEnabled) + _mesa_set_enable(ctx, GL_BLEND, save->BlendEnabled); + if (ctx->Color.ColorLogicOpEnabled != save->ColorLogicOpEnabled) + _mesa_set_enable(ctx, GL_COLOR_LOGIC_OP, save->ColorLogicOpEnabled); + } + + if (state & META_COLOR_MASK) { + if (!TEST_EQ_4V(ctx->Color.ColorMask, save->ColorMask)) + _mesa_ColorMask(save->ColorMask[0], save->ColorMask[1], + save->ColorMask[2], save->ColorMask[3]); + } + + if (state & META_DEPTH_TEST) { + if (ctx->Depth.Test != save->Depth.Test) + _mesa_set_enable(ctx, GL_DEPTH_TEST, save->Depth.Test); + _mesa_DepthFunc(save->Depth.Func); + _mesa_DepthMask(save->Depth.Mask); + } + + if (state & META_FOG) { + _mesa_set_enable(ctx, GL_FOG, save->Fog); + } + + if (state & META_RASTERIZATION) { + _mesa_PolygonMode(GL_FRONT, save->FrontPolygonMode); + _mesa_PolygonMode(GL_BACK, save->BackPolygonMode); + _mesa_set_enable(ctx, GL_POLYGON_STIPPLE, save->PolygonStipple); + _mesa_set_enable(ctx, GL_POLYGON_OFFSET_FILL, save->PolygonOffset); + _mesa_set_enable(ctx, GL_POLYGON_SMOOTH, save->PolygonSmooth); + _mesa_set_enable(ctx, GL_CULL_FACE, save->PolygonCull); + } + + if (state & META_SCISSOR) { + _mesa_set_enable(ctx, GL_SCISSOR_TEST, save->Scissor.Enabled); + _mesa_Scissor(save->Scissor.X, save->Scissor.Y, + save->Scissor.Width, save->Scissor.Height); + } + + if (state & META_SHADER) { + if (ctx->Extensions.ARB_vertex_program) { + _mesa_set_enable(ctx, GL_VERTEX_PROGRAM_ARB, + save->VertexProgramEnabled); + _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current, + save->VertexProgram); + } + + if (ctx->Extensions.ARB_fragment_program) { + _mesa_set_enable(ctx, GL_FRAGMENT_PROGRAM_ARB, + save->FragmentProgramEnabled); + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, + save->FragmentProgram); + } + + if (ctx->Extensions.ARB_shader_objects) { + _mesa_UseProgramObjectARB(save->Shader); + } + } + + if (state & META_STENCIL_TEST) { + const struct gl_stencil_attrib *stencil = &save->Stencil; + + _mesa_set_enable(ctx, GL_STENCIL_TEST, stencil->Enabled); + _mesa_ClearStencil(stencil->Clear); + if (ctx->Extensions.EXT_stencil_two_side) { + _mesa_set_enable(ctx, GL_STENCIL_TEST_TWO_SIDE_EXT, + stencil->TestTwoSide); + _mesa_ActiveStencilFaceEXT(stencil->ActiveFace + ? GL_BACK : GL_FRONT); + } + /* front state */ + _mesa_StencilFuncSeparate(GL_FRONT, + stencil->Function[0], + stencil->Ref[0], + stencil->ValueMask[0]); + _mesa_StencilMaskSeparate(GL_FRONT, stencil->WriteMask[0]); + _mesa_StencilOpSeparate(GL_FRONT, stencil->FailFunc[0], + stencil->ZFailFunc[0], + stencil->ZPassFunc[0]); + /* back state */ + _mesa_StencilFuncSeparate(GL_BACK, + stencil->Function[1], + stencil->Ref[1], + stencil->ValueMask[1]); + _mesa_StencilMaskSeparate(GL_BACK, stencil->WriteMask[1]); + _mesa_StencilOpSeparate(GL_BACK, stencil->FailFunc[1], + stencil->ZFailFunc[1], + stencil->ZPassFunc[1]); + } + + if (state & META_TEXTURE) { + GLuint u, tgt; + + ASSERT(ctx->Texture.CurrentUnit == 0); + + /* restore texenv for unit[0] */ + _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, save->EnvMode); + + /* restore texture objects for unit[0] only */ + for (tgt = 0; tgt < NUM_TEXTURE_TARGETS; tgt++) { + _mesa_reference_texobj(&ctx->Texture.Unit[0].CurrentTex[tgt], + save->CurrentTexture[tgt]); + } + + /* Re-enable textures, texgen */ + for (u = 0; u < ctx->Const.MaxTextureUnits; u++) { + if (save->TexEnabled[u]) { + _mesa_ActiveTextureARB(GL_TEXTURE0 + u); + + if (save->TexEnabled[u] & TEXTURE_1D_BIT) + _mesa_set_enable(ctx, GL_TEXTURE_1D, GL_TRUE); + if (save->TexEnabled[u] & TEXTURE_2D_BIT) + _mesa_set_enable(ctx, GL_TEXTURE_2D, GL_TRUE); + if (save->TexEnabled[u] & TEXTURE_3D_BIT) + _mesa_set_enable(ctx, GL_TEXTURE_3D, GL_TRUE); + if (save->TexEnabled[u] & TEXTURE_CUBE_BIT) + _mesa_set_enable(ctx, GL_TEXTURE_CUBE_MAP, GL_TRUE); + if (save->TexEnabled[u] & TEXTURE_RECT_BIT) + _mesa_set_enable(ctx, GL_TEXTURE_RECTANGLE, GL_TRUE); + } + + if (save->TexGenEnabled[u]) { + _mesa_ActiveTextureARB(GL_TEXTURE0 + u); + + if (save->TexGenEnabled[u] & S_BIT) + _mesa_set_enable(ctx, GL_TEXTURE_GEN_S, GL_TRUE); + if (save->TexGenEnabled[u] & T_BIT) + _mesa_set_enable(ctx, GL_TEXTURE_GEN_T, GL_TRUE); + if (save->TexGenEnabled[u] & R_BIT) + _mesa_set_enable(ctx, GL_TEXTURE_GEN_R, GL_TRUE); + if (save->TexGenEnabled[u] & Q_BIT) + _mesa_set_enable(ctx, GL_TEXTURE_GEN_Q, GL_TRUE); + } + } + + /* restore current unit state */ + _mesa_ActiveTextureARB(GL_TEXTURE0 + save->ActiveUnit); + _mesa_ClientActiveTextureARB(GL_TEXTURE0 + save->ClientActiveUnit); + } + + if (state & META_TRANSFORM) { + GLuint activeTexture = ctx->Texture.CurrentUnit; + _mesa_ActiveTextureARB(GL_TEXTURE0); + _mesa_MatrixMode(GL_TEXTURE); + _mesa_LoadMatrixf(save->TextureMatrix); + _mesa_ActiveTextureARB(GL_TEXTURE0 + activeTexture); + + _mesa_MatrixMode(GL_MODELVIEW); + _mesa_LoadMatrixf(save->ModelviewMatrix); + + _mesa_MatrixMode(GL_PROJECTION); + _mesa_LoadMatrixf(save->ProjectionMatrix); + + _mesa_MatrixMode(save->MatrixMode); + + save->ClipPlanesEnabled = ctx->Transform.ClipPlanesEnabled; + if (save->ClipPlanesEnabled) { + GLuint i; + for (i = 0; i < ctx->Const.MaxClipPlanes; i++) { + if (save->ClipPlanesEnabled & (1 << i)) { + _mesa_set_enable(ctx, GL_CLIP_PLANE0 + i, GL_TRUE); + } + } + } + } + + if (state & META_VERTEX) { + /* restore vertex buffer object */ + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, save->ArrayBufferObj->Name); + _mesa_reference_buffer_object(ctx, &save->ArrayBufferObj, NULL); + + /* restore vertex array object */ + _mesa_BindVertexArray(save->ArrayObj->Name); + _mesa_reference_array_object(ctx, &save->ArrayObj, NULL); + } + + if (state & META_VIEWPORT) { + _mesa_Viewport(save->ViewportX, save->ViewportY, + save->ViewportW, save->ViewportH); + _mesa_DepthRange(save->DepthNear, save->DepthFar); + } + + /* misc */ + if (save->Lighting) { + _mesa_set_enable(ctx, GL_LIGHTING, GL_TRUE); + } + if (save->Fog) { + _mesa_set_enable(ctx, GL_FOG, GL_TRUE); + } +} + + +/** + * Meta implementation of ctx->Driver.BlitFramebuffer() in terms + * of texture mapping and polygon rendering. + * Note: this function requires GL_ARB_texture_rectangle support. + */ +void +_mesa_meta_blit_framebuffer(GLcontext *ctx, + GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter) +{ + struct blit_state *blit = &ctx->Meta->Blit; + const GLint srcX = MIN2(srcX0, srcX1); + const GLint srcY = MIN2(srcY0, srcY1); + const GLint srcW = abs(srcX1 - srcX0); + const GLint srcH = abs(srcY1 - srcY0); + GLboolean srcFlipX = srcX1 < srcX0; + GLboolean srcFlipY = srcY1 < srcY0; + + ASSERT(ctx->Extensions.NV_texture_rectangle); + + if (srcW > ctx->Const.MaxTextureRectSize || + srcH > ctx->Const.MaxTextureRectSize) { + /* XXX avoid this fallback */ + _swrast_BlitFramebuffer(ctx, srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, mask, filter); + return; + } + + + if (srcFlipX) { + GLint tmp = dstX0; + dstX0 = dstX1; + dstX1 = tmp; + } + + if (srcFlipY) { + GLint tmp = dstY0; + dstY0 = dstY1; + dstY1 = tmp; + } + + /* only scissor effects blit so save/clear all other relevant state */ + _mesa_meta_begin(ctx, ~META_SCISSOR); + + if (blit->TexObj == 0) { + /* one-time setup */ + + /* create texture object */ + _mesa_GenTextures(1, &blit->TexObj); + _mesa_BindTexture(GL_TEXTURE_RECTANGLE, blit->TexObj); + _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); + } + else { + _mesa_BindTexture(GL_TEXTURE_RECTANGLE, blit->TexObj); + } + + _mesa_TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, filter); + _mesa_TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, filter); + + if (blit->ArrayObj == 0) { + /* one-time setup */ + + /* create vertex array object */ + _mesa_GenVertexArrays(1, &blit->ArrayObj); + _mesa_BindVertexArray(blit->ArrayObj); + + /* create vertex array buffer */ + _mesa_GenBuffersARB(1, &blit->VBO); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, blit->VBO); + _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(blit->verts), + blit->verts, GL_STREAM_DRAW_ARB); + + /* setup vertex arrays */ + _mesa_VertexPointer(2, GL_FLOAT, 4 * sizeof(GLfloat), + (void*) (0 * sizeof(GLfloat))); + _mesa_TexCoordPointer(2, GL_FLOAT, 4 * sizeof(GLfloat), + (void *) (2 * sizeof(GLfloat))); + _mesa_EnableClientState(GL_VERTEX_ARRAY); + _mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY); + } + else { + _mesa_BindVertexArray(blit->ArrayObj); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, blit->VBO); + } + + /* vertex positions */ + blit->verts[0][0] = (GLfloat) dstX0; + blit->verts[0][1] = (GLfloat) dstY0; + blit->verts[1][0] = (GLfloat) dstX1; + blit->verts[1][1] = (GLfloat) dstY0; + blit->verts[2][0] = (GLfloat) dstX1; + blit->verts[2][1] = (GLfloat) dstY1; + blit->verts[3][0] = (GLfloat) dstX0; + blit->verts[3][1] = (GLfloat) dstY1; + + /* texcoords */ + blit->verts[0][2] = 0.0F; + blit->verts[0][3] = 0.0F; + blit->verts[1][2] = (GLfloat) srcW; + blit->verts[1][3] = 0.0F; + blit->verts[2][2] = (GLfloat) srcW; + blit->verts[2][3] = (GLfloat) srcH; + blit->verts[3][2] = 0.0F; + blit->verts[3][3] = (GLfloat) srcH; + + /* upload new vertex data */ + _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, + sizeof(blit->verts), blit->verts); + + /* copy framebuffer image to texture */ + if (mask & GL_COLOR_BUFFER_BIT) { + if (blit->TexWidth == srcW && + blit->TexHeight == srcH && + blit->TexType == GL_RGBA) { + /* replace existing tex image */ + _mesa_CopyTexSubImage2D(GL_TEXTURE_RECTANGLE, 0, + 0, 0, srcX, srcY, srcW, srcH); + } + else { + /* create new tex image */ + _mesa_CopyTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_RGBA, + srcX, srcY, srcW, srcH, 0); + blit->TexWidth = srcW; + blit->TexHeight = srcH; + blit->TexType = GL_RGBA; + } + + mask &= ~GL_COLOR_BUFFER_BIT; + } + + _mesa_Enable(GL_TEXTURE_RECTANGLE); + + /* draw textured quad */ + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + + _mesa_Disable(GL_TEXTURE_RECTANGLE); + + _mesa_meta_end(ctx); + + /* XXX, TO-DO: try to handle these cases above! */ + if (mask & (GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT)) { + _swrast_BlitFramebuffer(ctx, srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, mask, filter); + } +} + + +/** + * Meta implementation of ctx->Driver.Clear() in terms of polygon rendering. + */ +void +_mesa_meta_clear(GLcontext *ctx, GLbitfield buffers) +{ + struct clear_state *clear = &ctx->Meta->Clear; + GLfloat z = 1.0 - 2.0 * ctx->Depth.Clear; + GLuint i; + + /* only scissor and color mask effects clearing */ + _mesa_meta_begin(ctx, ~(META_SCISSOR | META_COLOR_MASK)); + + if (clear->ArrayObj == 0) { + /* one-time setup */ + + /* create vertex array object */ + _mesa_GenVertexArrays(1, &clear->ArrayObj); + _mesa_BindVertexArray(clear->ArrayObj); + + /* create vertex array buffer */ + _mesa_GenBuffersARB(1, &clear->VBO); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO); + _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(clear->verts), + clear->verts, GL_STREAM_DRAW_ARB); + + /* setup vertex arrays */ + _mesa_VertexPointer(3, GL_FLOAT, 7 * sizeof(GLfloat), (void *) 0); + _mesa_ColorPointer(4, GL_FLOAT, 7 * sizeof(GLfloat), + (void *) (3 * sizeof(GLfloat))); + _mesa_EnableClientState(GL_VERTEX_ARRAY); + _mesa_EnableClientState(GL_COLOR_ARRAY); + } + else { + _mesa_BindVertexArray(clear->ArrayObj); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO); + } + + /* GL_COLOR_BUFFER_BIT */ + if (buffers & BUFFER_BITS_COLOR) { + /* leave colormask, glDrawBuffer state as-is */ + } + else { + _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + } + + /* GL_DEPTH_BUFFER_BIT */ + if (buffers & BUFFER_BIT_DEPTH) { + _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_TRUE); + _mesa_DepthFunc(GL_ALWAYS); + _mesa_DepthMask(GL_TRUE); + } + else { + assert(!ctx->Depth.Test); + } + + /* GL_STENCIL_BUFFER_BIT */ + if (buffers & BUFFER_BIT_STENCIL) { + _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_TRUE); + _mesa_StencilOpSeparate(GL_FRONT_AND_BACK, + GL_REPLACE, GL_REPLACE, GL_REPLACE); + _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS, + ctx->Stencil.Clear & 0x7fffffff, + ctx->Stencil.WriteMask[0]); + } + else { + assert(!ctx->Stencil.Enabled); + } + + /* vertex positions */ + clear->verts[0][0] = (GLfloat) ctx->DrawBuffer->_Xmin; + clear->verts[0][1] = (GLfloat) ctx->DrawBuffer->_Ymin; + clear->verts[0][2] = z; + clear->verts[1][0] = (GLfloat) ctx->DrawBuffer->_Xmax; + clear->verts[1][1] = (GLfloat) ctx->DrawBuffer->_Ymin; + clear->verts[1][2] = z; + clear->verts[2][0] = (GLfloat) ctx->DrawBuffer->_Xmax; + clear->verts[2][1] = (GLfloat) ctx->DrawBuffer->_Ymax; + clear->verts[2][2] = z; + clear->verts[3][0] = (GLfloat) ctx->DrawBuffer->_Xmin; + clear->verts[3][1] = (GLfloat) ctx->DrawBuffer->_Ymax; + clear->verts[3][2] = z; + + /* vertex colors */ + for (i = 0; i < 4; i++) { + COPY_4FV(&clear->verts[i][3], ctx->Color.ClearColor); + } + + /* upload new vertex data */ + _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, + sizeof(clear->verts), clear->verts); + + /* draw quad */ + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + + _mesa_meta_end(ctx); +} + + +/** + * Meta implementation of ctx->Driver.CopyPixels() in terms + * of texture mapping and polygon rendering. + * Note: this function requires GL_ARB_texture_rectangle support. + */ +void +_mesa_meta_copy_pixels(GLcontext *ctx, GLint srcX, GLint srcY, + GLsizei width, GLsizei height, + GLint dstX, GLint dstY, GLenum type) +{ + const GLenum filter = GL_NEAREST; + struct copypix_state *copypix = &ctx->Meta->CopyPix; + const GLfloat z = ctx->Current.RasterPos[2]; + const GLfloat dstX1 = dstX + width * ctx->Pixel.ZoomX; + const GLfloat dstY1 = dstY + height * ctx->Pixel.ZoomY; + + ASSERT(ctx->Extensions.NV_texture_rectangle); + + if (type != GL_COLOR || + ctx->_ImageTransferState || + ctx->Fog.Enabled || + width > ctx->Const.MaxTextureRectSize || + height > ctx->Const.MaxTextureRectSize) { + /* XXX avoid this fallback */ + _swrast_CopyPixels(ctx, srcX, srcY, width, height, dstX, dstY, type); + return; + } + + /* Most GL state applies to glCopyPixels, but a there's a few things + * we need to override: + */ + _mesa_meta_begin(ctx, (META_RASTERIZATION | + META_SHADER | + META_TEXTURE | + META_TRANSFORM | + META_VERTEX | + META_VIEWPORT)); + + if (copypix->TexObj == 0) { + /* one-time setup */ + + /* create texture object */ + _mesa_GenTextures(1, ©pix->TexObj); + _mesa_BindTexture(GL_TEXTURE_RECTANGLE, copypix->TexObj); + _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); + _mesa_TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, filter); + _mesa_TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, filter); + } + else { + _mesa_BindTexture(GL_TEXTURE_RECTANGLE, copypix->TexObj); + } + + if (copypix->ArrayObj == 0) { + /* one-time setup */ + + /* create vertex array object */ + _mesa_GenVertexArrays(1, ©pix->ArrayObj); + _mesa_BindVertexArray(copypix->ArrayObj); + + /* create vertex array buffer */ + _mesa_GenBuffersARB(1, ©pix->VBO); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, copypix->VBO); + _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(copypix->verts), + copypix->verts, GL_STREAM_DRAW_ARB); + + /* setup vertex arrays */ + _mesa_VertexPointer(3, GL_FLOAT, sizeof(copypix->verts[0]), + (void*) (0 * sizeof(GLfloat))); + _mesa_TexCoordPointer(2, GL_FLOAT, sizeof(copypix->verts[0]), + (void *) (3 * sizeof(GLfloat))); + _mesa_EnableClientState(GL_VERTEX_ARRAY); + _mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY); + } + else { + _mesa_BindVertexArray(copypix->ArrayObj); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, copypix->VBO); + } + + /* vertex positions, texcoords */ + copypix->verts[0][0] = (GLfloat) dstX; + copypix->verts[0][1] = (GLfloat) dstY; + copypix->verts[0][2] = z; + copypix->verts[0][3] = 0.0F; + copypix->verts[0][4] = 0.0F; + copypix->verts[1][0] = (GLfloat) dstX1; + copypix->verts[1][1] = (GLfloat) dstY; + copypix->verts[1][2] = z; + copypix->verts[1][3] = (GLfloat) width; + copypix->verts[1][4] = 0.0F; + copypix->verts[2][0] = (GLfloat) dstX1; + copypix->verts[2][1] = (GLfloat) dstY1; + copypix->verts[2][2] = z; + copypix->verts[2][3] = (GLfloat) width; + copypix->verts[2][4] = (GLfloat) height; + copypix->verts[3][0] = (GLfloat) dstX; + copypix->verts[3][1] = (GLfloat) dstY1; + copypix->verts[3][2] = z; + copypix->verts[3][3] = 0.0F; + copypix->verts[3][4] = (GLfloat) height; + + /* upload new vertex data */ + _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, + sizeof(copypix->verts), copypix->verts); + + /* copy framebuffer image to texture */ + if (type == GL_COLOR) { + if (copypix->TexWidth == width && + copypix->TexHeight == height && + copypix->TexType == type) { + /* replace existing tex image */ + _mesa_CopyTexSubImage2D(GL_TEXTURE_RECTANGLE, 0, + 0, 0, srcX, srcY, width, height); + } + else { + /* create new tex image */ + _mesa_CopyTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_RGBA, + srcX, srcY, width, height, 0); + copypix->TexWidth = width; + copypix->TexHeight = height; + copypix->TexType = type; + } + } + else if (type == GL_DEPTH) { + /* TO-DO: Use a GL_DEPTH_COMPONENT texture and a fragment program/shader + * that replaces the fragment.z value. + */ + } + else { + ASSERT(type == GL_STENCIL); + /* have to use sw fallback */ + } + + _mesa_Enable(GL_TEXTURE_RECTANGLE); + + /* draw textured quad */ + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + + _mesa_Disable(GL_TEXTURE_RECTANGLE); + + _mesa_meta_end(ctx); +} + + + +/** + * When the glDrawPixels() image size is greater than the max rectangle + * texture size we use this function to break the glDrawPixels() image + * into tiles which fit into the max texture size. + */ +static void +tiled_draw_pixels(GLcontext *ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *unpack, + const GLvoid *pixels) +{ + const GLint maxSize = ctx->Const.MaxTextureRectSize; + struct gl_pixelstore_attrib tileUnpack = *unpack; + GLint i, j; + + for (i = 0; i < width; i += maxSize) { + const GLint tileWidth = MIN2(maxSize, width - i); + const GLint tileX = (GLint) (x + i * ctx->Pixel.ZoomX); + + tileUnpack.SkipPixels = unpack->SkipPixels + i; + + for (j = 0; j < height; j += maxSize) { + const GLint tileHeight = MIN2(maxSize, height - j); + const GLint tileY = (GLint) (y + j * ctx->Pixel.ZoomY); + + tileUnpack.SkipRows = unpack->SkipRows + j; + + _mesa_meta_draw_pixels(ctx, tileX, tileY, + tileWidth, tileHeight, + format, type, &tileUnpack, pixels); + } + } +} + + +/** + * Meta implementation of ctx->Driver.DrawPixels() in terms + * of texture mapping and polygon rendering. + * Note: this function requires GL_ARB_texture_rectangle support. + */ +void +_mesa_meta_draw_pixels(GLcontext *ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *unpack, + const GLvoid *pixels) +{ + const GLenum filter = GL_NEAREST; + struct drawpix_state *drawpix = &ctx->Meta->DrawPix; + const GLfloat z = ctx->Current.RasterPos[2]; + const GLfloat x1 = x + width * ctx->Pixel.ZoomX; + const GLfloat y1 = y + height * ctx->Pixel.ZoomY; + const struct gl_pixelstore_attrib unpackSave = ctx->Unpack; + GLenum texIntFormat; + GLboolean fallback; + + ASSERT(ctx->Extensions.NV_texture_rectangle); + + /* + * Determine if we can do the glDrawPixels with texture mapping. + */ + fallback = GL_FALSE; + if (ctx->_ImageTransferState || + ctx->Fog.Enabled) { + fallback = GL_TRUE; + } + + if (_mesa_is_color_format(format)) { + texIntFormat = GL_RGBA; + } + else { + fallback = GL_TRUE; + } + + if (fallback) { + _swrast_DrawPixels(ctx, x, y, width, height, + format, type, unpack, pixels); + return; + } + + /* + * Check image size against max texture size, draw as tiles if needed. + */ + if (width > ctx->Const.MaxTextureRectSize || + height > ctx->Const.MaxTextureRectSize) { + tiled_draw_pixels(ctx, x, y, width, height, + format, type, unpack, pixels); + return; + } + + /* Most GL state applies to glDrawPixels, but a there's a few things + * we need to override: + */ + _mesa_meta_begin(ctx, (META_RASTERIZATION | + META_SHADER | + META_TEXTURE | + META_TRANSFORM | + META_VERTEX | + META_VIEWPORT)); + + if (drawpix->TexObj == 0) { + /* one-time setup */ + + /* create texture object */ + _mesa_GenTextures(1, &drawpix->TexObj); + _mesa_BindTexture(GL_TEXTURE_RECTANGLE, drawpix->TexObj); + _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); + _mesa_TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, filter); + _mesa_TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, filter); + } + else { + _mesa_BindTexture(GL_TEXTURE_RECTANGLE, drawpix->TexObj); + } + + if (drawpix->ArrayObj == 0) { + /* one-time setup */ + + /* create vertex array object */ + _mesa_GenVertexArrays(1, &drawpix->ArrayObj); + _mesa_BindVertexArray(drawpix->ArrayObj); + + /* create vertex array buffer */ + _mesa_GenBuffersARB(1, &drawpix->VBO); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, drawpix->VBO); + _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(drawpix->verts), + drawpix->verts, GL_STREAM_DRAW_ARB); + + /* setup vertex arrays */ + _mesa_VertexPointer(3, GL_FLOAT, sizeof(drawpix->verts[0]), + (void*) (0 * sizeof(GLfloat))); + _mesa_TexCoordPointer(2, GL_FLOAT, sizeof(drawpix->verts[0]), + (void *) (3 * sizeof(GLfloat))); + _mesa_EnableClientState(GL_VERTEX_ARRAY); + _mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY); + } + else { + _mesa_BindVertexArray(drawpix->ArrayObj); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, drawpix->VBO); + } + + /* vertex positions, texcoords */ + drawpix->verts[0][0] = (GLfloat) x; + drawpix->verts[0][1] = (GLfloat) y; + drawpix->verts[0][2] = z; + drawpix->verts[0][3] = 0.0F; + drawpix->verts[0][4] = 0.0F; + drawpix->verts[1][0] = (GLfloat) x1; + drawpix->verts[1][1] = (GLfloat) y; + drawpix->verts[1][2] = z; + drawpix->verts[1][3] = (GLfloat) width; + drawpix->verts[1][4] = 0.0F; + drawpix->verts[2][0] = (GLfloat) x1; + drawpix->verts[2][1] = (GLfloat) y1; + drawpix->verts[2][2] = z; + drawpix->verts[2][3] = (GLfloat) width; + drawpix->verts[2][4] = (GLfloat) height; + drawpix->verts[3][0] = (GLfloat) x; + drawpix->verts[3][1] = (GLfloat) y1; + drawpix->verts[3][2] = z; + drawpix->verts[3][3] = 0.0F; + drawpix->verts[3][4] = (GLfloat) height; + + /* upload new vertex data */ + _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, + sizeof(drawpix->verts), drawpix->verts); + + /* set given unpack params */ + ctx->Unpack = *unpack; /* XXX bufobj */ + + /* copy pixel data to texture */ + if (drawpix->TexWidth == width && + drawpix->TexHeight == height && + drawpix->TexIntFormat == texIntFormat) { + /* replace existing tex image */ + _mesa_TexSubImage2D(GL_TEXTURE_RECTANGLE, 0, + 0, 0, width, height, format, type, pixels); + } + else { + /* create new tex image */ + _mesa_TexImage2D(GL_TEXTURE_RECTANGLE, 0, texIntFormat, + width, height, 0, format, type, pixels); + drawpix->TexWidth = width; + drawpix->TexHeight = height; + drawpix->TexIntFormat = texIntFormat; + } + + /* restore unpack params */ + ctx->Unpack = unpackSave; /* XXX bufobj */ + + _mesa_Enable(GL_TEXTURE_RECTANGLE); + + /* draw textured quad */ + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + + _mesa_Disable(GL_TEXTURE_RECTANGLE); + + _mesa_meta_end(ctx); +} diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h new file mode 100644 index 00000000000..a9c5f980432 --- /dev/null +++ b/src/mesa/drivers/common/meta.h @@ -0,0 +1,80 @@ +/* + * Mesa 3-D graphics library + * Version: 7.6 + * + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#ifndef META_H +#define META_H + + +/** + * Flags passed to _mesa_meta_begin(). + * XXX these flags may evolve... + */ +/*@{*/ +#define META_ALPHA_TEST 0x1 +#define META_BLEND 0x2 /**< includes logicop */ +#define META_COLOR_MASK 0x4 +#define META_DEPTH_TEST 0x8 +#define META_FOG 0x10 +#define META_RASTERIZATION 0x20 +#define META_SCISSOR 0x40 +#define META_SHADER 0x80 +#define META_STENCIL_TEST 0x100 +#define META_TRANSFORM 0x200 /**< modelview, projection */ +#define META_TEXTURE 0x400 +#define META_VERTEX 0x800 +#define META_VIEWPORT 0x1000 +#define META_ALL ~0x0 +/*@}*/ + + +extern void +_mesa_meta_init(GLcontext *ctx); + +extern void +_mesa_meta_free(GLcontext *ctx); + +extern void +_mesa_meta_blit_framebuffer(GLcontext *ctx, + GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter); + +extern void +_mesa_meta_clear(GLcontext *ctx, GLbitfield buffers); + +extern void +_mesa_meta_copy_pixels(GLcontext *ctx, GLint srcx, GLint srcy, + GLsizei width, GLsizei height, + GLint dstx, GLint dsty, GLenum type); + +extern void +_mesa_meta_draw_pixels(GLcontext *ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *unpack, + const GLvoid *pixels); + + +#endif /* META_H */ diff --git a/src/mesa/drivers/dri/common/.gitignore b/src/mesa/drivers/dri/common/.gitignore new file mode 100644 index 00000000000..1edeb79fd12 --- /dev/null +++ b/src/mesa/drivers/dri/common/.gitignore @@ -0,0 +1 @@ +*.os diff --git a/src/mesa/drivers/dri/common/dri_metaops.c b/src/mesa/drivers/dri/common/dri_metaops.c index fe183c2e870..cdbea344951 100644 --- a/src/mesa/drivers/dri/common/dri_metaops.c +++ b/src/mesa/drivers/dri/common/dri_metaops.c @@ -357,6 +357,7 @@ meta_clear_tris(struct dri_metaops *meta, GLbitfield mask) GLuint saved_shader_program = 0; unsigned int saved_active_texture; struct gl_array_object *arraySave = NULL; + GLfloat saved_near, saved_far; if (!meta->clear.arrayObj) meta_init_clear(meta); @@ -370,8 +371,7 @@ meta_clear_tris(struct dri_metaops *meta, GLbitfield mask) GL_POLYGON_BIT | GL_STENCIL_BUFFER_BIT | GL_TRANSFORM_BIT | - GL_CURRENT_BIT | - GL_VIEWPORT_BIT); + GL_CURRENT_BIT); saved_active_texture = ctx->Texture.CurrentUnit; /* Disable existing GL state we don't want to apply to a clear. */ @@ -440,6 +440,8 @@ meta_clear_tris(struct dri_metaops *meta, GLbitfield mask) /* The ClearDepth value is unaffected by DepthRange, so do a default * mapping. */ + saved_near = ctx->Viewport.Near; + saved_far = ctx->Viewport.Far; _mesa_DepthRange(0.0, 1.0); /* Prepare the vertices, which are the same regardless of which buffer we're @@ -519,6 +521,7 @@ meta_clear_tris(struct dri_metaops *meta, GLbitfield mask) if (saved_shader_program) _mesa_UseProgramObjectARB(saved_shader_program); + _mesa_DepthRange(saved_near, saved_far); _mesa_PopAttrib(); /* restore current array object */ diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index 1d940603fa8..e48e10d7c06 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -778,7 +778,7 @@ dri2CreateNewScreen(int scrn, int fd, if (driDriverAPI.InitScreen2 == NULL) return NULL; - psp = _mesa_malloc(sizeof(*psp)); + psp = _mesa_calloc(sizeof(*psp)); if (!psp) return NULL; diff --git a/src/mesa/drivers/dri/common/extension_helper.h b/src/mesa/drivers/dri/common/extension_helper.h index 62801494ce3..08a97bb1110 100644 --- a/src/mesa/drivers/dri/common/extension_helper.h +++ b/src/mesa/drivers/dri/common/extension_helper.h @@ -752,6 +752,13 @@ static const char VertexAttrib4ubNV_names[] = ""; #endif +#if defined(need_GL_APPLE_texture_range) +static const char TextureRangeAPPLE_names[] = + "iip\0" /* Parameter signature */ + "glTextureRangeAPPLE\0" + ""; +#endif + #if defined(need_GL_SUN_vertex) static const char TexCoord2fColor4fNormal3fVertex3fSUN_names[] = "ffffffffffff\0" /* Parameter signature */ @@ -1567,6 +1574,13 @@ static const char FragmentLightivSGIX_names[] = ""; #endif +#if defined(need_GL_APPLE_texture_range) +static const char GetTexParameterPointervAPPLE_names[] = + "iip\0" /* Parameter signature */ + "glGetTexParameterPointervAPPLE\0" + ""; +#endif + #if defined(need_GL_EXT_pixel_transform) static const char PixelTransformParameterfvEXT_names[] = "iip\0" /* Parameter signature */ @@ -1771,6 +1785,13 @@ static const char DeleteFencesNV_names[] = ""; #endif +#if defined(need_GL_SGIX_polynomial_ffd) +static const char DeformationMap3dSGIX_names[] = + "iddiiddiiddiip\0" /* Parameter signature */ + "glDeformationMap3dSGIX\0" + ""; +#endif + #if defined(need_GL_VERSION_2_0) static const char IsShader_names[] = "i\0" /* Parameter signature */ @@ -1997,13 +2018,6 @@ static const char GetCombinerOutputParameterivNV_names[] = ""; #endif -#if defined(need_GL_IBM_multimode_draw_arrays) -static const char MultiModeDrawArraysIBM_names[] = - "pppii\0" /* Parameter signature */ - "glMultiModeDrawArraysIBM\0" - ""; -#endif - #if defined(need_GL_SGIS_pixel_texture) static const char PixelTexGenParameterivSGIS_names[] = "ip\0" /* Parameter signature */ @@ -2079,6 +2093,13 @@ static const char Uniform2fvARB_names[] = ""; #endif +#if defined(need_GL_APPLE_flush_buffer_range) +static const char BufferParameteriAPPLE_names[] = + "iii\0" /* Parameter signature */ + "glBufferParameteriAPPLE\0" + ""; +#endif + #if defined(need_GL_VERSION_1_3) static const char MultiTexCoord3dvARB_names[] = "ip\0" /* Parameter signature */ @@ -2803,6 +2824,13 @@ static const char IsProgramNV_names[] = ""; #endif +#if defined(need_GL_APPLE_flush_buffer_range) +static const char FlushMappedBufferRangeAPPLE_names[] = + "iii\0" /* Parameter signature */ + "glFlushMappedBufferRangeAPPLE\0" + ""; +#endif + #if defined(need_GL_SUN_triangle_list) static const char ReplacementCodePointerSUN_names[] = "iip\0" /* Parameter signature */ @@ -3920,6 +3948,13 @@ static const char VertexAttribs4dvNV_names[] = ""; #endif +#if defined(need_GL_IBM_multimode_draw_arrays) +static const char MultiModeDrawArraysIBM_names[] = + "pppii\0" /* Parameter signature */ + "glMultiModeDrawArraysIBM\0" + ""; +#endif + #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) static const char VertexAttrib4dARB_names[] = "idddd\0" /* Parameter signature */ @@ -4604,13 +4639,6 @@ static const char Minmax_names[] = ""; #endif -#if defined(need_GL_SGIX_polynomial_ffd) -static const char DeformationMap3dSGIX_names[] = - "iddiiddiiddiip\0" /* Parameter signature */ - "glDeformationMap3dSGIX\0" - ""; -#endif - #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_fog_coord) static const char FogCoorddvEXT_names[] = "p\0" /* Parameter signature */ @@ -4973,6 +5001,22 @@ static const struct dri_extension_function GL_3DFX_tbuffer_functions[] = { }; #endif +#if defined(need_GL_APPLE_flush_buffer_range) +static const struct dri_extension_function GL_APPLE_flush_buffer_range_functions[] = { + { BufferParameteriAPPLE_names, BufferParameteriAPPLE_remap_index, -1 }, + { FlushMappedBufferRangeAPPLE_names, FlushMappedBufferRangeAPPLE_remap_index, -1 }, + { NULL, 0, 0 } +}; +#endif + +#if defined(need_GL_APPLE_texture_range) +static const struct dri_extension_function GL_APPLE_texture_range_functions[] = { + { TextureRangeAPPLE_names, TextureRangeAPPLE_remap_index, -1 }, + { GetTexParameterPointervAPPLE_names, GetTexParameterPointervAPPLE_remap_index, -1 }, + { NULL, 0, 0 } +}; +#endif + #if defined(need_GL_APPLE_vertex_array_object) static const struct dri_extension_function GL_APPLE_vertex_array_object_functions[] = { { DeleteVertexArraysAPPLE_names, DeleteVertexArraysAPPLE_remap_index, -1 }, @@ -6131,9 +6175,9 @@ static const struct dri_extension_function GL_SGIX_pixel_texture_functions[] = { #if defined(need_GL_SGIX_polynomial_ffd) static const struct dri_extension_function GL_SGIX_polynomial_ffd_functions[] = { { LoadIdentityDeformationMapSGIX_names, LoadIdentityDeformationMapSGIX_remap_index, -1 }, + { DeformationMap3dSGIX_names, DeformationMap3dSGIX_remap_index, -1 }, { DeformSGIX_names, DeformSGIX_remap_index, -1 }, { DeformationMap3fSGIX_names, DeformationMap3fSGIX_remap_index, -1 }, - { DeformationMap3dSGIX_names, DeformationMap3dSGIX_remap_index, -1 }, { NULL, 0, 0 } }; #endif diff --git a/src/mesa/drivers/dri/fb/fb_egl.c b/src/mesa/drivers/dri/fb/fb_egl.c index dee67feb5ac..4e41860d8c8 100644 --- a/src/mesa/drivers/dri/fb/fb_egl.c +++ b/src/mesa/drivers/dri/fb/fb_egl.c @@ -605,7 +605,7 @@ fbDestroySurface(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface) { fbSurface *fs = Lookup_fbSurface(surface); _eglUnlinkSurface(&fs->Base); - if (!fs->Base.IsBound) + if (!_eglIsSurfaceBound(&fs->Base)) free(fs); return EGL_TRUE; } @@ -616,7 +616,7 @@ fbDestroyContext(_EGLDriver *drv, EGLDisplay dpy, EGLContext context) { fbContext *fc = Lookup_fbContext(context); _eglUnlinkContext(&fc->Base); - if (!fc->Base.IsBound) + if (!_eglIsContextBound(&fc->Base)) free(fc); return EGL_TRUE; } diff --git a/src/mesa/drivers/dri/i915/i830_context.h b/src/mesa/drivers/dri/i915/i830_context.h index 1bdb32049d7..f73cbbf88bb 100644 --- a/src/mesa/drivers/dri/i915/i830_context.h +++ b/src/mesa/drivers/dri/i915/i830_context.h @@ -40,6 +40,7 @@ #define I830_UPLOAD_BUFFERS 0x2 #define I830_UPLOAD_STIPPLE 0x4 #define I830_UPLOAD_INVARIENT 0x8 +#define I830_UPLOAD_RASTER_RULES 0x10 #define I830_UPLOAD_TEX(i) (0x10<<(i)) #define I830_UPLOAD_TEXBLEND(i) (0x100<<(i)) #define I830_UPLOAD_TEX_ALL (0x0f0) @@ -99,6 +100,11 @@ #define I830_TEXBLEND_SIZE 12 /* (4 args + op) * 2 + COLOR_FACTOR */ +enum { + I830_RASTER_RULES, + I830_RASTER_RULES_SIZE +}; + struct i830_texture_object { struct intel_texture_object intel; @@ -112,6 +118,7 @@ struct i830_hw_state GLuint Ctx[I830_CTX_SETUP_SIZE]; GLuint Buffer[I830_DEST_SETUP_SIZE]; GLuint Stipple[I830_STP_SETUP_SIZE]; + GLuint RasterRules[I830_RASTER_RULES_SIZE]; GLuint Tex[I830_TEX_UNITS][I830_TEX_SETUP_SIZE]; GLuint TexBlend[I830_TEX_UNITS][I830_TEXBLEND_SIZE]; GLuint TexBlendWordsUsed[I830_TEX_UNITS]; @@ -197,6 +204,7 @@ extern void i830InitStateFuncs(struct dd_function_table *functions); extern void i830EmitState(struct i830_context *i830); extern void i830InitState(struct i830_context *i830); +extern void i830_update_provoking_vertex(GLcontext *ctx); /* i830_metaops.c */ diff --git a/src/mesa/drivers/dri/i915/i830_reg.h b/src/mesa/drivers/dri/i915/i830_reg.h index db16871001d..ae1317029a2 100644 --- a/src/mesa/drivers/dri/i915/i830_reg.h +++ b/src/mesa/drivers/dri/i915/i830_reg.h @@ -420,8 +420,11 @@ #define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8) #define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5) #define ENABLE_TRI_STRIP_PROVOKE_VRTX (1<<2) +#define LINE_STRIP_PROVOKE_VRTX_MASK (3<<6) #define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6) +#define TRI_FAN_PROVOKE_VRTX_MASK (3<<3) #define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3) +#define TRI_STRIP_PROVOKE_VRTX_MASK (3<<0) #define TRI_STRIP_PROVOKE_VRTX(x) (x) /* _3DSTATE_SCISSOR_ENABLE, p200 */ diff --git a/src/mesa/drivers/dri/i915/i830_state.c b/src/mesa/drivers/dri/i915/i830_state.c index 8ef6c9144f1..645ebe30577 100644 --- a/src/mesa/drivers/dri/i915/i830_state.c +++ b/src/mesa/drivers/dri/i915/i830_state.c @@ -1047,6 +1047,16 @@ i830_init_packets(struct i830_context *i830) TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) | TEXBIND_SET0(TEXCOORDSRC_VTXSET_0)); + i830->state.RasterRules[I830_RASTER_RULES] = (_3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + ENABLE_TRI_STRIP_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | + TRI_STRIP_PROVOKE_VRTX(2)); + i830->state.Stipple[I830_STPREG_ST0] = _3DSTATE_STIPPLE; @@ -1058,6 +1068,27 @@ i830_init_packets(struct i830_context *i830) i830->state.Buffer[I830_DESTREG_SR2] = 0; } +void +i830_update_provoking_vertex(GLcontext * ctx) +{ + struct i830_context *i830 = i830_context(ctx); + + I830_STATECHANGE(i830, I830_UPLOAD_RASTER_RULES); + i830->state.RasterRules[I830_RASTER_RULES] &= ~(LINE_STRIP_PROVOKE_VRTX_MASK | + TRI_FAN_PROVOKE_VRTX_MASK | + TRI_STRIP_PROVOKE_VRTX_MASK); + + /* _NEW_LIGHT */ + if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) { + i830->state.RasterRules[I830_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | + TRI_STRIP_PROVOKE_VRTX(2)); + } else { + i830->state.RasterRules[I830_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(0) | + TRI_FAN_PROVOKE_VRTX(1) | + TRI_STRIP_PROVOKE_VRTX(0)); + } +} void i830InitStateFuncs(struct dd_function_table *functions) @@ -1101,6 +1132,7 @@ i830InitState(struct i830_context *i830) i830->current = &i830->state; i830->state.emitted = 0; i830->state.active = (I830_UPLOAD_INVARIENT | + I830_UPLOAD_RASTER_RULES | I830_UPLOAD_TEXBLEND(0) | I830_UPLOAD_STIPPLE | I830_UPLOAD_CTX | I830_UPLOAD_BUFFERS); diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 9c6f891dd32..983f6724c98 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -299,7 +299,7 @@ i830_emit_invarient_state(struct intel_context *intel) { BATCH_LOCALS; - BEGIN_BATCH(30, IGNORE_CLIPRECTS); + BEGIN_BATCH(29, IGNORE_CLIPRECTS); OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); OUT_BATCH(0); @@ -351,15 +351,6 @@ i830_emit_invarient_state(struct intel_context *intel) OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM); OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(3)); - OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | - ENABLE_POINT_RASTER_RULE | - OGL_POINT_RASTER_RULE | - ENABLE_LINE_STRIP_PROVOKE_VRTX | - ENABLE_TRI_FAN_PROVOKE_VRTX | - ENABLE_TRI_STRIP_PROVOKE_VRTX | - LINE_STRIP_PROVOKE_VRTX(1) | - TRI_FAN_PROVOKE_VRTX(2) | TRI_STRIP_PROVOKE_VRTX(2)); - OUT_BATCH(_3DSTATE_VERTEX_TRANSFORM); OUT_BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE); @@ -394,6 +385,9 @@ get_state_size(struct i830_hw_state *state) if (dirty & I830_UPLOAD_INVARIENT) sz += 40 * sizeof(int); + if (dirty & I830_UPLOAD_RASTER_RULES) + sz += sizeof(state->RasterRules); + if (dirty & I830_UPLOAD_CTX) sz += sizeof(state->Ctx); @@ -486,6 +480,11 @@ i830_emit_state(struct intel_context *intel) i830_emit_invarient_state(intel); } + if (dirty & I830_UPLOAD_RASTER_RULES) { + DBG("I830_UPLOAD_RASTER_RULES:\n"); + emit(intel, state->RasterRules, sizeof(state->RasterRules)); + } + if (dirty & I830_UPLOAD_CTX) { DBG("I830_UPLOAD_CTX:\n"); emit(intel, state->Ctx, sizeof(state->Ctx)); @@ -737,6 +736,13 @@ i830_assert_not_dirty( struct intel_context *intel ) assert(!get_dirty(state)); } +static void +i830_invalidate_state(struct intel_context *intel, GLuint new_state) +{ + if (new_state & _NEW_LIGHT) + i830_update_provoking_vertex(&intel->ctx); +} + void i830InitVtbl(struct i830_context *i830) { @@ -752,4 +758,5 @@ i830InitVtbl(struct i830_context *i830) i830->intel.vtbl.render_prevalidate = i830_render_prevalidate; i830->intel.vtbl.assert_not_dirty = i830_assert_not_dirty; i830->intel.vtbl.finish_batch = intel_finish_vb; + i830->intel.vtbl.invalidate_state = i830_invalidate_state; } diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index 5aa41334b0b..bb08cf8d18f 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -77,6 +77,8 @@ i915InvalidateState(GLcontext * ctx, GLuint new_state) i915_update_fog(ctx); if (new_state & (_NEW_STENCIL | _NEW_BUFFERS | _NEW_POLYGON)) i915_update_stencil(ctx); + if (new_state & (_NEW_LIGHT)) + i915_update_provoking_vertex(ctx); } diff --git a/src/mesa/drivers/dri/i915/i915_context.h b/src/mesa/drivers/dri/i915/i915_context.h index c6b7377da80..8de4a9d0d36 100644 --- a/src/mesa/drivers/dri/i915/i915_context.h +++ b/src/mesa/drivers/dri/i915/i915_context.h @@ -48,6 +48,7 @@ #define I915_UPLOAD_FOG 0x20 #define I915_UPLOAD_INVARIENT 0x40 #define I915_UPLOAD_DEFAULTS 0x80 +#define I915_UPLOAD_RASTER_RULES 0x100 #define I915_UPLOAD_TEX(i) (0x00010000<<(i)) #define I915_UPLOAD_TEX_ALL (0x00ff0000) #define I915_UPLOAD_TEX_0_SHIFT 16 @@ -112,6 +113,10 @@ #define I915_DEFREG_Z1 5 #define I915_DEF_SETUP_SIZE 6 +enum { + I915_RASTER_RULES, + I915_RASTER_RULES_SETUP_SIZE, +}; #define I915_MAX_CONSTANT 32 #define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT)) @@ -208,6 +213,7 @@ struct i915_hw_state GLuint Stipple[I915_STP_SETUP_SIZE]; GLuint Fog[I915_FOG_SETUP_SIZE]; GLuint Defaults[I915_DEF_SETUP_SIZE]; + GLuint RasterRules[I915_RASTER_RULES_SETUP_SIZE]; GLuint Tex[I915_TEX_UNITS][I915_TEX_SETUP_SIZE]; GLuint Constant[I915_CONSTANT_SIZE]; GLuint ConstantSize; @@ -324,6 +330,7 @@ extern void i915InitStateFunctions(struct dd_function_table *functions); extern void i915InitState(struct i915_context *i915); extern void i915_update_fog(GLcontext * ctx); extern void i915_update_stencil(GLcontext * ctx); +extern void i915_update_provoking_vertex(GLcontext *ctx); /*====================================================================== diff --git a/src/mesa/drivers/dri/i915/i915_reg.h b/src/mesa/drivers/dri/i915/i915_reg.h index 80ec46190d7..b5fa7fddb96 100644 --- a/src/mesa/drivers/dri/i915/i915_reg.h +++ b/src/mesa/drivers/dri/i915/i915_reg.h @@ -297,7 +297,9 @@ #define TEXKILL_4D (1<<9) #define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8) #define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5) +#define LINE_STRIP_PROVOKE_VRTX_MASK (3 << 6) #define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6) +#define TRI_FAN_PROVOKE_VRTX_MASK (3 << 3) #define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3) /* _3DSTATE_SCISSOR_ENABLE, p256 */ diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c index 670451bc838..b60efea75bd 100644 --- a/src/mesa/drivers/dri/i915/i915_state.c +++ b/src/mesa/drivers/dri/i915/i915_state.c @@ -1033,6 +1033,13 @@ i915_init_packets(struct i915_context *i915) i915->state.Buffer[I915_DESTREG_SR2] = 0; } + i915->state.RasterRules[I915_RASTER_RULES] = _3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D; #if 0 { @@ -1053,7 +1060,33 @@ i915_init_packets(struct i915_context *i915) i915->state.active = (I915_UPLOAD_PROGRAM | I915_UPLOAD_STIPPLE | I915_UPLOAD_CTX | - I915_UPLOAD_BUFFERS | I915_UPLOAD_INVARIENT); + I915_UPLOAD_BUFFERS | + I915_UPLOAD_INVARIENT | + I915_UPLOAD_RASTER_RULES); +} + +void +i915_update_provoking_vertex(GLcontext * ctx) +{ + struct i915_context *i915 = I915_CONTEXT(ctx); + + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + i915->state.Ctx[I915_CTXREG_LIS6] &= ~(S6_TRISTRIP_PV_MASK); + + I915_STATECHANGE(i915, I915_UPLOAD_RASTER_RULES); + i915->state.RasterRules[I915_RASTER_RULES] &= ~(LINE_STRIP_PROVOKE_VRTX_MASK | + TRI_FAN_PROVOKE_VRTX_MASK); + + /* _NEW_LIGHT */ + if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) { + i915->state.RasterRules[I915_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2)); + i915->state.Ctx[I915_CTXREG_LIS6] |= (2 << S6_TRISTRIP_PV_SHIFT); + } else { + i915->state.RasterRules[I915_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(0) | + TRI_FAN_PROVOKE_VRTX(1)); + i915->state.Ctx[I915_CTXREG_LIS6] |= (0 << S6_TRISTRIP_PV_SHIFT); + } } void diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 707864ebfdb..9a723d3cd73 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -176,7 +176,7 @@ i915_emit_invarient_state(struct intel_context *intel) { BATCH_LOCALS; - BEGIN_BATCH(18, IGNORE_CLIPRECTS); + BEGIN_BATCH(17, IGNORE_CLIPRECTS); OUT_BATCH(_3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | @@ -200,14 +200,6 @@ i915_emit_invarient_state(struct intel_context *intel) CSB_TCB(3, 3) | CSB_TCB(4, 4) | CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7)); - OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | - ENABLE_POINT_RASTER_RULE | - OGL_POINT_RASTER_RULE | - ENABLE_LINE_STRIP_PROVOKE_VRTX | - ENABLE_TRI_FAN_PROVOKE_VRTX | - LINE_STRIP_PROVOKE_VRTX(1) | - TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D); - /* Need to initialize this to zero. */ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0)); @@ -258,6 +250,9 @@ get_state_size(struct i915_hw_state *state) if (dirty & I915_UPLOAD_INVARIENT) sz += 30 * 4; + if (dirty & I915_UPLOAD_RASTER_RULES) + sz += sizeof(state->RasterRules); + if (dirty & I915_UPLOAD_CTX) sz += sizeof(state->Ctx); @@ -366,6 +361,12 @@ i915_emit_state(struct intel_context *intel) i915_emit_invarient_state(intel); } + if (dirty & I915_UPLOAD_RASTER_RULES) { + if (INTEL_DEBUG & DEBUG_STATE) + fprintf(stderr, "I915_UPLOAD_RASTER_RULES:\n"); + emit(intel, state->RasterRules, sizeof(state->RasterRules)); + } + if (dirty & I915_UPLOAD_CTX) { if (INTEL_DEBUG & DEBUG_STATE) fprintf(stderr, "I915_UPLOAD_CTX:\n"); diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 00a42111da0..128afb56866 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -43,6 +43,7 @@ DRIVER_SOURCES = \ brw_clip_util.c \ brw_context.c \ brw_curbe.c \ + brw_disasm.c \ brw_draw.c \ brw_draw_upload.c \ brw_eu.c \ diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 57ddf75413a..847c44ed83a 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -143,6 +143,7 @@ struct brw_context; #define BRW_NEW_DEPTH_BUFFER 0x20000 #define BRW_NEW_NR_WM_SURFACES 0x40000 #define BRW_NEW_NR_VS_SURFACES 0x80000 +#define BRW_NEW_INDEX_BUFFER 0x100000 struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -438,9 +439,13 @@ struct brw_query_object { unsigned int count; }; + +/** + * brw_context is derived from intel_context. + */ struct brw_context { - struct intel_context intel; + struct intel_context intel; /**< base class, must be first field */ GLuint primitive; GLboolean emit_state_always; @@ -475,6 +480,9 @@ struct brw_context struct { struct brw_vertex_element inputs[VERT_ATTRIB_MAX]; + struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; + GLuint nr_enabled; + #define BRW_NR_UPLOAD_BUFS 17 #define BRW_UPLOAD_INIT_SIZE (128*1024) @@ -498,8 +506,15 @@ struct brw_context */ const struct _mesa_index_buffer *ib; + /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */ dri_bo *bo; unsigned int offset; + unsigned int size; + /* Offset to index buffer index to use in CMD_3D_PRIM so that we can + * avoid re-uploading the IB packet over and over if we're actually + * referencing the same index buffer. + */ + unsigned int start_vertex_offset; } ib; /* Active vertex program: @@ -706,6 +721,8 @@ void brw_upload_urb_fence(struct brw_context *brw); */ void brw_upload_cs_urb_state(struct brw_context *brw); +/* brw_disasm.c */ +int brw_disasm (FILE *file, struct brw_instruction *inst); /*====================================================================== * Inline conversion functions. These are better-typed than the diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index d166250b4fe..78d457ad2be 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -471,8 +471,9 @@ #define BRW_CONDITIONAL_GE 4 #define BRW_CONDITIONAL_L 5 #define BRW_CONDITIONAL_LE 6 -#define BRW_CONDITIONAL_C 7 +#define BRW_CONDITIONAL_R 7 #define BRW_CONDITIONAL_O 8 +#define BRW_CONDITIONAL_U 9 #define BRW_DEBUG_NONE 0 #define BRW_DEBUG_BREAKPOINT 1 @@ -512,6 +513,7 @@ #define BRW_OPCODE_RSL 11 #define BRW_OPCODE_ASR 12 #define BRW_OPCODE_CMP 16 +#define BRW_OPCODE_CMPN 17 #define BRW_OPCODE_JMPI 32 #define BRW_OPCODE_IF 34 #define BRW_OPCODE_IFF 35 diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c new file mode 100644 index 00000000000..9fef230507f --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -0,0 +1,903 @@ +/* + * Copyright © 2008 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <unistd.h> +#include <stdarg.h> + +#include "main/mtypes.h" + +#include "brw_context.h" +#include "brw_defines.h" + +struct { + char *name; + int nsrc; + int ndst; +} opcode[128] = { + [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 }, + + [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, + + [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 }, + + [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, + [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 }, + [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 }, + [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 }, +}; + +char *conditional_modifier[16] = { + [BRW_CONDITIONAL_NONE] = "", + [BRW_CONDITIONAL_Z] = ".e", + [BRW_CONDITIONAL_NZ] = ".ne", + [BRW_CONDITIONAL_G] = ".g", + [BRW_CONDITIONAL_GE] = ".ge", + [BRW_CONDITIONAL_L] = ".l", + [BRW_CONDITIONAL_LE] = ".le", + [BRW_CONDITIONAL_R] = ".r", + [BRW_CONDITIONAL_O] = ".o", + [BRW_CONDITIONAL_U] = ".u", +}; + +char *negate[2] = { + [0] = "", + [1] = "-", +}; + +char *_abs[2] = { + [0] = "", + [1] = "(abs)", +}; + +char *vert_stride[16] = { + [0] = "0", + [1] = "1", + [2] = "2", + [3] = "4", + [4] = "8", + [5] = "16", + [6] = "32", + [15] = "VxH", +}; + +char *width[8] = { + [0] = "1", + [1] = "2", + [2] = "4", + [3] = "8", + [4] = "16", +}; + +char *horiz_stride[4] = { + [0] = "0", + [1] = "1", + [2] = "2", + [3] = "4" +}; + +char *chan_sel[4] = { + [0] = "x", + [1] = "y", + [2] = "z", + [3] = "w", +}; + +char *dest_condmod[16] = { +}; + +char *debug_ctrl[2] = { + [0] = "", + [1] = ".breakpoint" +}; + +char *saturate[2] = { + [0] = "", + [1] = ".sat" +}; + +char *exec_size[8] = { + [0] = "1", + [1] = "2", + [2] = "4", + [3] = "8", + [4] = "16", + [5] = "32" +}; + +char *pred_inv[2] = { + [0] = "+", + [1] = "-" +}; + +char *pred_ctrl_align16[16] = { + [1] = "", + [2] = ".x", + [3] = ".y", + [4] = ".z", + [5] = ".w", + [6] = ".any4h", + [7] = ".all4h", +}; + +char *pred_ctrl_align1[16] = { + [1] = "", + [2] = ".anyv", + [3] = ".allv", + [4] = ".any2h", + [5] = ".all2h", + [6] = ".any4h", + [7] = ".all4h", + [8] = ".any8h", + [9] = ".all8h", + [10] = ".any16h", + [11] = ".all16h", +}; + +char *thread_ctrl[4] = { + [0] = "", + [2] = "switch" +}; + +char *compr_ctrl[4] = { + [0] = "", + [1] = "sechalf", + [2] = "compr", +}; + +char *dep_ctrl[4] = { + [0] = "", + [1] = "NoDDClr", + [2] = "NoDDChk", + [3] = "NoDDClr,NoDDChk", +}; + +char *mask_ctrl[4] = { + [0] = "", + [1] = "nomask", +}; + +char *access_mode[2] = { + [0] = "align1", + [1] = "align16", +}; + +char *reg_encoding[8] = { + [0] = "UD", + [1] = "D", + [2] = "UW", + [3] = "W", + [4] = "UB", + [5] = "B", + [7] = "F" +}; + +char *imm_encoding[8] = { + [0] = "UD", + [1] = "D", + [2] = "UW", + [3] = "W", + [5] = "VF", + [5] = "V", + [7] = "F" +}; + +char *reg_file[4] = { + [0] = "A", + [1] = "g", + [2] = "m", + [3] = "imm", +}; + +char *writemask[16] = { + [0x0] = ".", + [0x1] = ".x", + [0x2] = ".y", + [0x3] = ".xy", + [0x4] = ".z", + [0x5] = ".xz", + [0x6] = ".yz", + [0x7] = ".xyz", + [0x8] = ".w", + [0x9] = ".xw", + [0xa] = ".yw", + [0xb] = ".xyw", + [0xc] = ".zw", + [0xd] = ".xzw", + [0xe] = ".yzw", + [0xf] = "", +}; + +char *end_of_thread[2] = { + [0] = "", + [1] = "EOT" +}; + +char *target_function[16] = { + [BRW_MESSAGE_TARGET_NULL] = "null", + [BRW_MESSAGE_TARGET_MATH] = "math", + [BRW_MESSAGE_TARGET_SAMPLER] = "sampler", + [BRW_MESSAGE_TARGET_GATEWAY] = "gateway", + [BRW_MESSAGE_TARGET_DATAPORT_READ] = "read", + [BRW_MESSAGE_TARGET_DATAPORT_WRITE] = "write", + [BRW_MESSAGE_TARGET_URB] = "urb", + [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner" +}; + +char *math_function[16] = { + [BRW_MATH_FUNCTION_INV] = "inv", + [BRW_MATH_FUNCTION_LOG] = "log", + [BRW_MATH_FUNCTION_EXP] = "exp", + [BRW_MATH_FUNCTION_SQRT] = "sqrt", + [BRW_MATH_FUNCTION_RSQ] = "rsq", + [BRW_MATH_FUNCTION_SIN] = "sin", + [BRW_MATH_FUNCTION_COS] = "cos", + [BRW_MATH_FUNCTION_SINCOS] = "sincos", + [BRW_MATH_FUNCTION_TAN] = "tan", + [BRW_MATH_FUNCTION_POW] = "pow", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intmod", + [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intdiv", +}; + +char *math_saturate[2] = { + [0] = "", + [1] = "sat" +}; + +char *math_signed[2] = { + [0] = "", + [1] = "signed" +}; + +char *math_scalar[2] = { + [0] = "", + [1] = "scalar" +}; + +char *math_precision[2] = { + [0] = "", + [1] = "partial_precision" +}; + +char *urb_swizzle[4] = { + [BRW_URB_SWIZZLE_NONE] = "", + [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave", + [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose", +}; + +char *urb_allocate[2] = { + [0] = "", + [1] = "allocate" +}; + +char *urb_used[2] = { + [0] = "", + [1] = "used" +}; + +char *urb_complete[2] = { + [0] = "", + [1] = "complete" +}; + +char *sampler_target_format[4] = { + [0] = "F", + [2] = "UD", + [3] = "D" +}; + + +static int column; + +static int string (FILE *file, char *string) +{ + fputs (string, file); + column += strlen (string); + return 0; +} + +static int format (FILE *f, char *format, ...) +{ + char buf[1024]; + va_list args; + va_start (args, format); + + vsnprintf (buf, sizeof (buf) - 1, format, args); + string (f, buf); + return 0; +} + +static int newline (FILE *f) +{ + putc ('\n', f); + column = 0; + return 0; +} + +static int pad (FILE *f, int c) +{ + do + string (f, " "); + while (column < c); + return 0; +} + +static int control (FILE *file, char *name, char *ctrl[], GLuint id, int *space) +{ + if (!ctrl[id]) { + fprintf (file, "*** invalid %s value %d ", + name, id); + return 1; + } + if (ctrl[id][0]) + { + if (space && *space) + string (file, " "); + string (file, ctrl[id]); + if (space) + *space = 1; + } + return 0; +} + +static int print_opcode (FILE *file, int id) +{ + if (!opcode[id].name) { + format (file, "*** invalid opcode value %d ", id); + return 1; + } + string (file, opcode[id].name); + return 0; +} + +static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr) +{ + int err = 0; + if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) { + switch (_reg_nr & 0xf0) { + case BRW_ARF_NULL: + string (file, "null"); + return -1; + case BRW_ARF_ADDRESS: + format (file, "a%d", _reg_nr & 0x0f); + break; + case BRW_ARF_ACCUMULATOR: + format (file, "acc%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK: + format (file, "mask%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK_STACK: + format (file, "msd%d", _reg_nr & 0x0f); + break; + case BRW_ARF_STATE: + format (file, "sr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_CONTROL: + format (file, "cr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_NOTIFICATION_COUNT: + format (file, "n%d", _reg_nr & 0x0f); + break; + case BRW_ARF_IP: + string (file, "ip"); + return -1; + break; + default: + format (file, "ARF%d", _reg_nr); + break; + } + } else { + err |= control (file, "src reg file", reg_file, _reg_file, NULL); + format (file, "%d", _reg_nr); + } + return err; +} + +static int dest (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + + if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT) + { + err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr); + if (err == -1) + return 0; + if (inst->bits1.da1.dest_subreg_nr) + format (file, ".%d", inst->bits1.da1.dest_subreg_nr); + format (file, "<%d>", inst->bits1.da1.dest_horiz_stride); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL); + } + else + { + string (file, "g[a0"); + if (inst->bits1.ia1.dest_subreg_nr) + format (file, ".%d", inst->bits1.ia1.dest_subreg_nr); + if (inst->bits1.ia1.dest_indirect_offset) + format (file, " %d", inst->bits1.ia1.dest_indirect_offset); + string (file, "]"); + format (file, "<%d>", inst->bits1.ia1.dest_horiz_stride); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL); + } + } + else + { + if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT) + { + err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr); + if (err == -1) + return 0; + if (inst->bits1.da16.dest_subreg_nr) + format (file, ".%d", inst->bits1.da16.dest_subreg_nr); + string (file, "<1>"); + err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL); + } + else + { + err = 1; + string (file, "Indirect align16 address mode not supported"); + } + } + + return 0; +} + +static int src_align1_region (FILE *file, + GLuint _vert_stride, GLuint _width, GLuint _horiz_stride) +{ + int err = 0; + string (file, "<"); + err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); + string (file, ","); + err |= control (file, "width", width, _width, NULL); + string (file, ","); + err |= control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL); + string (file, ">"); + return err; +} + +static int src_da1 (FILE *file, GLuint type, GLuint _reg_file, + GLuint _vert_stride, GLuint _width, GLuint _horiz_stride, + GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + err |= reg (file, _reg_file, reg_num); + if (err == -1) + return 0; + if (sub_reg_num) + format (file, ".%d", sub_reg_num); + src_align1_region (file, _vert_stride, _width, _horiz_stride); + err |= control (file, "src reg encoding", reg_encoding, type, NULL); + return err; +} + +static int src_ia1 (FILE *file, + GLuint type, + GLuint _reg_file, + GLint _addr_imm, + GLuint _addr_subreg_nr, + GLuint _negate, + GLuint __abs, + GLuint _addr_mode, + GLuint _horiz_stride, + GLuint _width, + GLuint _vert_stride) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + string (file, "g[a0"); + if (_addr_subreg_nr) + format (file, ".%d", _addr_subreg_nr); + if (_addr_imm) + format (file, " %d", _addr_imm); + string (file, "]"); + src_align1_region (file, _vert_stride, _width, _horiz_stride); + err |= control (file, "src reg encoding", reg_encoding, type, NULL); + return err; +} + +static int src_da16 (FILE *file, + GLuint _reg_type, + GLuint _reg_file, + GLuint _vert_stride, + GLuint _reg_nr, + GLuint _subreg_nr, + GLuint __abs, + GLuint _negate, + GLuint swz_x, + GLuint swz_y, + GLuint swz_z, + GLuint swz_w) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + err |= reg (file, _reg_file, _reg_nr); + if (err == -1) + return 0; + if (_subreg_nr) + format (file, ".%d", _subreg_nr); + string (file, "<"); + err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); + string (file, ",1,1>"); + err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL); + /* + * Three kinds of swizzle display: + * identity - nothing printed + * 1->all - print the single channel + * 1->1 - print the mapping + */ + if (swz_x == BRW_CHANNEL_X && + swz_y == BRW_CHANNEL_Y && + swz_z == BRW_CHANNEL_Z && + swz_w == BRW_CHANNEL_W) + { + ; + } + else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + } + else + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + err |= control (file, "channel select", chan_sel, swz_y, NULL); + err |= control (file, "channel select", chan_sel, swz_z, NULL); + err |= control (file, "channel select", chan_sel, swz_w, NULL); + } + return err; +} + + +static int imm (FILE *file, GLuint type, struct brw_instruction *inst) { + switch (type) { + case BRW_REGISTER_TYPE_UD: + format (file, "0x%08xUD", inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_D: + format (file, "%dD", inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UW: + format (file, "0x%04xUW", (uint16_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_W: + format (file, "%dW", (int16_t) inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UB: + format (file, "0x%02xUB", (int8_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_VF: + format (file, "Vector Float"); + break; + case BRW_REGISTER_TYPE_V: + format (file, "0x%08xV", inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_F: + format (file, "%-gF", inst->bits3.f); + } + return 0; +} + +static int src0 (FILE *file, struct brw_instruction *inst) +{ + if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE) + return imm (file, inst->bits1.da1.src0_reg_type, + inst); + else if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da1 (file, + inst->bits1.da1.src0_reg_type, + inst->bits1.da1.src0_reg_file, + inst->bits2.da1.src0_vert_stride, + inst->bits2.da1.src0_width, + inst->bits2.da1.src0_horiz_stride, + inst->bits2.da1.src0_reg_nr, + inst->bits2.da1.src0_subreg_nr, + inst->bits2.da1.src0_abs, + inst->bits2.da1.src0_negate); + } + else + { + return src_ia1 (file, + inst->bits1.ia1.src0_reg_type, + inst->bits1.ia1.src0_reg_file, + inst->bits2.ia1.src0_indirect_offset, + inst->bits2.ia1.src0_subreg_nr, + inst->bits2.ia1.src0_negate, + inst->bits2.ia1.src0_abs, + inst->bits2.ia1.src0_address_mode, + inst->bits2.ia1.src0_horiz_stride, + inst->bits2.ia1.src0_width, + inst->bits2.ia1.src0_vert_stride); + } + } + else + { + if (inst->bits2.da16.src0_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da16 (file, + inst->bits1.da16.src0_reg_type, + inst->bits1.da16.src0_reg_file, + inst->bits2.da16.src0_vert_stride, + inst->bits2.da16.src0_reg_nr, + inst->bits2.da16.src0_subreg_nr, + inst->bits2.da16.src0_abs, + inst->bits2.da16.src0_negate, + inst->bits2.da16.src0_swz_x, + inst->bits2.da16.src0_swz_y, + inst->bits2.da16.src0_swz_z, + inst->bits2.da16.src0_swz_w); + } + else + { + string (file, "Indirect align16 address mode not supported"); + return 1; + } + } +} + +static int src1 (FILE *file, struct brw_instruction *inst) +{ + if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE) + return imm (file, inst->bits1.da1.src1_reg_type, + inst); + else if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da1 (file, + inst->bits1.da1.src1_reg_type, + inst->bits1.da1.src1_reg_file, + inst->bits3.da1.src1_vert_stride, + inst->bits3.da1.src1_width, + inst->bits3.da1.src1_horiz_stride, + inst->bits3.da1.src1_reg_nr, + inst->bits3.da1.src1_subreg_nr, + inst->bits3.da1.src1_abs, + inst->bits3.da1.src1_negate); + } + else + { + return src_ia1 (file, + inst->bits1.ia1.src1_reg_type, + inst->bits1.ia1.src1_reg_file, + inst->bits3.ia1.src1_indirect_offset, + inst->bits3.ia1.src1_subreg_nr, + inst->bits3.ia1.src1_negate, + inst->bits3.ia1.src1_abs, + inst->bits3.ia1.src1_address_mode, + inst->bits3.ia1.src1_horiz_stride, + inst->bits3.ia1.src1_width, + inst->bits3.ia1.src1_vert_stride); + } + } + else + { + if (inst->bits3.da16.src1_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da16 (file, + inst->bits1.da16.src1_reg_type, + inst->bits1.da16.src1_reg_file, + inst->bits3.da16.src1_vert_stride, + inst->bits3.da16.src1_reg_nr, + inst->bits3.da16.src1_subreg_nr, + inst->bits3.da16.src1_abs, + inst->bits3.da16.src1_negate, + inst->bits3.da16.src1_swz_x, + inst->bits3.da16.src1_swz_y, + inst->bits3.da16.src1_swz_z, + inst->bits3.da16.src1_swz_w); + } + else + { + string (file, "Indirect align16 address mode not supported"); + return 1; + } + } +} + +int brw_disasm (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + int space = 0; + + if (inst->header.predicate_control) { + string (file, "("); + err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL); + string (file, "f0"); + if (inst->bits2.da1.flag_reg_nr) + format (file, ".%d", inst->bits2.da1.flag_reg_nr); + if (inst->header.access_mode == BRW_ALIGN_1) + err |= control (file, "predicate control align1", pred_ctrl_align1, + inst->header.predicate_control, NULL); + else + err |= control (file, "predicate control align16", pred_ctrl_align16, + inst->header.predicate_control, NULL); + string (file, ") "); + } + + err |= print_opcode (file, inst->header.opcode); + err |= control (file, "saturate", saturate, inst->header.saturate, NULL); + err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL); + + if (inst->header.opcode != BRW_OPCODE_SEND) + err |= control (file, "conditional modifier", conditional_modifier, + inst->header.destreg__conditionalmod, NULL); + + if (inst->header.opcode != BRW_OPCODE_NOP) { + string (file, "("); + err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL); + string (file, ")"); + } + + if (inst->header.opcode == BRW_OPCODE_SEND) + format (file, " %d", inst->header.destreg__conditionalmod); + + if (opcode[inst->header.opcode].ndst > 0) { + pad (file, 16); + err |= dest (file, inst); + } + if (opcode[inst->header.opcode].nsrc > 0) { + pad (file, 32); + err |= src0 (file, inst); + } + if (opcode[inst->header.opcode].nsrc > 1) { + pad (file, 48); + err |= src1 (file, inst); + } + + if (inst->header.opcode == BRW_OPCODE_SEND) { + newline (file); + pad (file, 16); + space = 0; + err |= control (file, "target function", target_function, + inst->bits3.generic.msg_target, &space); + switch (inst->bits3.generic.msg_target) { + case BRW_MESSAGE_TARGET_MATH: + err |= control (file, "math function", math_function, + inst->bits3.math.function, &space); + err |= control (file, "math saturate", math_saturate, + inst->bits3.math.saturate, &space); + err |= control (file, "math signed", math_signed, + inst->bits3.math.int_type, &space); + err |= control (file, "math scalar", math_scalar, + inst->bits3.math.data_type, &space); + err |= control (file, "math precision", math_precision, + inst->bits3.math.precision, &space); + break; + case BRW_MESSAGE_TARGET_SAMPLER: + format (file, " (%d, %d, ", + inst->bits3.sampler.binding_table_index, + inst->bits3.sampler.sampler); + err |= control (file, "sampler target format", sampler_target_format, + inst->bits3.sampler.return_format, NULL); + string (file, ")"); + break; + case BRW_MESSAGE_TARGET_DATAPORT_WRITE: + format (file, " (%d, %d, %d, %d)", + inst->bits3.dp_write.binding_table_index, + (inst->bits3.dp_write.pixel_scoreboard_clear << 3) | + inst->bits3.dp_write.msg_control, + inst->bits3.dp_write.msg_type, + inst->bits3.dp_write.send_commit_msg); + break; + case BRW_MESSAGE_TARGET_URB: + format (file, " %d", inst->bits3.urb.offset); + space = 1; + err |= control (file, "urb swizzle", urb_swizzle, + inst->bits3.urb.swizzle_control, &space); + err |= control (file, "urb allocate", urb_allocate, + inst->bits3.urb.allocate, &space); + err |= control (file, "urb used", urb_used, + inst->bits3.urb.used, &space); + err |= control (file, "urb complete", urb_complete, + inst->bits3.urb.complete, &space); + break; + case BRW_MESSAGE_TARGET_THREAD_SPAWNER: + break; + default: + format (file, "unsupported target %d", inst->bits3.generic.msg_target); + break; + } + if (space) + string (file, " "); + format (file, "mlen %d", + inst->bits3.generic.msg_length); + format (file, " rlen %d", + inst->bits3.generic.response_length); + } + pad (file, 64); + if (inst->header.opcode != BRW_OPCODE_NOP) { + string (file, "{"); + space = 1; + err |= control(file, "access mode", access_mode, inst->header.access_mode, &space); + err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space); + err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space); + err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space); + err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space); + if (inst->header.opcode == BRW_OPCODE_SEND) + err |= control (file, "end of thread", end_of_thread, + inst->bits3.generic.end_of_thread, &space); + if (space) + string (file, " "); + string (file, "}"); + } + string (file, ";"); + newline (file); + return err; +} diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 5152c3f3a5f..682094ff139 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -141,6 +141,8 @@ static void brw_emit_prim(struct brw_context *brw, prim_packet.verts_per_instance = trim(prim->mode, prim->count); prim_packet.start_vert_location = prim->start; + if (prim->indexed) + prim_packet.start_vert_location += brw->ib.start_vertex_offset; prim_packet.instance_count = 1; prim_packet.start_instance_location = 0; prim_packet.base_vert_location = 0; @@ -422,54 +424,31 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, return retval; } -static GLboolean brw_need_rebase( GLcontext *ctx, - const struct gl_client_array *arrays[], - const struct _mesa_index_buffer *ib, - GLuint min_index ) -{ - if (min_index == 0) - return GL_FALSE; - - if (ib) { - if (!vbo_all_varyings_in_vbos(arrays)) - return GL_TRUE; - else - return GL_FALSE; - } - else { - /* Hmm. This isn't quite what I wanted. BRW can actually - * handle the mixed case well enough that we shouldn't need to - * rebase. However, it's probably not very common, nor hugely - * expensive to do it this way: - */ - if (!vbo_all_varyings_in_vbos(arrays)) - return GL_TRUE; - else - return GL_FALSE; - } -} - - void brw_draw_prims( GLcontext *ctx, const struct gl_client_array *arrays[], const struct _mesa_prim *prim, GLuint nr_prims, const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, GLuint min_index, GLuint max_index ) { GLboolean retval; - /* Decide if we want to rebase. If so we end up recursing once - * only into this function. - */ - if (brw_need_rebase( ctx, arrays, ib, min_index )) { - vbo_rebase_prims( ctx, arrays, - prim, nr_prims, - ib, min_index, max_index, - brw_draw_prims ); - - return; + if (!vbo_all_varyings_in_vbos(arrays)) { + if (!index_bounds_valid) + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + + /* Decide if we want to rebase. If so we end up recursing once + * only into this function. + */ + if (min_index != 0) { + vbo_rebase_prims(ctx, arrays, + prim, nr_prims, + ib, min_index, max_index, + brw_draw_prims ); + return; + } } /* Make a first attempt at drawing: diff --git a/src/mesa/drivers/dri/i965/brw_draw.h b/src/mesa/drivers/dri/i965/brw_draw.h index 9aebbdb1b86..2a14db217fc 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.h +++ b/src/mesa/drivers/dri/i965/brw_draw.h @@ -39,6 +39,7 @@ void brw_draw_prims( GLcontext *ctx, const struct _mesa_prim *prims, GLuint nr_prims, const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, GLuint min_index, GLuint max_index ); diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index c29f1dd5c03..ab6b62812f1 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -350,9 +350,6 @@ static void brw_prepare_vertices(struct brw_context *brw) unsigned int min_index = brw->vb.min_index; unsigned int max_index = brw->vb.max_index; - struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; - GLuint nr_enabled = 0; - struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; GLuint nr_uploads = 0; @@ -362,12 +359,13 @@ static void brw_prepare_vertices(struct brw_context *brw) _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); /* Accumulate the list of enabled arrays. */ + brw->vb.nr_enabled = 0; while (vs_inputs) { GLuint i = _mesa_ffsll(vs_inputs) - 1; struct brw_vertex_element *input = &brw->vb.inputs[i]; vs_inputs &= ~(1 << i); - enabled[nr_enabled++] = input; + brw->vb.enabled[brw->vb.nr_enabled++] = input; } /* XXX: In the rare cases where this happens we fallback all @@ -376,16 +374,15 @@ static void brw_prepare_vertices(struct brw_context *brw) * cases with > 17 vertex attributes enabled, so it probably * isn't an issue at this point. */ - if (nr_enabled >= BRW_VEP_MAX) { + if (brw->vb.nr_enabled >= BRW_VEP_MAX) { intel->Fallback = 1; return; } - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; input->element_size = get_size(input->glarray->Type) * input->glarray->Size; - input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1; if (input->glarray->BufferObj->Name != 0) { struct intel_buffer_object *intel_buffer = @@ -398,7 +395,23 @@ static void brw_prepare_vertices(struct brw_context *brw) dri_bo_reference(input->bo); input->offset = (unsigned long)input->glarray->Ptr; input->stride = input->glarray->StrideB; + input->count = input->glarray->_MaxElement; + + /* This is a common place to reach if the user mistakenly supplies + * a pointer in place of a VBO offset. If we just let it go through, + * we may end up dereferencing a pointer beyond the bounds of the + * GTT. We would hope that the VBO's max_index would save us, but + * Mesa appears to hand us min/max values not clipped to the + * array object's _MaxElement, and _MaxElement frequently appears + * to be wrong anyway. + * + * The VBO spec allows application termination in this case, and it's + * probably a service to the poor programmer to do so rather than + * trying to just not render. + */ + assert(input->offset < input->bo->size); } else { + input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1; if (input->bo != NULL) { /* Already-uploaded vertex data is present from a previous * prepare_vertices, but we had to re-validate state due to @@ -466,8 +479,8 @@ static void brw_prepare_vertices(struct brw_context *brw) brw_prepare_query_begin(brw); - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; brw_add_validated_bo(brw, input->bo); } @@ -477,34 +490,44 @@ static void brw_emit_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); - GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; - struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; GLuint i; - GLuint nr_enabled = 0; - /* Accumulate the list of enabled arrays. */ - while (vs_inputs) { - i = _mesa_ffsll(vs_inputs) - 1; - struct brw_vertex_element *input = &brw->vb.inputs[i]; + brw_emit_query_begin(brw); - vs_inputs &= ~(1 << i); - enabled[nr_enabled++] = input; + /* If the VS doesn't read any inputs (calculating vertex position from + * a state variable for some reason, for example), emit a single pad + * VERTEX_ELEMENT struct and bail. + * + * The stale VB state stays in place, but they don't do anything unless + * a VE loads from them. + */ + if (brw->vb.nr_enabled == 0) { + BEGIN_BATCH(3, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1); + OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | + (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); + ADVANCE_BATCH(); + return; } - brw_emit_query_begin(brw); - /* Now emit VB and VEP state packets. * * This still defines a hardware VB for each input, even if they * are interleaved or from the same VBO. TBD if this makes a * performance difference. */ - BEGIN_BATCH(1 + nr_enabled * 4, IGNORE_CLIPRECTS); + BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS); OUT_BATCH((CMD_VERTEX_BUFFER << 16) | - ((1 + nr_enabled * 4) - 2)); + ((1 + brw->vb.nr_enabled * 4) - 2)); - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) | BRW_VB0_ACCESS_VERTEXDATA | @@ -524,15 +547,15 @@ static void brw_emit_vertices(struct brw_context *brw) input->offset + input->element_size); } } else - OUT_BATCH(brw->vb.max_index); + OUT_BATCH(input->count); OUT_BATCH(0); /* Instance data step rate */ } ADVANCE_BATCH(); - BEGIN_BATCH(1 + nr_enabled * 2, IGNORE_CLIPRECTS); - OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr_enabled * 2) - 2)); - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2)); + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; uint32_t format = get_surface_type(input->glarray->Type, input->glarray->Size, input->glarray->Format, @@ -589,17 +612,20 @@ static void brw_prepare_indices(struct brw_context *brw) dri_bo *bo = NULL; struct gl_buffer_object *bufferobj; GLuint offset; + GLuint ib_type_size; if (index_buffer == NULL) return; - ib_size = get_size(index_buffer->type) * index_buffer->count; + ib_type_size = get_size(index_buffer->type); + ib_size = ib_type_size * index_buffer->count; bufferobj = index_buffer->obj;; /* Turn into a proper VBO: */ if (!bufferobj->Name) { - + brw->ib.start_vertex_offset = 0; + /* Get new bufferobj, offset: */ get_space(brw, ib_size, &bo, &offset); @@ -615,6 +641,7 @@ static void brw_prepare_indices(struct brw_context *brw) } } else { offset = (GLuint) (unsigned long) index_buffer->ptr; + brw->ib.start_vertex_offset = 0; /* If the index buffer isn't aligned to its element size, we have to * rebase it into a temporary. @@ -635,39 +662,62 @@ static void brw_prepare_indices(struct brw_context *brw) bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj), INTEL_READ); dri_bo_reference(bo); + + /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading + * the index buffer state when we're just moving the start index + * of our drawing. + */ + brw->ib.start_vertex_offset = offset / ib_type_size; + offset = 0; + ib_size = bo->size; } } - dri_bo_unreference(brw->ib.bo); - brw->ib.bo = bo; - brw->ib.offset = offset; + if (brw->ib.bo != bo || + brw->ib.offset != offset || + brw->ib.size != ib_size) + { + drm_intel_bo_unreference(brw->ib.bo); + brw->ib.bo = bo; + brw->ib.offset = offset; + brw->ib.size = ib_size; + + brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER; + } else { + drm_intel_bo_unreference(bo); + } brw_add_validated_bo(brw, brw->ib.bo); } -static void brw_emit_indices(struct brw_context *brw) +const struct brw_tracked_state brw_indices = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_INDICES, + .cache = 0, + }, + .prepare = brw_prepare_indices, +}; + +static void brw_emit_index_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; const struct _mesa_index_buffer *index_buffer = brw->ib.ib; - GLuint ib_size; if (index_buffer == NULL) return; - ib_size = get_size(index_buffer->type) * index_buffer->count - 1; - /* Emit the indexbuffer packet: */ { struct brw_indexbuffer ib; memset(&ib, 0, sizeof(ib)); - + ib.header.bits.opcode = CMD_INDEX_BUFFER; ib.header.bits.length = sizeof(ib)/4 - 2; ib.header.bits.index_format = get_index_type(index_buffer->type); ib.header.bits.cut_index_enable = 0; - BEGIN_BATCH(4, IGNORE_CLIPRECTS); OUT_BATCH( ib.header.dword ); @@ -676,18 +726,17 @@ static void brw_emit_indices(struct brw_context *brw) brw->ib.offset); OUT_RELOC(brw->ib.bo, I915_GEM_DOMAIN_VERTEX, 0, - brw->ib.offset + ib_size); + brw->ib.offset + brw->ib.size); OUT_BATCH( 0 ); ADVANCE_BATCH(); } } -const struct brw_tracked_state brw_indices = { +const struct brw_tracked_state brw_index_buffer = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH | BRW_NEW_INDICES, + .brw = BRW_NEW_BATCH | BRW_NEW_INDEX_BUFFER, .cache = 0, }, - .prepare = brw_prepare_indices, - .emit = brw_emit_indices, + .emit = brw_emit_index_buffer, }; diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index c53efba5991..1df561386e6 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -62,7 +62,7 @@ void brw_set_predicate_control( struct brw_compile *p, GLuint pc ) void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional ) { - p->current->header.destreg__conditonalmod = conditional; + p->current->header.destreg__conditionalmod = conditional; } void brw_set_access_mode( struct brw_compile *p, GLuint access_mode ) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 2412014248c..241cdc33f86 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -481,8 +481,8 @@ static struct brw_instruction *next_insn( struct brw_compile *p, /* Reset this one-shot flag: */ - if (p->current->header.destreg__conditonalmod) { - p->current->header.destreg__conditonalmod = 0; + if (p->current->header.destreg__conditionalmod) { + p->current->header.destreg__conditionalmod = 0; p->current->header.predicate_control = BRW_PREDICATE_NORMAL; } @@ -679,7 +679,7 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, assert(if_insn->header.opcode == BRW_OPCODE_IF); if_insn->bits3.if_else.jump_count = br * (insn - if_insn); - if_insn->bits3.if_else.pop_count = 1; + if_insn->bits3.if_else.pop_count = 0; if_insn->bits3.if_else.pad0 = 0; } @@ -871,7 +871,7 @@ void brw_CMP(struct brw_compile *p, { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); - insn->header.destreg__conditonalmod = conditional; + insn->header.destreg__conditionalmod = conditional; brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, src1); @@ -915,7 +915,7 @@ void brw_math( struct brw_compile *p, * instructions. */ insn->header.predicate_control = 0; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src); @@ -952,7 +952,7 @@ void brw_math_16( struct brw_compile *p, brw_set_compression_control(p, BRW_COMPRESSION_NONE); insn = next_insn(p, BRW_OPCODE_SEND); - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src); @@ -969,7 +969,7 @@ void brw_math_16( struct brw_compile *p, */ insn = next_insn(p, BRW_OPCODE_SEND); insn->header.compression_control = BRW_COMPRESSION_2NDHALF; - insn->header.destreg__conditonalmod = msg_reg_nr+1; + insn->header.destreg__conditionalmod = msg_reg_nr+1; brw_set_dest(insn, offset(dest,1)); brw_set_src0(insn, src); @@ -1016,7 +1016,7 @@ void brw_dp_WRITE_16( struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src); @@ -1062,7 +1062,7 @@ void brw_dp_READ_16( struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); /* UW? */ brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); @@ -1116,7 +1116,7 @@ void brw_dp_READ_4( struct brw_compile *p, insn->header.predicate_control = BRW_PREDICATE_NONE; insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; insn->header.mask_control = BRW_MASK_DISABLE; /* cast dest to a uword[8] vector */ @@ -1190,7 +1190,7 @@ void brw_dp_READ_4_vs(struct brw_compile *p, insn->header.predicate_control = BRW_PREDICATE_NONE; insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; insn->header.mask_control = BRW_MASK_DISABLE; /*insn->header.access_mode = BRW_ALIGN_16;*/ @@ -1224,7 +1224,7 @@ void brw_fb_WRITE(struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src0); @@ -1322,7 +1322,7 @@ void brw_SAMPLE(struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src0); @@ -1375,7 +1375,7 @@ void brw_urb_WRITE(struct brw_compile *p, brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_urb_message(p->brw, insn, @@ -1410,7 +1410,7 @@ void brw_ff_sync(struct brw_compile *p, brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_ff_sync_message(p->brw, insn, diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c index 980eac7646d..a9b2aa2eace 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c @@ -101,7 +101,7 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c, BRW_URB_SWIZZLE_NONE); } -void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim) +static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim) { struct brw_compile *p = &c->func; brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim)); diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index b5f6371c82c..bc0f0760738 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -113,7 +113,7 @@ struct brw_sf_unit_key { unsigned int nr_urb_entries, urb_size, sfsize; - GLenum front_face, cull_face; + GLenum front_face, cull_face, provoking_vertex; unsigned scissor:1; unsigned line_smooth:1; unsigned point_sprite:1; @@ -153,6 +153,9 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); key->point_attenuated = ctx->Point._Attenuated; + /* _NEW_LIGHT */ + key->provoking_vertex = ctx->Light.ProvokingVertex; + key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; } @@ -284,9 +287,15 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: */ - sf.sf7.trifan_pv = 2; - sf.sf7.linestrip_pv = 1; - sf.sf7.tristrip_pv = 2; + if (key->provoking_vertex == GL_LAST_VERTEX_CONVENTION) { + sf.sf7.trifan_pv = 2; + sf.sf7.linestrip_pv = 1; + sf.sf7.tristrip_pv = 2; + } else { + sf.sf7.trifan_pv = 1; + sf.sf7.linestrip_pv = 0; + sf.sf7.tristrip_pv = 0; + } sf.sf7.line_last_pixel_enable = 0; /* Set bias for OpenGL rasterization rules: @@ -300,6 +309,9 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, &sf, sizeof(sf), NULL, NULL); + /* STATE_PREFETCH command description describes this state as being + * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain. + */ /* Emit SF program relocation */ dri_bo_emit_reloc(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, @@ -340,6 +352,7 @@ static void upload_sf_unit( struct brw_context *brw ) const struct brw_tracked_state brw_sf_unit = { .dirty = { .mesa = (_NEW_POLYGON | + _NEW_LIGHT | _NEW_LINE | _NEW_POINT | _NEW_SCISSOR | diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index bf9f6cae55e..78572356a3d 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -92,6 +92,7 @@ const struct brw_tracked_state brw_clear_batch_cache; const struct brw_tracked_state brw_drawing_rect; const struct brw_tracked_state brw_indices; const struct brw_tracked_state brw_vertices; +const struct brw_tracked_state brw_index_buffer; /** * Use same key for WM and VS surfaces. diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 38d9dd8991e..95d42d2dcc5 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -94,6 +94,7 @@ const struct brw_tracked_state *atoms[] = &brw_drawing_rect, &brw_indices, + &brw_index_buffer, &brw_vertices, &brw_constant_buffer @@ -208,6 +209,7 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_PSP), DEFINE_BIT(BRW_NEW_FENCE), DEFINE_BIT(BRW_NEW_INDICES), + DEFINE_BIT(BRW_NEW_INDEX_BUFFER), DEFINE_BIT(BRW_NEW_VERTICES), DEFINE_BIT(BRW_NEW_BATCH), DEFINE_BIT(BRW_NEW_DEPTH_BUFFER), diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 8ba7eb27b36..a6de09207b7 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -1200,7 +1200,7 @@ struct brw_instruction GLuint predicate_control:4; GLuint predicate_inverse:1; GLuint execution_size:3; - GLuint destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */ + GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */ GLuint pad0:2; GLuint debug_control:1; GLuint saturate:1; @@ -1228,7 +1228,9 @@ struct brw_instruction GLuint dest_reg_type:3; GLuint src0_reg_file:2; GLuint src0_reg_type:3; - GLuint pad:6; + GLuint src1_reg_file:2; /* 0x00000c00 */ + GLuint src1_reg_type:3; /* 0x00007000 */ + GLuint pad:1; GLint dest_indirect_offset:10; /* offset against the deref'd address reg */ GLuint dest_subreg_nr:3; /* subnr for the address reg a0.x */ GLuint dest_horiz_stride:2; @@ -1243,7 +1245,7 @@ struct brw_instruction GLuint src0_reg_type:3; GLuint src1_reg_file:2; GLuint src1_reg_type:3; - GLuint pad0:1; + GLuint pad:1; GLuint dest_writemask:4; GLuint dest_subreg_nr:1; GLuint dest_reg_nr:8; @@ -1348,7 +1350,7 @@ struct brw_instruction GLuint src1_reg_nr:8; GLuint src1_abs:1; GLuint src1_negate:1; - GLuint pad:1; + GLuint src1_address_mode:1; GLuint src1_horiz_stride:2; GLuint src1_width:3; GLuint src1_vert_stride:4; @@ -1363,7 +1365,7 @@ struct brw_instruction GLuint src1_reg_nr:8; GLuint src1_abs:1; GLuint src1_negate:1; - GLuint pad0:1; + GLuint src1_address_mode:1; GLuint src1_swz_z:2; GLuint src1_swz_w:2; GLuint pad1:1; @@ -1377,7 +1379,7 @@ struct brw_instruction GLuint src1_subreg_nr:3; GLuint src1_abs:1; GLuint src1_negate:1; - GLuint pad0:1; + GLuint src1_address_mode:1; GLuint src1_horiz_stride:2; GLuint src1_width:3; GLuint src1_vert_stride:4; @@ -1565,6 +1567,7 @@ struct brw_instruction GLint d; GLuint ud; + float f; } bits3; }; diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 7f9b2535340..1d2e953eb1e 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -65,11 +65,6 @@ GLboolean brw_miptree_layout(struct intel_context *intel, if (mt->compressed) { mt->pitch = ALIGN(mt->width0, align_w); - qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp; - mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6; - } else { - qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp; - mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6; } if (mt->first_level != mt->last_level) { @@ -90,6 +85,14 @@ GLboolean brw_miptree_layout(struct intel_context *intel, mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch); + if (mt->compressed) { + qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp; + mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6; + } else { + qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp; + mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6; + } + for (level = mt->first_level; level <= mt->last_level; level++) { GLuint img_height; GLuint nr_images = 6; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 514f15d5e3a..83167b9258e 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -68,6 +68,7 @@ static void release_tmps( struct brw_vs_compile *c ) static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { GLuint i, reg = 0, mrf; + int attributes_in_vue; /* Determine whether to use a real constant buffer or use a block * of GRF registers for constants. The later is faster but only @@ -128,6 +129,11 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) reg++; } } + /* If there are no inputs, we'll still be reading one attribute's worth + * because it's required -- see urb_read_length setting. + */ + if (c->nr_inputs == 0) + reg++; /* Allocate outputs. The non-position outputs go straight into message regs. */ @@ -220,11 +226,22 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) * vertex urb, so is half the amount: */ c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2; + /* Setting this field to 0 leads to undefined behavior according to the + * the VS_STATE docs. Our VUEs will always have at least one attribute + * sitting in them, even if it's padding. + */ + if (c->prog_data.urb_read_length == 0) + c->prog_data.urb_read_length = 1; + + /* The VS VUEs are shared by VF (outputting our inputs) and VS, so size + * them to fit the biggest thing they need to. + */ + attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs); if (BRW_IS_IGDNG(c->func.brw)) - c->prog_data.urb_entry_size = (c->nr_outputs + 6 + 3) / 4; + c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; else - c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4; + c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; c->prog_data.total_grf = reg; @@ -1245,9 +1262,30 @@ post_vs_emit( struct brw_vs_compile *c, /* patch up the END code to jump past subroutines, etc */ offset = last_inst - end_inst; - brw_set_src1(end_inst, brw_imm_d(offset * 16)); + if (offset > 1) { + brw_set_src1(end_inst, brw_imm_d(offset * 16)); + } else { + end_inst->header.opcode = BRW_OPCODE_NOP; + } } +static uint32_t +get_predicate(uint32_t swizzle) +{ + switch (swizzle) { + case SWIZZLE_XXXX: + return BRW_PREDICATE_ALIGN16_REPLICATE_X; + case SWIZZLE_YYYY: + return BRW_PREDICATE_ALIGN16_REPLICATE_Y; + case SWIZZLE_ZZZZ: + return BRW_PREDICATE_ALIGN16_REPLICATE_Z; + case SWIZZLE_WWWW: + return BRW_PREDICATE_ALIGN16_REPLICATE_W; + default: + _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n", swizzle); + return BRW_PREDICATE_NORMAL; + } +} /* Emit the vertex program instructions here. */ @@ -1266,7 +1304,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) GLuint file; if (INTEL_DEBUG & DEBUG_VS) { - _mesa_printf("vs-emit:\n"); + _mesa_printf("vs-mesa:\n"); _mesa_print_program(&c->vp->program.Base); _mesa_printf("\n"); } @@ -1453,7 +1491,10 @@ void brw_vs_emit(struct brw_vs_compile *c ) break; case OPCODE_IF: assert(if_depth < MAX_IF_DEPTH); - if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); + if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8); + if_inst[if_depth]->header.predicate_control = + get_predicate(inst->DstReg.CondSwizzle); + if_depth++; break; case OPCODE_ELSE: if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); @@ -1541,6 +1582,19 @@ void brw_vs_emit(struct brw_vs_compile *c ) "unknown"); } + /* Set the predication update on the last instruction of the native + * instruction sequence. + * + * This would be problematic if it was set on a math instruction, + * but that shouldn't be the case with the current GLSL compiler. + */ + if (inst->CondUpdate) { + struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1]; + + assert(hw_insn->header.destreg__conditionalmod == 0); + hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ; + } + if ((inst->DstReg.File == PROGRAM_OUTPUT) && (inst->DstReg.Index != VERT_RESULT_HPOS) && c->output_regs[inst->DstReg.Index].used_in_src) { @@ -1578,4 +1632,13 @@ void brw_vs_emit(struct brw_vs_compile *c ) emit_vertex_write(c); post_vs_emit(c, end_inst, last_inst); + + if (INTEL_DEBUG & DEBUG_VS) { + int i; + + _mesa_printf("vs-native:\n"); + for (i = 0; i < p->nr_insn; i++) + brw_disasm(stderr, &p->store[i]); + _mesa_printf("\n"); + } } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 14e05be4f6c..2292de94c44 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -41,13 +41,13 @@ GLuint brw_wm_nr_args( GLuint opcode ) { switch (opcode) { case WM_FRONTFACING: - return 0; case WM_PIXELXY: + return 0; case WM_CINTERP: case WM_WPOSXY: + case WM_DELTAXY: return 1; case WM_LINTERP: - case WM_DELTAXY: case WM_PIXELW: return 2; case WM_FB_WRITE: @@ -171,9 +171,11 @@ static void do_wm_prog( struct brw_context *brw, * differently from "simple" shaders. */ if (fp->isGLSL) { + c->dispatch_width = 8; brw_wm_glsl_emit(brw, c); } else { + c->dispatch_width = 16; brw_wm_non_glsl_emit(brw, c); } @@ -202,6 +204,7 @@ static void brw_wm_populate_key( struct brw_context *brw, /* BRW_NEW_FRAGMENT_PROGRAM */ const struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; + GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; GLuint lookup = 0; GLuint line_aa; GLuint i; @@ -263,6 +266,7 @@ static void brw_wm_populate_key( struct brw_context *brw, brw_wm_lookup_iz(line_aa, lookup, + uses_depth, key); diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index ba497432c60..ae98b5492db 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -260,6 +260,7 @@ struct brw_wm_compile { GLuint tmp_index; GLuint tmp_max; GLuint subroutines[BRW_WM_MAX_SUBROUTINE]; + GLuint dispatch_width; /** we may need up to 3 constants per instruction (if use_const_buffer) */ struct { @@ -292,6 +293,7 @@ void brw_wm_print_program( struct brw_wm_compile *c, void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup, + GLboolean ps_uses_depth, struct brw_wm_prog_key *key ); GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 9f82916c025..b3cf524c63e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -65,8 +65,7 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) static void emit_pixel_xy(struct brw_compile *p, const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) + GLuint mask) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); @@ -98,8 +97,7 @@ static void emit_pixel_xy(struct brw_compile *p, static void emit_delta_xy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1) + const struct brw_reg *arg0) { struct brw_reg r1 = brw_vec1_grf(1, 0); @@ -545,16 +543,18 @@ static void emit_dp3( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); + assert(is_power_of_two(mask & WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MAC(p, dst[0], arg0[2], arg1[2]); + brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]); brw_set_saturate(p, 0); } @@ -565,17 +565,19 @@ static void emit_dp4( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); + assert(is_power_of_two(mask & WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MAC(p, dst[0], arg0[3], arg1[3]); + brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]); brw_set_saturate(p, 0); } @@ -632,18 +634,19 @@ static void emit_math1( struct brw_compile *p, GLuint mask, const struct brw_reg *arg0 ) { + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ - //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X || - // function == BRW_MATH_FUNCTION_SINCOS); - + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + brw_MOV(p, brw_message_reg(2), arg0[0]); /* Send two messages to perform all 16 operations: */ brw_math_16(p, - dst[0], + dst[dst_chan], function, (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, 2, @@ -659,10 +662,12 @@ static void emit_math2( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1) { + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); + assert(is_power_of_two(mask & WRITEMASK_XYZW)); brw_push_insn_state(p); @@ -681,7 +686,7 @@ static void emit_math2( struct brw_compile *p, */ brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math(p, - dst[0], + dst[dst_chan], function, (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, 2, @@ -691,7 +696,7 @@ static void emit_math2( struct brw_compile *p, brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_math(p, - offset(dst[0],1), + offset(dst[dst_chan],1), function, (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, 4, @@ -1194,11 +1199,11 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* Generated instructions for calculating triangle interpolants: */ case WM_PIXELXY: - emit_pixel_xy(p, dst, dst_flags, args[0]); + emit_pixel_xy(p, dst, dst_flags); break; case WM_DELTAXY: - emit_delta_xy(p, dst, dst_flags, args[0], args[1]); + emit_delta_xy(p, dst, dst_flags, args[0]); break; case WM_WPOSXY: @@ -1385,4 +1390,13 @@ void brw_wm_emit( struct brw_wm_compile *c ) inst->dst[i]->hw_reg, inst->dst[i]->spill_slot); } + + if (INTEL_DEBUG & DEBUG_WM) { + int i; + + _mesa_printf("wm-native:\n"); + for (i = 0; i < p->nr_insn; i++) + brw_disasm(stderr, &p->store[i]); + _mesa_printf("\n"); + } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index b9e8dd2e96e..5dc076a8257 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -226,9 +226,42 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c, 0, 0, 0, /* tex unit, target, shadow */ src0, src1, src2); } - +/* Many Mesa opcodes produce the same value across all the result channels. + * We'd rather not have to support that splatting in the opcode implementations, + * and brw_wm_pass*.c wants to optimize them out by shuffling references around + * anyway. We can easily get both by emitting the opcode to one channel, and + * then MOVing it to the others, which brw_wm_pass*.c already understands. + */ +static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c, + const struct prog_instruction *inst0) +{ + struct prog_instruction *inst; + unsigned int dst_chan; + unsigned int other_channel_mask; + + if (inst0->DstReg.WriteMask == 0) + return NULL; + + dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1; + inst = get_fp_inst(c); + *inst = *inst0; + inst->DstReg.WriteMask = 1 << dst_chan; + + other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan); + if (other_channel_mask != 0) { + inst = emit_op(c, + OPCODE_MOV, + dst_mask(inst0->DstReg, other_channel_mask), + 0, + src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan), + src_undef(), + src_undef()); + } + return inst; +} + /*********************************************************************** * Special instructions for interpolation and other tasks @@ -376,14 +409,6 @@ static void emit_interp( struct brw_wm_compile *c, } break; case FRAG_ATTRIB_FOGC: - /* The FOGC input is really special. When a program uses glFogFragCoord, - * the results returned are supposed to be (f,0,0,1). But for Mesa GLSL, - * the glFrontFacing and glPointCoord values are also stashed in FOGC. - * So, write the interpolated fog value to X, then either 0, 1, or the - * stashed values to Y, Z, W. Note that this means that - * glFogFragCoord.yzw can be wrong in those cases! - */ - /* Interpolate the fog coordinate */ emit_op(c, WM_PINTERP, @@ -393,26 +418,40 @@ static void emit_interp( struct brw_wm_compile *c, deltas, get_pixel_w(c)); - /* Move the front facing value into FOGC.y if it's needed. */ - if (c->fp->program.UsesFrontFacing) { - emit_op(c, - WM_FRONTFACING, - dst_mask(dst, WRITEMASK_Y), - 0, - src_undef(), - src_undef(), - src_undef()); - } else { - emit_op(c, - OPCODE_MOV, - dst_mask(dst, WRITEMASK_Y), - 0, - src_swizzle1(interp, SWIZZLE_ZERO), - src_undef(), - src_undef()); - } + emit_op(c, + OPCODE_MOV, + dst_mask(dst, WRITEMASK_YZW), + 0, + src_swizzle(interp, + SWIZZLE_ZERO, + SWIZZLE_ZERO, + SWIZZLE_ZERO, + SWIZZLE_ONE), + src_undef(), + src_undef()); + break; + + case FRAG_ATTRIB_FACE: + /* XXX review/test this case */ + emit_op(c, + WM_FRONTFACING, + dst_mask(dst, WRITEMASK_X), + 0, + src_undef(), + src_undef(), + src_undef()); + break; + + case FRAG_ATTRIB_PNTC: + /* XXX review/test this case */ + emit_op(c, + WM_PINTERP, + dst_mask(dst, WRITEMASK_XY), + 0, + interp, + deltas, + get_pixel_w(c)); - /* Should do the PointCoord thing here. */ emit_op(c, OPCODE_MOV, dst_mask(dst, WRITEMASK_ZW), @@ -425,6 +464,7 @@ static void emit_interp( struct brw_wm_compile *c, src_undef(), src_undef()); break; + default: emit_op(c, WM_PINTERP, @@ -683,7 +723,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmp0 = 1 / tmp1 */ emit_op(c, OPCODE_RCP, - tmp0, + dst_mask(tmp0, WRITEMASK_X), 0, tmp1src, src_undef(), @@ -694,7 +734,7 @@ static void precalc_tex( struct brw_wm_compile *c, tmpcoord, 0, src0, - tmp0src, + src_swizzle1(tmp0src, SWIZZLE_X), src_undef()); release_temp(c, tmp0); @@ -717,7 +757,11 @@ static void precalc_tex( struct brw_wm_compile *c, tmpcoord, 0, inst->SrcReg[0], - scale, + src_swizzle(scale, + SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_ONE, + SWIZZLE_ONE), src_undef()); coord = src_reg_from_dst(tmpcoord); @@ -1134,9 +1178,11 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) break; case OPCODE_PRINT: break; - default: - emit_insn(c, inst); + if (brw_wm_is_scalar_result(inst->Opcode)) + emit_scalar_insn(c, inst); + else + emit_insn(c, inst); break; } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 85a4237d5a7..a5b18ec7d76 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -10,6 +10,9 @@ enum _subroutine { SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4 }; +static struct brw_reg get_dst_reg(struct brw_wm_compile *c, + const struct prog_instruction *inst, + GLuint component); /** * Determine if the given fragment program uses GLSL features such @@ -22,6 +25,7 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) for (i = 0; i < fp->Base.NumInstructions; i++) { const struct prog_instruction *inst = &fp->Base.Instructions[i]; switch (inst->Opcode) { + case OPCODE_ARL: case OPCODE_IF: case OPCODE_ENDIF: case OPCODE_CAL: @@ -130,19 +134,6 @@ static void set_reg(struct brw_wm_compile *c, int file, int index, c->wm_regs[file][index][component].inited = GL_TRUE; } -/** - * Examine instruction's write mask to find index of first component - * enabled for writing. - */ -static int get_scalar_dst_index(const struct prog_instruction *inst) -{ - int i; - for (i = 0; i < 4; i++) - if (inst->DstReg.WriteMask & (1<<i)) - break; - return i; -} - static struct brw_reg alloc_tmp(struct brw_wm_compile *c) { struct brw_reg reg; @@ -402,6 +393,27 @@ static void prealloc_reg(struct brw_wm_compile *c) prealloc_grf(c, 126); prealloc_grf(c, 127); + for (i = 0; i < c->nr_fp_insns; i++) { + const struct prog_instruction *inst = &c->prog_instructions[i]; + struct brw_reg dst[4]; + + switch (inst->Opcode) { + case OPCODE_TEX: + case OPCODE_TXB: + /* Allocate the channels of texture results contiguously, + * since they are written out that way by the sampler unit. + */ + for (j = 0; j < 4; j++) { + dst[j] = get_dst_reg(c, inst, j); + if (j != 0) + assert(dst[j].nr == dst[j - 1].nr + 1); + } + break; + default: + break; + } + } + /* An instruction may reference up to three constants. * They'll be found in these registers. * XXX alloc these on demand! @@ -639,23 +651,6 @@ static void invoke_subroutine( struct brw_wm_compile *c, } } -static void emit_abs( struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - int i; - struct brw_compile *p = &c->func; - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for (i = 0; i < 4; i++) { - if (inst->DstReg.WriteMask & (1<<i)) { - struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i); - src = get_src_reg(c, inst, 0, i); - brw_MOV(p, dst, brw_abs(src)); - } - } - brw_set_saturate(p, 0); -} - static void emit_trunc( struct brw_wm_compile *c, const struct prog_instruction *inst) { @@ -1031,12 +1026,20 @@ static void emit_dp3(struct brw_wm_compile *c, struct brw_reg src0[3], src1[3], dst; int i; struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + for (i = 0; i < 3; i++) { src0[i] = get_src_reg(c, inst, 0, i); src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + dst = get_dst_reg(c, inst, dst_chan); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); @@ -1050,11 +1053,19 @@ static void emit_dp4(struct brw_wm_compile *c, struct brw_reg src0[4], src1[4], dst; int i; struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + for (i = 0; i < 4; i++) { src0[i] = get_src_reg(c, inst, 0, i); src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + dst = get_dst_reg(c, inst, dst_chan); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, brw_null_reg(), src0[2], src1[2]); @@ -1069,11 +1080,19 @@ static void emit_dph(struct brw_wm_compile *c, struct brw_reg src0[4], src1[4], dst; int i; struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + for (i = 0; i < 4; i++) { src0[i] = get_src_reg(c, inst, 0, i); src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + dst = get_dst_reg(c, inst, dst_chan); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, dst, src0[2], src1[2]); @@ -1091,37 +1110,28 @@ static void emit_math1(struct brw_wm_compile *c, const struct prog_instruction *inst, GLuint func) { struct brw_compile *p = &c->func; - struct brw_reg src0, dst, tmp; - const int mark = mark_tmps( c ); - int i; + struct brw_reg src0, dst; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; - tmp = alloc_tmp(c); + assert(is_power_of_two(mask & WRITEMASK_XYZW)); /* Get first component of source register */ + dst = get_dst_reg(c, inst, dst_chan); src0 = get_src_reg(c, inst, 0, 0); - /* tmp = func(src0) */ brw_MOV(p, brw_message_reg(2), src0); brw_math(p, - tmp, + dst, func, (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, 2, brw_null_reg(), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); - - /*tmp.dw1.bits.swizzle = SWIZZLE_XXXX;*/ - - /* replicate tmp value across enabled dest channels */ - for (i = 0; i < 4; i++) { - if (inst->DstReg.WriteMask & (1 << i)) { - dst = get_dst_reg(c, inst, i); - brw_MOV(p, dst, tmp); - } - } - - release_tmps(c, mark); } static void emit_rcp(struct brw_wm_compile *c, @@ -1192,24 +1202,6 @@ static void emit_arl(struct brw_wm_compile *c, brw_set_saturate(p, 0); } -static void emit_sub(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg(c, inst, 0, i); - src1 = get_src_reg_imm(c, inst, 1, i); - brw_ADD(p, dst, src0, negate(src1)); - } - } - brw_set_saturate(p, 0); -} static void emit_mul(struct brw_wm_compile *c, const struct prog_instruction *inst) @@ -1321,7 +1313,15 @@ static void emit_pow(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; struct brw_reg dst, src0, src1; - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + + dst = get_dst_reg(c, inst, dst_chan); src0 = get_src_reg_imm(c, inst, 0, 0); src1 = get_src_reg_imm(c, inst, 1, 0); @@ -2828,18 +2828,12 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case WM_FRONTFACING: emit_frontfacing(c, inst); break; - case OPCODE_ABS: - emit_abs(c, inst); - break; case OPCODE_ADD: emit_add(c, inst); break; case OPCODE_ARL: emit_arl(c, inst); break; - case OPCODE_SUB: - emit_sub(c, inst); - break; case OPCODE_FRC: emit_frc(c, inst); break; @@ -3032,8 +3026,14 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_set_predicate_control(p, BRW_PREDICATE_NONE); } post_wm_emit(c); -} + if (INTEL_DEBUG & DEBUG_WM) { + _mesa_printf("wm-native:\n"); + for (i = 0; i < p->nr_insn; i++) + brw_disasm(stderr, &p->store[i]); + _mesa_printf("\n"); + } +} /** * Do GPU code generation for shaders that use GLSL features such as diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c index 8fd067abe7d..5e399ac62a8 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_iz.c +++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c @@ -122,6 +122,7 @@ const struct { */ void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup, + GLboolean ps_uses_depth, struct brw_wm_prog_key *key ) { GLuint reg = 2; @@ -131,7 +132,7 @@ void brw_wm_lookup_iz( GLuint line_aa, if (lookup & IZ_PS_COMPUTES_DEPTH_BIT) key->computes_depth = 1; - if (wm_iz_table[lookup].sd_present) { + if (wm_iz_table[lookup].sd_present || ps_uses_depth) { key->source_depth_reg = reg; reg += 2; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index 92142764f5d..62792583396 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -257,34 +257,6 @@ static void pass0_set_dst( struct brw_wm_compile *c, } -static void pass0_set_dst_scalar( struct brw_wm_compile *c, - struct brw_wm_instruction *out, - const struct prog_instruction *inst, - GLuint writemask ) -{ - if (writemask) { - const struct prog_dst_register *dst = &inst->DstReg; - GLuint i; - - /* Compute only the first (X) value: - */ - out->writemask = WRITEMASK_X; - out->dst[0] = get_value(c); - - /* Update our tracking register file for all the components in - * writemask: - */ - for (i = 0; i < 4; i++) { - if (writemask & (1<<i)) { - pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[0]); - } - } - } - else - out->writemask = 0; -} - - static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c, struct prog_src_register src, GLuint i ) @@ -363,10 +335,7 @@ translate_insn(struct brw_wm_compile *c, /* Dst: */ - if (brw_wm_is_scalar_result(out->opcode)) - pass0_set_dst_scalar(c, out, inst, writemask); - else - pass0_set_dst(c, out, inst, writemask); + pass0_set_dst(c, out, inst, writemask); } diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 0f87fc46a44..6aa36d10b12 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -196,6 +196,16 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, struct intel_context *intel = batch->intel; GLuint used = batch->ptr - batch->map; + if (intel->first_post_swapbuffers_batch == NULL) { + intel->first_post_swapbuffers_batch = intel->batch->buf; + drm_intel_bo_reference(intel->first_post_swapbuffers_batch); + } + + if (intel->first_post_swapbuffers_batch == NULL) { + intel->first_post_swapbuffers_batch = intel->batch->buf; + drm_intel_bo_reference(intel->first_post_swapbuffers_batch); + } + if (used == 0) { batch->cliprect_mode = IGNORE_CLIPRECTS; return; diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 2e95bd1013f..979f2025842 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -477,6 +477,8 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) BR13 |= BR13_565; } + assert(irb->region->tiling != I915_TILING_Y); + #ifndef I915 if (irb->region->tiling != I915_TILING_NONE) { CMD |= XY_DST_TILED; @@ -571,6 +573,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, assert( logic_op - GL_CLEAR >= 0 ); assert( logic_op - GL_CLEAR < 0x10 ); + assert(dst_pitch > 0); if (w < 0 || h < 0) return GL_TRUE; diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 4abb525f78e..46f1a7f7205 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -38,6 +38,7 @@ #include "swrast_setup/swrast_setup.h" #include "tnl/tnl.h" #include "drivers/common/driverfuncs.h" +#include "drivers/common/meta.h" #include "i830_dri.h" @@ -513,7 +514,7 @@ intel_flush(GLcontext *ctx, GLboolean needs_mi_flush) * each of N places that do rendering. This has worse performances, * but it is much easier to get correct. */ - if (intel->is_front_buffer_rendering) { + if (!intel->is_front_buffer_rendering) { intel->front_buffer_dirty = GL_FALSE; } } @@ -529,7 +530,27 @@ intelFlush(GLcontext * ctx) static void intel_glFlush(GLcontext *ctx) { + struct intel_context *intel = intel_context(ctx); + intel_flush(ctx, GL_TRUE); + + /* We're using glFlush as an indicator that a frame is done, which is + * what DRI2 does before calling SwapBuffers (and means we should catch + * people doing front-buffer rendering, as well).. + * + * Wait for the swapbuffers before the one we just emitted, so we don't + * get too many swaps outstanding for apps that are GPU-heavy but not + * CPU-heavy. + * + * Unfortunately, we don't have a handle to the batch containing the swap, + * and getting our hands on that doesn't seem worth it, so we just us the + * first batch we emitted after the last swap. + */ + if (intel->first_post_swapbuffers_batch != NULL) { + drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch); + drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); + intel->first_post_swapbuffers_batch = NULL; + } } void @@ -692,6 +713,8 @@ intelInitContext(struct intel_context *intel, _swrast_allow_pixel_fog(ctx, GL_FALSE); _swrast_allow_vertex_fog(ctx, GL_TRUE); + _mesa_meta_init(ctx); + intel->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24; intel->hw_stipple = 1; @@ -752,7 +775,7 @@ intelInitContext(struct intel_context *intel, if (intel->use_texture_tiling && !intel->intelScreen->kernel_exec_fencing) { fprintf(stderr, "No kernel support for execution fencing, " - "disabling texture tiling"); + "disabling texture tiling\n"); intel->use_texture_tiling = GL_FALSE; } intel->use_early_z = driQueryOptionb(&intel->optionCache, "early_z"); @@ -795,6 +818,8 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) INTEL_FIREVERTICES(intel); + _mesa_meta_free(&intel->ctx); + meta_destroy_metaops(&intel->meta); intel->vtbl.destroy(intel); @@ -814,6 +839,8 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) intel->prim.vb = NULL; dri_bo_unreference(intel->prim.vb_bo); intel->prim.vb_bo = NULL; + dri_bo_unreference(intel->first_post_swapbuffers_batch); + intel->first_post_swapbuffers_batch = NULL; if (release_texture_heaps) { /* Nothing is currently done here to free texture heaps; diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 08bea88c958..0d9db5eb1da 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -80,9 +80,13 @@ extern void intelFallback(struct intel_context *intel, GLuint bit, #define INTEL_MAX_FIXUP 64 + +/** + * intel_context is derived from Mesa's context class: GLcontext. + */ struct intel_context { - GLcontext ctx; /* the parent class */ + GLcontext ctx; /**< base class, must be first field */ struct { @@ -178,6 +182,7 @@ struct intel_context GLboolean ttm; struct intel_batchbuffer *batch; + drm_intel_bo *first_post_swapbuffers_batch; GLboolean no_batch_wrap; unsigned batch_id; @@ -307,7 +312,7 @@ struct intel_context __DRIdrawablePrivate *driReadDrawable; __DRIscreenPrivate *driScreen; intelScreenPrivate *intelScreen; - volatile struct drm_i915_sarea *sarea; + volatile drm_i915_sarea_t *sarea; GLuint lastStamp; diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index 6a68021c691..aa3d704f299 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -48,6 +48,7 @@ #define need_GL_EXT_framebuffer_blit #define need_GL_EXT_gpu_program_parameters #define need_GL_EXT_point_parameters +#define need_GL_EXT_provoking_vertex #define need_GL_EXT_secondary_color #define need_GL_EXT_stencil_two_side #define need_GL_APPLE_vertex_array_object @@ -93,6 +94,7 @@ static const struct dri_extension card_extensions[] = { { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, { "GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions }, { "GL_EXT_packed_depth_stencil", NULL }, + { "GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions }, { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, { "GL_EXT_stencil_wrap", NULL }, { "GL_EXT_texture_edge_clamp", NULL }, diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 666893596e1..804c0348401 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -35,6 +35,7 @@ #include "main/context.h" #include "main/texformat.h" #include "main/texrender.h" +#include "drivers/common/meta.h" #include "intel_context.h" #include "intel_buffers.h" @@ -700,74 +701,6 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) /** - * Called from glBlitFramebuffer(). - * For now, we're doing an approximation with glCopyPixels(). - * XXX we need to bypass all the per-fragment operations, except scissor. - */ -static void -intel_blit_framebuffer(GLcontext *ctx, - GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, - GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, - GLbitfield mask, GLenum filter) -{ - const GLfloat xZoomSave = ctx->Pixel.ZoomX; - const GLfloat yZoomSave = ctx->Pixel.ZoomY; - GLsizei width, height; - GLfloat xFlip = 1.0F, yFlip = 1.0F; - - if (srcX1 < srcX0) { - GLint tmp = srcX1; - srcX1 = srcX0; - srcX0 = tmp; - xFlip = -1.0F; - } - - if (srcY1 < srcY0) { - GLint tmp = srcY1; - srcY1 = srcY0; - srcY0 = tmp; - yFlip = -1.0F; - } - - width = srcX1 - srcX0; - height = srcY1 - srcY0; - - ctx->Pixel.ZoomX = xFlip * (dstX1 - dstX0) / (srcX1 - srcY0); - ctx->Pixel.ZoomY = yFlip * (dstY1 - dstY0) / (srcY1 - srcY0); - - if (ctx->Pixel.ZoomX < 0.0F) { - dstX0 = MAX2(dstX0, dstX1); - } - else { - dstX0 = MIN2(dstX0, dstX1); - } - - if (ctx->Pixel.ZoomY < 0.0F) { - dstY0 = MAX2(dstY0, dstY1); - } - else { - dstY0 = MIN2(dstY0, dstY1); - } - - if (mask & GL_COLOR_BUFFER_BIT) { - ctx->Driver.CopyPixels(ctx, srcX0, srcY0, width, height, - dstX0, dstY0, GL_COLOR); - } - if (mask & GL_DEPTH_BUFFER_BIT) { - ctx->Driver.CopyPixels(ctx, srcX0, srcY0, width, height, - dstX0, dstY0, GL_DEPTH); - } - if (mask & GL_STENCIL_BUFFER_BIT) { - ctx->Driver.CopyPixels(ctx, srcX0, srcY0, width, height, - dstX0, dstY0, GL_STENCIL); - } - - ctx->Pixel.ZoomX = xZoomSave; - ctx->Pixel.ZoomY = yZoomSave; -} - - -/** * Do one-time context initializations related to GL_EXT_framebuffer_object. * Hook in device driver functions. */ @@ -782,5 +715,5 @@ intel_fbo_init(struct intel_context *intel) intel->ctx.Driver.FinishRenderTexture = intel_finish_render_texture; intel->ctx.Driver.ResizeBuffers = intel_resize_buffers; intel->ctx.Driver.ValidateFramebuffer = intel_validate_framebuffer; - intel->ctx.Driver.BlitFramebuffer = intel_blit_framebuffer; + intel->ctx.Driver.BlitFramebuffer = _mesa_meta_blit_framebuffer; } diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c index 5d52335dee3..ca796b36559 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c @@ -31,7 +31,7 @@ #include "main/state.h" #include "main/mtypes.h" #include "main/macros.h" -#include "swrast/swrast.h" +#include "drivers/common/meta.h" #include "intel_screen.h" #include "intel_context.h" @@ -97,162 +97,6 @@ intel_check_copypixel_blit_fragment_ops(GLcontext * ctx) ctx->Color.BlendEnabled); } -#ifdef I915 -/* Doesn't work for overlapping regions. Could do a double copy or - * just fallback. - */ -static GLboolean -do_texture_copypixels(GLcontext * ctx, - GLint srcx, GLint srcy, - GLsizei width, GLsizei height, - GLint dstx, GLint dsty, GLenum type) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_region *dst = intel_drawbuf_region(intel); - struct intel_region *src = copypix_src_region(intel, type); - GLenum src_format; - GLenum src_type; - - DBG("%s %d,%d %dx%d --> %d,%d\n", __FUNCTION__, - srcx, srcy, width, height, dstx, dsty); - - if (!src || !dst || type != GL_COLOR) - return GL_FALSE; - - if (ctx->_ImageTransferState) { - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s: check_color failed\n", __FUNCTION__); - return GL_FALSE; - } - - /* Can't handle overlapping regions. Don't have sufficient control - * over rasterization to pull it off in-place. Punt on these for - * now. - * - * XXX: do a copy to a temporary. - */ - if (src->buffer == dst->buffer) { - drm_clip_rect_t srcbox; - drm_clip_rect_t dstbox; - drm_clip_rect_t tmp; - - srcbox.x1 = srcx; - srcbox.y1 = srcy; - srcbox.x2 = srcx + width; - srcbox.y2 = srcy + height; - - if (ctx->Pixel.ZoomX > 0) { - dstbox.x1 = dstx; - dstbox.x2 = dstx + width * ctx->Pixel.ZoomX; - } else { - dstbox.x1 = dstx + width * ctx->Pixel.ZoomX; - dstbox.x2 = dstx; - } - if (ctx->Pixel.ZoomY > 0) { - dstbox.y1 = dsty; - dstbox.y2 = dsty + height * ctx->Pixel.ZoomY; - } else { - dstbox.y1 = dsty + height * ctx->Pixel.ZoomY; - dstbox.y2 = dsty; - } - - DBG("src %d,%d %d,%d\n", srcbox.x1, srcbox.y1, srcbox.x2, srcbox.y2); - DBG("dst %d,%d %d,%d (%dx%d) (%f,%f)\n", dstbox.x1, dstbox.y1, dstbox.x2, dstbox.y2, - width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY); - - if (intel_intersect_cliprects(&tmp, &srcbox, &dstbox)) { - DBG("%s: regions overlap\n", __FUNCTION__); - return GL_FALSE; - } - } - - intelFlush(&intel->ctx); - - intel->vtbl.install_meta_state(intel); - - /* Is this true? Also will need to turn depth testing on according - * to state: - */ - intel->vtbl.meta_no_stencil_write(intel); - intel->vtbl.meta_no_depth_write(intel); - - /* Set the 3d engine to draw into the destination region: - */ - intel->vtbl.meta_draw_region(intel, dst, intel->depth_region); - - intel->vtbl.meta_import_pixel_state(intel); - - if (src->cpp == 2) { - src_format = GL_RGB; - src_type = GL_UNSIGNED_SHORT_5_6_5; - } - else { - src_format = GL_BGRA; - src_type = GL_UNSIGNED_BYTE; - } - - /* Set the frontbuffer up as a large rectangular texture. - */ - if (!intel->vtbl.meta_tex_rect_source(intel, src->buffer, 0, - src->pitch, - src->height, src_format, src_type)) { - intel->vtbl.leave_meta_state(intel); - return GL_FALSE; - } - - - intel->vtbl.meta_texture_blend_replace(intel); - - LOCK_HARDWARE(intel); - - if (intel->driDrawable->numClipRects) { - __DRIdrawablePrivate *dPriv = intel->driDrawable; - - - srcy = dPriv->h - srcy - height; /* convert from gl to hardware coords */ - - srcx += dPriv->x; - srcy += dPriv->y; - - /* Clip against the source region. This is the only source - * clipping we do. XXX: Just set the texcord wrap mode to clamp - * or similar. - * - */ - if (0) { - GLint orig_x = srcx; - GLint orig_y = srcy; - - if (!_mesa_clip_to_region(0, 0, src->pitch, src->height, - &srcx, &srcy, &width, &height)) - goto out; - - dstx += srcx - orig_x; - dsty += (srcy - orig_y) * ctx->Pixel.ZoomY; - } - - /* Just use the regular cliprect mechanism... Does this need to - * even hold the lock??? - */ - intel->vtbl.meta_draw_quad(intel, - dstx, - dstx + width * ctx->Pixel.ZoomX, - dPriv->h - (dsty + height * ctx->Pixel.ZoomY), - dPriv->h - (dsty), 0, /* XXX: what z value? */ - 0x00ff00ff, - srcx, srcx + width, srcy, srcy + height); - - out: - intel->vtbl.leave_meta_state(intel); - intel_batchbuffer_emit_mi_flush(intel->batch); - } - UNLOCK_HARDWARE(intel); - - DBG("%s: success\n", __FUNCTION__); - return GL_TRUE; -} -#endif /* I915 */ - /** * CopyPixels with the blitter. Don't support zooming, pixel transfer, etc. @@ -400,12 +244,5 @@ intelCopyPixels(GLcontext * ctx, if (do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type)) return; -#ifdef I915 - if (do_texture_copypixels(ctx, srcx, srcy, width, height, destx, desty, type)) - return; -#endif - - DBG("fallback to _swrast_CopyPixels\n"); - - _swrast_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type); + _mesa_meta_copy_pixels(ctx, srcx, srcy, width, height, destx, desty, type); } diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index 7525cd9c4d7..497f7967649 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -181,6 +181,11 @@ intel_region_alloc(struct intel_context *intel, dri_bo *buffer; struct intel_region *region; + if (tiling == I915_TILING_X) + height = ALIGN(height, 8); + else if (tiling == I915_TILING_Y) + height = ALIGN(height, 32); + if (expect_accelerated_upload) { buffer = drm_intel_bo_alloc_for_render(intel->bufmgr, "region", pitch * cpp * height, 64); @@ -452,6 +457,7 @@ void intel_region_cow(struct intel_context *intel, struct intel_region *region) { struct intel_buffer_object *pbo = region->pbo; + GLboolean ok; intel_region_release_pbo(intel, region); @@ -463,13 +469,14 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region) */ LOCK_HARDWARE(intel); - assert(intelEmitCopyBlit(intel, - region->cpp, - region->pitch, pbo->buffer, 0, region->tiling, - region->pitch, region->buffer, 0, region->tiling, - 0, 0, 0, 0, - region->pitch, region->height, - GL_COPY)); + ok = intelEmitCopyBlit(intel, + region->cpp, + region->pitch, pbo->buffer, 0, region->tiling, + region->pitch, region->buffer, 0, region->tiling, + 0, 0, 0, 0, + region->pitch, region->height, + GL_COPY); + assert(ok); UNLOCK_HARDWARE(intel); } diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 14cc815139c..1b8c56e68d6 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -69,7 +69,11 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_DESC_END DRI_CONF_OPT_END - DRI_CONF_TEXTURE_TILING(false) +#ifdef I915 + DRI_CONF_TEXTURE_TILING(false) +#else + DRI_CONF_TEXTURE_TILING(true) +#endif DRI_CONF_OPT_BEGIN(early_z, bool, false) DRI_CONF_DESC(en, "Enable early Z in classic mode (unstable, 945-only).") diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 34b78ebc1ab..8df49908806 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -501,7 +501,7 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) /** - * Prepare for softare rendering. Map current read/draw framebuffers' + * Prepare for software rendering. Map current read/draw framebuffers' * renderbuffes and all currently bound texture objects. * * Old note: Moved locking out to get reasonable span performance. @@ -526,7 +526,7 @@ intelSpanRenderStart(GLcontext * ctx) } /** - * Called when done softare rendering. Unmap the buffers we mapped in + * Called when done software rendering. Unmap the buffers we mapped in * the above function. */ void diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c index 1f27131dac0..89037073f84 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c @@ -44,10 +44,12 @@ intelTexSubimage(GLcontext * ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint width, GLint height, GLint depth, + GLsizei imageSize, GLenum format, GLenum type, const void *pixels, const struct gl_pixelstore_attrib *packing, struct gl_texture_object *texObj, - struct gl_texture_image *texImage) + struct gl_texture_image *texImage, + GLboolean compressed) { struct intel_context *intel = intel_context(ctx); struct intel_texture_image *intelImage = intel_texture_image(texImage); @@ -59,9 +61,14 @@ intelTexSubimage(GLcontext * ctx, intelFlush(ctx); - pixels = - _mesa_validate_pbo_teximage(ctx, dims, width, height, depth, format, - type, pixels, packing, "glTexSubImage2D"); + if (compressed) + pixels = _mesa_validate_pbo_compressed_teximage(ctx, imageSize, + pixels, packing, + "glCompressedTexImage"); + else + pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, depth, + format, type, pixels, packing, + "glTexSubImage"); if (!pixels) return; @@ -90,15 +97,28 @@ intelTexSubimage(GLcontext * ctx, assert(dstRowStride); - if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat, - texImage->TexFormat, - texImage->Data, - xoffset, yoffset, zoffset, - dstRowStride, - texImage->ImageOffsets, - width, height, depth, - format, type, pixels, packing)) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage"); + if (compressed) { + if (intelImage->mt) { + struct intel_region *dst = intelImage->mt->region; + + _mesa_copy_rect(texImage->Data, dst->cpp, dst->pitch, + xoffset, yoffset / 4, + (width + 3) & ~3, (height + 3) / 4, + pixels, (width + 3) & ~3, 0, 0); + } else + memcpy(texImage->Data, pixels, imageSize); + } + else { + if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat, + texImage->TexFormat, + texImage->Data, + xoffset, yoffset, zoffset, + dstRowStride, + texImage->ImageOffsets, + width, height, depth, + format, type, pixels, packing)) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage"); + } } _mesa_unmap_teximage_pbo(ctx, packing); @@ -132,8 +152,8 @@ intelTexSubImage3D(GLcontext * ctx, intelTexSubimage(ctx, 3, target, level, xoffset, yoffset, zoffset, - width, height, depth, - format, type, pixels, packing, texObj, texImage); + width, height, depth, 0, + format, type, pixels, packing, texObj, texImage, GL_FALSE); } @@ -152,8 +172,8 @@ intelTexSubImage2D(GLcontext * ctx, intelTexSubimage(ctx, 2, target, level, xoffset, yoffset, 0, - width, height, 1, - format, type, pixels, packing, texObj, texImage); + width, height, 1, 0, + format, type, pixels, packing, texObj, texImage, GL_FALSE); } @@ -172,8 +192,8 @@ intelTexSubImage1D(GLcontext * ctx, intelTexSubimage(ctx, 1, target, level, xoffset, 0, 0, - width, 1, 1, - format, type, pixels, packing, texObj, texImage); + width, 1, 1, 0, + format, type, pixels, packing, texObj, texImage, GL_FALSE); } static void @@ -187,8 +207,11 @@ intelCompressedTexSubImage2D(GLcontext * ctx, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { - fprintf(stderr, "stubbed CompressedTexSubImage2D: %dx%d@%dx%d\n", - width, height, xoffset, yoffset); + intelTexSubimage(ctx, 2, + target, level, + xoffset, yoffset, 0, + width, height, 1, imageSize, + format, 0, pixels, &ctx->Unpack, texObj, texImage, GL_TRUE); } diff --git a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c index d49f4fabe72..5d0f367b387 100644 --- a/src/mesa/drivers/dri/r200/r200_cmdbuf.c +++ b/src/mesa/drivers/dri/r200/r200_cmdbuf.c @@ -107,31 +107,37 @@ void r200SetUpAtomList( r200ContextPtr rmesa ) void r200EmitScissor(r200ContextPtr rmesa) { + unsigned x1, y1, x2, y2; + struct radeon_renderbuffer *rrb; BATCH_LOCALS(&rmesa->radeon); if (!rmesa->radeon.radeonScreen->kernel_mm) { return; } + rrb = radeon_get_colorbuffer(&rmesa->radeon); + if (!rrb || !rrb->bo) + return; + if (rmesa->radeon.state.scissor.enabled) { - BEGIN_BATCH(8); - OUT_BATCH(CP_PACKET0(R200_RE_CNTL, 0)); - OUT_BATCH(R200_SCISSOR_ENABLE | rmesa->hw.set.cmd[SET_RE_CNTL]); - OUT_BATCH(CP_PACKET0(R200_RE_AUX_SCISSOR_CNTL, 0)); - OUT_BATCH(R200_SCISSOR_ENABLE_0); - OUT_BATCH(CP_PACKET0(R200_RE_SCISSOR_TL_0, 0)); - OUT_BATCH((rmesa->radeon.state.scissor.rect.y1 << 16) | - rmesa->radeon.state.scissor.rect.x1); - OUT_BATCH(CP_PACKET0(R200_RE_SCISSOR_BR_0, 0)); - OUT_BATCH(((rmesa->radeon.state.scissor.rect.y2 - 1) << 16) | - (rmesa->radeon.state.scissor.rect.x2 - 1)); - END_BATCH(); + x1 = rmesa->radeon.state.scissor.rect.x1; + y1 = rmesa->radeon.state.scissor.rect.y1; + x2 = rmesa->radeon.state.scissor.rect.x2 - 1; + y2 = rmesa->radeon.state.scissor.rect.y2 - 1; } else { - BEGIN_BATCH(4); - OUT_BATCH(CP_PACKET0(R200_RE_CNTL, 0)); - OUT_BATCH(rmesa->hw.set.cmd[SET_RE_CNTL] & ~R200_SCISSOR_ENABLE); - OUT_BATCH(CP_PACKET0(R200_RE_AUX_SCISSOR_CNTL, 0)); - OUT_BATCH(0); - END_BATCH(); + x1 = 0; + y1 = 0; + x2 = rrb->base.Width - 1; + y2 = rrb->base.Height - 1; } + BEGIN_BATCH(8); + OUT_BATCH(CP_PACKET0(R200_RE_CNTL, 0)); + OUT_BATCH(R200_SCISSOR_ENABLE | rmesa->hw.set.cmd[SET_RE_CNTL]); + OUT_BATCH(CP_PACKET0(R200_RE_AUX_SCISSOR_CNTL, 0)); + OUT_BATCH(0); + OUT_BATCH(CP_PACKET0(R200_RE_TOP_LEFT, 0)); + OUT_BATCH((y1 << 16) | x1); + OUT_BATCH(CP_PACKET0(R200_RE_WIDTH_HEIGHT, 0)); + OUT_BATCH((y2 << 16) | x2); + END_BATCH(); } /* Fire a section of the retained (indexed_verts) buffer as a regular diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index 9a92a320797..8cb287de268 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -500,3 +500,15 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, } +void r200DestroyContext( __DRIcontextPrivate *driContextPriv ) +{ + int i; + r200ContextPtr rmesa = (r200ContextPtr)driContextPriv->driverPrivate; + if (rmesa) + { + for ( i = 0 ; i < R200_MAX_TEXTURE_UNITS ; i++ ) { + _math_matrix_dtr( &rmesa->TexGenMatrix[i] ); + } + } + radeonDestroyContext(driContextPriv); +} diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c index bc871d99048..78ad5baebb3 100644 --- a/src/mesa/drivers/dri/r200/r200_state_init.c +++ b/src/mesa/drivers/dri/r200/r200_state_init.c @@ -515,7 +515,7 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) if (drb) dwords += 6; if (rrb) - dwords += 6; + dwords += 8; if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) dwords += 4; @@ -546,7 +546,7 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0)); - OUT_BATCH(cbpitch); + OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); } if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) { @@ -563,7 +563,6 @@ static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom) uint32_t dwords = atom->cmd_size; int i = atom->idx; radeonTexObj *t = r200->state.texture.unit[i].texobj; - radeon_mipmap_level *lvl; if (t && t->mt && !t->image_override) dwords += 2; @@ -591,7 +590,6 @@ static void tex_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) uint32_t dwords = atom->cmd_size; int i = atom->idx; radeonTexObj *t = r200->state.texture.unit[i].texobj; - radeon_mipmap_level *lvl; int hastexture = 1; if (!r200->state.texture.unit[i].unitneeded) @@ -774,7 +772,7 @@ void r200InitState( r200ContextPtr rmesa ) ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 ); } - for (i = 0; i < 5; i++) + for (i = 0; i < 6; i++) if (rmesa->radeon.radeonScreen->kernel_mm) rmesa->hw.tex[i].emit = tex_emit_cs; else @@ -786,7 +784,7 @@ void r200InitState( r200ContextPtr rmesa ) ALLOC_STATE( cube[3], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-3", 3 ); ALLOC_STATE( cube[4], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-4", 4 ); ALLOC_STATE( cube[5], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-5", 5 ); - for (i = 0; i < 5; i++) + for (i = 0; i < 6; i++) if (rmesa->radeon.radeonScreen->kernel_mm) rmesa->hw.cube[i].emit = cube_emit_cs; else diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 7eb11aaf271..bd46f9acf2e 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -177,14 +177,17 @@ static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom) } else if (!t) { /* Texture unit hasn't a texture bound. * We assign the current color buffer as a fakery to make - * KIL work. */ - struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&r300->radeon); - if (rrb && rrb->bo) { - BEGIN_BATCH_NO_AUTOSTATE(4); - OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_BATCH_RELOC(0, rrb->bo, 0, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); - END_BATCH(); + * KIL work on KMS (without it, the CS checker will complain). + */ + if (r300->radeon.radeonScreen->kernel_mm) { + struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&r300->radeon); + if (rrb && rrb->bo) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH_RELOC(0, rrb->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + } } } else { /* override cases */ if (t->bo) { diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index cebb9a10d8b..ab2287a5e25 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -604,11 +604,19 @@ static void r300DrawPrims(GLcontext *ctx, const struct _mesa_prim *prim, GLuint nr_prims, const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, GLuint min_index, GLuint max_index) { GLboolean retval; + /* This check should get folded into just the places that + * min/max index are really needed. + */ + if (!index_bounds_valid) { + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + } + if (min_index) { vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims ); return; diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c index 8debecbab99..dc2fb0144a2 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c @@ -457,6 +457,9 @@ void r600InitCmdBuf(context_t *r600) /* from rcommonInitCmdBuf */ GLuint size; rmesa->hw.max_state_size = 4000; /* rough estimate */ + rmesa->hw.all_dirty = GL_TRUE; + rmesa->hw.is_dirty = GL_TRUE; + /* Initialize command buffer */ size = 256 * driQueryOptioni(&rmesa->optionCache, "command_buffer_size"); diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index eacf8112825..7009374b0ca 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -395,6 +395,8 @@ r600DestroyContext (__DRIcontextPrivate * driContextPriv) if (context) FREE(context->hw.pStateList); + + radeonDestroyContext(driContextPriv); } diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index fbb8164af59..30ddce682c1 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -158,6 +158,17 @@ extern GLboolean r700InitChipObject(context_t *context); extern GLboolean r700SendContextStates(context_t *context); extern GLboolean r700SendViewportState(context_t *context, int id); extern GLboolean r700SendRenderTargetState(context_t *context, int id); +extern GLboolean r700SendTextureState(context_t *context); +extern GLboolean r700SendDepthTargetState(context_t *context); +extern GLboolean r700SendUCPState(context_t *context); +extern GLboolean r700SendFSState(context_t *context); +extern void r700EmitState(GLcontext * ctx); + +extern GLboolean r700SyncSurf(context_t *context, + struct radeon_bo *pbo, + uint32_t read_domain, + uint32_t write_domain, + uint32_t sync_type); extern int r700SetupStreams(GLcontext * ctx); extern void r700SetupVTXConstants(GLcontext * ctx, diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 4840586858d..ee9b64ee43a 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -568,9 +568,6 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex } } - if (t->image_override && t->bo) - return; - switch (texObj->Target) { case GL_TEXTURE_1D: SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_1D, DIM_shift, DIM_mask); @@ -701,7 +698,7 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname, struct gl_texture_object *tObj = _mesa_lookup_texture(rmesa->radeon.glCtx, texname); radeonTexObjPtr t = radeon_tex_obj(tObj); - uint32_t pitch_val; + uint32_t pitch_val, size; if (!tObj) return; @@ -711,7 +708,12 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname, if (!offset) return; - t->bo = NULL; + size = pitch;//h * w * (depth / 8); + if (t->bo) { + radeon_bo_unref(t->bo); + t->bo = NULL; + } + t->bo = radeon_legacy_bo_alloc_fake(rmesa->radeon.radeonScreen->bom, size, offset); t->override_offset = offset; pitch_val = pitch; switch (depth) { diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 0abf112b55f..eaacd061137 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -3839,6 +3839,9 @@ GLboolean Process_Export(r700_AssemblerBase* pAsm, if (export_count == 1) { ucWriteMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number]; + /* exports Z as a float into Red channel */ + if (GL_TRUE == is_depth_export) + ucWriteMask = 0x1; if( (ucWriteMask & 0x1) != 0) { diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 78779e841d7..0fb355a0b6a 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -138,6 +138,19 @@ GLboolean r700InitChipObject(context_t *context) LINK_STATES(CB_CLRCMP_MSK); LINK_STATES(CB_BLEND_CONTROL); + //DB + LINK_STATES(DB_HTILE_DATA_BASE); + LINK_STATES(DB_STENCIL_CLEAR); + LINK_STATES(DB_DEPTH_CLEAR); + LINK_STATES(DB_STENCILREFMASK); + LINK_STATES(DB_STENCILREFMASK_BF); + LINK_STATES(DB_DEPTH_CONTROL); + LINK_STATES(DB_SHADER_CONTROL); + LINK_STATES(DB_RENDER_CONTROL); + LINK_STATES(DB_RENDER_OVERRIDE); + LINK_STATES(DB_HTILE_SURFACE); + LINK_STATES(DB_ALPHA_TO_MASK); + // SX LINK_STATES(SX_MISC); LINK_STATES(SX_ALPHA_TEST_CONTROL); @@ -322,7 +335,7 @@ void r700SetupVTXConstants(GLcontext * ctx, unsigned int uSQ_VTX_CONSTANT_WORD6_0 = 0; if (!paos->bo) - return GL_FALSE; + return; if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) || (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) || @@ -474,6 +487,18 @@ GLboolean r700SendContextStates(context_t *context) for(ui = 0; ui < R700_MAX_SHADER_EXPORTS; ui++) R600_OUT_BATCH(r700->SPI_PS_INPUT_CNTL[ui].u32All); END_BATCH(); + + if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) { + for (ui = 0; ui < R700_MAX_RENDER_TARGETS; ui++) { + if (r700->render_target[ui].enabled) { + BEGIN_BATCH_NO_AUTOSTATE(3); + R600_OUT_BATCH_REGVAL(CB_BLEND0_CONTROL + (4 * ui), + r700->render_target[ui].CB_BLEND0_CONTROL.u32All); + END_BATCH(); + } + } + } + COMMIT_BATCH(); return GL_TRUE; @@ -491,38 +516,25 @@ GLboolean r700SendDepthTargetState(context_t *context) return GL_FALSE; } - BEGIN_BATCH_NO_AUTOSTATE(9); + BEGIN_BATCH_NO_AUTOSTATE(8); R600_OUT_BATCH_REGSEQ(DB_DEPTH_SIZE, 2); R600_OUT_BATCH(r700->DB_DEPTH_SIZE.u32All); R600_OUT_BATCH(r700->DB_DEPTH_VIEW.u32All); - R600_OUT_BATCH_REGSEQ(DB_DEPTH_BASE, 3); + R600_OUT_BATCH_REGSEQ(DB_DEPTH_BASE, 2); R600_OUT_BATCH_RELOC(r700->DB_DEPTH_BASE.u32All, rrb->bo, r700->DB_DEPTH_BASE.u32All, 0, RADEON_GEM_DOMAIN_VRAM, 0); R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All); - R600_OUT_BATCH(r700->DB_HTILE_DATA_BASE.u32All); END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(24); - R600_OUT_BATCH_REGSEQ(DB_STENCIL_CLEAR, 2); - R600_OUT_BATCH(r700->DB_STENCIL_CLEAR.u32All); - R600_OUT_BATCH(r700->DB_DEPTH_CLEAR.u32All); - - R600_OUT_BATCH_REGSEQ(DB_STENCILREFMASK, 2); - R600_OUT_BATCH(r700->DB_STENCILREFMASK.u32All); - R600_OUT_BATCH(r700->DB_STENCILREFMASK_BF.u32All); - - R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, r700->DB_DEPTH_CONTROL.u32All); - R600_OUT_BATCH_REGVAL(DB_SHADER_CONTROL, r700->DB_SHADER_CONTROL.u32All); - - R600_OUT_BATCH_REGSEQ(DB_RENDER_CONTROL, 2); - R600_OUT_BATCH(r700->DB_RENDER_CONTROL.u32All); - R600_OUT_BATCH(r700->DB_RENDER_OVERRIDE.u32All); - - R600_OUT_BATCH_REGVAL(DB_HTILE_SURFACE, r700->DB_HTILE_SURFACE.u32All); - R600_OUT_BATCH_REGVAL(DB_ALPHA_TO_MASK, r700->DB_ALPHA_TO_MASK.u32All); - END_BATCH(); + if ((context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) && + (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)) { + BEGIN_BATCH_NO_AUTOSTATE(2); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0)); + R600_OUT_BATCH(1 << 0); + END_BATCH(); + } COMMIT_BATCH(); @@ -575,12 +587,6 @@ GLboolean r700SendRenderTargetState(context_t *context, int id) R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), r700->render_target[id].CB_COLOR0_MASK.u32All); END_BATCH(); - if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) { - BEGIN_BATCH_NO_AUTOSTATE(3); - R600_OUT_BATCH_REGVAL(CB_BLEND0_CONTROL + (4 * id), r700->render_target[id].CB_BLEND0_CONTROL.u32All); - END_BATCH(); - } - COMMIT_BATCH(); r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, diff --git a/src/mesa/drivers/dri/r600/r700_clear.c b/src/mesa/drivers/dri/r600/r700_clear.c index e84be386221..05d4af331e0 100644 --- a/src/mesa/drivers/dri/r600/r700_clear.c +++ b/src/mesa/drivers/dri/r600/r700_clear.c @@ -31,6 +31,7 @@ #include "main/imports.h" #include "main/mtypes.h" #include "main/enums.h" +#include "swrast/swrast.h" #include "radeon_lock.h" #include "r600_context.h" diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 4ac37f1dfe8..6249bde6f18 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -55,6 +55,12 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm, //Input mapping : mesa_fp->Base.InputsRead set the flag, set in //The flags parsed in parse_attrib_binding. FRAG_ATTRIB_COLx, FRAG_ATTRIB_TEXx, ... //MUST match order in Map_Vertex_Output + unBit = 1 << FRAG_ATTRIB_WPOS; + if(mesa_fp->Base.InputsRead & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS] = pAsm->number_used_registers++; + } + unBit = 1 << FRAG_ATTRIB_COL0; if(mesa_fp->Base.InputsRead & unBit) { @@ -112,6 +118,7 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm, pAsm->uiFP_OutputMap[FRAG_RESULT_DEPTH] = pAsm->number_used_registers++; pAsm->number_of_exports++; pAsm->number_of_colorandz_exports++; + pAsm->pR700Shader->depthIsExported = 1; } pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports); @@ -263,11 +270,13 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); - + BATCH_LOCALS(&context->radeon); struct r700_fragment_program *fp = (struct r700_fragment_program *) (ctx->FragmentProgram._Current); r700_AssemblerBase *pAsm = &(fp->r700AsmCode); struct gl_fragment_program *mesa_fp = &(fp->mesa_program); + struct gl_program_parameter_list *paramList; + unsigned int unNumParamData; unsigned int ui, i; unsigned int unNumOfReg; unsigned int unBit; @@ -302,6 +311,16 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) ui = (r700->SPI_PS_IN_CONTROL_0.u32All & NUM_INTERP_mask) / (1 << NUM_INTERP_shift); + /* PS uses fragment.position */ + if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) + { + ui += 1; + SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask); + SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, CENTERS_ONLY, BARYC_SAMPLE_CNTL_shift, BARYC_SAMPLE_CNTL_mask); + SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit); + SETbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit); + } + ui = (unNumOfReg < ui) ? ui : unNumOfReg; SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask); @@ -335,7 +354,49 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) CLEARbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit); } + /* sent out shader constants. */ + paramList = fp->mesa_program.Base.Parameters; + + if(NULL != paramList) + { + _mesa_load_state_parameters(ctx, paramList); + + unNumParamData = paramList->NumParameters * 4; + + BEGIN_BATCH_NO_AUTOSTATE(2 + unNumParamData); + + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, unNumParamData)); + + /* assembler map const from very beginning. */ + R600_OUT_BATCH(SQ_ALU_CONSTANT_PS_OFFSET * 4); + + unNumParamData = paramList->NumParameters; + + for(ui=0; ui<unNumParamData; ui++) + { + R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][0]))); + R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][1]))); + R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][2]))); + R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][3]))); + } + END_BATCH(); + COMMIT_BATCH(); + } + // emit ps input map + unBit = 1 << FRAG_ATTRIB_WPOS; + if(mesa_fp->Base.InputsRead & unBit) + { + ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS]; + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + } + unBit = 1 << FRAG_ATTRIB_COL0; if(mesa_fp->Base.InputsRead & unBit) { @@ -391,45 +452,3 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) return GL_TRUE; } -GLboolean r700SendPSConstants(GLcontext * ctx) -{ - context_t *context = R700_CONTEXT(ctx); - BATCH_LOCALS(&context->radeon); - struct r700_fragment_program *fp = (struct r700_fragment_program *) - (ctx->FragmentProgram._Current); - struct gl_program_parameter_list *paramList; - unsigned int unNumParamData; - unsigned int ui; - - /* sent out shader constants. */ - paramList = fp->mesa_program.Base.Parameters; - - if(NULL != paramList) - { - _mesa_load_state_parameters(ctx, paramList); - - unNumParamData = paramList->NumParameters * 4; - - BEGIN_BATCH_NO_AUTOSTATE(2 + unNumParamData); - - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, unNumParamData)); - - /* assembler map const from very beginning. */ - R600_OUT_BATCH(SQ_ALU_CONSTANT_PS_OFFSET * 4); - - unNumParamData = paramList->NumParameters; - - for(ui=0; ui<unNumParamData; ui++) - { - R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][0]))); - R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][1]))); - R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][2]))); - R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][3]))); - } - END_BATCH(); - COMMIT_BATCH(); - } - - return GL_TRUE; -} - diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.c b/src/mesa/drivers/dri/r600/r700_oglprog.c index 36de143b1a7..c49b90c1cc9 100644 --- a/src/mesa/drivers/dri/r600/r700_oglprog.c +++ b/src/mesa/drivers/dri/r600/r700_oglprog.c @@ -33,6 +33,7 @@ #include "tnl/tnl.h" #include "r600_context.h" +#include "r600_emit.h" #include "r700_oglprog.h" #include "r700_fragprog.h" @@ -87,7 +88,6 @@ static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) { struct r700_vertex_program * vp; struct r700_fragment_program * fp; - context_t *context = R700_CONTEXT(ctx); switch (prog->Target) { diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 2592d7df148..6705dbcf4b9 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -139,6 +139,13 @@ static GLboolean r700SetupShaders(GLcontext * ctx) r600UpdateTextureState(ctx); + r700SendFSState(context); // FIXME just a place holder for now + r700SendPSState(context); + r700SendVSState(context); + + r700SendTextureState(context); + r700SetupStreams(ctx); + return GL_TRUE; } @@ -274,20 +281,15 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim void r700EmitState(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); + radeonContextPtr radeon = &context->radeon; + + if (radeon->cmdbuf.cs->cdw && !radeon->hw.is_dirty && !radeon->hw.all_dirty) + return; rcommonEnsureCmdBufSpace(&context->radeon, context->radeon.hw.max_state_size, __FUNCTION__); - r700Start3D(context); r700SendSQConfig(context); - r700SendFSState(context); // FIXME just a place holder for now - r700SendPSState(context); - r700SendVSState(context); - r700SendVSConstants(ctx); - r700SendPSConstants(ctx); - - r700SendTextureState(context); - r700SetupStreams(ctx); r700SendUCPState(context); r700SendContextStates(context); @@ -305,13 +307,12 @@ static GLboolean r700RunRender(GLcontext * ctx, TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *vb = &tnl->vb; + r700Start3D(context); + r700UpdateShaders(ctx); r700SetScissor(context); r700SetupShaders(ctx); - r700SetRenderTarget(context, 0); - r700SetDepthTarget(context); - r700EmitState(ctx); /* richard test code */ @@ -327,8 +328,6 @@ static GLboolean r700RunRender(GLcontext * ctx, radeonReleaseArrays(ctx, ~0); - rcommonFlushCmdBuf( &context->radeon, __FUNCTION__ ); - return GL_FALSE; } diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index e0a57425917..835b5e18c2c 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -60,6 +60,8 @@ static void r700SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state); static void r700UpdatePolygonMode(GLcontext * ctx); static void r700SetPolygonOffsetState(GLcontext * ctx, GLboolean state); static void r700SetStencilState(GLcontext * ctx, GLboolean state); +static void r700SetRenderTarget(context_t *context, int id); +static void r700SetDepthTarget(context_t *context); void r700SetDefaultStates(context_t *context) //-------------------- { @@ -158,29 +160,16 @@ void r700UpdateViewportOffset(GLcontext * ctx) //------------------ */ void r700UpdateDrawBuffer(GLcontext * ctx) /* TODO */ //--------------------- { -#if 0 /* to be enabled */ - context_t *context = R700_CONTEXT(ctx); + context_t *context = R700_CONTEXT(ctx); - switch (ctx->DrawBuffer->_ColorDrawBufferIndexes[0]) - { - case BUFFER_FRONT_LEFT: - context->target.rt = context->screen->frontBuffer; - break; - case BUFFER_BACK_LEFT: - context->target.rt = context->screen->backBuffer; - break; - default: - memset (&context->target.rt, sizeof(context->target.rt), 0); - } -#endif /* to be enabled */ + r700SetRenderTarget(context, 0); + r700SetDepthTarget(context); } static void r700FetchStateParameter(GLcontext * ctx, const gl_state_index state[STATE_LENGTH], GLfloat * value) { - context_t *context = R700_CONTEXT(ctx); - /* TODO */ } @@ -600,14 +589,46 @@ static void r700BlendFuncSeparate(GLcontext * ctx, /** * Translate LogicOp enums into hardware representation. - * Both use a very logical bit-wise layout, but unfortunately the order - * of bits is reversed. */ static GLuint translate_logicop(GLenum logicop) { - GLuint bits = logicop - GL_CLEAR; - bits = ((bits & 1) << 3) | ((bits & 2) << 1) | ((bits & 4) >> 1) | ((bits & 8) >> 3); - return bits; + switch (logicop) { + case GL_CLEAR: + return 0x00; + case GL_SET: + return 0xff; + case GL_COPY: + return 0xcc; + case GL_COPY_INVERTED: + return 0x33; + case GL_NOOP: + return 0xaa; + case GL_INVERT: + return 0x55; + case GL_AND: + return 0x88; + case GL_NAND: + return 0x77; + case GL_OR: + return 0xee; + case GL_NOR: + return 0x11; + case GL_XOR: + return 0x66; + case GL_EQUIV: + return 0xaa; + case GL_AND_REVERSE: + return 0x44; + case GL_AND_INVERTED: + return 0x22; + case GL_OR_REVERSE: + return 0xdd; + case GL_OR_INVERTED: + return 0xbb; + default: + fprintf(stderr, "unknown blend logic operation %x\n", logicop); + return 0xcc; + } } /** @@ -1327,22 +1348,22 @@ void r700SetScissor(context_t *context) //--------------- r700->viewport[id].enabled = GL_TRUE; } -void r700SetRenderTarget(context_t *context, int id) +static void r700SetRenderTarget(context_t *context, int id) { R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); struct radeon_renderbuffer *rrb; unsigned int nPitchInPixel; + rrb = radeon_get_colorbuffer(&context->radeon); + if (!rrb || !rrb->bo) { + fprintf(stderr, "no rrb\n"); + return; + } + /* screen/window/view */ SETfield(r700->CB_TARGET_MASK.u32All, 0xF, (4 * id), TARGET0_ENABLE_mask); - rrb = radeon_get_colorbuffer(&context->radeon); - if (!rrb || !rrb->bo) { - fprintf(stderr, "no rrb\n"); - return; - } - /* color buffer */ r700->render_target[id].CB_COLOR0_BASE.u32All = context->radeon.state.color.draw_offset; @@ -1375,39 +1396,22 @@ void r700SetRenderTarget(context_t *context, int id) r700->render_target[id].enabled = GL_TRUE; } -void r700SetDepthTarget(context_t *context) +static void r700SetDepthTarget(context_t *context) { R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); struct radeon_renderbuffer *rrb; unsigned int nPitchInPixel; + rrb = radeon_get_depthbuffer(&context->radeon); + if (!rrb) + return; + /* depth buf */ r700->DB_DEPTH_SIZE.u32All = 0; r700->DB_DEPTH_BASE.u32All = 0; r700->DB_DEPTH_INFO.u32All = 0; - - r700->DB_DEPTH_CLEAR.u32All = 0x3F800000; - r700->DB_DEPTH_VIEW.u32All = 0; - r700->DB_RENDER_CONTROL.u32All = 0; - SETbit(r700->DB_RENDER_CONTROL.u32All, STENCIL_COMPRESS_DISABLE_bit); - SETbit(r700->DB_RENDER_CONTROL.u32All, DEPTH_COMPRESS_DISABLE_bit); - r700->DB_RENDER_OVERRIDE.u32All = 0; - if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) - SETbit(r700->DB_RENDER_OVERRIDE.u32All, FORCE_SHADER_Z_ORDER_bit); - SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask); - SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask); - SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask); - - r700->DB_ALPHA_TO_MASK.u32All = 0; - SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET0_shift, ALPHA_TO_MASK_OFFSET0_mask); - SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET1_shift, ALPHA_TO_MASK_OFFSET1_mask); - SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET2_shift, ALPHA_TO_MASK_OFFSET2_mask); - SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET3_shift, ALPHA_TO_MASK_OFFSET3_mask); - - rrb = radeon_get_depthbuffer(&context->radeon); - if (!rrb) - return; + r700->DB_DEPTH_VIEW.u32All = 0; nPitchInPixel = rrb->pitch/rrb->cpp; @@ -1757,6 +1761,24 @@ void r700InitState(GLcontext * ctx) //------------------- r700DepthFunc(ctx, ctx->Depth.Func); SETbit(r700->DB_SHADER_CONTROL.u32All, DUAL_EXPORT_ENABLE_bit); + r700->DB_DEPTH_CLEAR.u32All = 0x3F800000; + + r700->DB_RENDER_CONTROL.u32All = 0; + SETbit(r700->DB_RENDER_CONTROL.u32All, STENCIL_COMPRESS_DISABLE_bit); + SETbit(r700->DB_RENDER_CONTROL.u32All, DEPTH_COMPRESS_DISABLE_bit); + r700->DB_RENDER_OVERRIDE.u32All = 0; + if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) + SETbit(r700->DB_RENDER_OVERRIDE.u32All, FORCE_SHADER_Z_ORDER_bit); + SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask); + SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask); + SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask); + + r700->DB_ALPHA_TO_MASK.u32All = 0; + SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET0_shift, ALPHA_TO_MASK_OFFSET0_mask); + SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET1_shift, ALPHA_TO_MASK_OFFSET1_mask); + SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET2_shift, ALPHA_TO_MASK_OFFSET2_mask); + SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET3_shift, ALPHA_TO_MASK_OFFSET3_mask); + /* stencil */ r700Enable(ctx, GL_STENCIL_TEST, ctx->Stencil._Enabled); r700StencilMaskSeparate(ctx, 0, ctx->Stencil.WriteMask[0]); @@ -1822,6 +1844,8 @@ void r700InitState(GLcontext * ctx) //------------------- /* Set up color compare mask */ r700->CB_CLRCMP_MSK.u32All = 0xFFFFFFFF; + context->radeon.hw.all_dirty = GL_TRUE; + } void r700InitStateFuncs(struct dd_function_table *functions) //----------------- diff --git a/src/mesa/drivers/dri/r600/r700_state.h b/src/mesa/drivers/dri/r600/r700_state.h index 23246367db8..30eb54e8b0a 100644 --- a/src/mesa/drivers/dri/r600/r700_state.h +++ b/src/mesa/drivers/dri/r600/r700_state.h @@ -42,10 +42,8 @@ extern void r700UpdateDrawBuffer (GLcontext * ctx); extern void r700InitState (GLcontext * ctx); extern void r700InitStateFuncs (struct dd_function_table *functions); -extern void r700SetRenderTarget(context_t *context, int id); extern void r700SetDefaultStates(context_t * context); -void r700SetScissor(context_t *context); -void r700SetDepthTarget(context_t *context); +extern void r700SetScissor(context_t *context); #endif /* _R600_SCREEN_H */ diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 1c5c20f66e9..31e71cdfa30 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -336,10 +336,14 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); - + BATCH_LOCALS(&context->radeon); struct r700_vertex_program *vp = (struct r700_vertex_program *)ctx->VertexProgram._Current; + struct gl_program_parameter_list *paramList; + unsigned int unNumParamData; + unsigned int ui; + if(GL_FALSE == vp->loaded) { if(vp->r700Shader.bNeedsAssembly == GL_TRUE) @@ -385,21 +389,7 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit); */ - return GL_TRUE; -} - -GLboolean r700SendVSConstants(GLcontext * ctx) -{ - context_t *context = R700_CONTEXT(ctx); - BATCH_LOCALS(&context->radeon); - struct r700_vertex_program *vp - = (struct r700_vertex_program *)ctx->VertexProgram._Current; - struct gl_program_parameter_list *paramList; - unsigned int unNumParamData; - unsigned int ui; - /* sent out shader constants. */ - paramList = vp->mesa_program.Base.Parameters; if(NULL != paramList) diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_drm.h b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h index 8eeaea1cb20..d52fb017d8a 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bo_drm.h +++ b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h @@ -83,6 +83,10 @@ struct radeon_bo_funcs { int (*bo_unmap)(struct radeon_bo *bo); int (*bo_wait)(struct radeon_bo *bo); int (*bo_is_static)(struct radeon_bo *bo); + int (*bo_set_tiling)(struct radeon_bo *bo, uint32_t tiling_flags, + uint32_t pitch); + int (*bo_get_tiling)(struct radeon_bo *bo, uint32_t *tiling_flags, + uint32_t *pitch); }; struct radeon_bo_manager { @@ -187,6 +191,18 @@ static inline int _radeon_bo_wait(struct radeon_bo *bo, return bo->bom->funcs->bo_wait(bo); } +static inline int radeon_bo_set_tiling(struct radeon_bo *bo, + uint32_t tiling_flags, uint32_t pitch) +{ + return bo->bom->funcs->bo_set_tiling(bo, tiling_flags, pitch); +} + +static inline int radeon_bo_get_tiling(struct radeon_bo *bo, + uint32_t *tiling_flags, uint32_t *pitch) +{ + return bo->bom->funcs->bo_get_tiling(bo, tiling_flags, pitch); +} + static inline int radeon_bo_is_static(struct radeon_bo *bo) { if (bo->bom->funcs->bo_is_static) diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c index 992eb4611b1..d6d22cb4c37 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c +++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c @@ -577,6 +577,8 @@ static struct radeon_bo_funcs bo_legacy_funcs = { bo_unmap, NULL, bo_is_static, + NULL, + NULL, }; static int bo_vram_validate(struct radeon_bo *bo, @@ -622,12 +624,34 @@ static int bo_vram_validate(struct radeon_bo *bo, if (bo_legacy->dirty || bo_legacy->tobj->base.dirty_images[0]) { if (IS_R600_CLASS(boml->screen)) { - char *src = bo_legacy->ptr; - char *dst = (char *) boml->screen->driScreen->pFB + - (bo_legacy->offset - boml->fb_location); + drm_radeon_texture_t tex; + drm_radeon_tex_image_t tmp; + int ret; - /* FIXME: alignment, pitch, etc. */ - memcpy(dst, src, bo->size); + tex.offset = bo_legacy->offset; + tex.image = &tmp; + assert(!(tex.offset & 1023)); + + tmp.x = 0; + tmp.y = 0; + tmp.width = bo->size; + tmp.height = 1; + tmp.data = bo_legacy->ptr; + tex.format = RADEON_TXFORMAT_ARGB8888; + tex.width = tmp.width; + tex.height = tmp.height; + tex.pitch = bo->size; + do { + ret = drmCommandWriteRead(bo->bom->fd, + DRM_RADEON_TEXTURE, + &tex, + sizeof(drm_radeon_texture_t)); + if (ret) { + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "DRM_RADEON_TEXTURE: again!\n"); + usleep(1); + } + } while (ret == -EAGAIN); } else { /* Copy to VRAM using a blit. * All memory is 4K aligned. We're using 1024 pixels wide blits. @@ -904,3 +928,32 @@ unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo) return bo->size; } +/* + * Fake up a bo for things like texture image_override. + * bo->offset already includes fb_location + */ +struct radeon_bo *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom, + int size, + uint32_t offset) +{ + struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom; + struct bo_legacy *bo; + +#ifdef RADEON_DEBUG_BO + bo = bo_allocate(boml, size, 0, RADEON_GEM_DOMAIN_VRAM, 0, "fake bo"); +#else + bo = bo_allocate(boml, size, 0, RADEON_GEM_DOMAIN_VRAM, 0); +#endif /* RADEON_DEBUG_BO */ + if (bo == NULL) + return NULL; + bo->static_bo = 1; + bo->offset = offset; + bo->base.handle = bo->offset; + bo->ptr = boml->screen->driScreen->pFB + (offset - boml->fb_location); + if (bo->base.handle > boml->nhandle) { + boml->nhandle = bo->base.handle + 1; + } + radeon_bo_ref(&(bo->base)); + return &(bo->base); +} + diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h index 0db817cab07..455adebc099 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h +++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h @@ -42,5 +42,8 @@ struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *sc void radeon_bo_manager_legacy_dtor(struct radeon_bo_manager *bom); void radeon_bo_legacy_texture_age(struct radeon_bo_manager *bom); unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo); +struct radeon_bo *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom, + int size, + uint32_t offset); #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h index e0c70dd9a11..a42870f4a93 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h +++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h @@ -15,6 +15,12 @@ #define RADEON_GEM_DOMAIN_GTT 0x2 // GTT or cache flushed #define RADEON_GEM_DOMAIN_VRAM 0x4 // VRAM domain +#define RADEON_TILING_MACRO 0x1 +#define RADEON_TILING_MICRO 0x2 +#define RADEON_TILING_SWAP 0x4 +#define RADEON_TILING_SURFACE 0x8 /* this object requires a surface + * when mapped - i.e. front buffer */ + /* to be used to build locally in mesa with no libdrm bits */ #include "../radeon/radeon_bo_drm.h" #include "../radeon/radeon_cs_drm.h" diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index 0a6a2df35b8..a275c8fb143 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -356,11 +356,14 @@ #define PCI_CHIP_RV770_947A 0x947A #define PCI_CHIP_RV770_947B 0x947B +#define PCI_CHIP_RV730_9480 0x9480 #define PCI_CHIP_RV730_9487 0x9487 +#define PCI_CHIP_RV730_9488 0x9488 #define PCI_CHIP_RV730_9489 0x9489 #define PCI_CHIP_RV730_948F 0x948F #define PCI_CHIP_RV730_9490 0x9490 #define PCI_CHIP_RV730_9491 0x9491 +#define PCI_CHIP_RV730_9495 0x9495 #define PCI_CHIP_RV730_9498 0x9498 #define PCI_CHIP_RV730_949C 0x949C #define PCI_CHIP_RV730_949E 0x949E @@ -374,12 +377,16 @@ #define PCI_CHIP_RV710_9552 0x9552 #define PCI_CHIP_RV710_9553 0x9553 #define PCI_CHIP_RV710_9555 0x9555 +#define PCI_CHIP_RV710_9557 0x9557 #define PCI_CHIP_RV740_94A0 0x94A0 #define PCI_CHIP_RV740_94A1 0x94A1 +#define PCI_CHIP_RV740_94A3 0x94A3 #define PCI_CHIP_RV740_94B1 0x94B1 #define PCI_CHIP_RV740_94B3 0x94B3 +#define PCI_CHIP_RV740_94B4 0x94B4 #define PCI_CHIP_RV740_94B5 0x94B5 +#define PCI_CHIP_RV740_94B9 0x94B9 enum { CHIP_FAMILY_R100, diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index 7f503a9ff71..330c2c8a86e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -481,32 +481,6 @@ void radeonCopyBuffer( __DRIdrawablePrivate *dPriv, if (!n) continue; - if (IS_R600_CLASS(rmesa->radeonScreen)) { - int cpp = rmesa->radeonScreen->cpp; - int src_pitch = rmesa->radeonScreen->backPitch * cpp; - int dst_pitch = rmesa->radeonScreen->frontPitch * cpp; - char *src = (char *)rmesa->radeonScreen->driScreen->pFB + rmesa->radeonScreen->backOffset; - char *dst = (char *)rmesa->radeonScreen->driScreen->pFB + rmesa->radeonScreen->frontOffset; - int j; - drm_clip_rect_t *pb = rmesa->sarea->boxes; - - for (j = 0; j < n; j++) { - int x = pb[j].x1; - int y = pb[j].y1; - int w = pb[j].x2 - x; - int h = pb[j].y2 - y; - - src += (y * src_pitch) + (x * cpp); - dst += (y * dst_pitch) + (x * cpp); - - while (h--) { - memcpy(dst, src, w * cpp); - src += src_pitch; - dst += dst_pitch; - } - } - } - ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP ); if ( ret ) { @@ -1093,7 +1067,7 @@ void radeonFlush(GLcontext *ctx) * each of N places that do rendering. This has worse performances, * but it is much easier to get correct. */ - if (radeon->is_front_buffer_rendering) { + if (!radeon->is_front_buffer_rendering) { radeon->front_buffer_dirty = GL_FALSE; } } diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 2a017b59cfc..f71dc1cb233 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -211,6 +211,7 @@ GLboolean radeonInitContext(radeonContextPtr radeon, radeon->dri.screen = sPriv; radeon->dri.hwContext = driContextPriv->hHWContext; radeon->dri.hwLock = &sPriv->pSAREA->lock; + radeon->dri.hwLockCount = 0; radeon->dri.fd = sPriv->fd; radeon->dri.drmMinor = sPriv->drm_version.minor; @@ -294,11 +295,10 @@ void radeonDestroyContext(__DRIcontextPrivate *driContextPriv ) GET_CURRENT_CONTEXT(ctx); radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate; radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL; - - /* +r6/r7 */ - __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) /* +r6/r7 */ + __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private); - /* --------- */ +#endif if (radeon == current) { radeon_firevertices(radeon); @@ -306,16 +306,7 @@ void radeonDestroyContext(__DRIcontextPrivate *driContextPriv ) } assert(radeon); - if (radeon) - { - -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) /* +r6/r7 */ - if (IS_R600_CLASS(screen)) - { - r600DestroyContext(driContextPriv); - } -#endif - + if (radeon) { if (radeon->dma.current) { rcommonFlushCmdBuf( radeon, __FUNCTION__ ); } @@ -762,8 +753,10 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) bo = depth_bo; radeon_bo_ref(bo); } else { + uint32_t tiling_flags = 0, pitch = 0; + int ret; #ifdef RADEON_DEBUG_BO - bo = radeon_bo_open(radeon->radeonScreen->bom, + bo = radeon_bo_open(radeon->radeonScreen->bom, buffers[i].name, 0, 0, @@ -784,6 +777,13 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) regname, buffers[i].name); } + + ret = radeon_bo_get_tiling(bo, &tiling_flags, &pitch); + if (tiling_flags & RADEON_TILING_MACRO) + bo->flags |= RADEON_BO_FLAGS_MACRO_TILE; + if (tiling_flags & RADEON_TILING_MICRO) + bo->flags |= RADEON_BO_FLAGS_MICRO_TILE; + } if (buffers[i].attachment == __DRI_BUFFER_DEPTH) { diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index d7e94a68949..f8e1a25c9fa 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -365,6 +365,7 @@ struct radeon_dri_mirror { drm_context_t hwContext; drm_hw_lock_t *hwLock; + int hwLockCount; int fd; int drmMinor; }; diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c index f28efa33e9a..f05b106aaf8 100644 --- a/src/mesa/drivers/dri/radeon/radeon_fbo.c +++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c @@ -291,7 +291,7 @@ radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv) rrb->base.RedBits = 8; rrb->base.GreenBits = 8; rrb->base.BlueBits = 8; - rrb->base.AlphaBits = 8; + rrb->base.AlphaBits = 0; rrb->base.DataType = GL_UNSIGNED_BYTE; break; case GL_RGBA8: @@ -407,7 +407,7 @@ restart: rrb->cpp = 2; rrb->base._ActualFormat = GL_RGB5; rrb->base._BaseFormat = GL_RGB; - rrb->base.DataType = GL_UNSIGNED_SHORT; + rrb->base.DataType = GL_UNSIGNED_BYTE; DBG("Render to RGB5 texture OK\n"); } else if (texImage->TexFormat == &_mesa_texformat_argb1555) { diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c index 2f0ed1cfced..6294b7e42be 100644 --- a/src/mesa/drivers/dri/radeon/radeon_lock.c +++ b/src/mesa/drivers/dri/radeon/radeon_lock.c @@ -86,8 +86,34 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags) rmesa->vtbl.get_lock(rmesa); } - -void radeon_lock_hardware(radeonContextPtr radeon) +#ifndef NDEBUG +struct lock_debug { + const char* function; + const char* file; + int line; +}; + +static struct lock_debug ldebug = {0}; +#endif + +#if 0 +/** TODO: use atomic operations for reference counting **/ +/** gcc 4.2 has builtin functios for this **/ +#define ATOMIC_INC_AND_FETCH(atomic) __sync_add_and_fetch(&atomic, 1) +#define ATOMIC_DEC_AND_FETCH(atomic) __sync_sub_and_fetch(&atomic, 1) +#else +#define ATOMIC_INC_AND_FETCH(atomic) (++atomic) +#define ATOMIC_DEC_AND_FETCH(atomic) (--atomic) +#endif + + +void radeon_lock_hardware(radeonContextPtr radeon +#ifndef NDEBUG + ,const char* function + ,const char* file + ,const int line +#endif + ) { char ret = 0; struct radeon_framebuffer *rfb = NULL; @@ -102,16 +128,39 @@ void radeon_lock_hardware(radeonContextPtr radeon) } if (!radeon->radeonScreen->driScreen->dri2.enabled) { + if (ATOMIC_INC_AND_FETCH(radeon->dri.hwLockCount) > 1) + { +#ifndef NDEBUG + if ( RADEON_DEBUG & DEBUG_SANITY ) + fprintf(stderr, "*** %d times of recursive call to %s ***\n" + "Original call was from %s (file: %s line: %d)\n" + "Now call is coming from %s (file: %s line: %d)\n" + , radeon->dri.hwLockCount, __FUNCTION__ + , ldebug.function, ldebug.file, ldebug.line + , function, file, line + ); +#endif + return; + } DRM_CAS(radeon->dri.hwLock, radeon->dri.hwContext, (DRM_LOCK_HELD | radeon->dri.hwContext), ret ); if (ret) radeonGetLock(radeon, 0); +#ifndef NDEBUG + ldebug.function = function; + ldebug.file = file; + ldebug.line = line; +#endif } } void radeon_unlock_hardware(radeonContextPtr radeon) { if (!radeon->radeonScreen->driScreen->dri2.enabled) { + if (ATOMIC_DEC_AND_FETCH(radeon->dri.hwLockCount) > 0) + { + return; + } DRM_UNLOCK( radeon->dri.fd, radeon->dri.hwLock, radeon->dri.hwContext ); diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.h b/src/mesa/drivers/dri/radeon/radeon_lock.h index 2817709eed6..da5a5b43715 100644 --- a/src/mesa/drivers/dri/radeon/radeon_lock.h +++ b/src/mesa/drivers/dri/radeon/radeon_lock.h @@ -48,12 +48,22 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags); -void radeon_lock_hardware(radeonContextPtr rmesa); +void radeon_lock_hardware(radeonContextPtr rmesa +#ifndef NDEBUG + ,const char* function + ,const char* file + ,const int line +#endif + ); void radeon_unlock_hardware(radeonContextPtr rmesa); /* Lock the hardware and validate our state. */ +#ifdef NDEBUG #define LOCK_HARDWARE( rmesa ) radeon_lock_hardware(rmesa) +#else +#define LOCK_HARDWARE( rmesa ) radeon_lock_hardware(rmesa, __FUNCTION__, __FILE__, __LINE__) +#endif #define UNLOCK_HARDWARE( rmesa ) radeon_unlock_hardware(rmesa) #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c index 071a18e7d86..d4082bf68f4 100644 --- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c @@ -326,7 +326,8 @@ GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt, if (face >= mt->faces || level < mt->firstLevel || level > mt->lastLevel) return GL_FALSE; - if (texImage->IsCompressed != mt->compressed) + if ((!texImage->IsCompressed && mt->compressed) || + (texImage->IsCompressed && !mt->compressed)) return GL_FALSE; if (!texImage->IsCompressed && @@ -366,8 +367,8 @@ GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_textu mt->width0 == firstImage->Width && mt->height0 == firstImage->Height && mt->depth0 == firstImage->Depth && - mt->bpp == firstImage->TexFormat->TexelBytes && - mt->compressed == compressed); + mt->compressed == compressed && + (!mt->compressed ? (mt->bpp == firstImage->TexFormat->TexelBytes) : 1)); } diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 5f1af5b0da5..7b759661ca7 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -878,11 +878,14 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) screen->chip_flags = RADEON_CHIPSET_TCL; break; + case PCI_CHIP_RV730_9480: case PCI_CHIP_RV730_9487: + case PCI_CHIP_RV730_9488: case PCI_CHIP_RV730_9489: case PCI_CHIP_RV730_948F: case PCI_CHIP_RV730_9490: case PCI_CHIP_RV730_9491: + case PCI_CHIP_RV730_9495: case PCI_CHIP_RV730_9498: case PCI_CHIP_RV730_949C: case PCI_CHIP_RV730_949E: @@ -899,15 +902,19 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) case PCI_CHIP_RV710_9552: case PCI_CHIP_RV710_9553: case PCI_CHIP_RV710_9555: + case PCI_CHIP_RV710_9557: screen->chip_family = CHIP_FAMILY_RV710; screen->chip_flags = RADEON_CHIPSET_TCL; break; case PCI_CHIP_RV740_94A0: case PCI_CHIP_RV740_94A1: + case PCI_CHIP_RV740_94A3: case PCI_CHIP_RV740_94B1: case PCI_CHIP_RV740_94B3: + case PCI_CHIP_RV740_94B4: case PCI_CHIP_RV740_94B5: + case PCI_CHIP_RV740_94B9: screen->chip_family = CHIP_FAMILY_RV740; screen->chip_flags = RADEON_CHIPSET_TCL; break; @@ -1579,21 +1586,11 @@ static GLboolean radeonCreateContext(const __GLcontextModes * glVisual, { __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private); -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) - if (IS_R600_CLASS(screen)) - return r600CreateContext(glVisual, driContextPriv, sharedContextPriv); -#endif - #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) if (IS_R300_CLASS(screen)) return r300CreateContext(glVisual, driContextPriv, sharedContextPriv); #endif -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) - if (IS_R200_CLASS(screen)) - return r200CreateContext(glVisual, driContextPriv, sharedContextPriv); -#endif - #if !RADEON_COMMON (void)screen; return r100CreateContext(glVisual, driContextPriv, sharedContextPriv); @@ -1793,8 +1790,16 @@ getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo ) const struct __DriverAPIRec driDriverAPI = { .InitScreen = radeonInitScreen, .DestroyScreen = radeonDestroyScreen, +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) + .CreateContext = r200CreateContext, + .DestroyContext = r200DestroyContext, +#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) + .CreateContext = r600CreateContext, + .DestroyContext = r600DestroyContext, +#else .CreateContext = radeonCreateContext, .DestroyContext = radeonDestroyContext, +#endif .CreateBuffer = radeonCreateBuffer, .DestroyBuffer = radeonDestroyBuffer, .SwapBuffers = radeonSwapBuffers, diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c index b2a468b4fd6..5e4bf00d7ab 100644 --- a/src/mesa/drivers/dri/radeon/radeon_span.c +++ b/src/mesa/drivers/dri/radeon/radeon_span.c @@ -51,6 +51,59 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb); + +/* r200 depth buffer is always tiled - this is the formula + according to the docs unless I typo'ed in it +*/ +static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte *ptr = rrb->bo->ptr; + GLint offset; + if (rrb->has_surface) { + offset = x * rrb->cpp + y * rrb->pitch; + } else { + GLuint b; + offset = 0; + b = (((y >> 4) * (rrb->pitch >> 8) + (x >> 6))); + offset += (b >> 1) << 12; + offset += (((rrb->pitch >> 8) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11; + offset += ((y >> 2) & 0x3) << 9; + offset += ((x >> 3) & 0x1) << 8; + offset += ((x >> 4) & 0x3) << 6; + offset += ((x >> 2) & 0x1) << 5; + offset += ((y >> 1) & 0x1) << 4; + offset += ((x >> 1) & 0x1) << 3; + offset += (y & 0x1) << 2; + offset += (x & 0x1) << 1; + } + return &ptr[offset]; +} + +static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte *ptr = rrb->bo->ptr; + GLint offset; + if (rrb->has_surface) { + offset = x * rrb->cpp + y * rrb->pitch; + } else { + GLuint b; + offset = 0; + b = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5)); + offset += (b >> 1) << 12; + offset += (((rrb->pitch >> 7) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11; + offset += ((y >> 2) & 0x3) << 9; + offset += ((x >> 2) & 0x1) << 8; + offset += ((x >> 3) & 0x3) << 6; + offset += ((y >> 1) & 0x1) << 5; + offset += ((x >> 1) & 0x1) << 4; + offset += (y & 0x1) << 3; + offset += (x & 0x1) << 2; + } + return &ptr[offset]; +} + /* radeon tiling on r300-r500 has 4 states, macro-linear/micro-linear macro-linear/micro-tiled @@ -61,7 +114,6 @@ static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb); 4 byte surface 8/16 byte (unused) */ - static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb, GLint x, GLint y) { @@ -285,11 +337,21 @@ s8z24_to_z24s8(uint32_t val) */ #define VALUE_TYPE GLushort +#if defined(RADEON_COMMON_FOR_R200) +#define WRITE_DEPTH( _x, _y, d ) \ + *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d +#else #define WRITE_DEPTH( _x, _y, d ) \ *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d +#endif +#if defined(RADEON_COMMON_FOR_R200) +#define READ_DEPTH( d, _x, _y ) \ + d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) +#else #define READ_DEPTH( d, _x, _y ) \ d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) +#endif #define TAG(x) radeon##x##_z16 #include "depthtmp.h" @@ -301,7 +363,7 @@ s8z24_to_z24s8(uint32_t val) */ #define VALUE_TYPE GLuint -#ifdef COMPILE_R300 +#if defined(COMPILE_R300) #define WRITE_DEPTH( _x, _y, d ) \ do { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ @@ -310,6 +372,15 @@ do { \ tmp |= ((d << 8) & 0xffffff00); \ *_ptr = tmp; \ } while (0) +#elif defined(RADEON_COMMON_FOR_R200) +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ + tmp &= 0xff000000; \ + tmp |= ((d) & 0x00ffffff); \ + *_ptr = tmp; \ +} while (0) #else #define WRITE_DEPTH( _x, _y, d ) \ do { \ @@ -321,19 +392,21 @@ do { \ } while (0) #endif -#ifdef COMPILE_R300 +#if defined(COMPILE_R300) #define READ_DEPTH( d, _x, _y ) \ do { \ d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \ }while(0) +#elif defined(RADEON_COMMON_FOR_R200) +#define READ_DEPTH( d, _x, _y ) \ + do { \ + d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \ + }while(0) #else #define READ_DEPTH( d, _x, _y ) \ d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; #endif -/* - fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\ - d = *(GLuint*)(radeon_ptr(rrb, _x, _y )) & 0x00ffffff; -*/ + #define TAG(x) radeon##x##_z24 #include "depthtmp.h" @@ -345,12 +418,19 @@ do { \ */ #define VALUE_TYPE GLuint -#ifdef COMPILE_R300 +#if defined(COMPILE_R300) #define WRITE_DEPTH( _x, _y, d ) \ do { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ *_ptr = d; \ } while (0) +#elif defined(RADEON_COMMON_FOR_R200) +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = z24s8_to_s8z24(d); \ + *_ptr = tmp; \ +} while (0) #else #define WRITE_DEPTH( _x, _y, d ) \ do { \ @@ -360,20 +440,22 @@ do { \ } while (0) #endif -#ifdef COMPILE_R300 +#if defined(COMPILE_R300) #define READ_DEPTH( d, _x, _y ) \ do { \ d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \ }while(0) +#elif defined(RADEON_COMMON_FOR_R200) +#define READ_DEPTH( d, _x, _y ) \ + do { \ + d = s8z24_to_z24s8(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))); \ + }while(0) #else #define READ_DEPTH( d, _x, _y ) do { \ d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off ))); \ } while (0) #endif -/* - fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\ - d = *(GLuint*)(radeon_ptr(rrb, _x, _y )) & 0x00ffffff; -*/ + #define TAG(x) radeon##x##_z24_s8 #include "depthtmp.h" @@ -392,6 +474,15 @@ do { \ tmp |= (d) & 0xff; \ *_ptr = tmp; \ } while (0) +#elif defined(RADEON_COMMON_FOR_R200) +#define WRITE_STENCIL( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off); \ + GLuint tmp = *_ptr; \ + tmp &= 0x00ffffff; \ + tmp |= (((d) & 0xff) << 24); \ + *_ptr = tmp; \ +} while (0) #else #define WRITE_STENCIL( _x, _y, d ) \ do { \ @@ -410,6 +501,13 @@ do { \ GLuint tmp = *_ptr; \ d = tmp & 0x000000ff; \ } while (0) +#elif defined(RADEON_COMMON_FOR_R200) +#define READ_STENCIL( d, _x, _y ) \ +do { \ + GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ + d = (tmp & 0xff000000) >> 24; \ +} while (0) #else #define READ_STENCIL( d, _x, _y ) \ do { \ diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index 6a065f04688..fa16f44c18e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -610,9 +610,17 @@ static void radeon_teximage( if (pixels) { radeon_teximage_map(image, GL_TRUE); - if (compressed) { - memcpy(texImage->Data, pixels, imageSize); + if (image->mt) { + uint32_t srcRowStride, bytesPerRow, rows; + srcRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width); + bytesPerRow = srcRowStride; + rows = (height + 3) / 4; + copy_rows(texImage->Data, image->mt->levels[level].rowstride, + pixels, srcRowStride, rows, bytesPerRow); + } else { + memcpy(texImage->Data, pixels, imageSize); + } } else { GLuint dstRowStride; GLuint *dstImageOffsets; @@ -756,14 +764,23 @@ static void radeon_texsubimage(GLcontext* ctx, int dims, GLenum target, int leve } if (compressed) { - uint32_t srcRowStride, bytesPerRow, rows; - dstRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, texImage->Width); + uint32_t srcRowStride, bytesPerRow, rows; + GLubyte *img_start; + if (!image->mt) { + dstRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, texImage->Width); + img_start = _mesa_compressed_image_address(xoffset, yoffset, 0, + texImage->TexFormat->MesaFormat, + texImage->Width, texImage->Data); + } + else { + uint32_t blocks_x = dstRowStride / (image->mt->bpp * 4); + img_start = texImage->Data + image->mt->bpp * 4 * (blocks_x * (yoffset / 4) + xoffset / 4); + } srcRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width); bytesPerRow = srcRowStride; - rows = height / 4; + rows = (height + 3) / 4; - copy_rows(texImage->Data, dstRowStride, image->base.Data, srcRowStride, rows, - bytesPerRow); + copy_rows(img_start, dstRowStride, pixels, srcRowStride, rows, bytesPerRow); } else { if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat, @@ -884,8 +901,8 @@ static void migrate_image_to_miptree(radeon_mipmap_tree *mt, radeon_texture_imag uint32_t height; /* need to confirm this value is correct */ if (mt->compressed) { - height = image->base.Height / 4; - srcrowstride = image->base.RowStride * mt->bpp; + height = (image->base.Height + 3) / 4; + srcrowstride = _mesa_compressed_row_stride(image->base.TexFormat->MesaFormat, image->base.Width); } else { height = image->base.Height * image->base.Depth; srcrowstride = image->base.Width * image->base.TexFormat->TexelBytes; @@ -1000,6 +1017,8 @@ radeon_get_tex_image(GLcontext * ctx, GLenum target, GLint level, } if (compressed) { + /* FIXME: this can't work for small textures (mips) which + use different hw stride */ _mesa_get_compressed_teximage(ctx, target, level, pixels, texObj, texImage); } else { |