diff options
Diffstat (limited to 'src/mesa')
193 files changed, 5307 insertions, 3571 deletions
diff --git a/src/mesa/Makefile b/src/mesa/Makefile index a903a260ac9..88f31b68695 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -12,11 +12,10 @@ DRICORE_OBJ_DIR := objs-dricore include sources.mak # adjust object dirs +DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS)) MESA_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_OBJECTS)) MESA_GALLIUM_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_GALLIUM_OBJECTS)) -DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS)) - # define preprocessor flags MESA_CPPFLAGS := $(API_DEFINES) $(DEFINES) @@ -124,6 +123,8 @@ depend: $(ALL_SOURCES) @ touch depend @$(MKDEP) $(MKDEP_OPTIONS) -p$(MESA_OBJ_DIR)/ $(MESA_CPPFLAGS) \ $(ALL_SOURCES) > /dev/null 2>/dev/null + @$(MKDEP) $(MKDEP_OPTIONS) -a -p$(DRICORE_OBJ_DIR)/ $(MESA_CPPFLAGS) \ + $(ALL_SOURCES) > /dev/null 2>/dev/null ###################################################################### # Installation rules diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 8ab129dd73d..76630264bf7 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -95,8 +95,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->TexSubImage2D = _mesa_store_texsubimage2d; driver->TexSubImage3D = _mesa_store_texsubimage3d; driver->GetTexImage = _mesa_get_teximage; - driver->CopyTexImage1D = _mesa_meta_CopyTexImage1D; - driver->CopyTexImage2D = _mesa_meta_CopyTexImage2D; driver->CopyTexSubImage1D = _mesa_meta_CopyTexSubImage1D; driver->CopyTexSubImage2D = _mesa_meta_CopyTexSubImage2D; driver->CopyTexSubImage3D = _mesa_meta_CopyTexSubImage3D; diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 0e58aeca3f5..f9b4755988b 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -62,6 +62,7 @@ #include "main/teximage.h" #include "main/texparam.h" #include "main/texstate.h" +#include "main/uniforms.h" #include "main/varray.h" #include "main/viewport.h" #include "program/program.h" @@ -89,13 +90,14 @@ #define META_SCISSOR 0x100 #define META_SHADER 0x200 #define META_STENCIL_TEST 0x400 -#define META_TRANSFORM 0x800 /**< modelview, projection, clip planes */ +#define META_TRANSFORM 0x800 /**< modelview/projection matrix state */ #define META_TEXTURE 0x1000 #define META_VERTEX 0x2000 #define META_VIEWPORT 0x4000 #define META_CLAMP_FRAGMENT_COLOR 0x8000 #define META_CLAMP_VERTEX_COLOR 0x10000 #define META_CONDITIONAL_RENDER 0x20000 +#define META_CLIP 0x40000 /*@}*/ @@ -164,6 +166,8 @@ struct save_state GLfloat ModelviewMatrix[16]; GLfloat ProjectionMatrix[16]; GLfloat TextureMatrix[16]; + + /** META_CLIP */ GLbitfield ClipPlanesEnabled; /** META_TEXTURE */ @@ -235,6 +239,8 @@ struct clear_state { GLuint ArrayObj; GLuint VBO; + GLuint ShaderProg; + GLint ColorLocation; }; @@ -544,6 +550,9 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_Ortho(0.0, ctx->DrawBuffer->Width, 0.0, ctx->DrawBuffer->Height, -1.0, 1.0); + } + + if (state & META_CLIP) { save->ClipPlanesEnabled = ctx->Transform.ClipPlanesEnabled; if (ctx->Transform.ClipPlanesEnabled) { GLuint i; @@ -843,7 +852,9 @@ _mesa_meta_end(struct gl_context *ctx) _mesa_LoadMatrixf(save->ProjectionMatrix); _mesa_MatrixMode(save->MatrixMode); + } + if (state & META_CLIP) { if (save->ClipPlanesEnabled) { GLuint i; for (i = 0; i < ctx->Const.MaxClipPlanes; i++) { @@ -1589,10 +1600,166 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers) _mesa_meta_end(ctx); } +static void +meta_glsl_clear_init(struct gl_context *ctx, struct clear_state *clear) +{ + const char *vs_source = + "attribute vec4 position;\n" + "void main()\n" + "{\n" + " gl_Position = position;\n" + "}\n"; + const char *fs_source = + "uniform vec4 color;\n" + "void main()\n" + "{\n" + " gl_FragColor = color;\n" + "}\n"; + GLuint vs, fs; + + if (clear->ArrayObj != 0) + return; + + /* create vertex array object */ + _mesa_GenVertexArrays(1, &clear->ArrayObj); + _mesa_BindVertexArray(clear->ArrayObj); + + /* create vertex array buffer */ + _mesa_GenBuffersARB(1, &clear->VBO); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO); + + /* setup vertex arrays */ + _mesa_VertexAttribPointerARB(0, 3, GL_FLOAT, GL_FALSE, 0, (void *)0); + _mesa_EnableVertexAttribArrayARB(0); + + vs = _mesa_CreateShaderObjectARB(GL_VERTEX_SHADER); + _mesa_ShaderSourceARB(vs, 1, &vs_source, NULL); + _mesa_CompileShaderARB(vs); + + fs = _mesa_CreateShaderObjectARB(GL_FRAGMENT_SHADER); + _mesa_ShaderSourceARB(fs, 1, &fs_source, NULL); + _mesa_CompileShaderARB(fs); + + clear->ShaderProg = _mesa_CreateProgramObjectARB(); + _mesa_AttachShader(clear->ShaderProg, fs); + _mesa_AttachShader(clear->ShaderProg, vs); + _mesa_BindAttribLocationARB(clear->ShaderProg, 0, "position"); + _mesa_LinkProgramARB(clear->ShaderProg); + + clear->ColorLocation = _mesa_GetUniformLocationARB(clear->ShaderProg, + "color"); +} + +/** + * Meta implementation of ctx->Driver.Clear() in terms of polygon rendering. + */ +void +_mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers) +{ + struct clear_state *clear = &ctx->Meta->Clear; + GLbitfield metaSave; + const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1; + struct gl_framebuffer *fb = ctx->DrawBuffer; + const float x0 = ((float)fb->_Xmin / fb->Width) * 2.0f - 1.0f; + const float y0 = ((float)fb->_Ymin / fb->Height) * 2.0f - 1.0f; + const float x1 = ((float)fb->_Xmax / fb->Width) * 2.0f - 1.0f; + const float y1 = ((float)fb->_Ymax / fb->Height) * 2.0f - 1.0f; + const float z = -invert_z(ctx->Depth.Clear); + struct vertex { + GLfloat x, y, z; + } verts[4]; + + metaSave = (META_ALPHA_TEST | + META_BLEND | + META_DEPTH_TEST | + META_RASTERIZATION | + META_SHADER | + META_STENCIL_TEST | + META_VERTEX | + META_VIEWPORT | + META_CLIP | + META_CLAMP_FRAGMENT_COLOR); + + if (!(buffers & BUFFER_BITS_COLOR)) { + /* We'll use colormask to disable color writes. Otherwise, + * respect color mask + */ + metaSave |= META_COLOR_MASK; + } + + _mesa_meta_begin(ctx, metaSave); + + meta_glsl_clear_init(ctx, clear); + + _mesa_UseProgramObjectARB(clear->ShaderProg); + _mesa_Uniform4fvARB(clear->ColorLocation, 1, + ctx->Color.ClearColorUnclamped); + + _mesa_BindVertexArray(clear->ArrayObj); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO); + + /* GL_COLOR_BUFFER_BIT */ + if (buffers & BUFFER_BITS_COLOR) { + /* leave colormask, glDrawBuffer state as-is */ + + /* Clears never have the color clamped. */ + _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE); + } + else { + ASSERT(metaSave & META_COLOR_MASK); + _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + } + + /* GL_DEPTH_BUFFER_BIT */ + if (buffers & BUFFER_BIT_DEPTH) { + _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_TRUE); + _mesa_DepthFunc(GL_ALWAYS); + _mesa_DepthMask(GL_TRUE); + } + else { + assert(!ctx->Depth.Test); + } + + /* GL_STENCIL_BUFFER_BIT */ + if (buffers & BUFFER_BIT_STENCIL) { + _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_TRUE); + _mesa_StencilOpSeparate(GL_FRONT_AND_BACK, + GL_REPLACE, GL_REPLACE, GL_REPLACE); + _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS, + ctx->Stencil.Clear & stencilMax, + ctx->Stencil.WriteMask[0]); + } + else { + assert(!ctx->Stencil.Enabled); + } + + /* vertex positions */ + verts[0].x = x0; + verts[0].y = y0; + verts[0].z = z; + verts[1].x = x1; + verts[1].y = y0; + verts[1].z = z; + verts[2].x = x1; + verts[2].y = y1; + verts[2].z = z; + verts[3].x = x0; + verts[3].y = y1; + verts[3].z = z; + + /* upload new vertex data */ + _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts), verts, + GL_DYNAMIC_DRAW_ARB); + + /* draw quad */ + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + + _mesa_meta_end(ctx); +} /** * Meta implementation of ctx->Driver.CopyPixels() in terms - * of texture mapping and polygon rendering. + * of texture mapping and polygon rendering and GLSL shaders. */ void _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcX, GLint srcY, @@ -1625,6 +1792,7 @@ _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcX, GLint srcY, META_SHADER | META_TEXTURE | META_TRANSFORM | + META_CLIP | META_VERTEX | META_VIEWPORT)); @@ -1946,6 +2114,7 @@ _mesa_meta_DrawPixels(struct gl_context *ctx, META_SHADER | META_TEXTURE | META_TRANSFORM | + META_CLIP | META_VERTEX | META_VIEWPORT | META_CLAMP_FRAGMENT_COLOR | @@ -2155,6 +2324,7 @@ _mesa_meta_Bitmap(struct gl_context *ctx, META_SHADER | META_TEXTURE | META_TRANSFORM | + META_CLIP | META_VERTEX | META_VIEWPORT)); @@ -2678,119 +2848,6 @@ get_temp_image_type(struct gl_context *ctx, GLenum baseFormat) /** - * Helper for _mesa_meta_CopyTexImage1/2D() functions. - * Have to be careful with locking and meta state for pixel transfer. - */ -static void -copy_tex_image(struct gl_context *ctx, GLuint dims, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLsizei height, GLint border) -{ - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; - GLenum format, type; - GLint bpp; - void *buf; - struct gl_renderbuffer *read_rb = ctx->ReadBuffer->_ColorReadBuffer; - - texObj = _mesa_get_current_tex_object(ctx, target); - texImage = _mesa_get_tex_image(ctx, texObj, target, level); - - /* Choose format/type for temporary image buffer */ - format = _mesa_base_tex_format(ctx, internalFormat); - - if (format == GL_LUMINANCE && - _mesa_get_format_base_format(read_rb->Format) != GL_LUMINANCE) { - /* The glReadPixels() path will convert RGB to luminance by - * summing R+G+B. glCopyTexImage() is supposed to behave as - * glCopyPixels, which doesn't do that change, and instead - * leaves it up to glTexImage which converts RGB to luminance by - * just taking the R channel. To avoid glReadPixels() trashing - * our data, use RGBA for our temporary image. - */ - format = GL_RGBA; - } - - type = get_temp_image_type(ctx, format); - bpp = _mesa_bytes_per_pixel(format, type); - if (bpp <= 0) { - _mesa_problem(ctx, "Bad bpp in meta copy_tex_image()"); - return; - } - - /* - * Alloc image buffer (XXX could use a PBO) - */ - buf = malloc(width * height * bpp); - if (!buf) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims); - return; - } - - _mesa_unlock_texture(ctx, texObj); /* need to unlock first */ - - /* - * Read image from framebuffer (disable pixel transfer ops) - */ - _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER); - ctx->Driver.ReadPixels(ctx, x, y, width, height, - format, type, &ctx->Pack, buf); - _mesa_meta_end(ctx); - - if (texImage->Data) { - ctx->Driver.FreeTexImageData(ctx, texImage); - } - - /* The texture's format was already chosen in _mesa_CopyTexImage() */ - ASSERT(texImage->TexFormat != MESA_FORMAT_NONE); - - /* - * Store texture data (with pixel transfer ops) - */ - _mesa_meta_begin(ctx, META_PIXEL_STORE); - - _mesa_update_state(ctx); /* to update pixel transfer state */ - - if (target == GL_TEXTURE_1D) { - ctx->Driver.TexImage1D(ctx, target, level, internalFormat, - width, border, format, type, - buf, &ctx->Unpack, texObj, texImage); - } - else { - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, format, type, - buf, &ctx->Unpack, texObj, texImage); - } - _mesa_meta_end(ctx); - - _mesa_lock_texture(ctx, texObj); /* re-lock */ - - free(buf); -} - - -void -_mesa_meta_CopyTexImage1D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLint border) -{ - copy_tex_image(ctx, 1, target, level, internalFormat, x, y, - width, 1, border); -} - - -void -_mesa_meta_CopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLsizei height, GLint border) -{ - copy_tex_image(ctx, 2, target, level, internalFormat, x, y, - width, height, border); -} - - - -/** * Helper for _mesa_meta_CopyTexSubImage1/2/3D() functions. * Have to be careful with locking and meta state for pixel transfer. */ @@ -2812,6 +2869,16 @@ copy_tex_sub_image(struct gl_context *ctx, /* Choose format/type for temporary image buffer */ format = _mesa_get_format_base_format(texImage->TexFormat); + if (format == GL_LUMINANCE || + format == GL_LUMINANCE_ALPHA || + format == GL_INTENSITY) { + /* We don't want to use GL_LUMINANCE, GL_INTENSITY, etc. for the + * temp image buffer because glReadPixels will do L=R+G+B which is + * not what we want (should be L=R). + */ + format = GL_RGBA; + } + type = get_temp_image_type(ctx, format); bpp = _mesa_bytes_per_pixel(format, type); if (bpp <= 0) { diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h index b0797d3d91a..95b4b5579c8 100644 --- a/src/mesa/drivers/common/meta.h +++ b/src/mesa/drivers/common/meta.h @@ -43,6 +43,9 @@ extern void _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers); extern void +_mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers); + +extern void _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, GLsizei width, GLsizei height, GLint dstx, GLint dsty, GLenum type); @@ -69,16 +72,6 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, struct gl_texture_object *texObj); extern void -_mesa_meta_CopyTexImage1D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLint border); - -extern void -_mesa_meta_CopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLsizei height, GLint border); - -extern void _mesa_meta_CopyTexSubImage1D(struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width); diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c index 0226b38c4fc..77967ac2a43 100644 --- a/src/mesa/drivers/dri/common/xmlconfig.c +++ b/src/mesa/drivers/dri/common/xmlconfig.c @@ -765,9 +765,9 @@ static void parseDeviceAttr (struct OptConfData *data, const XML_Char **attr) { /** \brief Parse attributes of an application element. */ static void parseAppAttr (struct OptConfData *data, const XML_Char **attr) { GLuint i; - const XML_Char *name = NULL, *exec = NULL; + const XML_Char *exec = NULL; for (i = 0; attr[i]; i += 2) { - if (!strcmp (attr[i], "name")) name = attr[i+1]; + if (!strcmp (attr[i], "name")) /* not needed here */; else if (!strcmp (attr[i], "executable")) exec = attr[i+1]; else XML_WARNING("unknown application attribute: %s.", attr[i]); } diff --git a/src/mesa/drivers/dri/i915/i830_context.c b/src/mesa/drivers/dri/i915/i830_context.c index abfb32be3ae..d22118beb0b 100644 --- a/src/mesa/drivers/dri/i915/i830_context.c +++ b/src/mesa/drivers/dri/i915/i830_context.c @@ -33,6 +33,7 @@ #include "tnl/t_pipeline.h" #include "intel_span.h" #include "intel_tris.h" +#include "../glsl/ralloc.h" /*************************************** * Mesa's Driver Functions @@ -53,7 +54,7 @@ i830CreateContext(const struct gl_config * mesaVis, void *sharedContextPrivate) { struct dd_function_table functions; - struct i830_context *i830 = CALLOC_STRUCT(i830_context); + struct i830_context *i830 = rzalloc(NULL, struct i830_context); struct intel_context *intel = &i830->intel; struct gl_context *ctx = &intel->ctx; if (!i830) diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c index d4af5e51026..71ce44fd5c9 100644 --- a/src/mesa/drivers/dri/i915/i830_texstate.c +++ b/src/mesa/drivers/dri/i915/i830_texstate.c @@ -76,7 +76,8 @@ translate_texture_format(GLuint mesa_format) case MESA_FORMAT_RGBA_DXT5: return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); default: - fprintf(stderr, "%s: bad image format %x\n", __FUNCTION__, mesa_format); + fprintf(stderr, "%s: bad image format %s\n", __FUNCTION__, + _mesa_get_format_name(mesa_format)); abort(); return 0; } diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 08ea2870aa0..6d43726beb1 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -31,8 +31,13 @@ #include "intel_regions.h" #include "intel_tris.h" #include "intel_fbo.h" +#include "intel_buffers.h" +#include "tnl/tnl.h" #include "tnl/t_context.h" #include "tnl/t_vertex.h" +#include "swrast_setup/swrast_setup.h" +#include "main/renderbuffer.h" +#include "main/framebuffer.h" #define FILE_DEBUG_FLAG DEBUG_STATE @@ -616,11 +621,9 @@ i830_set_draw_region(struct intel_context *intel, uint32_t draw_x, draw_y; if (state->draw_region != color_regions[0]) { - intel_region_release(&state->draw_region); intel_region_reference(&state->draw_region, color_regions[0]); } if (state->depth_region != depth_region) { - intel_region_release(&state->depth_region); intel_region_reference(&state->depth_region, depth_region); } @@ -695,6 +698,159 @@ i830_set_draw_region(struct intel_context *intel, I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS); } +/** + * Update the hardware state for drawing into a window or framebuffer object. + * + * Called by glDrawBuffer, glBindFramebufferEXT, MakeCurrent, and other + * places within the driver. + * + * Basically, this needs to be called any time the current framebuffer + * changes, the renderbuffers change, or we need to draw into different + * color buffers. + */ +static void +i830_update_draw_buffer(struct intel_context *intel) +{ + struct gl_context *ctx = &intel->ctx; + struct gl_framebuffer *fb = ctx->DrawBuffer; + struct intel_region *colorRegions[MAX_DRAW_BUFFERS], *depthRegion = NULL; + struct intel_renderbuffer *irbDepth = NULL, *irbStencil = NULL; + bool fb_has_hiz = intel_framebuffer_has_hiz(fb); + + if (!fb) { + /* this can happen during the initial context initialization */ + return; + } + + irbDepth = intel_get_renderbuffer(fb, BUFFER_DEPTH); + irbStencil = intel_get_renderbuffer(fb, BUFFER_STENCIL); + + /* Do this here, not core Mesa, since this function is called from + * many places within the driver. + */ + if (ctx->NewState & _NEW_BUFFERS) { + /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */ + _mesa_update_framebuffer(ctx); + /* this updates the DrawBuffer's Width/Height if it's a FBO */ + _mesa_update_draw_buffer_bounds(ctx); + } + + if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) { + /* this may occur when we're called by glBindFrameBuffer() during + * the process of someone setting up renderbuffers, etc. + */ + /*_mesa_debug(ctx, "DrawBuffer: incomplete user FBO\n");*/ + return; + } + + /* How many color buffers are we drawing into? + * + * If there are zero buffers or the buffer is too big, don't configure any + * regions for hardware drawing. We'll fallback to software below. Not + * having regions set makes some of the software fallback paths faster. + */ + if ((fb->Width > ctx->Const.MaxRenderbufferSize) + || (fb->Height > ctx->Const.MaxRenderbufferSize) + || (fb->_NumColorDrawBuffers == 0)) { + /* writing to 0 */ + colorRegions[0] = NULL; + } + else if (fb->_NumColorDrawBuffers > 1) { + int i; + struct intel_renderbuffer *irb; + + for (i = 0; i < fb->_NumColorDrawBuffers; i++) { + irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]); + colorRegions[i] = irb ? irb->region : NULL; + } + } + else { + /* Get the intel_renderbuffer for the single colorbuffer we're drawing + * into. + */ + if (fb->Name == 0) { + /* drawing to window system buffer */ + if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) + colorRegions[0] = intel_get_rb_region(fb, BUFFER_FRONT_LEFT); + else + colorRegions[0] = intel_get_rb_region(fb, BUFFER_BACK_LEFT); + } + else { + /* drawing to user-created FBO */ + struct intel_renderbuffer *irb; + irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]); + colorRegions[0] = (irb && irb->region) ? irb->region : NULL; + } + } + + if (!colorRegions[0]) { + FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE); + } + else { + FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE); + } + + /* Check for depth fallback. */ + if (irbDepth && irbDepth->region) { + assert(!fb_has_hiz || irbDepth->Base.Format != MESA_FORMAT_S8_Z24); + FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_FALSE); + depthRegion = irbDepth->region; + } else if (irbDepth && !irbDepth->region) { + FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_TRUE); + depthRegion = NULL; + } else { /* !irbDepth */ + /* No fallback is needed because there is no depth buffer. */ + FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_FALSE); + depthRegion = NULL; + } + + /* Check for stencil fallback. */ + if (irbStencil && irbStencil->region) { + assert(irbStencil->Base.Format == MESA_FORMAT_S8_Z24); + FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE); + } else if (irbStencil && !irbStencil->region) { + FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_TRUE); + } else { /* !irbStencil */ + /* No fallback is needed because there is no stencil buffer. */ + FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE); + } + + /* If we have a (packed) stencil buffer attached but no depth buffer, + * we still need to set up the shared depth/stencil state so we can use it. + */ + if (depthRegion == NULL && irbStencil && irbStencil->region + && irbStencil->Base.Format == MESA_FORMAT_S8_Z24) { + depthRegion = irbStencil->region; + } + + /* + * Update depth and stencil test state + */ + ctx->Driver.Enable(ctx, GL_DEPTH_TEST, + (ctx->Depth.Test && fb->Visual.depthBits > 0)); + ctx->Driver.Enable(ctx, GL_STENCIL_TEST, + (ctx->Stencil.Enabled && fb->Visual.stencilBits > 0)); + + intel->vtbl.set_draw_region(intel, colorRegions, depthRegion, + fb->_NumColorDrawBuffers); + intel->NewGLState |= _NEW_BUFFERS; + + /* update viewport since it depends on window size */ + intelCalcViewport(ctx); + + /* Set state we know depends on drawable parameters: + */ + ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y, + ctx->Scissor.Width, ctx->Scissor.Height); + + ctx->Driver.DepthRange(ctx, ctx->Viewport.Near, ctx->Viewport.Far); + + /* Update culling direction which changes depending on the + * orientation of the buffer: + */ + ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace); +} + /* This isn't really handled at the moment. */ static void @@ -715,6 +871,12 @@ i830_assert_not_dirty( struct intel_context *intel ) static void i830_invalidate_state(struct intel_context *intel, GLuint new_state) { + struct gl_context *ctx = &intel->ctx; + + _swsetup_InvalidateState(ctx, new_state); + _tnl_InvalidateState(ctx, new_state); + _tnl_invalidate_vertex_state(ctx, new_state); + if (new_state & _NEW_LIGHT) i830_update_provoking_vertex(&intel->ctx); } @@ -728,6 +890,7 @@ i830InitVtbl(struct i830_context *i830) i830->intel.vtbl.new_batch = i830_new_batch; i830->intel.vtbl.reduced_primitive_state = i830_reduced_primitive_state; i830->intel.vtbl.set_draw_region = i830_set_draw_region; + i830->intel.vtbl.update_draw_buffer = i830_update_draw_buffer; i830->intel.vtbl.update_texture_state = i830UpdateTextureState; i830->intel.vtbl.render_start = i830_render_start; i830->intel.vtbl.render_prevalidate = i830_render_prevalidate; diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index f02f2d78267..11bee140ab6 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -36,6 +36,7 @@ #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" #include "tnl/tnl.h" +#include "../glsl/ralloc.h" #include "i915_reg.h" #include "i915_program.h" @@ -97,8 +98,7 @@ i915CreateContext(int api, void *sharedContextPrivate) { struct dd_function_table functions; - struct i915_context *i915 = - (struct i915_context *) CALLOC_STRUCT(i915_context); + struct i915_context *i915 = rzalloc(NULL, struct i915_context); struct intel_context *intel = &i915->intel; struct gl_context *ctx = &intel->ctx; diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index b67ebb9a1ec..32050cebf33 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -210,6 +210,7 @@ get_result_vector(struct i915_fragment_program *p, case PROGRAM_OUTPUT: switch (inst->DstReg.Index) { case FRAG_RESULT_COLOR: + case FRAG_RESULT_DATA0: return UREG(REG_TYPE_OC, 0); case FRAG_RESULT_DEPTH: p->depth_written = 1; @@ -302,7 +303,7 @@ do { \ /* * TODO: consider moving this into core */ -static void calc_live_regs( struct i915_fragment_program *p ) +static bool calc_live_regs( struct i915_fragment_program *p ) { const struct gl_fragment_program *program = &p->FragProg; GLuint regsUsed = 0xffff0000; @@ -316,6 +317,9 @@ static void calc_live_regs( struct i915_fragment_program *p ) /* Register is written to: unmark as live for this and preceeding ops */ if (inst->DstReg.File == PROGRAM_TEMPORARY) { + if (inst->DstReg.Index > 16) + return false; + live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask; if (live_components[inst->DstReg.Index] == 0) regsUsed &= ~(1 << inst->DstReg.Index); @@ -326,6 +330,9 @@ static void calc_live_regs( struct i915_fragment_program *p ) if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) { unsigned c; + if (inst->SrcReg[a].Index > 16) + return false; + regsUsed |= 1 << inst->SrcReg[a].Index; for (c = 0; c < 4; c++) { @@ -339,6 +346,8 @@ static void calc_live_regs( struct i915_fragment_program *p ) p->usedRegs[i] = regsUsed; } + + return true; } static GLuint get_live_regs( struct i915_fragment_program *p, @@ -393,7 +402,10 @@ upload_program(struct i915_fragment_program *p) /* Not always needed: */ - calc_live_regs(p); + if (!calc_live_regs(p)) { + i915_program_error(p, "Could not allocate registers"); + return; + } while (1) { GLuint src0, src1, src2, flags; @@ -1341,11 +1353,10 @@ i915ValidateFragmentProgram(struct i915_context *i915) intel->vertex_attr_count = 0; intel->wpos_offset = 0; - intel->wpos_size = 0; intel->coloroffset = 0; intel->specoffset = 0; - if (inputsRead & FRAG_BITS_TEX_ANY) { + if (inputsRead & FRAG_BITS_TEX_ANY || p->wpos_tex != -1) { EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16); } else { @@ -1384,17 +1395,15 @@ i915ValidateFragmentProgram(struct i915_context *i915) EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4); } else if (i == p->wpos_tex) { - + int wpos_size = 4 * sizeof(float); /* If WPOS is required, duplicate the XYZ position data in an * unused texture coordinate: */ s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK); - s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(3)); + s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(wpos_size)); intel->wpos_offset = offset; - intel->wpos_size = 3 * sizeof(GLuint); - - EMIT_PAD(intel->wpos_size); + EMIT_PAD(wpos_size); } } diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c index 507adf1d3dc..0a600d30bef 100644 --- a/src/mesa/drivers/dri/i915/i915_program.c +++ b/src/mesa/drivers/dri/i915/i915_program.c @@ -442,14 +442,16 @@ i915_emit_param4fv(struct i915_fragment_program * p, const GLfloat * values) void i915_program_error(struct i915_fragment_program *p, const char *fmt, ...) { - va_list args; + if (unlikely((INTEL_DEBUG & (DEBUG_WM | DEBUG_FALLBACKS)) != 0)) { + va_list args; - fprintf(stderr, "i915_program_error: "); - va_start(args, fmt); - vfprintf(stderr, fmt, args); - va_end(args); + fprintf(stderr, "i915_program_error: "); + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); - fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + } p->error = 1; } @@ -538,7 +540,6 @@ i915_upload_program(struct i915_context *i915, { GLuint program_size = p->csr - p->program; GLuint decl_size = p->decl - p->declarations; - GLuint nr; if (p->error) return; @@ -555,33 +556,33 @@ i915_upload_program(struct i915_context *i915, i915->state.ProgramSize = decl_size + program_size; } - nr = p->nr_constants; - if (i915->state.ConstantSize != 2 + nr*4 || - memcmp(i915->state.Constant + 2, - p->constant, 4*sizeof(int)*nr)) { - if (nr) { - I915_ACTIVESTATE(i915, I915_UPLOAD_CONSTANTS, 1); - I915_STATECHANGE(i915, I915_UPLOAD_CONSTANTS); - - i915->state.Constant[0] = _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4); - i915->state.Constant[1] = (1 << nr) -1; - - memcpy(&i915->state.Constant[2], p->constant, 4 * sizeof(int) * nr); - i915->state.ConstantSize = 2 + nr * 4; - - if (0) { - GLuint i; - for (i = 0; i < nr; i++) { - fprintf(stderr, "const[%d]: %f %f %f %f\n", i, - p->constant[i][0], - p->constant[i][1], p->constant[i][2], p->constant[i][3]); - } - } - } - else { - I915_ACTIVESTATE(i915, I915_UPLOAD_CONSTANTS, 0); + /* Always seemed to get a failure if I used memcmp() to + * shortcircuit this state upload. Needs further investigation? + */ + if (p->nr_constants) { + GLuint nr = p->nr_constants; + + I915_ACTIVESTATE(i915, I915_UPLOAD_CONSTANTS, 1); + I915_STATECHANGE(i915, I915_UPLOAD_CONSTANTS); + + i915->state.Constant[0] = _3DSTATE_PIXEL_SHADER_CONSTANTS | ((nr) * 4); + i915->state.Constant[1] = (1 << (nr - 1)) | ((1 << (nr - 1)) - 1); + + memcpy(&i915->state.Constant[2], p->constant, 4 * sizeof(int) * (nr)); + i915->state.ConstantSize = 2 + (nr) * 4; + + if (0) { + GLuint i; + for (i = 0; i < nr; i++) { + fprintf(stderr, "const[%d]: %f %f %f %f\n", i, + p->constant[i][0], + p->constant[i][1], p->constant[i][2], p->constant[i][3]); + } } } + else { + I915_ACTIVESTATE(i915, I915_UPLOAD_CONSTANTS, 0); + } p->on_hardware = 1; } diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c index 99212ad4fd1..2b35ed7105a 100644 --- a/src/mesa/drivers/dri/i915/i915_state.c +++ b/src/mesa/drivers/dri/i915/i915_state.c @@ -375,6 +375,9 @@ i915DepthMask(struct gl_context * ctx, GLboolean flag) DBG("%s flag (%d)\n", __FUNCTION__, flag); + if (!ctx->DrawBuffer || !ctx->DrawBuffer->Visual.depthBits) + flag = false; + dw = i915->state.Ctx[I915_CTXREG_LIS6]; if (flag && ctx->Depth.Test) dw |= S6_DEPTH_WRITE_ENABLE; @@ -398,31 +401,26 @@ void intelCalcViewport(struct gl_context * ctx) { struct intel_context *intel = intel_context(ctx); - const GLfloat *v = ctx->Viewport._WindowMap.m; - const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF; - GLfloat *m = intel->ViewportMatrix.m; - GLfloat yScale, yBias; - - if (ctx->DrawBuffer->Name) { - /* User created FBO */ - /* y=0=bottom */ - yScale = 1.0; - yBias = 0.0; - } - else { - /* window buffer, y=0=top */ - yScale = -1.0; - yBias = ctx->DrawBuffer->Height; - } - - m[MAT_SX] = v[MAT_SX]; - m[MAT_TX] = v[MAT_TX]; - m[MAT_SY] = v[MAT_SY] * yScale; - m[MAT_TY] = v[MAT_TY] * yScale + yBias; - - m[MAT_SZ] = v[MAT_SZ] * depthScale; - m[MAT_TZ] = v[MAT_TZ] * depthScale; + if (ctx->DrawBuffer->Name == 0) { + _math_matrix_viewport(&intel->ViewportMatrix, + ctx->Viewport.X, + ctx->DrawBuffer->Height - ctx->Viewport.Y, + ctx->Viewport.Width, + -ctx->Viewport.Height, + ctx->Viewport.Near, + ctx->Viewport.Far, + 1.0); + } else { + _math_matrix_viewport(&intel->ViewportMatrix, + ctx->Viewport.X, + ctx->Viewport.Y, + ctx->Viewport.Width, + ctx->Viewport.Height, + ctx->Viewport.Near, + ctx->Viewport.Far, + 1.0); + } } @@ -797,6 +795,10 @@ i915Enable(struct gl_context * ctx, GLenum cap, GLboolean state) case GL_DEPTH_TEST: dw = i915->state.Ctx[I915_CTXREG_LIS6]; + + if (!ctx->DrawBuffer || !ctx->DrawBuffer->Visual.depthBits) + state = false; + if (state) dw |= S6_DEPTH_TEST_ENABLE; else @@ -836,27 +838,17 @@ i915Enable(struct gl_context * ctx, GLenum cap, GLboolean state) break; case GL_STENCIL_TEST: - { - GLboolean hw_stencil = GL_FALSE; - if (ctx->DrawBuffer) { - struct intel_renderbuffer *irbStencil - = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL); - hw_stencil = (irbStencil && irbStencil->region); - } - if (hw_stencil) { - dw = i915->state.Ctx[I915_CTXREG_LIS5]; - if (state) - dw |= (S5_STENCIL_TEST_ENABLE | S5_STENCIL_WRITE_ENABLE); - else - dw &= ~(S5_STENCIL_TEST_ENABLE | S5_STENCIL_WRITE_ENABLE); - if (dw != i915->state.Ctx[I915_CTXREG_LIS5]) { - i915->state.Ctx[I915_CTXREG_LIS5] = dw; - I915_STATECHANGE(i915, I915_UPLOAD_CTX); - } - } - else { - FALLBACK(&i915->intel, I915_FALLBACK_STENCIL, state); - } + if (!ctx->DrawBuffer || !ctx->DrawBuffer->Visual.stencilBits) + state = false; + + dw = i915->state.Ctx[I915_CTXREG_LIS5]; + if (state) + dw |= (S5_STENCIL_TEST_ENABLE | S5_STENCIL_WRITE_ENABLE); + else + dw &= ~(S5_STENCIL_TEST_ENABLE | S5_STENCIL_WRITE_ENABLE); + if (dw != i915->state.Ctx[I915_CTXREG_LIS5]) { + i915->state.Ctx[I915_CTXREG_LIS5] = dw; + I915_STATECHANGE(i915, I915_UPLOAD_CTX); } break; diff --git a/src/mesa/drivers/dri/i915/i915_tex_layout.c b/src/mesa/drivers/dri/i915/i915_tex_layout.c index 6e4512129cd..e6a47116223 100644 --- a/src/mesa/drivers/dri/i915/i915_tex_layout.c +++ b/src/mesa/drivers/dri/i915/i915_tex_layout.c @@ -219,9 +219,9 @@ i915_miptree_layout_2d(struct intel_context *intel, width, height, 1); if (mt->compressed) - img_height = MAX2(1, height / 4); + img_height = ALIGN(height, 4) / 4; else - img_height = (MAX2(2, height) + 1) & ~1; + img_height = ALIGN(height, 2); mt->total_height += img_height; diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index bcf42d59969..7cd6820cd51 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -82,6 +82,7 @@ translate_texture_format(gl_format mesa_format, GLenum DepthMode) case MESA_FORMAT_RGBA_DXT5: return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); case MESA_FORMAT_S8_Z24: + case MESA_FORMAT_X8_Z24: if (DepthMode == GL_ALPHA) return (MAPSURF_32BIT | MT_32BIT_x8A24); else if (DepthMode == GL_INTENSITY) @@ -89,7 +90,8 @@ translate_texture_format(gl_format mesa_format, GLenum DepthMode) else return (MAPSURF_32BIT | MT_32BIT_x8L24); default: - fprintf(stderr, "%s: bad image format %x\n", __FUNCTION__, mesa_format); + fprintf(stderr, "%s: bad image format %s\n", __FUNCTION__, + _mesa_get_format_name(mesa_format)); abort(); return 0; } diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index baff49bb2f5..495426aa6d8 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -32,14 +32,19 @@ #include "main/imports.h" #include "main/macros.h" #include "main/colormac.h" +#include "main/renderbuffer.h" +#include "main/framebuffer.h" +#include "tnl/tnl.h" #include "tnl/t_context.h" #include "tnl/t_vertex.h" +#include "swrast_setup/swrast_setup.h" #include "intel_batchbuffer.h" #include "intel_regions.h" #include "intel_tris.h" #include "intel_fbo.h" +#include "intel_buffers.h" #include "i915_reg.h" #include "i915_context.h" @@ -313,7 +318,8 @@ i915_emit_state(struct intel_context *intel) aper_array[aper_count++] = intel->batch.bo; if (dirty & I915_UPLOAD_BUFFERS) { - aper_array[aper_count++] = state->draw_region->buffer; + if (state->draw_region) + aper_array[aper_count++] = state->draw_region->buffer; if (state->depth_region) aper_array[aper_count++] = state->depth_region->buffer; } @@ -383,23 +389,27 @@ i915_emit_state(struct intel_context *intel) if (INTEL_DEBUG & DEBUG_STATE) fprintf(stderr, "I915_UPLOAD_BUFFERS:\n"); - count = 14; + count = 17; if (state->Buffer[I915_DESTREG_DRAWRECT0] != MI_NOOP) count++; - if (state->depth_region) - count += 3; BEGIN_BATCH(count); OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]); OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR1]); - OUT_RELOC(state->draw_region->buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); + if (state->draw_region) { + OUT_RELOC(state->draw_region->buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); + } else { + OUT_BATCH(0); + } + OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]); + OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]); if (state->depth_region) { - OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]); - OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]); OUT_RELOC(state->depth_region->buffer, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); + } else { + OUT_BATCH(0); } OUT_BATCH(state->Buffer[I915_DESTREG_DV0]); @@ -527,6 +537,12 @@ i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region, if (region->tiling == I915_TILING_Y) state[1] |= BUF_3D_TILE_WALK_Y; } + } else { + /* Fill in a default pitch, since 0 is invalid. We'll be + * setting the buffer offset to 0 and not referencing the + * buffer, so the pitch could really be any valid value. + */ + state[1] |= BUF_3D_PITCH(4096); } } @@ -568,11 +584,9 @@ i915_set_draw_region(struct intel_context *intel, uint32_t draw_x, draw_y, draw_offset; if (state->draw_region != color_regions[0]) { - intel_region_release(&state->draw_region); intel_region_reference(&state->draw_region, color_regions[0]); } if (state->depth_region != depth_region) { - intel_region_release(&state->depth_region); intel_region_reference(&state->depth_region, depth_region); } @@ -593,6 +607,8 @@ i915_set_draw_region(struct intel_context *intel, LOD_PRECLAMP_OGL | TEX_DEFAULT_COLOR_OGL); if (irb != NULL) { value |= i915_render_target_format_for_mesa_format[irb->Base.Format]; + } else { + value |= DV_PF_8888; } /* This isn't quite safe, thus being hidden behind an option. When changing @@ -667,7 +683,142 @@ i915_set_draw_region(struct intel_context *intel, I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS); } +static void +i915_update_color_write_enable(struct i915_context *i915, bool enable) +{ + uint32_t dw = i915->state.Ctx[I915_CTXREG_LIS6]; + if (enable) + dw |= S6_COLOR_WRITE_ENABLE; + else + dw &= ~S6_COLOR_WRITE_ENABLE; + if (dw != i915->state.Ctx[I915_CTXREG_LIS6]) { + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + i915->state.Ctx[I915_CTXREG_LIS6] = dw; + } +} +/** + * Update the hardware state for drawing into a window or framebuffer object. + * + * Called by glDrawBuffer, glBindFramebufferEXT, MakeCurrent, and other + * places within the driver. + * + * Basically, this needs to be called any time the current framebuffer + * changes, the renderbuffers change, or we need to draw into different + * color buffers. + */ +static void +i915_update_draw_buffer(struct intel_context *intel) +{ + struct i915_context *i915 = (struct i915_context *)intel; + struct gl_context *ctx = &intel->ctx; + struct gl_framebuffer *fb = ctx->DrawBuffer; + struct intel_region *colorRegion = NULL, *depthRegion = NULL; + struct intel_renderbuffer *irbDepth = NULL, *irbStencil = NULL; + bool fb_has_hiz = intel_framebuffer_has_hiz(fb); + + if (!fb) { + /* this can happen during the initial context initialization */ + return; + } + + irbDepth = intel_get_renderbuffer(fb, BUFFER_DEPTH); + irbStencil = intel_get_renderbuffer(fb, BUFFER_STENCIL); + + /* Do this here, not core Mesa, since this function is called from + * many places within the driver. + */ + if (ctx->NewState & _NEW_BUFFERS) { + /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */ + _mesa_update_framebuffer(ctx); + /* this updates the DrawBuffer's Width/Height if it's a FBO */ + _mesa_update_draw_buffer_bounds(ctx); + } + + if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) { + /* this may occur when we're called by glBindFrameBuffer() during + * the process of someone setting up renderbuffers, etc. + */ + /*_mesa_debug(ctx, "DrawBuffer: incomplete user FBO\n");*/ + return; + } + + /* How many color buffers are we drawing into? + * + * If there is more than one drawbuffer (GL_FRONT_AND_BACK), or the + * drawbuffers are too big, we have to fallback to software. + */ + if ((fb->Width > ctx->Const.MaxRenderbufferSize) + || (fb->Height > ctx->Const.MaxRenderbufferSize)) { + FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, true); + } else if (fb->_NumColorDrawBuffers > 1) { + FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, true); + } else { + struct intel_renderbuffer *irb; + irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]); + colorRegion = irb ? irb->region : NULL; + FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, false); + } + + /* Check for depth fallback. */ + if (irbDepth && irbDepth->region) { + assert(!fb_has_hiz || irbDepth->Base.Format != MESA_FORMAT_S8_Z24); + FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_FALSE); + depthRegion = irbDepth->region; + } else if (irbDepth && !irbDepth->region) { + FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_TRUE); + depthRegion = NULL; + } else { /* !irbDepth */ + /* No fallback is needed because there is no depth buffer. */ + FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_FALSE); + depthRegion = NULL; + } + + /* Check for stencil fallback. */ + if (irbStencil && irbStencil->region) { + assert(irbStencil->Base.Format == MESA_FORMAT_S8_Z24); + FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE); + } else if (irbStencil && !irbStencil->region) { + FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_TRUE); + } else { /* !irbStencil */ + /* No fallback is needed because there is no stencil buffer. */ + FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE); + } + + /* If we have a (packed) stencil buffer attached but no depth buffer, + * we still need to set up the shared depth/stencil state so we can use it. + */ + if (depthRegion == NULL && irbStencil && irbStencil->region + && irbStencil->Base.Format == MESA_FORMAT_S8_Z24) { + depthRegion = irbStencil->region; + } + + /* + * Update depth and stencil test state + */ + ctx->Driver.Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test); + ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled); + + i915_update_color_write_enable(i915, colorRegion != NULL); + + intel->vtbl.set_draw_region(intel, &colorRegion, depthRegion, + fb->_NumColorDrawBuffers); + intel->NewGLState |= _NEW_BUFFERS; + + /* update viewport since it depends on window size */ + intelCalcViewport(ctx); + + /* Set state we know depends on drawable parameters: + */ + ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y, + ctx->Scissor.Width, ctx->Scissor.Height); + ctx->Driver.DepthRange(ctx, ctx->Viewport.Near, ctx->Viewport.Far); + + /* Update culling direction which changes depending on the + * orientation of the buffer: + */ + ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace); +} static void i915_new_batch(struct intel_context *intel) @@ -703,6 +854,16 @@ i915_is_hiz_depth_format(struct intel_context *intel, return false; } +static void +i915_invalidate_state(struct intel_context *intel, GLuint new_state) +{ + struct gl_context *ctx = &intel->ctx; + + _swsetup_InvalidateState(ctx, new_state); + _tnl_InvalidateState(ctx, new_state); + _tnl_invalidate_vertex_state(ctx, new_state); +} + void i915InitVtbl(struct i915_context *i915) { @@ -714,9 +875,11 @@ i915InitVtbl(struct i915_context *i915) i915->intel.vtbl.render_start = i915_render_start; i915->intel.vtbl.render_prevalidate = i915_render_prevalidate; i915->intel.vtbl.set_draw_region = i915_set_draw_region; + i915->intel.vtbl.update_draw_buffer = i915_update_draw_buffer; i915->intel.vtbl.update_texture_state = i915UpdateTextureState; i915->intel.vtbl.assert_not_dirty = i915_assert_not_dirty; i915->intel.vtbl.finish_batch = intel_finish_vb; + i915->intel.vtbl.invalidate_state = i915_invalidate_state; i915->intel.vtbl.render_target_supported = i915_render_target_supported; i915->intel.vtbl.is_hiz_depth_format = i915_is_hiz_depth_format; } diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index 7bcb72f42d0..a5eab0720ae 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -484,28 +484,33 @@ intel_atten_point(struct intel_context *intel, intelVertexPtr v0) * Fixup for I915 WPOS texture coordinate * ***********************************************************************/ +static void +intel_emit_fragcoord(struct intel_context *intel, intelVertexPtr v) +{ + struct gl_context *ctx = &intel->ctx; + struct gl_framebuffer *fb = ctx->DrawBuffer; + GLuint offset = intel->wpos_offset; + float *vertex_position = (float *)v; + float *fragcoord = (float *)((char *)v + offset); + + fragcoord[0] = vertex_position[0]; + + if (fb->Name) + fragcoord[1] = vertex_position[1]; + else + fragcoord[1] = fb->Height - vertex_position[1]; + fragcoord[2] = vertex_position[2]; + fragcoord[3] = vertex_position[3]; +} static void intel_wpos_triangle(struct intel_context *intel, intelVertexPtr v0, intelVertexPtr v1, intelVertexPtr v2) { - const struct gl_framebuffer *fb = intel->ctx.DrawBuffer; - GLuint offset = intel->wpos_offset; - GLuint size = intel->wpos_size; - GLfloat *v0_wpos = (GLfloat *)((char *)v0 + offset); - GLfloat *v1_wpos = (GLfloat *)((char *)v1 + offset); - GLfloat *v2_wpos = (GLfloat *)((char *)v2 + offset); - - __memcpy(v0_wpos, v0, size); - __memcpy(v1_wpos, v1, size); - __memcpy(v2_wpos, v2, size); - - if (!fb->Name) { - v0_wpos[1] = -v0_wpos[1] + fb->Height; - v1_wpos[1] = -v1_wpos[1] + fb->Height; - v2_wpos[1] = -v2_wpos[1] + fb->Height; - } + intel_emit_fragcoord(intel, v0); + intel_emit_fragcoord(intel, v1); + intel_emit_fragcoord(intel, v2); intel_draw_triangle(intel, v0, v1, v2); } @@ -515,20 +520,8 @@ static void intel_wpos_line(struct intel_context *intel, intelVertexPtr v0, intelVertexPtr v1) { - const struct gl_framebuffer *fb = intel->ctx.DrawBuffer; - GLuint offset = intel->wpos_offset; - GLuint size = intel->wpos_size; - GLfloat *v0_wpos = (GLfloat *)((char *)v0 + offset); - GLfloat *v1_wpos = (GLfloat *)((char *)v1 + offset); - - __memcpy(v0_wpos, v0, size); - __memcpy(v1_wpos, v1, size); - - if (!fb->Name) { - v0_wpos[1] = -v0_wpos[1] + fb->Height; - v1_wpos[1] = -v1_wpos[1] + fb->Height; - } - + intel_emit_fragcoord(intel, v0); + intel_emit_fragcoord(intel, v1); intel_draw_line(intel, v0, v1); } @@ -536,16 +529,7 @@ intel_wpos_line(struct intel_context *intel, static void intel_wpos_point(struct intel_context *intel, intelVertexPtr v0) { - const struct gl_framebuffer *fb = intel->ctx.DrawBuffer; - GLuint offset = intel->wpos_offset; - GLuint size = intel->wpos_size; - GLfloat *v0_wpos = (GLfloat *)((char *)v0 + offset); - - __memcpy(v0_wpos, v0, size); - - if (!fb->Name) - v0_wpos[1] = -v0_wpos[1] + fb->Height; - + intel_emit_fragcoord(intel, v0); intel_draw_point(intel, v0); } @@ -1078,6 +1062,13 @@ intelRunPipeline(struct gl_context * ctx) if (ctx->NewState) _mesa_update_state_locked(ctx); + /* We need to get this done before we start the pipeline, or a + * change in the INTEL_FALLBACK() of its intel_draw_buffers() call + * while the pipeline is running will result in mismatched swrast + * map/unmaps, and later assertion failures. + */ + intel_prepare_render(intel); + if (intel->NewGLState) { if (intel->NewGLState & _NEW_TEXTURE) { intel->vtbl.update_texture_state(intel); @@ -1092,7 +1083,9 @@ intelRunPipeline(struct gl_context * ctx) } intel_map_vertex_shader_textures(ctx); + intel->tnl_pipeline_running = true; _tnl_run_pipeline(ctx); + intel->tnl_pipeline_running = false; intel_unmap_vertex_shader_textures(ctx); _mesa_unlock_context_textures(ctx); @@ -1228,6 +1221,8 @@ intelFallback(struct intel_context *intel, GLbitfield bit, GLboolean mode) if (mode) { intel->Fallback |= bit; if (oldfallback == 0) { + assert(!intel->tnl_pipeline_running); + intel_flush(ctx); if (INTEL_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "ENTER FALLBACK %x: %s\n", @@ -1239,6 +1234,8 @@ intelFallback(struct intel_context *intel, GLbitfield bit, GLboolean mode) else { intel->Fallback &= ~bit; if (oldfallback == bit) { + assert(!intel->tnl_pipeline_running); + _swrast_flush(ctx); if (INTEL_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "LEAVE FALLBACK %s\n", getFallbackString(bit)); diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index 94b8c20b019..9c26150d241 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -43,7 +43,8 @@ prepare_cc_vp(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct brw_cc_viewport *ccv; - ccv = brw_state_batch(brw, sizeof(*ccv), 32, &brw->cc.vp_offset); + ccv = brw_state_batch(brw, AUB_TRACE_CC_VP_STATE, + sizeof(*ccv), 32, &brw->cc.vp_offset); /* _NEW_TRANSOFORM */ if (ctx->Transform.DepthClamp) { @@ -98,7 +99,8 @@ static void upload_cc_unit(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct brw_cc_unit_state *cc; - cc = brw_state_batch(brw, sizeof(*cc), 64, &brw->cc.state_offset); + cc = brw_state_batch(brw, AUB_TRACE_CC_STATE, + sizeof(*cc), 64, &brw->cc.state_offset); memset(cc, 0, sizeof(*cc)); /* _NEW_STENCIL */ diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index c7d428ba48d..d82206bae52 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -146,15 +146,12 @@ static void compile_clip_prog( struct brw_context *brw, printf("\n"); } - /* Upload - */ - drm_intel_bo_unreference(brw->clip.prog_bo); - brw->clip.prog_bo = brw_upload_cache(&brw->cache, - BRW_CLIP_PROG, - &c.key, sizeof(c.key), - program, program_size, - &c.prog_data, sizeof(c.prog_data), - &brw->clip.prog_data); + brw_upload_cache(&brw->cache, + BRW_CLIP_PROG, + &c.key, sizeof(c.key), + program, program_size, + &c.prog_data, sizeof(c.prog_data), + &brw->clip.prog_offset, &brw->clip.prog_data); ralloc_free(mem_ctx); } @@ -271,12 +268,11 @@ static void upload_clip_prog(struct brw_context *brw) } } - drm_intel_bo_unreference(brw->clip.prog_bo); - brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG, - &key, sizeof(key), - &brw->clip.prog_data); - if (brw->clip.prog_bo == NULL) + if (!brw_search_cache(&brw->cache, BRW_CLIP_PROG, + &key, sizeof(key), + &brw->clip.prog_offset, &brw->clip.prog_data)) { compile_clip_prog( brw, &key ); + } } diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 6015c8cbe9f..31fbadf5ef2 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -40,14 +40,19 @@ brw_prepare_clip_unit(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; struct brw_clip_unit_state *clip; - clip = brw_state_batch(brw, sizeof(*clip), 32, &brw->clip.state_offset); + clip = brw_state_batch(brw, AUB_TRACE_CLIP_STATE, + sizeof(*clip), 32, &brw->clip.state_offset); memset(clip, 0, sizeof(*clip)); - /* CACHE_NEW_CLIP_PROG */ + /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_CLIP_PROG */ clip->thread0.grf_reg_count = (ALIGN(brw->clip.prog_data->total_grf, 16) / 16 - 1); - /* reloc */ - clip->thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6; + clip->thread0.kernel_start_pointer = + brw_program_reloc(brw, + brw->clip.state_offset + + offsetof(struct brw_clip_unit_state, thread0), + brw->clip.prog_offset + + (clip->thread0.grf_reg_count << 1)) >> 6; clip->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; clip->thread1.single_program_flow = 1; @@ -110,14 +115,6 @@ brw_prepare_clip_unit(struct brw_context *brw) clip->viewport_ymin = -1; clip->viewport_ymax = 1; - /* Emit clip program relocation */ - assert(brw->clip.prog_bo); - drm_intel_bo_emit_reloc(intel->batch.bo, - (brw->clip.state_offset + - offsetof(struct brw_clip_unit_state, thread0)), - brw->clip.prog_bo, clip->thread0.grf_reg_count << 1, - I915_GEM_DOMAIN_INSTRUCTION, 0); - brw->state.dirty.cache |= CACHE_NEW_CLIP_UNIT; } @@ -125,6 +122,7 @@ const struct brw_tracked_state brw_clip_unit = { .dirty = { .mesa = _NEW_TRANSFORM, .brw = (BRW_NEW_BATCH | + BRW_NEW_PROGRAM_CACHE | BRW_NEW_CURBE_OFFSETS | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_CLIP_PROG diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index d6a99ab06e2..e00e24885ad 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -40,6 +40,7 @@ #include "brw_state.h" #include "intel_span.h" #include "tnl/t_pipeline.h" +#include "../glsl/ralloc.h" /*************************************** * Mesa's Driver Functions @@ -59,7 +60,7 @@ GLboolean brwCreateContext( int api, void *sharedContextPrivate) { struct dd_function_table functions; - struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context); + struct brw_context *brw = rzalloc(NULL, struct brw_context); struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; unsigned i; @@ -165,7 +166,7 @@ GLboolean brwCreateContext( int api, ctx->Const.QuadsFollowProvokingVertexConvention = GL_FALSE; if (intel->is_g4x || intel->gen >= 5) { - brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_GM45; + brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS; brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45; brw->has_surface_tile_offset = GL_TRUE; if (intel->gen < 6) @@ -173,7 +174,7 @@ GLboolean brwCreateContext( int api, brw->has_aa_line_parameters = GL_TRUE; brw->has_pln = GL_TRUE; } else { - brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_965; + brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS; brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965; } @@ -240,6 +241,8 @@ GLboolean brwCreateContext( int api, brw->emit_state_always = 0; + intel->batch.need_workaround_flush = true; + ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 621b6f8990b..471015cf9d0 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -142,7 +142,9 @@ enum brw_state_id { BRW_STATE_NR_VS_SURFACES, BRW_STATE_INDEX_BUFFER, BRW_STATE_VS_CONSTBUF, - BRW_STATE_WM_CONSTBUF + BRW_STATE_WM_CONSTBUF, + BRW_STATE_PROGRAM_CACHE, + BRW_STATE_STATE_BASE_ADDRESS, }; #define BRW_NEW_URB_FENCE (1 << BRW_STATE_URB_FENCE) @@ -172,6 +174,8 @@ enum brw_state_id { #define BRW_NEW_INDEX_BUFFER (1 << BRW_STATE_INDEX_BUFFER) #define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF) #define BRW_NEW_WM_CONSTBUF (1 << BRW_STATE_WM_CONSTBUF) +#define BRW_NEW_PROGRAM_CACHE (1 << BRW_STATE_PROGRAM_CACHE) +#define BRW_NEW_STATE_BASE_ADDRESS (1 << BRW_STATE_STATE_BASE_ADDRESS) struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -184,6 +188,31 @@ struct brw_state_flags { GLuint cache; }; +enum state_struct_type { + AUB_TRACE_VS_STATE = 1, + AUB_TRACE_GS_STATE = 2, + AUB_TRACE_CLIP_STATE = 3, + AUB_TRACE_SF_STATE = 4, + AUB_TRACE_WM_STATE = 5, + AUB_TRACE_CC_STATE = 6, + AUB_TRACE_CLIP_VP_STATE = 7, + AUB_TRACE_SF_VP_STATE = 8, + AUB_TRACE_CC_VP_STATE = 0x9, + AUB_TRACE_SAMPLER_STATE = 0xa, + AUB_TRACE_KERNEL_INSTRUCTIONS = 0xb, + AUB_TRACE_SCRATCH_SPACE = 0xc, + AUB_TRACE_SAMPLER_DEFAULT_COLOR = 0xd, + + AUB_TRACE_SCISSOR_STATE = 0x15, + AUB_TRACE_BLEND_STATE = 0x16, + AUB_TRACE_DEPTH_STENCIL_STATE = 0x17, + + /* Not written to .aub files the same way the structures above are. */ + AUB_TRACE_NO_TYPE = 0x100, + AUB_TRACE_BINDING_TABLE = 0x101, + AUB_TRACE_SURFACE_STATE = 0x102, + AUB_TRACE_VS_CONSTANTS = 0x103, +}; /** Subclass of Mesa vertex program */ struct brw_vertex_program { @@ -363,9 +392,11 @@ struct brw_cache_item { /** 32-bit hash of the key data */ GLuint hash; GLuint key_size; /* for variable-sized keys */ + GLuint aux_size; const void *key; - drm_intel_bo *bo; + uint32_t offset; + uint32_t size; struct brw_cache_item *next; }; @@ -376,14 +407,11 @@ struct brw_cache { struct brw_context *brw; struct brw_cache_item **items; + drm_intel_bo *bo; GLuint size, n_items; - char *name[BRW_MAX_CACHE]; - - /* Record of the last BOs chosen for each cache_id. Used to set - * brw->state.dirty.cache when a new cache item is chosen. - */ - drm_intel_bo *last_bo[BRW_MAX_CACHE]; + uint32_t next_offset; + bool bo_used_by_gpu; }; @@ -634,8 +662,9 @@ struct brw_context struct brw_vs_prog_data *prog_data; int8_t *constant_map; /* variable array following prog_data */ - drm_intel_bo *prog_bo; drm_intel_bo *const_bo; + /** Offset in the program cache to the VS program */ + uint32_t prog_offset; uint32_t state_offset; /** Binding table of pointers to surf_bo entries */ @@ -651,14 +680,16 @@ struct brw_context struct brw_gs_prog_data *prog_data; GLboolean prog_active; + /** Offset in the program cache to the CLIP program pre-gen6 */ + uint32_t prog_offset; uint32_t state_offset; - drm_intel_bo *prog_bo; } gs; struct { struct brw_clip_prog_data *prog_data; - drm_intel_bo *prog_bo; + /** Offset in the program cache to the CLIP program pre-gen6 */ + uint32_t prog_offset; /* Offset in the batch to the CLIP state on pre-gen6. */ uint32_t state_offset; @@ -673,7 +704,8 @@ struct brw_context struct { struct brw_sf_prog_data *prog_data; - drm_intel_bo *prog_bo; + /** Offset in the program cache to the CLIP program pre-gen6 */ + uint32_t prog_offset; uint32_t state_offset; uint32_t vp_offset; } sf; @@ -700,12 +732,14 @@ struct brw_context GLuint sampler_count; uint32_t sampler_offset; + /** Offset in the program cache to the WM program */ + uint32_t prog_offset; + /** Binding table of pointers to surf_bo entries */ uint32_t bind_bo_offset; uint32_t surf_offset[BRW_WM_MAX_SURF]; uint32_t state_offset; /* offset in batchbuffer to pre-gen6 WM state */ - drm_intel_bo *prog_bo; drm_intel_bo *const_bo; /* pull constant buffer. */ /** * This is offset in the batch to the push constants on gen6. @@ -717,9 +751,6 @@ struct brw_context struct { - /* gen4 */ - drm_intel_bo *prog_bo; - uint32_t state_offset; uint32_t blend_state_offset; uint32_t depth_stencil_state_offset; @@ -738,6 +769,14 @@ struct brw_context int num_prepare_atoms, num_emit_atoms; struct brw_tracked_state prepare_atoms[64], emit_atoms[64]; + + /* If (INTEL_DEBUG & DEBUG_BATCH) */ + struct { + uint32_t offset; + uint32_t size; + enum state_struct_type type; + } *state_batch_list; + int state_batch_count; }; @@ -874,6 +913,26 @@ brw_register_blocks(int reg_count) return ALIGN(reg_count, 16) / 16 - 1; } +static inline uint32_t +brw_program_reloc(struct brw_context *brw, uint32_t state_offset, + uint32_t prog_offset) +{ + struct intel_context *intel = &brw->intel; + + if (intel->gen >= 5) { + /* Using state base address. */ + return prog_offset; + } + + drm_intel_bo_emit_reloc(intel->batch.bo, + state_offset, + brw->cache.bo, + prog_offset, + I915_GEM_DOMAIN_INSTRUCTION, 0); + + return brw->cache.bo->offset + prog_offset; +} + GLboolean brw_do_cubemap_normalize(struct exec_list *instructions); #endif diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 6d41b1e69d3..0a3027d04ad 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -867,7 +867,7 @@ #define CMD_CONST_BUFFER 0x6002 #define CMD_STATE_BASE_ADDRESS 0x6101 -#define CMD_STATE_INSN_POINTER 0x6102 +#define CMD_STATE_SIP 0x6102 #define CMD_PIPELINE_SELECT_965 0x6104 #define CMD_PIPELINE_SELECT_GM45 0x6904 @@ -895,7 +895,7 @@ #define _3DSTATE_SAMPLER_STATE_POINTERS_GS 0x782E /* GEN7+ */ #define _3DSTATE_SAMPLER_STATE_POINTERS_PS 0x782F /* GEN7+ */ -#define CMD_VERTEX_BUFFER 0x7808 +#define _3DSTATE_VERTEX_BUFFERS 0x7808 # define BRW_VB0_INDEX_SHIFT 27 # define GEN6_VB0_INDEX_SHIFT 26 # define BRW_VB0_ACCESS_VERTEXDATA (0 << 26) @@ -905,7 +905,7 @@ # define GEN7_VB0_ADDRESS_MODIFYENABLE (1 << 14) # define BRW_VB0_PITCH_SHIFT 0 -#define CMD_VERTEX_ELEMENT 0x7809 +#define _3DSTATE_VERTEX_ELEMENTS 0x7809 # define BRW_VE0_INDEX_SHIFT 27 # define GEN6_VE0_INDEX_SHIFT 26 # define BRW_VE0_FORMAT_SHIFT 16 @@ -927,8 +927,8 @@ # define BRW_VE1_DST_OFFSET_SHIFT 0 #define CMD_INDEX_BUFFER 0x780a -#define CMD_VF_STATISTICS_965 0x780b -#define CMD_VF_STATISTICS_GM45 0x680b +#define GEN4_3DSTATE_VF_STATISTICS 0x780b +#define GM45_3DSTATE_VF_STATISTICS 0x680b #define _3DSTATE_CC_STATE_POINTERS 0x780e /* GEN6+ */ #define _3DSTATE_BLEND_STATE_POINTERS 0x7824 /* GEN7+ */ #define _3DSTATE_DEPTH_STENCIL_STATE_POINTERS 0x7825 /* GEN7+ */ diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 6144f0a2bce..bdb5b672899 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -177,6 +177,8 @@ static void brw_emit_prim(struct brw_context *brw, OUT_BATCH(base_vertex_location); ADVANCE_BATCH(); + intel->batch.need_workaround_flush = true; + if (intel->always_flush_cache) { intel_batchbuffer_emit_mi_flush(intel); } @@ -434,6 +436,7 @@ void brw_draw_prims( struct gl_context *ctx, */ if (!retval) { _swsetup_Wakeup(ctx); + _tnl_wakeup(ctx); _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); } diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index c6e53951069..56a46ced6e3 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -278,6 +278,7 @@ static void brw_prepare_vertices(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); + /* CACHE_NEW_VS_PROG */ GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; const unsigned char *ptr = NULL; GLuint interleaved = 0, total_size = 0; @@ -499,6 +500,8 @@ static void brw_prepare_vertices(struct brw_context *brw) break; d = brw->vb.buffers[i].offset - brw->vb.current_buffers[i].offset; + if (d < 0) + break; if (i == 0) delta = d / brw->vb.current_buffers[i].stride; if (delta * brw->vb.current_buffers[i].stride != d) @@ -539,7 +542,7 @@ static void brw_emit_vertices(struct brw_context *brw) */ if (brw->vb.nr_enabled == 0) { BEGIN_BATCH(3); - OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1); + OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1); if (intel->gen >= 6) { OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | @@ -564,7 +567,7 @@ static void brw_emit_vertices(struct brw_context *brw) if (brw->vb.nr_buffers) { BEGIN_BATCH(1 + 4*brw->vb.nr_buffers); - OUT_BATCH((CMD_VERTEX_BUFFER << 16) | (4*brw->vb.nr_buffers - 1)); + OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4*brw->vb.nr_buffers - 1)); for (i = 0; i < brw->vb.nr_buffers; i++) { struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; uint32_t dw0; @@ -595,7 +598,7 @@ static void brw_emit_vertices(struct brw_context *brw) } BEGIN_BATCH(1 + brw->vb.nr_enabled * 2); - OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | (2*brw->vb.nr_enabled - 1)); + OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2*brw->vb.nr_enabled - 1)); for (i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; uint32_t format = get_surface_type(input->glarray->Type, @@ -646,7 +649,7 @@ const struct brw_tracked_state brw_vertices = { .dirty = { .mesa = 0, .brw = BRW_NEW_BATCH | BRW_NEW_VERTICES, - .cache = 0, + .cache = CACHE_NEW_VS_PROG, }, .prepare = brw_prepare_vertices, .emit = brw_emit_vertices, diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 31f76f8c939..02041b3bc03 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -463,9 +463,21 @@ fs_visitor::emit_general_interpolation(ir_variable *ir) } else { /* Perspective interpolation case. */ for (unsigned int k = 0; k < type->vector_elements; k++) { - struct brw_reg interp = interp_reg(location, k); - emit(FS_OPCODE_LINTERP, attr, - this->delta_x, this->delta_y, fs_reg(interp)); + /* FINISHME: At some point we probably want to push + * this farther by giving similar treatment to the + * other potentially constant components of the + * attribute, as well as making brw_vs_constval.c + * handle varyings other than gl_TexCoord. + */ + if (location >= FRAG_ATTRIB_TEX0 && + location <= FRAG_ATTRIB_TEX7 && + k == 3 && !(c->key.proj_attrib_mask & (1 << location))) { + emit(BRW_OPCODE_MOV, attr, fs_reg(1.0f)); + } else { + struct brw_reg interp = interp_reg(location, k); + emit(FS_OPCODE_LINTERP, attr, + this->delta_x, this->delta_y, fs_reg(interp)); + } attr.reg_offset++; } @@ -621,8 +633,8 @@ fs_visitor::assign_curb_setup() } /* Map the offsets in the UNIFORM file to fixed HW regs. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == UNIFORM) { @@ -684,8 +696,8 @@ fs_visitor::assign_urb_setup() /* Offset all the urb_setup[] index by the actual position of the * setup regs, now that the location of the constants has been chosen. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->opcode == FS_OPCODE_LINTERP) { assert(inst->src[2].file == FIXED_HW_REG); @@ -739,8 +751,8 @@ fs_visitor::split_virtual_grfs() split_grf[this->delta_x.reg] = false; } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; /* Texturing produces 4 contiguous registers, so no splitting. */ if (inst->is_tex()) { @@ -763,8 +775,8 @@ fs_visitor::split_virtual_grfs() } } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->dst.file == GRF && split_grf[inst->dst.reg] && @@ -815,8 +827,8 @@ fs_visitor::setup_pull_constants() int pull_uniform_base = max_uniform_components; int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base; - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (int i = 0; i < 3; i++) { if (inst->src[i].file != UNIFORM) @@ -871,8 +883,8 @@ fs_visitor::calculate_live_intervals() } int ip = 0; - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->opcode == BRW_OPCODE_DO) { if (loop_depth++ == 0) @@ -945,8 +957,8 @@ fs_visitor::propagate_constants() calculate_live_intervals(); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->opcode != BRW_OPCODE_MOV || inst->predicated || @@ -965,11 +977,9 @@ fs_visitor::propagate_constants() /* Found a move of a constant to a GRF. Find anything else using the GRF * before it's written, and replace it with the constant if we can. */ - exec_list_iterator scan_iter = iter; - scan_iter.next(); - for (; scan_iter.has_next(); scan_iter.next()) { - fs_inst *scan_inst = (fs_inst *)scan_iter.get(); - + for (fs_inst *scan_inst = (fs_inst *)inst->next; + !scan_inst->is_tail_sentinel(); + scan_inst = (fs_inst *)scan_inst->next) { if (scan_inst->opcode == BRW_OPCODE_DO || scan_inst->opcode == BRW_OPCODE_WHILE || scan_inst->opcode == BRW_OPCODE_ELSE || @@ -1046,6 +1056,21 @@ fs_visitor::propagate_constants() progress = true; } break; + + case FS_OPCODE_RCP: + /* The hardware doesn't do math on immediate values + * (because why are you doing that, seriously?), but + * the correct answer is to just constant fold it + * anyway. + */ + assert(i == 0); + if (inst->src[0].imm.f != 0.0f) { + scan_inst->opcode = BRW_OPCODE_MOV; + scan_inst->src[0] = inst->src[0]; + scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f; + progress = true; + } + break; } } @@ -1063,6 +1088,47 @@ fs_visitor::propagate_constants() return progress; } + + +/** + * Attempts to move immediate constants into the immediate + * constant slot of following instructions. + * + * Immediate constants are a bit tricky -- they have to be in the last + * operand slot, you can't do abs/negate on them, + */ + +bool +fs_visitor::opt_algebraic() +{ + bool progress = false; + + calculate_live_intervals(); + + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; + + switch (inst->opcode) { + case BRW_OPCODE_MUL: + if (inst->src[1].file != IMM) + continue; + + /* a * 1.0 = a */ + if (inst->src[1].type == BRW_REGISTER_TYPE_F && + inst->src[1].imm.f == 1.0) { + inst->opcode = BRW_OPCODE_MOV; + inst->src[1] = reg_undef; + progress = true; + break; + } + + break; + } + } + + return progress; +} + /** * Must be called after calculate_live_intervales() to remove unused * writes to registers -- register allocation will fail otherwise @@ -1077,8 +1143,8 @@ fs_visitor::dead_code_eliminate() calculate_live_intervals(); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) { inst->remove(); @@ -1101,8 +1167,8 @@ fs_visitor::register_coalesce() int if_depth = 0; int loop_depth = 0; - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; /* Make sure that we dominate the instructions we're going to * scan for interfering with our coalescing, or we won't have @@ -1130,7 +1196,8 @@ fs_visitor::register_coalesce() if (inst->opcode != BRW_OPCODE_MOV || inst->predicated || inst->saturate || - inst->dst.file != GRF || inst->src[0].file != GRF || + inst->dst.file != GRF || (inst->src[0].file != GRF && + inst->src[0].file != UNIFORM)|| inst->dst.type != inst->src[0].type) continue; @@ -1141,11 +1208,10 @@ fs_visitor::register_coalesce() * program. */ bool interfered = false; - exec_list_iterator scan_iter = iter; - scan_iter.next(); - for (; scan_iter.has_next(); scan_iter.next()) { - fs_inst *scan_inst = (fs_inst *)scan_iter.get(); + for (fs_inst *scan_inst = (fs_inst *)inst->next; + !scan_inst->is_tail_sentinel(); + scan_inst = (fs_inst *)scan_inst->next) { if (scan_inst->dst.file == GRF) { if (scan_inst->dst.reg == inst->dst.reg && (scan_inst->dst.reg_offset == inst->dst.reg_offset || @@ -1153,7 +1219,8 @@ fs_visitor::register_coalesce() interfered = true; break; } - if (scan_inst->dst.reg == inst->src[0].reg && + if (inst->src[0].file == GRF && + scan_inst->dst.reg == inst->src[0].reg && (scan_inst->dst.reg_offset == inst->src[0].reg_offset || scan_inst->is_tex())) { interfered = true; @@ -1161,10 +1228,13 @@ fs_visitor::register_coalesce() } } - /* The gen6 MATH instruction can't handle source modifiers, so avoid - * coalescing those for now. We should do something more specific. + /* The gen6 MATH instruction can't handle source modifiers or + * unusual register regions, so avoid coalescing those for + * now. We should do something more specific. */ - if (intel->gen >= 6 && scan_inst->is_math() && has_source_modifiers) { + if (intel->gen >= 6 && + scan_inst->is_math() && + (has_source_modifiers || inst->src[0].file == UNIFORM)) { interfered = true; break; } @@ -1176,19 +1246,17 @@ fs_visitor::register_coalesce() /* Rewrite the later usage to point at the source of the move to * be removed. */ - for (exec_list_iterator scan_iter = iter; scan_iter.has_next(); - scan_iter.next()) { - fs_inst *scan_inst = (fs_inst *)scan_iter.get(); - + for (fs_inst *scan_inst = inst; + !scan_inst->is_tail_sentinel(); + scan_inst = (fs_inst *)scan_inst->next) { for (int i = 0; i < 3; i++) { if (scan_inst->src[i].file == GRF && scan_inst->src[i].reg == inst->dst.reg && scan_inst->src[i].reg_offset == inst->dst.reg_offset) { - scan_inst->src[i].reg = inst->src[0].reg; - scan_inst->src[i].reg_offset = inst->src[0].reg_offset; - scan_inst->src[i].abs |= inst->src[0].abs; - scan_inst->src[i].negate ^= inst->src[0].negate; - scan_inst->src[i].smear = inst->src[0].smear; + fs_reg new_src = inst->src[0]; + new_src.negate ^= scan_inst->src[i].negate; + new_src.abs |= scan_inst->src[i].abs; + scan_inst->src[i] = new_src; } } } @@ -1212,8 +1280,8 @@ fs_visitor::compute_to_mrf() calculate_live_intervals(); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; int ip = next_ip; next_ip++; @@ -1392,8 +1460,8 @@ fs_visitor::remove_duplicate_mrf_writes() memset(last_mrf_move, 0, sizeof(last_mrf_move)); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; switch (inst->opcode) { case BRW_OPCODE_DO: @@ -1527,12 +1595,14 @@ fs_visitor::run() /* Generate FS IR for main(). (the visitor only descends into * functions called "main"). */ - foreach_iter(exec_list_iterator, iter, *shader->ir) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &*shader->ir) { + ir_instruction *ir = (ir_instruction *)node; base_ir = ir; this->result = reg_undef; ir->accept(this); } + if (failed) + return false; emit_fb_writes(); @@ -1548,6 +1618,7 @@ fs_visitor::run() progress = remove_duplicate_mrf_writes() || progress; progress = propagate_constants() || progress; + progress = opt_algebraic() || progress; progress = register_coalesce() || progress; progress = compute_to_mrf() || progress; progress = dead_code_eliminate() || progress; @@ -1684,6 +1755,9 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog) key.clamp_fragment_color = true; for (int i = 0; i < BRW_MAX_TEX_UNIT; i++) { + if (fp->Base.ShadowSamplers & (1 << i)) + key.compare_funcs[i] = GL_LESS; + /* FINISHME: depth compares might use (0,0,0,W) for example */ key.tex_swizzles[i] = SWIZZLE_XYZW; } @@ -1697,14 +1771,12 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog) key.program_string_id = bfp->id; - drm_intel_bo *old_prog_bo = brw->wm.prog_bo; + uint32_t old_prog_offset = brw->wm.prog_offset; struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data; - brw->wm.prog_bo = NULL; bool success = do_wm_prog(brw, prog, bfp, &key); - drm_intel_bo_unreference(brw->wm.prog_bo); - brw->wm.prog_bo = old_prog_bo; + brw->wm.prog_offset = old_prog_offset; brw->wm.prog_data = old_prog_data; return success; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 7570dda1024..89d6cda7e4f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -441,6 +441,8 @@ public: void visit(ir_function *ir); void visit(ir_function_signature *ir); + void swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler); + fs_inst *emit(fs_inst inst); fs_inst *emit(int opcode) @@ -483,6 +485,7 @@ public: void setup_pull_constants(); void calculate_live_intervals(); bool propagate_constants(); + bool opt_algebraic(); bool register_coalesce(); bool compute_to_mrf(); bool dead_code_eliminate(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index 7f3f52854d2..46677a6f2ef 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -191,6 +191,8 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_unop_log: case ir_unop_exp2: case ir_unop_log2: + case ir_unop_i2u: + case ir_unop_u2i: case ir_unop_f2i: case ir_unop_i2f: case ir_unop_f2b: diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 6b7c434949c..9fb0153d1f8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -59,7 +59,8 @@ fs_visitor::generate_fb_write(fs_inst *inst) if (inst->target > 0) { /* Set the render target index for choosing BLEND_STATE. */ - brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2), + brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, + inst->base_mrf, 2), BRW_REGISTER_TYPE_UD), brw_imm_ud(inst->target)); } @@ -273,7 +274,8 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) } break; case FS_OPCODE_TXD: - assert(!"TXD isn't supported on gen5+ yet."); + /* There is no sample_d_c message; comparisons are done manually */ + msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; break; } } else { @@ -311,7 +313,9 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) } break; case FS_OPCODE_TXD: - assert(!"TXD isn't supported on gen4 yet."); + /* There is no sample_d_c message; comparisons are done manually */ + assert(inst->mlen == 7 || inst->mlen == 10); + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS; break; } } @@ -605,8 +609,8 @@ fs_visitor::generate_code() prog->Name, c->dispatch_width); } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; struct brw_reg src[3], dst; if (unlikely(INTEL_DEBUG & DEBUG_WM)) { diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index f88b1316775..78daa491156 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -75,8 +75,8 @@ fs_visitor::assign_regs_trivial() last_grf = hw_reg_mapping[i - 1] + (this->virtual_grf_sizes[i - 1] * reg_width); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; assign_reg(hw_reg_mapping, &inst->dst, reg_width); assign_reg(hw_reg_mapping, &inst->src[0], reg_width); @@ -101,7 +101,6 @@ fs_visitor::assign_regs() * for reg_width == 2. */ int reg_width = c->dispatch_width / 8; - int last_grf = 0; int hw_reg_mapping[this->virtual_grf_next + 1]; int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width); int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width; @@ -263,6 +262,7 @@ fs_visitor::assign_regs() * regs in the register classes back down to real hardware reg * numbers. */ + this->grf_used = first_assigned_grf; hw_reg_mapping[0] = 0; /* unused */ for (int i = 1; i < this->virtual_grf_next; i++) { int reg = ra_get_node_reg(g, i); @@ -278,20 +278,19 @@ fs_visitor::assign_regs() assert(hw_reg >= 0); hw_reg_mapping[i] = first_assigned_grf + hw_reg * reg_width; - last_grf = MAX2(last_grf, - hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1); + this->grf_used = MAX2(this->grf_used, + hw_reg_mapping[i] + this->virtual_grf_sizes[i] * + reg_width); } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; assign_reg(hw_reg_mapping, &inst->dst, reg_width); assign_reg(hw_reg_mapping, &inst->src[0], reg_width); assign_reg(hw_reg_mapping, &inst->src[1], reg_width); } - this->grf_used = last_grf + reg_width; - ralloc_free(g); ralloc_free(regs); @@ -337,8 +336,8 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) * spill/unspill we'll have to do, and guess that the insides of * loops run 10 times. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == GRF) { @@ -395,8 +394,8 @@ fs_visitor::spill_reg(int spill_reg) * virtual grf of the same size. For most instructions, though, we * could just spill/unspill the GRF being accessed. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == GRF && diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index d8218c26edb..9ec3f502764 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -283,8 +283,8 @@ instruction_scheduler::calculate_deps() memset(last_mrf_write, 0, sizeof(last_mrf_write)); /* top-to-bottom dependencies: RAW and WAW. */ - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list(node, &instructions) { + schedule_node *n = (schedule_node *)node; fs_inst *inst = n->inst; /* read-after-write deps. */ @@ -437,8 +437,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header) int time = 0; /* Remove non-DAG heads from the list. */ - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list_safe(node, &instructions) { + schedule_node *n = (schedule_node *)node; if (n->parent_count != 0) n->remove(); } @@ -447,8 +447,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header) schedule_node *chosen = NULL; int chosen_time = 0; - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list(node, &instructions) { + schedule_node *n = (schedule_node *)node; if (!chosen || n->unblocked_time < chosen_time) { chosen = n; @@ -490,8 +490,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header) * progress until the first is done. */ if (chosen->inst->is_math()) { - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list(node, &instructions) { + schedule_node *n = (schedule_node *)node; if (n->inst->is_math()) n->unblocked_time = MAX2(n->unblocked_time, diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp index 530ffa26580..a9a60c2fd8a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp @@ -122,8 +122,8 @@ ir_vector_reference_visitor::get_variable_entry(ir_variable *var) break; } - foreach_iter(exec_list_iterator, iter, this->variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list(node, &this->variable_list) { + variable_entry *entry = (variable_entry *)node; if (entry->var == var) return entry; } @@ -222,8 +222,8 @@ ir_vector_splitting_visitor::get_splitting_entry(ir_variable *var) if (!var->type->is_vector()) return NULL; - foreach_iter(exec_list_iterator, iter, *this->variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list(node, &*this->variable_list) { + variable_entry *entry = (variable_entry *)node; if (entry->var == var) { return entry; } @@ -341,8 +341,8 @@ brw_do_vector_splitting(exec_list *instructions) visit_list_elements(&refs, instructions); /* Trim out variables we can't split. */ - foreach_iter(exec_list_iterator, iter, refs.variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list_safe(node, &refs.variable_list) { + variable_entry *entry = (variable_entry *)node; if (debug) { printf("vector %s@%p: decl %d, whole_access %d\n", @@ -363,8 +363,8 @@ brw_do_vector_splitting(exec_list *instructions) /* Replace the decls of the vectors to be split with their split * components. */ - foreach_iter(exec_list_iterator, iter, refs.variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list(node, &refs.variable_list) { + variable_entry *entry = (variable_entry *)node; const struct glsl_type *type; type = glsl_type::get_instance(entry->var->type->base_type, 1, 1); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index b4857871c78..2b769ccbba1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -349,6 +349,14 @@ fs_visitor::visit(ir_expression *ir) emit_math(FS_OPCODE_RSQ, this->result, op[0]); break; + case ir_unop_i2u: + op[0].type = BRW_REGISTER_TYPE_UD; + this->result = op[0]; + break; + case ir_unop_u2i: + op[0].type = BRW_REGISTER_TYPE_D; + this->result = op[0]; + break; case ir_unop_i2f: case ir_unop_b2f: case ir_unop_b2i: @@ -549,7 +557,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, /* g0 header. */ mlen = 1; - if (ir->shadow_comparitor) { + if (ir->shadow_comparitor && ir->op != ir_txd) { for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); @@ -595,7 +603,46 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ mlen += 3; } else if (ir->op == ir_txd) { - assert(!"TXD isn't supported on gen4 yet."); + this->result = reg_undef; + ir->lod_info.grad.dPdx->accept(this); + fs_reg dPdx = this->result; + + this->result = reg_undef; + ir->lod_info.grad.dPdy->accept(this); + fs_reg dPdy = this->result; + + for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); + coordinate.reg_offset++; + } + /* the slots for u and v are always present, but r is optional */ + mlen += MAX2(ir->coordinate->type->vector_elements, 2); + + /* P = u, v, r + * dPdx = dudx, dvdx, drdx + * dPdy = dudy, dvdy, drdy + * + * 1-arg: Does not exist. + * + * 2-arg: dudx dvdx dudy dvdy + * dPdx.x dPdx.y dPdy.x dPdy.y + * m4 m5 m6 m7 + * + * 3-arg: dudx dvdx drdx dudy dvdy drdy + * dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z + * m5 m6 m7 m8 m9 m10 + */ + for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); + dPdx.reg_offset++; + } + mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2); + + for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) { + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); + dPdy.reg_offset++; + } + mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2); } else { /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod * instructions. We'll need to do SIMD16 here. @@ -709,7 +756,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, } mlen += ir->coordinate->type->vector_elements * reg_width; - if (ir->shadow_comparitor) { + if (ir->shadow_comparitor && ir->op != ir_txd) { mlen = MAX2(mlen, header_present + 4 * reg_width); this->result = reg_undef; @@ -742,7 +789,39 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, inst = emit(FS_OPCODE_TXL, dst); break; - case ir_txd: + case ir_txd: { + this->result = reg_undef; + ir->lod_info.grad.dPdx->accept(this); + fs_reg dPdx = this->result; + + this->result = reg_undef; + ir->lod_info.grad.dPdy->accept(this); + fs_reg dPdy = this->result; + + mlen = MAX2(mlen, header_present + 4 * reg_width); /* skip over 'ai' */ + + /** + * P = u, v, r + * dPdx = dudx, dvdx, drdx + * dPdy = dudy, dvdy, drdy + * + * Load up these values: + * - dudx dudy dvdx dvdy drdx drdy + * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z + */ + for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); + dPdx.reg_offset++; + mlen += reg_width; + + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); + dPdy.reg_offset++; + mlen += reg_width; + } + + inst = emit(FS_OPCODE_TXD, dst); + break; + } case ir_txf: assert(!"GLSL 1.30 features unsupported"); break; @@ -776,7 +855,8 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, base_mrf--; } - if (ir->shadow_comparitor) { + if (ir->shadow_comparitor && ir->op != ir_txd) { + this->result = reg_undef; ir->shadow_comparitor->accept(this); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen += reg_width; @@ -787,29 +867,65 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_tex: break; case ir_txb: + this->result = reg_undef; ir->lod_info.bias->accept(this); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen += reg_width; break; case ir_txl: + this->result = reg_undef; ir->lod_info.lod->accept(this); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen += reg_width; break; - case ir_txd: + case ir_txd: { + if (c->dispatch_width == 16) + fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); + + this->result = reg_undef; + ir->lod_info.grad.dPdx->accept(this); + fs_reg dPdx = this->result; + + this->result = reg_undef; + ir->lod_info.grad.dPdy->accept(this); + fs_reg dPdy = this->result; + + /* Load dPdx and the coordinate together: + * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z + */ + for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { + fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), + coordinate); + if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler)) + inst->saturate = true; + coordinate.reg_offset++; + mlen += reg_width; + + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); + dPdx.reg_offset++; + mlen += reg_width; + + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); + dPdy.reg_offset++; + mlen += reg_width; + } + break; + } case ir_txf: assert(!"GLSL 1.30 features unsupported"); break; } - /* Set up the coordinate */ - for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), - coordinate); - if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler)) - inst->saturate = true; - coordinate.reg_offset++; - mlen += reg_width; + /* Set up the coordinate (except for TXD where it was done earlier) */ + if (ir->op != ir_txd) { + for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { + fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), + coordinate); + if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler)) + inst->saturate = true; + coordinate.reg_offset++; + mlen += reg_width; + } } /* Generate the SEND */ @@ -835,9 +951,24 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, void fs_visitor::visit(ir_texture *ir) { - int sampler; fs_inst *inst = NULL; + int sampler = _mesa_get_sampler_uniform_value(ir->sampler, prog, &fp->Base); + sampler = fp->Base.SamplerUnits[sampler]; + + /* Our hardware doesn't have a sample_d_c message, so shadow compares + * for textureGrad/TXD need to be emulated with instructions. + */ + bool hw_compare_supported = ir->op != ir_txd; + if (ir->shadow_comparitor && !hw_compare_supported) { + assert(c->key.compare_funcs[sampler] != GL_NONE); + /* No need to even sample for GL_ALWAYS or GL_NEVER...bail early */ + if (c->key.compare_funcs[sampler] == GL_ALWAYS) + return swizzle_result(ir, fs_reg(1.0f), sampler); + else if (c->key.compare_funcs[sampler] == GL_NEVER) + return swizzle_result(ir, fs_reg(0.0f), sampler); + } + this->result = reg_undef; ir->coordinate->accept(this); fs_reg coordinate = this->result; @@ -876,11 +1007,6 @@ fs_visitor::visit(ir_texture *ir) /* Should be lowered by do_lower_texture_projection */ assert(!ir->projector); - sampler = _mesa_get_sampler_uniform_value(ir->sampler, - prog, - &fp->Base); - sampler = fp->Base.SamplerUnits[sampler]; - /* The 965 requires the EU to do the normalization of GL rectangle * texture coordinates. We use the program parameter state * tracking to get the scaling factor. @@ -951,20 +1077,70 @@ fs_visitor::visit(ir_texture *ir) inst->sampler = sampler; - this->result = dst; + if (ir->shadow_comparitor) { + if (hw_compare_supported) { + inst->shadow_compare = true; + } else { + this->result = reg_undef; + ir->shadow_comparitor->accept(this); + fs_reg ref = this->result; + + fs_reg value = dst; + dst = fs_reg(this, glsl_type::vec4_type); + + /* FINISHME: This needs to be done pre-filtering. */ + + uint32_t conditional = 0; + switch (c->key.compare_funcs[sampler]) { + /* GL_ALWAYS and GL_NEVER were handled at the top of the function */ + case GL_LESS: conditional = BRW_CONDITIONAL_L; break; + case GL_GREATER: conditional = BRW_CONDITIONAL_G; break; + case GL_LEQUAL: conditional = BRW_CONDITIONAL_LE; break; + case GL_GEQUAL: conditional = BRW_CONDITIONAL_GE; break; + case GL_EQUAL: conditional = BRW_CONDITIONAL_EQ; break; + case GL_NOTEQUAL: conditional = BRW_CONDITIONAL_NEQ; break; + default: assert(!"Should not get here: bad shadow compare function"); + } + + /* Use conditional moves to load 0 or 1 as the result */ + this->current_annotation = "manual shadow comparison"; + for (int i = 0; i < 4; i++) { + inst = emit(BRW_OPCODE_MOV, dst, fs_reg(0.0f)); + + inst = emit(BRW_OPCODE_CMP, reg_null_f, ref, value); + inst->conditional_mod = conditional; - if (ir->shadow_comparitor) - inst->shadow_compare = true; + inst = emit(BRW_OPCODE_MOV, dst, fs_reg(1.0f)); + inst->predicated = true; + + dst.reg_offset++; + value.reg_offset++; + } + dst.reg_offset = 0; + } + } + + swizzle_result(ir, dst, sampler); +} + +/** + * Swizzle the result of a texture result. This is necessary for + * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons. + */ +void +fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler) +{ + this->result = orig_val; if (ir->type == glsl_type::float_type) { /* Ignore DEPTH_TEXTURE_MODE swizzling. */ assert(ir->sampler->type->sampler_shadow); - } else if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) { - fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type); + } else if (c->key.tex_swizzles[sampler] != SWIZZLE_NOOP) { + fs_reg swizzled_result = fs_reg(this, glsl_type::vec4_type); for (int i = 0; i < 4; i++) { - int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i); - fs_reg l = swizzle_dst; + int swiz = GET_SWZ(c->key.tex_swizzles[sampler], i); + fs_reg l = swizzled_result; l.reg_offset += i; if (swiz == SWIZZLE_ZERO) { @@ -972,12 +1148,12 @@ fs_visitor::visit(ir_texture *ir) } else if (swiz == SWIZZLE_ONE) { emit(BRW_OPCODE_MOV, l, fs_reg(1.0f)); } else { - fs_reg r = dst; - r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i); + fs_reg r = orig_val; + r.reg_offset += GET_SWZ(c->key.tex_swizzles[sampler], i); emit(BRW_OPCODE_MOV, l, r); } } - this->result = swizzle_dst; + this->result = swizzled_result; } } @@ -1301,8 +1477,8 @@ fs_visitor::visit(ir_if *ir) inst->predicated = true; } - foreach_iter(exec_list_iterator, iter, ir->then_instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &ir->then_instructions) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; ir->accept(this); @@ -1311,8 +1487,8 @@ fs_visitor::visit(ir_if *ir) if (!ir->else_instructions.is_empty()) { emit(BRW_OPCODE_ELSE); - foreach_iter(exec_list_iterator, iter, ir->else_instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &ir->else_instructions) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; ir->accept(this); @@ -1362,8 +1538,8 @@ fs_visitor::visit(ir_loop *ir) inst->predicated = true; } - foreach_iter(exec_list_iterator, iter, ir->body_instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &ir->body_instructions) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; @@ -1419,8 +1595,8 @@ fs_visitor::visit(ir_function *ir) assert(sig); - foreach_iter(exec_list_iterator, iter, sig->body) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &sig->body) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; ir->accept(this); @@ -1466,7 +1642,7 @@ fs_visitor::emit_dummy_fs() fs_inst *write; write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0)); - write->base_mrf = 0; + write->base_mrf = 2; } /* The register location here is relative to the start of the URB @@ -1569,6 +1745,7 @@ void fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color) { int reg_width = c->dispatch_width / 8; + fs_inst *inst; if (c->dispatch_width == 8 || intel->gen == 6) { /* SIMD8 write looks like: @@ -1587,8 +1764,10 @@ fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color) * m + 6: a0 * m + 7: a1 */ - emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index * reg_width), - color); + inst = emit(BRW_OPCODE_MOV, + fs_reg(MRF, first_color_mrf + index * reg_width), + color); + inst->saturate = c->key.clamp_fragment_color; } else { /* pre-gen6 SIMD16 single source DP write looks like: * m + 0: r0 @@ -1606,16 +1785,22 @@ fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color) * usual destination + 1 for the second half we get * destination + 4. */ - emit(BRW_OPCODE_MOV, - fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), color); + inst = emit(BRW_OPCODE_MOV, + fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), + color); + inst->saturate = c->key.clamp_fragment_color; } else { push_force_uncompressed(); - emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), color); + inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), + color); + inst->saturate = c->key.clamp_fragment_color; pop_force_uncompressed(); push_force_sechalf(); color.sechalf = true; - emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), color); + inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), + color); + inst->saturate = c->key.clamp_fragment_color; pop_force_sechalf(); color.sechalf = false; } @@ -1627,7 +1812,8 @@ fs_visitor::emit_fb_writes() { this->current_annotation = "FB write header"; GLboolean header_present = GL_TRUE; - int nr = 0; + int base_mrf = 2; + int nr = base_mrf; int reg_width = c->dispatch_width / 8; if (intel->gen >= 6 && @@ -1637,7 +1823,7 @@ fs_visitor::emit_fb_writes() } if (header_present) { - /* m0, m1 header */ + /* m2, m3 header */ nr += 2; } @@ -1706,8 +1892,8 @@ fs_visitor::emit_fb_writes() fs_inst *inst = emit(FS_OPCODE_FB_WRITE); inst->target = target; - inst->base_mrf = 0; - inst->mlen = nr; + inst->base_mrf = base_mrf; + inst->mlen = nr - base_mrf; if (target == c->key.nr_color_regions - 1) inst->eot = true; inst->header_present = header_present; @@ -1724,8 +1910,8 @@ fs_visitor::emit_fb_writes() } fs_inst *inst = emit(FS_OPCODE_FB_WRITE); - inst->base_mrf = 0; - inst->mlen = nr; + inst->base_mrf = base_mrf; + inst->mlen = nr - base_mrf; inst->eot = true; inst->header_present = header_present; } diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 001cd62f8ca..3171e97d7af 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -121,14 +121,11 @@ static void compile_gs_prog( struct brw_context *brw, printf("\n"); } - /* Upload - */ - drm_intel_bo_unreference(brw->gs.prog_bo); - brw->gs.prog_bo = brw_upload_cache(&brw->cache, BRW_GS_PROG, - &c.key, sizeof(c.key), - program, program_size, - &c.prog_data, sizeof(c.prog_data), - &brw->gs.prog_data); + brw_upload_cache(&brw->cache, BRW_GS_PROG, + &c.key, sizeof(c.key), + program, program_size, + &c.prog_data, sizeof(c.prog_data), + &brw->gs.prog_offset, &brw->gs.prog_data); ralloc_free(mem_ctx); } @@ -189,15 +186,12 @@ static void prepare_gs_prog(struct brw_context *brw) brw->gs.prog_active = key.need_gs_prog; } - drm_intel_bo_unreference(brw->gs.prog_bo); - brw->gs.prog_bo = NULL; - if (brw->gs.prog_active) { - brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG, - &key, sizeof(key), - &brw->gs.prog_data); - if (brw->gs.prog_bo == NULL) + if (!brw_search_cache(&brw->cache, BRW_GS_PROG, + &key, sizeof(key), + &brw->gs.prog_offset, &brw->gs.prog_data)) { compile_gs_prog( brw, &key ); + } } } diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index 542874b7706..e0309e71fc3 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -41,16 +41,22 @@ brw_prepare_gs_unit(struct brw_context *brw) struct intel_context *intel = &brw->intel; struct brw_gs_unit_state *gs; - gs = brw_state_batch(brw, sizeof(*gs), 32, &brw->gs.state_offset); + gs = brw_state_batch(brw, AUB_TRACE_GS_STATE, + sizeof(*gs), 32, &brw->gs.state_offset); memset(gs, 0, sizeof(*gs)); - /* CACHE_NEW_GS_PROG */ + /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_GS_PROG */ if (brw->gs.prog_active) { gs->thread0.grf_reg_count = (ALIGN(brw->gs.prog_data->total_grf, 16) / 16 - 1); - /* reloc */ - gs->thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6; + + gs->thread0.kernel_start_pointer = + brw_program_reloc(brw, + brw->gs.state_offset + + offsetof(struct brw_gs_unit_state, thread0), + brw->gs.prog_offset + + (gs->thread0.grf_reg_count << 1)) >> 6; gs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; gs->thread1.single_program_flow = 1; @@ -69,13 +75,6 @@ brw_prepare_gs_unit(struct brw_context *brw) gs->thread4.max_threads = 1; else gs->thread4.max_threads = 0; - - /* Emit GS program relocation */ - drm_intel_bo_emit_reloc(intel->batch.bo, - (brw->gs.state_offset + - offsetof(struct brw_gs_unit_state, thread0)), - brw->gs.prog_bo, gs->thread0.grf_reg_count << 1, - I915_GEM_DOMAIN_INSTRUCTION, 0); } if (intel->gen == 5) @@ -91,6 +90,7 @@ const struct brw_tracked_state brw_gs_unit = { .dirty = { .mesa = 0, .brw = (BRW_NEW_BATCH | + BRW_NEW_PROGRAM_CACHE | BRW_NEW_CURBE_OFFSETS | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_GS_PROG diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 2b5ec8ac677..f7e6e7c81d1 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -46,7 +46,7 @@ static void upload_drawing_rect(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965); + OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); OUT_BATCH(0); /* xmin, ymin */ OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) | ((ctx->DrawBuffer->Height - 1) << 16)); @@ -87,10 +87,11 @@ static void upload_binding_table_pointers(struct brw_context *brw) const struct brw_tracked_state brw_binding_table_pointers = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH - | BRW_NEW_VS_BINDING_TABLE - | BRW_NEW_GS_BINDING_TABLE - | BRW_NEW_PS_BINDING_TABLE, + .brw = (BRW_NEW_BATCH | + BRW_NEW_STATE_BASE_ADDRESS | + BRW_NEW_VS_BINDING_TABLE | + BRW_NEW_GS_BINDING_TABLE | + BRW_NEW_PS_BINDING_TABLE), .cache = 0, }, .emit = upload_binding_table_pointers, @@ -122,10 +123,11 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw) const struct brw_tracked_state gen6_binding_table_pointers = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH - | BRW_NEW_VS_BINDING_TABLE - | BRW_NEW_GS_BINDING_TABLE - | BRW_NEW_PS_BINDING_TABLE, + .brw = (BRW_NEW_BATCH | + BRW_NEW_STATE_BASE_ADDRESS | + BRW_NEW_VS_BINDING_TABLE | + BRW_NEW_GS_BINDING_TABLE | + BRW_NEW_PS_BINDING_TABLE), .cache = 0, }, .emit = upload_gen6_binding_table_pointers, @@ -180,7 +182,9 @@ static void upload_psp_urb_cbs(struct brw_context *brw ) const struct brw_tracked_state brw_psp_urb_cbs = { .dirty = { .mesa = 0, - .brw = BRW_NEW_URB_FENCE | BRW_NEW_BATCH, + .brw = (BRW_NEW_URB_FENCE | + BRW_NEW_BATCH | + BRW_NEW_STATE_BASE_ADDRESS), .cache = (CACHE_NEW_VS_UNIT | CACHE_NEW_GS_UNIT | CACHE_NEW_GS_PROG | @@ -219,6 +223,12 @@ static void emit_depthbuffer(struct brw_context *brw) struct intel_region *hiz_region = depth_irb ? depth_irb->hiz_region : NULL; unsigned int len; + /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both + * non-pipelined state that will need the PIPE_CONTROL workaround. + */ + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); + /* * If either depth or stencil buffer has packed depth/stencil format, * then don't use separate stencil. Emit only a depth buffer. @@ -355,26 +365,48 @@ static void emit_depthbuffer(struct brw_context *brw) ADVANCE_BATCH(); } - /* Emit hiz buffer. */ if (hiz_region || stencil_irb) { - BEGIN_BATCH(3); - OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); - OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1); - OUT_RELOC(hiz_region->buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); - ADVANCE_BATCH(); - } + /* + * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate + * stencil enable' and 'hiz enable' bits were set. Therefore we must + * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if + * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted; + * failure to do so causes hangs on gen5 and a stall on gen6. + */ - /* Emit stencil buffer. */ - if (hiz_region || stencil_irb) { - BEGIN_BATCH(3); - OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); - OUT_BATCH(stencil_irb->region->pitch * stencil_irb->region->cpp - 1); - OUT_RELOC(stencil_irb->region->buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); - ADVANCE_BATCH(); + /* Emit hiz buffer. */ + if (hiz_region) { + BEGIN_BATCH(3); + OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); + OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1); + OUT_RELOC(hiz_region->buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(3); + OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* Emit stencil buffer. */ + if (stencil_irb) { + BEGIN_BATCH(3); + OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); + OUT_BATCH(stencil_irb->region->pitch * stencil_irb->region->cpp - 1); + OUT_RELOC(stencil_irb->region->buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(3); + OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } } /* @@ -386,6 +418,9 @@ static void emit_depthbuffer(struct brw_context *brw) * when HiZ is enabled and the DEPTH_BUFFER_STATE changes. */ if (intel->gen >= 6 || hiz_region) { + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); + BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2)); OUT_BATCH(0); @@ -418,6 +453,9 @@ static void upload_polygon_stipple(struct brw_context *brw) if (!ctx->Polygon.StippleFlag) return; + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); + BEGIN_BATCH(33); OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2)); @@ -461,6 +499,9 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) if (!ctx->Polygon.StippleFlag) return; + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); + BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2)); @@ -501,6 +542,9 @@ static void upload_aa_line_parameters(struct brw_context *brw) if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters) return; + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); + OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2)); /* use legacy aa line coverage computation */ OUT_BATCH(0); @@ -531,6 +575,9 @@ static void upload_line_stipple(struct brw_context *brw) if (!ctx->Line.StippleFlag) return; + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); + BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2)); OUT_BATCH(ctx->Line.StipplePattern); @@ -558,28 +605,21 @@ static void upload_invarient_state( struct brw_context *brw ) { struct intel_context *intel = &brw->intel; - { - /* 0x61040000 Pipeline Select */ - /* PipelineSelect : 0 */ - struct brw_pipeline_select ps; + /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */ + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); - memset(&ps, 0, sizeof(ps)); - ps.header.opcode = brw->CMD_PIPELINE_SELECT; - ps.header.pipeline_select = 0; - BRW_BATCH_STRUCT(brw, &ps); - } + /* Select the 3D pipeline (as opposed to media) */ + BEGIN_BATCH(1); + OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0); + ADVANCE_BATCH(); if (intel->gen < 6) { - struct brw_global_depth_offset_clamp gdo; - memset(&gdo, 0, sizeof(gdo)); - - /* Disable depth offset clamping. - */ - gdo.header.opcode = _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP; - gdo.header.length = sizeof(gdo)/4 - 2; - gdo.depth_offset_clamp = 0.0; - - BRW_BATCH_STRUCT(brw, &gdo); + /* Disable depth offset clamping. */ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2)); + OUT_BATCH_F(0.0); + ADVANCE_BATCH(); } if (intel->gen >= 6) { @@ -612,29 +652,15 @@ static void upload_invarient_state( struct brw_context *brw ) } } - /* 0x61020000 State Instruction Pointer */ - { - struct brw_system_instruction_pointer sip; - memset(&sip, 0, sizeof(sip)); - - sip.header.opcode = CMD_STATE_INSN_POINTER; - sip.header.length = 0; - sip.bits0.pad = 0; - sip.bits0.system_instruction_pointer = 0; - BRW_BATCH_STRUCT(brw, &sip); - } - - - { - struct brw_vf_statistics vfs; - memset(&vfs, 0, sizeof(vfs)); - - vfs.opcode = brw->CMD_VF_STATISTICS; - if (unlikely(INTEL_DEBUG & DEBUG_STATS)) - vfs.statistics_enable = 1; + BEGIN_BATCH(2); + OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); - BRW_BATCH_STRUCT(brw, &vfs); - } + BEGIN_BATCH(1); + OUT_BATCH(brw->CMD_VF_STATISTICS << 16 | + (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0)); + ADVANCE_BATCH(); } const struct brw_tracked_state brw_invarient_state = { @@ -660,7 +686,19 @@ static void upload_state_base_address( struct brw_context *brw ) { struct intel_context *intel = &brw->intel; + /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of + * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be + * programmed prior to STATE_BASE_ADDRESS. + * + * However, given that the instruction SBA (general state base + * address) on this chipset is always set to 0 across X and GL, + * maybe this isn't required for us in particular. + */ + if (intel->gen >= 6) { + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); + BEGIN_BATCH(10); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); /* General state base address: stateless DP read/write requests */ @@ -684,7 +722,9 @@ static void upload_state_base_address( struct brw_context *brw ) I915_GEM_DOMAIN_INSTRUCTION), 0, 1); OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */ - OUT_BATCH(1); /* Instruction base address: shader kernels (incl. SIP) */ + OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + 1); /* Instruction base address: shader kernels (incl. SIP) */ + OUT_BATCH(1); /* General state upper bound */ OUT_BATCH(1); /* Dynamic state upper bound */ OUT_BATCH(1); /* Indirect object upper bound */ @@ -697,7 +737,8 @@ static void upload_state_base_address( struct brw_context *brw ) OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1); /* Surface state base address */ OUT_BATCH(1); /* Indirect object base address */ - OUT_BATCH(1); /* Instruction base address */ + OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + 1); /* Instruction base address */ OUT_BATCH(1); /* General state upper bound */ OUT_BATCH(1); /* Indirect object upper bound */ OUT_BATCH(1); /* Instruction access upper bound */ @@ -713,12 +754,37 @@ static void upload_state_base_address( struct brw_context *brw ) OUT_BATCH(1); /* Indirect object upper bound */ ADVANCE_BATCH(); } + + /* According to section 3.6.1 of VOL1 of the 965 PRM, + * STATE_BASE_ADDRESS updates require a reissue of: + * + * 3DSTATE_PIPELINE_POINTERS + * 3DSTATE_BINDING_TABLE_POINTERS + * MEDIA_STATE_POINTERS + * + * and this continues through Ironlake. The Sandy Bridge PRM, vol + * 1 part 1 says that the folowing packets must be reissued: + * + * 3DSTATE_CC_POINTERS + * 3DSTATE_BINDING_TABLE_POINTERS + * 3DSTATE_SAMPLER_STATE_POINTERS + * 3DSTATE_VIEWPORT_STATE_POINTERS + * MEDIA_STATE_POINTERS + * + * Those are always reissued following SBA updates anyway (new + * batch time), except in the case of the program cache BO + * changing. Having a separate state flag makes the sequence more + * obvious. + */ + + brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS; } const struct brw_tracked_state brw_state_base_address = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH, + .brw = (BRW_NEW_BATCH | + BRW_NEW_PROGRAM_CACHE), .cache = 0, }, .emit = upload_state_base_address diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index c2227777cfb..fca30a74aaf 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -120,14 +120,11 @@ static void compile_sf_prog( struct brw_context *brw, printf("\n"); } - /* Upload - */ - drm_intel_bo_unreference(brw->sf.prog_bo); - brw->sf.prog_bo = brw_upload_cache(&brw->cache, BRW_SF_PROG, - &c.key, sizeof(c.key), - program, program_size, - &c.prog_data, sizeof(c.prog_data), - &brw->sf.prog_data); + brw_upload_cache(&brw->cache, BRW_SF_PROG, + &c.key, sizeof(c.key), + program, program_size, + &c.prog_data, sizeof(c.prog_data), + &brw->sf.prog_offset, &brw->sf.prog_data); ralloc_free(mem_ctx); } @@ -191,12 +188,11 @@ static void upload_sf_prog(struct brw_context *brw) key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) ^ (ctx->DrawBuffer->Name != 0); } - drm_intel_bo_unreference(brw->sf.prog_bo); - brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG, - &key, sizeof(key), - &brw->sf.prog_data); - if (brw->sf.prog_bo == NULL) + if (!brw_search_cache(&brw->cache, BRW_SF_PROG, + &key, sizeof(key), + &brw->sf.prog_offset, &brw->sf.prog_data)) { compile_sf_prog( brw, &key ); + } } diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 78b22c4df3d..9201be7caab 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -46,7 +46,8 @@ static void upload_sf_vp(struct brw_context *brw) const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); const GLfloat *v = ctx->Viewport._WindowMap.m; - sfv = brw_state_batch(brw, sizeof(*sfv), 32, &brw->sf.vp_offset); + sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, + sizeof(*sfv), 32, &brw->sf.vp_offset); memset(sfv, 0, sizeof(*sfv)); if (render_to_fbo) { @@ -129,13 +130,19 @@ static void upload_sf_unit( struct brw_context *brw ) int chipset_max_threads; bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; - sf = brw_state_batch(brw, sizeof(*sf), 64, &brw->sf.state_offset); + sf = brw_state_batch(brw, AUB_TRACE_SF_STATE, + sizeof(*sf), 64, &brw->sf.state_offset); memset(sf, 0, sizeof(*sf)); - /* CACHE_NEW_SF_PROG */ + /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_SF_PROG */ sf->thread0.grf_reg_count = ALIGN(brw->sf.prog_data->total_grf, 16) / 16 - 1; - sf->thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */ + sf->thread0.kernel_start_pointer = + brw_program_reloc(brw, + brw->sf.state_offset + + offsetof(struct brw_sf_unit_state, thread0), + brw->sf.prog_offset + + (sf->thread0.grf_reg_count << 1)) >> 6; sf->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; @@ -282,11 +289,6 @@ static void upload_sf_unit( struct brw_context *brw ) /* STATE_PREFETCH command description describes this state as being * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain. */ - /* Emit SF program relocation */ - drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset + - offsetof(struct brw_sf_unit_state, thread0)), - brw->sf.prog_bo, sf->thread0.grf_reg_count << 1, - I915_GEM_DOMAIN_INSTRUCTION, 0); /* Emit SF viewport relocation */ drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset + @@ -308,6 +310,7 @@ const struct brw_tracked_state brw_sf_unit = { _NEW_SCISSOR | _NEW_BUFFERS), .brw = (BRW_NEW_BATCH | + BRW_NEW_PROGRAM_CACHE | BRW_NEW_URB_FENCE), .cache = (CACHE_NEW_SF_VP | CACHE_NEW_SF_PROG) diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 544ef7d47e6..cede4e5c916 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -145,21 +145,21 @@ void brw_clear_validated_bos(struct brw_context *brw); * brw_state_cache.c */ -drm_intel_bo *brw_upload_cache(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_sz, - const void *data, - GLuint data_sz, - const void *aux, - GLuint aux_sz, - void *aux_return); - -drm_intel_bo *brw_search_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - void *aux_return); +void brw_upload_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_sz, + const void *data, + GLuint data_sz, + const void *aux, + GLuint aux_sz, + uint32_t *out_offset, void *out_aux); + +bool brw_search_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + uint32_t *inout_offset, void *out_aux); void brw_state_cache_check_size( struct brw_context *brw ); void brw_init_caches( struct brw_context *brw ); @@ -172,6 +172,7 @@ void brw_destroy_caches( struct brw_context *brw ); sizeof(*(s)), false) void *brw_state_batch(struct brw_context *brw, + enum state_struct_type type, int size, int alignment, uint32_t *out_offset); diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index 213c7a38d8c..5a983c3d847 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -32,6 +32,29 @@ #include "brw_state.h" #include "intel_batchbuffer.h" #include "main/imports.h" +#include "../glsl/ralloc.h" + +static void +brw_track_state_batch(struct brw_context *brw, + enum state_struct_type type, + uint32_t offset, + int size) +{ + struct intel_batchbuffer *batch = &brw->intel.batch; + + if (!brw->state_batch_list) { + /* Our structs are always aligned to at least 32 bytes, so + * our array doesn't need to be any larger + */ + brw->state_batch_list = ralloc_size(brw, sizeof(*brw->state_batch_list) * + batch->bo->size / 32); + } + + brw->state_batch_list[brw->state_batch_count].offset = offset; + brw->state_batch_list[brw->state_batch_count].size = size; + brw->state_batch_list[brw->state_batch_count].type = type; + brw->state_batch_count++; +} /** * Allocates a block of space in the batchbuffer for indirect state. @@ -49,6 +72,7 @@ */ void * brw_state_batch(struct brw_context *brw, + enum state_struct_type type, int size, int alignment, uint32_t *out_offset) @@ -71,6 +95,9 @@ brw_state_batch(struct brw_context *brw, batch->state_batch_offset = offset; + if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) + brw_track_state_batch(brw, type, offset, size); + *out_offset = offset; return batch->map + (offset>>2); } diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index f13a41fa7cc..3988625ea91 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -45,6 +45,7 @@ */ #include "main/imports.h" +#include "intel_batchbuffer.h" #include "brw_state.h" #define FILE_DEBUG_FLAG DEBUG_STATE @@ -67,23 +68,6 @@ hash_key(struct brw_cache_item *item) return hash; } - -/** - * Marks a new buffer as being chosen for the given cache id. - */ -static void -update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, - drm_intel_bo *bo) -{ - if (bo == cache->last_bo[cache_id]) - return; /* no change */ - - drm_intel_bo_unreference(cache->last_bo[cache_id]); - cache->last_bo[cache_id] = bo; - drm_intel_bo_reference(cache->last_bo[cache_id]); - cache->brw->state.dirty.cache |= 1 << cache_id; -} - static int brw_cache_item_equals(const struct brw_cache_item *a, const struct brw_cache_item *b) @@ -145,12 +129,13 @@ rehash(struct brw_cache *cache) /** * Returns the buffer object matching cache_id and key, or NULL. */ -drm_intel_bo * +bool brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, GLuint key_size, - void *aux_return) + uint32_t *inout_offset, void *out_aux) { + struct brw_context *brw = cache->brw; struct brw_cache_item *item; struct brw_cache_item lookup; GLuint hash; @@ -164,19 +149,116 @@ brw_search_cache(struct brw_cache *cache, item = search_cache(cache, hash, &lookup); if (item == NULL) - return NULL; + return false; - if (aux_return) - *(void **)aux_return = (void *)((char *)item->key + item->key_size); + *(void **)out_aux = ((char *)item->key + item->key_size); - update_cache_last(cache, cache_id, item->bo); + if (item->offset != *inout_offset) { + brw->state.dirty.cache |= (1 << cache_id); + *inout_offset = item->offset; + } - drm_intel_bo_reference(item->bo); - return item->bo; + return true; } +static void +brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size) +{ + struct brw_context *brw = cache->brw; + struct intel_context *intel = &brw->intel; + drm_intel_bo *new_bo; + + new_bo = drm_intel_bo_alloc(intel->bufmgr, "program cache", new_size, 64); + + /* Copy any existing data that needs to be saved. */ + if (cache->next_offset != 0) { + drm_intel_bo_map(cache->bo, false); + drm_intel_bo_subdata(new_bo, 0, cache->next_offset, cache->bo->virtual); + drm_intel_bo_unmap(cache->bo); + } + + drm_intel_bo_unreference(cache->bo); + cache->bo = new_bo; + cache->bo_used_by_gpu = false; + + /* Since we have a new BO in place, we need to signal the units + * that depend on it (state base address on gen5+, or unit state before). + */ + brw->state.dirty.brw |= BRW_NEW_PROGRAM_CACHE; +} + +/** + * Attempts to find an item in the cache with identical data and aux + * data to use + */ +static bool +brw_try_upload_using_copy(struct brw_cache *cache, + struct brw_cache_item *result_item, + const void *data, + const void *aux) +{ + int i; + struct brw_cache_item *item; + + for (i = 0; i < cache->size; i++) { + for (item = cache->items[i]; item; item = item->next) { + const void *item_aux = item->key + item->key_size; + int ret; + + if (item->cache_id != result_item->cache_id || + item->size != result_item->size || + item->aux_size != result_item->aux_size) { + continue; + } + + if (memcmp(item_aux, aux, item->aux_size) != 0) { + continue; + } + + drm_intel_bo_map(cache->bo, false); + ret = memcmp(cache->bo->virtual + item->offset, data, item->size); + drm_intel_bo_unmap(cache->bo); + if (ret) + continue; + + result_item->offset = item->offset; + + return true; + } + } + + return false; +} + +static void +brw_upload_item_data(struct brw_cache *cache, + struct brw_cache_item *item, + const void *data) +{ + /* Allocate space in the cache BO for our new program. */ + if (cache->next_offset + item->size > cache->bo->size) { + uint32_t new_size = cache->bo->size * 2; + + while (cache->next_offset + item->size > new_size) + new_size *= 2; + + brw_cache_new_bo(cache, new_size); + } + + /* If we would block on writing to an in-use program BO, just + * recreate it. + */ + if (cache->bo_used_by_gpu) { + brw_cache_new_bo(cache, cache->bo->size); + } + + item->offset = cache->next_offset; + + /* Programs are always 64-byte aligned, so set up the next one now */ + cache->next_offset = ALIGN(item->offset + item->size, 64); +} -drm_intel_bo * +void brw_upload_cache(struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, @@ -185,23 +267,31 @@ brw_upload_cache(struct brw_cache *cache, GLuint data_size, const void *aux, GLuint aux_size, - void *aux_return) + uint32_t *out_offset, + void *out_aux) { struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); GLuint hash; void *tmp; - drm_intel_bo *bo; item->cache_id = cache_id; + item->size = data_size; item->key = key; item->key_size = key_size; + item->aux_size = aux_size; hash = hash_key(item); item->hash = hash; - /* Create the buffer object to contain the data */ - bo = drm_intel_bo_alloc(cache->brw->intel.bufmgr, - cache->name[cache_id], data_size, 1 << 6); - + /* If we can find a matching prog/prog_data combo in the cache + * already, then reuse the existing stuff. This will mean not + * flagging CACHE_NEW_* when transitioning between the two + * equivalent hash keys. This is notably useful for programs + * generating shaders at runtime, where multiple shaders may + * compile to the thing in our backend. + */ + if (!brw_try_upload_using_copy(cache, item, data, aux)) { + brw_upload_item_data(cache, item, data); + } /* Set up the memory containing the key and aux_data */ tmp = malloc(key_size + aux_size); @@ -211,9 +301,6 @@ brw_upload_cache(struct brw_cache *cache, item->key = tmp; - item->bo = bo; - drm_intel_bo_reference(bo); - if (cache->n_items > cache->size * 1.5) rehash(cache); @@ -222,34 +309,18 @@ brw_upload_cache(struct brw_cache *cache, cache->items[hash] = item; cache->n_items++; - if (aux_return) { - *(void **)aux_return = (void *)((char *)item->key + item->key_size); - } - - DBG("upload %s: %d bytes to cache id %d\n", - cache->name[cache_id], - data_size, cache_id); - /* Copy data to the buffer */ - drm_intel_bo_subdata(bo, 0, data_size, data); - - update_cache_last(cache, cache_id, bo); + drm_intel_bo_subdata(cache->bo, item->offset, data_size, data); - return bo; -} - -static void -brw_init_cache_id(struct brw_cache *cache, - const char *name, - enum brw_cache_id id) -{ - cache->name[id] = strdup(name); + *out_offset = item->offset; + *(void **)out_aux = (void *)((char *)item->key + item->key_size); + cache->brw->state.dirty.cache |= 1 << cache_id; } - void brw_init_caches(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; struct brw_cache *cache = &brw->cache; cache->brw = brw; @@ -259,36 +330,15 @@ brw_init_caches(struct brw_context *brw) cache->items = (struct brw_cache_item **) calloc(1, cache->size * sizeof(struct brw_cache_item)); - brw_init_cache_id(cache, "CC_VP", BRW_CC_VP); - brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT); - brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG); - brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER); - brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT); - brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG); - brw_init_cache_id(cache, "SF_VP", BRW_SF_VP); - - brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT); - - brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT); - - brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG); - - brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT); - - brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG); - brw_init_cache_id(cache, "CLIP_VP", BRW_CLIP_VP); - - brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT); - - brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG); - brw_init_cache_id(cache, "BLEND_STATE", BRW_BLEND_STATE); - brw_init_cache_id(cache, "COLOR_CALC_STATE", BRW_COLOR_CALC_STATE); - brw_init_cache_id(cache, "DEPTH_STENCIL_STATE", BRW_DEPTH_STENCIL_STATE); + cache->bo = drm_intel_bo_alloc(intel->bufmgr, + "program cache", + 4096, 64); } static void brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) { + struct intel_context *intel = &brw->intel; struct brw_cache_item *c, *next; GLuint i; @@ -297,7 +347,6 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) for (i = 0; i < cache->size; i++) { for (c = cache->items[i]; c; c = next) { next = c->next; - drm_intel_bo_unreference(c->bo); free((void *)c->key); free(c); } @@ -306,9 +355,18 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) cache->n_items = 0; + /* Start putting programs into the start of the BO again, since + * we'll never find the old results. + */ + cache->next_offset = 0; + + /* We need to make sure that the programs get regenerated, since + * any offsets leftover in brw_context will no longer be valid. + */ brw->state.dirty.mesa |= ~0; brw->state.dirty.brw |= ~0; brw->state.dirty.cache |= ~0; + intel_batchbuffer_flush(intel); } void @@ -325,15 +383,10 @@ brw_state_cache_check_size(struct brw_context *brw) static void brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) { - GLuint i; DBG("%s\n", __FUNCTION__); brw_clear_cache(brw, cache); - for (i = 0; i < BRW_MAX_CACHE; i++) { - drm_intel_bo_unreference(cache->last_bo[i]); - free(cache->name[i]); - } free(cache->items); cache->items = NULL; cache->size = 0; diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index ff06cb3a91e..b9e5cc1a534 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -31,44 +31,23 @@ #include "brw_context.h" #include "brw_defines.h" -/** - * Prints out a header, the contents, and the message associated with - * the hardware state data given. - * - * \param name Name of the state object - * \param data Pointer to the base of the state object - * \param hw_offset Hardware offset of the base of the state data. - * \param index Index of the DWORD being output. - */ static void -state_out(const char *name, void *data, uint32_t hw_offset, int index, - char *fmt, ...) -{ - va_list va; +batch_out(struct brw_context *brw, const char *name, uint32_t offset, + int index, char *fmt, ...) PRINTFLIKE(5, 6); - fprintf(stderr, "%8s: 0x%08x: 0x%08x: ", - name, hw_offset + index * 4, ((uint32_t *)data)[index]); - va_start(va, fmt); - vfprintf(stderr, fmt, va); - va_end(va); -} - -/** Generic, undecoded state buffer debug printout */ static void -state_struct_out(const char *name, drm_intel_bo *buffer, - unsigned int offset, unsigned int size) +batch_out(struct brw_context *brw, const char *name, uint32_t offset, + int index, char *fmt, ...) { - int i; - - if (buffer == NULL) - return; - - drm_intel_bo_map(buffer, GL_FALSE); - for (i = 0; i < size / 4; i++) { - state_out(name, buffer->virtual + offset, buffer->offset + offset, i, - "dword %d\n", i); - } - drm_intel_bo_unmap(buffer); + struct intel_context *intel = &brw->intel; + uint32_t *data = intel->batch.bo->virtual + offset; + va_list va; + + fprintf(stderr, "0x%08x: 0x%08x: %8s: ", + offset + index * 4, data[index], name); + va_start(va, fmt); + vfprintf(stderr, fmt, va); + va_end(va); } static const char * @@ -98,397 +77,537 @@ get_965_surface_format(unsigned int surface_format) } } -static void dump_wm_surface_state(struct brw_context *brw) +static void dump_vs_state(struct brw_context *brw, uint32_t offset) { - dri_bo *bo; - GLubyte *base; - int i; + struct intel_context *intel = &brw->intel; + const char *name = "VS_STATE"; + struct brw_vs_unit_state *vs = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + vs->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "vs5\n"); + batch_out(brw, name, offset, 6, "vs6\n"); +} + +static void dump_gs_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "GS_STATE"; + struct brw_gs_unit_state *gs = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + gs->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "vs5\n"); + batch_out(brw, name, offset, 6, "vs6\n"); +} - bo = brw->intel.batch.bo; - drm_intel_bo_map(bo, GL_FALSE); - base = bo->virtual; +static void dump_clip_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "CLIP_STATE"; + struct brw_clip_unit_state *clip = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + clip->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "clip5\n"); + batch_out(brw, name, offset, 6, "clip6\n"); + batch_out(brw, name, offset, 7, "vp xmin %f\n", clip->viewport_xmin); + batch_out(brw, name, offset, 8, "vp xmax %f\n", clip->viewport_xmax); + batch_out(brw, name, offset, 9, "vp ymin %f\n", clip->viewport_ymin); + batch_out(brw, name, offset, 10, "vp ymax %f\n", clip->viewport_ymax); +} - for (i = 0; i < brw->wm.nr_surfaces; i++) { - unsigned int surfoff; - uint32_t *surf; - char name[20]; +static void dump_sf_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "SF_STATE"; + struct brw_sf_unit_state *sf = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + sf->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "sf5: viewport offset\n"); + batch_out(brw, name, offset, 6, "sf6\n"); + batch_out(brw, name, offset, 7, "sf7\n"); +} - if (brw->wm.surf_offset[i] == 0) { - fprintf(stderr, "WM SURF%d: NULL\n", i); - continue; - } - surfoff = bo->offset + brw->wm.surf_offset[i]; - surf = (uint32_t *)(base + brw->wm.surf_offset[i]); - - sprintf(name, "WM SURF%d", i); - state_out(name, surf, surfoff, 0, "%s %s\n", - get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), - get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT))); - state_out(name, surf, surfoff, 1, "offset\n"); - state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n", - GET_FIELD(surf[2], BRW_SURFACE_WIDTH) + 1, - GET_FIELD(surf[2], BRW_SURFACE_HEIGHT) + 1); - state_out(name, surf, surfoff, 3, "pitch %d, %s tiled\n", - GET_FIELD(surf[3], BRW_SURFACE_PITCH) + 1, - (surf[3] & BRW_SURFACE_TILED) ? - ((surf[3] & BRW_SURFACE_TILED_Y) ? "Y" : "X") : "not"); - state_out(name, surf, surfoff, 4, "mip base %d\n", - GET_FIELD(surf[4], BRW_SURFACE_MIN_LOD)); - state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n", - GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET), - GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET)); - } - drm_intel_bo_unmap(bo); +static void dump_wm_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "WM_STATE"; + struct brw_wm_unit_state *wm = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "wm4\n"); + batch_out(brw, name, offset, 5, "wm5: %s%s%s%s%s%s, %d threads\n", + wm->wm5.enable_8_pix ? "8pix" : "", + wm->wm5.enable_16_pix ? "16pix" : "", + wm->wm5.program_uses_depth ? ", uses depth" : "", + wm->wm5.program_computes_depth ? ", computes depth" : "", + wm->wm5.program_uses_killpixel ? ", kills" : "", + wm->wm5.thread_dispatch_enable ? "" : ", no dispatch", + wm->wm5.max_threads + 1); + batch_out(brw, name, offset, 6, "depth offset constant %f\n", + wm->global_depth_offset_constant); + batch_out(brw, name, offset, 7, "depth offset scale %f\n", + wm->global_depth_offset_scale); + batch_out(brw, name, offset, 8, "wm8: kernel 1 (gen5+)\n"); + batch_out(brw, name, offset, 9, "wm9: kernel 2 (gen5+)\n"); + batch_out(brw, name, offset, 10, "wm10: kernel 3 (gen5+)\n"); } -static void dump_gen7_surface_state(struct brw_context *brw) +static void dump_surface_state(struct brw_context *brw, uint32_t offset) { - dri_bo *bo; - GLubyte *base; - int i; + const char *name = "SURF"; + uint32_t *surf = brw->intel.batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "%s %s\n", + get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), + get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT))); + batch_out(brw, name, offset, 1, "offset\n"); + batch_out(brw, name, offset, 2, "%dx%d size, %d mips\n", + GET_FIELD(surf[2], BRW_SURFACE_WIDTH) + 1, + GET_FIELD(surf[2], BRW_SURFACE_HEIGHT) + 1, + GET_FIELD(surf[2], BRW_SURFACE_LOD)); + batch_out(brw, name, offset, 3, "pitch %d, %s tiled\n", + GET_FIELD(surf[3], BRW_SURFACE_PITCH) + 1, + (surf[3] & BRW_SURFACE_TILED) ? + ((surf[3] & BRW_SURFACE_TILED_Y) ? "Y" : "X") : "not"); + batch_out(brw, name, offset, 4, "mip base %d\n", + GET_FIELD(surf[4], BRW_SURFACE_MIN_LOD)); + batch_out(brw, name, offset, 5, "x,y offset: %d,%d\n", + GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET), + GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET)); +} - bo = brw->intel.batch.bo; - drm_intel_bo_map(bo, GL_FALSE); - base = bo->virtual; +static void dump_gen7_surface_state(struct brw_context *brw, uint32_t offset) +{ + const char *name = "SURF"; + struct gen7_surface_state *surf = brw->intel.batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "%s %s\n", + get_965_surfacetype(surf->ss0.surface_type), + get_965_surface_format(surf->ss0.surface_format)); + batch_out(brw, name, offset, 1, "offset\n"); + batch_out(brw, name, offset, 2, "%dx%d size, %d mips\n", + surf->ss2.width + 1, surf->ss2.height + 1, surf->ss5.mip_count); + batch_out(brw, name, offset, 3, "pitch %d, %stiled\n", + surf->ss3.pitch + 1, surf->ss0.tiled_surface ? "" : "not "); + batch_out(brw, name, offset, 4, "mip base %d\n", + surf->ss5.min_lod); + batch_out(brw, name, offset, 5, "x,y offset: %d,%d\n", + surf->ss5.x_offset, surf->ss5.y_offset); +} - for (i = 0; i < brw->wm.nr_surfaces; i++) { - unsigned int surfoff; - struct gen7_surface_state *surf; - char name[20]; +static void +dump_sdc(struct brw_context *brw, uint32_t offset) +{ + const char *name = "SDC"; + struct intel_context *intel = &brw->intel; - if (brw->wm.surf_offset[i] == 0) { - fprintf(stderr, "WM SURF%d: NULL\n", i); - continue; - } - surfoff = bo->offset + brw->wm.surf_offset[i]; - surf = (struct gen7_surface_state *) (base + brw->wm.surf_offset[i]); - - sprintf(name, "WM SURF%d", i); - state_out(name, surf, surfoff, 0, "%s %s\n", - get_965_surfacetype(surf->ss0.surface_type), - get_965_surface_format(surf->ss0.surface_format)); - state_out(name, surf, surfoff, 1, "offset\n"); - state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n", - surf->ss2.width + 1, surf->ss2.height + 1, surf->ss5.mip_count); - state_out(name, surf, surfoff, 3, "pitch %d, %stiled\n", - surf->ss3.pitch + 1, surf->ss0.tiled_surface ? "" : "not "); - state_out(name, surf, surfoff, 4, "mip base %d\n", - surf->ss5.min_lod); - state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n", - surf->ss5.x_offset, surf->ss5.y_offset); + if (intel->gen >= 5 && intel->gen <= 6) { + struct gen5_sampler_default_color *sdc = (intel->batch.bo->virtual + + offset); + batch_out(brw, name, offset, 0, "unorm rgba\n"); + batch_out(brw, name, offset, 1, "r %f\n", sdc->f[0]); + batch_out(brw, name, offset, 2, "b %f\n", sdc->f[1]); + batch_out(brw, name, offset, 3, "g %f\n", sdc->f[2]); + batch_out(brw, name, offset, 4, "a %f\n", sdc->f[3]); + batch_out(brw, name, offset, 5, "half float rg\n"); + batch_out(brw, name, offset, 6, "half float ba\n"); + batch_out(brw, name, offset, 7, "u16 rg\n"); + batch_out(brw, name, offset, 8, "u16 ba\n"); + batch_out(brw, name, offset, 9, "s16 rg\n"); + batch_out(brw, name, offset, 10, "s16 ba\n"); + batch_out(brw, name, offset, 11, "s8 rgba\n"); + } else { + struct brw_sampler_default_color *sdc = (intel->batch.bo->virtual + + offset); + batch_out(brw, name, offset, 0, "r %f\n", sdc->color[0]); + batch_out(brw, name, offset, 1, "g %f\n", sdc->color[1]); + batch_out(brw, name, offset, 2, "b %f\n", sdc->color[2]); + batch_out(brw, name, offset, 3, "a %f\n", sdc->color[3]); } - drm_intel_bo_unmap(bo); } -static void dump_wm_sampler_state(struct brw_context *brw) +static void dump_sampler_state(struct brw_context *brw, + uint32_t offset, uint32_t size) { struct intel_context *intel = &brw->intel; - struct gl_context *ctx = &brw->intel.ctx; int i; + struct brw_sampler_state *samp = intel->batch.bo->virtual + offset; assert(intel->gen < 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - unsigned int offset; - uint32_t sdc_offset; - struct brw_sampler_state *samp; + for (i = 0; i < size / sizeof(*samp); i++) { char name[20]; - if (!ctx->Texture.Unit[i]._ReallyEnabled) { - fprintf(stderr, "WM SAMP%d: disabled\n", i); - continue; - } - - offset = (intel->batch.bo->offset + - brw->wm.sampler_offset + - i * sizeof(struct brw_sampler_state)); - samp = (struct brw_sampler_state *)(intel->batch.bo->virtual + - brw->wm.sampler_offset + - i * sizeof(struct brw_sampler_state)); - sprintf(name, "WM SAMP%d", i); - state_out(name, samp, offset, 0, "filtering\n"); - state_out(name, samp, offset, 1, "wrapping, lod\n"); - state_out(name, samp, offset, 2, "default color pointer\n"); - state_out(name, samp, offset, 3, "chroma key, aniso\n"); - - sprintf(name, " WM SDC%d", i); - - sdc_offset = intel->batch.bo->offset + brw->wm.sdc_offset[i]; - if (intel->gen >= 5) { - struct gen5_sampler_default_color *sdc = (intel->batch.bo->virtual + - brw->wm.sdc_offset[i]); - state_out(name, sdc, sdc_offset, 0, "unorm rgba\n"); - state_out(name, sdc, sdc_offset, 1, "r %f\n", sdc->f[0]); - state_out(name, sdc, sdc_offset, 2, "b %f\n", sdc->f[1]); - state_out(name, sdc, sdc_offset, 3, "g %f\n", sdc->f[2]); - state_out(name, sdc, sdc_offset, 4, "a %f\n", sdc->f[3]); - state_out(name, sdc, sdc_offset, 5, "half float rg\n"); - state_out(name, sdc, sdc_offset, 6, "half float ba\n"); - state_out(name, sdc, sdc_offset, 7, "u16 rg\n"); - state_out(name, sdc, sdc_offset, 8, "u16 ba\n"); - state_out(name, sdc, sdc_offset, 9, "s16 rg\n"); - state_out(name, sdc, sdc_offset, 10, "s16 ba\n"); - state_out(name, sdc, sdc_offset, 11, "s8 rgba\n"); - } else { - struct brw_sampler_default_color *sdc = (intel->batch.bo->virtual + - brw->wm.sdc_offset[i]); - state_out(name, sdc, sdc_offset, 0, "r %f\n", sdc->color[0]); - state_out(name, sdc, sdc_offset, 1, "g %f\n", sdc->color[1]); - state_out(name, sdc, sdc_offset, 2, "b %f\n", sdc->color[2]); - state_out(name, sdc, sdc_offset, 3, "a %f\n", sdc->color[3]); - } + batch_out(brw, name, offset, 0, "filtering\n"); + batch_out(brw, name, offset, 1, "wrapping, lod\n"); + batch_out(brw, name, offset, 2, "default color pointer\n"); + batch_out(brw, name, offset, 3, "chroma key, aniso\n"); + + samp++; + offset += sizeof(*samp); } - drm_intel_bo_unmap(intel->batch.bo); } -static void dump_gen7_sampler_state(struct brw_context *brw) +static void dump_gen7_sampler_state(struct brw_context *brw, + uint32_t offset, uint32_t size) { struct intel_context *intel = &brw->intel; - struct gl_context *ctx = &brw->intel.ctx; + struct gen7_sampler_state *samp = intel->batch.bo->virtual + offset; int i; assert(intel->gen >= 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - unsigned int offset; - uint32_t sdc_offset; - struct gen7_sampler_state *samp; + for (i = 0; i < size / sizeof(*samp); i++) { char name[20]; - if (!ctx->Texture.Unit[i]._ReallyEnabled) { - fprintf(stderr, "WM SAMP%d: disabled\n", i); - continue; - } - - offset = (intel->batch.bo->offset + - brw->wm.sampler_offset + - i * sizeof(struct gen7_sampler_state)); - samp = (struct gen7_sampler_state *) - (intel->batch.bo->virtual + brw->wm.sampler_offset + - i * sizeof(struct gen7_sampler_state)); - sprintf(name, "WM SAMP%d", i); - state_out(name, samp, offset, 0, "filtering\n"); - state_out(name, samp, offset, 1, "wrapping, lod\n"); - state_out(name, samp, offset, 2, "default color pointer\n"); - state_out(name, samp, offset, 3, "chroma key, aniso\n"); - - sprintf(name, " WM SDC%d", i); - - sdc_offset = intel->batch.bo->offset + brw->wm.sdc_offset[i]; - struct brw_sampler_default_color *sdc = - intel->batch.bo->virtual + brw->wm.sdc_offset[i]; - state_out(name, sdc, sdc_offset, 0, "r %f\n", sdc->color[0]); - state_out(name, sdc, sdc_offset, 1, "g %f\n", sdc->color[1]); - state_out(name, sdc, sdc_offset, 2, "b %f\n", sdc->color[2]); - state_out(name, sdc, sdc_offset, 3, "a %f\n", sdc->color[3]); + batch_out(brw, name, offset, 0, "filtering\n"); + batch_out(brw, name, offset, 1, "wrapping, lod\n"); + batch_out(brw, name, offset, 2, "default color pointer\n"); + batch_out(brw, name, offset, 3, "chroma key, aniso\n"); + + samp++; + offset += sizeof(*samp); } drm_intel_bo_unmap(intel->batch.bo); } -static void dump_sf_viewport_state(struct brw_context *brw) +static void dump_sf_viewport_state(struct brw_context *brw, + uint32_t offset) { struct intel_context *intel = &brw->intel; const char *name = "SF VP"; - struct brw_sf_viewport *vp; - uint32_t vp_off; + struct brw_sf_viewport *vp = intel->batch.bo->virtual + offset; assert(intel->gen < 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - - vp = intel->batch.bo->virtual + brw->sf.vp_offset; - vp_off = intel->batch.bo->offset + brw->sf.vp_offset; - - state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); - state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11); - state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22); - state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30); - state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31); - state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32); + batch_out(brw, name, offset, 0, "m00 = %f\n", vp->viewport.m00); + batch_out(brw, name, offset, 1, "m11 = %f\n", vp->viewport.m11); + batch_out(brw, name, offset, 2, "m22 = %f\n", vp->viewport.m22); + batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30); + batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31); + batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32); - state_out(name, vp, vp_off, 6, "top left = %d,%d\n", + batch_out(brw, name, offset, 6, "top left = %d,%d\n", vp->scissor.xmin, vp->scissor.ymin); - state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n", + batch_out(brw, name, offset, 7, "bottom right = %d,%d\n", vp->scissor.xmax, vp->scissor.ymax); - - drm_intel_bo_unmap(intel->batch.bo); } -static void dump_clip_viewport_state(struct brw_context *brw) +static void dump_clip_viewport_state(struct brw_context *brw, + uint32_t offset) { struct intel_context *intel = &brw->intel; const char *name = "CLIP VP"; - struct brw_clipper_viewport *vp; - uint32_t vp_off; + struct brw_clipper_viewport *vp = intel->batch.bo->virtual + offset; assert(intel->gen < 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - - vp = intel->batch.bo->virtual + brw->clip.vp_offset; - vp_off = intel->batch.bo->offset + brw->clip.vp_offset; - - state_out(name, vp, vp_off, 0, "xmin = %f\n", vp->xmin); - state_out(name, vp, vp_off, 1, "xmax = %f\n", vp->xmax); - state_out(name, vp, vp_off, 2, "ymin = %f\n", vp->ymin); - state_out(name, vp, vp_off, 3, "ymax = %f\n", vp->ymax); - drm_intel_bo_unmap(intel->batch.bo); + batch_out(brw, name, offset, 0, "xmin = %f\n", vp->xmin); + batch_out(brw, name, offset, 1, "xmax = %f\n", vp->xmax); + batch_out(brw, name, offset, 2, "ymin = %f\n", vp->ymin); + batch_out(brw, name, offset, 3, "ymax = %f\n", vp->ymax); } -static void dump_sf_clip_viewport_state(struct brw_context *brw) +static void dump_sf_clip_viewport_state(struct brw_context *brw, + uint32_t offset) { struct intel_context *intel = &brw->intel; const char *name = "SF_CLIP VP"; - struct gen7_sf_clip_viewport *vp; - uint32_t vp_off; + struct gen7_sf_clip_viewport *vp = intel->batch.bo->virtual + offset; assert(intel->gen >= 7); - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - - vp = intel->batch.bo->virtual + brw->sf.vp_offset; - vp_off = intel->batch.bo->offset + brw->sf.vp_offset; - - state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); - state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11); - state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22); - state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30); - state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31); - state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32); - state_out(name, vp, vp_off, 6, "guardband xmin = %f\n", vp->guardband.xmin); - state_out(name, vp, vp_off, 7, "guardband xmax = %f\n", vp->guardband.xmax); - state_out(name, vp, vp_off, 8, "guardband ymin = %f\n", vp->guardband.ymin); - state_out(name, vp, vp_off, 9, "guardband ymax = %f\n", vp->guardband.ymax); - drm_intel_bo_unmap(intel->batch.bo); + batch_out(brw, name, offset, 0, "m00 = %f\n", vp->viewport.m00); + batch_out(brw, name, offset, 1, "m11 = %f\n", vp->viewport.m11); + batch_out(brw, name, offset, 2, "m22 = %f\n", vp->viewport.m22); + batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30); + batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31); + batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32); + batch_out(brw, name, offset, 6, "guardband xmin = %f\n", vp->guardband.xmin); + batch_out(brw, name, offset, 7, "guardband xmax = %f\n", vp->guardband.xmax); + batch_out(brw, name, offset, 8, "guardband ymin = %f\n", vp->guardband.ymin); + batch_out(brw, name, offset, 9, "guardband ymax = %f\n", vp->guardband.ymax); } -static void dump_cc_viewport_state(struct brw_context *brw) +static void dump_cc_viewport_state(struct brw_context *brw, uint32_t offset) { - struct intel_context *intel = &brw->intel; const char *name = "CC VP"; - struct brw_cc_viewport *vp; - uint32_t vp_off; - - drm_intel_bo_map(intel->batch.bo, GL_FALSE); + struct brw_cc_viewport *vp = brw->intel.batch.bo->virtual + offset; - vp = intel->batch.bo->virtual + brw->cc.vp_offset; - vp_off = intel->batch.bo->offset + brw->cc.vp_offset; + batch_out(brw, name, offset, 0, "min_depth = %f\n", vp->min_depth); + batch_out(brw, name, offset, 1, "max_depth = %f\n", vp->max_depth); +} - state_out(name, vp, vp_off, 0, "min_depth = %f\n", vp->min_depth); - state_out(name, vp, vp_off, 1, "max_depth = %f\n", vp->max_depth); - drm_intel_bo_unmap(intel->batch.bo); +static void dump_depth_stencil_state(struct brw_context *brw, uint32_t offset) +{ + const char *name = "D_S"; + struct gen6_depth_stencil_state *ds = brw->intel.batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, + "stencil %sable, func %d, write %sable\n", + ds->ds0.stencil_enable ? "en" : "dis", + ds->ds0.stencil_func, + ds->ds0.stencil_write_enable ? "en" : "dis"); + batch_out(brw, name, offset, 1, + "stencil test mask 0x%x, write mask 0x%x\n", + ds->ds1.stencil_test_mask, ds->ds1.stencil_write_mask); + batch_out(brw, name, offset, 2, + "depth test %sable, func %d, write %sable\n", + ds->ds2.depth_test_enable ? "en" : "dis", + ds->ds2.depth_test_func, + ds->ds2.depth_write_enable ? "en" : "dis"); } -static void dump_depth_stencil_state(struct brw_context *brw) +static void dump_cc_state_gen4(struct brw_context *brw, uint32_t offset) { - struct intel_context *intel = &brw->intel; - const char *name = "DEPTH STENCIL"; - struct gen6_depth_stencil_state *ds; - uint32_t ds_off; - - drm_intel_bo_map(intel->batch.bo, GL_FALSE); - - ds = intel->batch.bo->virtual + brw->cc.depth_stencil_state_offset; - ds_off = intel->batch.bo->offset + brw->cc.depth_stencil_state_offset; - - state_out(name, ds, ds_off, 0, "stencil %sable, func %d, write %sable\n", - ds->ds0.stencil_enable ? "en" : "dis", - ds->ds0.stencil_func, - ds->ds0.stencil_write_enable ? "en" : "dis"); - state_out(name, ds, ds_off, 1, "stencil test mask 0x%x, write mask 0x%x\n", - ds->ds1.stencil_test_mask, ds->ds1.stencil_write_mask); - state_out(name, ds, ds_off, 2, "depth test %sable, func %d, write %sable\n", - ds->ds2.depth_test_enable ? "en" : "dis", - ds->ds2.depth_test_func, - ds->ds2.depth_write_enable ? "en" : "dis"); - drm_intel_bo_unmap(intel->batch.bo); + const char *name = "CC"; + + batch_out(brw, name, offset, 0, "cc0\n"); + batch_out(brw, name, offset, 1, "cc1\n"); + batch_out(brw, name, offset, 2, "cc2\n"); + batch_out(brw, name, offset, 3, "cc3\n"); + batch_out(brw, name, offset, 4, "cc4: viewport offset\n"); + batch_out(brw, name, offset, 5, "cc5\n"); + batch_out(brw, name, offset, 6, "cc6\n"); + batch_out(brw, name, offset, 7, "cc7\n"); } -static void dump_cc_state(struct brw_context *brw) +static void dump_cc_state_gen6(struct brw_context *brw, uint32_t offset) { const char *name = "CC"; - struct gen6_color_calc_state *cc; - uint32_t cc_off; - dri_bo *bo = brw->intel.batch.bo; - - if (brw->cc.state_offset == 0) - return; - - drm_intel_bo_map(bo, GL_FALSE); - cc = bo->virtual + brw->cc.state_offset; - cc_off = bo->offset + brw->cc.state_offset; - - state_out(name, cc, cc_off, 0, "alpha test format %s, round disable %d, stencil ref %d," - "bf stencil ref %d\n", - cc->cc0.alpha_test_format ? "FLOAT32" : "UNORM8", - cc->cc0.round_disable, - cc->cc0.stencil_ref, - cc->cc0.bf_stencil_ref); - state_out(name, cc, cc_off, 1, "\n"); - state_out(name, cc, cc_off, 2, "constant red %f\n", cc->constant_r); - state_out(name, cc, cc_off, 3, "constant green %f\n", cc->constant_g); - state_out(name, cc, cc_off, 4, "constant blue %f\n", cc->constant_b); - state_out(name, cc, cc_off, 5, "constant alpha %f\n", cc->constant_a); - - drm_intel_bo_unmap(bo); + struct gen6_color_calc_state *cc = brw->intel.batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, + "alpha test format %s, round disable %d, stencil ref %d, " + "bf stencil ref %d\n", + cc->cc0.alpha_test_format ? "FLOAT32" : "UNORM8", + cc->cc0.round_disable, + cc->cc0.stencil_ref, + cc->cc0.bf_stencil_ref); + batch_out(brw, name, offset, 1, "\n"); + batch_out(brw, name, offset, 2, "constant red %f\n", cc->constant_r); + batch_out(brw, name, offset, 3, "constant green %f\n", cc->constant_g); + batch_out(brw, name, offset, 4, "constant blue %f\n", cc->constant_b); + batch_out(brw, name, offset, 5, "constant alpha %f\n", cc->constant_a); +} +static void dump_blend_state(struct brw_context *brw, uint32_t offset) +{ + const char *name = "BLEND"; + + batch_out(brw, name, offset, 0, "\n"); + batch_out(brw, name, offset, 1, "\n"); } -static void dump_blend_state(struct brw_context *brw) +static void +dump_scissor(struct brw_context *brw, uint32_t offset) { + const char *name = "SCISSOR"; struct intel_context *intel = &brw->intel; - const char *name = "BLEND"; - struct gen6_blend_state *blend; - uint32_t blend_off; + struct gen6_scissor_rect *scissor = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "xmin %d, ymin %d\n", + scissor->xmin, scissor->ymin); + batch_out(brw, name, offset, 1, "xmax %d, ymax %d\n", + scissor->xmax, scissor->ymax); +} - drm_intel_bo_map(intel->batch.bo, GL_FALSE); +static void +dump_vs_constants(struct brw_context *brw, uint32_t offset, uint32_t size) +{ + const char *name = "VS_CONST"; + struct intel_context *intel = &brw->intel; + uint32_t *as_uint = intel->batch.bo->virtual + offset; + float *as_float = intel->batch.bo->virtual + offset; + int i; - blend = intel->batch.bo->virtual + brw->cc.blend_state_offset; - blend_off = intel->batch.bo->offset + brw->cc.blend_state_offset; + for (i = 0; i < size / 4; i += 4) { + batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n", + i / 4, + as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3], + as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]); + } +} - state_out(name, blend, blend_off, 0, "\n"); - state_out(name, blend, blend_off, 1, "\n"); +static void dump_binding_table(struct brw_context *brw, uint32_t offset, + uint32_t size) +{ + char name[20]; + int i; + uint32_t *data = brw->intel.batch.bo->virtual + offset; - drm_intel_bo_unmap(intel->batch.bo); + for (i = 0; i < size / 4; i++) { + if (data[i] == 0) + continue; + sprintf(name, "BIND%d", i); + batch_out(brw, name, offset, i, "surface state address\n"); + } } -static void brw_debug_prog(const char *name, drm_intel_bo *prog) +static void +dump_prog_cache(struct brw_context *brw) { - unsigned int i; + struct intel_context *intel = &brw->intel; + struct brw_cache *cache = &brw->cache; + unsigned int b, i; uint32_t *data; - if (prog == NULL) - return; + drm_intel_bo_map(brw->cache.bo, false); + + for (b = 0; b < cache->size; b++) { + struct brw_cache_item *item; + + for (item = cache->items[b]; item; item = item->next) { + const char *name; + uint32_t offset = item->offset; + + data = brw->cache.bo->virtual + item->offset; + + switch (item->cache_id) { + case BRW_VS_PROG: + name = "VS kernel"; + break; + case BRW_GS_PROG: + name = "GS kernel"; + break; + case BRW_CLIP_PROG: + name = "CLIP kernel"; + break; + case BRW_SF_PROG: + name = "SF kernel"; + break; + case BRW_WM_PROG: + name = "WM kernel"; + break; + default: + name = "unknown"; + break; + } + + for (i = 0; i < item->size / 4 / 4; i++) { + fprintf(stderr, "0x%08x: %8s: 0x%08x 0x%08x 0x%08x 0x%08x ", + offset + i * 4 * 4, + name, + data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); + + brw_disasm(stderr, (void *)(data + i * 4), intel->gen); + } + } + } + + drm_intel_bo_unmap(brw->cache.bo); +} - drm_intel_bo_map(prog, GL_FALSE); +static void +dump_state_batch(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + int i; - data = prog->virtual; + for (i = 0; i < brw->state_batch_count; i++) { + uint32_t offset = brw->state_batch_list[i].offset; + uint32_t size = brw->state_batch_list[i].size; - for (i = 0; i < prog->size / 4 / 4; i++) { - fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", - name, (unsigned int)prog->offset + i * 4 * 4, - data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); - /* Stop at the end of the program. It'd be nice to keep track of the actual - * intended program size instead of guessing like this. - */ - if (data[i * 4 + 0] == 0 && - data[i * 4 + 1] == 0 && - data[i * 4 + 2] == 0 && - data[i * 4 + 3] == 0) + switch (brw->state_batch_list[i].type) { + case AUB_TRACE_VS_STATE: + dump_vs_state(brw, offset); + break; + case AUB_TRACE_GS_STATE: + dump_gs_state(brw, offset); + break; + case AUB_TRACE_CLIP_STATE: + dump_clip_state(brw, offset); + break; + case AUB_TRACE_SF_STATE: + dump_sf_state(brw, offset); + break; + case AUB_TRACE_WM_STATE: + dump_wm_state(brw, offset); + break; + case AUB_TRACE_CLIP_VP_STATE: + dump_clip_viewport_state(brw, offset); + break; + case AUB_TRACE_SF_VP_STATE: + if (intel->gen >= 7) { + dump_sf_clip_viewport_state(brw, offset); + } else { + dump_sf_viewport_state(brw, offset); + } + break; + case AUB_TRACE_CC_VP_STATE: + dump_cc_viewport_state(brw, offset); + break; + case AUB_TRACE_DEPTH_STENCIL_STATE: + dump_depth_stencil_state(brw, offset); + break; + case AUB_TRACE_CC_STATE: + if (intel->gen >= 6) + dump_cc_state_gen6(brw, offset); + else + dump_cc_state_gen4(brw, offset); + break; + case AUB_TRACE_BLEND_STATE: + dump_blend_state(brw, offset); + break; + case AUB_TRACE_BINDING_TABLE: + dump_binding_table(brw, offset, size); break; + case AUB_TRACE_SURFACE_STATE: + if (intel->gen < 7) { + dump_surface_state(brw, offset); + } else { + dump_gen7_surface_state(brw, offset); + } + break; + case AUB_TRACE_SAMPLER_STATE: + if (intel->gen < 7) { + dump_sampler_state(brw, offset, size); + } else { + dump_gen7_sampler_state(brw, offset, size); + } + break; + case AUB_TRACE_SAMPLER_DEFAULT_COLOR: + dump_sdc(brw, offset); + break; + case AUB_TRACE_SCISSOR_STATE: + dump_scissor(brw, offset); + break; + case AUB_TRACE_VS_CONSTANTS: + dump_vs_constants(brw, offset, size); + break; + default: + break; + } } - - drm_intel_bo_unmap(prog); } - /** * Print additional debug information associated with the batchbuffer * when DEBUG_BATCH is set. @@ -503,49 +622,10 @@ void brw_debug_batch(struct intel_context *intel) { struct brw_context *brw = brw_context(&intel->ctx); - state_struct_out("WM bind", - brw->intel.batch.bo, - brw->wm.bind_bo_offset, - 4 * brw->wm.nr_surfaces); - if (intel->gen < 7) { - dump_wm_surface_state(brw); - dump_wm_sampler_state(brw); - } else { - dump_gen7_surface_state(brw); - dump_gen7_sampler_state(brw); - } - - if (intel->gen < 6) - state_struct_out("VS", intel->batch.bo, brw->vs.state_offset, - sizeof(struct brw_vs_unit_state)); - brw_debug_prog("VS prog", brw->vs.prog_bo); - - if (intel->gen < 6) - state_struct_out("GS", intel->batch.bo, brw->gs.state_offset, - sizeof(struct brw_gs_unit_state)); - brw_debug_prog("GS prog", brw->gs.prog_bo); + drm_intel_bo_map(intel->batch.bo, false); + dump_state_batch(brw); + drm_intel_bo_unmap(intel->batch.bo); - if (intel->gen < 6) { - state_struct_out("SF", intel->batch.bo, brw->sf.state_offset, - sizeof(struct brw_sf_unit_state)); - brw_debug_prog("SF prog", brw->sf.prog_bo); - } - if (intel->gen >= 7) - dump_sf_clip_viewport_state(brw); - else - dump_sf_viewport_state(brw); - if (intel->gen == 6) - dump_clip_viewport_state(brw); - - if (intel->gen < 6) - state_struct_out("WM", intel->batch.bo, brw->wm.state_offset, - sizeof(struct brw_wm_unit_state)); - brw_debug_prog("WM prog", brw->wm.prog_bo); - - if (intel->gen >= 6) { - dump_cc_viewport_state(brw); - dump_depth_stencil_state(brw); - dump_cc_state(brw); - dump_blend_state(brw); - } + if (0) + dump_prog_cache(brw); } diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 6a4c112dcf5..76ffa0daefe 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -47,11 +47,11 @@ static const struct brw_tracked_state *gen4_atoms[] = &brw_check_fallback, &brw_wm_input_sizes, - &brw_vs_prog, - &brw_gs_prog, - &brw_clip_prog, - &brw_sf_prog, - &brw_wm_prog, + &brw_vs_prog, /* must do before GS prog, state base address. */ + &brw_gs_prog, /* must do before state base address */ + &brw_clip_prog, /* must do before state base address */ + &brw_sf_prog, /* must do before state base address */ + &brw_wm_prog, /* must do before state base address */ /* Once all the programs are done, we know how large urb entry * sizes need to be and can decide if we need to change the urb @@ -110,9 +110,9 @@ static const struct brw_tracked_state *gen6_atoms[] = &brw_check_fallback, &brw_wm_input_sizes, - &brw_vs_prog, - &brw_gs_prog, - &brw_wm_prog, + &brw_vs_prog, /* must do before state base address */ + &brw_gs_prog, /* must do before state base address */ + &brw_wm_prog, /* must do before state base address */ &gen6_clip_vp, &gen6_sf_vp, @@ -365,6 +365,7 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_PRIMITIVE), DEFINE_BIT(BRW_NEW_CONTEXT), DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS), + DEFINE_BIT(BRW_NEW_PROGRAM_CACHE), DEFINE_BIT(BRW_NEW_PSP), DEFINE_BIT(BRW_NEW_WM_SURFACES), DEFINE_BIT(BRW_NEW_INDICES), @@ -378,6 +379,7 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE), DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE), DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE), + DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS), {0, 0, 0} }; diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 7b9cdba4cbf..e1947d5ea64 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -40,46 +40,6 @@ /** Number of message register file registers */ #define BRW_MAX_MRF 16 - -/* Command packets: - */ -struct header -{ - GLuint length:16; - GLuint opcode:16; -}; - - -union header_union -{ - struct header bits; - GLuint dword; -}; - -struct brw_3d_control -{ - struct - { - GLuint length:8; - GLuint notify_enable:1; - GLuint pad:3; - GLuint wc_flush_enable:1; - GLuint depth_stall_enable:1; - GLuint operation:2; - GLuint opcode:16; - } header; - - struct - { - GLuint pad:2; - GLuint dest_addr_type:1; - GLuint dest_addr:29; - } dest; - - GLuint dword2; - GLuint dword3; -}; - /* These seem to be passed around as function args, so it works out * better to keep them as #defines: */ @@ -88,314 +48,6 @@ struct brw_3d_control #define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4 #define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8 -struct brw_mi_flush -{ - GLuint flags:4; - GLuint pad:12; - GLuint opcode:16; -}; - -struct brw_vf_statistics -{ - GLuint statistics_enable:1; - GLuint pad:15; - GLuint opcode:16; -}; - - - -struct brw_binding_table_pointers -{ - struct header header; - GLuint vs; - GLuint gs; - GLuint clp; - GLuint sf; - GLuint wm; -}; - - -struct brw_blend_constant_color -{ - struct header header; - GLfloat blend_constant_color[4]; -}; - - -struct brw_depthbuffer -{ - union header_union header; - - union { - struct { - GLuint pitch:18; - GLuint format:3; - GLuint pad:2; - GLuint software_tiled_rendering_mode:2; - GLuint depth_offset_disable:1; - GLuint tile_walk:1; - GLuint tiled_surface:1; - GLuint pad2:1; - GLuint surface_type:3; - } bits; - GLuint dword; - } dword1; - - GLuint dword2_base_addr; - - union { - struct { - GLuint pad:1; - GLuint mipmap_layout:1; - GLuint lod:4; - GLuint width:13; - GLuint height:13; - } bits; - GLuint dword; - } dword3; - - union { - struct { - GLuint pad:10; - GLuint min_array_element:11; - GLuint depth:11; - } bits; - GLuint dword; - } dword4; -}; - -struct brw_depthbuffer_g4x -{ - union header_union header; - - union { - struct { - GLuint pitch:18; - GLuint format:3; - GLuint pad:2; - GLuint software_tiled_rendering_mode:2; - GLuint depth_offset_disable:1; - GLuint tile_walk:1; - GLuint tiled_surface:1; - GLuint pad2:1; - GLuint surface_type:3; - } bits; - GLuint dword; - } dword1; - - GLuint dword2_base_addr; - - union { - struct { - GLuint pad:1; - GLuint mipmap_layout:1; - GLuint lod:4; - GLuint width:13; - GLuint height:13; - } bits; - GLuint dword; - } dword3; - - union { - struct { - GLuint pad:10; - GLuint min_array_element:11; - GLuint depth:11; - } bits; - GLuint dword; - } dword4; - - union { - struct { - GLuint xoffset:16; - GLuint yoffset:16; - } bits; - GLuint dword; - } dword5; /* NEW in Integrated Graphics Device */ -}; - -struct brw_drawrect -{ - struct header header; - GLuint xmin:16; - GLuint ymin:16; - GLuint xmax:16; - GLuint ymax:16; - GLuint xorg:16; - GLuint yorg:16; -}; - - - - -struct brw_global_depth_offset_clamp -{ - struct header header; - GLfloat depth_offset_clamp; -}; - -struct brw_indexbuffer -{ - union { - struct - { - GLuint length:8; - GLuint index_format:2; - GLuint cut_index_enable:1; - GLuint pad:5; - GLuint opcode:16; - } bits; - GLuint dword; - - } header; - - GLuint buffer_start; - GLuint buffer_end; -}; - -/* NEW in Integrated Graphics Device */ -struct brw_aa_line_parameters -{ - struct header header; - - struct { - GLuint aa_coverage_slope:8; - GLuint pad0:8; - GLuint aa_coverage_bias:8; - GLuint pad1:8; - } bits0; - - struct { - GLuint aa_coverage_endcap_slope:8; - GLuint pad0:8; - GLuint aa_coverage_endcap_bias:8; - GLuint pad1:8; - } bits1; -}; - -struct brw_line_stipple -{ - struct header header; - - struct - { - GLuint pattern:16; - GLuint pad:16; - } bits0; - - struct - { - GLuint repeat_count:9; - GLuint pad:7; - GLuint inverse_repeat_count:16; - } bits1; -}; - - -struct brw_pipelined_state_pointers -{ - struct header header; - - struct { - GLuint pad:5; - GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ - } vs; - - struct - { - GLuint enable:1; - GLuint pad:4; - GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ - } gs; - - struct - { - GLuint enable:1; - GLuint pad:4; - GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ - } clp; - - struct - { - GLuint pad:5; - GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ - } sf; - - struct - { - GLuint pad:5; - GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ - } wm; - - struct - { - GLuint pad:5; - GLuint offset:27; /* Offset from GENERAL_STATE_BASE. KW: check me! */ - } cc; -}; - - -struct brw_polygon_stipple_offset -{ - struct header header; - - struct { - GLuint y_offset:5; - GLuint pad:3; - GLuint x_offset:5; - GLuint pad0:19; - } bits0; -}; - - - -struct brw_polygon_stipple -{ - struct header header; - GLuint stipple[32]; -}; - - - -struct brw_pipeline_select -{ - struct - { - GLuint pipeline_select:1; - GLuint pad:15; - GLuint opcode:16; - } header; -}; - - -struct brw_pipe_control -{ - struct - { - GLuint length:8; - GLuint notify_enable:1; - GLuint texture_cache_flush_enable:1; - GLuint indirect_state_pointers_disable:1; - GLuint instruction_state_cache_flush_enable:1; - GLuint write_cache_flush_enable:1; - GLuint depth_stall_enable:1; - GLuint post_sync_operation:2; - - GLuint opcode:16; - } header; - - struct - { - GLuint pad:2; - GLuint dest_addr_type:1; - GLuint dest_addr:29; - } bits1; - - GLuint data0; - GLuint data1; -}; - - struct brw_urb_fence { struct @@ -428,102 +80,6 @@ struct brw_urb_fence } bits1; }; -struct brw_cs_urb_state -{ - struct header header; - - struct - { - GLuint nr_urb_entries:3; - GLuint pad:1; - GLuint urb_entry_size:5; - GLuint pad0:23; - } bits0; -}; - -struct brw_constant_buffer -{ - struct - { - GLuint length:8; - GLuint valid:1; - GLuint pad:7; - GLuint opcode:16; - } header; - - struct - { - GLuint buffer_length:6; - GLuint buffer_address:26; - } bits0; -}; - -struct brw_state_base_address -{ - struct header header; - - struct - { - GLuint modify_enable:1; - GLuint pad:4; - GLuint general_state_address:27; - } bits0; - - struct - { - GLuint modify_enable:1; - GLuint pad:4; - GLuint surface_state_address:27; - } bits1; - - struct - { - GLuint modify_enable:1; - GLuint pad:4; - GLuint indirect_object_state_address:27; - } bits2; - - struct - { - GLuint modify_enable:1; - GLuint pad:11; - GLuint general_state_upper_bound:20; - } bits3; - - struct - { - GLuint modify_enable:1; - GLuint pad:11; - GLuint indirect_object_state_upper_bound:20; - } bits4; -}; - -struct brw_state_prefetch -{ - struct header header; - - struct - { - GLuint prefetch_count:3; - GLuint pad:3; - GLuint prefetch_pointer:26; - } bits0; -}; - -struct brw_system_instruction_pointer -{ - struct header header; - - struct - { - GLuint pad:4; - GLuint system_instruction_pointer:28; - } bits0; -}; - - - - /* State structs for the various fixed function units: */ @@ -1327,12 +883,6 @@ struct brw_vertex_element_state #define BRW_VEP_MAX 18 -struct brw_vertex_element_packet { - struct header header; - struct brw_vertex_element_state ve[BRW_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */ -}; - - struct brw_urb_immediate { GLuint opcode:4; GLuint offset:6; diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index f462f32b19a..46a417a08ed 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -60,7 +60,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel, * given in Volume 1 of the BSpec. */ h0 = ALIGN(mt->height0, align_h); - h1 = ALIGN(minify(h0), align_h); + h1 = ALIGN(minify(mt->height0), align_h); qpitch = (h0 + h1 + (intel->gen >= 7 ? 12 : 11) * align_h); if (mt->compressed) qpitch /= 4; diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 80d5e78ed0b..a9ad5311fe3 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -105,12 +105,11 @@ static void do_vs_prog( struct brw_context *brw, /* constant_map */ aux_size += c.vp->program.Base.Parameters->NumParameters; - drm_intel_bo_unreference(brw->vs.prog_bo); - brw->vs.prog_bo = brw_upload_cache(&brw->cache, BRW_VS_PROG, - &c.key, sizeof(c.key), - program, program_size, - &c.prog_data, aux_size, - &brw->vs.prog_data); + brw_upload_cache(&brw->cache, BRW_VS_PROG, + &c.key, sizeof(c.key), + program, program_size, + &c.prog_data, aux_size, + &brw->vs.prog_offset, &brw->vs.prog_data); ralloc_free(mem_ctx); } @@ -153,14 +152,11 @@ static void brw_upload_vs_prog(struct brw_context *brw) } } - /* Make an early check for the key. - */ - drm_intel_bo_unreference(brw->vs.prog_bo); - brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG, - &key, sizeof(key), - &brw->vs.prog_data); - if (brw->vs.prog_bo == NULL) + if (!brw_search_cache(&brw->cache, BRW_VS_PROG, + &key, sizeof(key), + &brw->vs.prog_offset, &brw->vs.prog_data)) { do_vs_prog(brw, vp, &key); + } brw->vs.constant_map = ((int8_t *)brw->vs.prog_data + sizeof(*brw->vs.prog_data)); } diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c index 9fdfebe9f76..47cc0a7da7a 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_constval.c +++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c @@ -194,19 +194,11 @@ static void calc_wm_input_sizes( struct brw_context *brw ) /* BRW_NEW_VERTEX_PROGRAM */ const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); - /* BRW_NEW_FRAGMENT_PROGRAM */ - struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram; /* BRW_NEW_INPUT_DIMENSIONS */ struct tracker t; GLuint insn; GLuint i; - /* If we're going to go through brw_fs.cpp, we don't end up using - * brw->wm.input_size_masks. - */ - if (prog && prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) - return; - memset(&t, 0, sizeof(t)); /* _NEW_LIGHT */ @@ -246,9 +238,7 @@ static void calc_wm_input_sizes( struct brw_context *brw ) const struct brw_tracked_state brw_wm_input_sizes = { .dirty = { .mesa = _NEW_LIGHT, - .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_INPUT_DIMENSIONS), + .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS, .cache = 0 }, .prepare = calc_wm_input_sizes diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 2fa04a15a34..dbabb44e45c 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1635,7 +1635,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) else m = brw_message_reg(4); - brw_DP4(p, brw_writemask(m, (1 << (i & 7))),pos, c->userplane[i]); + brw_DP4(p, brw_writemask(m, (1 << (i & 3))),pos, c->userplane[i]); } } } else if ((c->prog_data.outputs_written & @@ -1821,6 +1821,9 @@ accumulator_contains(struct brw_vs_compile *c, struct brw_reg val) if (val.address_mode != BRW_ADDRESS_DIRECT) return GL_FALSE; + if (val.negate || val.abs) + return GL_FALSE; + switch (prev_insn->header.opcode) { case BRW_OPCODE_MOV: case BRW_OPCODE_MAC: @@ -1980,9 +1983,22 @@ void brw_vs_emit(struct brw_vs_compile *c ) const struct prog_src_register *src = &inst->SrcReg[i]; index = src->Index; file = src->File; - if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) - args[i] = c->output_regs[index].reg; - else + if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) { + /* Can't just make get_arg "do the right thing" here because + * other callers of get_arg and get_src_reg don't expect any + * special behavior for the c->output_regs[index].used_in_src + * case. + */ + args[i] = c->output_regs[index].reg; + args[i].dw1.bits.swizzle = + BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0), + GET_SWZ(src->Swizzle, 1), + GET_SWZ(src->Swizzle, 2), + GET_SWZ(src->Swizzle, 3)); + + /* Note this is ok for non-swizzle ARB_vp instructions */ + args[i].negate = src->Negate ? 1 : 0; + } else args[i] = get_arg(c, inst, i); } @@ -1993,7 +2009,11 @@ void brw_vs_emit(struct brw_vs_compile *c ) index = inst->DstReg.Index; file = inst->DstReg.File; if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) - dst = c->output_regs[index].reg; + /* Can't just make get_dst "do the right thing" here because other + * callers of get_dst don't expect any special behavior for the + * c->output_regs[index].used_in_src case. + */ + dst = brw_writemask(c->output_regs[index].reg, inst->DstReg.WriteMask); else dst = get_dst(c, inst->DstReg); diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index 1eee5b7e5de..fc4373ab311 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -36,18 +36,6 @@ #include "brw_defines.h" #include "main/macros.h" -struct brw_vs_unit_key { - unsigned int total_grf; - unsigned int urb_entry_read_length; - unsigned int curb_entry_read_length; - - unsigned int curbe_offset; - - unsigned int nr_urb_entries, urb_size; - - unsigned int nr_surfaces; -}; - static void brw_prepare_vs_unit(struct brw_context *brw) { @@ -55,12 +43,19 @@ brw_prepare_vs_unit(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; struct brw_vs_unit_state *vs; - vs = brw_state_batch(brw, sizeof(*vs), 32, &brw->vs.state_offset); + vs = brw_state_batch(brw, AUB_TRACE_VS_STATE, + sizeof(*vs), 32, &brw->vs.state_offset); memset(vs, 0, sizeof(*vs)); - /* CACHE_NEW_VS_PROG */ - vs->thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */ + /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_VS_PROG */ vs->thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1; + vs->thread0.kernel_start_pointer = + brw_program_reloc(brw, + brw->vs.state_offset + + offsetof(struct brw_vs_unit_state, thread0), + brw->vs.prog_offset + + (vs->thread0.grf_reg_count << 1)) >> 6; + vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; /* Choosing multiple program flow means that we may get 2-vertex threads, * which will have the channel mask for dwords 4-7 enabled in the thread, @@ -152,13 +147,6 @@ brw_prepare_vs_unit(struct brw_context *brw) */ vs->vs6.vs_enable = 1; - /* Emit VS program relocation */ - drm_intel_bo_emit_reloc(intel->batch.bo, (brw->vs.state_offset + - offsetof(struct brw_vs_unit_state, - thread0)), - brw->vs.prog_bo, vs->thread0.grf_reg_count << 1, - I915_GEM_DOMAIN_INSTRUCTION, 0); - brw->state.dirty.cache |= CACHE_NEW_VS_UNIT; } @@ -166,6 +154,7 @@ const struct brw_tracked_state brw_vs_unit = { .dirty = { .mesa = _NEW_TRANSFORM, .brw = (BRW_NEW_BATCH | + BRW_NEW_PROGRAM_CACHE | BRW_NEW_CURBE_OFFSETS | BRW_NEW_NR_VS_SURFACES | BRW_NEW_URB_FENCE), diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index 611f6333689..f9ee4d112a5 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -182,7 +182,8 @@ static void upload_vs_surfaces(struct brw_context *brw) /* Might want to calculate nr_surfaces first, to avoid taking up so much * space for the binding table. (once we have vs samplers) */ - bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_VS_MAX_SURF, + bind = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(uint32_t) * BRW_VS_MAX_SURF, 32, &brw->vs.bind_bo_offset); for (i = 0; i < BRW_VS_MAX_SURF; i++) { diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 69650e1df77..40360b23fff 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -35,9 +35,12 @@ #include "main/imports.h" #include "main/macros.h" #include "main/colormac.h" +#include "main/renderbuffer.h" +#include "main/framebuffer.h" #include "intel_batchbuffer.h" #include "intel_regions.h" +#include "intel_fbo.h" #include "brw_context.h" #include "brw_defines.h" @@ -69,30 +72,108 @@ static void brw_destroy_context( struct intel_context *intel ) ralloc_free(brw->wm.compile_data); dri_bo_release(&brw->curbe.curbe_bo); - dri_bo_release(&brw->vs.prog_bo); dri_bo_release(&brw->vs.const_bo); - dri_bo_release(&brw->gs.prog_bo); - dri_bo_release(&brw->clip.prog_bo); - dri_bo_release(&brw->sf.prog_bo); - dri_bo_release(&brw->wm.prog_bo); dri_bo_release(&brw->wm.const_bo); - dri_bo_release(&brw->cc.prog_bo); free(brw->curbe.last_buf); free(brw->curbe.next_buf); } - /** - * called from intelDrawBuffer() + * Update the hardware state for drawing into a window or framebuffer object. + * + * Called by glDrawBuffer, glBindFramebufferEXT, MakeCurrent, and other + * places within the driver. + * + * Basically, this needs to be called any time the current framebuffer + * changes, the renderbuffers change, or we need to draw into different + * color buffers. */ -static void brw_set_draw_region( struct intel_context *intel, - struct intel_region *color_regions[], - struct intel_region *depth_region, - GLuint num_color_regions) +static void +brw_update_draw_buffer(struct intel_context *intel) { -} + struct gl_context *ctx = &intel->ctx; + struct gl_framebuffer *fb = ctx->DrawBuffer; + struct intel_renderbuffer *irbDepth = NULL, *irbStencil = NULL; + bool fb_has_hiz = intel_framebuffer_has_hiz(fb); + + if (!fb) { + /* this can happen during the initial context initialization */ + return; + } + /* + * If intel_context is using separate stencil, but the depth attachment + * (gl_framebuffer.Attachment[BUFFER_DEPTH]) has a packed depth/stencil + * format, then we must install the real depth buffer at fb->_DepthBuffer + * and set fb->_DepthBuffer->Wrapped before calling _mesa_update_framebuffer. + * Otherwise, _mesa_update_framebuffer will create and install a swras + * depth wrapper instead. + * + * Ditto for stencil. + */ + irbDepth = intel_get_renderbuffer(fb, BUFFER_DEPTH); + if (irbDepth && irbDepth->Base.Format == MESA_FORMAT_X8_Z24) { + _mesa_reference_renderbuffer(&fb->_DepthBuffer, &irbDepth->Base); + irbDepth->Base.Wrapped = fb->Attachment[BUFFER_DEPTH].Renderbuffer; + } + + irbStencil = intel_get_renderbuffer(fb, BUFFER_STENCIL); + if (irbStencil && irbStencil->Base.Format == MESA_FORMAT_S8) { + _mesa_reference_renderbuffer(&fb->_StencilBuffer, &irbStencil->Base); + irbStencil->Base.Wrapped = fb->Attachment[BUFFER_STENCIL].Renderbuffer; + } + + /* Do this here, not core Mesa, since this function is called from + * many places within the driver. + */ + if (ctx->NewState & _NEW_BUFFERS) { + /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */ + _mesa_update_framebuffer(ctx); + /* this updates the DrawBuffer's Width/Height if it's a FBO */ + _mesa_update_draw_buffer_bounds(ctx); + } + + if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) { + /* this may occur when we're called by glBindFrameBuffer() during + * the process of someone setting up renderbuffers, etc. + */ + /*_mesa_debug(ctx, "DrawBuffer: incomplete user FBO\n");*/ + return; + } + + /* Check some stencil invariants. These should probably be in + * emit_depthbuffer(). + */ + if (irbStencil && irbStencil->region) { + if (!intel->has_separate_stencil) + assert(irbStencil->Base.Format == MESA_FORMAT_S8_Z24); + if (fb_has_hiz || intel->must_use_separate_stencil) + assert(irbStencil->Base.Format == MESA_FORMAT_S8); + if (irbStencil->Base.Format == MESA_FORMAT_S8) + assert(intel->has_separate_stencil); + } + + /* Mesa's Stencil._Enabled field is updated when + * _NEW_BUFFERS | _NEW_STENCIL, but i965 code assumes that the value + * only changes with _NEW_STENCIL (which seems sensible). So flag it + * here since this is the _NEW_BUFFERS path. + */ + intel->NewGLState |= (_NEW_DEPTH | _NEW_STENCIL); + + /* The driver uses this in places that need to look up + * renderbuffers' buffer objects. + */ + intel->NewGLState |= _NEW_BUFFERS; + + /* update viewport/scissor since it depends on window size */ + intel->NewGLState |= _NEW_VIEWPORT | _NEW_SCISSOR; + + /* Update culling direction which changes depending on the + * orientation of the buffer: + */ + intel->NewGLState |= _NEW_POLYGON; +} /** * called from intel_batchbuffer_flush and children before sending a @@ -122,13 +203,23 @@ static void brw_new_batch( struct intel_context *intel ) * This is probably not as severe as on 915, since almost all of our state * is just in referenced buffers. */ - brw->state.dirty.brw |= BRW_NEW_CONTEXT; + brw->state.dirty.brw |= BRW_NEW_CONTEXT | BRW_NEW_BATCH; - brw->state.dirty.mesa |= ~0; - brw->state.dirty.brw |= ~0; - brw->state.dirty.cache |= ~0; + /* Assume that the last command before the start of our batch was a + * primitive, for safety. + */ + intel->batch.need_workaround_flush = true; + + brw->state_batch_count = 0; brw->vb.nr_current_buffers = 0; + brw->ib.type = -1; + + /* Mark that the current program cache BO has been used by the GPU. + * It will be reallocated if we need to put new programs in for the + * next batch. + */ + brw->cache.bo_used_by_gpu = true; } static void brw_invalidate_state( struct intel_context *intel, GLuint new_state ) @@ -159,7 +250,7 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.new_batch = brw_new_batch; brw->intel.vtbl.finish_batch = brw_finish_batch; brw->intel.vtbl.destroy = brw_destroy_context; - brw->intel.vtbl.set_draw_region = brw_set_draw_region; + brw->intel.vtbl.update_draw_buffer = brw_update_draw_buffer; brw->intel.vtbl.debug_batch = brw_debug_batch; brw->intel.vtbl.render_target_supported = brw_render_target_supported; brw->intel.vtbl.is_hiz_depth_format = brw_is_hiz_depth_format; diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 1aebd12df49..b0dfdd536aa 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -273,12 +273,11 @@ bool do_wm_prog(struct brw_context *brw, */ program = brw_get_program(&c->func, &program_size); - drm_intel_bo_unreference(brw->wm.prog_bo); - brw->wm.prog_bo = brw_upload_cache(&brw->cache, BRW_WM_PROG, - &c->key, sizeof(c->key), - program, program_size, - &c->prog_data, sizeof(c->prog_data), - &brw->wm.prog_data); + brw_upload_cache(&brw->cache, BRW_WM_PROG, + &c->key, sizeof(c->key), + program, program_size, + &c->prog_data, sizeof(c->prog_data), + &brw->wm.prog_offset, &brw->wm.prog_data); return true; } @@ -389,6 +388,8 @@ static void brw_wm_populate_key( struct brw_context *brw, * all 4 channels. */ if (sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) { + key->compare_funcs[i] = sampler->CompareFunc; + if (sampler->DepthMode == GL_ALPHA) { swizzles[0] = SWIZZLE_ZERO; swizzles[1] = SWIZZLE_ZERO; @@ -477,13 +478,9 @@ static void brw_prepare_wm_prog(struct brw_context *brw) brw_wm_populate_key(brw, &key); - /* Make an early check for the key. - */ - drm_intel_bo_unreference(brw->wm.prog_bo); - brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG, - &key, sizeof(key), - &brw->wm.prog_data); - if (brw->wm.prog_bo == NULL) { + if (!brw_search_cache(&brw->cache, BRW_WM_PROG, + &key, sizeof(key), + &brw->wm.prog_offset, &brw->wm.prog_data)) { bool success = do_wm_prog(brw, ctx->Shader.CurrentFragmentProgram, fp, &key); assert(success); diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index e244b55a083..29082c19088 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -68,6 +68,18 @@ struct brw_wm_prog_key { GLuint clamp_fragment_color:1; GLuint line_aa:2; + /** + * Per-sampler comparison functions: + * + * If comparison mode is GL_COMPARE_R_TO_TEXTURE, then this is set to one + * of GL_NEVER, GL_LESS, GL_EQUAL, GL_LEQUAL, GL_GREATER, GL_NOTEQUAL, + * GL_GEQUAL, or GL_ALWAYS. Otherwise (comparison mode is GL_NONE), this + * field is irrelevant so it's left as GL_NONE (0). + * + * While this is a GLenum, all possible values fit in 16-bits. + */ + uint16_t compare_funcs[BRW_MAX_TEX_UNIT]; + GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */ GLuint yuvtex_mask:16; GLuint yuvtex_swap_mask:16; /* UV swaped */ diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index f61757a8cac..6ea4a7d6e50 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1094,9 +1094,16 @@ void emit_tex(struct brw_wm_compile *c, if (intel->gen < 5 && c->dispatch_width == 8) nr_texcoords = 3; - /* For shadow comparisons, we have to supply u,v,r. */ - if (shadow) - nr_texcoords = 3; + if (shadow) { + if (intel->gen < 7) { + /* For shadow comparisons, we have to supply u,v,r. */ + nr_texcoords = 3; + } else { + /* On Ivybridge, the shadow comparitor comes first. Just load it. */ + brw_MOV(p, brw_message_reg(cur_mrf), arg[2]); + cur_mrf += mrf_per_channel; + } + } /* Emit the texcoords. */ for (i = 0; i < nr_texcoords; i++) { @@ -1113,7 +1120,7 @@ void emit_tex(struct brw_wm_compile *c, } /* Fill in the shadow comparison reference value. */ - if (shadow) { + if (shadow && intel->gen < 7) { if (intel->gen >= 5) { /* Fill in the cube map array index value. */ brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 59dcda7b414..7cd3edad235 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -563,13 +563,14 @@ static void precalc_dst( struct brw_wm_compile *c, struct prog_src_register src0 = inst->SrcReg[0]; struct prog_src_register src1 = inst->SrcReg[1]; struct prog_dst_register dst = inst->DstReg; - + struct prog_dst_register temp = get_temp(c); + if (dst.WriteMask & WRITEMASK_Y) { /* dst.y = mul src0.y, src1.y */ emit_op(c, OPCODE_MUL, - dst_mask(dst, WRITEMASK_Y), + dst_mask(temp, WRITEMASK_Y), inst->SaturateMode, src0, src1, @@ -584,7 +585,7 @@ static void precalc_dst( struct brw_wm_compile *c, */ swz = emit_op(c, OPCODE_SWZ, - dst_mask(dst, WRITEMASK_XZ), + dst_mask(temp, WRITEMASK_XZ), inst->SaturateMode, src_swizzle(src0, SWIZZLE_ONE, z, z, z), src_undef(), @@ -597,12 +598,26 @@ static void precalc_dst( struct brw_wm_compile *c, */ emit_op(c, OPCODE_MOV, - dst_mask(dst, WRITEMASK_W), + dst_mask(temp, WRITEMASK_W), inst->SaturateMode, src1, src_undef(), src_undef()); } + + /* This will get optimized out in general, but it ensures that we + * don't overwrite src operands in our channel-wise splitting + * above. See piglit fp-dst-aliasing-[12]. + */ + emit_op(c, + OPCODE_MOV, + dst, + 0, + src_reg_from_dst(temp), + src_undef(), + src_undef()); + + release_temp(c, temp); } @@ -611,7 +626,17 @@ static void precalc_lit( struct brw_wm_compile *c, { struct prog_src_register src0 = inst->SrcReg[0]; struct prog_dst_register dst = inst->DstReg; - + + if (dst.WriteMask & WRITEMASK_YZ) { + emit_op(c, + OPCODE_LIT, + dst_mask(dst, WRITEMASK_YZ), + inst->SaturateMode, + src0, + src_undef(), + src_undef()); + } + if (dst.WriteMask & WRITEMASK_XW) { struct prog_instruction *swz; @@ -627,16 +652,6 @@ static void precalc_lit( struct brw_wm_compile *c, /* Avoid letting the negation flag of src0 affect our 1 constant. */ swz->SrcReg[0].Negate = NEGATE_NONE; } - - if (dst.WriteMask & WRITEMASK_YZ) { - emit_op(c, - OPCODE_LIT, - dst_mask(dst, WRITEMASK_YZ), - inst->SaturateMode, - src0, - src_undef(), - src_undef()); - } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 5de39aa4575..98146136703 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -108,7 +108,8 @@ upload_default_color(struct brw_context *brw, struct gl_sampler_object *sampler, if (intel->gen == 5 || intel->gen == 6) { struct gen5_sampler_default_color *sdc; - sdc = brw_state_batch(brw, sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); + sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR, + sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); memset(sdc, 0, sizeof(*sdc)); @@ -144,7 +145,8 @@ upload_default_color(struct brw_context *brw, struct gl_sampler_object *sampler, } else { struct brw_sampler_default_color *sdc; - sdc = brw_state_batch(brw, sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); + sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR, + sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]); COPY_4V(sdc->color, color); } @@ -326,7 +328,8 @@ prepare_wm_samplers(struct brw_context *brw) if (brw->wm.sampler_count == 0) return; - samplers = brw_state_batch(brw, brw->wm.sampler_count * sizeof(*samplers), + samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, + brw->wm.sampler_count * sizeof(*samplers), 32, &brw->wm.sampler_offset); memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers)); diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index ef98f8126dc..c820ce48c29 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -78,7 +78,8 @@ brw_prepare_wm_unit(struct brw_context *brw) const struct gl_fragment_program *fp = brw->fragment_program; struct brw_wm_unit_state *wm; - wm = brw_state_batch(brw, sizeof(*wm), 32, &brw->wm.state_offset); + wm = brw_state_batch(brw, AUB_TRACE_WM_STATE, + sizeof(*wm), 32, &brw->wm.state_offset); memset(wm, 0, sizeof(*wm)); if (brw->wm.prog_data->prog_offset_16) { @@ -90,13 +91,25 @@ brw_prepare_wm_unit(struct brw_context *brw) brw->wm.prog_data->first_curbe_grf_16); } - /* CACHE_NEW_WM_PROG */ + /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_WM_PROG */ wm->thread0.grf_reg_count = brw->wm.prog_data->reg_blocks; wm->wm9.grf_reg_count_2 = brw->wm.prog_data->reg_blocks_16; - wm->thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */ - /* reloc */ - wm->wm9.kernel_start_pointer_2 = (brw->wm.prog_bo->offset + - brw->wm.prog_data->prog_offset_16) >> 6; + + wm->thread0.kernel_start_pointer = + brw_program_reloc(brw, + brw->wm.state_offset + + offsetof(struct brw_wm_unit_state, thread0), + brw->wm.prog_offset + + (wm->thread0.grf_reg_count << 1)) >> 6; + + wm->wm9.kernel_start_pointer_2 = + brw_program_reloc(brw, + brw->wm.state_offset + + offsetof(struct brw_wm_unit_state, wm9), + brw->wm.prog_offset + + brw->wm.prog_data->prog_offset_16 + + (wm->wm9.grf_reg_count_2 << 1)) >> 6; + wm->thread1.depth_coef_urb_read_offset = 1; wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; @@ -214,23 +227,6 @@ brw_prepare_wm_unit(struct brw_context *brw) if (unlikely(INTEL_DEBUG & DEBUG_STATS) || intel->stats_wm) wm->wm4.stats_enable = 1; - /* Emit WM program relocation */ - drm_intel_bo_emit_reloc(intel->batch.bo, - brw->wm.state_offset + - offsetof(struct brw_wm_unit_state, thread0), - brw->wm.prog_bo, wm->thread0.grf_reg_count << 1, - I915_GEM_DOMAIN_INSTRUCTION, 0); - - if (brw->wm.prog_data->prog_offset_16) { - drm_intel_bo_emit_reloc(intel->batch.bo, - brw->wm.state_offset + - offsetof(struct brw_wm_unit_state, wm9), - brw->wm.prog_bo, - ((wm->wm9.grf_reg_count_2 << 1) + - brw->wm.prog_data->prog_offset_16), - I915_GEM_DOMAIN_INSTRUCTION, 0); - } - /* Emit scratch space relocation */ if (brw->wm.prog_data->total_scratch != 0) { drm_intel_bo_emit_reloc(intel->batch.bo, @@ -265,6 +261,7 @@ const struct brw_tracked_state brw_wm_unit = { _NEW_BUFFERS), .brw = (BRW_NEW_BATCH | + BRW_NEW_PROGRAM_CACHE | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_CURBE_OFFSETS | BRW_NEW_NR_WM_SURFACES), diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index ac8005dc6c2..fb4fb146f8d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -73,7 +73,7 @@ translate_tex_target(GLenum target) uint32_t brw_format_for_mesa_format(gl_format mesa_format) { - uint32_t table[MESA_FORMAT_COUNT] = + static const uint32_t table[MESA_FORMAT_COUNT] = { [MESA_FORMAT_L8] = BRW_SURFACEFORMAT_L8_UNORM, [MESA_FORMAT_I8] = BRW_SURFACEFORMAT_I8_UNORM, @@ -226,7 +226,8 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit ) const GLuint surf_index = SURF_INDEX_TEXTURE(unit); uint32_t *surf; - surf = brw_state_batch(brw, 6 * 4, 32, &brw->wm.surf_offset[surf_index]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, &brw->wm.surf_offset[surf_index]); surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | @@ -272,7 +273,8 @@ brw_create_constant_surface(struct brw_context *brw, const GLint w = width - 1; uint32_t *surf; - surf = brw_state_batch(brw, 6 * 4, 32, out_offset); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, out_offset); surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | @@ -404,7 +406,8 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit) struct intel_context *intel = &brw->intel; uint32_t *surf; - surf = brw_state_batch(brw, 6 * 4, 32, &brw->wm.surf_offset[unit]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, &brw->wm.surf_offset[unit]); surf[0] = (BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT); @@ -439,7 +442,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, uint32_t tile_x, tile_y; uint32_t format = 0; - surf = brw_state_batch(brw, 6 * 4, 32, &brw->wm.surf_offset[unit]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + 6 * 4, 32, &brw->wm.surf_offset[unit]); switch (irb->Base.Format) { case MESA_FORMAT_XRGB8888: @@ -637,7 +641,8 @@ brw_wm_upload_binding_table(struct brw_context *brw) /* Might want to calculate nr_surfaces first, to avoid taking up so much * space for the binding table. */ - bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_WM_MAX_SURF, + bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, + sizeof(uint32_t) * BRW_WM_MAX_SURF, 32, &brw->wm.bind_bo_offset); for (i = 0; i < BRW_WM_MAX_SURF; i++) { diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c index 2b16d6cdc01..41d13ad2bf4 100644 --- a/src/mesa/drivers/dri/i965/gen6_cc.c +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -51,7 +51,8 @@ prepare_blend_state(struct brw_context *brw) nr_draw_buffers = 1; size = sizeof(*blend) * nr_draw_buffers; - blend = brw_state_batch(brw, size, 64, &brw->cc.blend_state_offset); + blend = brw_state_batch(brw, AUB_TRACE_BLEND_STATE, + size, 64, &brw->cc.blend_state_offset); memset(blend, 0, size); @@ -139,7 +140,8 @@ gen6_prepare_color_calc_state(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct gen6_color_calc_state *cc; - cc = brw_state_batch(brw, sizeof(*cc), 64, &brw->cc.state_offset); + cc = brw_state_batch(brw, AUB_TRACE_CC_STATE, + sizeof(*cc), 64, &brw->cc.state_offset); memset(cc, 0, sizeof(*cc)); /* _NEW_COLOR */ @@ -183,7 +185,8 @@ static void upload_cc_state_pointers(struct brw_context *brw) const struct brw_tracked_state gen6_cc_state_pointers = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH, + .brw = (BRW_NEW_BATCH | + BRW_NEW_STATE_BASE_ADDRESS), .cache = (CACHE_NEW_BLEND_STATE | CACHE_NEW_COLOR_CALC_STATE | CACHE_NEW_DEPTH_STENCIL_STATE) diff --git a/src/mesa/drivers/dri/i965/gen6_depthstencil.c b/src/mesa/drivers/dri/i965/gen6_depthstencil.c index 775e1ce2c9c..5d14147db3d 100644 --- a/src/mesa/drivers/dri/i965/gen6_depthstencil.c +++ b/src/mesa/drivers/dri/i965/gen6_depthstencil.c @@ -34,7 +34,8 @@ gen6_prepare_depth_stencil_state(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct gen6_depth_stencil_state *ds; - ds = brw_state_batch(brw, sizeof(*ds), 64, + ds = brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE, + sizeof(*ds), 64, &brw->cc.depth_stencil_state_offset); memset(ds, 0, sizeof(*ds)); diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c index c1d0a739394..d29f0290727 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c @@ -45,7 +45,7 @@ upload_gs_state(struct brw_context *brw) ADVANCE_BATCH(); // GS should never be used on Gen6. Disable it. - assert(brw->gs.prog_bo == NULL); + assert(!brw->gs.prog_active); BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); OUT_BATCH(0); /* prog_bo */ @@ -65,8 +65,7 @@ upload_gs_state(struct brw_context *brw) const struct brw_tracked_state gen6_gs_state = { .dirty = { .mesa = _NEW_TRANSFORM, - .brw = (BRW_NEW_CURBE_OFFSETS | - BRW_NEW_URB_FENCE | + .brw = (BRW_NEW_URB_FENCE | BRW_NEW_CONTEXT), .cache = CACHE_NEW_GS_PROG }, diff --git a/src/mesa/drivers/dri/i965/gen6_sampler_state.c b/src/mesa/drivers/dri/i965/gen6_sampler_state.c index 4cdec699df6..89326872faa 100644 --- a/src/mesa/drivers/dri/i965/gen6_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sampler_state.c @@ -50,7 +50,8 @@ upload_sampler_state_pointers(struct brw_context *brw) const struct brw_tracked_state gen6_sampler_state = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH, + .brw = (BRW_NEW_BATCH | + BRW_NEW_STATE_BASE_ADDRESS), .cache = CACHE_NEW_SAMPLER }, .emit = upload_sampler_state_pointers, diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c index fad3ca0dd04..dc73b10f4cd 100644 --- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c +++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c @@ -31,7 +31,7 @@ #include "intel_batchbuffer.h" static void -gen6_prepare_scissor_state(struct brw_context *brw) +gen6_upload_scissor_state(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; @@ -39,7 +39,8 @@ gen6_prepare_scissor_state(struct brw_context *brw) struct gen6_scissor_rect *scissor; uint32_t scissor_state_offset; - scissor = brw_state_batch(brw, sizeof(*scissor), 32, &scissor_state_offset); + scissor = brw_state_batch(brw, AUB_TRACE_SCISSOR_STATE, + sizeof(*scissor), 32, &scissor_state_offset); /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */ @@ -89,5 +90,5 @@ const struct brw_tracked_state gen6_scissor_state = { .brw = BRW_NEW_BATCH, .cache = 0, }, - .prepare = gen6_prepare_scissor_state, + .emit = gen6_upload_scissor_state, }; diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c index 62645a6a30f..b4105111c8c 100644 --- a/src/mesa/drivers/dri/i965/gen6_urb.c +++ b/src/mesa/drivers/dri/i965/gen6_urb.c @@ -64,7 +64,7 @@ upload_urb(struct brw_context *brw) assert(brw->urb.nr_vs_entries % 4 == 0); assert(brw->urb.nr_gs_entries % 4 == 0); /* GS requirement */ - assert(!brw->gs.prog_bo || brw->urb.vs_size < 5); + assert(!brw->gs.prog_active || brw->urb.vs_size < 5); BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2)); diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c index 4116bdb96de..a4bfa54837d 100644 --- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c @@ -43,7 +43,8 @@ prepare_clip_vp(struct brw_context *brw) { struct brw_clipper_viewport *vp; - vp = brw_state_batch(brw, sizeof(*vp), 32, &brw->clip.vp_offset); + vp = brw_state_batch(brw, AUB_TRACE_CLIP_VP_STATE, + sizeof(*vp), 32, &brw->clip.vp_offset); vp->xmin = -1.0; vp->xmax = 1.0; @@ -72,7 +73,8 @@ prepare_sf_vp(struct brw_context *brw) const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); const GLfloat *v = ctx->Viewport._WindowMap.m; - sfv = brw_state_batch(brw, sizeof(*sfv), 32, &brw->sf.vp_offset); + sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, + sizeof(*sfv), 32, &brw->sf.vp_offset); memset(sfv, 0, sizeof(*sfv)); /* _NEW_BUFFERS */ @@ -122,7 +124,8 @@ static void upload_viewport_state_pointers(struct brw_context *brw) const struct brw_tracked_state gen6_viewport_state = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH, + .brw = (BRW_NEW_BATCH | + BRW_NEW_STATE_BASE_ADDRESS), .cache = (CACHE_NEW_CLIP_VP | CACHE_NEW_SF_VP | CACHE_NEW_CC_VP) diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index b46368e36e2..e70454416bf 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -60,7 +60,7 @@ gen6_prepare_vs_push_constants(struct brw_context *brw) float *param; int i; - param = brw_state_batch(brw, + param = brw_state_batch(brw, AUB_TRACE_VS_CONSTANTS, (MAX_CLIP_PLANES + nr_params) * 4 * sizeof(float), 32, &brw->vs.push_const_offset); @@ -110,7 +110,7 @@ const struct brw_tracked_state gen6_vs_constants = { .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_BATCH | BRW_NEW_VERTEX_PROGRAM), - .cache = 0, + .cache = CACHE_NEW_VS_PROG, }, .prepare = gen6_prepare_vs_push_constants, }; @@ -147,7 +147,7 @@ upload_vs_state(struct brw_context *brw) BEGIN_BATCH(6); OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); - OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH(brw->vs.prog_offset); OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | GEN6_VS_FLOATING_POINT_MODE_ALT | (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); @@ -160,13 +160,38 @@ upload_vs_state(struct brw_context *brw) GEN6_VS_STATISTICS_ENABLE | GEN6_VS_ENABLE); ADVANCE_BATCH(); + + /* Based on my reading of the simulator, the VS constants don't get + * pulled into the VS FF unit until an appropriate pipeline flush + * happens, and instead the 3DSTATE_CONSTANT_VS packet just adds + * references to them into a little FIFO. The flushes are common, + * but don't reliably happen between this and a 3DPRIMITIVE, causing + * the primitive to use the wrong constants. Then the FIFO + * containing the constant setup gets added to again on the next + * constants change, and eventually when a flush does happen the + * unit is overwhelmed by constant changes and dies. + * + * To avoid this, send a PIPE_CONTROL down the line that will + * update the unit immediately loading the constants. The flush + * type bits here were those set by the STATE_BASE_ADDRESS whose + * move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the + * bug reports that led to this workaround, and may be more than + * what is strictly required to avoid the issue. + */ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_INSTRUCTION_FLUSH | + PIPE_CONTROL_STATE_CACHE_INVALIDATE); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); } const struct brw_tracked_state gen6_vs_state = { .dirty = { .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, - .brw = (BRW_NEW_CURBE_OFFSETS | - BRW_NEW_NR_VS_SURFACES | + .brw = (BRW_NEW_NR_VS_SURFACES | BRW_NEW_URB_FENCE | BRW_NEW_CONTEXT | BRW_NEW_VERTEX_PROGRAM | diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 43e651db3ef..185da9c355f 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -39,6 +39,7 @@ gen6_prepare_wm_push_constants(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; + /* BRW_NEW_FRAGMENT_PROGRAM */ const struct brw_fragment_program *fp = brw_fragment_program_const(brw->fragment_program); @@ -48,11 +49,12 @@ gen6_prepare_wm_push_constants(struct brw_context *brw) /* XXX: Should this happen somewhere before to get our state flag set? */ _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); + /* CACHE_NEW_VS_PROG */ if (brw->wm.prog_data->nr_params != 0) { float *constants; unsigned int i; - constants = brw_state_batch(brw, + constants = brw_state_batch(brw, AUB_TRACE_NO_TYPE, brw->wm.prog_data->nr_params * sizeof(float), 32, &brw->wm.push_const_offset); @@ -83,7 +85,7 @@ const struct brw_tracked_state gen6_wm_constants = { .mesa = _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM), - .cache = 0, + .cache = CACHE_NEW_VS_PROG, }, .prepare = gen6_prepare_wm_push_constants, }; @@ -97,7 +99,7 @@ upload_wm_state(struct brw_context *brw) brw_fragment_program_const(brw->fragment_program); uint32_t dw2, dw4, dw5, dw6; - /* CACHE_NEW_WM_PROG */ + /* CACHE_NEW_WM_PROG */ if (brw->wm.prog_data->nr_params == 0) { /* Disable the push constant buffers. */ BEGIN_BATCH(5); @@ -157,7 +159,7 @@ upload_wm_state(struct brw_context *brw) if (ctx->Line.StippleFlag) dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE; - /* _NEW_POLYGONSTIPPLE */ + /* _NEW_POLYGON */ if (ctx->Polygon.StippleFlag) dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE; @@ -183,7 +185,7 @@ upload_wm_state(struct brw_context *brw) BEGIN_BATCH(9); OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); - OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH(brw->wm.prog_offset); OUT_BATCH(dw2); if (brw->wm.prog_data->total_scratch) { OUT_RELOC(brw->wm.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, @@ -195,21 +197,19 @@ upload_wm_state(struct brw_context *brw) OUT_BATCH(dw5); OUT_BATCH(dw6); OUT_BATCH(0); /* kernel 1 pointer */ - if (brw->wm.prog_data->prog_offset_16) { - OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - brw->wm.prog_data->prog_offset_16); - } else { - OUT_BATCH(0); /* kernel 2 pointer */ - } + /* kernel 2 pointer */ + OUT_BATCH(brw->wm.prog_offset + brw->wm.prog_data->prog_offset_16); ADVANCE_BATCH(); } const struct brw_tracked_state gen6_wm_state = { .dirty = { - .mesa = (_NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR | _NEW_BUFFERS | - _NEW_PROGRAM_CONSTANTS | _NEW_POLYGON), - .brw = (BRW_NEW_CURBE_OFFSETS | - BRW_NEW_FRAGMENT_PROGRAM | + .mesa = (_NEW_LINE | + _NEW_COLOR | + _NEW_BUFFERS | + _NEW_PROGRAM_CONSTANTS | + _NEW_POLYGON), + .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_NR_WM_SURFACES | BRW_NEW_URB_FENCE | BRW_NEW_BATCH), diff --git a/src/mesa/drivers/dri/i965/gen7_disable.c b/src/mesa/drivers/dri/i965/gen7_disable.c index 4e9461739d0..a44d31596b9 100644 --- a/src/mesa/drivers/dri/i965/gen7_disable.c +++ b/src/mesa/drivers/dri/i965/gen7_disable.c @@ -31,7 +31,7 @@ disable_stages(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - assert(brw->gs.prog_bo == NULL); + assert(!brw->gs.prog_active); /* Disable the Geometry Shader (GS) Unit */ BEGIN_BATCH(7); diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c index 95f6fbf7414..e787c21f4d1 100644 --- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c @@ -183,7 +183,8 @@ gen7_prepare_samplers(struct brw_context *brw) if (brw->wm.sampler_count == 0) return; - samplers = brw_state_batch(brw, brw->wm.sampler_count * sizeof(*samplers), + samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, + brw->wm.sampler_count * sizeof(*samplers), 32, &brw->wm.sampler_offset); memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers)); diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index 99efe96a1fa..0f97cea652d 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -39,11 +39,12 @@ upload_sbe_state(struct brw_context *brw) uint32_t num_outputs = brw_count_bits(brw->fragment_program->Base.InputsRead); uint32_t dw1, dw10, dw11; int i; - int attr = 0; + int attr = 0, input_index = 0; /* _NEW_TRANSFORM */ int urb_start = ctx->Transform.ClipPlanesEnabled ? 2 : 1; /* _NEW_LIGHT */ int two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); + uint16_t attr_overrides[FRAG_ATTRIB_MAX]; /* FINISHME: Attribute Swizzle Control Mode? */ dw1 = @@ -57,12 +58,6 @@ upload_sbe_state(struct brw_context *brw) dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT; dw10 = 0; - if (ctx->Point.PointSprite) { - for (i = 0; i < 8; i++) { - if (ctx->Point.CoordReplace[i]) - dw10 |= (1 << i); - } - } /* _NEW_LIGHT (flat shading) */ dw11 = 0; @@ -71,30 +66,43 @@ upload_sbe_state(struct brw_context *brw) ((brw->fragment_program->Base.InputsRead & FRAG_BIT_WPOS) ? 0 : 1)); } + /* Create the mapping from the FS inputs we produce to the VS outputs + * they source from. + */ + for (; attr < FRAG_ATTRIB_MAX; attr++) { + if (!(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr))) + continue; + + if (ctx->Point.PointSprite && + attr >= FRAG_ATTRIB_TEX0 && attr <= FRAG_ATTRIB_TEX7 && + ctx->Point.CoordReplace[attr - FRAG_ATTRIB_TEX0]) { + dw10 |= (1 << input_index); + } + + if (attr == FRAG_ATTRIB_PNTC) + dw10 |= (1 << input_index); + + /* The hardware can only do the overrides on 16 overrides at a + * time, and the other up to 16 have to be lined up so that the + * input index = the output index. We'll need to do some + * tweaking to make sure that's the case. + */ + assert(input_index < 16 || attr == input_index); + + attr_overrides[input_index++] = get_attr_override(brw, attr, + two_side_color); + } + + for (; attr < FRAG_ATTRIB_MAX; attr++) + attr_overrides[input_index++] = 0; + BEGIN_BATCH(14); OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2)); OUT_BATCH(dw1); /* Output dwords 2 through 9 */ for (i = 0; i < 8; i++) { - uint32_t attr_overrides = 0; - - for (; attr < 64; attr++) { - if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr, two_side_color); - attr++; - break; - } - } - - for (; attr < 64; attr++) { - if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr, two_side_color) << 16; - attr++; - break; - } - } - OUT_BATCH(attr_overrides); + OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16); } OUT_BATCH(dw10); /* point sprite texcoord bitmask */ diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c index 3a614693dfc..2b650e9bc45 100644 --- a/src/mesa/drivers/dri/i965/gen7_urb.c +++ b/src/mesa/drivers/dri/i965/gen7_urb.c @@ -78,7 +78,7 @@ upload_urb(struct brw_context *brw) assert(brw->urb.nr_vs_entries % 8 == 0); assert(brw->urb.nr_gs_entries % 8 == 0); /* GS requirement */ - assert(!brw->gs.prog_bo); + assert(!brw->gs.prog_active); BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2)); diff --git a/src/mesa/drivers/dri/i965/gen7_viewport_state.c b/src/mesa/drivers/dri/i965/gen7_viewport_state.c index 838ad3a3948..e9aacd56317 100644 --- a/src/mesa/drivers/dri/i965/gen7_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen7_viewport_state.c @@ -36,7 +36,8 @@ prepare_sf_clip_viewport(struct brw_context *brw) const GLfloat *v = ctx->Viewport._WindowMap.m; struct gen7_sf_clip_viewport *vp; - vp = brw_state_batch(brw, sizeof(vp), 64, &brw->sf.vp_offset); + vp = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, + sizeof(vp), 64, &brw->sf.vp_offset); /* Also assign to clip.vp_offset in case something uses it. */ brw->clip.vp_offset = brw->sf.vp_offset; diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index ae7a1d6c35c..0fad3d2fb68 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -67,7 +67,7 @@ upload_vs_state(struct brw_context *brw) BEGIN_BATCH(6); OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); - OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH(brw->vs.prog_offset); OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | GEN6_VS_FLOATING_POINT_MODE_ALT | (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 6a64eb8a2d3..a102ca772b3 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -36,6 +36,7 @@ gen7_prepare_wm_constants(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; + /* BRW_NEW_FRAGMENT_PROGRAM */ const struct brw_fragment_program *fp = brw_fragment_program_const(brw->fragment_program); @@ -45,12 +46,12 @@ gen7_prepare_wm_constants(struct brw_context *brw) /* XXX: Should this happen somewhere before to get our state flag set? */ _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); - /* BRW_NEW_FRAGMENT_PROGRAM */ + /* CACHE_NEW_WM_PROG */ if (brw->wm.prog_data->nr_params != 0) { float *constants; unsigned int i; - constants = brw_state_batch(brw, + constants = brw_state_batch(brw, AUB_TRACE_NO_TYPE, brw->wm.prog_data->nr_params * sizeof(float), 32, &brw->wm.push_const_offset); @@ -80,7 +81,7 @@ const struct brw_tracked_state gen7_wm_constants = { .dirty = { .mesa = _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM), - .cache = 0, + .cache = CACHE_NEW_WM_PROG, }, .prepare = gen7_prepare_wm_constants, }; @@ -104,7 +105,7 @@ upload_wm_state(struct brw_context *brw) if (ctx->Line.StippleFlag) dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE; - /* _NEW_POLYGONSTIPPLE */ + /* _NEW_POLYGON */ if (ctx->Polygon.StippleFlag) dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE; @@ -137,11 +138,9 @@ upload_wm_state(struct brw_context *brw) const struct brw_tracked_state gen7_wm_state = { .dirty = { - .mesa = (_NEW_LINE | _NEW_POLYGON | _NEW_POLYGONSTIPPLE | + .mesa = (_NEW_LINE | _NEW_POLYGON | _NEW_COLOR | _NEW_BUFFERS), - .brw = (BRW_NEW_CURBE_OFFSETS | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_NR_WM_SURFACES | + .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_URB_FENCE | BRW_NEW_BATCH), .cache = 0, @@ -227,25 +226,19 @@ upload_ps_state(struct brw_context *brw) BEGIN_BATCH(8); OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); - OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH(brw->wm.prog_offset); OUT_BATCH(dw2); OUT_BATCH(0); /* scratch space base offset */ OUT_BATCH(dw4); OUT_BATCH(dw5); OUT_BATCH(0); /* kernel 1 pointer */ - if (brw->wm.prog_data->prog_offset_16) { - OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - brw->wm.prog_data->prog_offset_16); - } else { - OUT_BATCH(0); /* kernel 2 pointer */ - } + OUT_BATCH(brw->wm.prog_offset + brw->wm.prog_data->prog_offset_16); ADVANCE_BATCH(); } const struct brw_tracked_state gen7_ps_state = { .dirty = { - .mesa = (_NEW_LINE | _NEW_POLYGON | _NEW_POLYGONSTIPPLE | - _NEW_PROGRAM_CONSTANTS), + .mesa = _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_CURBE_OFFSETS | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_NR_WM_SURFACES | diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index 9994b67bfc5..4add1a69f02 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -65,8 +65,8 @@ gen7_update_texture_surface(struct gl_context *ctx, GLuint unit) const GLuint surf_index = SURF_INDEX_TEXTURE(unit); struct gen7_surface_state *surf; - surf = brw_state_batch(brw, sizeof(*surf), 32, - &brw->wm.surf_offset[surf_index]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(*surf), 32, &brw->wm.surf_offset[surf_index]); memset(surf, 0, sizeof(*surf)); surf->ss0.surface_type = translate_tex_target(tObj->Target); @@ -135,7 +135,8 @@ gen7_create_constant_surface(struct brw_context *brw, const GLint w = width - 1; struct gen7_surface_state *surf; - surf = brw_state_batch(brw, sizeof(*surf), 32, out_offset); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(*surf), 32, out_offset); memset(surf, 0, sizeof(*surf)); surf->ss0.surface_type = BRW_SURFACE_BUFFER; @@ -210,8 +211,8 @@ gen7_update_null_renderbuffer_surface(struct brw_context *brw, unsigned unit) { struct gen7_surface_state *surf; - surf = brw_state_batch(brw, sizeof(*surf), 32, - &brw->wm.surf_offset[unit]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(*surf), 32, &brw->wm.surf_offset[unit]); memset(surf, 0, sizeof(*surf)); surf->ss0.surface_type = BRW_SURFACE_NULL; @@ -235,8 +236,8 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, struct gen7_surface_state *surf; uint32_t tile_x, tile_y; - surf = brw_state_batch(brw, sizeof(*surf), 32, - &brw->wm.surf_offset[unit]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, + sizeof(*surf), 32, &brw->wm.surf_offset[unit]); memset(surf, 0, sizeof(*surf)); switch (irb->Base.Format) { diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 77edc3a6bfe..db4343be10c 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -53,6 +53,22 @@ static void clear_cache( struct intel_context *intel ) } void +intel_batchbuffer_init(struct intel_context *intel) +{ + intel_batchbuffer_reset(intel); + + if (intel->gen == 6) { + /* We can't just use brw_state_batch to get a chunk of space for + * the gen6 workaround because it involves actually writing to + * the buffer, and the kernel doesn't let us write to the batch. + */ + intel->batch.workaround_bo = drm_intel_bo_alloc(intel->bufmgr, + "gen6 workaround", + 4096, 4096); + } +} + +void intel_batchbuffer_reset(struct intel_context *intel) { if (intel->batch.last_bo != NULL) { @@ -76,6 +92,7 @@ intel_batchbuffer_free(struct intel_context *intel) { drm_intel_bo_unreference(intel->batch.last_bo); drm_intel_bo_unreference(intel->batch.bo); + drm_intel_bo_unreference(intel->batch.workaround_bo); clear_cache(intel); } @@ -110,9 +127,11 @@ do_flush_locked(struct intel_context *intel) } if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) { - intel_decode(batch->map, batch->used, + drm_intel_bo_map(batch->bo, false); + intel_decode(batch->bo->virtual, batch->used, batch->bo->offset, intel->intelScreen->deviceID, GL_TRUE); + drm_intel_bo_unmap(batch->bo); if (intel->vtbl.debug_batch != NULL) intel->vtbl.debug_batch(intel); @@ -276,6 +295,68 @@ emit: item->header = intel->batch.emit; } +/** + * Emits a PIPE_CONTROL with a non-zero post-sync operation, for + * implementing two workarounds on gen6. From section 1.4.7.1 + * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: + * + * [DevSNB-C+{W/A}] Before any depth stall flush (including those + * produced by non-pipelined state commands), software needs to first + * send a PIPE_CONTROL with no bits set except Post-Sync Operation != + * 0. + * + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable + * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. + * + * And the workaround for these two requires this workaround first: + * + * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent + * BEFORE the pipe-control with a post-sync op and no write-cache + * flushes. + * + * And this last workaround is tricky because of the requirements on + * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM + * volume 2 part 1: + * + * "1 of the following must also be set: + * - Render Target Cache Flush Enable ([12] of DW1) + * - Depth Cache Flush Enable ([0] of DW1) + * - Stall at Pixel Scoreboard ([1] of DW1) + * - Depth Stall ([13] of DW1) + * - Post-Sync Operation ([13] of DW1) + * - Notify Enable ([8] of DW1)" + * + * The cache flushes require the workaround flush that triggered this + * one, so we can't use it. Depth stall would trigger the same. + * Post-sync nonzero is what triggered this second workaround, so we + * can't use that one either. Notify enable is IRQs, which aren't + * really our business. That leaves only stall at scoreboard. + */ +void +intel_emit_post_sync_nonzero_flush(struct intel_context *intel) +{ + if (!intel->batch.need_workaround_flush) + return; + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); + OUT_RELOC(intel->batch.workaround_bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + + intel->batch.need_workaround_flush = false; +} + /* Emit a pipelined flush to either flush render and texture cache for * reading from a FBO-drawn texture, or flush so that frontbuffer * render appears on the screen in DRI1. @@ -294,19 +375,22 @@ intel_batchbuffer_emit_mi_flush(struct intel_context *intel) OUT_BATCH(0); ADVANCE_BATCH(); } else { - BEGIN_BATCH(8); - /* XXX workaround: issue any post sync != 0 before write - * cache flush = 1 - */ - OUT_BATCH(_3DSTATE_PIPE_CONTROL); - OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); - OUT_BATCH(0); /* write address */ - OUT_BATCH(0); /* write data */ + if (intel->gen == 6) { + /* Hardware workaround: SNB B-Spec says: + * + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache + * Flush Enable =1, a PIPE_CONTROL with any non-zero + * post-sync-op is required. + */ + intel_emit_post_sync_nonzero_flush(intel); + } + BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_PIPE_CONTROL); OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH | PIPE_CONTROL_WRITE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_TC_FLUSH | PIPE_CONTROL_NO_WRITE); OUT_BATCH(0); /* write address */ OUT_BATCH(0); /* write data */ diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index a0a5c9841c6..fb4134d889e 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -9,6 +9,7 @@ #define BATCH_RESERVED 16 +void intel_batchbuffer_init(struct intel_context *intel); void intel_batchbuffer_reset(struct intel_context *intel); void intel_batchbuffer_free(struct intel_context *intel); @@ -38,6 +39,7 @@ GLboolean intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel, uint32_t write_domain, uint32_t offset); void intel_batchbuffer_emit_mi_flush(struct intel_context *intel); +void intel_emit_post_sync_nonzero_flush(struct intel_context *intel); static INLINE uint32_t float_as_int(float f) { diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c index 7eb50edc6b4..fccdcbd7303 100644 --- a/src/mesa/drivers/dri/intel/intel_buffers.c +++ b/src/mesa/drivers/dri/intel/intel_buffers.c @@ -28,7 +28,9 @@ #include "intel_context.h" #include "intel_buffers.h" #include "intel_fbo.h" + #include "main/framebuffer.h" +#include "main/renderbuffer.h" /** * Return pointer to current color drawing region, or NULL. @@ -76,197 +78,6 @@ intel_check_front_buffer_rendering(struct intel_context *intel) } } - -/** - * Update the hardware state for drawing into a window or framebuffer object. - * - * Called by glDrawBuffer, glBindFramebufferEXT, MakeCurrent, and other - * places within the driver. - * - * Basically, this needs to be called any time the current framebuffer - * changes, the renderbuffers change, or we need to draw into different - * color buffers. - */ -void -intel_draw_buffer(struct gl_context * ctx, struct gl_framebuffer *fb) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_region *colorRegions[MAX_DRAW_BUFFERS], *depthRegion = NULL; - struct intel_renderbuffer *irbDepth = NULL, *irbStencil = NULL; - bool fb_has_hiz = intel_framebuffer_has_hiz(fb); - - if (!fb) { - /* this can happen during the initial context initialization */ - return; - } - - /* Do this here, not core Mesa, since this function is called from - * many places within the driver. - */ - if (ctx->NewState & _NEW_BUFFERS) { - /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */ - _mesa_update_framebuffer(ctx); - /* this updates the DrawBuffer's Width/Height if it's a FBO */ - _mesa_update_draw_buffer_bounds(ctx); - } - - if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) { - /* this may occur when we're called by glBindFrameBuffer() during - * the process of someone setting up renderbuffers, etc. - */ - /*_mesa_debug(ctx, "DrawBuffer: incomplete user FBO\n");*/ - return; - } - - /* How many color buffers are we drawing into? - * - * If there are zero buffers or the buffer is too big, don't configure any - * regions for hardware drawing. We'll fallback to software below. Not - * having regions set makes some of the software fallback paths faster. - */ - if ((fb->Width > ctx->Const.MaxRenderbufferSize) - || (fb->Height > ctx->Const.MaxRenderbufferSize) - || (fb->_NumColorDrawBuffers == 0)) { - /* writing to 0 */ - colorRegions[0] = NULL; - } - else if (fb->_NumColorDrawBuffers > 1) { - int i; - struct intel_renderbuffer *irb; - - for (i = 0; i < fb->_NumColorDrawBuffers; i++) { - irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]); - colorRegions[i] = irb ? irb->region : NULL; - } - } - else { - /* Get the intel_renderbuffer for the single colorbuffer we're drawing - * into. - */ - if (fb->Name == 0) { - /* drawing to window system buffer */ - if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) - colorRegions[0] = intel_get_rb_region(fb, BUFFER_FRONT_LEFT); - else - colorRegions[0] = intel_get_rb_region(fb, BUFFER_BACK_LEFT); - } - else { - /* drawing to user-created FBO */ - struct intel_renderbuffer *irb; - irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]); - colorRegions[0] = (irb && irb->region) ? irb->region : NULL; - } - } - - if (!colorRegions[0]) { - FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE); - } - else { - FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE); - } - - /*** - *** Get depth buffer region and check if we need a software fallback. - ***/ - if (fb->_DepthBuffer && fb->_DepthBuffer->Wrapped) { - irbDepth = intel_renderbuffer(fb->_DepthBuffer->Wrapped); - if (irbDepth && irbDepth->region) { - assert(!fb_has_hiz || irbDepth->Base.Format != MESA_FORMAT_S8_Z24); - FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_FALSE); - depthRegion = irbDepth->region; - } - else { - FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_TRUE); - depthRegion = NULL; - } - } - else { - /* not using depth buffer */ - FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_FALSE); - depthRegion = NULL; - } - - /*** - *** Stencil buffer - ***/ - if (fb->_StencilBuffer && fb->_StencilBuffer->Wrapped) { - irbStencil = intel_renderbuffer(fb->_StencilBuffer->Wrapped); - if (irbStencil && irbStencil->region) { - if (!intel->has_separate_stencil) - assert(irbStencil->Base.Format == MESA_FORMAT_S8_Z24); - if (fb_has_hiz || intel->must_use_separate_stencil) - assert(irbStencil->Base.Format == MESA_FORMAT_S8); - if (irbStencil->Base.Format == MESA_FORMAT_S8) - assert(intel->has_separate_stencil); - FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE); - } - else { - FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_TRUE); - } - } - else { - /* XXX FBO: instead of FALSE, pass ctx->Stencil._Enabled ??? */ - FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE); - } - - /* If we have a (packed) stencil buffer attached but no depth buffer, - * we still need to set up the shared depth/stencil state so we can use it. - */ - if (depthRegion == NULL && irbStencil && irbStencil->region - && irbStencil->Base.Format == MESA_FORMAT_S8_Z24) { - depthRegion = irbStencil->region; - } - - /* - * Update depth and stencil test state - */ - if (ctx->Driver.Enable) { - ctx->Driver.Enable(ctx, GL_DEPTH_TEST, - (ctx->Depth.Test && fb->Visual.depthBits > 0)); - ctx->Driver.Enable(ctx, GL_STENCIL_TEST, - (ctx->Stencil.Enabled && fb->Visual.stencilBits > 0)); - } - else { - /* Mesa's Stencil._Enabled field is updated when - * _NEW_BUFFERS | _NEW_STENCIL, but i965 code assumes that the value - * only changes with _NEW_STENCIL (which seems sensible). So flag it - * here since this is the _NEW_BUFFERS path. - */ - intel->NewGLState |= (_NEW_DEPTH | _NEW_STENCIL); - } - - intel->vtbl.set_draw_region(intel, colorRegions, depthRegion, - fb->_NumColorDrawBuffers); - intel->NewGLState |= _NEW_BUFFERS; - - /* update viewport since it depends on window size */ -#ifdef I915 - intelCalcViewport(ctx); -#else - intel->NewGLState |= _NEW_VIEWPORT; -#endif - /* Set state we know depends on drawable parameters: - */ - if (ctx->Driver.Scissor) - ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y, - ctx->Scissor.Width, ctx->Scissor.Height); - intel->NewGLState |= _NEW_SCISSOR; - - if (ctx->Driver.DepthRange) - ctx->Driver.DepthRange(ctx, - ctx->Viewport.Near, - ctx->Viewport.Far); - - /* Update culling direction which changes depending on the - * orientation of the buffer: - */ - if (ctx->Driver.FrontFace) - ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace); - else - intel->NewGLState |= _NEW_POLYGON; -} - - static void intelDrawBuffer(struct gl_context * ctx, GLenum mode) { diff --git a/src/mesa/drivers/dri/intel/intel_buffers.h b/src/mesa/drivers/dri/intel/intel_buffers.h index 2d4613b2954..a7e77976497 100644 --- a/src/mesa/drivers/dri/intel/intel_buffers.h +++ b/src/mesa/drivers/dri/intel/intel_buffers.h @@ -31,6 +31,7 @@ #include "dri_util.h" #include "drm.h" +#include "intel_context.h" struct intel_context; struct intel_framebuffer; @@ -41,7 +42,13 @@ extern struct intel_region *intel_drawbuf_region(struct intel_context *intel); extern void intel_check_front_buffer_rendering(struct intel_context *intel); -extern void intel_draw_buffer(struct gl_context * ctx, struct gl_framebuffer *fb); +static inline void +intel_draw_buffer(struct gl_context * ctx, struct gl_framebuffer *fb) +{ + struct intel_context *intel = intel_context(ctx); + + intel->vtbl.update_draw_buffer(intel); +} extern void intelInitBufferFuncs(struct dd_function_table *functions); diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index 5a96232107e..76d33f9b37e 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -116,14 +116,13 @@ intelClear(struct gl_context *ctx, GLbitfield mask) } /* HW color buffers (front, back, aux, generic FBO, etc) */ - if (colorMask == ~0) { + if (intel->gen < 6 && colorMask == ~0) { /* clear all R,G,B,A */ - /* XXX FBO: need to check if colorbuffers are software RBOs! */ blit_mask |= (mask & BUFFER_BITS_COLOR); } else { /* glColorMask in effect */ - tri_mask |= (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT)); + tri_mask |= (mask & BUFFER_BITS_COLOR); } /* Make sure we have up to date buffers before we start looking at @@ -144,6 +143,12 @@ intelClear(struct gl_context *ctx, GLbitfield mask) */ tri_mask |= BUFFER_BIT_STENCIL; } + else if (intel->has_separate_stencil && + stencilRegion->tiling == I915_TILING_NONE) { + /* The stencil buffer is actually W tiled, which the hardware + * cannot blit to. */ + tri_mask |= BUFFER_BIT_STENCIL; + } else { /* clearing all stencil bits, use blitting */ blit_mask |= BUFFER_BIT_STENCIL; @@ -183,7 +188,10 @@ intelClear(struct gl_context *ctx, GLbitfield mask) if (tri_mask) { debug_mask("tri", tri_mask); - _mesa_meta_Clear(&intel->ctx, tri_mask); + if (ctx->Extensions.ARB_fragment_shader) + _mesa_meta_glsl_Clear(&intel->ctx, tri_mask); + else + _mesa_meta_Clear(&intel->ctx, tri_mask); } } diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 0c2ba413ad7..fe8be082dfc 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -56,7 +56,7 @@ #include "drirenderbuffer.h" #include "utils.h" - +#include "../glsl/ralloc.h" #ifndef INTEL_DEBUG int INTEL_DEBUG = (0); @@ -504,10 +504,7 @@ intelInvalidateState(struct gl_context * ctx, GLuint new_state) struct intel_context *intel = intel_context(ctx); _swrast_InvalidateState(ctx, new_state); - _swsetup_InvalidateState(ctx, new_state); _vbo_InvalidateState(ctx, new_state); - _tnl_InvalidateState(ctx, new_state); - _tnl_invalidate_vertex_state(ctx, new_state); intel->NewGLState |= new_state; @@ -663,8 +660,8 @@ intelInitContext(struct intel_context *intel, ctx->TextureFormatSupported[MESA_FORMAT_AL1616] = GL_TRUE; /* Depth and stencil */ - ctx->TextureFormatSupported[MESA_FORMAT_S8_Z24] = !intel->must_use_separate_stencil; - ctx->TextureFormatSupported[MESA_FORMAT_X8_Z24] = intel->has_separate_stencil; + ctx->TextureFormatSupported[MESA_FORMAT_S8_Z24] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_X8_Z24] = GL_TRUE; ctx->TextureFormatSupported[MESA_FORMAT_S8] = intel->has_separate_stencil; /* @@ -854,7 +851,7 @@ intelInitContext(struct intel_context *intel, if (INTEL_DEBUG & DEBUG_BUFMGR) dri_bufmgr_set_debug(intel->bufmgr, GL_TRUE); - intel_batchbuffer_reset(intel); + intel_batchbuffer_init(intel); intel_fbo_init(intel); @@ -925,7 +922,9 @@ intelDestroyContext(__DRIcontext * driContextPriv) /* free the Mesa context */ _mesa_free_context_data(&intel->ctx); - FREE(intel); + _math_matrix_dtr(&intel->ViewportMatrix); + + ralloc_free(intel); driContextPriv->driverPrivate = NULL; } } @@ -1115,7 +1114,6 @@ intel_query_dri2_buffers_no_separate_stencil(struct intel_context *intel, * * \see intel_update_renderbuffers() * \see intel_region_alloc_for_handle() - * \see intel_renderbuffer_set_region() */ static void intel_process_dri2_buffer_no_separate_stencil(struct intel_context *intel, @@ -1127,7 +1125,6 @@ intel_process_dri2_buffer_no_separate_stencil(struct intel_context *intel, assert(!intel->must_use_separate_stencil); struct gl_framebuffer *fb = drawable->driverPrivate; - struct intel_region *region = NULL; struct intel_renderbuffer *depth_rb = NULL; if (!rb) @@ -1154,20 +1151,18 @@ intel_process_dri2_buffer_no_separate_stencil(struct intel_context *intel, if (unlikely(INTEL_DEBUG & DEBUG_DRI)) { fprintf(stderr, "(reusing depth buffer as stencil)\n"); } - intel_region_reference(®ion, depth_rb->region); + intel_region_reference(&rb->region, depth_rb->region); } else { - region = intel_region_alloc_for_handle(intel->intelScreen, - buffer->cpp, - drawable->w, - drawable->h, - buffer->pitch / buffer->cpp, - buffer->name, - buffer_name); + intel_region_release(&rb->region); + rb->region = intel_region_alloc_for_handle(intel->intelScreen, + buffer->cpp, + drawable->w, + drawable->h, + buffer->pitch / buffer->cpp, + buffer->name, + buffer_name); } - intel_renderbuffer_set_region(intel, rb, region); - intel_region_release(®ion); - if (buffer->attachment == __DRI_BUFFER_DEPTH_STENCIL) { struct intel_renderbuffer *stencil_rb = intel_get_renderbuffer(fb, BUFFER_STENCIL); @@ -1175,10 +1170,10 @@ intel_process_dri2_buffer_no_separate_stencil(struct intel_context *intel, if (!stencil_rb) return; - if (stencil_rb->region && stencil_rb->region->name == buffer->name) - return; - - intel_renderbuffer_set_region(intel, stencil_rb, region); + /* The rb passed in is the BUFFER_DEPTH attachment, and we need + * to associate this region to BUFFER_STENCIL as well. + */ + intel_region_reference(&stencil_rb->region, rb->region); } } @@ -1303,7 +1298,6 @@ intel_query_dri2_buffers_with_separate_stencil(struct intel_context *intel, * * \see intel_update_renderbuffers() * \see intel_region_alloc_for_handle() - * \see intel_renderbuffer_set_region() * \see enum intel_dri2_has_hiz */ static void @@ -1363,9 +1357,9 @@ intel_process_dri2_buffer_with_separate_stencil(struct intel_context *intel, buffer_name); if (buffer->attachment == __DRI_BUFFER_HIZ) { - intel_renderbuffer_set_hiz_region(intel, rb, region); + intel_region_reference(&rb->hiz_region, region); } else { - intel_renderbuffer_set_region(intel, rb, region); + intel_region_reference(&rb->region, region); } intel_region_release(®ion); @@ -1445,7 +1439,12 @@ intel_verify_dri2_has_hiz(struct intel_context *intel, assert(stencil_rb->Base.Format == MESA_FORMAT_S8); assert(depth_rb && depth_rb->Base.Format == MESA_FORMAT_X8_Z24); - if (stencil_rb->region->tiling == I915_TILING_Y) { + if (stencil_rb->region->tiling == I915_TILING_NONE) { + /* + * The stencil buffer is actually W tiled. The region's tiling is + * I915_TILING_NONE, however, because the GTT is incapable of W + * fencing. + */ intel->intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_TRUE; return; } else { @@ -1514,12 +1513,10 @@ intel_verify_dri2_has_hiz(struct intel_context *intel, / depth_stencil_buffer->cpp, depth_stencil_buffer->name, "dri2 depth / stencil buffer"); - intel_renderbuffer_set_region(intel, - intel_get_renderbuffer(fb, BUFFER_DEPTH), - region); - intel_renderbuffer_set_region(intel, - intel_get_renderbuffer(fb, BUFFER_STENCIL), - region); + intel_region_reference(&intel_get_renderbuffer(fb, BUFFER_DEPTH)->region, + region); + intel_region_reference(&intel_get_renderbuffer(fb, BUFFER_STENCIL)->region, + region); intel_region_release(®ion); } } @@ -1535,7 +1532,7 @@ intel_verify_dri2_has_hiz(struct intel_context *intel, * Presently, however, no verification or clean up is necessary, and * execution should not reach here. If the framebuffer still has a hiz * region, then we have already set dri2_has_hiz to true after - * confirming above that the stencil buffer is Y tiled. + * confirming above that the stencil buffer is W tiled. */ assert(0); } diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 80dee4ef38e..b7989dd42f5 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -136,6 +136,7 @@ struct intel_context struct intel_region * draw_regions[], struct intel_region * depth_region, GLuint num_regions); + void (*update_draw_buffer)(struct intel_context *intel); void (*reduced_primitive_state) (struct intel_context * intel, GLenum rprim); @@ -181,6 +182,9 @@ struct intel_context drm_intel_bo *bo; /** Last BO submitted to the hardware. Used for glFinish(). */ drm_intel_bo *last_bo; + /** BO for post-sync nonzero writes for gen6 workaround. */ + drm_intel_bo *workaround_bo; + bool need_workaround_flush; struct cached_batch_item *cached_items; @@ -196,6 +200,7 @@ struct intel_context drm_intel_bo *first_post_swapbuffers_batch; GLboolean need_throttle; GLboolean no_batch_wrap; + bool tnl_pipeline_running; /**< Set while i915's _tnl_run_pipeline. */ struct { @@ -225,7 +230,6 @@ struct intel_context GLuint coloroffset; GLuint specoffset; GLuint wpos_offset; - GLuint wpos_size; struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX]; GLuint vertex_attr_count; diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index 3fd987abd8c..64c996ca5cd 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -172,6 +172,7 @@ static const struct dri_extension brw_extensions[] = { { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions }, { "GL_ARB_point_sprite", NULL }, { "GL_ARB_seamless_cube_map", NULL }, + { "GL_ARB_shader_texture_lod", NULL }, { "GL_ARB_shadow", NULL }, #ifdef TEXTURE_FLOAT_ENABLED { "GL_ARB_texture_float", NULL }, diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index e7c23f02f65..65ad621e770 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -70,18 +70,15 @@ intel_new_framebuffer(struct gl_context * ctx, GLuint name) static void intel_delete_renderbuffer(struct gl_renderbuffer *rb) { - GET_CURRENT_CONTEXT(ctx); - struct intel_context *intel = intel_context(ctx); struct intel_renderbuffer *irb = intel_renderbuffer(rb); ASSERT(irb); - if (intel && irb->region) { - intel_region_release(&irb->region); - } - if (intel && irb->hiz_region) { - intel_region_release(&irb->hiz_region); - } + intel_region_release(&irb->region); + intel_region_release(&irb->hiz_region); + + _mesa_reference_renderbuffer(&irb->wrapped_depth, NULL); + _mesa_reference_renderbuffer(&irb->wrapped_stencil, NULL); free(irb); } @@ -105,7 +102,7 @@ intel_get_pointer(struct gl_context * ctx, struct gl_renderbuffer *rb, * Called via glRenderbufferStorageEXT() to set the format and allocate * storage for a user-created renderbuffer. */ -static GLboolean +GLboolean intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer *rb, GLenum internalFormat, GLuint width, GLuint height) @@ -141,6 +138,8 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer break; } + rb->Width = width; + rb->Height = height; rb->_BaseFormat = _mesa_base_fbo_format(ctx, internalFormat); rb->DataType = intel_mesa_format_to_rb_datatype(rb->Format); cpp = _mesa_get_format_bytes(rb->Format); @@ -174,6 +173,9 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer if (irb->Base.Format == MESA_FORMAT_S8) { /* + * The stencil buffer is W tiled. However, we request from the kernel a + * non-tiled buffer because the GTT is incapable of W fencing. + * * The stencil buffer has quirky pitch requirements. From Vol 2a, * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch": * The pitch must be set to 2x the value computed based on width, as @@ -181,41 +183,71 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer * To accomplish this, we resort to the nasty hack of doubling the drm * region's cpp and halving its height. * - * If we neglect to double the pitch, then drm_intel_gem_bo_map_gtt() - * maps the memory incorrectly. + * If we neglect to double the pitch, then render corruption occurs. */ irb->region = intel_region_alloc(intel->intelScreen, - I915_TILING_Y, + I915_TILING_NONE, cpp * 2, - width, - height / 2, + ALIGN(width, 64), + ALIGN((height + 1) / 2, 64), GL_TRUE); + if (!irb->region) + return false; + + } else if (irb->Base.Format == MESA_FORMAT_S8_Z24 + && intel->must_use_separate_stencil) { + + bool ok = true; + struct gl_renderbuffer *depth_rb; + struct gl_renderbuffer *stencil_rb; + + depth_rb = intel_create_wrapped_renderbuffer(ctx, width, height, + MESA_FORMAT_X8_Z24); + stencil_rb = intel_create_wrapped_renderbuffer(ctx, width, height, + MESA_FORMAT_S8); + ok = depth_rb && stencil_rb; + ok = ok && intel_alloc_renderbuffer_storage(ctx, depth_rb, + depth_rb->InternalFormat, + width, height); + ok = ok && intel_alloc_renderbuffer_storage(ctx, stencil_rb, + stencil_rb->InternalFormat, + width, height); + + if (!ok) { + if (depth_rb) { + intel_delete_renderbuffer(depth_rb); + } + if (stencil_rb) { + intel_delete_renderbuffer(stencil_rb); + } + return false; + } + + depth_rb->Wrapped = rb; + stencil_rb->Wrapped = rb; + _mesa_reference_renderbuffer(&irb->wrapped_depth, depth_rb); + _mesa_reference_renderbuffer(&irb->wrapped_stencil, stencil_rb); + } else { irb->region = intel_region_alloc(intel->intelScreen, tiling, cpp, width, height, GL_TRUE); - } - - if (!irb->region) - return GL_FALSE; /* out of memory? */ - - ASSERT(irb->region->buffer); - - if (intel->vtbl.is_hiz_depth_format(intel, rb->Format)) { - irb->hiz_region = intel_region_alloc(intel->intelScreen, - I915_TILING_Y, - irb->region->cpp, - irb->region->width, - irb->region->height, - GL_TRUE); - if (!irb->hiz_region) { - intel_region_release(&irb->region); - return GL_FALSE; + if (!irb->region) + return false; + + if (intel->vtbl.is_hiz_depth_format(intel, rb->Format)) { + irb->hiz_region = intel_region_alloc(intel->intelScreen, + I915_TILING_Y, + irb->region->cpp, + irb->region->width, + irb->region->height, + GL_TRUE); + if (!irb->hiz_region) { + intel_region_release(&irb->region); + return false; + } } } - rb->Width = width; - rb->Height = height; - return GL_TRUE; } @@ -238,8 +270,6 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx, return; irb = intel_renderbuffer(rb); - if (irb->region) - intel_region_release(&irb->region); intel_region_reference(&irb->region, image->region); rb->InternalFormat = image->internal_format; @@ -306,33 +336,6 @@ intel_nop_alloc_storage(struct gl_context * ctx, struct gl_renderbuffer *rb, return GL_FALSE; } - -void -intel_renderbuffer_set_region(struct intel_context *intel, - struct intel_renderbuffer *rb, - struct intel_region *region) -{ - struct intel_region *old; - - old = rb->region; - rb->region = NULL; - intel_region_reference(&rb->region, region); - intel_region_release(&old); -} - - -void -intel_renderbuffer_set_hiz_region(struct intel_context *intel, - struct intel_renderbuffer *rb, - struct intel_region *region) -{ - struct intel_region *old = rb->hiz_region; - rb->hiz_region = NULL; - intel_region_reference(&rb->hiz_region, region); - intel_region_release(&old); -} - - /** * Create a new intel_renderbuffer which corresponds to an on-screen window, * not a user-created renderbuffer. @@ -366,6 +369,37 @@ intel_create_renderbuffer(gl_format format) } +struct gl_renderbuffer* +intel_create_wrapped_renderbuffer(struct gl_context * ctx, + int width, int height, + gl_format format) +{ + /* + * The name here is irrelevant, as long as its nonzero, because the + * renderbuffer never gets entered into Mesa's renderbuffer hash table. + */ + GLuint name = ~0; + + struct intel_renderbuffer *irb = CALLOC_STRUCT(intel_renderbuffer); + if (!irb) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer"); + return NULL; + } + + struct gl_renderbuffer *rb = &irb->Base; + _mesa_init_renderbuffer(rb, name); + rb->ClassID = INTEL_RB_CLASS; + rb->_BaseFormat = _mesa_get_format_base_format(format); + rb->Format = format; + rb->InternalFormat = rb->_BaseFormat; + rb->DataType = intel_mesa_format_to_rb_datatype(format); + rb->Width = width; + rb->Height = height; + + return rb; +} + + /** * Create a new renderbuffer object. * Typically called via glBindRenderbufferEXT(). @@ -427,6 +461,10 @@ intel_framebuffer_renderbuffer(struct gl_context * ctx, intel_draw_buffer(ctx, fb); } +static bool +intel_update_tex_wrapper_regions(struct intel_context *intel, + struct intel_renderbuffer *irb, + struct intel_texture_image *intel_image); static GLboolean intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb, @@ -453,21 +491,63 @@ intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb, irb->Base.Delete = intel_delete_renderbuffer; irb->Base.AllocStorage = intel_nop_alloc_storage; + if (intel_image->stencil_rb) { + /* The tex image has packed depth/stencil format, but is using separate + * stencil. */ + + bool ok; + struct intel_renderbuffer *depth_irb = + intel_renderbuffer(intel_image->depth_rb); + + /* Update the hiz region if necessary. */ + ok = intel_update_tex_wrapper_regions(intel, depth_irb, intel_image); + if (!ok) { + return false; + } + + /* The tex image shares its embedded depth and stencil renderbuffers with + * the renderbuffer wrapper. */ + if (irb->wrapped_depth != intel_image->depth_rb) { + _mesa_reference_renderbuffer(&irb->wrapped_depth, + intel_image->depth_rb); + } + if (irb->wrapped_stencil != intel_image->stencil_rb) { + _mesa_reference_renderbuffer(&irb->wrapped_stencil, + intel_image->stencil_rb); + } + + return true; + + } else { + return intel_update_tex_wrapper_regions(intel, irb, intel_image); + } +} + +/** + * FIXME: The handling of the hiz region is broken for mipmapped depth textures + * FIXME: because intel_finalize_mipmap_tree is unaware of it. + */ +static bool +intel_update_tex_wrapper_regions(struct intel_context *intel, + struct intel_renderbuffer *irb, + struct intel_texture_image *intel_image) +{ + struct gl_renderbuffer *rb = &irb->Base; + /* Point the renderbuffer's region to the texture's region. */ if (irb->region != intel_image->mt->region) { - intel_region_release(&irb->region); intel_region_reference(&irb->region, intel_image->mt->region); } /* Allocate the texture's hiz region if necessary. */ - if (intel->vtbl.is_hiz_depth_format(intel, texImage->TexFormat) + if (intel->vtbl.is_hiz_depth_format(intel, rb->Format) && !intel_image->mt->hiz_region) { intel_image->mt->hiz_region = intel_region_alloc(intel->intelScreen, I915_TILING_Y, - _mesa_get_format_bytes(texImage->TexFormat), - texImage->Width, - texImage->Height, + _mesa_get_format_bytes(rb->Format), + rb->Width, + rb->Height, GL_TRUE); if (!intel_image->mt->hiz_region) return GL_FALSE; @@ -475,7 +555,6 @@ intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb, /* Point the renderbuffer's hiz region to the texture's hiz region. */ if (irb->hiz_region != intel_image->mt->hiz_region) { - intel_region_release(&irb->hiz_region); intel_region_reference(&irb->hiz_region, intel_image->mt->hiz_region); } @@ -512,12 +591,11 @@ intel_wrap_texture(struct gl_context * ctx, struct gl_texture_image *texImage) return irb; } -static void +void intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb, struct intel_texture_image *intel_image, int zoffset) { - struct intel_mipmap_tree *mt = intel_image->mt; unsigned int dst_x, dst_y; /* compute offset of the particular 2D image within the texture region */ @@ -527,7 +605,6 @@ intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb, zoffset, &dst_x, &dst_y); - irb->draw_offset = (dst_y * mt->region->pitch + dst_x) * mt->cpp; irb->draw_x = dst_x; irb->draw_y = dst_y; } @@ -568,6 +645,22 @@ intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb, } } +#ifndef I915 +static bool +need_tile_offset_workaround(struct brw_context *brw, + struct intel_renderbuffer *irb) +{ + uint32_t tile_x, tile_y; + + if (brw->has_surface_tile_offset) + return false; + + intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y); + + return tile_x != 0 || tile_y != 0; +} +#endif + /** * Called by glFramebufferTexture[123]DEXT() (and other places) to * prepare for rendering into texture memory. This might be called @@ -621,8 +714,7 @@ intel_render_texture(struct gl_context * ctx, intel_image->used_as_render_target = GL_TRUE; #ifndef I915 - if (!brw_context(ctx)->has_surface_tile_offset && - (irb->draw_offset & 4095) != 0) { + if (need_tile_offset_workaround(brw_context(ctx), irb)) { /* Original gen4 hardware couldn't draw to a non-tile-aligned * destination in a miptree unless you actually setup your * renderbuffer as a miptree and used the fragile @@ -653,7 +745,6 @@ intel_render_texture(struct gl_context * ctx, intel_image->mt = new_mt; intel_renderbuffer_set_draw_offset(irb, intel_image, att->Zoffset); - intel_region_release(&irb->region); intel_region_reference(&irb->region, intel_image->mt->region); } #endif @@ -707,29 +798,21 @@ intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) * The depth and stencil renderbuffers are the same renderbuffer or wrap * the same texture. */ - bool depth_stencil_are_same; - if (depthRb && stencilRb && depthRb == stencilRb) - depth_stencil_are_same = true; - else if (depthRb && stencilRb && depthRb != stencilRb - && (fb->Attachment[BUFFER_DEPTH].Type == GL_TEXTURE) - && (fb->Attachment[BUFFER_STENCIL].Type == GL_TEXTURE) - && (fb->Attachment[BUFFER_DEPTH].Texture->Name - == fb->Attachment[BUFFER_STENCIL].Texture->Name)) - depth_stencil_are_same = true; - else - depth_stencil_are_same = false; - - bool fb_has_combined_depth_stencil_format = - (depthRb && depthRb->Base.Format == MESA_FORMAT_S8_Z24) || - (stencilRb && stencilRb->Base.Format == MESA_FORMAT_S8_Z24); - - bool fb_has_hiz = intel_framebuffer_has_hiz(fb); - - if ((intel->must_use_separate_stencil || fb_has_hiz) - && (depth_stencil_are_same || fb_has_combined_depth_stencil_format)) { - fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; - } else if (!intel->has_separate_stencil && depthRb && stencilRb && !depth_stencil_are_same) { - fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; + if (depthRb && stencilRb) { + bool depth_stencil_are_same; + if (depthRb == stencilRb) + depth_stencil_are_same = true; + else if ((fb->Attachment[BUFFER_DEPTH].Type == GL_TEXTURE) && + (fb->Attachment[BUFFER_STENCIL].Type == GL_TEXTURE) && + (fb->Attachment[BUFFER_DEPTH].Texture->Name == + fb->Attachment[BUFFER_STENCIL].Texture->Name)) + depth_stencil_are_same = true; + else + depth_stencil_are_same = false; + + if (!intel->has_separate_stencil && !depth_stencil_are_same) { + fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; + } } for (i = 0; i < Elements(fb->Attachment); i++) { diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index 509f5887225..2487994fde5 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -33,6 +33,7 @@ #include "intel_screen.h" struct intel_context; +struct intel_texture_image; /** * Intel renderbuffer, derived from gl_renderbuffer. @@ -45,7 +46,18 @@ struct intel_renderbuffer /** Only used by depth renderbuffers for which HiZ is enabled. */ struct intel_region *hiz_region; - GLuint draw_offset; /**< Offset of drawing address within the region */ + /** + * \name Packed depth/stencil unwrappers + * + * If the intel_context is using separate stencil and this renderbuffer has + * a a packed depth/stencil format, then wrapped_depth and wrapped_stencil + * are the real renderbuffers. + */ + struct gl_renderbuffer *wrapped_depth; + struct gl_renderbuffer *wrapped_stencil; + + /** \} */ + GLuint draw_x, draw_y; /**< Offset of drawing within the region */ }; @@ -76,15 +88,47 @@ intel_renderbuffer(struct gl_renderbuffer *rb) /** - * Return a framebuffer's renderbuffer, named by a BUFFER_x index. + * \brief Return the framebuffer attachment specified by attIndex. + * + * If the framebuffer lacks the specified attachment, then return null. + * + * If the attached renderbuffer is a wrapper, then return wrapped + * renderbuffer. */ static INLINE struct intel_renderbuffer * -intel_get_renderbuffer(struct gl_framebuffer *fb, int attIndex) +intel_get_renderbuffer(struct gl_framebuffer *fb, gl_buffer_index attIndex) { - if (attIndex >= 0) - return intel_renderbuffer(fb->Attachment[attIndex].Renderbuffer); - else + struct gl_renderbuffer *rb; + struct intel_renderbuffer *irb; + + /* XXX: Who passes -1 to intel_get_renderbuffer? */ + if (attIndex < 0) + return NULL; + + rb = fb->Attachment[attIndex].Renderbuffer; + if (!rb) + return NULL; + + irb = intel_renderbuffer(rb); + if (!irb) return NULL; + + switch (attIndex) { + case BUFFER_DEPTH: + if (irb->wrapped_depth) { + irb = intel_renderbuffer(irb->wrapped_depth); + } + break; + case BUFFER_STENCIL: + if (irb->wrapped_stencil) { + irb = intel_renderbuffer(irb->wrapped_stencil); + } + break; + default: + break; + } + + return irb; } /** @@ -110,21 +154,19 @@ intel_framebuffer_has_hiz(struct gl_framebuffer *fb) return intel_framebuffer_get_hiz_region(fb) != NULL; } - -extern void -intel_renderbuffer_set_region(struct intel_context *intel, - struct intel_renderbuffer *irb, - struct intel_region *region); - -extern void -intel_renderbuffer_set_hiz_region(struct intel_context *intel, - struct intel_renderbuffer *rb, - struct intel_region *region); - - extern struct intel_renderbuffer * intel_create_renderbuffer(gl_format format); +struct gl_renderbuffer* +intel_create_wrapped_renderbuffer(struct gl_context * ctx, + int width, int height, + gl_format format); + +GLboolean +intel_alloc_renderbuffer_storage(struct gl_context * ctx, + struct gl_renderbuffer *rb, + GLenum internalFormat, + GLuint width, GLuint height); extern void intel_fbo_init(struct intel_context *intel); @@ -133,6 +175,11 @@ intel_fbo_init(struct intel_context *intel); extern void intel_flip_renderbuffers(struct gl_framebuffer *fb); +void +intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb, + struct intel_texture_image *intel_image, + int zoffset); + uint32_t intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb, uint32_t *tile_x, diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index 64c7acce1e9..86d0ef2d748 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -175,7 +175,7 @@ do_blit_bitmap( struct gl_context *ctx, const GLubyte *bitmap ) { struct intel_context *intel = intel_context(ctx); - struct intel_region *dst = intel_drawbuf_region(intel); + struct intel_region *dst; struct gl_framebuffer *fb = ctx->DrawBuffer; GLfloat tmpColor[4]; GLubyte ubcolor[4]; @@ -198,6 +198,9 @@ do_blit_bitmap( struct gl_context *ctx, return GL_FALSE; } + intel_prepare_render(intel); + dst = intel_drawbuf_region(intel); + if (!dst) return GL_FALSE; @@ -226,8 +229,6 @@ do_blit_bitmap( struct gl_context *ctx, if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F)) return GL_FALSE; - intel_prepare_render(intel); - /* Clip to buffer bounds and scissor. */ if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin, fb->_Xmax, fb->_Ymax, diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h index 5258699d3f9..a98a669af21 100644 --- a/src/mesa/drivers/dri/intel/intel_reg.h +++ b/src/mesa/drivers/dri/intel/intel_reg.h @@ -49,7 +49,6 @@ #define I1_LOAD_S(n) (1<<(4+n)) #define _3DSTATE_DRAWRECT_INFO (CMD_3D | (0x1d<<24) | (0x80<<16) | 0x3) -#define _3DSTATE_DRAWRECT_INFO_I965 (CMD_3D | (3 << 27) | (1 << 24) | 0x2) /** @{ * @@ -76,6 +75,7 @@ #define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4) #define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3) #define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2) +#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) #define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) #define PIPE_CONTROL_PPGTT_WRITE (0 << 2) #define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2) diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index a4da1ce4fa5..4c4945c7941 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -264,12 +264,15 @@ intel_region_alloc_for_handle(struct intel_screen *screen, void intel_region_reference(struct intel_region **dst, struct intel_region *src) { - if (src) - _DBG("%s %p %d\n", __FUNCTION__, src, src->refcount); + _DBG("%s: %p(%d) -> %p(%d)\n", __FUNCTION__, + *dst, *dst ? (*dst)->refcount : 0, src, src ? src->refcount : 0); - assert(*dst == NULL); - if (src) { - src->refcount++; + if (src != *dst) { + if (*dst) + intel_region_release(dst); + + if (src) + src->refcount++; *dst = src; } } diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index e915ca04fe0..bd8d574a29e 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -282,13 +282,37 @@ intel_query_image(__DRIimage *image, int attrib, int *value) } } +static __DRIimage * +intel_dup_image(__DRIimage *orig_image, void *loaderPrivate) +{ + __DRIimage *image; + + image = CALLOC(sizeof *image); + if (image == NULL) + return NULL; + + intel_region_reference(&image->region, orig_image->region); + if (image->region == NULL) { + FREE(image); + return NULL; + } + + image->internal_format = orig_image->internal_format; + image->format = orig_image->format; + image->data_type = orig_image->data_type; + image->data = loaderPrivate; + + return image; +} + static struct __DRIimageExtensionRec intelImageExtension = { { __DRI_IMAGE, __DRI_IMAGE_VERSION }, intel_create_image_from_name, intel_create_image_from_renderbuffer, intel_destroy_image, intel_create_image, - intel_query_image + intel_query_image, + intel_dup_image }; static const __DRIextension *intelScreenExtensions[] = { diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h index b2013af1a29..9dd6a525566 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.h +++ b/src/mesa/drivers/dri/intel/intel_screen.h @@ -63,9 +63,12 @@ * x8_z24 and s8). * * Eventually, intel_update_renderbuffers() makes a DRI2 request for - * DRI2BufferStencil and DRI2BufferHiz. If the returned buffers are Y tiled, - * then we joyfully set intel_screen.dri2_has_hiz to true and continue as if - * nothing happend. + * DRI2BufferStencil and DRI2BufferHiz. If the stencil buffer's tiling is + * I915_TILING_NONE [1], then we joyfully set intel_screen.dri2_has_hiz to + * true and continue as if nothing happend. + * + * [1] The stencil buffer is actually W tiled. However, we request from the + * kernel a non-tiled buffer because the GTT is incapable of W fencing. * * If the buffers are X tiled, however, the handshake has failed and we must * clean up. diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 9343f40eef0..2e1c80c4766 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -131,38 +131,84 @@ intel_set_span_functions(struct intel_context *intel, int miny = 0; \ int maxx = rb->Width; \ int maxy = rb->Height; \ - int stride = rb->RowStride; \ - uint8_t *buf = rb->Data; \ + \ + /* \ + * Here we ignore rb->Data and rb->RowStride as set by \ + * intelSpanRenderStart. Since intel_offset_S8 decodes the W tile \ + * manually, the region's *real* base address and stride is \ + * required. \ + */ \ + struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ + uint8_t *buf = irb->region->buffer->virtual; \ + unsigned stride = irb->region->pitch; \ + unsigned height = 2 * irb->region->height; \ + bool flip = rb->Name == 0; \ + int y_scale = flip ? -1 : 1; \ + int y_bias = flip ? (height - 1) : 0; \ -/* Don't flip y. */ #undef Y_FLIP -#define Y_FLIP(y) y +#define Y_FLIP(y) (y_scale * (y) + y_bias) /** * \brief Get pointer offset into stencil buffer. * - * The stencil buffer interleaves two rows into one. Yay for crazy hardware. - * The table below demonstrates how the pointer arithmetic behaves for a buffer - * with positive stride (s=stride). - * - * x | y | byte offset - * -------------------------- - * 0 | 0 | 0 - * 0 | 0 | 1 - * 1 | 0 | 2 - * 1 | 1 | 3 - * ... | ... | ... - * 0 | 2 | s - * 0 | 3 | s + 1 - * 1 | 2 | s + 2 - * 1 | 3 | s + 3 + * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we + * must decode the tile's layout in software. * + * See + * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile + * Format. + * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm * + * Even though the returned offset is always positive, the return type is + * signed due to + * commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137 + * mesa: Fix return type of _mesa_get_format_bytes() (#37351) */ static inline intptr_t -intel_offset_S8(int stride, GLint x, GLint y) +intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y) { - return 2 * ((y / 2) * stride + x) + y % 2; + uint32_t tile_size = 4096; + uint32_t tile_width = 64; + uint32_t tile_height = 64; + uint32_t row_size = 64 * stride; + + uint32_t tile_x = x / tile_width; + uint32_t tile_y = y / tile_height; + + /* The byte's address relative to the tile's base addres. */ + uint32_t byte_x = x % tile_width; + uint32_t byte_y = y % tile_height; + + uintptr_t u = tile_y * row_size + + tile_x * tile_size + + 512 * (byte_x / 8) + + 64 * (byte_y / 8) + + 32 * ((byte_y / 4) % 2) + + 16 * ((byte_x / 4) % 2) + + 8 * ((byte_y / 2) % 2) + + 4 * ((byte_x / 2) % 2) + + 2 * (byte_y % 2) + + 1 * (byte_x % 2); + + /* + * Errata for Gen5: + * + * An additional offset is needed which is not documented in the PRM. + * + * if ((byte_x / 8) % 2 == 1) { + * if ((byte_y / 8) % 2) == 0) { + * u += 64; + * } else { + * u -= 64; + * } + * } + * + * The offset is expressed more tersely as + * u += ((int) x & 0x8) * (8 - (((int) y & 0x8) << 1)); + */ + + return u; } #define WRITE_STENCIL(x, y, src) buf[intel_offset_S8(stride, x, y)] = src; @@ -177,7 +223,15 @@ intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb) { struct intel_renderbuffer *irb = intel_renderbuffer(rb); - if (irb == NULL || irb->region == NULL) + if (!irb) + return; + + if (irb->wrapped_depth) + intel_renderbuffer_map(intel, irb->wrapped_depth); + if (irb->wrapped_stencil) + intel_renderbuffer_map(intel, irb->wrapped_stencil); + + if (!irb->region) return; drm_intel_gem_bo_map_gtt(irb->region->buffer); @@ -206,7 +260,15 @@ intel_renderbuffer_unmap(struct intel_context *intel, { struct intel_renderbuffer *irb = intel_renderbuffer(rb); - if (irb == NULL || irb->region == NULL) + if (!irb) + return; + + if (irb->wrapped_depth) + intel_renderbuffer_unmap(intel, irb->wrapped_depth); + if (irb->wrapped_stencil) + intel_renderbuffer_unmap(intel, irb->wrapped_stencil); + + if (!irb->region) return; drm_intel_gem_bo_unmap_gtt(irb->region->buffer); diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c index 32e1fb7397a..21c4a1dddba 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.c +++ b/src/mesa/drivers/dri/intel/intel_tex.c @@ -1,4 +1,5 @@ #include "swrast/swrast.h" +#include "main/renderbuffer.h" #include "main/texobj.h" #include "main/teximage.h" #include "main/mipmap.h" @@ -59,6 +60,14 @@ intelFreeTextureImageData(struct gl_context * ctx, struct gl_texture_image *texI _mesa_free_texmemory(texImage->Data); texImage->Data = NULL; } + + if (intelImage->depth_rb) { + _mesa_reference_renderbuffer(&intelImage->depth_rb, NULL); + } + + if (intelImage->stencil_rb) { + _mesa_reference_renderbuffer(&intelImage->stencil_rb, NULL); + } } /** diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index eda07a43dee..e89e91dee3e 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -55,15 +55,11 @@ get_teximage_readbuffer(struct intel_context *intel, GLenum internalFormat) DBG("%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(internalFormat)); - switch (internalFormat) { - case GL_DEPTH_COMPONENT: - case GL_DEPTH_COMPONENT16: - case GL_DEPTH24_STENCIL8_EXT: - case GL_DEPTH_STENCIL_EXT: + if (_mesa_is_depth_format(internalFormat) || + _mesa_is_depthstencil_format(internalFormat)) return intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH); - default: - return intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer); - } + + return intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer); } @@ -168,101 +164,6 @@ intel_copy_texsubimage(struct intel_context *intel, static void -intelCopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLint border) -{ - struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - int srcx, srcy, dstx, dsty, height; - - if (border) - goto fail; - - /* Setup or redefine the texture object, mipmap tree and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage1D(ctx, target, level, internalFormat, - width, border, - GL_RGBA, CHAN_TYPE, NULL, - &ctx->DefaultPacking, texObj, texImage); - srcx = x; - srcy = y; - dstx = 0; - dsty = 0; - height = 1; - if (!_mesa_clip_copytexsubimage(ctx, - &dstx, &dsty, - &srcx, &srcy, - &width, &height)) - return; - - if (!intel_copy_texsubimage(intel_context(ctx), target, - intel_texture_image(texImage), - internalFormat, 0, 0, x, y, width, height)) - goto fail; - - return; - - fail: - fallback_debug("%s - fallback to swrast\n", __FUNCTION__); - _mesa_meta_CopyTexImage1D(ctx, target, level, internalFormat, x, y, - width, border); -} - - -static void -intelCopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border) -{ - struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - int srcx, srcy, dstx, dsty; - - if (border) - goto fail; - - /* Setup or redefine the texture object, mipmap tree and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, - GL_RGBA, GL_UNSIGNED_BYTE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - srcx = x; - srcy = y; - dstx = 0; - dsty = 0; - if (!_mesa_clip_copytexsubimage(ctx, - &dstx, &dsty, - &srcx, &srcy, - &width, &height)) - return; - - if (!intel_copy_texsubimage(intel_context(ctx), target, - intel_texture_image(texImage), - internalFormat, 0, 0, x, y, width, height)) - goto fail; - - return; - - fail: - fallback_debug("%s - fallback to swrast\n", __FUNCTION__); - _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y, - width, height, border); -} - - -static void intelCopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width) { @@ -316,8 +217,6 @@ intelCopyTexSubImage2D(struct gl_context * ctx, GLenum target, GLint level, void intelInitTextureCopyImageFuncs(struct dd_function_table *functions) { - functions->CopyTexImage1D = intelCopyTexImage1D; - functions->CopyTexImage2D = intelCopyTexImage2D; functions->CopyTexSubImage1D = intelCopyTexSubImage1D; functions->CopyTexSubImage2D = intelCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 90d4117ec8f..1f8b885bbec 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -8,6 +8,7 @@ #include "main/context.h" #include "main/formats.h" #include "main/pbo.h" +#include "main/renderbuffer.h" #include "main/texcompress.h" #include "main/texstore.h" #include "main/texgetimage.h" @@ -21,6 +22,7 @@ #include "intel_tex.h" #include "intel_blit.h" #include "intel_fbo.h" +#include "intel_span.h" #define FILE_DEBUG_FLAG DEBUG_TEXTURE @@ -118,25 +120,6 @@ intel_miptree_create_for_teximage(struct intel_context *intel, expect_accelerated_upload); } - - - -static GLuint -target_to_face(GLenum target) -{ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: - return ((GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X); - default: - return 0; - } -} - /* There are actually quite a few combinations this will work for, * more than what I've listed here. */ @@ -277,6 +260,130 @@ try_pbo_zcopy(struct intel_context *intel, return GL_TRUE; } +/** + * \param scatter Scatter if true. Gather if false. + * + * \see intel_tex_image_x8z24_scatter + * \see intel_tex_image_x8z24_gather + */ +static void +intel_tex_image_s8z24_scattergather(struct intel_context *intel, + struct intel_texture_image *intel_image, + bool scatter) +{ + struct gl_context *ctx = &intel->ctx; + struct gl_renderbuffer *depth_rb = intel_image->depth_rb; + struct gl_renderbuffer *stencil_rb = intel_image->stencil_rb; + + int w = intel_image->base.Width; + int h = intel_image->base.Height; + + uint32_t depth_row[w]; + uint8_t stencil_row[w]; + + intel_renderbuffer_map(intel, depth_rb); + intel_renderbuffer_map(intel, stencil_rb); + + if (scatter) { + for (int y = 0; y < h; ++y) { + depth_rb->GetRow(ctx, depth_rb, w, 0, y, depth_row); + for (int x = 0; x < w; ++x) { + stencil_row[x] = depth_row[x] >> 24; + } + stencil_rb->PutRow(ctx, stencil_rb, w, 0, y, stencil_row, NULL); + } + } else { /* gather */ + for (int y = 0; y < h; ++y) { + depth_rb->GetRow(ctx, depth_rb, w, 0, y, depth_row); + stencil_rb->GetRow(ctx, stencil_rb, w, 0, y, stencil_row); + for (int x = 0; x < w; ++x) { + uint32_t s8_x24 = stencil_row[x] << 24; + uint32_t x8_z24 = depth_row[x] & 0x00ffffff; + depth_row[x] = s8_x24 | x8_z24; + } + depth_rb->PutRow(ctx, depth_rb, w, 0, y, depth_row, NULL); + } + } + + intel_renderbuffer_unmap(intel, depth_rb); + intel_renderbuffer_unmap(intel, stencil_rb); +} + +/** + * Copy the x8 bits from intel_image->depth_rb to intel_image->stencil_rb. + */ +static void +intel_tex_image_s8z24_scatter(struct intel_context *intel, + struct intel_texture_image *intel_image) +{ + intel_tex_image_s8z24_scattergather(intel, intel_image, true); +} + +/** + * Copy the data in intel_image->stencil_rb to the x8 bits in + * intel_image->depth_rb. + */ +static void +intel_tex_image_s8z24_gather(struct intel_context *intel, + struct intel_texture_image *intel_image) +{ + intel_tex_image_s8z24_scattergather(intel, intel_image, false); +} + +static bool +intel_tex_image_s8z24_create_renderbuffers(struct intel_context *intel, + struct intel_texture_image *image) +{ + struct gl_context *ctx = &intel->ctx; + + bool ok = true; + int width = image->base.Width; + int height = image->base.Height; + struct gl_renderbuffer *drb; + struct gl_renderbuffer *srb; + struct intel_renderbuffer *idrb; + struct intel_renderbuffer *isrb; + + assert(intel->has_separate_stencil); + assert(image->base.TexFormat == MESA_FORMAT_S8_Z24); + assert(image->mt != NULL); + + drb = intel_create_wrapped_renderbuffer(ctx, width, height, + MESA_FORMAT_X8_Z24); + srb = intel_create_wrapped_renderbuffer(ctx, width, height, + MESA_FORMAT_S8); + + if (!drb || !srb) { + if (drb) { + drb->Delete(drb); + } + if (srb) { + srb->Delete(srb); + } + return false; + } + + idrb = intel_renderbuffer(drb); + isrb = intel_renderbuffer(srb); + + intel_region_reference(&idrb->region, image->mt->region); + ok = intel_alloc_renderbuffer_storage(ctx, srb, GL_STENCIL_INDEX8, + width, height); + + if (!ok) { + drb->Delete(drb); + srb->Delete(srb); + return false; + } + + intel_renderbuffer_set_draw_offset(idrb, image, 0); + intel_renderbuffer_set_draw_offset(isrb, image, 0); + + _mesa_reference_renderbuffer(&image->depth_rb, drb); + _mesa_reference_renderbuffer(&image->stencil_rb, srb); + + return true; +} static void intelTexImage(struct gl_context * ctx, @@ -300,7 +407,7 @@ intelTexImage(struct gl_context * ctx, DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), level, width, height, depth, border); - intelImage->face = target_to_face(target); + intelImage->face = _mesa_tex_target_to_face(target); intelImage->level = level; if (_mesa_is_format_compressed(texImage->TexFormat)) { @@ -314,18 +421,7 @@ intelTexImage(struct gl_context * ctx, } } - /* Release the reference to a potentially orphaned buffer. - * Release any old malloced memory. - */ - if (intelImage->mt) { - intel_miptree_release(intel, &intelImage->mt); - assert(!texImage->Data); - } - else if (texImage->Data) { - _mesa_free_texmemory(texImage->Data); - texImage->Data = NULL; - } - + ctx->Driver.FreeTexImageData(ctx, texImage); assert(!intelImage->mt); if (intelObj->mt && @@ -484,6 +580,12 @@ intelTexImage(struct gl_context * ctx, _mesa_unmap_teximage_pbo(ctx, unpack); + if (intel->must_use_separate_stencil + && texImage->TexFormat == MESA_FORMAT_S8_Z24) { + intel_tex_image_s8z24_create_renderbuffers(intel, intelImage); + intel_tex_image_s8z24_scatter(intel, intelImage); + } + if (intelImage->mt) { if (pixels != NULL) intel_miptree_image_unmap(intel, intelImage->mt); @@ -600,6 +702,14 @@ intel_get_tex_image(struct gl_context * ctx, GLenum target, GLint level, assert(intelImage->base.Data); } + if (intelImage->stencil_rb) { + /* + * The texture has packed depth/stencil format, but uses separate + * stencil. The texture's embedded stencil buffer contains the real + * stencil data, so copy that into the miptree. + */ + intel_tex_image_s8z24_gather(intel, intelImage); + } if (compressed) { _mesa_get_compressed_teximage(ctx, target, level, pixels, @@ -706,7 +816,7 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, rb->region->width, rb->region->height, 1, 0, internalFormat, texFormat); - intelImage->face = target_to_face(target); + intelImage->face = _mesa_tex_target_to_face(target); intelImage->level = level; texImage->RowStride = rb->region->pitch; intel_miptree_reference(&intelImage->mt, intelObj->mt); @@ -764,7 +874,7 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, image->region->width, image->region->height, 1, 0, image->internal_format, image->format); - intelImage->face = target_to_face(target); + intelImage->face = _mesa_tex_target_to_face(target); intelImage->level = 0; texImage->RowStride = image->region->pitch; intel_miptree_reference(&intelImage->mt, intelObj->mt); diff --git a/src/mesa/drivers/dri/intel/intel_tex_obj.h b/src/mesa/drivers/dri/intel/intel_tex_obj.h index e93ef4a4727..a9ae2ec5429 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_obj.h +++ b/src/mesa/drivers/dri/intel/intel_tex_obj.h @@ -63,6 +63,36 @@ struct intel_texture_image */ struct intel_mipmap_tree *mt; GLboolean used_as_render_target; + + /** + * \name Renderbuffers for faking packed depth/stencil + * + * These renderbuffers are non-null only if the intel_context is using + * separate stencil and this texture has a packed depth/stencil format. When + * glFramebufferTexture is called on this image, the resultant renderbuffer + * wrapper reuses these renderbuffers as its own. + * + * \see intel_wrap_texture + * \see intel_tex_image_s8z24_create_renderbuffers + * \see intel_tex_image_s8z24_scatter + * \see intel_tex_image_s8z24_gather + * + * \{ + */ + + /** + * The depth buffer has format X8_Z24. The x8 bits are undefined unless + * intel_tex_image_s8z24_gather has been immediately called. The depth buffer + * resuses the image miptree's region and hiz_region as its own. + */ + struct gl_renderbuffer *depth_rb; + + /** + * The stencil buffer has format S8 and keeps its data in its own region. + */ + struct gl_renderbuffer *stencil_rb; + + /** \} */ }; static INLINE struct intel_texture_object * diff --git a/src/mesa/drivers/dri/nouveau/nv10_render.c b/src/mesa/drivers/dri/nouveau/nv10_render.c index 20fb4478426..6134650346d 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_render.c +++ b/src/mesa/drivers/dri/nouveau/nv10_render.c @@ -99,7 +99,7 @@ get_hw_format(int type) case GL_UNSIGNED_SHORT: return NV10_3D_VTXBUF_FMT_TYPE_V16_SNORM; case GL_UNSIGNED_BYTE: - return NV10_3D_VTXBUF_FMT_TYPE_B8G8R8A8_UNORM; + return NV10_3D_VTXBUF_FMT_TYPE_U8_UNORM; default: assert(0); } diff --git a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c index 931a9ecf8fe..a512c9d112a 100644 --- a/src/mesa/drivers/dri/r200/r200_cmdbuf.c +++ b/src/mesa/drivers/dri/r200/r200_cmdbuf.c @@ -47,9 +47,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #define insert_at_tail_if(atom_list, atom) \ do { \ - struct radeon_state_atom* __atom = (atom); \ - if (__atom->check) \ - insert_at_tail((atom_list), __atom); \ + struct radeon_state_atom* current_atom = (atom); \ + if (current_atom->check) \ + insert_at_tail((atom_list), current_atom); \ } while(0) void r200SetUpAtomList( r200ContextPtr rmesa ) diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c index 02201cb53d6..44a794da396 100644 --- a/src/mesa/drivers/dri/r200/r200_ioctl.c +++ b/src/mesa/drivers/dri/r200/r200_ioctl.c @@ -185,7 +185,6 @@ static void r200Clear( struct gl_context *ctx, GLbitfield mask ) r200ContextPtr rmesa = R200_CONTEXT(ctx); __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLuint flags = 0; - GLuint color_mask = 0; GLuint orig_mask = mask; if ( R200_DEBUG & RADEON_IOCTL ) { @@ -206,13 +205,11 @@ static void r200Clear( struct gl_context *ctx, GLbitfield mask ) if ( mask & BUFFER_BIT_FRONT_LEFT ) { flags |= RADEON_FRONT; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_FRONT_LEFT; } if ( mask & BUFFER_BIT_BACK_LEFT ) { flags |= RADEON_BACK; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_BACK_LEFT; } diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c index d42e8f12041..91e77f9f7da 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.c +++ b/src/mesa/drivers/dri/r200/r200_tex.c @@ -527,7 +527,6 @@ void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *fu functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c index 7adf9ad73ed..8c9bd6d00b2 100644 --- a/src/mesa/drivers/dri/r200/r200_texstate.c +++ b/src/mesa/drivers/dri/r200/r200_texstate.c @@ -773,18 +773,12 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format struct radeon_renderbuffer *rb; radeon_texture_image *rImage; radeonContextPtr radeon; - r200ContextPtr rmesa; struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, format; gl_format texFormat; - format = GL_UNSIGNED_BYTE; - internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); - radeon = pDRICtx->driverPrivate; - rmesa = pDRICtx->driverPrivate; rfb = dPriv->driverPrivate; texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit]; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index b0deb751be0..a8decacedaf 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -687,7 +687,7 @@ static void get_readers_for_single_write( struct rc_instruction * tmp; unsigned int branch_depth = 0; struct rc_instruction * endloop = NULL; - unsigned int abort_on_read_at_endloop; + unsigned int abort_on_read_at_endloop = 0; struct get_readers_callback_data * d = userdata; d->ReaderData->Writer = writer; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index ac73608839e..39dcb21d4f4 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -560,32 +560,30 @@ static int peephole_add_presub_add( struct radeon_compiler * c, struct rc_instruction * inst_add) { - struct rc_src_register * src0 = NULL; - struct rc_src_register * src1 = NULL; - unsigned int i; - - if (!is_presub_candidate(c, inst_add)) - return 0; + unsigned dstmask = inst_add->U.I.DstReg.WriteMask; + unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; + unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) return 0; - /* src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */ - for (i = 0; i < 2; i++) { - if (inst_add->U.I.SrcReg[i].Abs) - return 0; - if ((inst_add->U.I.SrcReg[i].Negate - & inst_add->U.I.DstReg.WriteMask) == - inst_add->U.I.DstReg.WriteMask) { - src0 = &inst_add->U.I.SrcReg[i]; - } else if (!src1) { - src1 = &inst_add->U.I.SrcReg[i]; - } else { - src0 = &inst_add->U.I.SrcReg[i]; - } - } + /* src0 and src1 can't have absolute values */ + if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) + return 0; + + /* presub_replace_add() assumes only one is negative */ + if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) + return 0; + + /* if src0 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) + return 0; - if (!src1) + /* if src1 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) + return 0; + + if (!is_presub_candidate(c, inst_add)) return 0; if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { @@ -618,7 +616,7 @@ static void presub_replace_inv( * of the add instruction must have the constatnt 1 swizzle. This function * does not check const registers to see if their value is 1.0, so it should * be called after the constant_folding optimization. - * @return + * @return * 0 if the ADD instruction is still part of the program. * 1 if the ADD instruction is no longer part of the program. */ diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 590d9afe14a..93d8fe185ef 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -379,7 +379,6 @@ void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index e24ad6f088d..e4388a021ed 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -427,13 +427,8 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, type, format; gl_format texFormat; - type = GL_BGRA; - format = GL_UNSIGNED_BYTE; - internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); - radeon = pDRICtx->driverPrivate; rmesa = pDRICtx->driverPrivate; diff --git a/src/mesa/drivers/dri/r600/evergreen_tex.c b/src/mesa/drivers/dri/r600/evergreen_tex.c index 33a5f277683..d240a216817 100644 --- a/src/mesa/drivers/dri/r600/evergreen_tex.c +++ b/src/mesa/drivers/dri/r600/evergreen_tex.c @@ -1288,19 +1288,12 @@ void evergreenSetTexBuffer(__DRIcontext *pDRICtx, GLint target, GLint glx_textur struct radeon_renderbuffer *rb; radeon_texture_image *rImage; radeonContextPtr radeon; - context_t *rmesa; struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, type, format; gl_format texFormat; - type = GL_BGRA; - format = GL_UNSIGNED_BYTE; - internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); - radeon = pDRICtx->driverPrivate; - rmesa = pDRICtx->driverPrivate; rfb = dPriv->driverPrivate; texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit]; @@ -1688,7 +1681,6 @@ void evergreenInitTextureFuncs(radeonContextPtr radeon, struct dd_function_table functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c index ce2f7779563..74f048b1062 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c @@ -259,13 +259,11 @@ static int r600_cs_process_relocs(struct radeon_cs_int *csi, uint32_t * reloc_chunk, uint32_t * length_dw_reloc_chunk) { - struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm; struct r600_cs_reloc_legacy *relocs; int i, j, r; uint32_t offset_dw = 0; - csm = (struct r600_cs_manager_legacy*)csi->csm; relocs = (struct r600_cs_reloc_legacy *)csi->relocs; restart: for (i = 0; i < csi->crelocs; i++) { diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c index eb7ed30c7a3..3efa1d197fa 100644 --- a/src/mesa/drivers/dri/r600/r600_tex.c +++ b/src/mesa/drivers/dri/r600/r600_tex.c @@ -470,7 +470,6 @@ void r600InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 949db29c189..65fae7195fd 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -1141,13 +1141,8 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, type, format; gl_format texFormat; - type = GL_BGRA; - format = GL_UNSIGNED_BYTE; - internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); - radeon = pDRICtx->driverPrivate; rmesa = pDRICtx->driverPrivate; diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 2bd3b62bfdb..0f7a7a46b71 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -329,7 +329,7 @@ static void r700RunRenderPrimitiveImmediate(struct gl_context * ctx, int start, { context_t *context = R700_CONTEXT(ctx); BATCH_LOCALS(&context->radeon); - int type, i; + int type; uint32_t num_indices, total_emit = 0; uint32_t vgt_draw_initiator = 0; uint32_t vgt_index_type = 0; @@ -370,22 +370,7 @@ static void r700RunRenderPrimitiveImmediate(struct gl_context * ctx, int start, vgt_num_indices = num_indices; SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); - if (start == 0) - { - SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask); - } - else - { - if (num_indices > 0xffff) - { - total_emit += num_indices; - } - else - { - total_emit += (num_indices + 1) / 2; - } - SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); - } + SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask); total_emit += 3 /* VGT_PRIMITIVE_TYPE */ + 2 /* VGT_INDEX_TYPE */ @@ -406,45 +391,13 @@ static void r700RunRenderPrimitiveImmediate(struct gl_context * ctx, int start, /* offset */ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 2)); R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); - R600_OUT_BATCH(0); //VTX_BASE_VTX_LOC + R600_OUT_BATCH(start); //VTX_BASE_VTX_LOC R600_OUT_BATCH(0); //VTX_START_INST_LOC // draw packet - if(start == 0) - { - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); - R600_OUT_BATCH(vgt_num_indices); - R600_OUT_BATCH(vgt_draw_initiator); - } - else - { - if (num_indices > 0xffff) - { - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); - R600_OUT_BATCH(vgt_num_indices); - R600_OUT_BATCH(vgt_draw_initiator); - for (i = start; i < (start + num_indices); i++) - { - R600_OUT_BATCH(i); - } - } - else - { - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (((num_indices + 1) / 2) + 1))); - R600_OUT_BATCH(vgt_num_indices); - R600_OUT_BATCH(vgt_draw_initiator); - for (i = start; i < (start + num_indices); i += 2) - { - if ((i + 1) == (start + num_indices)) - { - R600_OUT_BATCH(i); - } - else - { - R600_OUT_BATCH(((i + 1) << 16) | (i)); - } - } - } - } + + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); END_BATCH(); COMMIT_BATCH(); @@ -469,12 +422,7 @@ static GLuint r700PredictRenderSize(struct gl_context* ctx, else { for (i = 0; i < nr_prims; ++i) { - if (prim[i].start == 0) - dwords += 14; - else if (prim[i].count > 0xffff) - dwords += prim[i].count + 14; - else - dwords += ((prim[i].count + 1) / 2) + 14; + dwords += 14; } } diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index bfc307ca987..e7a6623cf84 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -436,7 +436,6 @@ void radeonCopyBuffer( __DRIdrawable *dPriv, const drm_clip_rect_t *rect) { radeonContextPtr rmesa; - struct radeon_framebuffer *rfb; GLint nbox, i, ret; assert(dPriv); @@ -447,8 +446,6 @@ void radeonCopyBuffer( __DRIdrawable *dPriv, LOCK_HARDWARE(rmesa); - rfb = dPriv->driverPrivate; - if ( RADEON_DEBUG & RADEON_IOCTL ) { fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx ); } @@ -527,8 +524,6 @@ static GLboolean radeonPageFlip( __DRIdrawable *dPriv ) { radeonContextPtr radeon; GLint ret; - __DRIscreen *psp; - struct radeon_renderbuffer *rrb; struct radeon_framebuffer *rfb; assert(dPriv); @@ -537,9 +532,6 @@ static GLboolean radeonPageFlip( __DRIdrawable *dPriv ) radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; rfb = dPriv->driverPrivate; - rrb = (void *)rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer; - - psp = dPriv->driScreenPriv; LOCK_HARDWARE(radeon); diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index bf8925f61d0..c08b79484af 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -515,7 +515,6 @@ void radeon_prepare_render(radeonContextPtr radeon) __DRIcontext *driContext = radeon->dri.context; __DRIdrawable *drawable; __DRIscreen *screen; - struct radeon_framebuffer *draw; screen = driContext->driScreenPriv; if (!screen->dri2.loader) @@ -527,7 +526,6 @@ void radeon_prepare_render(radeonContextPtr radeon) radeon_update_renderbuffers(driContext, drawable, GL_FALSE); /* Intel driver does the equivalent of this, no clue if it is needed:*/ - draw = drawable->driverPrivate; radeon_draw_buffer(radeon->glCtx, radeon->glCtx->DrawBuffer); driContext->dri2.draw_stamp = drawable->dri2.stamp; diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c index c2722a4e195..5595b705b15 100644 --- a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c +++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c @@ -218,11 +218,9 @@ static int cs_end(struct radeon_cs_int *cs, static int cs_process_relocs(struct radeon_cs_int *cs) { - struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm; struct cs_reloc_legacy *relocs; int i, j, r; - csm = (struct cs_manager_legacy*)cs->csm; relocs = (struct cs_reloc_legacy *)cs->relocs; restart: for (i = 0; i < cs->crelocs; i++) diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c index a91d8727792..c23e9c2d2a2 100644 --- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c +++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c @@ -560,7 +560,6 @@ static void radeonClear( struct gl_context *ctx, GLbitfield mask ) r100ContextPtr rmesa = R100_CONTEXT(ctx); __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLuint flags = 0; - GLuint color_mask = 0; GLuint orig_mask = mask; if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) { @@ -582,13 +581,11 @@ static void radeonClear( struct gl_context *ctx, GLbitfield mask ) if ( mask & BUFFER_BIT_FRONT_LEFT ) { flags |= RADEON_FRONT; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_FRONT_LEFT; } if ( mask & BUFFER_BIT_BACK_LEFT ) { flags |= RADEON_BACK; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_BACK_LEFT; } diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c index 7b6bd36dcf7..ae8a212f806 100644 --- a/src/mesa/drivers/dri/radeon/radeon_lock.c +++ b/src/mesa/drivers/dri/radeon/radeon_lock.c @@ -114,16 +114,6 @@ void radeon_lock_hardware(radeonContextPtr radeon ) { char ret = 0; - struct radeon_framebuffer *rfb = NULL; - struct radeon_renderbuffer *rrb = NULL; - - if (radeon_get_drawable(radeon)) { - rfb = radeon_get_drawable(radeon)->driverPrivate; - - if (rfb) - rrb = radeon_get_renderbuffer(&rfb->base, - rfb->base._ColorDrawBufferIndexes[0]); - } if (!radeon->radeonScreen->driScreen->dri2.enabled) { if (ATOMIC_INC_AND_FETCH(radeon->dri.hwLockCount) > 1) diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index c5ddb6d3ebe..583a6446462 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -1217,6 +1217,7 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) case PCI_CHIP_TURKS_6750: case PCI_CHIP_TURKS_6758: case PCI_CHIP_TURKS_6759: + case PCI_CHIP_TURKS_675F: screen->chip_family = CHIP_FAMILY_TURKS; screen->chip_flags = RADEON_CHIPSET_TCL; break; @@ -1231,6 +1232,7 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) case PCI_CHIP_CAICOS_6767: case PCI_CHIP_CAICOS_6768: case PCI_CHIP_CAICOS_6770: + case PCI_CHIP_CAICOS_6778: case PCI_CHIP_CAICOS_6779: screen->chip_family = CHIP_FAMILY_CAICOS; screen->chip_flags = RADEON_CHIPSET_TCL; @@ -1739,6 +1741,9 @@ radeonCreateScreen2(__DRIscreen *sPriv) case 1: screen->num_banks = 8; break; + case 2: + screen->num_banks = 16; + break; default: fprintf(stderr, "bad banks\n"); break; diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c index 25a8ddf7b6a..a0b5506ae76 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex.c @@ -455,7 +455,6 @@ void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table * functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c index f14dfa25d40..94ff3c4a727 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c @@ -141,61 +141,6 @@ do_copy_texsubimage(struct gl_context *ctx, } void -radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border) -{ - struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - int srcx, srcy, dstx, dsty; - - radeonContextPtr radeon = RADEON_CONTEXT(ctx); - radeon_prepare_render(radeon); - - if (border) - goto fail; - - /* Setup or redefine the texture object, mipmap tree and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, - GL_RGBA, GL_UNSIGNED_BYTE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - srcx = x; - srcy = y; - dstx = 0; - dsty = 0; - if (!_mesa_clip_copytexsubimage(ctx, - &dstx, &dsty, - &srcx, &srcy, - &width, &height)) { - return; - } - - if (!do_copy_texsubimage(ctx, target, level, - radeon_tex_obj(texObj), (radeon_texture_image *)texImage, - 0, 0, x, y, width, height)) { - goto fail; - } - - return; - -fail: - radeon_print(RADEON_FALLBACKS, RADEON_NORMAL, - "Falling back to sw for glCopyTexImage2D (internalFormat %s, border %d)\n", - _mesa_lookup_enum_by_nr(internalFormat), border); - - _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y, - width, height, border); -} - -void radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index 9ba98e303a7..430309392a0 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -648,18 +648,12 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_form struct radeon_renderbuffer *rb; radeon_texture_image *rImage; radeonContextPtr radeon; - r100ContextPtr rmesa; struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, format; gl_format texFormat; - format = GL_UNSIGNED_BYTE; - internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? GL_RGB : GL_RGBA); - radeon = pDRICtx->driverPrivate; - rmesa = pDRICtx->driverPrivate; rfb = dPriv->driverPrivate; texUnit = _mesa_get_current_tex_unit(radeon->glCtx); @@ -1018,7 +1012,7 @@ static GLboolean radeon_validate_texgen( struct gl_context *ctx, GLuint unit ) static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int unit) { const struct gl_texture_image *firstImage; - GLint log2Width, log2Height, log2Depth, texelBytes; + GLint log2Width, log2Height, texelBytes; if ( t->bo ) { return GL_TRUE; @@ -1033,7 +1027,6 @@ static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int log2Width = firstImage->WidthLog2; log2Height = firstImage->HeightLog2; - log2Depth = firstImage->DepthLog2; texelBytes = _mesa_get_format_bytes(firstImage->TexFormat); if (!t->image_override) { diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h index 538a07fbba8..6fc06d967dd 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.h +++ b/src/mesa/drivers/dri/radeon/radeon_texture.h @@ -126,11 +126,6 @@ void radeonGetCompressedTexImage(struct gl_context *ctx, GLenum target, GLint le struct gl_texture_object *texObj, struct gl_texture_image *texImage); -void radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border); - void radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, diff --git a/src/mesa/drivers/x11/fakeglx.c b/src/mesa/drivers/x11/fakeglx.c index 48657b44be1..1f5fc33d775 100644 --- a/src/mesa/drivers/x11/fakeglx.c +++ b/src/mesa/drivers/x11/fakeglx.c @@ -1097,12 +1097,16 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig ) parselist++; break; case GLX_FBCONFIG_ID: + case GLX_VISUAL_ID: if (!fbConfig) return NULL; parselist++; desiredVisualID = *parselist++; break; case GLX_X_RENDERABLE: + case GLX_MAX_PBUFFER_WIDTH: + case GLX_MAX_PBUFFER_HEIGHT: + case GLX_MAX_PBUFFER_PIXELS: if (!fbConfig) return NULL; parselist += 2; diff --git a/src/mesa/main/arbprogram.c b/src/mesa/main/arbprogram.c index 26d781954ed..b83369d9e04 100644 --- a/src/mesa/main/arbprogram.c +++ b/src/mesa/main/arbprogram.c @@ -269,6 +269,71 @@ _mesa_IsProgramARB(GLuint id) return GL_FALSE; } +static GLboolean +get_local_param_pointer(struct gl_context *ctx, const char *func, + GLenum target, GLuint index, GLfloat **param) +{ + struct gl_program *prog; + GLuint maxParams; + + if (target == GL_VERTEX_PROGRAM_ARB + && ctx->Extensions.ARB_vertex_program) { + prog = &(ctx->VertexProgram.Current->Base); + maxParams = ctx->Const.VertexProgram.MaxLocalParams; + } + else if (target == GL_FRAGMENT_PROGRAM_ARB + && ctx->Extensions.ARB_fragment_program) { + prog = &(ctx->FragmentProgram.Current->Base); + maxParams = ctx->Const.FragmentProgram.MaxLocalParams; + } + else if (target == GL_FRAGMENT_PROGRAM_NV + && ctx->Extensions.NV_fragment_program) { + prog = &(ctx->FragmentProgram.Current->Base); + maxParams = MAX_NV_FRAGMENT_PROGRAM_PARAMS; + } + else { + _mesa_error(ctx, GL_INVALID_ENUM, + "%s(target)", func); + return GL_FALSE; + } + + if (index >= maxParams) { + _mesa_error(ctx, GL_INVALID_VALUE, "%s(index)", func); + return GL_FALSE; + } + + *param = prog->LocalParams[index]; + return GL_TRUE; +} + + +static GLboolean +get_env_param_pointer(struct gl_context *ctx, const char *func, + GLenum target, GLuint index, GLfloat **param) +{ + if (target == GL_FRAGMENT_PROGRAM_ARB + && ctx->Extensions.ARB_fragment_program) { + if (index >= ctx->Const.FragmentProgram.MaxEnvParams) { + _mesa_error(ctx, GL_INVALID_VALUE, "%s(index)", func); + return GL_FALSE; + } + *param = ctx->FragmentProgram.Parameters[index]; + return GL_TRUE; + } + else if (target == GL_VERTEX_PROGRAM_ARB && + (ctx->Extensions.ARB_vertex_program || + ctx->Extensions.NV_vertex_program)) { + if (index >= ctx->Const.VertexProgram.MaxEnvParams) { + _mesa_error(ctx, GL_INVALID_VALUE, "%s(index)", func); + return GL_FALSE; + } + *param = ctx->VertexProgram.Parameters[index]; + return GL_TRUE; + } else { + _mesa_error(ctx, GL_INVALID_ENUM, "%s(target)", func); + return GL_FALSE; + } +} void GLAPIENTRY _mesa_ProgramStringARB(GLenum target, GLenum format, GLsizei len, @@ -383,30 +448,16 @@ void GLAPIENTRY _mesa_ProgramEnvParameter4fARB(GLenum target, GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w) { + GLfloat *param; + GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); - if (target == GL_FRAGMENT_PROGRAM_ARB - && ctx->Extensions.ARB_fragment_program) { - if (index >= ctx->Const.FragmentProgram.MaxEnvParams) { - _mesa_error(ctx, GL_INVALID_VALUE, "glProgramEnvParameter(index)"); - return; - } - ASSIGN_4V(ctx->FragmentProgram.Parameters[index], x, y, z, w); - } - else if (target == GL_VERTEX_PROGRAM_ARB /* == GL_VERTEX_PROGRAM_NV */ - && (ctx->Extensions.ARB_vertex_program || ctx->Extensions.NV_vertex_program)) { - if (index >= ctx->Const.VertexProgram.MaxEnvParams) { - _mesa_error(ctx, GL_INVALID_VALUE, "glProgramEnvParameter(index)"); - return; - } - ASSIGN_4V(ctx->VertexProgram.Parameters[index], x, y, z, w); - } - else { - _mesa_error(ctx, GL_INVALID_ENUM, "glProgramEnvParameter(target)"); - return; + if (get_env_param_pointer(ctx, "glProgramEnvParameter", + target, index, ¶m)) { + ASSIGN_4V(param, x, y, z, w); } } @@ -422,32 +473,16 @@ void GLAPIENTRY _mesa_ProgramEnvParameter4fvARB(GLenum target, GLuint index, const GLfloat *params) { + GLfloat *param; + GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); - if (target == GL_FRAGMENT_PROGRAM_ARB - && ctx->Extensions.ARB_fragment_program) { - if (index >= ctx->Const.FragmentProgram.MaxEnvParams) { - _mesa_error(ctx, GL_INVALID_VALUE, "glProgramEnvParameter4fv(index)"); - return; - } - memcpy(ctx->FragmentProgram.Parameters[index], params, - 4 * sizeof(GLfloat)); - } - else if (target == GL_VERTEX_PROGRAM_ARB /* == GL_VERTEX_PROGRAM_NV */ - && (ctx->Extensions.ARB_vertex_program || ctx->Extensions.NV_vertex_program)) { - if (index >= ctx->Const.VertexProgram.MaxEnvParams) { - _mesa_error(ctx, GL_INVALID_VALUE, "glProgramEnvParameter4fv(index)"); - return; - } - memcpy(ctx->VertexProgram.Parameters[index], params, - 4 * sizeof(GLfloat)); - } - else { - _mesa_error(ctx, GL_INVALID_ENUM, "glProgramEnvParameter4fv(target)"); - return; + if (get_env_param_pointer(ctx, "glProgramEnvParameter4fv", + target, index, ¶m)) { + memcpy(param, params, 4 * sizeof(GLfloat)); } } @@ -496,14 +531,11 @@ _mesa_GetProgramEnvParameterdvARB(GLenum target, GLuint index, GLdouble *params) { GET_CURRENT_CONTEXT(ctx); - GLfloat fparams[4]; + GLfloat *fparam; - _mesa_GetProgramEnvParameterfvARB(target, index, fparams); - if (ctx->ErrorValue == GL_NO_ERROR) { - params[0] = fparams[0]; - params[1] = fparams[1]; - params[2] = fparams[2]; - params[3] = fparams[3]; + if (get_env_param_pointer(ctx, "glGetProgramEnvParameterdv", + target, index, &fparam)) { + COPY_4V(params, fparam); } } @@ -512,29 +544,15 @@ void GLAPIENTRY _mesa_GetProgramEnvParameterfvARB(GLenum target, GLuint index, GLfloat *params) { + GLfloat *param; + GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); - if (target == GL_FRAGMENT_PROGRAM_ARB - && ctx->Extensions.ARB_fragment_program) { - if (index >= ctx->Const.FragmentProgram.MaxEnvParams) { - _mesa_error(ctx, GL_INVALID_VALUE, "glGetProgramEnvParameter(index)"); - return; - } - COPY_4V(params, ctx->FragmentProgram.Parameters[index]); - } - else if (target == GL_VERTEX_PROGRAM_ARB - && ctx->Extensions.ARB_vertex_program) { - if (index >= ctx->Const.VertexProgram.MaxEnvParams) { - _mesa_error(ctx, GL_INVALID_VALUE, "glGetProgramEnvParameter(index)"); - return; - } - COPY_4V(params, ctx->VertexProgram.Parameters[index]); - } - else { - _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramEnvParameter(target)"); - return; + if (get_env_param_pointer(ctx, "glGetProgramEnvParameterfv", + target, index, ¶m)) { + COPY_4V(params, param); } } @@ -547,39 +565,16 @@ _mesa_ProgramLocalParameter4fARB(GLenum target, GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w) { GET_CURRENT_CONTEXT(ctx); - struct gl_program *prog; + GLfloat *param; ASSERT_OUTSIDE_BEGIN_END(ctx); FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); - if ((target == GL_FRAGMENT_PROGRAM_NV - && ctx->Extensions.NV_fragment_program) || - (target == GL_FRAGMENT_PROGRAM_ARB - && ctx->Extensions.ARB_fragment_program)) { - if (index >= ctx->Const.FragmentProgram.MaxLocalParams) { - _mesa_error(ctx, GL_INVALID_VALUE, "glProgramLocalParameterARB"); - return; - } - prog = &(ctx->FragmentProgram.Current->Base); - } - else if (target == GL_VERTEX_PROGRAM_ARB - && ctx->Extensions.ARB_vertex_program) { - if (index >= ctx->Const.VertexProgram.MaxLocalParams) { - _mesa_error(ctx, GL_INVALID_VALUE, "glProgramLocalParameterARB"); - return; - } - prog = &(ctx->VertexProgram.Current->Base); + if (get_local_param_pointer(ctx, "glProgramLocalParameterARB", + target, index, ¶m)) { + ASSERT(index < MAX_PROGRAM_LOCAL_PARAMS); + ASSIGN_4V(param, x, y, z, w); } - else { - _mesa_error(ctx, GL_INVALID_ENUM, "glProgramLocalParameterARB"); - return; - } - - ASSERT(index < MAX_PROGRAM_LOCAL_PARAMS); - prog->LocalParams[index][0] = x; - prog->LocalParams[index][1] = y; - prog->LocalParams[index][2] = z; - prog->LocalParams[index][3] = w; } @@ -667,41 +662,14 @@ void GLAPIENTRY _mesa_GetProgramLocalParameterfvARB(GLenum target, GLuint index, GLfloat *params) { - const struct gl_program *prog; - GLuint maxParams; + GLfloat *param; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); - if (target == GL_VERTEX_PROGRAM_ARB - && ctx->Extensions.ARB_vertex_program) { - prog = &(ctx->VertexProgram.Current->Base); - maxParams = ctx->Const.VertexProgram.MaxLocalParams; - } - else if (target == GL_FRAGMENT_PROGRAM_ARB - && ctx->Extensions.ARB_fragment_program) { - prog = &(ctx->FragmentProgram.Current->Base); - maxParams = ctx->Const.FragmentProgram.MaxLocalParams; - } - else if (target == GL_FRAGMENT_PROGRAM_NV - && ctx->Extensions.NV_fragment_program) { - prog = &(ctx->FragmentProgram.Current->Base); - maxParams = MAX_NV_FRAGMENT_PROGRAM_PARAMS; - } - else { - _mesa_error(ctx, GL_INVALID_ENUM, - "glGetProgramLocalParameterARB(target)"); - return; - } - - if (index >= maxParams) { - _mesa_error(ctx, GL_INVALID_VALUE, - "glGetProgramLocalParameterARB(index)"); - return; + if (get_local_param_pointer(ctx, "glProgramLocalParameters4fvEXT", + target, index, ¶m)) { + COPY_4V(params, param); } - - ASSERT(prog); - ASSERT(index < MAX_PROGRAM_LOCAL_PARAMS); - COPY_4V(params, prog->LocalParams[index]); } @@ -712,12 +680,13 @@ void GLAPIENTRY _mesa_GetProgramLocalParameterdvARB(GLenum target, GLuint index, GLdouble *params) { + GLfloat *param; GET_CURRENT_CONTEXT(ctx); - GLfloat floatParams[4]; - ASSIGN_4V(floatParams, 0.0F, 0.0F, 0.0F, 0.0F); - _mesa_GetProgramLocalParameterfvARB(target, index, floatParams); - if (ctx->ErrorValue == GL_NO_ERROR) { - COPY_4V(params, floatParams); + ASSERT_OUTSIDE_BEGIN_END(ctx); + + if (get_local_param_pointer(ctx, "glProgramLocalParameters4fvEXT", + target, index, ¶m)) { + COPY_4V(params, param); } } diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index 3e28d342674..c52358ecb04 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -241,15 +241,14 @@ _mesa_delete_buffer_object(struct gl_context *ctx, /** * Set ptr to bufObj w/ reference counting. + * This is normally only called from the _mesa_reference_buffer_object() macro + * when there's a real pointer change. */ void -_mesa_reference_buffer_object(struct gl_context *ctx, - struct gl_buffer_object **ptr, - struct gl_buffer_object *bufObj) +_mesa_reference_buffer_object_(struct gl_context *ctx, + struct gl_buffer_object **ptr, + struct gl_buffer_object *bufObj) { - if (*ptr == bufObj) - return; - if (*ptr) { /* Unreference the old buffer */ GLboolean deleteFlag = GL_FALSE; diff --git a/src/mesa/main/bufferobj.h b/src/mesa/main/bufferobj.h index 91fa073b649..f82e41411b7 100644 --- a/src/mesa/main/bufferobj.h +++ b/src/mesa/main/bufferobj.h @@ -75,9 +75,19 @@ _mesa_initialize_buffer_object( struct gl_buffer_object *obj, GLuint name, GLenum target ); extern void +_mesa_reference_buffer_object_(struct gl_context *ctx, + struct gl_buffer_object **ptr, + struct gl_buffer_object *bufObj); + +static INLINE void _mesa_reference_buffer_object(struct gl_context *ctx, struct gl_buffer_object **ptr, - struct gl_buffer_object *bufObj); + struct gl_buffer_object *bufObj) +{ + if (*ptr != bufObj) + _mesa_reference_buffer_object_(ctx, ptr, bufObj); +} + extern void _mesa_init_buffer_object_functions(struct dd_function_table *driver); diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index 63f53e2b080..a75c9c2e782 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -462,6 +462,27 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, /** + * Update the current drawbuffer's _ColorDrawBufferIndex[] list, etc. + * from the context's Color.DrawBuffer[] state. + * Use when changing contexts. + */ +void +_mesa_update_draw_buffers(struct gl_context *ctx) +{ + GLenum buffers[MAX_DRAW_BUFFERS]; + GLuint i; + + /* should be a window system FBO */ + assert(ctx->DrawBuffer->Name == 0); + + for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) + buffers[i] = ctx->Color.DrawBuffer[i]; + + _mesa_drawbuffers(ctx, ctx->Const.MaxDrawBuffers, buffers, NULL); +} + + +/** * Like \sa _mesa_drawbuffers(), this is a helper function for setting * GL_READ_BUFFER state in the context and current FBO. * \param ctx the rendering context diff --git a/src/mesa/main/buffers.h b/src/mesa/main/buffers.h index 1404112c411..8083bc3d353 100644 --- a/src/mesa/main/buffers.h +++ b/src/mesa/main/buffers.h @@ -50,6 +50,10 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, extern void _mesa_readbuffer(struct gl_context *ctx, GLenum buffer, GLint bufferIndex); +extern void +_mesa_update_draw_buffers(struct gl_context *ctx); + + extern void GLAPIENTRY _mesa_ReadBuffer( GLenum mode ); diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h index 743841be4ef..d736fdfc58a 100644 --- a/src/mesa/main/compiler.h +++ b/src/mesa/main/compiler.h @@ -139,26 +139,28 @@ extern "C" { /** * Function inlining */ -#if defined(__GNUC__) -# define INLINE __inline__ -#elif defined(__MSC__) -# define INLINE __inline -#elif defined(_MSC_VER) -# define INLINE __inline -#elif defined(__ICL) -# define INLINE __inline -#elif defined(__INTEL_COMPILER) -# define INLINE inline -#elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100) -# define INLINE __inline -#elif defined(__SUNPRO_C) && defined(__C99FEATURES__) -# define INLINE inline -# define __inline inline -# define __inline__ inline -#elif (__STDC_VERSION__ >= 199901L) /* C99 */ -# define INLINE inline -#else -# define INLINE +#ifndef INLINE +# if defined(__GNUC__) +# define INLINE __inline__ +# elif defined(__MSC__) +# define INLINE __inline +# elif defined(_MSC_VER) +# define INLINE __inline +# elif defined(__ICL) +# define INLINE __inline +# elif defined(__INTEL_COMPILER) +# define INLINE inline +# elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100) +# define INLINE __inline +# elif defined(__SUNPRO_C) && defined(__C99FEATURES__) +# define INLINE inline +# define __inline inline +# define __inline__ inline +# elif (__STDC_VERSION__ >= 199901L) /* C99 */ +# define INLINE inline +# else +# define INLINE +# endif #endif diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index ea13bdd6835..b83a5d621fa 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -1430,7 +1430,8 @@ _mesa_make_current( struct gl_context *newCtx, } if (curCtx && - (curCtx->WinSysDrawBuffer || curCtx->WinSysReadBuffer) && /* make sure this context is valid for flushing */ + (curCtx->WinSysDrawBuffer || curCtx->WinSysReadBuffer) && + /* make sure this context is valid for flushing */ curCtx != newCtx) _mesa_flush(curCtx); @@ -1445,8 +1446,6 @@ _mesa_make_current( struct gl_context *newCtx, _glapi_set_dispatch(newCtx->CurrentDispatch); if (drawBuffer && readBuffer) { - /* TODO: check if newCtx and buffer's visual match??? */ - ASSERT(drawBuffer->Name == 0); ASSERT(readBuffer->Name == 0); _mesa_reference_framebuffer(&newCtx->WinSysDrawBuffer, drawBuffer); @@ -1457,23 +1456,12 @@ _mesa_make_current( struct gl_context *newCtx, * or not bound to a user-created FBO. */ if (!newCtx->DrawBuffer || newCtx->DrawBuffer->Name == 0) { - /* KW: merge conflict here, revisit. - */ - /* fix up the fb fields - these will end up wrong otherwise - * if the DRIdrawable changes, and everything relies on them. - * This is a bit messy (same as needed in _mesa_BindFramebufferEXT) - */ - unsigned int i; - GLenum buffers[MAX_DRAW_BUFFERS]; - _mesa_reference_framebuffer(&newCtx->DrawBuffer, drawBuffer); - - for(i = 0; i < newCtx->Const.MaxDrawBuffers; i++) { - buffers[i] = newCtx->Color.DrawBuffer[i]; - } - - _mesa_drawbuffers(newCtx, newCtx->Const.MaxDrawBuffers, - buffers, NULL); + /* Update the FBO's list of drawbuffers/renderbuffers. + * For winsys FBOs this comes from the GL state (which may have + * changed since the last time this FBO was bound). + */ + _mesa_update_draw_buffers(newCtx); } if (!newCtx->ReadBuffer || newCtx->ReadBuffer->Name == 0) { _mesa_reference_framebuffer(&newCtx->ReadBuffer, readBuffer); diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 9fe6d527f92..e0c5844e193 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -189,31 +189,22 @@ struct dd_function_table { /*@{*/ /** - * Choose texture format. - * - * This is called by the \c _mesa_store_tex[sub]image[123]d() fallback - * functions. The driver should examine \p internalFormat and return a - * gl_format value. + * Choose actual hardware texture format given the user-provided source + * image format and type and the desired internal format. In some + * cases, srcFormat and srcType can be GL_NONE. + * Called by glTexImage(), etc. */ GLuint (*ChooseTextureFormat)( struct gl_context *ctx, GLint internalFormat, GLenum srcFormat, GLenum srcType ); /** - * Called by glTexImage1D(). - * - * \param target user specified. - * \param format user specified. - * \param type user specified. - * \param pixels user specified. - * \param packing indicates the image packing of pixels. + * Called by glTexImage1D(). Simply copy the source texture data into the + * destination texture memory. The gl_texture_image fields, etc. will be + * fully initialized. + * The parameters are the same as glTexImage1D(), plus: + * \param packing describes how to unpack the source data. * \param texObj is the target texture object. - * \param texImage is the target texture image. It will have the texture \p - * width, \p height, \p depth, \p border and \p internalFormat information. - * - * \p retainInternalCopy is returned by this function and indicates whether - * core Mesa should keep an internal copy of the texture image. - * - * Drivers should call a fallback routine from texstore.c if needed. + * \param texImage is the target texture image. */ void (*TexImage1D)( struct gl_context *ctx, GLenum target, GLint level, GLint internalFormat, @@ -250,25 +241,9 @@ struct dd_function_table { struct gl_texture_image *texImage ); /** - * Called by glTexSubImage1D(). - * - * \param target user specified. - * \param level user specified. - * \param xoffset user specified. - * \param yoffset user specified. - * \param zoffset user specified. - * \param width user specified. - * \param height user specified. - * \param depth user specified. - * \param format user specified. - * \param type user specified. - * \param pixels user specified. - * \param packing indicates the image packing of pixels. - * \param texObj is the target texture object. - * \param texImage is the target texture image. It will have the texture \p - * width, \p height, \p border and \p internalFormat information. - * - * The driver should use a fallback routine from texstore.c if needed. + * Called by glTexSubImage1D(). Replace a subset of the target texture + * with new texel data. + * \sa dd_function_table::TexImage1D. */ void (*TexSubImage1D)( struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, GLsizei width, @@ -315,24 +290,6 @@ struct dd_function_table { struct gl_texture_image *texImage ); /** - * Called by glCopyTexImage1D(). - * - * Drivers should use a fallback routine from texstore.c if needed. - */ - void (*CopyTexImage1D)( struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLint border ); - - /** - * Called by glCopyTexImage2D(). - * - * Drivers should use a fallback routine from texstore.c if needed. - */ - void (*CopyTexImage2D)( struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLsizei height, GLint border ); - - /** * Called by glCopyTexSubImage1D(). * * Drivers should use a fallback routine from texstore.c if needed. diff --git a/src/mesa/main/depthstencil.c b/src/mesa/main/depthstencil.c index ab62c97fe5a..40d6c9612a2 100644 --- a/src/mesa/main/depthstencil.c +++ b/src/mesa/main/depthstencil.c @@ -63,7 +63,8 @@ static void delete_wrapper(struct gl_renderbuffer *rb) { ASSERT(rb->Format == MESA_FORMAT_S8 || - rb->Format == MESA_FORMAT_X8_Z24); + rb->Format == MESA_FORMAT_X8_Z24 || + rb->Format == MESA_FORMAT_Z32_FLOAT); _mesa_reference_renderbuffer(&rb->Wrapped, NULL); free(rb); } @@ -393,6 +394,217 @@ _mesa_new_z24_renderbuffer_wrapper(struct gl_context *ctx, } +static void +get_row_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, GLuint count, + GLint x, GLint y, void *values) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + GLfloat temp[MAX_WIDTH*2]; + GLfloat *dst = (GLfloat *) values; + const GLfloat *src = (const GLfloat *) dsrb->GetPointer(ctx, dsrb, x, y); + GLuint i; + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + if (!src) { + dsrb->GetRow(ctx, dsrb, count, x, y, temp); + src = temp; + } + for (i = 0; i < count; i++) { + dst[i] = src[i*2]; + } +} + +static void +get_values_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, GLuint count, + const GLint x[], const GLint y[], void *values) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + GLfloat temp[MAX_WIDTH*2]; + GLfloat *dst = (GLfloat *) values; + GLuint i; + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + ASSERT(count <= MAX_WIDTH); + /* don't bother trying direct access */ + dsrb->GetValues(ctx, dsrb, count, x, y, temp); + for (i = 0; i < count; i++) { + dst[i] = temp[i*2]; + } +} + +static void +put_row_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, GLuint count, + GLint x, GLint y, const void *values, const GLubyte *mask) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + const GLfloat *src = (const GLfloat *) values; + GLfloat *dst = (GLfloat *) dsrb->GetPointer(ctx, dsrb, x, y); + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + if (dst) { + /* direct access */ + GLuint i; + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + dst[i*2] = src[i]; + } + } + } + else { + /* get, modify, put */ + GLfloat temp[MAX_WIDTH*2]; + GLuint i; + dsrb->GetRow(ctx, dsrb, count, x, y, temp); + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2] = src[i]; + } + } + dsrb->PutRow(ctx, dsrb, count, x, y, temp, mask); + } +} + +static void +put_mono_row_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, GLuint count, + GLint x, GLint y, const void *value, const GLubyte *mask) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + GLfloat *dst = (GLfloat *) dsrb->GetPointer(ctx, dsrb, x, y); + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + if (dst) { + /* direct access */ + GLuint i; + const GLfloat val = *(GLfloat*)value; + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + dst[i*2] = val; + } + } + } + else { + /* get, modify, put */ + GLfloat temp[MAX_WIDTH*2]; + GLuint i; + const GLfloat val = *(GLfloat *)value; + dsrb->GetRow(ctx, dsrb, count, x, y, temp); + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2] = val; + } + } + dsrb->PutRow(ctx, dsrb, count, x, y, temp, mask); + } +} + +static void +put_values_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, GLuint count, + const GLint x[], const GLint y[], + const void *values, const GLubyte *mask) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + const GLfloat *src = (const GLfloat *) values; + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + if (dsrb->GetPointer(ctx, dsrb, 0, 0)) { + /* direct access */ + GLuint i; + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + GLfloat *dst = (GLfloat *) dsrb->GetPointer(ctx, dsrb, x[i], y[i]); + *dst = src[i]; + } + } + } + else { + /* get, modify, put */ + GLfloat temp[MAX_WIDTH*2]; + GLuint i; + dsrb->GetValues(ctx, dsrb, count, x, y, temp); + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2] = src[i]; + } + } + dsrb->PutValues(ctx, dsrb, count, x, y, temp, mask); + } +} + +static void +put_mono_values_z32f(struct gl_context *ctx, struct gl_renderbuffer *z32frb, + GLuint count, const GLint x[], const GLint y[], + const void *value, const GLubyte *mask) +{ + struct gl_renderbuffer *dsrb = z32frb->Wrapped; + GLfloat temp[MAX_WIDTH*2]; + GLuint i; + const GLfloat val = *(GLfloat *)value; + ASSERT(z32frb->DataType == GL_FLOAT); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + /* get, modify, put */ + dsrb->GetValues(ctx, dsrb, count, x, y, temp); + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2] = val; + } + } + dsrb->PutValues(ctx, dsrb, count, x, y, temp, mask); +} + + +/** + * Wrap the given GL_DEPTH_STENCIL renderbuffer so that it acts like + * a depth renderbuffer. + * \return new depth renderbuffer + */ +struct gl_renderbuffer * +_mesa_new_z32f_renderbuffer_wrapper(struct gl_context *ctx, + struct gl_renderbuffer *dsrb) +{ + struct gl_renderbuffer *z32frb; + + ASSERT(dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + ASSERT(dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + + z32frb = ctx->Driver.NewRenderbuffer(ctx, 0); + if (!z32frb) + return NULL; + + /* NOTE: need to do manual refcounting here */ + z32frb->Wrapped = dsrb; + dsrb->RefCount++; + + z32frb->Name = dsrb->Name; + z32frb->RefCount = 0; + z32frb->Width = dsrb->Width; + z32frb->Height = dsrb->Height; + z32frb->RowStride = dsrb->RowStride; + z32frb->InternalFormat = GL_DEPTH_COMPONENT32F; + z32frb->Format = MESA_FORMAT_Z32_FLOAT; + z32frb->_BaseFormat = GL_DEPTH_COMPONENT; + z32frb->DataType = GL_FLOAT; + z32frb->Data = NULL; + z32frb->Delete = delete_wrapper; + z32frb->AllocStorage = alloc_wrapper_storage; + z32frb->GetPointer = nop_get_pointer; + z32frb->GetRow = get_row_z32f; + z32frb->GetValues = get_values_z32f; + z32frb->PutRow = put_row_z32f; + z32frb->PutRowRGB = NULL; + z32frb->PutMonoRow = put_mono_row_z32f; + z32frb->PutValues = put_values_z32f; + z32frb->PutMonoValues = put_mono_values_z32f; + + return z32frb; +} + + /*====================================================================== * Stencil wrapper around depth/stencil renderbuffer */ @@ -402,16 +614,22 @@ get_row_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count, GLint x, GLint y, void *values) { struct gl_renderbuffer *dsrb = s8rb->Wrapped; - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; GLubyte *dst = (GLubyte *) values; const GLuint *src = (const GLuint *) dsrb->GetPointer(ctx, dsrb, x, y); ASSERT(s8rb->DataType == GL_UNSIGNED_BYTE); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); if (!src) { dsrb->GetRow(ctx, dsrb, count, x, y, temp); src = temp; } - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + dst[i] = src[i*2+1] & 0xff; + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { dst[i] = src[i] & 0xff; } @@ -429,14 +647,20 @@ get_values_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count const GLint x[], const GLint y[], void *values) { struct gl_renderbuffer *dsrb = s8rb->Wrapped; - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; GLubyte *dst = (GLubyte *) values; ASSERT(s8rb->DataType == GL_UNSIGNED_BYTE); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); ASSERT(count <= MAX_WIDTH); /* don't bother trying direct access */ dsrb->GetValues(ctx, dsrb, count, x, y, temp); - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + dst[i] = temp[i*2+1] & 0xff; + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { dst[i] = temp[i] & 0xff; } @@ -457,11 +681,19 @@ put_row_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count, const GLubyte *src = (const GLubyte *) values; GLuint *dst = (GLuint *) dsrb->GetPointer(ctx, dsrb, x, y); ASSERT(s8rb->DataType == GL_UNSIGNED_BYTE); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); if (dst) { /* direct access */ GLuint i; - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + dst[i*2+1] = src[i]; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { dst[i] = (dst[i] & 0xffffff00) | src[i]; @@ -479,9 +711,16 @@ put_row_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count, } else { /* get, modify, put */ - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; dsrb->GetRow(ctx, dsrb, count, x, y, temp); - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2+1] = src[i]; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { temp[i] = (temp[i] & 0xffffff00) | src[i]; @@ -508,11 +747,19 @@ put_mono_row_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint cou const GLubyte val = *((GLubyte *) value); GLuint *dst = (GLuint *) dsrb->GetPointer(ctx, dsrb, x, y); ASSERT(s8rb->DataType == GL_UNSIGNED_BYTE); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); if (dst) { /* direct access */ GLuint i; - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + dst[i*2+1] = val; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { dst[i] = (dst[i] & 0xffffff00) | val; @@ -530,9 +777,16 @@ put_mono_row_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint cou } else { /* get, modify, put */ - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; dsrb->GetRow(ctx, dsrb, count, x, y, temp); - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2+1] = val; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { temp[i] = (temp[i] & 0xffffff00) | val; @@ -559,11 +813,20 @@ put_values_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count struct gl_renderbuffer *dsrb = s8rb->Wrapped; const GLubyte *src = (const GLubyte *) values; ASSERT(s8rb->DataType == GL_UNSIGNED_BYTE); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); if (dsrb->GetPointer(ctx, dsrb, 0, 0)) { /* direct access */ GLuint i; - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + GLuint *dst = (GLuint *) dsrb->GetPointer(ctx, dsrb, x[i], y[i]); + dst[1] = src[i]; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { GLuint *dst = (GLuint *) dsrb->GetPointer(ctx, dsrb, x[i], y[i]); @@ -583,9 +846,16 @@ put_values_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint count } else { /* get, modify, put */ - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; dsrb->GetValues(ctx, dsrb, count, x, y, temp); - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2+1] = src[i]; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { temp[i] = (temp[i] & 0xffffff00) | src[i]; @@ -610,11 +880,18 @@ put_mono_values_s8(struct gl_context *ctx, struct gl_renderbuffer *s8rb, GLuint const void *value, const GLubyte *mask) { struct gl_renderbuffer *dsrb = s8rb->Wrapped; - GLuint temp[MAX_WIDTH], i; + GLuint temp[MAX_WIDTH*2], i; const GLubyte val = *((GLubyte *) value); /* get, modify, put */ dsrb->GetValues(ctx, dsrb, count, x, y, temp); - if (dsrb->Format == MESA_FORMAT_Z24_S8) { + if (dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + temp[i*2+1] = val; + } + } + } + else if (dsrb->Format == MESA_FORMAT_Z24_S8) { for (i = 0; i < count; i++) { if (!mask || mask[i]) { temp[i] = (temp[i] & 0xffffff00) | val; @@ -644,8 +921,10 @@ _mesa_new_s8_renderbuffer_wrapper(struct gl_context *ctx, struct gl_renderbuffer struct gl_renderbuffer *s8rb; ASSERT(dsrb->Format == MESA_FORMAT_Z24_S8 || - dsrb->Format == MESA_FORMAT_S8_Z24); - ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT); + dsrb->Format == MESA_FORMAT_S8_Z24 || + dsrb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); + ASSERT(dsrb->DataType == GL_UNSIGNED_INT_24_8_EXT || + dsrb->DataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); s8rb = ctx->Driver.NewRenderbuffer(ctx, 0); if (!s8rb) diff --git a/src/mesa/main/depthstencil.h b/src/mesa/main/depthstencil.h index ef63c5d7a31..b47a2e482c2 100644 --- a/src/mesa/main/depthstencil.h +++ b/src/mesa/main/depthstencil.h @@ -34,6 +34,11 @@ _mesa_new_z24_renderbuffer_wrapper(struct gl_context *ctx, extern struct gl_renderbuffer * +_mesa_new_z32f_renderbuffer_wrapper(struct gl_context *ctx, + struct gl_renderbuffer *dsrb); + + +extern struct gl_renderbuffer * _mesa_new_s8_renderbuffer_wrapper(struct gl_context *ctx, struct gl_renderbuffer *dsrb); diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index 958f5a69739..f9282398c21 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -7542,32 +7542,32 @@ execute_list(struct gl_context *ctx, GLuint list) break; case OPCODE_CLEAR_BUFFER_IV: { - GLint value[4]; + /*GLint value[4]; value[0] = n[3].i; value[1] = n[4].i; value[2] = n[5].i; value[3] = n[6].i; - /*CALL_ClearBufferiv(ctx->Exec, (n[1].e, n[2].i, value));*/ + CALL_ClearBufferiv(ctx->Exec, (n[1].e, n[2].i, value));*/ } break; case OPCODE_CLEAR_BUFFER_UIV: { - GLuint value[4]; + /*GLuint value[4]; value[0] = n[3].ui; value[1] = n[4].ui; value[2] = n[5].ui; value[3] = n[6].ui; - /*CALL_ClearBufferiv(ctx->Exec, (n[1].e, n[2].i, value));*/ + CALL_ClearBufferiv(ctx->Exec, (n[1].e, n[2].i, value));*/ } break; case OPCODE_CLEAR_BUFFER_FV: { - GLfloat value[4]; + /*GLfloat value[4]; value[0] = n[3].f; value[1] = n[4].f; value[2] = n[5].f; value[3] = n[6].f; - /*CALL_ClearBufferfv(ctx->Exec, (n[1].e, n[2].i, value));*/ + CALL_ClearBufferfv(ctx->Exec, (n[1].e, n[2].i, value));*/ } break; case OPCODE_CLEAR_BUFFER_FI: diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c index aac8b9c5eaf..3ba4df6342f 100644 --- a/src/mesa/main/enable.c +++ b/src/mesa/main/enable.c @@ -5,7 +5,6 @@ /* * Mesa 3-D graphics library - * Version: 7.0.3 * * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. * @@ -560,7 +559,6 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) ctx->Polygon.OffsetLine = state; break; case GL_POLYGON_OFFSET_FILL: - /*case GL_POLYGON_OFFSET_EXT:*/ if (ctx->Polygon.OffsetFill == state) return; FLUSH_VERTICES(ctx, _NEW_POLYGON); @@ -643,9 +641,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) break; #endif - /* - * CLIENT STATE!!! - */ + /* client-side state */ case GL_VERTEX_ARRAY: case GL_NORMAL_ARRAY: case GL_COLOR_ARRAY: @@ -1174,7 +1170,6 @@ _mesa_IsEnabled( GLenum cap ) case GL_POLYGON_OFFSET_LINE: return ctx->Polygon.OffsetLine; case GL_POLYGON_OFFSET_FILL: - /*case GL_POLYGON_OFFSET_EXT:*/ return ctx->Polygon.OffsetFill; case GL_RESCALE_NORMAL_EXT: return ctx->Transform.RescaleNormals; @@ -1213,9 +1208,7 @@ _mesa_IsEnabled( GLenum cap ) } #endif - /* - * CLIENT STATE!!! - */ + /* client-side state */ case GL_VERTEX_ARRAY: return (ctx->Array.ArrayObj->Vertex.Enabled != 0); case GL_NORMAL_ARRAY: diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 2230b262336..82eb7fb718d 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -78,9 +78,32 @@ static struct gl_renderbuffer DummyRenderbuffer; static struct gl_framebuffer IncompleteFramebuffer; -#define IS_CUBE_FACE(TARGET) \ - ((TARGET) >= GL_TEXTURE_CUBE_MAP_POSITIVE_X && \ - (TARGET) <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z) +static INLINE GLboolean +is_cube_face(GLenum target) +{ + return (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X && + target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z); +} + + +/** + * Is the given FBO a user-created FBO? + */ +static INLINE GLboolean +is_user_fbo(const struct gl_framebuffer *fb) +{ + return fb->Name != 0; +} + + +/** + * Is the given FBO a window system FBO (like an X window)? + */ +static INLINE GLboolean +is_winsys_fbo(const struct gl_framebuffer *fb) +{ + return fb->Name == 0; +} static void @@ -196,7 +219,7 @@ _mesa_get_attachment(struct gl_context *ctx, struct gl_framebuffer *fb, { GLuint i; - assert(fb->Name > 0); + assert(is_user_fbo(fb)); switch (attachment) { case GL_COLOR_ATTACHMENT0_EXT: @@ -244,7 +267,7 @@ static struct gl_renderbuffer_attachment * _mesa_get_fb0_attachment(struct gl_context *ctx, struct gl_framebuffer *fb, GLenum attachment) { - assert(fb->Name == 0); + assert(is_winsys_fbo(fb)); switch (attachment) { case GL_FRONT_LEFT: @@ -669,7 +692,7 @@ _mesa_test_framebuffer_completeness(struct gl_context *ctx, GLint i; GLuint j; - assert(fb->Name != 0); + assert(is_user_fbo(fb)); numImages = 0; fb->Width = 0; @@ -968,10 +991,11 @@ _mesa_DeleteRenderbuffersEXT(GLsizei n, const GLuint *renderbuffers) _mesa_BindRenderbufferEXT(GL_RENDERBUFFER_EXT, 0); } - if (ctx->DrawBuffer->Name) { + if (is_user_fbo(ctx->DrawBuffer)) { detach_renderbuffer(ctx, ctx->DrawBuffer, rb); } - if (ctx->ReadBuffer->Name && ctx->ReadBuffer != ctx->DrawBuffer) { + if (is_user_fbo(ctx->ReadBuffer) + && ctx->ReadBuffer != ctx->DrawBuffer) { detach_renderbuffer(ctx, ctx->ReadBuffer, rb); } @@ -1107,6 +1131,16 @@ _mesa_base_fbo_format(struct gl_context *ctx, GLenum internalFormat) return GL_DEPTH_STENCIL_EXT; else return 0; + case GL_DEPTH_COMPONENT32F: + if (ctx->Extensions.ARB_depth_buffer_float) + return GL_DEPTH_COMPONENT; + else + return 0; + case GL_DEPTH32F_STENCIL8: + if (ctx->Extensions.ARB_depth_buffer_float) + return GL_DEPTH_STENCIL; + else + return 0; case GL_RED: case GL_R8: case GL_R16: @@ -1203,7 +1237,7 @@ invalidate_rb(GLuint key, void *data, void *userData) struct gl_renderbuffer *rb = (struct gl_renderbuffer *) userData; /* If this is a user-created FBO */ - if (fb->Name) { + if (is_user_fbo(fb)) { GLuint i; for (i = 0; i < BUFFER_COUNT; i++) { struct gl_renderbuffer_attachment *att = fb->Attachment + i; @@ -1532,7 +1566,7 @@ check_begin_texture_render(struct gl_context *ctx, struct gl_framebuffer *fb) GLuint i; ASSERT(ctx->Driver.RenderTexture); - if (fb->Name == 0) + if (is_winsys_fbo(fb)) return; /* can't render to texture with winsys framebuffers */ for (i = 0; i < BUFFER_COUNT; i++) { @@ -1552,7 +1586,7 @@ check_begin_texture_render(struct gl_context *ctx, struct gl_framebuffer *fb) static void check_end_texture_render(struct gl_context *ctx, struct gl_framebuffer *fb) { - if (fb->Name == 0) + if (is_winsys_fbo(fb)) return; /* can't render to texture with winsys framebuffers */ if (ctx->Driver.FinishRenderTexture) { @@ -1805,7 +1839,7 @@ _mesa_CheckFramebufferStatusEXT(GLenum target) return 0; } - if (buffer->Name == 0) { + if (is_winsys_fbo(buffer)) { /* The window system / default framebuffer is always complete */ return GL_FRAMEBUFFER_COMPLETE_EXT; } @@ -1843,7 +1877,7 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target, } /* check framebuffer binding */ - if (fb->Name == 0) { + if (is_winsys_fbo(fb)) { _mesa_error(ctx, GL_INVALID_OPERATION, "glFramebufferTexture%sEXT", caller); return; @@ -1866,7 +1900,7 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target, } else { err = (texObj->Target == GL_TEXTURE_CUBE_MAP) - ? !IS_CUBE_FACE(textarget) + ? !is_cube_face(textarget) : (texObj->Target != textarget); } } @@ -1970,7 +2004,7 @@ _mesa_FramebufferTexture2DEXT(GLenum target, GLenum attachment, if ((texture != 0) && (textarget != GL_TEXTURE_2D) && (textarget != GL_TEXTURE_RECTANGLE_ARB) && - (!IS_CUBE_FACE(textarget))) { + (!is_cube_face(textarget))) { _mesa_error(ctx, GL_INVALID_OPERATION, "glFramebufferTexture2DEXT(textarget=0x%x)", textarget); return; @@ -2034,7 +2068,7 @@ _mesa_FramebufferRenderbufferEXT(GLenum target, GLenum attachment, return; } - if (fb->Name == 0) { + if (is_winsys_fbo(fb)) { /* Can't attach new renderbuffers to a window system framebuffer */ _mesa_error(ctx, GL_INVALID_OPERATION, "glFramebufferRenderbufferEXT"); return; @@ -2100,10 +2134,14 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, { const struct gl_renderbuffer_attachment *att; struct gl_framebuffer *buffer; + GLenum err; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); + /* The error differs in GL andd GLES. */ + err = ctx->API == API_OPENGL ? GL_INVALID_OPERATION : GL_INVALID_ENUM; + buffer = get_framebuffer_target(ctx, target); if (!buffer) { _mesa_error(ctx, GL_INVALID_ENUM, @@ -2111,7 +2149,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, return; } - if (buffer->Name == 0) { + if (is_winsys_fbo(buffer)) { /* the default / window-system FBO */ att = _mesa_get_fb0_attachment(ctx, buffer, attachment); } @@ -2143,7 +2181,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, switch (pname) { case GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE_EXT: - *params = buffer->Name == 0 ? GL_FRAMEBUFFER_DEFAULT : att->Type; + *params = is_winsys_fbo(buffer) ? GL_FRAMEBUFFER_DEFAULT : att->Type; return; case GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME_EXT: if (att->Type == GL_RENDERBUFFER_EXT) { @@ -2154,7 +2192,12 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, } else { assert(att->Type == GL_NONE); - *params = 0; + if (ctx->API == API_OPENGL) { + *params = 0; + } else { + _mesa_error(ctx, GL_INVALID_ENUM, + "glGetFramebufferAttachmentParameterivEXT(pname)"); + } } return; case GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL_EXT: @@ -2162,7 +2205,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, *params = att->TextureLevel; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2180,7 +2223,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, } } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2198,7 +2241,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, } } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2212,7 +2255,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2233,7 +2276,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, return; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2242,6 +2285,15 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, /* special cases */ *params = GL_INDEX; } + else if (format == MESA_FORMAT_Z32_FLOAT_X24S8) { + /* depends on the attachment parameter */ + if (attachment == GL_STENCIL_ATTACHMENT) { + *params = GL_INDEX; + } + else { + *params = GL_FLOAT; + } + } else { *params = _mesa_get_format_datatype(format); } @@ -2258,7 +2310,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else if (att->Texture) { @@ -2560,6 +2612,10 @@ _mesa_BlitFramebufferEXT(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, } } + if (!mask) { + return; + } + ASSERT(ctx->Driver.BlitFramebuffer); ctx->Driver.BlitFramebuffer(ctx, srcX0, srcY0, srcX1, srcY1, diff --git a/src/mesa/main/feedback.c b/src/mesa/main/feedback.c index fcb089f1f31..597ec1e3f9a 100644 --- a/src/mesa/main/feedback.c +++ b/src/mesa/main/feedback.c @@ -64,7 +64,7 @@ _mesa_FeedbackBuffer( GLsizei size, GLenum type, GLfloat *buffer ) _mesa_error( ctx, GL_INVALID_VALUE, "glFeedbackBuffer(size<0)" ); return; } - if (!buffer) { + if (!buffer && size > 0) { _mesa_error( ctx, GL_INVALID_VALUE, "glFeedbackBuffer(buffer==NULL)" ); ctx->Feedback.BufferSize = 0; return; diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp index 2ccbaf8f8c3..7cc17216884 100644 --- a/src/mesa/main/ff_fragment_shader.cpp +++ b/src/mesa/main/ff_fragment_shader.cpp @@ -330,8 +330,7 @@ static GLbitfield get_fp_input_mask( struct gl_context *ctx ) /* _NEW_RENDERMODE */ fp_inputs = (FRAG_BIT_COL0 | FRAG_BIT_TEX0); } - else if (!(vertexProgram || vertexShader) || - !ctx->VertexProgram._Current) { + else if (!(vertexProgram || vertexShader)) { /* Fixed function vertex logic */ /* _NEW_ARRAY */ GLbitfield varying_inputs = ctx->varying_vp_inputs; diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c index e88ba43971b..f9298d2d1e9 100644 --- a/src/mesa/main/formats.c +++ b/src/mesa/main/formats.c @@ -1091,6 +1091,25 @@ static struct gl_format_info format_info[MESA_FORMAT_COUNT] = 0, 0, 0, 0, 0, 1, 1, 4 }, + /* ARB_depth_buffer_float */ + { + MESA_FORMAT_Z32_FLOAT, /* Name */ + "MESA_FORMAT_Z32_FLOAT", /* StrName */ + GL_DEPTH_COMPONENT, /* BaseFormat */ + GL_FLOAT, /* DataType */ + 0, 0, 0, 0, /* Red/Green/Blue/AlphaBits */ + 0, 0, 0, 32, 0, /* Lum/Int/Index/Depth/StencilBits */ + 1, 1, 4 /* BlockWidth/Height,Bytes */ + }, + { + MESA_FORMAT_Z32_FLOAT_X24S8, /* Name */ + "MESA_FORMAT_Z32_FLOAT_X24S8", /* StrName */ + GL_DEPTH_STENCIL, /* BaseFormat */ + GL_NONE /* XXX */, /* DataType */ + 0, 0, 0, 0, /* Red/Green/Blue/AlphaBits */ + 0, 0, 0, 32, 8, /* Lum/Int/Index/Depth/StencilBits */ + 1, 1, 8 /* BlockWidth/Height,Bytes */ + }, }; @@ -1466,7 +1485,9 @@ _mesa_test_formats(void) info->DataType == GL_SIGNED_NORMALIZED || info->DataType == GL_UNSIGNED_INT || info->DataType == GL_INT || - info->DataType == GL_FLOAT); + info->DataType == GL_FLOAT || + /* Z32_FLOAT_X24S8 has DataType of GL_NONE */ + info->DataType == GL_NONE); if (info->BaseFormat == GL_RGB) { assert(info->RedBits > 0); @@ -1654,6 +1675,16 @@ _mesa_format_to_type_and_comps(gl_format format, *comps = 1; return; + case MESA_FORMAT_Z32_FLOAT: + *datatype = GL_FLOAT; + *comps = 1; + return; + + case MESA_FORMAT_Z32_FLOAT_X24S8: + *datatype = GL_FLOAT_32_UNSIGNED_INT_24_8_REV; + *comps = 1; + return; + case MESA_FORMAT_DUDV8: *datatype = GL_BYTE; *comps = 2; diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h index 0640bbc4af1..5b8c01781a6 100644 --- a/src/mesa/main/formats.h +++ b/src/mesa/main/formats.h @@ -209,6 +209,9 @@ typedef enum MESA_FORMAT_RGB9_E5_FLOAT, MESA_FORMAT_R11_G11_B10_FLOAT, + MESA_FORMAT_Z32_FLOAT, + MESA_FORMAT_Z32_FLOAT_X24S8, + MESA_FORMAT_COUNT } gl_format; diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 66c9bd91096..23fa1b2c11e 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -232,17 +232,13 @@ _mesa_free_framebuffer_data(struct gl_framebuffer *fb) /** * Set *ptr to point to fb, with refcounting and locking. + * This is normally only called from the _mesa_reference_framebuffer() macro + * when there's a real pointer change. */ void -_mesa_reference_framebuffer(struct gl_framebuffer **ptr, - struct gl_framebuffer *fb) +_mesa_reference_framebuffer_(struct gl_framebuffer **ptr, + struct gl_framebuffer *fb) { - assert(ptr); - if (*ptr == fb) { - /* no change */ - return; - } - if (*ptr) { /* unreference old renderbuffer */ GLboolean deleteFlag = GL_FALSE; @@ -552,6 +548,7 @@ _mesa_update_framebuffer_visual(struct gl_context *ctx, fb->Visual.rgbBits = fb->Visual.redBits + fb->Visual.greenBits + fb->Visual.blueBits; fb->Visual.samples = rb->NumSamples; + fb->Visual.sampleBuffers = rb->NumSamples > 0 ? 1 : 0; if (_mesa_get_format_color_encoding(fmt) == GL_SRGB) fb->Visual.sRGBCapable = ctx->Const.sRGBCapable; break; @@ -631,8 +628,14 @@ _mesa_update_depth_buffer(struct gl_context *ctx, || fb->_DepthBuffer->Wrapped != depthRb || _mesa_get_format_base_format(fb->_DepthBuffer->Format) != GL_DEPTH_COMPONENT) { /* need to update wrapper */ - struct gl_renderbuffer *wrapper - = _mesa_new_z24_renderbuffer_wrapper(ctx, depthRb); + struct gl_renderbuffer *wrapper; + + if (depthRb->Format == MESA_FORMAT_Z32_FLOAT_X24S8) { + wrapper = _mesa_new_z32f_renderbuffer_wrapper(ctx, depthRb); + } + else { + wrapper = _mesa_new_z24_renderbuffer_wrapper(ctx, depthRb); + } _mesa_reference_renderbuffer(&fb->_DepthBuffer, wrapper); ASSERT(fb->_DepthBuffer->Wrapped == depthRb); } diff --git a/src/mesa/main/framebuffer.h b/src/mesa/main/framebuffer.h index c3bd638c9de..b2b29a7a1db 100644 --- a/src/mesa/main/framebuffer.h +++ b/src/mesa/main/framebuffer.h @@ -51,8 +51,16 @@ extern void _mesa_free_framebuffer_data(struct gl_framebuffer *buffer); extern void +_mesa_reference_framebuffer_(struct gl_framebuffer **ptr, + struct gl_framebuffer *fb); + +static INLINE void _mesa_reference_framebuffer(struct gl_framebuffer **ptr, - struct gl_framebuffer *fb); + struct gl_framebuffer *fb) +{ + if (*ptr != fb) + _mesa_reference_framebuffer_(ptr, fb); +} extern void _mesa_resize_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb, diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index 0492e1585c3..d32c68a53a4 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -1569,11 +1569,11 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu break; case GL_NUM_COMPRESSED_TEXTURE_FORMATS_ARB: - v->value_int = _mesa_get_compressed_formats(ctx, NULL, GL_FALSE); + v->value_int = _mesa_get_compressed_formats(ctx, NULL); break; case GL_COMPRESSED_TEXTURE_FORMATS_ARB: v->value_int_n.n = - _mesa_get_compressed_formats(ctx, v->value_int_n.ints, GL_FALSE); + _mesa_get_compressed_formats(ctx, v->value_int_n.ints); ASSERT(v->value_int_n.n <= 100); break; diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c index 6d7bc735887..37127dcb7a2 100644 --- a/src/mesa/main/image.c +++ b/src/mesa/main/image.c @@ -84,6 +84,7 @@ _mesa_type_is_packed(GLenum type) case GL_UNSIGNED_INT_24_8_EXT: case GL_UNSIGNED_INT_5_9_9_9_REV: case GL_UNSIGNED_INT_10F_11F_11F_REV: + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: return GL_TRUE; } @@ -228,6 +229,8 @@ _mesa_sizeof_packed_type( GLenum type ) return sizeof(GLuint); case GL_UNSIGNED_INT_10F_11F_11F_REV: return sizeof(GLuint); + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + return 8; default: return -1; } @@ -379,6 +382,11 @@ _mesa_bytes_per_pixel( GLenum format, GLenum type ) return sizeof(GLuint); else return -1; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + if (format == GL_DEPTH_STENCIL) + return 8; + else + return -1; default: return -1; } @@ -531,8 +539,10 @@ _mesa_is_legal_format_and_type(const struct gl_context *ctx, else return GL_FALSE; case GL_DEPTH_STENCIL_EXT: - if (ctx->Extensions.EXT_packed_depth_stencil - && type == GL_UNSIGNED_INT_24_8_EXT) + if ((ctx->Extensions.EXT_packed_depth_stencil && + type == GL_UNSIGNED_INT_24_8_EXT) || + (ctx->Extensions.ARB_depth_buffer_float && + type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV)) return GL_TRUE; else return GL_FALSE; @@ -884,6 +894,7 @@ _mesa_is_depth_format(GLenum format) case GL_DEPTH_COMPONENT16: case GL_DEPTH_COMPONENT24: case GL_DEPTH_COMPONENT32: + case GL_DEPTH_COMPONENT32F: return GL_TRUE; default: return GL_FALSE; @@ -931,6 +942,7 @@ _mesa_is_depthstencil_format(GLenum format) switch (format) { case GL_DEPTH24_STENCIL8_EXT: case GL_DEPTH_STENCIL_EXT: + case GL_DEPTH32F_STENCIL8: return GL_TRUE; default: return GL_FALSE; @@ -956,6 +968,8 @@ _mesa_is_depth_or_stencil_format(GLenum format) case GL_STENCIL_INDEX16_EXT: case GL_DEPTH_STENCIL_EXT: case GL_DEPTH24_STENCIL8_EXT: + case GL_DEPTH_COMPONENT32F: + case GL_DEPTH32F_STENCIL8: return GL_TRUE; default: return GL_FALSE; diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c index f2724dbca7e..8a811cb7225 100644 --- a/src/mesa/main/mipmap.c +++ b/src/mesa/main/mipmap.c @@ -706,6 +706,17 @@ do_row(GLenum datatype, GLuint comps, GLint srcWidth, } } + else if (datatype == GL_FLOAT_32_UNSIGNED_INT_24_8_REV && comps == 1) { + GLuint i, j, k; + const GLfloat *rowA = (const GLfloat *) srcRowA; + const GLfloat *rowB = (const GLfloat *) srcRowB; + GLfloat *dst = (GLfloat *) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i*2] = (rowA[j*2] + rowA[k*2] + rowB[j*2] + rowB[k*2]) * 0.25F; + } + } + else { _mesa_problem(NULL, "bad format in do_row()"); } @@ -1341,6 +1352,15 @@ do_row_3D(GLenum datatype, GLuint comps, GLint srcWidth, } } + else if (datatype == GL_FLOAT_32_UNSIGNED_INT_24_8_REV && comps == 1) { + DECLARE_ROW_POINTERS(GLfloat, 2); + + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_F_3D(0); + } + } + else { _mesa_problem(NULL, "bad format in do_row()"); } diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index eb2efc89aed..b88118366b2 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1435,8 +1435,7 @@ struct gl_texgen /** * Texture unit state. Contains enable flags, texture environment/function/ - * combiners, texgen state, pointers to current texture objects and - * post-filter color tables. + * combiners, texgen state, and pointers to current texture objects. */ struct gl_texture_unit { @@ -1857,7 +1856,7 @@ struct gl_program GLbitfield SystemValuesRead; /**< Bitmask of SYSTEM_VALUE_x inputs used */ GLbitfield InputFlags[MAX_PROGRAM_INPUTS]; /**< PROG_PARAM_BIT_x flags */ GLbitfield OutputFlags[MAX_PROGRAM_OUTPUTS]; /**< PROG_PARAM_BIT_x flags */ - GLbitfield TexturesUsed[MAX_TEXTURE_UNITS]; /**< TEXTURE_x_BIT bitmask */ + GLbitfield TexturesUsed[MAX_COMBINED_TEXTURE_IMAGE_UNITS]; /**< TEXTURE_x_BIT bitmask */ GLbitfield SamplersUsed; /**< Bitfield of which samplers are used */ GLbitfield ShadowSamplers; /**< Texture units used for shadow sampling. */ diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index d6470e351b8..7de1d05b919 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -1971,7 +1971,8 @@ extract_uint_indexes(GLuint n, GLuint indexes[], srcType == GL_INT || srcType == GL_UNSIGNED_INT_24_8_EXT || srcType == GL_HALF_FLOAT_ARB || - srcType == GL_FLOAT); + srcType == GL_FLOAT || + srcType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); switch (srcType) { case GL_BITMAP: @@ -2142,6 +2143,23 @@ extract_uint_indexes(GLuint n, GLuint indexes[], } } break; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + { + GLuint i; + const GLuint *s = (const GLuint *) src; + if (unpack->SwapBytes) { + for (i = 0; i < n; i++) { + GLuint value = s[i*2+1]; + SWAP4BYTE(value); + indexes[i] = value & 0xff; /* lower 8 bits */ + } + } + else { + for (i = 0; i < n; i++) + indexes[i] = s[i*2+1] & 0xff; /* lower 8 bits */ + } + } + break; default: _mesa_problem(NULL, "bad srcType in extract_uint_indexes"); @@ -4412,11 +4430,13 @@ _mesa_unpack_stencil_span( struct gl_context *ctx, GLuint n, srcType == GL_INT || srcType == GL_UNSIGNED_INT_24_8_EXT || srcType == GL_HALF_FLOAT_ARB || - srcType == GL_FLOAT); + srcType == GL_FLOAT || + srcType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); ASSERT(dstType == GL_UNSIGNED_BYTE || dstType == GL_UNSIGNED_SHORT || - dstType == GL_UNSIGNED_INT); + dstType == GL_UNSIGNED_INT || + dstType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); /* only shift and offset apply to stencil */ transferOps &= IMAGE_SHIFT_OFFSET_BIT; @@ -4488,6 +4508,15 @@ _mesa_unpack_stencil_span( struct gl_context *ctx, GLuint n, case GL_UNSIGNED_INT: memcpy(dest, indexes, n * sizeof(GLuint)); break; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + { + GLuint *dst = (GLuint *) dest; + GLuint i; + for (i = 0; i < n; i++) { + dst[i*2+1] = indexes[i] & 0xff; /* lower 8 bits */ + } + } + break; default: _mesa_problem(ctx, "bad dstType in _mesa_unpack_stencil_span"); } @@ -4683,7 +4712,7 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n, GLenum srcType, const GLvoid *source, const struct gl_pixelstore_attrib *srcPacking ) { - GLfloat *depthTemp, *depthValues; + GLfloat *depthTemp = NULL, *depthValues; GLboolean needClamp = GL_FALSE; /* Look for special cases first. @@ -4729,16 +4758,16 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n, /* general case path follows */ - depthTemp = (GLfloat *) malloc(n * sizeof(GLfloat)); - if (!depthTemp) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel unpacking"); - return; - } - if (dstType == GL_FLOAT) { depthValues = (GLfloat *) dest; } else { + depthTemp = (GLfloat *) malloc(n * sizeof(GLfloat)); + if (!depthTemp) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel unpacking"); + return; + } + depthValues = depthTemp; } @@ -4782,6 +4811,7 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n, } zValues[i] = value & 0xffffff00; } + free(depthTemp); return; } else { @@ -4797,6 +4827,20 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n, } } break; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + { + GLuint i; + const GLfloat *src = (const GLfloat *)source; + for (i = 0; i < n; i++) { + GLfloat value = src[i * 2]; + if (srcPacking->SwapBytes) { + SWAP4BYTE(value); + } + depthValues[i] = value; + } + needClamp = GL_TRUE; + } + break; case GL_FLOAT: DEPTH_VALUES(GLfloat, 1*); needClamp = GL_TRUE; @@ -4873,9 +4917,18 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n, zValues[i] = (GLushort) (depthValues[i] * (GLfloat) depthMax); } } + else if (dstType == GL_FLOAT) { + /* Nothing to do. depthValues is pointing to dest. */ + } + else if (dstType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV) { + GLfloat *zValues = (GLfloat*) dest; + GLuint i; + for (i = 0; i < n; i++) { + zValues[i*2] = depthValues[i]; + } + } else { - ASSERT(dstType == GL_FLOAT); - /*ASSERT(depthMax == 1.0F);*/ + ASSERT(0); } free(depthTemp); @@ -5003,10 +5056,11 @@ _mesa_pack_depth_span( struct gl_context *ctx, GLuint n, GLvoid *dest, /** - * Pack depth and stencil values as GL_DEPTH_STENCIL/GL_UNSIGNED_INT_24_8. + * Pack depth and stencil values as GL_DEPTH_STENCIL (GL_UNSIGNED_INT_24_8 etc) */ void -_mesa_pack_depth_stencil_span(struct gl_context *ctx, GLuint n, GLuint *dest, +_mesa_pack_depth_stencil_span(struct gl_context *ctx,GLuint n, + GLenum dstType, GLuint *dest, const GLfloat *depthVals, const GLstencil *stencilVals, const struct gl_pixelstore_attrib *dstPacking) @@ -5036,9 +5090,19 @@ _mesa_pack_depth_stencil_span(struct gl_context *ctx, GLuint n, GLuint *dest, stencilVals = stencilCopy; } - for (i = 0; i < n; i++) { - GLuint z = (GLuint) (depthVals[i] * 0xffffff); - dest[i] = (z << 8) | (stencilVals[i] & 0xff); + switch (dstType) { + case GL_UNSIGNED_INT_24_8: + for (i = 0; i < n; i++) { + GLuint z = (GLuint) (depthVals[i] * 0xffffff); + dest[i] = (z << 8) | (stencilVals[i] & 0xff); + } + break; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + for (i = 0; i < n; i++) { + ((GLfloat*)dest)[i*2] = depthVals[i]; + dest[i*2+1] = stencilVals[i] & 0xff; + } + break; } if (dstPacking->SwapBytes) { diff --git a/src/mesa/main/pack.h b/src/mesa/main/pack.h index 78238ea5839..00aab409e42 100644 --- a/src/mesa/main/pack.h +++ b/src/mesa/main/pack.h @@ -130,8 +130,8 @@ _mesa_pack_depth_span(struct gl_context *ctx, GLuint n, GLvoid *dest, extern void -_mesa_pack_depth_stencil_span(struct gl_context *ctx, - GLuint n, GLuint *dest, +_mesa_pack_depth_stencil_span(struct gl_context *ctx,GLuint n, + GLenum dstType, GLuint *dest, const GLfloat *depthVals, const GLstencil *stencilVals, const struct gl_pixelstore_attrib *dstPacking); diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index 0331a8ca2fe..84c5b22286a 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -61,6 +61,14 @@ _mesa_error_check_format_type(struct gl_context *ctx, GLenum format, return GL_TRUE; } + if (ctx->Extensions.ARB_depth_buffer_float + && type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV + && format != GL_DEPTH_STENCIL_EXT) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "gl%sPixels(format is not GL_DEPTH_STENCIL_EXT)", readDraw); + return GL_TRUE; + } + /* basic combinations test */ if (!_mesa_is_legal_format_and_type(ctx, format, type)) { _mesa_error(ctx, GL_INVALID_ENUM, @@ -142,10 +150,23 @@ _mesa_error_check_format_type(struct gl_context *ctx, GLenum format, } break; case GL_DEPTH_STENCIL_EXT: - if (!ctx->Extensions.EXT_packed_depth_stencil || - type != GL_UNSIGNED_INT_24_8_EXT) { - _mesa_error(ctx, GL_INVALID_ENUM, "gl%sPixels(type)", readDraw); - return GL_TRUE; + /* Check validity of the type first. */ + switch (type) { + case GL_UNSIGNED_INT_24_8_EXT: + if (!ctx->Extensions.EXT_packed_depth_stencil) { + _mesa_error(ctx, GL_INVALID_ENUM, "gl%sPixels(type)", readDraw); + return GL_TRUE; + } + break; + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + if (!ctx->Extensions.ARB_depth_buffer_float) { + _mesa_error(ctx, GL_INVALID_ENUM, "gl%sPixels(type)", readDraw); + return GL_TRUE; + } + break; + default: + _mesa_error(ctx, GL_INVALID_ENUM, "gl%sPixels(type)", readDraw); + return GL_TRUE; } if ((drawing && !_mesa_dest_buffer_exists(ctx, format)) || (reading && !_mesa_source_buffer_exists(ctx, format))) { diff --git a/src/mesa/main/renderbuffer.c b/src/mesa/main/renderbuffer.c index fa884c0de93..70011e6b24a 100644 --- a/src/mesa/main/renderbuffer.c +++ b/src/mesa/main/renderbuffer.c @@ -66,6 +66,9 @@ get_datatype_bytes(struct gl_renderbuffer *rb) int component_size; switch (rb->DataType) { + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + component_size = 8; + break; case GL_FLOAT: case GL_UNSIGNED_INT: case GL_UNSIGNED_INT_24_8_EXT: @@ -2527,17 +2530,13 @@ _mesa_remove_renderbuffer(struct gl_framebuffer *fb, * Set *ptr to point to rb. If *ptr points to another renderbuffer, * dereference that buffer first. The new renderbuffer's refcount will * be incremented. The old renderbuffer's refcount will be decremented. + * This is normally only called from the _mesa_reference_renderbuffer() macro + * when there's a real pointer change. */ void -_mesa_reference_renderbuffer(struct gl_renderbuffer **ptr, - struct gl_renderbuffer *rb) +_mesa_reference_renderbuffer_(struct gl_renderbuffer **ptr, + struct gl_renderbuffer *rb) { - assert(ptr); - if (*ptr == rb) { - /* no change */ - return; - } - if (*ptr) { /* Unreference the old renderbuffer */ GLboolean deleteFlag = GL_FALSE; @@ -2567,26 +2566,3 @@ _mesa_reference_renderbuffer(struct gl_renderbuffer **ptr, *ptr = rb; } } - - -/** - * Create a new combined depth/stencil renderbuffer for implementing - * the GL_EXT_packed_depth_stencil extension. - * \return new depth/stencil renderbuffer - */ -struct gl_renderbuffer * -_mesa_new_depthstencil_renderbuffer(struct gl_context *ctx, GLuint name) -{ - struct gl_renderbuffer *dsrb; - - dsrb = _mesa_new_renderbuffer(ctx, name); - if (!dsrb) - return NULL; - - /* init fields not covered by _mesa_new_renderbuffer() */ - dsrb->InternalFormat = GL_DEPTH24_STENCIL8_EXT; - dsrb->Format = MESA_FORMAT_Z24_S8; - dsrb->AllocStorage = _mesa_soft_renderbuffer_storage; - - return dsrb; -} diff --git a/src/mesa/main/renderbuffer.h b/src/mesa/main/renderbuffer.h index 39d9b3035e6..f9329d55fa5 100644 --- a/src/mesa/main/renderbuffer.h +++ b/src/mesa/main/renderbuffer.h @@ -105,11 +105,17 @@ _mesa_remove_renderbuffer(struct gl_framebuffer *fb, gl_buffer_index bufferName); extern void -_mesa_reference_renderbuffer(struct gl_renderbuffer **ptr, - struct gl_renderbuffer *rb); +_mesa_reference_renderbuffer_(struct gl_renderbuffer **ptr, + struct gl_renderbuffer *rb); -extern struct gl_renderbuffer * -_mesa_new_depthstencil_renderbuffer(struct gl_context *ctx, GLuint name); +static INLINE void +_mesa_reference_renderbuffer(struct gl_renderbuffer **ptr, + struct gl_renderbuffer *rb) +{ + if (*ptr != rb) + _mesa_reference_renderbuffer_(ptr, rb); +} + #endif /* RENDERBUFFER_H */ diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c index f7774fdd7cb..8f8d87b90e8 100644 --- a/src/mesa/main/samplerobj.c +++ b/src/mesa/main/samplerobj.c @@ -251,7 +251,7 @@ _mesa_BindSampler(GLuint unit, GLuint sampler) struct gl_sampler_object *sampObj; GET_CURRENT_CONTEXT(ctx); - if (unit >= ctx->Const.MaxTextureImageUnits) { + if (unit >= ctx->Const.MaxCombinedTextureImageUnits) { _mesa_error(ctx, GL_INVALID_VALUE, "glBindSampler(unit %u)", unit); return; } diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index b58e30de9c4..8df25c3f988 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -1032,7 +1032,7 @@ validate_samplers(const struct gl_program *prog, char *errMsg) "TEXTURE_2D", "TEXTURE_1D", }; - GLint targetUsed[MAX_TEXTURE_IMAGE_UNITS]; + GLint targetUsed[MAX_COMBINED_TEXTURE_IMAGE_UNITS]; GLbitfield samplersUsed = prog->SamplersUsed; GLuint i; @@ -1050,7 +1050,7 @@ validate_samplers(const struct gl_program *prog, char *errMsg) gl_texture_index target; GLint sampler = _mesa_ffs(samplersUsed) - 1; assert(sampler >= 0); - assert(sampler < MAX_TEXTURE_IMAGE_UNITS); + assert(sampler < Elements(prog->SamplerUnits)); unit = prog->SamplerUnits[sampler]; target = prog->SamplerTargets[sampler]; if (targetUsed[unit] != -1 && targetUsed[unit] != (int) target) { diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c index 33d91ad594d..f128648f477 100644 --- a/src/mesa/main/shaderobj.c +++ b/src/mesa/main/shaderobj.c @@ -244,6 +244,8 @@ _mesa_init_shader_program(struct gl_context *ctx, struct gl_shader_program *prog prog->Geom.InputType = GL_TRIANGLES; prog->Geom.OutputType = GL_TRIANGLE_STRIP; #endif + + prog->InfoLog = ralloc_strdup(prog, ""); } /** @@ -283,6 +285,10 @@ _mesa_clear_shader_program_data(struct gl_context *ctx, _mesa_free_parameter_list(shProg->Varying); shProg->Varying = NULL; } + + assert(shProg->InfoLog != NULL); + ralloc_free(shProg->InfoLog); + shProg->InfoLog = ralloc_strdup(shProg, ""); } @@ -317,11 +323,6 @@ _mesa_free_shader_program_data(struct gl_context *ctx, shProg->Shaders = NULL; } - if (shProg->InfoLog) { - ralloc_free(shProg->InfoLog); - shProg->InfoLog = NULL; - } - /* Transform feedback varying vars */ for (i = 0; i < shProg->TransformFeedback.NumVarying; i++) { free(shProg->TransformFeedback.VaryingNames[i]); diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c index 4696dbb526f..7ad50bcaddc 100644 --- a/src/mesa/main/state.c +++ b/src/mesa/main/state.c @@ -192,7 +192,10 @@ update_arrays( struct gl_context *ctx ) static void update_program_enables(struct gl_context *ctx) { - /* These _Enabled flags indicate if the program is enabled AND valid. */ + /* These _Enabled flags indicate if the user-defined ARB/NV vertex/fragment + * program is enabled AND valid. Similarly for ATI fragment shaders. + * GLSL shaders not relevant here. + */ ctx->VertexProgram._Enabled = ctx->VertexProgram.Enabled && ctx->VertexProgram.Current->Base.Instructions; ctx->FragmentProgram._Enabled = ctx->FragmentProgram.Enabled @@ -203,11 +206,12 @@ update_program_enables(struct gl_context *ctx) /** - * Update vertex/fragment program state. In particular, update these fields: - * ctx->VertexProgram._Current - * ctx->VertexProgram._TnlProgram, - * These point to the highest priority enabled vertex/fragment program or are - * NULL if fixed-function processing is to be done. + * Update the ctx->Vertex/Geometry/FragmentProgram._Current pointers to point + * to the current/active programs. Then call ctx->Driver.BindProgram() to + * tell the driver which programs to use. + * + * Programs may come from 3 sources: GLSL shaders, ARB/NV_vertex/fragment + * programs or programs derived from fixed-function state. * * This function needs to be called after texture state validation in case * we're generating a fragment program from fixed-function texture state. @@ -243,34 +247,33 @@ update_program(struct gl_context *ctx) */ if (fsProg && fsProg->LinkStatus && fsProg->FragmentProgram) { - /* Use shader programs */ + /* Use GLSL fragment shader */ _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, fsProg->FragmentProgram); } else if (ctx->FragmentProgram._Enabled) { - /* use user-defined vertex program */ + /* Use user-defined fragment program */ _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, ctx->FragmentProgram.Current); } else if (ctx->FragmentProgram._MaintainTexEnvProgram) { - /* Use fragment program generated from fixed-function state. - */ + /* Use fragment program generated from fixed-function state */ _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, _mesa_get_fixed_func_fragment_program(ctx)); _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._TexEnvProgram, ctx->FragmentProgram._Current); } else { - /* no fragment program */ + /* No fragment program */ _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, NULL); } if (gsProg && gsProg->LinkStatus && gsProg->GeometryProgram) { - /* Use shader programs */ + /* Use GLSL geometry shader */ _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, gsProg->GeometryProgram); } else { - /* no fragment program */ + /* No geometry program */ _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, NULL); } @@ -279,18 +282,17 @@ update_program(struct gl_context *ctx) * fragprog inputs. */ if (vsProg && vsProg->LinkStatus && vsProg->VertexProgram) { - /* Use shader programs */ + /* Use GLSL vertex shader */ _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, - vsProg->VertexProgram); + vsProg->VertexProgram); } else if (ctx->VertexProgram._Enabled) { - /* use user-defined vertex program */ + /* Use user-defined vertex program */ _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, ctx->VertexProgram.Current); } else if (ctx->VertexProgram._MaintainTnlProgram) { - /* Use vertex program generated from fixed-function state. - */ + /* Use vertex program generated from fixed-function state */ _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, _mesa_get_fixed_func_vertex_program(ctx)); _mesa_reference_vertprog(ctx, &ctx->VertexProgram._TnlProgram, @@ -416,29 +418,44 @@ update_color(struct gl_context *ctx) ctx->Color._LogicOpEnabled = _mesa_rgba_logicop_enabled(ctx); } + +/** + * Update the ctx->Color._ClampFragmentColor field + */ static void update_clamp_fragment_color(struct gl_context *ctx) { - if(ctx->Color.ClampFragmentColor == GL_FIXED_ONLY_ARB) - ctx->Color._ClampFragmentColor = !ctx->DrawBuffer || !ctx->DrawBuffer->Visual.floatMode; + if (ctx->Color.ClampFragmentColor == GL_FIXED_ONLY_ARB) + ctx->Color._ClampFragmentColor = + !ctx->DrawBuffer || !ctx->DrawBuffer->Visual.floatMode; else ctx->Color._ClampFragmentColor = ctx->Color.ClampFragmentColor; } + +/** + * Update the ctx->Color._ClampVertexColor field + */ static void update_clamp_vertex_color(struct gl_context *ctx) { - if(ctx->Light.ClampVertexColor == GL_FIXED_ONLY_ARB) - ctx->Light._ClampVertexColor = !ctx->DrawBuffer || !ctx->DrawBuffer->Visual.floatMode; + if (ctx->Light.ClampVertexColor == GL_FIXED_ONLY_ARB) + ctx->Light._ClampVertexColor = + !ctx->DrawBuffer || !ctx->DrawBuffer->Visual.floatMode; else ctx->Light._ClampVertexColor = ctx->Light.ClampVertexColor; } + +/** + * Update the ctx->Color._ClampReadColor field + */ static void update_clamp_read_color(struct gl_context *ctx) { - if(ctx->Color.ClampReadColor == GL_FIXED_ONLY_ARB) - ctx->Color._ClampReadColor = !ctx->ReadBuffer || !ctx->ReadBuffer->Visual.floatMode; + if (ctx->Color.ClampReadColor == GL_FIXED_ONLY_ARB) + ctx->Color._ClampReadColor = + !ctx->ReadBuffer || !ctx->ReadBuffer->Visual.floatMode; else ctx->Color._ClampReadColor = ctx->Color.ClampReadColor; } diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c index d820ae92747..42bd1eee5ca 100644 --- a/src/mesa/main/texcompress.c +++ b/src/mesa/main/texcompress.c @@ -40,19 +40,192 @@ /** + * Get the GL base format of a specified GL compressed texture format + * + * From page 232 of the OpenGL 3.3 (Compatiblity Profile) spec: + * + * "Compressed Internal Format Base Internal Format Type + * --------------------------- -------------------- --------- + * COMPRESSED_ALPHA ALPHA Generic + * COMPRESSED_LUMINANCE LUMINANCE Generic + * COMPRESSED_LUMINANCE_ALPHA LUMINANCE_ALPHA Generic + * COMPRESSED_INTENSITY INTENSITY Generic + * COMPRESSED_RED RED Generic + * COMPRESSED_RG RG Generic + * COMPRESSED_RGB RGB Generic + * COMPRESSED_RGBA RGBA Generic + * COMPRESSED_SRGB RGB Generic + * COMPRESSED_SRGB_ALPHA RGBA Generic + * COMPRESSED_SLUMINANCE LUMINANCE Generic + * COMPRESSED_SLUMINANCE_ALPHA LUMINANCE_ALPHA Generic + * COMPRESSED_RED_RGTC1 RED Specific + * COMPRESSED_SIGNED_RED_RGTC1 RED Specific + * COMPRESSED_RG_RGTC2 RG Specific + * COMPRESSED_SIGNED_RG_RGTC2 RG Specific" + * + * \return + * The base format of \c format if \c format is a compressed format (either + * generic or specific. Otherwise 0 is returned. + */ +GLenum +_mesa_gl_compressed_format_base_format(GLenum format) +{ + switch (format) { + case GL_COMPRESSED_RED: + case GL_COMPRESSED_RED_RGTC1: + case GL_COMPRESSED_SIGNED_RED_RGTC1: + return GL_RED; + + case GL_COMPRESSED_RG: + case GL_COMPRESSED_RG_RGTC2: + case GL_COMPRESSED_SIGNED_RG_RGTC2: + return GL_RG; + + case GL_COMPRESSED_RGB: + case GL_COMPRESSED_SRGB: + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + case GL_COMPRESSED_RGB_FXT1_3DFX: + case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT: + return GL_RGB; + + case GL_COMPRESSED_RGBA: + case GL_COMPRESSED_SRGB_ALPHA: + case GL_COMPRESSED_RGBA_BPTC_UNORM_ARB: + case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB: + case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB: + case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB: + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + case GL_COMPRESSED_RGBA_FXT1_3DFX: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: + return GL_RGBA; + + case GL_COMPRESSED_ALPHA: + return GL_ALPHA; + + case GL_COMPRESSED_LUMINANCE: + case GL_COMPRESSED_SLUMINANCE: + case GL_COMPRESSED_LUMINANCE_LATC1_EXT: + case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT: + return GL_LUMINANCE; + + case GL_COMPRESSED_LUMINANCE_ALPHA: + case GL_COMPRESSED_SLUMINANCE_ALPHA: + case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT: + case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT: + case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI: + return GL_LUMINANCE_ALPHA; + + case GL_COMPRESSED_INTENSITY: + return GL_INTENSITY; + + default: + return 0; + } +} + +/** * Return list of (and count of) all specific texture compression * formats that are supported. * + * Some formats are \b not returned by this function. The + * \c GL_COMPRESSED_TEXTURE_FORMATS query only returns formats that are + * "suitable for general-purpose usage." All texture compression extensions + * have taken this to mean either linear RGB or linear RGBA. + * + * The GL_ARB_texture_compress_rgtc spec says: + * + * "19) Should the GL_NUM_COMPRESSED_TEXTURE_FORMATS and + * GL_COMPRESSED_TEXTURE_FORMATS queries return the RGTC formats? + * + * RESOLVED: No. + * + * The OpenGL 2.1 specification says "The only values returned + * by this query [GL_COMPRESSED_TEXTURE_FORMATS"] are those + * corresponding to formats suitable for general-purpose usage. + * The renderer will not enumerate formats with restrictions that + * need to be specifically understood prior to use." + * + * Compressed textures with just red or red-green components are + * not general-purpose so should not be returned by these queries + * because they have restrictions. + * + * Applications that seek to use the RGTC formats should do so + * by looking for this extension's name in the string returned by + * glGetString(GL_EXTENSIONS) rather than + * what GL_NUM_COMPRESSED_TEXTURE_FORMATS and + * GL_COMPRESSED_TEXTURE_FORMATS return." + * + * There is nearly identical wording in the GL_EXT_texture_compression_rgtc + * spec. + * + * The GL_EXT_texture_rRGB spec says: + * + * "22) Should the new COMPRESSED_SRGB_* formats be listed in an + * implementation's GL_COMPRESSED_TEXTURE_FORMATS list? + * + * RESOLVED: No. Section 3.8.1 says formats listed by + * GL_COMPRESSED_TEXTURE_FORMATS are "suitable for general-purpose + * usage." The non-linear distribution of red, green, and + * blue for these sRGB compressed formats makes them not really + * general-purpose." + * + * The GL_EXT_texture_compression_latc spec says: + * + * "16) Should the GL_NUM_COMPRESSED_TEXTURE_FORMATS and + * GL_COMPRESSED_TEXTURE_FORMATS queries return the LATC formats? + * + * RESOLVED: No. + * + * The OpenGL 2.1 specification says "The only values returned + * by this query [GL_COMPRESSED_TEXTURE_FORMATS"] are those + * corresponding to formats suitable for general-purpose usage. + * The renderer will not enumerate formats with restrictions that + * need to be specifically understood prior to use." + * + * Historically, OpenGL implementation have advertised the RGB and + * RGBA versions of the S3TC extensions compressed format tokens + * through this mechanism. + * + * The specification is not sufficiently clear about what "suitable + * for general-purpose usage" means. Historically that seems to mean + * unsigned RGB or unsigned RGBA. The DXT1 format supporting alpha + * (GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) is not exposed in the list (at + * least for NVIDIA drivers) because the alpha is always 1.0 expect + * when it is 0.0 when RGB is required to be black. NVIDIA's even + * limits itself to true linear RGB or RGBA formats, specifically + * not including EXT_texture_sRGB's sRGB S3TC compressed formats. + * + * Adding luminance and luminance-alpha texture formats (and + * certainly signed versions of luminance and luminance-alpha + * formats!) invites potential comptaibility problems with old + * applications using this mechanism since old applications are + * unlikely to expect non-RGB or non-RGBA formats to be advertised + * through this mechanism. However no specific misinteractions + * with old applications is known. + * + * Applications that seek to use the LATC formats should do so + * by looking for this extension's name in the string returned by + * glGetString(GL_EXTENSIONS) rather than + * what GL_NUM_COMPRESSED_TEXTURE_FORMATS and + * GL_COMPRESSED_TEXTURE_FORMATS return." + * + * There is no formal spec for GL_ATI_texture_compression_3dc. Since the + * formats added by this extension are luminance-alpha formats, it is + * reasonable to expect them to follow the same rules as + * GL_EXT_texture_compression_latc. At the very least, Catalyst 11.6 does not + * expose the 3dc formats through this mechanism. + * * \param ctx the GL context * \param formats the resulting format list (may be NULL). - * \param all if true return all formats, even those with some kind - * of restrictions/limitations (See GL_ARB_texture_compression - * spec for more info). * * \return number of formats. */ GLuint -_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all) +_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats) { GLuint n = 0; if (ctx->Extensions.TDFX_texture_compression_FXT1) { @@ -64,24 +237,15 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean a n += 2; } } - /* don't return RGTC - ARB_texture_compression_rgtc query 19 */ + if (ctx->Extensions.EXT_texture_compression_s3tc) { if (formats) { formats[n++] = GL_COMPRESSED_RGB_S3TC_DXT1_EXT; - /* This format has some restrictions/limitations and so should - * not be returned via the GL_COMPRESSED_TEXTURE_FORMATS query. - * Specifically, all transparent pixels become black. NVIDIA - * omits this format too. - */ - if (all) - formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT; formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT; formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT; } else { n += 3; - if (all) - n += 1; } } if (ctx->Extensions.S3_s3tc) { @@ -95,19 +259,6 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean a n += 4; } } -#if FEATURE_EXT_texture_sRGB - if (ctx->Extensions.EXT_texture_sRGB) { - if (formats) { - formats[n++] = GL_COMPRESSED_SRGB_S3TC_DXT1_EXT; - formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT; - formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT; - formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT; - } - else { - n += 4; - } - } -#endif /* FEATURE_EXT_texture_sRGB */ return n; #if FEATURE_ES1 || FEATURE_ES2 diff --git a/src/mesa/main/texcompress.h b/src/mesa/main/texcompress.h index 19b08bbadf6..375cf90c8a2 100644 --- a/src/mesa/main/texcompress.h +++ b/src/mesa/main/texcompress.h @@ -33,8 +33,11 @@ struct gl_context; #if _HAVE_FULL_GL +extern GLenum +_mesa_gl_compressed_format_base_format(GLenum format); + extern GLuint -_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all); +_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats); extern gl_format _mesa_glenum_to_compressed_format(GLenum format); diff --git a/src/mesa/main/texenv.c b/src/mesa/main/texenv.c index 9228e354a4d..c0d0f3779b2 100644 --- a/src/mesa/main/texenv.c +++ b/src/mesa/main/texenv.c @@ -419,7 +419,7 @@ _mesa_TexEnvfv( GLenum target, GLenum pname, const GLfloat *param ) ASSERT_OUTSIDE_BEGIN_END(ctx); maxUnit = (target == GL_POINT_SPRITE_NV && pname == GL_COORD_REPLACE_NV) - ? ctx->Const.MaxTextureCoordUnits : ctx->Const.MaxTextureImageUnits; + ? ctx->Const.MaxTextureCoordUnits : ctx->Const.MaxCombinedTextureImageUnits; if (ctx->Texture.CurrentUnit >= maxUnit) { _mesa_error(ctx, GL_INVALID_OPERATION, "glTexEnvfv(current unit)"); return; @@ -748,7 +748,7 @@ _mesa_GetTexEnvfv( GLenum target, GLenum pname, GLfloat *params ) ASSERT_OUTSIDE_BEGIN_END(ctx); maxUnit = (target == GL_POINT_SPRITE_NV && pname == GL_COORD_REPLACE_NV) - ? ctx->Const.MaxTextureCoordUnits : ctx->Const.MaxTextureImageUnits; + ? ctx->Const.MaxTextureCoordUnits : ctx->Const.MaxCombinedTextureImageUnits; if (ctx->Texture.CurrentUnit >= maxUnit) { _mesa_error(ctx, GL_INVALID_OPERATION, "glGetTexEnvfv(current unit)"); return; @@ -817,7 +817,7 @@ _mesa_GetTexEnviv( GLenum target, GLenum pname, GLint *params ) ASSERT_OUTSIDE_BEGIN_END(ctx); maxUnit = (target == GL_POINT_SPRITE_NV && pname == GL_COORD_REPLACE_NV) - ? ctx->Const.MaxTextureCoordUnits : ctx->Const.MaxTextureImageUnits; + ? ctx->Const.MaxTextureCoordUnits : ctx->Const.MaxCombinedTextureImageUnits; if (ctx->Texture.CurrentUnit >= maxUnit) { _mesa_error(ctx, GL_INVALID_OPERATION, "glGetTexEnviv(current unit)"); return; diff --git a/src/mesa/main/texfetch.c b/src/mesa/main/texfetch.c index 6716ce1b071..72283eb68af 100644 --- a/src/mesa/main/texfetch.c +++ b/src/mesa/main/texfetch.c @@ -913,6 +913,20 @@ texfetch_funcs[MESA_FORMAT_COUNT] = fetch_texel_2d_r11_g11_b10f, fetch_texel_3d_r11_g11_b10f, store_texel_r11_g11_b10f + }, + { + MESA_FORMAT_Z32_FLOAT, + fetch_texel_1d_f_r_f32, /* Reuse the R32F functions. */ + fetch_texel_2d_f_r_f32, + fetch_texel_3d_f_r_f32, + store_texel_r_f32 + }, + { + MESA_FORMAT_Z32_FLOAT_X24S8, + fetch_texel_1d_z32f_x24s8, + fetch_texel_2d_z32f_x24s8, + fetch_texel_3d_z32f_x24s8, + store_texel_z32f_x24s8 } }; diff --git a/src/mesa/main/texfetch_tmp.h b/src/mesa/main/texfetch_tmp.h index e6fd81d4d57..d170adf2e00 100644 --- a/src/mesa/main/texfetch_tmp.h +++ b/src/mesa/main/texfetch_tmp.h @@ -2287,7 +2287,8 @@ static void FETCH(f_z24_s8)( const struct gl_texture_image *texImage, const GLuint *src = TEXEL_ADDR(GLuint, texImage, i, j, k, 1); const GLfloat scale = 1.0F / (GLfloat) 0xffffff; texel[0] = ((*src) >> 8) * scale; - ASSERT(texImage->TexFormat == MESA_FORMAT_Z24_S8); + ASSERT(texImage->TexFormat == MESA_FORMAT_Z24_S8 || + texImage->TexFormat == MESA_FORMAT_Z24_X8); ASSERT(texel[0] >= 0.0F); ASSERT(texel[0] <= 1.0F); } @@ -2314,7 +2315,8 @@ static void FETCH(f_s8_z24)( const struct gl_texture_image *texImage, const GLuint *src = TEXEL_ADDR(GLuint, texImage, i, j, k, 1); const GLfloat scale = 1.0F / (GLfloat) 0xffffff; texel[0] = ((*src) & 0x00ffffff) * scale; - ASSERT(texImage->TexFormat == MESA_FORMAT_S8_Z24); + ASSERT(texImage->TexFormat == MESA_FORMAT_S8_Z24 || + texImage->TexFormat == MESA_FORMAT_X8_Z24); ASSERT(texel[0] >= 0.0F); ASSERT(texel[0] <= 1.0F); } @@ -2374,6 +2376,29 @@ static void store_texel_r11_g11_b10f(struct gl_texture_image *texImage, #endif +/* MESA_FORMAT_Z32_FLOAT_X24S8 ***********************************************/ + +static void FETCH(z32f_x24s8)(const struct gl_texture_image *texImage, + GLint i, GLint j, GLint k, GLfloat *texel) +{ + const GLfloat *src = TEXEL_ADDR(GLfloat, texImage, i, j, k, 2); + texel[RCOMP] = src[0]; + texel[GCOMP] = 0.0F; + texel[BCOMP] = 0.0F; + texel[ACOMP] = 1.0F; +} + +#if DIM == 3 +static void store_texel_z32f_x24s8(struct gl_texture_image *texImage, + GLint i, GLint j, GLint k, const void *texel) +{ + const GLfloat *src = (const GLfloat *) texel; + GLfloat *dst = TEXEL_ADDR(GLfloat, texImage, i, j, k, 2); + dst[0] = src[0]; +} +#endif + + #undef TEXEL_ADDR #undef DIM #undef FETCH diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c index 8cbb021d8b0..c919a74e047 100644 --- a/src/mesa/main/texformat.c +++ b/src/mesa/main/texformat.c @@ -416,6 +416,19 @@ _mesa_choose_tex_format( struct gl_context *ctx, GLint internalFormat, } } + if (ctx->Extensions.ARB_depth_buffer_float) { + switch (internalFormat) { + case GL_DEPTH_COMPONENT32F: + ASSERT(ctx->TextureFormatSupported[MESA_FORMAT_Z32_FLOAT]); + return MESA_FORMAT_Z32_FLOAT; + case GL_DEPTH32F_STENCIL8: + ASSERT(ctx->TextureFormatSupported[MESA_FORMAT_Z32_FLOAT_X24S8]); + return MESA_FORMAT_Z32_FLOAT_X24S8; + default: + ; /* fallthrough */ + } + } + if (ctx->Extensions.ATI_envmap_bumpmap) { switch (internalFormat) { case GL_DUDV_ATI: diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 0827cb883e8..27717cfb0f5 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -1,6 +1,5 @@ /* - * mesa 3-D graphics library - * Version: 7.6 + * Mesa 3-D graphics library * * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. * Copyright (C) 2009 VMware, Inc. All Rights Reserved. @@ -556,8 +555,6 @@ _mesa_tex_target_to_face(GLenum target) * \param target texture target. * \param level image level. * \param texImage texture image. - * - * This was basically prompted by the introduction of cube maps. */ void _mesa_set_tex_image(struct gl_texture_object *tObj, @@ -709,15 +706,13 @@ get_proxy_target(GLenum target) /** * Get the texture object that corresponds to the target of the given - * texture unit. + * texture unit. The target should have already been checked for validity. * * \param ctx GL context. * \param texUnit texture unit. * \param target texture target. * * \return pointer to the texture object on success, or NULL on failure. - * - * \sa gl_texture_unit. */ struct gl_texture_object * _mesa_select_tex_object(struct gl_context *ctx, @@ -1685,11 +1680,15 @@ texture_error_check( struct gl_context *ctx, /* additional checks for depth textures */ if (_mesa_base_tex_format(ctx, internalFormat) == GL_DEPTH_COMPONENT) { - /* Only 1D, 2D and rectangular textures supported, not 3D or cubes */ + /* Only 1D, 2D, rect and array textures supported, not 3D or cubes */ if (target != GL_TEXTURE_1D && target != GL_PROXY_TEXTURE_1D && target != GL_TEXTURE_2D && target != GL_PROXY_TEXTURE_2D && + target != GL_TEXTURE_1D_ARRAY && + target != GL_PROXY_TEXTURE_1D_ARRAY && + target != GL_TEXTURE_2D_ARRAY && + target != GL_PROXY_TEXTURE_2D_ARRAY && target != GL_TEXTURE_RECTANGLE_ARB && target != GL_PROXY_TEXTURE_RECTANGLE_ARB) { if (!isProxy) @@ -2793,29 +2792,43 @@ copyteximage(struct gl_context *ctx, GLuint dims, _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims); } else { - gl_format texFormat; - - if (texImage->Data) { - ctx->Driver.FreeTexImageData( ctx, texImage ); - } + /* choose actual hw format */ + gl_format texFormat = _mesa_choose_texture_format(ctx, texObj, + target, level, + internalFormat, + GL_NONE, GL_NONE); - ASSERT(texImage->Data == NULL); + if (legal_texture_size(ctx, texFormat, width, height, 1)) { + GLint srcX = x, srcY = y, dstX = 0, dstY = 0; - texFormat = _mesa_choose_texture_format(ctx, texObj, target, level, - internalFormat, GL_NONE, - GL_NONE); + /* Free old texture image */ + ctx->Driver.FreeTexImageData(ctx, texImage); - if (legal_texture_size(ctx, texFormat, width, height, 1)) { _mesa_init_teximage_fields(ctx, target, texImage, width, height, 1, border, internalFormat, texFormat); - ASSERT(ctx->Driver.CopyTexImage2D); - if (dims == 1) - ctx->Driver.CopyTexImage1D(ctx, target, level, internalFormat, - x, y, width, border); - else - ctx->Driver.CopyTexImage2D(ctx, target, level, internalFormat, - x, y, width, height, border); + /* Allocate texture memory (no pixel data yet) */ + if (dims == 1) { + ctx->Driver.TexImage1D(ctx, target, level, internalFormat, + width, border, GL_NONE, GL_NONE, NULL, + &ctx->Unpack, texObj, texImage); + } + else { + ctx->Driver.TexImage2D(ctx, target, level, internalFormat, + width, height, border, GL_NONE, GL_NONE, + NULL, &ctx->Unpack, texObj, texImage); + } + + if (_mesa_clip_copytexsubimage(ctx, &dstX, &dstY, &srcX, &srcY, + &width, &height)) { + if (dims == 1) + ctx->Driver.CopyTexSubImage1D(ctx, target, level, dstX, + srcX, srcY, width); + + else + ctx->Driver.CopyTexSubImage2D(ctx, target, level, dstX, dstY, + srcX, srcY, width, height); + } check_gen_mipmap(ctx, target, texObj, level); @@ -2826,6 +2839,7 @@ copyteximage(struct gl_context *ctx, GLuint dims, ctx->NewState |= _NEW_TEXTURE; } else { + /* probably too large of image */ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims); } } @@ -3270,7 +3284,7 @@ compressedteximage(struct gl_context *ctx, GLuint dims, border, imageSize, &reason); if (error) { - _mesa_error(ctx, error, "glTexImage2D(%s)", reason); + _mesa_error(ctx, error, "glCompressedTexImage%uD(%s)", dims, reason); return; } diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index fdf12817c9a..3021716a0b6 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -325,16 +325,14 @@ valid_texture_object(const struct gl_texture_object *tex) * Reference (or unreference) a texture object. * If '*ptr', decrement *ptr's refcount (and delete if it becomes zero). * If 'tex' is non-null, increment its refcount. + * This is normally only called from the _mesa_reference_texobj() macro + * when there's a real pointer change. */ void -_mesa_reference_texobj(struct gl_texture_object **ptr, - struct gl_texture_object *tex) +_mesa_reference_texobj_(struct gl_texture_object **ptr, + struct gl_texture_object *tex) { assert(ptr); - if (*ptr == tex) { - /* no change */ - return; - } if (*ptr) { /* Unreference the old texture */ @@ -879,6 +877,8 @@ unbind_texobj_from_fbo(struct gl_context *ctx, for (j = 0; j < BUFFER_COUNT; j++) { if (fb->Attachment[j].Type == GL_TEXTURE && fb->Attachment[j].Texture == texObj) { + /* Vertices are already flushed by _mesa_DeleteTextures */ + ctx->NewState |= _NEW_BUFFERS; _mesa_remove_attachment(ctx, fb->Attachment + j); } } @@ -897,7 +897,7 @@ unbind_texobj_from_texunits(struct gl_context *ctx, { GLuint u, tex; - for (u = 0; u < MAX_TEXTURE_IMAGE_UNITS; u++) { + for (u = 0; u < Elements(ctx->Texture.Unit); u++) { struct gl_texture_unit *unit = &ctx->Texture.Unit[u]; for (tex = 0; tex < NUM_TEXTURE_TARGETS; tex++) { if (texObj == unit->CurrentTex[tex]) { diff --git a/src/mesa/main/texobj.h b/src/mesa/main/texobj.h index 476a17537a3..1faae6f7e30 100644 --- a/src/mesa/main/texobj.h +++ b/src/mesa/main/texobj.h @@ -32,6 +32,7 @@ #define TEXTOBJ_H +#include "compiler.h" #include "glheader.h" struct gl_context; @@ -64,8 +65,17 @@ _mesa_clear_texture_object(struct gl_context *ctx, struct gl_texture_object *obj); extern void +_mesa_reference_texobj_(struct gl_texture_object **ptr, + struct gl_texture_object *tex); + +static INLINE void _mesa_reference_texobj(struct gl_texture_object **ptr, - struct gl_texture_object *tex); + struct gl_texture_object *tex) +{ + if (*ptr != tex) + _mesa_reference_texobj_(ptr, tex); +} + extern void _mesa_test_texobj_completeness( const struct gl_context *ctx, diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 4b9dcb5d3b5..bbbb306b2d9 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -888,7 +888,7 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, texObj = _mesa_select_tex_object(ctx, texUnit, target); img = _mesa_select_tex_image(ctx, texObj, target, level); - if (!img || !img->TexFormat) { + if (!img || img->TexFormat == MESA_FORMAT_NONE) { /* undefined texture image */ if (pname == GL_TEXTURE_COMPONENTS) *params = 1; @@ -915,9 +915,23 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, *params = _mesa_compressed_format_to_glenum(ctx, texFormat); } else { - /* return the user's requested internal format */ - *params = img->InternalFormat; - } + /* If the true internal format is not compressed but the user + * requested a generic compressed format, we have to return the + * generic base format that matches. + * + * From page 119 (page 129 of the PDF) of the OpenGL 1.3 spec: + * + * "If no specific compressed format is available, + * internalformat is instead replaced by the corresponding base + * internal format." + * + * Otherwise just return the user's requested internal format + */ + const GLenum f = + _mesa_gl_compressed_format_base_format(img->InternalFormat); + + *params = (f != 0) ? f : img->InternalFormat; + } break; case GL_TEXTURE_BORDER: *params = img->Border; @@ -980,28 +994,21 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, *params = 0; break; case GL_TEXTURE_DEPTH_SIZE_ARB: - if (ctx->Extensions.ARB_depth_texture) - *params = _mesa_get_format_bits(texFormat, pname); - else + if (!ctx->Extensions.ARB_depth_texture) goto invalid_pname; + *params = _mesa_get_format_bits(texFormat, pname); break; case GL_TEXTURE_STENCIL_SIZE_EXT: - if (ctx->Extensions.EXT_packed_depth_stencil || - ctx->Extensions.ARB_framebuffer_object) { - *params = _mesa_get_format_bits(texFormat, pname); - } - else { + if (!ctx->Extensions.EXT_packed_depth_stencil && + !ctx->Extensions.ARB_framebuffer_object) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, pname); break; case GL_TEXTURE_SHARED_SIZE: - if (ctx->VersionMajor >= 3 || - ctx->Extensions.EXT_texture_shared_exponent) { - *params = texFormat == MESA_FORMAT_RGB9_E5_FLOAT ? 5 : 0; - } - else { + if (ctx->VersionMajor < 3 && + !ctx->Extensions.EXT_texture_shared_exponent) goto invalid_pname; - } + *params = texFormat == MESA_FORMAT_RGB9_E5_FLOAT ? 5 : 0; break; /* GL_ARB_texture_compression */ @@ -1022,67 +1029,46 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, /* GL_ARB_texture_float */ case GL_TEXTURE_RED_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_RED_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_RED_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_GREEN_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_GREEN_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_GREEN_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_BLUE_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_BLUE_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_BLUE_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_ALPHA_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_ALPHA_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_ALPHA_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_LUMINANCE_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_LUMINANCE_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_LUMINANCE_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_INTENSITY_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_INTENSITY_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_INTENSITY_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_DEPTH_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_DEPTH_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_DEPTH_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; default: @@ -1104,7 +1090,6 @@ void GLAPIENTRY _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) { struct gl_texture_object *obj; - GLboolean error = GL_FALSE; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); @@ -1130,17 +1115,15 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) *params = ENUM_TO_FLOAT(obj->Sampler.WrapR); break; case GL_TEXTURE_BORDER_COLOR: - if(ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP)) + if (ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP)) _mesa_update_state_locked(ctx); - if(ctx->Color._ClampFragmentColor) - { + if (ctx->Color._ClampFragmentColor) { params[0] = CLAMP(obj->Sampler.BorderColor.f[0], 0.0F, 1.0F); params[1] = CLAMP(obj->Sampler.BorderColor.f[1], 0.0F, 1.0F); params[2] = CLAMP(obj->Sampler.BorderColor.f[2], 0.0F, 1.0F); params[3] = CLAMP(obj->Sampler.BorderColor.f[3], 0.0F, 1.0F); } - else - { + else { params[0] = obj->Sampler.BorderColor.f[0]; params[1] = obj->Sampler.BorderColor.f[1]; params[2] = obj->Sampler.BorderColor.f[2]; @@ -1148,14 +1131,8 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) } break; case GL_TEXTURE_RESIDENT: - { - GLboolean resident; - if (ctx->Driver.IsTextureResident) - resident = ctx->Driver.IsTextureResident(ctx, obj); - else - resident = GL_TRUE; - *params = ENUM_TO_FLOAT(resident); - } + *params = ctx->Driver.IsTextureResident ? + ctx->Driver.IsTextureResident(ctx, obj) : 1.0F; break; case GL_TEXTURE_PRIORITY: *params = obj->Priority; @@ -1173,49 +1150,37 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) *params = (GLfloat) obj->MaxLevel; break; case GL_TEXTURE_MAX_ANISOTROPY_EXT: - if (ctx->Extensions.EXT_texture_filter_anisotropic) { - *params = obj->Sampler.MaxAnisotropy; - } - else - error = GL_TRUE; + if (!ctx->Extensions.EXT_texture_filter_anisotropic) + goto invalid_pname; + *params = obj->Sampler.MaxAnisotropy; break; case GL_TEXTURE_COMPARE_FAIL_VALUE_ARB: - if (ctx->Extensions.ARB_shadow_ambient) { - *params = obj->Sampler.CompareFailValue; - } - else - error = GL_TRUE; + if (!ctx->Extensions.ARB_shadow_ambient) + goto invalid_pname; + *params = obj->Sampler.CompareFailValue; break; case GL_GENERATE_MIPMAP_SGIS: *params = (GLfloat) obj->GenerateMipmap; break; case GL_TEXTURE_COMPARE_MODE_ARB: - if (ctx->Extensions.ARB_shadow) { - *params = (GLfloat) obj->Sampler.CompareMode; - } - else - error = GL_TRUE; + if (!ctx->Extensions.ARB_shadow) + goto invalid_pname; + *params = (GLfloat) obj->Sampler.CompareMode; break; case GL_TEXTURE_COMPARE_FUNC_ARB: - if (ctx->Extensions.ARB_shadow) { - *params = (GLfloat) obj->Sampler.CompareFunc; - } - else - error = GL_TRUE; + if (!ctx->Extensions.ARB_shadow) + goto invalid_pname; + *params = (GLfloat) obj->Sampler.CompareFunc; break; case GL_DEPTH_TEXTURE_MODE_ARB: - if (ctx->Extensions.ARB_depth_texture) { - *params = (GLfloat) obj->Sampler.DepthMode; - } - else - error = GL_TRUE; + if (!ctx->Extensions.ARB_depth_texture) + goto invalid_pname; + *params = (GLfloat) obj->Sampler.DepthMode; break; case GL_TEXTURE_LOD_BIAS: - if (ctx->Extensions.EXT_texture_lod_bias) { - *params = obj->Sampler.LodBias; - } - else - error = GL_TRUE; + if (!ctx->Extensions.EXT_texture_lod_bias) + goto invalid_pname; + *params = obj->Sampler.LodBias; break; #if FEATURE_OES_draw_texture case GL_TEXTURE_CROP_RECT_OES: @@ -1230,45 +1195,40 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) case GL_TEXTURE_SWIZZLE_G_EXT: case GL_TEXTURE_SWIZZLE_B_EXT: case GL_TEXTURE_SWIZZLE_A_EXT: - if (ctx->Extensions.EXT_texture_swizzle) { - GLuint comp = pname - GL_TEXTURE_SWIZZLE_R_EXT; - *params = (GLfloat) obj->Swizzle[comp]; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_swizzle) + goto invalid_pname; + *params = (GLfloat) obj->Swizzle[pname - GL_TEXTURE_SWIZZLE_R_EXT]; break; case GL_TEXTURE_SWIZZLE_RGBA_EXT: - if (ctx->Extensions.EXT_texture_swizzle) { + if (!ctx->Extensions.EXT_texture_swizzle) { + goto invalid_pname; + } + else { GLuint comp; for (comp = 0; comp < 4; comp++) { params[comp] = (GLfloat) obj->Swizzle[comp]; } } - else { - error = GL_TRUE; - } break; case GL_TEXTURE_CUBE_MAP_SEAMLESS: - if (ctx->Extensions.AMD_seamless_cubemap_per_texture) { + if (!ctx->Extensions.AMD_seamless_cubemap_per_texture) + goto invalid_pname; *params = (GLfloat) obj->Sampler.CubeMapSeamless; - } - else { - error = GL_TRUE; - } + break; default: - error = GL_TRUE; - break; + goto invalid_pname; } - if (error) - _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameterfv(pname=0x%x)", - pname); + /* no error if we get here */ + _mesa_unlock_texture(ctx, obj); + return; +invalid_pname: _mesa_unlock_texture(ctx, obj); + _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameterfv(pname=0x%x)", pname); } @@ -1276,13 +1236,12 @@ void GLAPIENTRY _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) { struct gl_texture_object *obj; - GLboolean error = GL_FALSE; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); - obj = get_texobj(ctx, target, GL_TRUE); - if (!obj) - return; + obj = get_texobj(ctx, target, GL_TRUE); + if (!obj) + return; _mesa_lock_texture(ctx, obj); switch (pname) { @@ -1315,14 +1274,8 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) } break;; case GL_TEXTURE_RESIDENT: - { - GLboolean resident; - if (ctx->Driver.IsTextureResident) - resident = ctx->Driver.IsTextureResident(ctx, obj); - else - resident = GL_TRUE; - *params = (GLint) resident; - } + *params = ctx->Driver.IsTextureResident ? + ctx->Driver.IsTextureResident(ctx, obj) : 1; break;; case GL_TEXTURE_PRIORITY: *params = FLOAT_TO_INT(obj->Priority); @@ -1340,55 +1293,37 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) *params = obj->MaxLevel; break;; case GL_TEXTURE_MAX_ANISOTROPY_EXT: - if (ctx->Extensions.EXT_texture_filter_anisotropic) { - *params = (GLint) obj->Sampler.MaxAnisotropy; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_filter_anisotropic) + goto invalid_pname; + *params = (GLint) obj->Sampler.MaxAnisotropy; break; case GL_TEXTURE_COMPARE_FAIL_VALUE_ARB: - if (ctx->Extensions.ARB_shadow_ambient) { - *params = (GLint) FLOAT_TO_INT(obj->Sampler.CompareFailValue); - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.ARB_shadow_ambient) + goto invalid_pname; + *params = (GLint) FLOAT_TO_INT(obj->Sampler.CompareFailValue); break; case GL_GENERATE_MIPMAP_SGIS: *params = (GLint) obj->GenerateMipmap; break; case GL_TEXTURE_COMPARE_MODE_ARB: - if (ctx->Extensions.ARB_shadow) { - *params = (GLint) obj->Sampler.CompareMode; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.ARB_shadow) + goto invalid_pname; + *params = (GLint) obj->Sampler.CompareMode; break; case GL_TEXTURE_COMPARE_FUNC_ARB: - if (ctx->Extensions.ARB_shadow) { - *params = (GLint) obj->Sampler.CompareFunc; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.ARB_shadow) + goto invalid_pname; + *params = (GLint) obj->Sampler.CompareFunc; break; case GL_DEPTH_TEXTURE_MODE_ARB: - if (ctx->Extensions.ARB_depth_texture) { - *params = (GLint) obj->Sampler.DepthMode; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.ARB_depth_texture) + goto invalid_pname; + *params = (GLint) obj->Sampler.DepthMode; break; case GL_TEXTURE_LOD_BIAS: - if (ctx->Extensions.EXT_texture_lod_bias) { - *params = (GLint) obj->Sampler.LodBias; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_lod_bias) + goto invalid_pname; + *params = (GLint) obj->Sampler.LodBias; break; #if FEATURE_OES_draw_texture case GL_TEXTURE_CROP_RECT_OES: @@ -1402,41 +1337,34 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) case GL_TEXTURE_SWIZZLE_G_EXT: case GL_TEXTURE_SWIZZLE_B_EXT: case GL_TEXTURE_SWIZZLE_A_EXT: - if (ctx->Extensions.EXT_texture_swizzle) { - GLuint comp = pname - GL_TEXTURE_SWIZZLE_R_EXT; - *params = obj->Swizzle[comp]; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_swizzle) + goto invalid_pname; + *params = obj->Swizzle[pname - GL_TEXTURE_SWIZZLE_R_EXT]; break; case GL_TEXTURE_SWIZZLE_RGBA_EXT: - if (ctx->Extensions.EXT_texture_swizzle) { - COPY_4V(params, obj->Swizzle); - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_swizzle) + goto invalid_pname; + COPY_4V(params, obj->Swizzle); break; case GL_TEXTURE_CUBE_MAP_SEAMLESS: - if (ctx->Extensions.AMD_seamless_cubemap_per_texture) { - *params = (GLint) obj->Sampler.CubeMapSeamless; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.AMD_seamless_cubemap_per_texture) + goto invalid_pname; + *params = (GLint) obj->Sampler.CubeMapSeamless; + break; default: - ; /* silence warnings */ + goto invalid_pname; } - if (error) - _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameteriv(pname=0x%x)", - pname); + /* no error if we get here */ + _mesa_unlock_texture(ctx, obj); + return; +invalid_pname: _mesa_unlock_texture(ctx, obj); + _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameteriv(pname=0x%x)", pname); } @@ -1449,6 +1377,8 @@ _mesa_GetTexParameterIiv(GLenum target, GLenum pname, GLint *params) ASSERT_OUTSIDE_BEGIN_END(ctx); texObj = get_texobj(ctx, target, GL_TRUE); + if (!texObj) + return; switch (pname) { case GL_TEXTURE_BORDER_COLOR: @@ -1469,6 +1399,8 @@ _mesa_GetTexParameterIuiv(GLenum target, GLenum pname, GLuint *params) ASSERT_OUTSIDE_BEGIN_END(ctx); texObj = get_texobj(ctx, target, GL_TRUE); + if (!texObj) + return; switch (pname) { case GL_TEXTURE_BORDER_COLOR: diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index 6da3e4eb7b4..c4aeaa8f16d 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -1002,15 +1002,17 @@ memcpy_texture(struct gl_context *ctx, /** - * Store a 32-bit integer depth component texture image. + * Store a 32-bit integer or float depth component texture image. */ static GLboolean _mesa_texstore_z32(TEXSTORE_PARAMS) { const GLuint depthScale = 0xffffffff; const GLuint texelBytes = _mesa_get_format_bytes(dstFormat); + const GLenum dstType = _mesa_get_format_datatype(dstFormat); (void) dims; - ASSERT(dstFormat == MESA_FORMAT_Z32); + ASSERT(dstFormat == MESA_FORMAT_Z32 || + dstFormat == MESA_FORMAT_Z32_FLOAT); ASSERT(texelBytes == sizeof(GLuint)); if (ctx->Pixel.DepthScale == 1.0f && @@ -1018,7 +1020,7 @@ _mesa_texstore_z32(TEXSTORE_PARAMS) !srcPacking->SwapBytes && baseInternalFormat == GL_DEPTH_COMPONENT && srcFormat == GL_DEPTH_COMPONENT && - srcType == GL_UNSIGNED_INT) { + srcType == dstType) { /* simple memcpy path */ memcpy_texture(ctx, dims, dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset, @@ -1039,7 +1041,7 @@ _mesa_texstore_z32(TEXSTORE_PARAMS) const GLvoid *src = _mesa_image_address(dims, srcPacking, srcAddr, srcWidth, srcHeight, srcFormat, srcType, img, row, 0); _mesa_unpack_depth_span(ctx, srcWidth, - GL_UNSIGNED_INT, (GLuint *) dstRow, + dstType, dstRow, depthScale, srcType, src, srcPacking); dstRow += dstRowStride; } @@ -3303,15 +3305,16 @@ _mesa_texstore_z24_s8(TEXSTORE_PARAMS) { const GLuint depthScale = 0xffffff; const GLint srcRowStride - = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType) - / sizeof(GLuint); + = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType); GLint img, row; ASSERT(dstFormat == MESA_FORMAT_Z24_S8); - ASSERT(srcFormat == GL_DEPTH_STENCIL_EXT || srcFormat == GL_DEPTH_COMPONENT); + ASSERT(srcFormat == GL_DEPTH_STENCIL_EXT || + srcFormat == GL_DEPTH_COMPONENT || + srcFormat == GL_STENCIL_INDEX); ASSERT(srcFormat != GL_DEPTH_STENCIL_EXT || srcType == GL_UNSIGNED_INT_24_8_EXT); - if (srcFormat != GL_DEPTH_COMPONENT && ctx->Pixel.DepthScale == 1.0f && + if (srcFormat == GL_DEPTH_STENCIL && ctx->Pixel.DepthScale == 1.0f && ctx->Pixel.DepthBias == 0.0f && !srcPacking->SwapBytes) { /* simple path */ @@ -3322,15 +3325,16 @@ _mesa_texstore_z24_s8(TEXSTORE_PARAMS) srcWidth, srcHeight, srcDepth, srcFormat, srcType, srcAddr, srcPacking); } - else if (srcFormat == GL_DEPTH_COMPONENT) { + else if (srcFormat == GL_DEPTH_COMPONENT || + srcFormat == GL_STENCIL_INDEX) { /* In case we only upload depth we need to preserve the stencil */ for (img = 0; img < srcDepth; img++) { GLuint *dstRow = (GLuint *) dstAddr + dstImageOffsets[dstZoffset + img] + dstYoffset * dstRowStride / sizeof(GLuint) + dstXoffset; - const GLuint *src - = (const GLuint *) _mesa_image_address(dims, srcPacking, srcAddr, + const GLubyte *src + = (const GLubyte *) _mesa_image_address(dims, srcPacking, srcAddr, srcWidth, srcHeight, srcFormat, srcType, img, 0, 0); @@ -3387,8 +3391,7 @@ _mesa_texstore_s8_z24(TEXSTORE_PARAMS) { const GLuint depthScale = 0xffffff; const GLint srcRowStride - = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType) - / sizeof(GLuint); + = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType); GLint img, row; ASSERT(dstFormat == MESA_FORMAT_S8_Z24); @@ -3403,8 +3406,8 @@ _mesa_texstore_s8_z24(TEXSTORE_PARAMS) + dstImageOffsets[dstZoffset + img] + dstYoffset * dstRowStride / sizeof(GLuint) + dstXoffset; - const GLuint *src - = (const GLuint *) _mesa_image_address(dims, srcPacking, srcAddr, + const GLubyte *src + = (const GLubyte *) _mesa_image_address(dims, srcPacking, srcAddr, srcWidth, srcHeight, srcFormat, srcType, img, 0, 0); @@ -3476,8 +3479,7 @@ _mesa_texstore_s8(TEXSTORE_PARAMS) } else { const GLint srcRowStride - = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType) - / sizeof(GLuint); + = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType); GLint img, row; for (img = 0; img < srcDepth; img++) { @@ -3485,8 +3487,8 @@ _mesa_texstore_s8(TEXSTORE_PARAMS) + dstImageOffsets[dstZoffset + img] + dstYoffset * dstRowStride / sizeof(GLuint) + dstXoffset; - const GLuint *src - = (const GLuint *) _mesa_image_address(dims, srcPacking, srcAddr, + const GLubyte *src + = (const GLubyte *) _mesa_image_address(dims, srcPacking, srcAddr, srcWidth, srcHeight, srcFormat, srcType, img, 0, 0); @@ -4285,6 +4287,72 @@ _mesa_texstore_r11_g11_b10f(TEXSTORE_PARAMS) } +static GLboolean +_mesa_texstore_z32f_x24s8(TEXSTORE_PARAMS) +{ + ASSERT(dstFormat == MESA_FORMAT_Z32_FLOAT_X24S8); + ASSERT(srcFormat == GL_DEPTH_STENCIL || + srcFormat == GL_DEPTH_COMPONENT || + srcFormat == GL_STENCIL_INDEX); + ASSERT(srcFormat != GL_DEPTH_STENCIL || + srcType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + + if (srcFormat == GL_DEPTH_STENCIL && + ctx->Pixel.DepthScale == 1.0f && + ctx->Pixel.DepthBias == 0.0f && + !srcPacking->SwapBytes) { + /* simple path */ + memcpy_texture(ctx, dims, + dstFormat, dstAddr, dstXoffset, dstYoffset, dstZoffset, + dstRowStride, + dstImageOffsets, + srcWidth, srcHeight, srcDepth, srcFormat, srcType, + srcAddr, srcPacking); + } + else if (srcFormat == GL_DEPTH_COMPONENT || + srcFormat == GL_STENCIL_INDEX) { + GLint img, row; + const GLint srcRowStride + = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType) + / sizeof(uint64_t); + + /* In case we only upload depth we need to preserve the stencil */ + for (img = 0; img < srcDepth; img++) { + uint64_t *dstRow = (uint64_t *) dstAddr + + dstImageOffsets[dstZoffset + img] + + dstYoffset * dstRowStride / sizeof(uint64_t) + + dstXoffset; + const uint64_t *src + = (const uint64_t *) _mesa_image_address(dims, srcPacking, srcAddr, + srcWidth, srcHeight, + srcFormat, srcType, + img, 0, 0); + for (row = 0; row < srcHeight; row++) { + /* The unpack functions with: + * dstType = GL_FLOAT_32_UNSIGNED_INT_24_8_REV + * only write their own dword, so the other dword (stencil + * or depth) is preserved. */ + if (srcFormat != GL_STENCIL_INDEX) + _mesa_unpack_depth_span(ctx, srcWidth, + GL_FLOAT_32_UNSIGNED_INT_24_8_REV, /* dst type */ + dstRow, /* dst addr */ + 1.0f, srcType, src, srcPacking); + + if (srcFormat != GL_DEPTH_COMPONENT) + _mesa_unpack_stencil_span(ctx, srcWidth, + GL_FLOAT_32_UNSIGNED_INT_24_8_REV, /* dst type */ + dstRow, /* dst addr */ + srcType, src, srcPacking, + ctx->_ImageTransferState); + + src += srcRowStride; + dstRow += dstRowStride / sizeof(uint64_t); + } + } + } + return GL_TRUE; +} + /** * Table mapping MESA_FORMAT_* to _mesa_texstore_*() @@ -4419,6 +4487,9 @@ texstore_funcs[MESA_FORMAT_COUNT] = { MESA_FORMAT_RGB9_E5_FLOAT, _mesa_texstore_rgb9_e5 }, { MESA_FORMAT_R11_G11_B10_FLOAT, _mesa_texstore_r11_g11_b10f }, + + { MESA_FORMAT_Z32_FLOAT, _mesa_texstore_z32 }, + { MESA_FORMAT_Z32_FLOAT_X24S8, _mesa_texstore_z32f_x24s8 }, }; @@ -4506,8 +4577,7 @@ texture_row_stride(const struct gl_texture_image *texImage) /** - * This is the software fallback for Driver.TexImage1D() - * and Driver.CopyTexImage1D(). + * This is the software fallback for Driver.TexImage1D(). * \sa _mesa_store_teximage2d() */ void @@ -4558,8 +4628,7 @@ _mesa_store_teximage1d(struct gl_context *ctx, GLenum target, GLint level, /** - * This is the software fallback for Driver.TexImage2D() - * and Driver.CopyTexImage2D(). + * This is the software fallback for Driver.TexImage2D(). * * This function is oriented toward storing images in main memory, rather * than VRAM. Device driver's can easily plug in their own replacement. @@ -4613,8 +4682,7 @@ _mesa_store_teximage2d(struct gl_context *ctx, GLenum target, GLint level, /** - * This is the software fallback for Driver.TexImage3D() - * and Driver.CopyTexImage3D(). + * This is the software fallback for Driver.TexImage3D(). * \sa _mesa_store_teximage2d() */ void diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c index ce4863faf78..1329af4cd7e 100644 --- a/src/mesa/main/uniforms.c +++ b/src/mesa/main/uniforms.c @@ -582,7 +582,7 @@ _mesa_update_shader_textures_used(struct gl_program *prog) if (prog->SamplersUsed & (1 << s)) { GLuint unit = prog->SamplerUnits[s]; GLuint tgt = prog->SamplerTargets[s]; - assert(unit < MAX_TEXTURE_IMAGE_UNITS); + assert(unit < Elements(prog->TexturesUsed)); assert(tgt < NUM_TEXTURE_TARGETS); prog->TexturesUsed[unit] |= (1 << tgt); } @@ -676,7 +676,7 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program, GLuint texUnit = ((GLuint *) values)[i]; /* check that the sampler (tex unit index) is legal */ - if (texUnit >= ctx->Const.MaxTextureImageUnits) { + if (texUnit >= ctx->Const.MaxCombinedTextureImageUnits) { _mesa_error(ctx, GL_INVALID_VALUE, "glUniform1(invalid sampler/tex unit index for '%s')", param->Name); diff --git a/src/mesa/main/version.h b/src/mesa/main/version.h index 2e6335846e3..0a0512c339d 100644 --- a/src/mesa/main/version.h +++ b/src/mesa/main/version.h @@ -33,9 +33,9 @@ struct gl_context; /* Mesa version */ #define MESA_MAJOR 7 -#define MESA_MINOR 11 +#define MESA_MINOR 12 #define MESA_PATCH 0 -#define MESA_VERSION_STRING "7.11-devel" +#define MESA_VERSION_STRING "7.12-devel" /* To make version comparison easy */ #define MESA_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c)) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index f27492749bd..debadb9a398 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -134,7 +134,7 @@ src_reg::src_reg(dst_reg reg) this->index = reg.index; this->swizzle = SWIZZLE_XYZW; this->negate = 0; - this->reladdr = NULL; + this->reladdr = reg.reladdr; } dst_reg::dst_reg(src_reg reg) @@ -331,20 +331,6 @@ dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP); dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X); -static void -fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); - -static void -fail_link(struct gl_shader_program *prog, const char *fmt, ...) -{ - va_list args; - va_start(args, fmt); - ralloc_vasprintf_append(&prog->InfoLog, fmt, args); - va_end(args); - - prog->LinkStatus = GL_FALSE; -} - static int swizzle_for_size(int size) { @@ -789,10 +775,11 @@ ir_to_mesa_visitor::visit(ir_variable *ir) if (storage->file == PROGRAM_TEMPORARY && dst.index != storage->index + (int) ir->num_state_slots) { - fail_link(this->shader_program, - "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", - ir->name, dst.index - storage->index, - type_size(ir->type)); + linker_error(this->shader_program, + "failed to load builtin uniform `%s' " + "(%d/%d regs loaded)\n", + ir->name, dst.index - storage->index, + type_size(ir->type)); } } } @@ -803,48 +790,44 @@ ir_to_mesa_visitor::visit(ir_loop *ir) ir_dereference_variable *counter = NULL; if (ir->counter != NULL) - counter = new(ir) ir_dereference_variable(ir->counter); + counter = new(mem_ctx) ir_dereference_variable(ir->counter); if (ir->from != NULL) { assert(ir->counter != NULL); - ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); + ir_assignment *a = + new(mem_ctx) ir_assignment(counter, ir->from, NULL); a->accept(this); - delete a; } emit(NULL, OPCODE_BGNLOOP); if (ir->to) { ir_expression *e = - new(ir) ir_expression(ir->cmp, glsl_type::bool_type, - counter, ir->to); - ir_if *if_stmt = new(ir) ir_if(e); + new(mem_ctx) ir_expression(ir->cmp, glsl_type::bool_type, + counter, ir->to); + ir_if *if_stmt = new(mem_ctx) ir_if(e); - ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); + ir_loop_jump *brk = + new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break); if_stmt->then_instructions.push_tail(brk); if_stmt->accept(this); - - delete if_stmt; - delete e; - delete brk; } visit_exec_list(&ir->body_instructions, this); if (ir->increment) { ir_expression *e = - new(ir) ir_expression(ir_binop_add, counter->type, - counter, ir->increment); + new(mem_ctx) ir_expression(ir_binop_add, counter->type, + counter, ir->increment); - ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); + ir_assignment *a = + new(mem_ctx) ir_assignment(counter, e, NULL); a->accept(this); - delete a; - delete e; } emit(NULL, OPCODE_ENDLOOP); @@ -1296,8 +1279,11 @@ ir_to_mesa_visitor::visit(ir_expression *ir) emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); break; case ir_unop_i2f: + case ir_unop_u2f: case ir_unop_b2f: case ir_unop_b2i: + case ir_unop_i2u: + case ir_unop_u2i: /* Mesa IR lacks types, ints are stored as truncated floats. */ result_src = op[0]; break; @@ -1335,7 +1321,6 @@ ir_to_mesa_visitor::visit(ir_expression *ir) break; case ir_unop_bit_not: - case ir_unop_u2f: case ir_binop_lshift: case ir_binop_rshift: case ir_binop_bit_and: @@ -1417,9 +1402,9 @@ ir_to_mesa_visitor::visit(ir_dereference_variable *ir) case ir_var_in: case ir_var_inout: /* The linker assigns locations for varyings and attributes, - * including deprecated builtins (like gl_Color), user-assign - * generic attributes (glBindVertexLocation), and - * user-defined varyings. + * including deprecated builtins (like gl_Color), + * user-assigned generic attributes (glBindVertexLocation), + * and user-defined varyings. * * FINISHME: We would hit this path for function arguments. Fix! */ @@ -1498,6 +1483,18 @@ ir_to_mesa_visitor::visit(ir_dereference_array *ir) this->result, src_reg_for_float(element_size)); } + /* If there was already a relative address register involved, add the + * new and the old together to get the new offset. + */ + if (src.reladdr != NULL) { + src_reg accum_reg = get_temp(glsl_type::float_type); + + emit(ir, OPCODE_ADD, dst_reg(accum_reg), + index_reg, *src.reladdr); + + index_reg = accum_reg; + } + src.reladdr = ralloc(mem_ctx, src_reg); memcpy(src.reladdr, &index_reg, sizeof(index_reg)); } @@ -2403,29 +2400,32 @@ check_resources(const struct gl_context *ctx, case GL_VERTEX_PROGRAM_ARB: if (_mesa_bitcount(prog->SamplersUsed) > ctx->Const.MaxVertexTextureImageUnits) { - fail_link(shader_program, "Too many vertex shader texture samplers"); + linker_error(shader_program, + "Too many vertex shader texture samplers"); } if (prog->Parameters->NumParameters > MAX_UNIFORMS) { - fail_link(shader_program, "Too many vertex shader constants"); + linker_error(shader_program, "Too many vertex shader constants"); } break; case MESA_GEOMETRY_PROGRAM: if (_mesa_bitcount(prog->SamplersUsed) > ctx->Const.MaxGeometryTextureImageUnits) { - fail_link(shader_program, "Too many geometry shader texture samplers"); + linker_error(shader_program, + "Too many geometry shader texture samplers"); } if (prog->Parameters->NumParameters > MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) { - fail_link(shader_program, "Too many geometry shader constants"); + linker_error(shader_program, "Too many geometry shader constants"); } break; case GL_FRAGMENT_PROGRAM_ARB: if (_mesa_bitcount(prog->SamplersUsed) > ctx->Const.MaxTextureImageUnits) { - fail_link(shader_program, "Too many fragment shader texture samplers"); + linker_error(shader_program, + "Too many fragment shader texture samplers"); } if (prog->Parameters->NumParameters > MAX_UNIFORMS) { - fail_link(shader_program, "Too many fragment shader constants"); + linker_error(shader_program, "Too many fragment shader constants"); } break; default: @@ -2540,9 +2540,10 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, * from _mesa_add_uniform) has to match what the linker chose. */ if (index != parameter_index) { - fail_link(shader_program, "Allocation of uniform `%s' to target " - "failed (%d vs %d)\n", - uniform->Name, index, parameter_index); + linker_error(shader_program, + "Allocation of uniform `%s' to target failed " + "(%d vs %d)\n", + uniform->Name, index, parameter_index); } } } @@ -2575,8 +2576,8 @@ set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, int loc = _mesa_get_uniform_location(ctx, shader_program, name); if (loc == -1) { - fail_link(shader_program, - "Couldn't find uniform for initializer %s\n", name); + linker_error(shader_program, + "Couldn't find uniform for initializer %s\n", name); return; } @@ -2976,11 +2977,31 @@ get_mesa_program(struct gl_context *ctx, if (mesa_inst->SrcReg[src].RelAddr) prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File; - if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) { - fail_link(shader_program, "Couldn't flatten if statement\n"); - } - switch (mesa_inst->Opcode) { + case OPCODE_IF: + if (options->EmitNoIfs) { + linker_warning(shader_program, + "Couldn't flatten if-statement. " + "This will likely result in software " + "rasterization.\n"); + } + break; + case OPCODE_BGNLOOP: + if (options->EmitNoLoops) { + linker_warning(shader_program, + "Couldn't unroll loop. " + "This will likely result in software " + "rasterization.\n"); + } + break; + case OPCODE_CONT: + if (options->EmitNoCont) { + linker_warning(shader_program, + "Couldn't lower continue-statement. " + "This will likely result in software " + "rasterization.\n"); + } + break; case OPCODE_BGNSUB: inst->function->inst = i; mesa_inst->Comment = strdup(inst->function->sig->function_name()); @@ -3248,7 +3269,7 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) for (i = 0; i < prog->NumShaders; i++) { if (!prog->Shaders[i]->CompileStatus) { - fail_link(prog, "linking with uncompiled shader"); + linker_error(prog, "linking with uncompiled shader"); prog->LinkStatus = GL_FALSE; } } diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c index 11debc485eb..f4a7a638d5f 100644 --- a/src/mesa/program/prog_optimize.c +++ b/src/mesa/program/prog_optimize.c @@ -1304,6 +1304,9 @@ _mesa_simplify_cmp(struct gl_program * program) assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); prevWriteMask = tempWrites[inst->DstReg.Index]; tempWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask; + } else { + /* No other register type can be a destination register. */ + continue; } /* For a CMP to be considered a conditional write, the destination @@ -1316,6 +1319,15 @@ _mesa_simplify_cmp(struct gl_program * program) inst->Opcode = OPCODE_MOV; inst->SrcReg[0] = inst->SrcReg[1]; + + /* Unused operands are expected to have the file set to + * PROGRAM_UNDEFINED. This is how _mesa_init_instructions initializes + * all of the sources. + */ + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + inst->SrcReg[2].File = PROGRAM_UNDEFINED; + inst->SrcReg[2].Swizzle = SWIZZLE_NOOP; } } if (dbg) { diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index 4f2b6270501..ecff2344a44 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -422,12 +422,15 @@ _mesa_lookup_program(struct gl_context *ctx, GLuint id) /** * Reference counting for vertex/fragment programs + * This is normally only called from the _mesa_reference_program() macro + * when there's a real pointer change. */ void -_mesa_reference_program(struct gl_context *ctx, - struct gl_program **ptr, - struct gl_program *prog) +_mesa_reference_program_(struct gl_context *ctx, + struct gl_program **ptr, + struct gl_program *prog) { +#ifndef NDEBUG assert(ptr); if (*ptr && prog) { /* sanity check */ @@ -439,9 +442,8 @@ _mesa_reference_program(struct gl_context *ctx, else if ((*ptr)->Target == MESA_GEOMETRY_PROGRAM) ASSERT(prog->Target == MESA_GEOMETRY_PROGRAM); } - if (*ptr == prog) { - return; /* no change */ - } +#endif + if (*ptr) { GLboolean deleteFlag; diff --git a/src/mesa/program/program.h b/src/mesa/program/program.h index ce37b95bf82..0f32a6af73b 100644 --- a/src/mesa/program/program.h +++ b/src/mesa/program/program.h @@ -89,9 +89,18 @@ extern struct gl_program * _mesa_lookup_program(struct gl_context *ctx, GLuint id); extern void +_mesa_reference_program_(struct gl_context *ctx, + struct gl_program **ptr, + struct gl_program *prog); + +static INLINE void _mesa_reference_program(struct gl_context *ctx, struct gl_program **ptr, - struct gl_program *prog); + struct gl_program *prog) +{ + if (*ptr != prog) + _mesa_reference_program_(ctx, ptr, prog); +} static INLINE void _mesa_reference_vertprog(struct gl_context *ctx, diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c index 800a9f1f0e0..3115a2511ce 100644 --- a/src/mesa/state_tracker/st_atom_texture.c +++ b/src/mesa/state_tracker/st_atom_texture.c @@ -221,9 +221,9 @@ update_single_texture(struct st_context *st, struct pipe_sampler_view **sampler_ if ((samp->sRGBDecode == GL_SKIP_DECODE_EXT) && (_mesa_get_format_color_encoding(texFormat) == GL_SRGB)) { - /* don't do sRGB->RGB conversion. Interpret the texture - * texture data as linear values. - */ + /* Don't do sRGB->RGB conversion. Interpret the texture data as + * linear values. + */ const gl_format linearFormat = _mesa_get_srgb_format_linear(texFormat); firstImageFormat = st_mesa_format_to_pipe_format(linearFormat); diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index f0750b518ad..beb5e7cab31 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -351,8 +351,8 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized, if(!normalized) { - sRight = width; - tBot = height; + sRight = (GLfloat) width; + tBot = (GLfloat) height; } /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as @@ -404,7 +404,7 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized, /* same for all verts: */ for (i = 0; i < 4; i++) { st->bitmap.vertices[i][0][2] = z; - st->bitmap.vertices[i][0][3] = 1.0; + st->bitmap.vertices[i][0][3] = 1.0f; st->bitmap.vertices[i][1][0] = color[0]; st->bitmap.vertices[i][1][1] = color[1]; st->bitmap.vertices[i][1][2] = color[2]; @@ -536,7 +536,7 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, cso_set_vertex_elements(cso, 3, st->velems_util_draw); /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */ - z = z * 2.0 - 1.0; + z = z * 2.0f - 1.0f; /* draw textured quad */ offset = setup_bitmap_vertex_data(st, diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c index 416be194d11..626db12431d 100644 --- a/src/mesa/state_tracker/st_cb_blit.c +++ b/src/mesa/state_tracker/st_cb_blit.c @@ -62,6 +62,82 @@ st_destroy_blit(struct st_context *st) #if FEATURE_EXT_framebuffer_blit static void +st_BlitFramebuffer_resolve(struct gl_context *ctx, + GLbitfield mask, + struct pipe_resolve_info *info) +{ + const GLbitfield depthStencil = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + + struct st_context *st = st_context(ctx); + + struct st_renderbuffer *srcRb, *dstRb; + + if (mask & GL_COLOR_BUFFER_BIT) { + srcRb = st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer); + dstRb = st_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]); + + info->mask = PIPE_MASK_RGBA; + + info->src.res = srcRb->texture; + info->src.layer = srcRb->surface->u.tex.first_layer; + info->dst.res = dstRb->texture; + info->dst.level = dstRb->surface->u.tex.level; + info->dst.layer = dstRb->surface->u.tex.first_layer; + + st->pipe->resource_resolve(st->pipe, info); + } + + if (mask & depthStencil) { + struct gl_renderbuffer_attachment *srcDepth, *srcStencil; + struct gl_renderbuffer_attachment *dstDepth, *dstStencil; + boolean combined; + + srcDepth = &ctx->ReadBuffer->Attachment[BUFFER_DEPTH]; + dstDepth = &ctx->DrawBuffer->Attachment[BUFFER_DEPTH]; + srcStencil = &ctx->ReadBuffer->Attachment[BUFFER_STENCIL]; + dstStencil = &ctx->DrawBuffer->Attachment[BUFFER_STENCIL]; + + combined = + st_is_depth_stencil_combined(srcDepth, srcStencil) && + st_is_depth_stencil_combined(dstDepth, dstStencil); + + if ((mask & GL_DEPTH_BUFFER_BIT) || combined) { + /* resolve depth and, if combined and requested, stencil as well */ + srcRb = st_renderbuffer(srcDepth->Renderbuffer); + dstRb = st_renderbuffer(dstDepth->Renderbuffer); + + info->mask = (mask & GL_DEPTH_BUFFER_BIT) ? PIPE_MASK_Z : 0; + if (combined && (mask & GL_STENCIL_BUFFER_BIT)) + info->mask |= PIPE_MASK_S; + + info->src.res = srcRb->texture; + info->src.layer = srcRb->surface->u.tex.first_layer; + info->dst.res = dstRb->texture; + info->dst.level = dstRb->surface->u.tex.level; + info->dst.layer = dstRb->surface->u.tex.first_layer; + + st->pipe->resource_resolve(st->pipe, info); + } + + if (mask & GL_STENCIL_BUFFER_BIT) { + /* resolve separate stencil buffer */ + srcRb = st_renderbuffer(srcStencil->Renderbuffer); + dstRb = st_renderbuffer(dstStencil->Renderbuffer); + + info->mask = PIPE_MASK_S; + + info->src.res = srcRb->texture; + info->src.layer = srcRb->surface->u.tex.first_layer; + info->dst.res = dstRb->texture; + info->dst.level = dstRb->surface->u.tex.level; + info->dst.layer = dstRb->surface->u.tex.first_layer; + + st->pipe->resource_resolve(st->pipe, info); + } + } +} + +static void st_BlitFramebuffer(struct gl_context *ctx, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, @@ -95,6 +171,42 @@ st_BlitFramebuffer(struct gl_context *ctx, srcY1 = readFB->Height - srcY1; } + /* Disable conditional rendering. */ + if (st->render_condition) { + st->pipe->render_condition(st->pipe, NULL, 0); + } + + if (readFB->Visual.sampleBuffers > drawFB->Visual.sampleBuffers) { + struct pipe_resolve_info info; + + if (dstX0 < dstX1) { + info.dst.x0 = dstX0; + info.dst.x1 = dstX1; + info.src.x0 = srcX0; + info.src.x1 = srcX1; + } else { + info.dst.x0 = dstX1; + info.dst.x1 = dstX0; + info.src.x0 = srcX1; + info.src.x1 = srcX0; + } + if (dstY0 < dstY1) { + info.dst.y0 = dstY0; + info.dst.y1 = dstY1; + info.src.y0 = srcY0; + info.src.y1 = srcY1; + } else { + info.dst.y0 = dstY1; + info.dst.y1 = dstY0; + info.src.y0 = srcY1; + info.src.y1 = srcY0; + } + + st_BlitFramebuffer_resolve(ctx, mask, &info); /* filter doesn't apply */ + + goto done; + } + if (srcY0 > srcY1 && dstY0 > dstY1) { /* Both src and dst are upside down. Swap Y to make it * right-side up to increase odds of using a fast path. @@ -109,11 +221,6 @@ st_BlitFramebuffer(struct gl_context *ctx, dstY1 = tmp; } - /* Disable conditional rendering. */ - if (st->render_condition) { - st->pipe->render_condition(st->pipe, NULL, 0); - } - if (mask & GL_COLOR_BUFFER_BIT) { struct gl_renderbuffer_attachment *srcAtt = &readFB->Attachment[readFB->_ColorReadBufferIndex]; diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index 181fedd2b99..117000ba716 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -381,7 +381,8 @@ check_clear_depth_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuff assert(rb->Format == MESA_FORMAT_S8 || rb->Format == MESA_FORMAT_Z24_S8 || - rb->Format == MESA_FORMAT_S8_Z24); + rb->Format == MESA_FORMAT_S8_Z24 || + rb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); if (ctx->Scissor.Enabled && (ctx->Scissor.X != 0 || @@ -436,7 +437,8 @@ check_clear_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb assert(rb->Format == MESA_FORMAT_S8 || rb->Format == MESA_FORMAT_Z24_S8 || - rb->Format == MESA_FORMAT_S8_Z24); + rb->Format == MESA_FORMAT_S8_Z24 || + rb->Format == MESA_FORMAT_Z32_FLOAT_X24S8); if (maskStencil) return GL_TRUE; diff --git a/src/mesa/state_tracker/st_cb_condrender.c b/src/mesa/state_tracker/st_cb_condrender.c index 64c6c117fca..1ced560e160 100644 --- a/src/mesa/state_tracker/st_cb_condrender.c +++ b/src/mesa/state_tracker/st_cb_condrender.c @@ -41,6 +41,7 @@ #include "st_context.h" #include "st_cb_queryobj.h" #include "st_cb_condrender.h" +#include "st_cb_bitmap.h" /** @@ -55,6 +56,8 @@ st_BeginConditionalRender(struct gl_context *ctx, struct gl_query_object *q, struct pipe_context *pipe = st->pipe; uint m; + st_flush_bitmap_cache(st); + switch (mode) { case GL_QUERY_WAIT: m = PIPE_RENDER_COND_WAIT; @@ -90,6 +93,8 @@ st_EndConditionalRender(struct gl_context *ctx, struct gl_query_object *q) struct pipe_context *pipe = st->pipe; (void) q; + st_flush_bitmap_cache(st); + pipe->render_condition(pipe, NULL, 0); st->render_condition = NULL; } diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 0c4dc23ccf7..390c518699f 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -317,26 +317,6 @@ make_passthrough_vertex_shader(struct st_context *st, /** - * Return a texture base format for drawing/copying an image - * of the given format. - */ -static GLenum -base_format(GLenum format) -{ - switch (format) { - case GL_DEPTH_COMPONENT: - return GL_DEPTH_COMPONENT; - case GL_DEPTH_STENCIL: - return GL_DEPTH_STENCIL; - case GL_STENCIL_INDEX: - return GL_STENCIL_INDEX; - default: - return GL_RGBA; - } -} - - -/** * Return a texture internalFormat for drawing/copying an image * of the given format and type. */ @@ -345,11 +325,36 @@ internal_format(struct gl_context *ctx, GLenum format, GLenum type) { switch (format) { case GL_DEPTH_COMPONENT: - return GL_DEPTH_COMPONENT; + switch (type) { + case GL_UNSIGNED_SHORT: + return GL_DEPTH_COMPONENT16; + + case GL_UNSIGNED_INT: + return GL_DEPTH_COMPONENT32; + + case GL_FLOAT: + if (ctx->Extensions.ARB_depth_buffer_float) + return GL_DEPTH_COMPONENT32F; + else + return GL_DEPTH_COMPONENT; + + default: + return GL_DEPTH_COMPONENT; + } + case GL_DEPTH_STENCIL: - return GL_DEPTH_STENCIL; + switch (type) { + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + return GL_DEPTH32F_STENCIL8; + + case GL_UNSIGNED_INT_24_8: + default: + return GL_DEPTH24_STENCIL8; + } + case GL_STENCIL_INDEX: return GL_STENCIL_INDEX; + default: if (_mesa_is_integer_format(format)) { switch (type) { @@ -856,6 +861,7 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, for (row = 0; row < height; row++) { GLubyte sValues[MAX_WIDTH]; GLuint zValues[MAX_WIDTH]; + GLfloat *zValuesFloat = (GLfloat*)zValues; GLenum destType = GL_UNSIGNED_BYTE; const GLvoid *source = _mesa_image_address2d(&clippedUnpack, pixels, width, height, @@ -866,7 +872,11 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, ctx->_ImageTransferState); if (format == GL_DEPTH_STENCIL) { - _mesa_unpack_depth_span(ctx, spanWidth, GL_UNSIGNED_INT, zValues, + GLenum ztype = + pt->resource->format == PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED ? + GL_FLOAT : GL_UNSIGNED_INT; + + _mesa_unpack_depth_span(ctx, spanWidth, ztype, zValues, (1 << 24) - 1, type, source, &clippedUnpack); } @@ -931,6 +941,26 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, } } break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + if (format == GL_DEPTH_STENCIL) { + uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); + GLfloat *destf = (GLfloat*)dest; + GLint k; + assert(usage == PIPE_TRANSFER_WRITE); + for (k = 0; k < spanWidth; k++) { + destf[k*2] = zValuesFloat[k]; + dest[k*2+1] = sValues[k] & 0xff; + } + } + else { + uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); + GLint k; + assert(usage == PIPE_TRANSFER_READ_WRITE); + for (k = 0; k < spanWidth; k++) { + dest[k*2+1] = sValues[k] & 0xff; + } + } + break; default: assert(0); } @@ -1018,7 +1048,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE; struct pipe_sampler_view *sv[2]; int num_sampler_view = 1; - enum pipe_format stencil_format = PIPE_FORMAT_NONE; struct st_fp_variant *fpv; if (format == GL_DEPTH_STENCIL) @@ -1028,23 +1057,12 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, else if (format == GL_DEPTH_COMPONENT) write_depth = GL_TRUE; - if (write_stencil) { - enum pipe_format tex_format; - /* can we write to stencil if not fallback */ - if (!pipe->screen->get_param(pipe->screen, PIPE_CAP_SHADER_STENCIL_EXPORT)) - goto stencil_fallback; - - tex_format = st_choose_format(st->pipe->screen, base_format(format), - PIPE_TEXTURE_2D, - 0, PIPE_BIND_SAMPLER_VIEW); - if (tex_format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) - stencil_format = PIPE_FORMAT_X24S8_USCALED; - else if (tex_format == PIPE_FORMAT_S8_USCALED_Z24_UNORM) - stencil_format = PIPE_FORMAT_S8X24_USCALED; - else - stencil_format = PIPE_FORMAT_S8_USCALED; - if (stencil_format == PIPE_FORMAT_NONE) - goto stencil_fallback; + if (write_stencil && + !pipe->screen->get_param(pipe->screen, PIPE_CAP_SHADER_STENCIL_EXPORT)) { + /* software fallback */ + draw_stencil_pixels(ctx, x, y, width, height, format, type, + unpack, pixels); + return; } /* Mesa state should be up to date by now */ @@ -1089,7 +1107,32 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, sv[0] = st_create_texture_sampler_view(st->pipe, pt); if (sv[0]) { - if (write_stencil) { + /* Create a second sampler view to read stencil. + * The stencil is written using the shader stencil export + * functionality. */ + if (write_stencil) { + enum pipe_format stencil_format = PIPE_FORMAT_NONE; + + switch (pt->format) { + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + case PIPE_FORMAT_X24S8_USCALED: + stencil_format = PIPE_FORMAT_X24S8_USCALED; + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + case PIPE_FORMAT_S8X24_USCALED: + stencil_format = PIPE_FORMAT_S8X24_USCALED; + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + case PIPE_FORMAT_X32_S8X24_USCALED: + stencil_format = PIPE_FORMAT_X32_S8X24_USCALED; + break; + case PIPE_FORMAT_S8_USCALED: + stencil_format = PIPE_FORMAT_S8_USCALED; + break; + default: + assert(0); + } + sv[1] = st_create_texture_sampler_view_format(st->pipe, pt, stencil_format); num_sampler_view++; @@ -1110,11 +1153,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, pipe_resource_reference(&pt, NULL); } } - return; - -stencil_fallback: - draw_stencil_pixels(ctx, x, y, width, height, format, type, - unpack, pixels); } @@ -1231,6 +1269,18 @@ copy_stencil_pixels(struct gl_context *ctx, GLint srcx, GLint srcy, assert(usage == PIPE_TRANSFER_WRITE); memcpy(dst, src, width); break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + { + uint *dst4 = (uint *) dst; + int j; + dst4++; + assert(usage == PIPE_TRANSFER_READ_WRITE); + for (j = 0; j < width; j++) { + *dst4 = src[j] & 0xff; + dst4 += 2; + } + } + break; default: assert(0); } @@ -1443,13 +1493,14 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, /* srcFormat can't be used as a texture format */ if (type == GL_DEPTH) { texFormat = st_choose_format(screen, GL_DEPTH_COMPONENT, - st->internal_target, sample_count, - PIPE_BIND_DEPTH_STENCIL); + GL_NONE, GL_NONE, st->internal_target, + sample_count, PIPE_BIND_DEPTH_STENCIL); assert(texFormat != PIPE_FORMAT_NONE); } else { /* default color format */ - texFormat = st_choose_format(screen, GL_RGBA, st->internal_target, + texFormat = st_choose_format(screen, GL_RGBA, + GL_NONE, GL_NONE, st->internal_target, sample_count, PIPE_BIND_SAMPLER_VIEW); assert(texFormat != PIPE_FORMAT_NONE); } diff --git a/src/mesa/state_tracker/st_cb_queryobj.c b/src/mesa/state_tracker/st_cb_queryobj.c index d0ac253bcec..057499615bf 100644 --- a/src/mesa/state_tracker/st_cb_queryobj.c +++ b/src/mesa/state_tracker/st_cb_queryobj.c @@ -41,6 +41,7 @@ #include "pipe/p_defines.h" #include "st_context.h" #include "st_cb_queryobj.h" +#include "st_cb_bitmap.h" #if FEATURE_queryobj @@ -83,6 +84,8 @@ st_BeginQuery(struct gl_context *ctx, struct gl_query_object *q) struct st_query_object *stq = st_query_object(q); unsigned type; + st_flush_bitmap_cache(st_context(ctx)); + /* convert GL query type to Gallium query type */ switch (q->Target) { case GL_ANY_SAMPLES_PASSED: @@ -128,6 +131,8 @@ st_EndQuery(struct gl_context *ctx, struct gl_query_object *q) struct pipe_context *pipe = st_context(ctx)->pipe; struct st_query_object *stq = st_query_object(q); + st_flush_bitmap_cache(st_context(ctx)); + pipe->end_query(pipe, stq->pq); } diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 67926e39297..e2b29fe3068 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -151,6 +151,24 @@ st_read_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, } } break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + if (format == GL_DEPTH_STENCIL) { + const uint *src = (uint *) (stmap + srcY * pt->stride); + const GLfloat *srcf = (const GLfloat*)src; + GLint k; + for (k = 0; k < width; k++) { + zValues[k] = srcf[k*2]; + sValues[k] = src[k*2+1] & 0xff; + } + } + else { + const uint *src = (uint *) (stmap + srcY * pt->stride); + GLint k; + for (k = 0; k < width; k++) { + sValues[k] = src[k*2+1] & 0xff; + } + } + break; default: assert(0); } @@ -159,7 +177,7 @@ st_read_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, dest = _mesa_image_address2d(packing, pixels, width, height, format, type, j, 0); if (format == GL_DEPTH_STENCIL) { - _mesa_pack_depth_stencil_span(ctx, width, dest, + _mesa_pack_depth_stencil_span(ctx, width, type, dest, zValues, sValues, packing); } else { @@ -568,6 +586,31 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y, GLsizei width, GLsizei h dst += dstStride; } } + else if (pformat == PIPE_FORMAT_Z32_FLOAT) { + for (i = 0; i < height; i++) { + GLfloat zfloat[MAX_WIDTH]; + pipe_get_tile_raw(pipe, trans, 0, y, width, 1, zfloat, 0); + y += yStep; + _mesa_pack_depth_span(ctx, width, dst, type, + zfloat, &clippedPacking); + dst += dstStride; + } + } + else if (pformat == PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED) { + assert(format == GL_DEPTH_COMPONENT); + for (i = 0; i < height; i++) { + GLfloat zfloat[MAX_WIDTH]; /* Z32 */ + GLfloat zfloat2[MAX_WIDTH*2]; /* Z32X32 */ + pipe_get_tile_raw(pipe, trans, 0, y, width, 1, zfloat2, 0); + y += yStep; + for (j = 0; j < width; j++) { + zfloat[j] = zfloat2[j*2]; + } + _mesa_pack_depth_span(ctx, width, dst, type, + zfloat, &clippedPacking); + dst += dstStride; + } + } else { /* RGBA format */ /* Do a row at a time to flip image data vertically */ diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 88f62902b25..25f08aa4d09 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -842,7 +842,7 @@ decompress_with_blit(struct gl_context * ctx, GLenum target, GLint level, else { /* format translation via floats */ GLuint row; - enum pipe_format format = util_format_linear(dst_texture->format); + enum pipe_format pformat = util_format_linear(dst_texture->format); for (row = 0; row < height; row++) { const GLbitfield transferOps = 0x0; /* bypassed for glGetTexImage() */ GLfloat rgba[4 * MAX_WIDTH]; @@ -854,7 +854,7 @@ decompress_with_blit(struct gl_context * ctx, GLenum target, GLint level, /* get float[4] rgba row from surface */ pipe_get_tile_rgba_format(pipe, tex_xfer, 0, row, width, 1, - format, rgba); + pformat, rgba); _mesa_pack_rgba_span_float(ctx, width, (GLfloat (*)[4]) rgba, format, type, dest, &ctx->Pack, transferOps); @@ -1241,7 +1241,8 @@ fallback_copy_texsubimage(struct gl_context *ctx, GLenum target, GLint level, src_trans = pipe_get_transfer(pipe, strb->texture, - 0, 0, + strb->rtt_level, + strb->rtt_face + strb->rtt_slice, PIPE_TRANSFER_READ, srcX, srcY, width, height); @@ -1465,34 +1466,6 @@ st_copy_texsubimage(struct gl_context *ctx, depth/stencil samples per pixel? Need some transfer clarifications. */ assert(sample_count < 2); - if (srcX < 0) { - width -= -srcX; - destX += -srcX; - srcX = 0; - } - - if (srcY < 0) { - height -= -srcY; - destY += -srcY; - srcY = 0; - } - - if (destX < 0) { - width -= -destX; - srcX += -destX; - destX = 0; - } - - if (destY < 0) { - height -= -destY; - srcY += -destY; - destY = 0; - } - - if (width < 0 || height < 0) - return; - - assert(strb); assert(strb->surface); assert(stImage->pt); @@ -1609,59 +1582,6 @@ st_copy_texsubimage(struct gl_context *ctx, static void -st_CopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLint border) -{ - struct gl_texture_unit *texUnit = - &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - - /* Setup or redefine the texture object, texture and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage1D(ctx, target, level, internalFormat, - width, border, - GL_RGBA, CHAN_TYPE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - st_copy_texsubimage(ctx, target, level, - 0, 0, 0, /* destX,Y,Z */ - x, y, width, 1); /* src X, Y, size */ -} - - -static void -st_CopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border) -{ - struct gl_texture_unit *texUnit = - &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - - /* Setup or redefine the texture object, texture and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, - GL_RGBA, CHAN_TYPE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - st_copy_texsubimage(ctx, target, level, - 0, 0, 0, /* destX,Y,Z */ - x, y, width, height); /* src X, Y, size */ -} - - -static void st_CopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width) { @@ -1946,8 +1866,6 @@ st_init_texture_functions(struct dd_function_table *functions) functions->CompressedTexSubImage1D = st_CompressedTexSubImage1D; functions->CompressedTexSubImage2D = st_CompressedTexSubImage2D; functions->CompressedTexSubImage3D = st_CompressedTexSubImage3D; - functions->CopyTexImage1D = st_CopyTexImage1D; - functions->CopyTexImage2D = st_CopyTexImage2D; functions->CopyTexSubImage1D = st_CopyTexSubImage1D; functions->CopyTexSubImage2D = st_CopyTexSubImage2D; functions->CopyTexSubImage3D = st_CopyTexSubImage3D; diff --git a/src/mesa/state_tracker/st_cb_viewport.c b/src/mesa/state_tracker/st_cb_viewport.c index 049755e45c0..d4742eb897d 100644 --- a/src/mesa/state_tracker/st_cb_viewport.c +++ b/src/mesa/state_tracker/st_cb_viewport.c @@ -56,13 +56,20 @@ static void st_viewport(struct gl_context * ctx, GLint x, GLint y, if (!st->invalidate_on_gl_viewport) return; + /* + * Normally we'd want the state tracker manager to mark the drawables + * invalid only when needed. This will force the state tracker manager + * to revalidate the drawable, rather than just update the context with + * the latest cached drawable info. + */ + stdraw = st_ws_framebuffer(st->ctx->DrawBuffer); stread = st_ws_framebuffer(st->ctx->ReadBuffer); - if (stdraw) - p_atomic_set(&stdraw->revalidate, TRUE); - if (stread && stread != stdraw) - p_atomic_set(&stread->revalidate, TRUE); + if (stdraw && stdraw->iface) + stdraw->iface_stamp = p_atomic_read(&stdraw->iface->stamp) - 1; + if (stread && stread != stdraw && stread->iface) + stread->iface_stamp = p_atomic_read(&stread->iface->stamp) - 1; } void st_init_viewport_functions(struct dd_function_table *functions) diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 6eddbfc88e4..6d4bc544d0c 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -179,6 +179,9 @@ struct st_context *st_create_context(gl_api api, struct pipe_context *pipe, st_init_driver_functions(&funcs); ctx = _mesa_create_context(api, visual, shareCtx, &funcs, NULL); + if (!ctx) { + return NULL; + } /* XXX: need a capability bit in gallium to query if the pipe * driver prefers DP4 or MUL/MAD for vertex transformation. diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index ff207039d78..0a322022149 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -204,6 +204,9 @@ struct st_context /* Active render condition. */ struct pipe_query *render_condition; unsigned condition_mode; + + int32_t draw_stamp; + int32_t read_stamp; }; @@ -227,7 +230,8 @@ struct st_framebuffer struct st_framebuffer_iface *iface; enum st_attachment_type statts[ST_ATTACHMENT_COUNT]; unsigned num_statts; - int32_t revalidate; + int32_t stamp; + int32_t iface_stamp; }; diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index fac0ab7a1f7..5040c6fa5ab 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -233,6 +233,22 @@ st_pipe_vertex_format(GLenum type, GLuint size, GLenum format, } +/** + * This is very similar to vbo_all_varyings_in_vbos() but we test + * the stride. See bug 38626. + */ +static GLboolean +all_varyings_in_vbos(const struct gl_client_array *arrays[]) +{ + GLuint i; + + for (i = 0; i < VERT_ATTRIB_MAX; i++) + if (arrays[i]->StrideB && !_mesa_is_bufferobj(arrays[i]->BufferObj)) + return GL_FALSE; + + return GL_TRUE; +} + /** * Examine the active arrays to determine if we have interleaved @@ -648,7 +664,7 @@ st_draw_vbo(struct gl_context *ctx, if (ib) { /* Gallium probably doesn't want this in some cases. */ if (!index_bounds_valid) - if (!vbo_all_varyings_in_vbos(arrays)) + if (!all_varyings_in_vbos(arrays)) vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index); for (i = 0; i < nr_prims; i++) { diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index d3aebe526dd..8e900934054 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -208,6 +208,15 @@ void st_init_limits(struct st_context *st) } +static GLboolean st_get_s3tc_override(void) +{ + const char *override = _mesa_getenv("force_s3tc_enable"); + if (override && !strcmp(override, "true")) + return GL_TRUE; + return GL_FALSE; +} + + /** * Use pipe_screen::get_param() to query PIPE_CAP_ values to determine * which GL extensions are supported. @@ -219,6 +228,7 @@ void st_init_extensions(struct st_context *st) { struct pipe_screen *screen = st->pipe->screen; struct gl_context *ctx = st->ctx; + int i; /* * Extensions that are supported by all Gallium drivers: @@ -426,7 +436,7 @@ void st_init_extensions(struct st_context *st) if (screen->is_format_supported(screen, PIPE_FORMAT_DXT5_RGBA, PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW) && - ctx->Mesa_DXTn) { + (ctx->Mesa_DXTn || st_get_s3tc_override())) { ctx->Extensions.EXT_texture_compression_s3tc = GL_TRUE; ctx->Extensions.S3_s3tc = GL_TRUE; } @@ -596,6 +606,16 @@ void st_init_extensions(struct st_context *st) ctx->Extensions.EXT_packed_float = GL_TRUE; } + /* Maximum sample count. */ + for (i = 16; i > 0; --i) { + if (screen->is_format_supported(screen, PIPE_FORMAT_B8G8R8A8_UNORM, + PIPE_TEXTURE_2D, i, + PIPE_BIND_RENDER_TARGET)) { + ctx->Const.MaxSamples = i; + break; + } + } + if (screen->get_param(screen, PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE)) { ctx->Extensions.ARB_seamless_cube_map = GL_TRUE; ctx->Extensions.AMD_seamless_cubemap_per_texture = GL_TRUE; @@ -607,4 +627,15 @@ void st_init_extensions(struct st_context *st) if (screen->get_param(screen, PIPE_CAP_SM3)) { ctx->Extensions.ARB_shader_texture_lod = GL_TRUE; } + + if (screen->is_format_supported(screen, PIPE_FORMAT_Z32_FLOAT, + PIPE_TEXTURE_2D, 0, + PIPE_BIND_DEPTH_STENCIL | + PIPE_BIND_SAMPLER_VIEW) && + screen->is_format_supported(screen, PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED, + PIPE_TEXTURE_2D, 0, + PIPE_BIND_DEPTH_STENCIL | + PIPE_BIND_SAMPLER_VIEW)) { + ctx->Extensions.ARB_depth_buffer_float = GL_TRUE; + } } diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index 35835712547..bd4f0860c52 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -68,42 +68,70 @@ GLenum st_format_datatype(enum pipe_format format) { const struct util_format_description *desc; + int i; desc = util_format_description(format); assert(desc); + /* Find the first non-VOID channel. */ + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) { if (format == PIPE_FORMAT_B5G5R5A1_UNORM || format == PIPE_FORMAT_B5G6R5_UNORM) { return GL_UNSIGNED_SHORT; } + else if (format == PIPE_FORMAT_R11G11B10_FLOAT || + format == PIPE_FORMAT_R9G9B9E5_FLOAT) { + return GL_FLOAT; + } else if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED || format == PIPE_FORMAT_S8_USCALED_Z24_UNORM || format == PIPE_FORMAT_Z24X8_UNORM || format == PIPE_FORMAT_X8Z24_UNORM) { return GL_UNSIGNED_INT_24_8; } + else if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED) { + return GL_FLOAT_32_UNSIGNED_INT_24_8_REV; + } else { const GLuint size = format_max_bits(format); + + assert(i < 4); + if (i == 4) + return GL_NONE; + if (size == 8) { - if (desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) + if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) return GL_UNSIGNED_BYTE; else return GL_BYTE; } else if (size == 16) { - if (desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) + if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) + return GL_HALF_FLOAT; + if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) return GL_UNSIGNED_SHORT; else return GL_SHORT; } - else { - assert( size <= 32 ); - if (desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) + else if (size <= 32) { + if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) + return GL_FLOAT; + if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) return GL_UNSIGNED_INT; else return GL_INT; } + else { + assert(size == 64); + assert(desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT); + return GL_DOUBLE; + } } } else if (format == PIPE_FORMAT_UYVY) { @@ -180,6 +208,10 @@ st_mesa_format_to_pipe_format(gl_format mesaFormat) return PIPE_FORMAT_Z24X8_UNORM; case MESA_FORMAT_S8: return PIPE_FORMAT_S8_USCALED; + case MESA_FORMAT_Z32_FLOAT: + return PIPE_FORMAT_Z32_FLOAT; + case MESA_FORMAT_Z32_FLOAT_X24S8: + return PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED; case MESA_FORMAT_YCBCR: return PIPE_FORMAT_UYVY; #if FEATURE_texture_s3tc @@ -402,6 +434,10 @@ st_pipe_format_to_mesa_format(enum pipe_format format) return MESA_FORMAT_X8_Z24; case PIPE_FORMAT_Z24_UNORM_S8_USCALED: return MESA_FORMAT_S8_Z24; + case PIPE_FORMAT_Z32_FLOAT: + return MESA_FORMAT_Z32_FLOAT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: + return MESA_FORMAT_Z32_FLOAT_X24S8; case PIPE_FORMAT_UYVY: return MESA_FORMAT_YCBCR; @@ -608,7 +644,7 @@ struct format_mapping * Multiple GL enums might map to multiple pipe_formats. * The first pipe format in the list that's supported is the one that's chosen. */ -static struct format_mapping format_map[] = { +static const struct format_mapping format_map[] = { /* Basic RGB, RGBA formats */ { { GL_RGB10, GL_RGB10_A2, 0 }, @@ -616,7 +652,7 @@ static struct format_mapping format_map[] = { }, { { 4, GL_RGBA, GL_RGBA8, 0 }, - { DEFAULT_RGBA_FORMATS, 0 } + { PIPE_FORMAT_R8G8B8A8_UNORM, DEFAULT_RGBA_FORMATS } }, { { GL_BGRA, 0 }, @@ -624,7 +660,7 @@ static struct format_mapping format_map[] = { }, { { 3, GL_RGB, GL_RGB8, 0 }, - { DEFAULT_RGB_FORMATS, 0 } + { DEFAULT_RGB_FORMATS } }, { { GL_RGB12, GL_RGB16, GL_RGBA12, GL_RGBA16, 0 }, @@ -759,6 +795,10 @@ static struct format_mapping format_map[] = { { GL_DEPTH_COMPONENT, 0 }, { DEFAULT_DEPTH_FORMATS } }, + { + { GL_DEPTH_COMPONENT32F, 0 }, + { PIPE_FORMAT_Z32_FLOAT, 0 } + }, /* stencil formats */ { @@ -775,6 +815,10 @@ static struct format_mapping format_map[] = { { GL_DEPTH_STENCIL_EXT, GL_DEPTH24_STENCIL8_EXT, 0 }, { PIPE_FORMAT_Z24_UNORM_S8_USCALED, PIPE_FORMAT_S8_USCALED_Z24_UNORM, 0 } }, + { + { GL_DEPTH32F_STENCIL8, 0 }, + { PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED, 0 } + }, /* sRGB formats */ { @@ -1108,7 +1152,7 @@ static struct format_mapping format_map[] = { * Return first supported format from the given list. */ static enum pipe_format -find_supported_format(struct pipe_screen *screen, +find_supported_format(struct pipe_screen *screen, const enum pipe_format formats[], enum pipe_texture_target target, unsigned sample_count, @@ -1124,6 +1168,91 @@ find_supported_format(struct pipe_screen *screen, return PIPE_FORMAT_NONE; } +struct exact_format_mapping +{ + GLenum format; + GLenum type; + enum pipe_format pformat; +}; + +static const struct exact_format_mapping rgba8888_tbl[] = +{ + { GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, PIPE_FORMAT_A8B8G8R8_UNORM }, + { GL_ABGR_EXT, GL_UNSIGNED_INT_8_8_8_8_REV, PIPE_FORMAT_A8B8G8R8_UNORM }, + { GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, PIPE_FORMAT_R8G8B8A8_UNORM }, + { GL_ABGR_EXT, GL_UNSIGNED_INT_8_8_8_8, PIPE_FORMAT_R8G8B8A8_UNORM }, + { GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, PIPE_FORMAT_A8R8G8B8_UNORM }, + { GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, PIPE_FORMAT_B8G8R8A8_UNORM }, + { GL_RGBA, GL_UNSIGNED_BYTE, PIPE_FORMAT_R8G8B8A8_UNORM }, + { GL_ABGR_EXT, GL_UNSIGNED_BYTE, PIPE_FORMAT_A8B8G8R8_UNORM }, + { GL_BGRA, GL_UNSIGNED_BYTE, PIPE_FORMAT_B8G8R8A8_UNORM }, + { 0, 0, 0 } +}; + +static const struct exact_format_mapping rgbx8888_tbl[] = +{ + { GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, PIPE_FORMAT_X8R8G8B8_UNORM }, + { GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, PIPE_FORMAT_B8G8R8X8_UNORM }, + { GL_BGRA, GL_UNSIGNED_BYTE, PIPE_FORMAT_B8G8R8X8_UNORM }, + /* No Mesa formats for these Gallium formats: + { GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, PIPE_FORMAT_X8B8G8R8_UNORM }, + { GL_ABGR_EXT, GL_UNSIGNED_INT_8_8_8_8_REV, PIPE_FORMAT_X8B8G8R8_UNORM }, + { GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, PIPE_FORMAT_R8G8B8X8_UNORM }, + { GL_ABGR_EXT, GL_UNSIGNED_INT_8_8_8_8, PIPE_FORMAT_R8G8B8X8_UNORM }, + { GL_RGBA, GL_UNSIGNED_BYTE, PIPE_FORMAT_R8G8B8X8_UNORM }, + { GL_ABGR_EXT, GL_UNSIGNED_BYTE, PIPE_FORMAT_X8B8G8R8_UNORM }, + */ + { 0, 0, 0 } +}; + +static const struct exact_format_mapping rgba1010102_tbl[] = +{ + { GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV, PIPE_FORMAT_B10G10R10A2_UNORM }, + /* No Mesa formats for these Gallium formats: + { GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, PIPE_FORMAT_R10G10B10A2_UNORM }, + { GL_ABGR_EXT, GL_UNSIGNED_INT_10_10_10_2, PIPE_FORMAT_R10G10B10A2_UNORM }, + { GL_ABGR_EXT, GL_UNSIGNED_INT, PIPE_FORMAT_R10G10B10A2_UNORM }, + */ + { 0, 0, 0 } +}; + +/** + * If there is an exact pipe_format match for {internalFormat, format, type} + * return that, otherwise return PIPE_FORMAT_NONE so we can do fuzzy matching. + */ +static enum pipe_format +find_exact_format(GLint internalFormat, GLenum format, GLenum type) +{ + uint i; + const struct exact_format_mapping* tbl; + + if (format == GL_NONE || type == GL_NONE) + return PIPE_FORMAT_NONE; + + switch (internalFormat) { + case 4: + case GL_RGBA: + case GL_RGBA8: + tbl = rgba8888_tbl; + break; + case 3: + case GL_RGB: + case GL_RGB8: + tbl = rgbx8888_tbl; + break; + case GL_RGB10_A2: + tbl = rgba1010102_tbl; + break; + default: + return PIPE_FORMAT_NONE; + } + + for (i = 0; tbl[i].format; i++) + if (tbl[i].format == format && tbl[i].type == type) + return tbl[i].pformat; + + return PIPE_FORMAT_NONE; +} /** * Given an OpenGL internalFormat value for a texture or surface, return @@ -1140,11 +1269,13 @@ find_supported_format(struct pipe_screen *screen, */ enum pipe_format st_choose_format(struct pipe_screen *screen, GLenum internalFormat, + GLenum format, GLenum type, enum pipe_texture_target target, unsigned sample_count, unsigned bindings) { GET_CURRENT_CONTEXT(ctx); /* XXX this should be a function parameter */ int i, j; + enum pipe_format pf; /* can't render to compressed formats at this time */ if (_mesa_is_compressed_format(ctx, internalFormat) @@ -1152,6 +1283,13 @@ st_choose_format(struct pipe_screen *screen, GLenum internalFormat, return PIPE_FORMAT_NONE; } + /* search for exact matches */ + pf = find_exact_format(internalFormat, format, type); + if (pf != PIPE_FORMAT_NONE && + screen->is_format_supported(screen, pf, + target, sample_count, bindings)) + return pf; + /* search table for internalFormat */ for (i = 0; i < Elements(format_map); i++) { const struct format_mapping *mapping = &format_map[i]; @@ -1183,14 +1321,11 @@ st_choose_renderbuffer_format(struct pipe_screen *screen, usage = PIPE_BIND_DEPTH_STENCIL; else usage = PIPE_BIND_RENDER_TARGET; - return st_choose_format(screen, internalFormat, PIPE_TEXTURE_2D, + return st_choose_format(screen, internalFormat, GL_NONE, GL_NONE, PIPE_TEXTURE_2D, sample_count, usage); } -/** - * Called via ctx->Driver.chooseTextureFormat(). - */ gl_format st_ChooseTextureFormat_renderable(struct gl_context *ctx, GLint internalFormat, GLenum format, GLenum type, GLboolean renderable) @@ -1206,20 +1341,19 @@ st_ChooseTextureFormat_renderable(struct gl_context *ctx, GLint internalFormat, * that in advance. Specify potential render target flags now. */ bindings = PIPE_BIND_SAMPLER_VIEW; - if (renderable == GL_TRUE) { - if (_mesa_is_depth_format(internalFormat) || - _mesa_is_depth_or_stencil_format(internalFormat)) + if (renderable) { + if (_mesa_is_depth_or_stencil_format(internalFormat)) bindings |= PIPE_BIND_DEPTH_STENCIL; - else + else bindings |= PIPE_BIND_RENDER_TARGET; } - pFormat = st_choose_format(screen, internalFormat, + pFormat = st_choose_format(screen, internalFormat, format, type, PIPE_TEXTURE_2D, 0, bindings); if (pFormat == PIPE_FORMAT_NONE) { /* try choosing format again, this time without render target bindings */ - pFormat = st_choose_format(screen, internalFormat, + pFormat = st_choose_format(screen, internalFormat, format, type, PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); } @@ -1231,6 +1365,10 @@ st_ChooseTextureFormat_renderable(struct gl_context *ctx, GLint internalFormat, return st_pipe_format_to_mesa_format(pFormat); } + +/** + * Called via ctx->Driver.ChooseTextureFormat(). + */ gl_format st_ChooseTextureFormat(struct gl_context *ctx, GLint internalFormat, GLenum format, GLenum type) diff --git a/src/mesa/state_tracker/st_format.h b/src/mesa/state_tracker/st_format.h index 0fb570f6ee4..1c1f5965f66 100644 --- a/src/mesa/state_tracker/st_format.h +++ b/src/mesa/state_tracker/st_format.h @@ -52,8 +52,9 @@ st_pipe_format_to_mesa_format(enum pipe_format pipeFormat); extern enum pipe_format st_choose_format(struct pipe_screen *screen, GLenum internalFormat, + GLenum format, GLenum type, enum pipe_texture_target target, unsigned sample_count, - unsigned tex_usage); + unsigned bindings); extern enum pipe_format st_choose_renderbuffer_format(struct pipe_screen *screen, diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index b5f4253ea64..9c6a7ed738a 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1505,6 +1505,9 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]); break; } + case ir_unop_i2u: + case ir_unop_u2i: + /* Converting between signed and unsigned integers is a no-op. */ case ir_unop_b2i: /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */ result_src = op[0]; diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index a68544ddac7..d5228d387f7 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -139,23 +139,64 @@ buffer_index_to_attachment(gl_buffer_index index) } /** + * Make sure a context picks up the latest cached state of the + * drawables it binds to. + */ +static void +st_context_validate(struct st_context *st, + struct st_framebuffer *stdraw, + struct st_framebuffer *stread) +{ + if (stdraw && stdraw->stamp != st->draw_stamp) { + st->dirty.st |= ST_NEW_FRAMEBUFFER; + _mesa_resize_framebuffer(st->ctx, &stdraw->Base, + stdraw->Base.Width, + stdraw->Base.Height); + st->draw_stamp = stdraw->stamp; + } + + if (stread && stread->stamp != st->read_stamp) { + if (stread != stdraw) { + st->dirty.st |= ST_NEW_FRAMEBUFFER; + _mesa_resize_framebuffer(st->ctx, &stread->Base, + stread->Base.Width, + stread->Base.Height); + } + st->read_stamp = stread->stamp; + } +} + +/** * Validate a framebuffer to make sure up-to-date pipe_textures are used. + * The context we need to pass in is s dummy context needed only to be + * able to get a pipe context to create pipe surfaces, and to have a + * context to call _mesa_resize_framebuffer(): + * (That should probably be rethought, since those surfaces become + * drawable state, not context state, and can be freed by another pipe + * context). */ static void -st_framebuffer_validate(struct st_framebuffer *stfb, struct st_context *st) +st_framebuffer_validate(struct st_framebuffer *stfb, + struct st_context *st) { - struct pipe_context *pipe = st->pipe; struct pipe_resource *textures[ST_ATTACHMENT_COUNT]; uint width, height; unsigned i; boolean changed = FALSE; + int32_t new_stamp = p_atomic_read(&stfb->iface->stamp); - if (!p_atomic_read(&stfb->revalidate)) + if (stfb->iface_stamp == new_stamp) return; /* validate the fb */ - if (!stfb->iface->validate(stfb->iface, stfb->statts, stfb->num_statts, textures)) - return; + do { + if (!stfb->iface->validate(stfb->iface, stfb->statts, + stfb->num_statts, textures)) + return; + + stfb->iface_stamp = new_stamp; + new_stamp = p_atomic_read(&stfb->iface->stamp); + } while(stfb->iface_stamp != new_stamp); width = stfb->Base.Width; height = stfb->Base.Height; @@ -184,7 +225,7 @@ st_framebuffer_validate(struct st_framebuffer *stfb, struct st_context *st) memset(&surf_tmpl, 0, sizeof(surf_tmpl)); u_surface_default_template(&surf_tmpl, textures[i], PIPE_BIND_RENDER_TARGET); - ps = pipe->create_surface(pipe, textures[i], &surf_tmpl); + ps = st->pipe->create_surface(st->pipe, textures[i], &surf_tmpl); if (ps) { pipe_surface_reference(&strb->surface, ps); pipe_resource_reference(&strb->texture, ps->texture); @@ -204,14 +245,9 @@ st_framebuffer_validate(struct st_framebuffer *stfb, struct st_context *st) } if (changed) { - st->dirty.st |= ST_NEW_FRAMEBUFFER; + ++stfb->stamp; _mesa_resize_framebuffer(st->ctx, &stfb->Base, width, height); - - assert(stfb->Base.Width == width); - assert(stfb->Base.Height == height); } - - p_atomic_set(&stfb->revalidate, FALSE); } /** @@ -236,8 +272,7 @@ st_framebuffer_update_attachments(struct st_framebuffer *stfb) st_visual_have_buffers(stfb->iface->visual, 1 << statt)) stfb->statts[stfb->num_statts++] = statt; } - - p_atomic_set(&stfb->revalidate, TRUE); + stfb->stamp++; } /** @@ -443,6 +478,7 @@ st_framebuffer_create(struct st_framebuffer_iface *stfbi) &stfb->Base._ColorReadBufferIndex); stfb->iface = stfbi; + stfb->iface_stamp = p_atomic_read(&stfbi->stamp) - 1; /* add the color buffer */ idx = stfb->Base._ColorDrawBufferIndexes[0]; @@ -454,6 +490,7 @@ st_framebuffer_create(struct st_framebuffer_iface *stfbi) st_framebuffer_add_renderbuffer(stfb, BUFFER_DEPTH); st_framebuffer_add_renderbuffer(stfb, BUFFER_ACCUM); + stfb->stamp = 0; st_framebuffer_update_attachments(stfb); stfb->Base.Initialized = GL_TRUE; @@ -473,31 +510,6 @@ st_framebuffer_reference(struct st_framebuffer **ptr, } static void -st_context_notify_invalid_framebuffer(struct st_context_iface *stctxi, - struct st_framebuffer_iface *stfbi) -{ - struct st_context *st = (struct st_context *) stctxi; - struct st_framebuffer *stfb; - - /* either draw or read winsys fb */ - stfb = st_ws_framebuffer(st->ctx->WinSysDrawBuffer); - if (!stfb || stfb->iface != stfbi) - stfb = st_ws_framebuffer(st->ctx->WinSysReadBuffer); - - if (stfb && stfb->iface == stfbi) { - p_atomic_set(&stfb->revalidate, TRUE); - } - else { - /* This function is probably getting called when we've detected a - * change in a window's size but the currently bound context is - * not bound to that window. - * If the st_framebuffer_iface structure had a pointer to the - * corresponding st_framebuffer we'd be able to handle this. - */ - } -} - -static void st_context_flush(struct st_context_iface *stctxi, unsigned flags, struct pipe_fence_handle **fence) { @@ -575,7 +587,7 @@ st_context_teximage(struct st_context_iface *stctxi, internalFormat = GL_RGB; texFormat = st_ChooseTextureFormat(ctx, internalFormat, - GL_RGBA, GL_UNSIGNED_BYTE); + GL_BGRA, GL_UNSIGNED_BYTE); _mesa_init_teximage_fields(ctx, target, texImage, tex->width0, tex->height0, 1, 0, @@ -696,8 +708,6 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi, smapi->get_param(smapi, ST_MANAGER_BROKEN_INVALIDATE); st->iface.destroy = st_context_destroy; - st->iface.notify_invalid_framebuffer = - st_context_notify_invalid_framebuffer; st->iface.flush = st_context_flush; st->iface.teximage = st_context_teximage; st->iface.copy = st_context_copy; @@ -707,38 +717,58 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi, return &st->iface; } +static struct st_context_iface * +st_api_get_current(struct st_api *stapi) +{ + GET_CURRENT_CONTEXT(ctx); + struct st_context *st = (ctx) ? ctx->st : NULL; + + return (st) ? &st->iface : NULL; +} + +static struct st_framebuffer * +st_framebuffer_reuse_or_create(struct gl_framebuffer *fb, + struct st_framebuffer_iface *stfbi) +{ + struct st_framebuffer *cur = st_ws_framebuffer(fb), *stfb = NULL; + + if (cur && cur->iface == stfbi) { + /* reuse the current stfb */ + st_framebuffer_reference(&stfb, cur); + } + else { + /* create a new one */ + stfb = st_framebuffer_create(stfbi); + } + + return stfb; +} + static boolean st_api_make_current(struct st_api *stapi, struct st_context_iface *stctxi, struct st_framebuffer_iface *stdrawi, struct st_framebuffer_iface *streadi) { struct st_context *st = (struct st_context *) stctxi; - struct st_framebuffer *stdraw, *stread, *stfb; + struct st_framebuffer *stdraw, *stread; boolean ret; _glapi_check_multithread(); if (st) { - /* reuse/create the draw fb */ - stfb = st_ws_framebuffer(st->ctx->WinSysDrawBuffer); - if (stfb && stfb->iface == stdrawi) { - stdraw = NULL; - st_framebuffer_reference(&stdraw, stfb); + /* reuse or create the draw fb */ + stdraw = st_framebuffer_reuse_or_create(st->ctx->WinSysDrawBuffer, + stdrawi); + if (streadi != stdrawi) { + /* do the same for the read fb */ + stread = st_framebuffer_reuse_or_create(st->ctx->WinSysReadBuffer, + streadi); } else { - stdraw = st_framebuffer_create(stdrawi); - } - - /* reuse/create the read fb */ - stfb = st_ws_framebuffer(st->ctx->WinSysReadBuffer); - if (!stfb || stfb->iface != streadi) - stfb = stdraw; - if (stfb && stfb->iface == streadi) { stread = NULL; - st_framebuffer_reference(&stread, stfb); - } - else { - stread = st_framebuffer_create(streadi); + /* reuse the draw fb for the read fb */ + if (stdraw) + st_framebuffer_reference(&stread, stdraw); } if (stdraw && stread) { @@ -757,6 +787,10 @@ st_api_make_current(struct st_api *stapi, struct st_context_iface *stctxi, } ret = _mesa_make_current(st->ctx, &stdraw->Base, &stread->Base); + + st->draw_stamp = stdraw->stamp - 1; + st->read_stamp = stread->stamp - 1; + st_context_validate(st, stdraw, stread); } else { struct gl_framebuffer *incomplete = _mesa_get_incomplete_framebuffer(); @@ -773,15 +807,6 @@ st_api_make_current(struct st_api *stapi, struct st_context_iface *stctxi, return ret; } -static struct st_context_iface * -st_api_get_current(struct st_api *stapi) -{ - GET_CURRENT_CONTEXT(ctx); - struct st_context *st = (ctx) ? ctx->st : NULL; - - return (st) ? &st->iface : NULL; -} - static st_proc_t st_api_get_proc_address(struct st_api *stapi, const char *procname) { @@ -857,6 +882,8 @@ st_manager_validate_framebuffers(struct st_context *st) st_framebuffer_validate(stdraw, st); if (stread && stread != stdraw) st_framebuffer_validate(stread, st); + + st_context_validate(st, stdraw, stread); } /** @@ -891,6 +918,15 @@ st_manager_add_color_renderbuffer(struct st_context *st, return FALSE; st_framebuffer_update_attachments(stfb); + + /* + * Force a call to the state tracker manager to validate the + * new renderbuffer. It might be that there is a window system + * renderbuffer available. + */ + if(stfb->iface) + stfb->iface_stamp = p_atomic_read(&stfb->iface->stamp) - 1; + st_invalidate_state(st->ctx, _NEW_BUFFERS); return TRUE; diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 6d395128295..ca01d2e1976 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -613,7 +613,6 @@ st_translate_fragment_program(struct st_context *st, if (!stfp->tgsi.tokens) { /* need to translate Mesa instructions to TGSI now */ - enum pipe_error error; struct ureg_program *ureg; GLboolean write_all = st_prepare_fragment_program(st->ctx, stfp); @@ -633,38 +632,38 @@ st_translate_fragment_program(struct st_context *st, ureg_property_fs_color0_writes_all_cbufs(ureg, 1); if (stfp->glsl_to_tgsi) - error = st_translate_program(st->ctx, - TGSI_PROCESSOR_FRAGMENT, - ureg, - stfp->glsl_to_tgsi, - &stfp->Base.Base, - /* inputs */ - stfp->num_inputs, - stfp->input_to_index, - stfp->input_semantic_name, - stfp->input_semantic_index, - stfp->interp_mode, - /* outputs */ - stfp->num_outputs, - stfp->result_to_output, - stfp->output_semantic_name, - stfp->output_semantic_index, FALSE ); + st_translate_program(st->ctx, + TGSI_PROCESSOR_FRAGMENT, + ureg, + stfp->glsl_to_tgsi, + &stfp->Base.Base, + /* inputs */ + stfp->num_inputs, + stfp->input_to_index, + stfp->input_semantic_name, + stfp->input_semantic_index, + stfp->interp_mode, + /* outputs */ + stfp->num_outputs, + stfp->result_to_output, + stfp->output_semantic_name, + stfp->output_semantic_index, FALSE ); else - error = st_translate_mesa_program(st->ctx, - TGSI_PROCESSOR_FRAGMENT, - ureg, - &stfp->Base.Base, - /* inputs */ - stfp->num_inputs, - stfp->input_to_index, - stfp->input_semantic_name, - stfp->input_semantic_index, - stfp->interp_mode, - /* outputs */ - stfp->num_outputs, - stfp->result_to_output, - stfp->output_semantic_name, - stfp->output_semantic_index, FALSE ); + st_translate_mesa_program(st->ctx, + TGSI_PROCESSOR_FRAGMENT, + ureg, + &stfp->Base.Base, + /* inputs */ + stfp->num_inputs, + stfp->input_to_index, + stfp->input_semantic_name, + stfp->input_semantic_index, + stfp->interp_mode, + /* outputs */ + stfp->num_outputs, + stfp->result_to_output, + stfp->output_semantic_name, + stfp->output_semantic_index, FALSE ); stfp->tgsi.tokens = ureg_get_tokens( ureg, NULL ); ureg_destroy( ureg ); @@ -731,7 +730,6 @@ st_translate_geometry_program(struct st_context *st, GLuint inputMapping[GEOM_ATTRIB_MAX]; GLuint outputMapping[GEOM_RESULT_MAX]; struct pipe_context *pipe = st->pipe; - enum pipe_error error; GLuint attr; const GLbitfield inputsRead = stgp->Base.Base.InputsRead; GLuint vslot = 0; @@ -938,22 +936,22 @@ st_translate_geometry_program(struct st_context *st, ureg_property_gs_output_prim(ureg, stgp->Base.OutputType); ureg_property_gs_max_vertices(ureg, stgp->Base.VerticesOut); - error = st_translate_mesa_program(st->ctx, - TGSI_PROCESSOR_GEOMETRY, - ureg, - &stgp->Base.Base, - /* inputs */ - gs_num_inputs, - inputMapping, - stgp->input_semantic_name, - stgp->input_semantic_index, - NULL, - /* outputs */ - gs_num_outputs, - outputMapping, - gs_output_semantic_name, - gs_output_semantic_index, - FALSE); + st_translate_mesa_program(st->ctx, + TGSI_PROCESSOR_GEOMETRY, + ureg, + &stgp->Base.Base, + /* inputs */ + gs_num_inputs, + inputMapping, + stgp->input_semantic_name, + stgp->input_semantic_index, + NULL, + /* outputs */ + gs_num_outputs, + outputMapping, + gs_output_semantic_name, + gs_output_semantic_index, + FALSE); stgp->num_inputs = gs_num_inputs; stgp->tgsi.tokens = ureg_get_tokens( ureg, NULL ); diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index d8ba3ac9252..0e857fddcdd 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -407,7 +407,7 @@ st_create_color_map_texture(struct gl_context *ctx) const uint texSize = 256; /* simple, and usually perfect */ /* find an RGBA texture format */ - format = st_choose_format(pipe->screen, GL_RGBA, + format = st_choose_format(pipe->screen, GL_RGBA, GL_NONE, GL_NONE, PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); /* create texture for color map/table */ diff --git a/src/mesa/swrast/s_blit.c b/src/mesa/swrast/s_blit.c index 3516a41bf41..7f53f19eb62 100644 --- a/src/mesa/swrast/s_blit.c +++ b/src/mesa/swrast/s_blit.c @@ -568,9 +568,6 @@ _swrast_BlitFramebuffer(struct gl_context *ctx, }; GLint i; - if (!ctx->DrawBuffer->_NumColorDrawBuffers) - return; - if (!_mesa_clip_blit(ctx, &srcX0, &srcY0, &srcX1, &srcY1, &dstX0, &dstY0, &dstX1, &dstY1)) { return; diff --git a/src/mesa/swrast/s_readpix.c b/src/mesa/swrast/s_readpix.c index 214f2ea1aaa..66ca39293a6 100644 --- a/src/mesa/swrast/s_readpix.c +++ b/src/mesa/swrast/s_readpix.c @@ -446,7 +446,7 @@ read_depth_stencil_pixels(struct gl_context *ctx, GLfloat depthVals[MAX_WIDTH]; _swrast_read_depth_span_float(ctx, depthRb, width, x, y + i, depthVals); - _mesa_pack_depth_stencil_span(ctx, width, depthStencilDst, + _mesa_pack_depth_stencil_span(ctx, width, type, depthStencilDst, depthVals, stencilVals, packing); } } diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c index f81de3c0c7b..db102ac7946 100644 --- a/src/mesa/swrast/s_span.c +++ b/src/mesa/swrast/s_span.c @@ -961,17 +961,6 @@ convert_color_type(SWspan *span, GLenum newType, GLuint output) static INLINE void shade_texture_span(struct gl_context *ctx, SWspan *span) { - GLbitfield inputsRead; - - /* Determine which fragment attributes are actually needed */ - if (ctx->FragmentProgram._Current) { - inputsRead = ctx->FragmentProgram._Current->Base.InputsRead; - } - else { - /* XXX we could be a bit smarter about this */ - inputsRead = ~0; - } - if (ctx->FragmentProgram._Current || ctx->ATIFragmentShader._Enabled) { /* programmable shading */ diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index 7959337decb..b908d5aea7e 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -972,13 +972,13 @@ static void GLAPIENTRY vbo_exec_DrawRangeElements(GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices) { - GET_CURRENT_CONTEXT(ctx); - - if (MESA_VERBOSE & VERBOSE_DRAW) + if (MESA_VERBOSE & VERBOSE_DRAW) { + GET_CURRENT_CONTEXT(ctx); _mesa_debug(ctx, "glDrawRangeElements(%s, %u, %u, %d, %s, %p)\n", _mesa_lookup_enum_by_nr(mode), start, end, count, _mesa_lookup_enum_by_nr(type), indices); + } vbo_exec_DrawRangeElementsBaseVertex(mode, start, end, count, type, indices, 0); diff --git a/src/mesa/vbo/vbo_rebase.c b/src/mesa/vbo/vbo_rebase.c index 9068ae240a6..1de290ff602 100644 --- a/src/mesa/vbo/vbo_rebase.c +++ b/src/mesa/vbo/vbo_rebase.c @@ -78,8 +78,7 @@ GLboolean vbo_all_varyings_in_vbos( const struct gl_client_array *arrays[] ) GLuint i; for (i = 0; i < VERT_ATTRIB_MAX; i++) - if (arrays[i]->StrideB && - arrays[i]->BufferObj->Name == 0) + if (arrays[i]->BufferObj->Name == 0) return GL_FALSE; return GL_TRUE; @@ -90,8 +89,7 @@ GLboolean vbo_any_varyings_in_vbos( const struct gl_client_array *arrays[] ) GLuint i; for (i = 0; i < VERT_ATTRIB_MAX; i++) - if (arrays[i]->StrideB && - arrays[i]->BufferObj->Name != 0) + if (arrays[i]->BufferObj->Name != 0) return GL_TRUE; return GL_FALSE; |