diff options
Diffstat (limited to 'src/mesa/drivers')
31 files changed, 708 insertions, 515 deletions
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 329e48f46f5..ab78f4565da 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -48,6 +48,7 @@ #include "main/feedback.h" #include "main/formats.h" #include "main/format_unpack.h" +#include "main/framebuffer.h" #include "main/glformats.h" #include "main/image.h" #include "main/macros.h" @@ -104,42 +105,20 @@ static void meta_drawpix_cleanup(struct gl_context *ctx, struct drawpix_state *drawpix); void -_mesa_meta_bind_fbo_image(GLenum fboTarget, GLenum attachment, - struct gl_texture_image *texImage, GLuint layer) +_mesa_meta_framebuffer_texture_image(struct gl_context *ctx, + struct gl_framebuffer *fb, + GLenum attachment, + struct gl_texture_image *texImage, + GLuint layer) { struct gl_texture_object *texObj = texImage->TexObject; int level = texImage->Level; - GLenum texTarget = texObj->Target; + const GLenum texTarget = texObj->Target == GL_TEXTURE_CUBE_MAP + ? GL_TEXTURE_CUBE_MAP_POSITIVE_X + texImage->Face + : texObj->Target; - switch (texTarget) { - case GL_TEXTURE_1D: - _mesa_FramebufferTexture1D(fboTarget, - attachment, - texTarget, - texObj->Name, - level); - break; - case GL_TEXTURE_1D_ARRAY: - case GL_TEXTURE_2D_ARRAY: - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - case GL_TEXTURE_CUBE_MAP_ARRAY: - case GL_TEXTURE_3D: - _mesa_FramebufferTextureLayer(fboTarget, - attachment, - texObj->Name, - level, - layer); - break; - default: /* 2D / cube */ - if (texTarget == GL_TEXTURE_CUBE_MAP) - texTarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + texImage->Face; - - _mesa_FramebufferTexture2D(fboTarget, - attachment, - texTarget, - texObj->Name, - level); - } + _mesa_framebuffer_texture(ctx, fb, attachment, texObj, texTarget, + level, layer, false, __func__); } GLuint @@ -847,8 +826,8 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) if (ctx->RasterDiscard) _mesa_set_enable(ctx, GL_RASTERIZER_DISCARD, GL_FALSE); - save->DrawBufferName = 
ctx->DrawBuffer->Name; - save->ReadBufferName = ctx->ReadBuffer->Name; + _mesa_reference_framebuffer(&save->DrawBuffer, ctx->DrawBuffer); + _mesa_reference_framebuffer(&save->ReadBuffer, ctx->ReadBuffer); } } @@ -1234,11 +1213,9 @@ _mesa_meta_end(struct gl_context *ctx) if (save->TransformFeedbackNeedsResume) _mesa_ResumeTransformFeedback(); - if (ctx->DrawBuffer->Name != save->DrawBufferName) - _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, save->DrawBufferName); - - if (ctx->ReadBuffer->Name != save->ReadBufferName) - _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, save->ReadBufferName); + _mesa_bind_framebuffers(ctx, save->DrawBuffer, save->ReadBuffer); + _mesa_reference_framebuffer(&save->DrawBuffer, NULL); + _mesa_reference_framebuffer(&save->ReadBuffer, NULL); if (state & MESA_META_DRAW_BUFFERS) { _mesa_drawbuffers(ctx, ctx->DrawBuffer, ctx->Const.MaxDrawBuffers, @@ -2807,7 +2784,7 @@ copytexsubimage_using_blit_framebuffer(struct gl_context *ctx, GLuint dims, GLint x, GLint y, GLsizei width, GLsizei height) { - GLuint fbo; + struct gl_framebuffer *drawFb; bool success = false; GLbitfield mask; GLenum status; @@ -2815,32 +2792,37 @@ copytexsubimage_using_blit_framebuffer(struct gl_context *ctx, GLuint dims, if (!ctx->Extensions.ARB_framebuffer_object) return false; - _mesa_meta_begin(ctx, MESA_META_ALL & ~MESA_META_DRAW_BUFFERS); + drawFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (drawFb == NULL) + return false; - _mesa_GenFramebuffers(1, &fbo); - _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); + _mesa_meta_begin(ctx, MESA_META_ALL & ~MESA_META_DRAW_BUFFERS); + _mesa_bind_framebuffers(ctx, drawFb, ctx->ReadBuffer); if (rb->_BaseFormat == GL_DEPTH_STENCIL || rb->_BaseFormat == GL_DEPTH_COMPONENT) { - _mesa_meta_bind_fbo_image(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, - texImage, zoffset); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, + GL_DEPTH_ATTACHMENT, + texImage, zoffset); mask = GL_DEPTH_BUFFER_BIT; if (rb->_BaseFormat == GL_DEPTH_STENCIL && 
texImage->_BaseFormat == GL_DEPTH_STENCIL) { - _mesa_meta_bind_fbo_image(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, - texImage, zoffset); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, + GL_STENCIL_ATTACHMENT, + texImage, zoffset); mask |= GL_STENCIL_BUFFER_BIT; } _mesa_DrawBuffer(GL_NONE); } else { - _mesa_meta_bind_fbo_image(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - texImage, zoffset); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, + GL_COLOR_ATTACHMENT0, + texImage, zoffset); mask = GL_COLOR_BUFFER_BIT; _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0); } - status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); + status = _mesa_check_framebuffer_status(ctx, ctx->DrawBuffer); if (status != GL_FRAMEBUFFER_COMPLETE) goto out; @@ -2866,7 +2848,7 @@ copytexsubimage_using_blit_framebuffer(struct gl_context *ctx, GLuint dims, success = mask == 0x0; out: - _mesa_DeleteFramebuffers(1, &fbo); + _mesa_reference_framebuffer(&drawFb, NULL); _mesa_meta_end(ctx); return success; } @@ -2961,8 +2943,8 @@ _mesa_meta_CopyTexSubImage(struct gl_context *ctx, GLuint dims, static void meta_decompress_fbo_cleanup(struct decompress_fbo_state *decompress_fbo) { - if (decompress_fbo->FBO != 0) { - _mesa_DeleteFramebuffers(1, &decompress_fbo->FBO); + if (decompress_fbo->fb != NULL) { + _mesa_reference_framebuffer(&decompress_fbo->fb, NULL); _mesa_reference_renderbuffer(&decompress_fbo->rb, NULL); } @@ -3065,7 +3047,7 @@ decompress_texture_image(struct gl_context *ctx, ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler); /* Create/bind FBO/renderbuffer */ - if (decompress_fbo->FBO == 0) { + if (decompress_fbo->fb == NULL) { decompress_fbo->rb = ctx->Driver.NewRenderbuffer(ctx, 0xDEADBEEF); if (decompress_fbo->rb == NULL) { _mesa_meta_end(ctx); @@ -3074,20 +3056,25 @@ decompress_texture_image(struct gl_context *ctx, decompress_fbo->rb->RefCount = 1; - _mesa_GenFramebuffers(1, &decompress_fbo->FBO); - _mesa_BindFramebuffer(GL_FRAMEBUFFER_EXT, decompress_fbo->FBO); + 
decompress_fbo->fb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (decompress_fbo->fb == NULL) { + _mesa_meta_end(ctx); + return false; + } + + _mesa_bind_framebuffers(ctx, decompress_fbo->fb, decompress_fbo->fb); _mesa_framebuffer_renderbuffer(ctx, ctx->DrawBuffer, GL_COLOR_ATTACHMENT0, decompress_fbo->rb); } else { - _mesa_BindFramebuffer(GL_FRAMEBUFFER_EXT, decompress_fbo->FBO); + _mesa_bind_framebuffers(ctx, decompress_fbo->fb, decompress_fbo->fb); } /* alloc dest surface */ if (width > decompress_fbo->Width || height > decompress_fbo->Height) { _mesa_renderbuffer_storage(ctx, decompress_fbo->rb, rbFormat, width, height, 0); - status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); + status = _mesa_check_framebuffer_status(ctx, ctx->DrawBuffer); if (status != GL_FRAMEBUFFER_COMPLETE) { /* If the framebuffer isn't complete then we'll leave * decompress_fbo->Width as zero so that it will fail again next time @@ -3434,10 +3421,11 @@ cleartexsubimage_color(struct gl_context *ctx, GLenum datatype; GLenum status; - _mesa_meta_bind_fbo_image(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - texImage, zoffset); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, + GL_COLOR_ATTACHMENT0, + texImage, zoffset); - status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); + status = _mesa_check_framebuffer_status(ctx, ctx->DrawBuffer); if (status != GL_FRAMEBUFFER_COMPLETE) return false; @@ -3481,14 +3469,16 @@ cleartexsubimage_depth_stencil(struct gl_context *ctx, GLfloat depthValue; GLenum status; - _mesa_meta_bind_fbo_image(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, - texImage, zoffset); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, + GL_DEPTH_ATTACHMENT, + texImage, zoffset); if (texImage->_BaseFormat == GL_DEPTH_STENCIL) - _mesa_meta_bind_fbo_image(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, - texImage, zoffset); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, + GL_STENCIL_ATTACHMENT, + texImage, zoffset); - status = 
_mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); + status = _mesa_check_framebuffer_status(ctx, ctx->DrawBuffer); if (status != GL_FRAMEBUFFER_COMPLETE) return false; @@ -3526,11 +3516,14 @@ cleartexsubimage_for_zoffset(struct gl_context *ctx, GLint zoffset, const GLvoid *clearValue) { - GLuint fbo; + struct gl_framebuffer *drawFb; bool success; - _mesa_GenFramebuffers(1, &fbo); - _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); + drawFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (drawFb == NULL) + return false; + + _mesa_bind_framebuffers(ctx, drawFb, ctx->ReadBuffer); switch(texImage->_BaseFormat) { case GL_DEPTH_STENCIL: @@ -3543,7 +3536,7 @@ cleartexsubimage_for_zoffset(struct gl_context *ctx, break; } - _mesa_DeleteFramebuffers(1, &fbo); + _mesa_reference_framebuffer(&drawFb, NULL); return success; } diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h index 7a120b6c44b..60ae5f7577f 100644 --- a/src/mesa/drivers/common/meta.h +++ b/src/mesa/drivers/common/meta.h @@ -186,7 +186,8 @@ struct save_state GLboolean RasterDiscard; GLboolean TransformFeedbackNeedsResume; - GLuint DrawBufferName, ReadBufferName; + struct gl_framebuffer *DrawBuffer; + struct gl_framebuffer *ReadBuffer; /** MESA_META_DRAW_BUFFERS */ GLenum ColorDrawBuffers[MAX_DRAW_BUFFERS]; @@ -368,7 +369,7 @@ struct gen_mipmap_state { GLuint VAO; struct gl_buffer_object *buf_obj; - GLuint FBO; + struct gl_framebuffer *fb; struct gl_sampler_object *samp_obj; struct blit_shader_table shaders; @@ -381,7 +382,7 @@ struct gen_mipmap_state struct decompress_fbo_state { struct gl_renderbuffer *rb; - GLuint FBO; + struct gl_framebuffer *fb; GLint Width, Height; }; @@ -661,7 +662,10 @@ _mesa_meta_glsl_generate_mipmap_cleanup(struct gl_context *ctx, struct gen_mipmap_state *mipmap); void -_mesa_meta_bind_fbo_image(GLenum target, GLenum attachment, - struct gl_texture_image *texImage, GLuint layer); +_mesa_meta_framebuffer_texture_image(struct gl_context *ctx, + struct 
gl_framebuffer *fb, + GLenum attachment, + struct gl_texture_image *texImage, + GLuint layer); #endif /* META_H */ diff --git a/src/mesa/drivers/common/meta_copy_image.c b/src/mesa/drivers/common/meta_copy_image.c index 57c3f686b0c..18b9681b710 100644 --- a/src/mesa/drivers/common/meta_copy_image.c +++ b/src/mesa/drivers/common/meta_copy_image.c @@ -30,6 +30,7 @@ #include "teximage.h" #include "texobj.h" #include "fbobject.h" +#include "framebuffer.h" #include "buffers.h" #include "state.h" #include "mtypes.h" @@ -166,7 +167,8 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx, GLint src_internal_format, dst_internal_format; GLuint src_view_texture = 0; struct gl_texture_image *src_view_tex_image; - GLuint fbos[2]; + struct gl_framebuffer *readFb; + struct gl_framebuffer *drawFb; bool success = false; GLbitfield mask; GLenum status, attachment; @@ -210,9 +212,15 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx, /* We really only need to stash the bound framebuffers and scissor. */ _mesa_meta_begin(ctx, MESA_META_SCISSOR); - _mesa_GenFramebuffers(2, fbos); - _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, fbos[0]); - _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbos[1]); + readFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (readFb == NULL) + goto meta_end; + + drawFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (drawFb == NULL) + goto meta_end; + + _mesa_bind_framebuffers(ctx, drawFb, readFb); switch (_mesa_get_format_base_format(src_format)) { case GL_DEPTH_COMPONENT: @@ -238,14 +246,14 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx, /* Prefer the tex image because, even if we have a renderbuffer, we may * have had to wrap it in a texture view. 
*/ - _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, attachment, - src_view_tex_image, src_z); + _mesa_meta_framebuffer_texture_image(ctx, ctx->ReadBuffer, attachment, + src_view_tex_image, src_z); } else { _mesa_framebuffer_renderbuffer(ctx, ctx->ReadBuffer, attachment, src_renderbuffer); } - status = _mesa_CheckFramebufferStatus(GL_READ_FRAMEBUFFER); + status = _mesa_check_framebuffer_status(ctx, ctx->ReadBuffer); if (status != GL_FRAMEBUFFER_COMPLETE) goto meta_end; @@ -253,11 +261,11 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx, _mesa_framebuffer_renderbuffer(ctx, ctx->DrawBuffer, attachment, dst_renderbuffer); } else { - _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, attachment, - dst_tex_image, dst_z); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, attachment, + dst_tex_image, dst_z); } - status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); + status = _mesa_check_framebuffer_status(ctx, ctx->DrawBuffer); if (status != GL_FRAMEBUFFER_COMPLETE) goto meta_end; @@ -281,7 +289,8 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx, success = true; meta_end: - _mesa_DeleteFramebuffers(2, fbos); + _mesa_reference_framebuffer(&readFb, NULL); + _mesa_reference_framebuffer(&drawFb, NULL); _mesa_meta_end(ctx); cleanup: diff --git a/src/mesa/drivers/common/meta_generate_mipmap.c b/src/mesa/drivers/common/meta_generate_mipmap.c index 27435b2b722..892d8d34619 100644 --- a/src/mesa/drivers/common/meta_generate_mipmap.c +++ b/src/mesa/drivers/common/meta_generate_mipmap.c @@ -35,6 +35,7 @@ #include "main/enums.h" #include "main/enable.h" #include "main/fbobject.h" +#include "main/framebuffer.h" #include "main/macros.h" #include "main/mipmap.h" #include "main/teximage.h" @@ -56,21 +57,11 @@ static bool fallback_required(struct gl_context *ctx, GLenum target, struct gl_texture_object *texObj) { - const GLuint fboSave = ctx->DrawBuffer->Name; struct gen_mipmap_state *mipmap = &ctx->Meta->Mipmap; struct gl_texture_image 
*baseImage; GLuint srcLevel; GLenum status; - /* GL_DRAW_FRAMEBUFFER does not exist in OpenGL ES 1.x, and since - * _mesa_meta_begin hasn't been called yet, we have to work-around API - * difficulties. The whole reason that GL_DRAW_FRAMEBUFFER is used instead - * of GL_FRAMEBUFFER is that the read framebuffer may be different. This - * is moot in OpenGL ES 1.x. - */ - const GLenum fbo_target = ctx->API == API_OPENGLES - ? GL_FRAMEBUFFER : GL_DRAW_FRAMEBUFFER; - /* check for fallbacks */ if (target == GL_TEXTURE_3D) { _mesa_perf_debug(ctx, MESA_DEBUG_SEVERITY_HIGH, @@ -109,16 +100,19 @@ fallback_required(struct gl_context *ctx, GLenum target, /* * Test that we can actually render in the texture's format. */ - if (!mipmap->FBO) - _mesa_GenFramebuffers(1, &mipmap->FBO); - _mesa_BindFramebuffer(fbo_target, mipmap->FBO); - - _mesa_meta_bind_fbo_image(fbo_target, GL_COLOR_ATTACHMENT0, baseImage, 0); - - status = _mesa_CheckFramebufferStatus(fbo_target); + if (mipmap->fb == NULL) { + mipmap->fb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (mipmap->fb == NULL) { + _mesa_perf_debug(ctx, MESA_DEBUG_SEVERITY_HIGH, + "glGenerateMipmap() ran out of memory\n"); + return true; + } + } - _mesa_BindFramebuffer(fbo_target, fboSave); + _mesa_meta_framebuffer_texture_image(ctx, mipmap->fb, + GL_COLOR_ATTACHMENT0, baseImage, 0); + status = _mesa_check_framebuffer_status(ctx, mipmap->fb); if (status != GL_FRAMEBUFFER_COMPLETE_EXT) { _mesa_perf_debug(ctx, MESA_DEBUG_SEVERITY_HIGH, "glGenerateMipmap() got incomplete FBO\n"); @@ -138,11 +132,7 @@ _mesa_meta_glsl_generate_mipmap_cleanup(struct gl_context *ctx, mipmap->VAO = 0; _mesa_reference_buffer_object(ctx, &mipmap->buf_obj, NULL); _mesa_reference_sampler_object(ctx, &mipmap->samp_obj, NULL); - - if (mipmap->FBO != 0) { - _mesa_DeleteFramebuffers(1, &mipmap->FBO); - mipmap->FBO = 0; - } + _mesa_reference_framebuffer(&mipmap->fb, NULL); _mesa_meta_blit_shader_table_cleanup(&mipmap->shaders); } @@ -251,8 +241,8 @@ 
_mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, _mesa_bind_sampler(ctx, ctx->Texture.CurrentUnit, mipmap->samp_obj); - assert(mipmap->FBO != 0); - _mesa_BindFramebuffer(GL_FRAMEBUFFER_EXT, mipmap->FBO); + assert(mipmap->fb != NULL); + _mesa_bind_framebuffers(ctx, mipmap->fb, mipmap->fb); _mesa_texture_parameteriv(ctx, texObj, GL_GENERATE_MIPMAP, &always_false, false); @@ -354,10 +344,12 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, _mesa_buffer_data(ctx, mipmap->buf_obj, GL_NONE, sizeof(verts), verts, GL_DYNAMIC_DRAW, __func__); - _mesa_meta_bind_fbo_image(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstImage, layer); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, + GL_COLOR_ATTACHMENT0, dstImage, + layer); /* sanity check */ - if (_mesa_CheckFramebufferStatus(GL_FRAMEBUFFER) != + if (_mesa_check_framebuffer_status(ctx, ctx->DrawBuffer) != GL_FRAMEBUFFER_COMPLETE) { _mesa_problem(ctx, "Unexpected incomplete framebuffer in " "_mesa_meta_GenerateMipmap()"); diff --git a/src/mesa/drivers/common/meta_tex_subimage.c b/src/mesa/drivers/common/meta_tex_subimage.c index 4adaad7777b..639d3236359 100644 --- a/src/mesa/drivers/common/meta_tex_subimage.c +++ b/src/mesa/drivers/common/meta_tex_subimage.c @@ -30,6 +30,7 @@ #include "buffers.h" #include "clear.h" #include "fbobject.h" +#include "framebuffer.h" #include "glformats.h" #include "glheader.h" #include "image.h" @@ -178,7 +179,9 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims, const struct gl_pixelstore_attrib *packing) { struct gl_buffer_object *pbo = NULL; - GLuint pbo_tex = 0, fbos[2] = { 0, 0 }; + GLuint pbo_tex = 0; + struct gl_framebuffer *readFb = NULL; + struct gl_framebuffer *drawFb = NULL; int image_height; struct gl_texture_image *pbo_tex_image; GLenum status; @@ -225,9 +228,15 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims, _mesa_meta_begin(ctx, ~(MESA_META_PIXEL_TRANSFER | MESA_META_PIXEL_STORE)); - _mesa_GenFramebuffers(2, 
fbos); - _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, fbos[0]); - _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbos[1]); + readFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (readFb == NULL) + goto fail; + + drawFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (drawFb == NULL) + goto fail; + + _mesa_bind_framebuffers(ctx, drawFb, tex_image ? readFb : ctx->ReadBuffer); if (tex_image->TexObject->Target == GL_TEXTURE_1D_ARRAY) { assert(depth == 1); @@ -239,17 +248,19 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims, yoffset = 0; } - _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - pbo_tex_image, 0); + _mesa_meta_framebuffer_texture_image(ctx, ctx->ReadBuffer, + GL_COLOR_ATTACHMENT0, + pbo_tex_image, 0); /* If this passes on the first layer it should pass on the others */ - status = _mesa_CheckFramebufferStatus(GL_READ_FRAMEBUFFER); + status = _mesa_check_framebuffer_status(ctx, ctx->ReadBuffer); if (status != GL_FRAMEBUFFER_COMPLETE) goto fail; - _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - tex_image, zoffset); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, + GL_COLOR_ATTACHMENT0, + tex_image, zoffset); /* If this passes on the first layer it should pass on the others */ - status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); + status = _mesa_check_framebuffer_status(ctx, ctx->DrawBuffer); if (status != GL_FRAMEBUFFER_COMPLETE) goto fail; @@ -263,8 +274,9 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims, goto fail; for (z = 1; z < depth; z++) { - _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - tex_image, zoffset + z); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, + GL_COLOR_ATTACHMENT0, + tex_image, zoffset + z); _mesa_update_state(ctx); @@ -279,7 +291,8 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims, success = true; fail: - _mesa_DeleteFramebuffers(2, fbos); + _mesa_reference_framebuffer(&readFb, 
NULL); + _mesa_reference_framebuffer(&drawFb, NULL); _mesa_DeleteTextures(1, &pbo_tex); _mesa_reference_buffer_object(ctx, &pbo, NULL); @@ -297,7 +310,9 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims, const struct gl_pixelstore_attrib *packing) { struct gl_buffer_object *pbo = NULL; - GLuint pbo_tex = 0, fbos[2] = { 0, 0 }; + GLuint pbo_tex = 0; + struct gl_framebuffer *readFb; + struct gl_framebuffer *drawFb; int image_height; struct gl_texture_image *pbo_tex_image; struct gl_renderbuffer *rb = NULL; @@ -360,7 +375,13 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims, if (ctx->Extensions.ARB_color_buffer_float) _mesa_ClampColor(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE); - _mesa_GenFramebuffers(2, fbos); + readFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (readFb == NULL) + goto fail; + + drawFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (drawFb == NULL) + goto fail; if (tex_image && tex_image->TexObject->Target == GL_TEXTURE_1D_ARRAY) { assert(depth == 1); @@ -376,23 +397,24 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims, * we're doing a ReadPixels and we should just use whatever framebuffer * the client has bound. */ + _mesa_bind_framebuffers(ctx, drawFb, tex_image ? 
readFb : ctx->ReadBuffer); if (tex_image) { - _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, fbos[0]); - _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - tex_image, zoffset); + _mesa_meta_framebuffer_texture_image(ctx, ctx->ReadBuffer, + GL_COLOR_ATTACHMENT0, + tex_image, zoffset); /* If this passes on the first layer it should pass on the others */ - status = _mesa_CheckFramebufferStatus(GL_READ_FRAMEBUFFER); + status = _mesa_check_framebuffer_status(ctx, ctx->ReadBuffer); if (status != GL_FRAMEBUFFER_COMPLETE) goto fail; } else { assert(depth == 1); } - _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbos[1]); - _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - pbo_tex_image, 0); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, + GL_COLOR_ATTACHMENT0, + pbo_tex_image, 0); /* If this passes on the first layer it should pass on the others */ - status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); + status = _mesa_check_framebuffer_status(ctx, ctx->DrawBuffer); if (status != GL_FRAMEBUFFER_COMPLETE) goto fail; @@ -427,8 +449,9 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims, } for (z = 1; z < depth; z++) { - _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - tex_image, zoffset + z); + _mesa_meta_framebuffer_texture_image(ctx, ctx->ReadBuffer, + GL_COLOR_ATTACHMENT0, + tex_image, zoffset + z); _mesa_update_state(ctx); @@ -452,7 +475,8 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims, success = true; fail: - _mesa_DeleteFramebuffers(2, fbos); + _mesa_reference_framebuffer(&drawFb, NULL); + _mesa_reference_framebuffer(&readFb, NULL); _mesa_DeleteTextures(1, &pbo_tex); _mesa_reference_buffer_object(ctx, &pbo, NULL); diff --git a/src/mesa/drivers/dri/Android.mk b/src/mesa/drivers/dri/Android.mk index 42bfd77d467..7e7587b0753 100644 --- a/src/mesa/drivers/dri/Android.mk +++ b/src/mesa/drivers/dri/Android.mk @@ -28,9 +28,6 @@ include 
$(LOCAL_PATH)/common/Makefile.sources #----------------------------------------------- # Variables common to all DRI drivers -MESA_DRI_MODULE_PATH := $(TARGET_OUT_SHARED_LIBRARIES)/dri -MESA_DRI_MODULE_UNSTRIPPED_PATH := $(TARGET_OUT_SHARED_LIBRARIES_UNSTRIPPED)/dri - MESA_DRI_CFLAGS := \ -DHAVE_ANDROID_PLATFORM @@ -42,6 +39,8 @@ MESA_DRI_C_INCLUDES := \ MESA_DRI_WHOLE_STATIC_LIBRARIES := \ libmesa_glsl \ + libmesa_compiler \ + libmesa_nir \ libmesa_megadriver_stub \ libmesa_dri_common \ libmesa_dricore \ diff --git a/src/mesa/drivers/dri/common/Android.mk b/src/mesa/drivers/dri/common/Android.mk index f1a733011b9..8a21e630325 100644 --- a/src/mesa/drivers/dri/common/Android.mk +++ b/src/mesa/drivers/dri/common/Android.mk @@ -74,20 +74,24 @@ $(intermediates)/xmlpool/%.po: $(LOCAL_PATH)/xmlpool/%.po $(POT) sed -i -e 's/charset=.*\\n/charset=UTF-8\\n/' $@; \ fi -$(intermediates)/xmlpool/%/LC_MESSAGES/options.mo: $(intermediates)/xmlpool/%.po +PRIVATE_SCRIPT := $(LOCAL_PATH)/xmlpool/gen_xmlpool.py +PRIVATE_LOCALEDIR := $(intermediates)/xmlpool +PRIVATE_TEMPLATE_HEADER := $(LOCAL_PATH)/xmlpool/t_options.h +PRIVATE_MO_FILES := $(MESA_DRI_OPTIONS_LANGS:%=$(intermediates)/xmlpool/%/LC_MESSAGES/options.mo) + +LOCAL_GENERATED_SOURCES += $(PRIVATE_MO_FILES) + +$(PRIVATE_MO_FILES): $(intermediates)/xmlpool/%/LC_MESSAGES/options.mo: $(intermediates)/xmlpool/%.po mkdir -p $(dir $@) msgfmt -o $@ $< -$(MESA_DRI_OPTIONS_H): PRIVATE_SCRIPT := $(LOCAL_PATH)/xmlpool/gen_xmlpool.py -$(MESA_DRI_OPTIONS_H): PRIVATE_LOCALEDIR := $(intermediates)/xmlpool -$(MESA_DRI_OPTIONS_H): PRIVATE_TEMPLATE_HEADER := $(LOCAL_PATH)/xmlpool/t_options.h -$(MESA_DRI_OPTIONS_H): PRIVATE_MO_FILES := $(MESA_DRI_OPTIONS_LANGS:%=$(intermediates)/xmlpool/%/LC_MESSAGES/options.mo) -.SECONDEXPANSION: -$(MESA_DRI_OPTIONS_H): $$(PRIVATE_SCRIPT) $$(PRIVATE_TEMPLATE_HEADER) $$(PRIVATE_MO_FILES) - @mkdir -p $(dir $@) - $(hide) $(MESA_PYTHON2) $(PRIVATE_SCRIPT) $(PRIVATE_TEMPLATE_HEADER) \ +$(LOCAL_GENERATED_SOURCES): 
PRIVATE_PYTHON := $(MESA_PYTHON2) +$(LOCAL_GENERATED_SOURCES): PRIVATE_CUSTOM_TOOL = $(PRIVATE_PYTHON) $^ $(PRIVATE_TEMPLATE_HEADER) \ $(PRIVATE_LOCALEDIR) $(MESA_DRI_OPTIONS_LANGS) > $@ +$(MESA_DRI_OPTIONS_H): $(PRIVATE_SCRIPT) $(PRIVATE_TEMPLATE_HEADER) $(PRIVATE_MO_FILES) + $(transform-generated-source) + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) diff --git a/src/mesa/drivers/dri/i915/Android.mk b/src/mesa/drivers/dri/i915/Android.mk index 741ea260e94..97359fa049d 100644 --- a/src/mesa/drivers/dri/i915/Android.mk +++ b/src/mesa/drivers/dri/i915/Android.mk @@ -27,7 +27,7 @@ include $(CLEAR_VARS) LOCAL_MODULE := i915_dri ifeq ($(MESA_LOLLIPOP_BUILD),true) -LOCAL_MODULE_RELATIVE_PATH := $(notdir $(MESA_DRI_MODULE_PATH)) +LOCAL_MODULE_RELATIVE_PATH := $(MESA_DRI_MODULE_REL_PATH) else LOCAL_MODULE_PATH := $(MESA_DRI_MODULE_PATH) LOCAL_UNSTRIPPED_PATH := $(MESA_DRI_MODULE_UNSTRIPPED_PATH) @@ -54,7 +54,8 @@ LOCAL_SHARED_LIBRARIES := \ libdrm_intel LOCAL_GENERATED_SOURCES := \ - $(MESA_DRI_OPTIONS_H) + $(MESA_DRI_OPTIONS_H) \ + $(MESA_GEN_NIR_H) include $(MESA_COMMON_MK) include $(BUILD_SHARED_LIBRARY) diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index 57b033c07ea..83aaf9ea5a6 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -254,7 +254,6 @@ i915CreateContext(int api, /* FINISHME: Are there other options that should be enabled for software * FINISHME: vertex shaders? 
*/ - ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitCondCodes = true; ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectSampler = true; diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 59d795998c6..691bae359fb 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -598,26 +598,6 @@ upload_program(struct i915_fragment_program *p) 0, src0, T0_TEXKILL); break; - case OPCODE_KIL_NV: - if (inst->DstReg.CondMask == COND_TR) { - tmp = i915_get_utemp(p); - - /* The KIL instruction discards the fragment if any component of - * the source is < 0. Emit an immediate operand of {-1}.xywz. - */ - i915_emit_texld(p, get_live_regs(p, inst), - tmp, A0_DEST_CHANNEL_ALL, - 0, /* use a dummy dest reg */ - negate(swizzle(tmp, ONE, ONE, ONE, ONE), - 1, 1, 1, 1), - T0_TEXKILL); - } else { - p->error = 1; - i915_program_error(p, "Unsupported KIL_NV condition code: %d", - inst->DstReg.CondMask); - } - break; - case OPCODE_LG2: src0 = src_vector(p, &inst->SrcReg[0], program); diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index d30a053e10f..056b223f2de 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -27,7 +27,7 @@ include $(CLEAR_VARS) LOCAL_MODULE := i965_dri ifeq ($(MESA_LOLLIPOP_BUILD),true) -LOCAL_MODULE_RELATIVE_PATH := $(notdir $(MESA_DRI_MODULE_PATH)) +LOCAL_MODULE_RELATIVE_PATH := $(MESA_DRI_MODULE_REL_PATH) else LOCAL_MODULE_PATH := $(MESA_DRI_MODULE_PATH) LOCAL_UNSTRIPPED_PATH := $(MESA_DRI_MODULE_UNSTRIPPED_PATH) @@ -59,7 +59,8 @@ LOCAL_SHARED_LIBRARIES := \ libdrm_intel LOCAL_GENERATED_SOURCES := \ - $(MESA_DRI_OPTIONS_H) + $(MESA_DRI_OPTIONS_H) \ + $(MESA_GEN_NIR_H) include $(MESA_COMMON_MK) include $(BUILD_SHARED_LIBRARY) diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c index 00e44af2f8d..a95f51bfa4a 100644 
--- a/src/mesa/drivers/dri/i965/brw_compiler.c +++ b/src/mesa/drivers/dri/i965/brw_compiler.c @@ -144,7 +144,6 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) compiler->glsl_compiler_options[i].MaxIfDepth = devinfo->gen < 6 ? 16 : UINT_MAX; - compiler->glsl_compiler_options[i].EmitCondCodes = true; compiler->glsl_compiler_options[i].EmitNoNoise = true; compiler->glsl_compiler_options[i].EmitNoMainReturn = true; compiler->glsl_compiler_options[i].EmitNoIndirectInput = true; diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp index 61f25811cb2..2c1abaf255c 100644 --- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp +++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp @@ -32,8 +32,8 @@ /* Look for and eliminate dead control flow: * * - if/endif - * . else in else/endif - * - if/else/endif + * - else in else/endif + * - then in if/else/endif */ bool dead_control_flow_eliminate(backend_shader *s) @@ -41,61 +41,42 @@ dead_control_flow_eliminate(backend_shader *s) bool progress = false; foreach_block_safe (block, s->cfg) { - bblock_t *if_block = NULL, *else_block = NULL, *endif_block = block; - bool found = false; + bblock_t *prev_block = block->prev(); + backend_instruction *const inst = block->start(); + backend_instruction *const prev_inst = prev_block->end(); /* ENDIF instructions, by definition, can only be found at the start of * basic blocks. 
*/ - backend_instruction *endif_inst = endif_block->start(); - if (endif_inst->opcode != BRW_OPCODE_ENDIF) - continue; - - backend_instruction *if_inst = NULL, *else_inst = NULL; - backend_instruction *prev_inst = endif_block->prev()->end(); - if (prev_inst->opcode == BRW_OPCODE_ELSE) { - else_inst = prev_inst; - else_block = endif_block->prev(); - found = true; - - if (else_block->start_ip == else_block->end_ip) - prev_inst = else_block->prev()->end(); - } + if (inst->opcode == BRW_OPCODE_ENDIF && + prev_inst->opcode == BRW_OPCODE_ELSE) { + bblock_t *const else_block = prev_block; + backend_instruction *const else_inst = prev_inst; - if (prev_inst->opcode == BRW_OPCODE_IF) { - if_inst = prev_inst; - if_block = else_block != NULL ? else_block->prev() - : endif_block->prev(); - found = true; - } else { - /* Don't remove the ENDIF if we didn't find a dead IF. */ - endif_inst = NULL; - } + else_inst->remove(else_block); + progress = true; + } else if (inst->opcode == BRW_OPCODE_ENDIF && + prev_inst->opcode == BRW_OPCODE_IF) { + bblock_t *const endif_block = block; + bblock_t *const if_block = prev_block; + backend_instruction *const endif_inst = inst; + backend_instruction *const if_inst = prev_inst; - if (found) { bblock_t *earlier_block = NULL, *later_block = NULL; - if (if_inst) { - if (if_block->start_ip == if_block->end_ip) { - earlier_block = if_block->prev(); - } else { - earlier_block = if_block; - } - if_inst->remove(if_block); + if (if_block->start_ip == if_block->end_ip) { + earlier_block = if_block->prev(); + } else { + earlier_block = if_block; } + if_inst->remove(if_block); - if (else_inst) { - else_inst->remove(else_block); - } - - if (endif_inst) { - if (endif_block->start_ip == endif_block->end_ip) { - later_block = endif_block->next(); - } else { - later_block = endif_block; - } - endif_inst->remove(endif_block); + if (endif_block->start_ip == endif_block->end_ip) { + later_block = endif_block->next(); + } else { + later_block = endif_block; } + 
endif_inst->remove(endif_block); assert((earlier_block == NULL) == (later_block == NULL)); if (earlier_block && earlier_block->can_combine_with(later_block)) { @@ -111,6 +92,19 @@ dead_control_flow_eliminate(backend_shader *s) } progress = true; + } else if (inst->opcode == BRW_OPCODE_ELSE && + prev_inst->opcode == BRW_OPCODE_IF) { + bblock_t *const else_block = block; + backend_instruction *const if_inst = prev_inst; + backend_instruction *const else_inst = inst; + + /* Since the else-branch is becoming the new then-branch, the + * condition has to be inverted. + */ + if_inst->predicate_inverse = !if_inst->predicate_inverse; + else_inst->remove(else_block); + + progress = true; } } diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 35d8039ed13..2ef1d7bb825 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2526,6 +2526,8 @@ brw_send_indirect_message(struct brw_codegen *p, struct brw_inst *send; int setup; + dst = retype(dst, BRW_REGISTER_TYPE_UW); + assert(desc.type == BRW_REGISTER_TYPE_UD); /* We hold on to the setup instruction (the SEND in the direct case, the OR @@ -3207,6 +3209,7 @@ brw_memory_fence(struct brw_codegen *p, * message doesn't write anything back. 
*/ insn = next_insn(p, BRW_OPCODE_SEND); + dst = retype(dst, BRW_REGISTER_TYPE_UW); brw_set_dest(p, insn, dst); brw_set_src0(p, insn, dst); brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE, @@ -3473,7 +3476,7 @@ brw_barrier(struct brw_codegen *p, struct brw_reg src) assert(devinfo->gen >= 7); inst = next_insn(p, BRW_OPCODE_SEND); - brw_set_dest(p, inst, brw_null_reg()); + brw_set_dest(p, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW)); brw_set_src0(p, inst, src); brw_set_src1(p, inst, brw_null_reg()); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 81a83400ea0..0f9de30f05b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -5643,8 +5643,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex, true); - shader = brw_nir_lower_io(shader, compiler->devinfo, true, - false, NULL); + brw_nir_lower_fs_inputs(shader); + brw_nir_lower_fs_outputs(shader); shader = brw_postprocess_nir(shader, compiler->devinfo, true); /* key->alpha_test_func means simulating alpha testing via discards, diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index cde6566c05c..0e743de7faf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -139,6 +139,8 @@ operands_match(const fs_inst *a, const fs_inst *b, bool *negate) ys[1].f = ys1_imm; *negate = (xs0_negate != xs1_negate) != (ys0_negate != ys1_negate); + if (*negate && (a->saturate || b->saturate)) + return false; return ret; } else if (!a->is_commutative()) { bool match = true; diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index cac92b37bd5..75c29c597f5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -455,7 +455,7 @@ fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload) insn = brw_next_insn(p, BRW_OPCODE_SEND); - brw_set_dest(p, insn, brw_null_reg()); + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW)); brw_set_src0(p, insn, payload); brw_set_src1(p, insn, brw_imm_d(0)); diff --git a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp index 52570943996..dc2b0c8aa8d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp @@ -56,8 +56,7 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block) inst->dst.file != VGRF || inst->dst.type != inst->src[0].type || inst->src[0].file != VGRF || - inst->src[0].abs || - inst->src[0].negate) + inst->src[0].abs) continue; int src_var = v->live_intervals->var_from_reg(inst->src[0]); @@ -82,6 +81,31 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block) scan_inst->src[i].type = inst->dst.type; } } + + if (inst->src[0].negate) { + if (scan_inst->opcode == BRW_OPCODE_MUL) { + scan_inst->src[0].negate = !scan_inst->src[0].negate; + inst->src[0].negate = false; + } else if (scan_inst->opcode == BRW_OPCODE_MAD) { + scan_inst->src[0].negate = !scan_inst->src[0].negate; + scan_inst->src[1].negate = !scan_inst->src[1].negate; + inst->src[0].negate = false; + } else if (scan_inst->opcode == BRW_OPCODE_ADD) { + if (scan_inst->src[1].file == IMM) { + if (!brw_negate_immediate(scan_inst->src[1].type, + &scan_inst->src[1].as_brw_reg())) { + break; + } + } else { + scan_inst->src[1].negate = !scan_inst->src[1].negate; + } + scan_inst->src[0].negate = !scan_inst->src[0].negate; + inst->src[0].negate = false; + } else { + break; + } + } + scan_inst->saturate = true; inst->saturate = false; progress = true; @@ -96,7 +120,9 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block) if 
(scan_inst->opcode != BRW_OPCODE_MOV || !scan_inst->saturate || scan_inst->src[0].abs || - scan_inst->src[0].negate) { + scan_inst->src[0].negate || + scan_inst->src[0].abs != inst->src[0].abs || + scan_inst->src[0].negate != inst->src[0].negate) { interfered = true; break; } diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index 488fa6c0c45..b81b1438ba3 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -36,6 +36,7 @@ #include "main/varray.h" #include "main/uniforms.h" #include "main/fbobject.h" +#include "main/framebuffer.h" #include "main/renderbuffer.h" #include "main/texobj.h" @@ -848,18 +849,23 @@ brw_meta_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt) { struct gl_context *ctx = &brw->ctx; - GLuint fbo; + struct gl_framebuffer *drawFb; struct gl_renderbuffer *rb; struct rect rect; brw_emit_mi_flush(brw); + drawFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (drawFb == NULL) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "in %s", __func__); + return; + } + _mesa_meta_begin(ctx, MESA_META_ALL); - _mesa_GenFramebuffers(1, &fbo); rb = brw_get_rb_for_slice(brw, mt, 0, 0, false); - _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); + _mesa_bind_framebuffers(ctx, drawFb, ctx->ReadBuffer); _mesa_framebuffer_renderbuffer(ctx, ctx->DrawBuffer, GL_COLOR_ATTACHMENT0, rb); _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0); @@ -888,7 +894,7 @@ brw_meta_resolve_color(struct brw_context *brw, use_rectlist(brw, false); _mesa_reference_renderbuffer(&rb, NULL); - _mesa_DeleteFramebuffers(1, &fbo); + _mesa_reference_framebuffer(&drawFb, NULL); _mesa_meta_end(ctx); diff --git a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c index 5cfaec673c0..5b0c2e9bdd5 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c +++ b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c @@ -49,6 +49,7 @@ 
#include "main/blit.h" #include "main/buffers.h" #include "main/fbobject.h" +#include "main/framebuffer.h" #include "main/uniforms.h" #include "main/texparam.h" #include "main/texobj.h" @@ -424,8 +425,9 @@ brw_meta_stencil_blit(struct brw_context *brw, struct gl_context *ctx = &brw->ctx; struct blit_dims dims = *orig_dims; struct fb_tex_blit_state blit; - GLuint prog, fbo; - struct gl_renderbuffer *rb; + GLuint prog; + struct gl_framebuffer *drawFb = NULL; + struct gl_renderbuffer *rb = NULL; GLenum target; _mesa_meta_fb_tex_blit_begin(ctx, &blit); @@ -436,13 +438,18 @@ brw_meta_stencil_blit(struct brw_context *brw, assert(ctx->Extensions.ARB_texture_stencil8 == false); ctx->Extensions.ARB_texture_stencil8 = true; - _mesa_GenFramebuffers(1, &fbo); + drawFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (drawFb == NULL) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "in %s", __func__); + goto error; + } + /* Force the surface to be configured for level zero. */ rb = brw_get_rb_for_slice(brw, dst_mt, 0, dst_layer, true); adjust_msaa(&dims, dst_mt->num_samples); adjust_tiling(&dims, dst_mt->num_samples); - _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); + _mesa_bind_framebuffers(ctx, drawFb, ctx->ReadBuffer); _mesa_framebuffer_renderbuffer(ctx, ctx->DrawBuffer, GL_COLOR_ATTACHMENT0, rb); _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0); @@ -477,7 +484,7 @@ error: _mesa_meta_end(ctx); _mesa_reference_renderbuffer(&rb, NULL); - _mesa_DeleteFramebuffers(1, &fbo); + _mesa_reference_framebuffer(&drawFb, NULL); } void @@ -534,19 +541,22 @@ brw_meta_stencil_updownsample(struct brw_context *brw, .dst_x0 = 0, .dst_y0 = 0, .dst_x1 = dst->logical_width0, .dst_y1 = dst->logical_height0, .mirror_x = 0, .mirror_y = 0 }; - GLuint fbo; + struct gl_framebuffer *readFb; struct gl_renderbuffer *rb; if (dst->stencil_mt) dst = dst->stencil_mt; + readFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (readFb == NULL) + return; + brw_emit_mi_flush(brw); _mesa_meta_begin(ctx, MESA_META_ALL); - 
_mesa_GenFramebuffers(1, &fbo); rb = brw_get_rb_for_slice(brw, src, 0, 0, false); - _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, fbo); + _mesa_bind_framebuffers(ctx, ctx->DrawBuffer, readFb); _mesa_framebuffer_renderbuffer(ctx, ctx->ReadBuffer, GL_STENCIL_ATTACHMENT, rb); @@ -554,5 +564,5 @@ brw_meta_stencil_updownsample(struct brw_context *brw, brw_emit_mi_flush(brw); _mesa_reference_renderbuffer(&rb, NULL); - _mesa_DeleteFramebuffers(1, &fbo); + _mesa_reference_framebuffer(&readFb, NULL); } diff --git a/src/mesa/drivers/dri/i965/brw_meta_updownsample.c b/src/mesa/drivers/dri/i965/brw_meta_updownsample.c index e90e6b1e326..f5fc2072dd7 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_updownsample.c +++ b/src/mesa/drivers/dri/i965/brw_meta_updownsample.c @@ -29,6 +29,7 @@ #include "main/buffers.h" #include "main/enums.h" #include "main/fbobject.h" +#include "main/framebuffer.h" #include "main/renderbuffer.h" #include "drivers/common/meta.h" @@ -93,7 +94,8 @@ brw_meta_updownsample(struct brw_context *brw, struct intel_mipmap_tree *dst_mt) { struct gl_context *ctx = &brw->ctx; - GLuint fbos[2], src_fbo, dst_fbo; + struct gl_framebuffer *src_fb; + struct gl_framebuffer *dst_fb; struct gl_renderbuffer *src_rb; struct gl_renderbuffer *dst_rb; GLenum drawbuffer; @@ -113,17 +115,20 @@ brw_meta_updownsample(struct brw_context *brw, brw_emit_mi_flush(brw); _mesa_meta_begin(ctx, MESA_META_ALL); - _mesa_GenFramebuffers(2, fbos); src_rb = brw_get_rb_for_slice(brw, src_mt, 0, 0, false); dst_rb = brw_get_rb_for_slice(brw, dst_mt, 0, 0, false); - src_fbo = fbos[0]; - dst_fbo = fbos[1]; + src_fb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + dst_fb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); - _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, src_fbo); + if (src_fb == NULL || dst_fb == NULL || src_rb == NULL || dst_rb == NULL) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "in %s", __func__); + goto error; + } + + _mesa_bind_framebuffers(ctx, dst_fb, src_fb); _mesa_framebuffer_renderbuffer(ctx, 
ctx->ReadBuffer, attachment, src_rb); _mesa_ReadBuffer(drawbuffer); - _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_fbo); _mesa_framebuffer_renderbuffer(ctx, ctx->DrawBuffer, attachment, dst_rb); _mesa_DrawBuffer(drawbuffer); @@ -133,9 +138,11 @@ brw_meta_updownsample(struct brw_context *brw, dst_mt->logical_width0, dst_mt->logical_height0, blit_bit, GL_NEAREST); +error: _mesa_reference_renderbuffer(&src_rb, NULL); _mesa_reference_renderbuffer(&dst_rb, NULL); - _mesa_DeleteFramebuffers(2, fbos); + _mesa_reference_framebuffer(&src_fb, NULL); + _mesa_reference_framebuffer(&dst_fb, NULL); _mesa_meta_end(ctx); diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index e9351a5556a..ba9cb3f608c 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -149,7 +149,7 @@ remap_inputs_with_vue_map(nir_block *block, void *closure) struct remap_patch_urb_offsets_state { nir_builder b; - struct brw_vue_map vue_map; + const struct brw_vue_map *vue_map; }; static bool @@ -167,7 +167,7 @@ remap_patch_urb_offsets(nir_block *block, void *closure) if ((stage == MESA_SHADER_TESS_CTRL && is_output(intrin)) || (stage == MESA_SHADER_TESS_EVAL && is_input(intrin))) { - int vue_slot = state->vue_map.varying_to_slot[intrin->const_index[0]]; + int vue_slot = state->vue_map->varying_to_slot[intrin->const_index[0]]; assert(vue_slot != -1); intrin->const_index[0] = vue_slot; @@ -176,7 +176,7 @@ remap_patch_urb_offsets(nir_block *block, void *closure) nir_const_value *const_vertex = nir_src_as_const_value(*vertex); if (const_vertex) { intrin->const_index[0] += const_vertex->u[0] * - state->vue_map.num_per_vertex_slots; + state->vue_map->num_per_vertex_slots; } else { state->b.cursor = nir_before_instr(&intrin->instr); @@ -185,7 +185,7 @@ remap_patch_urb_offsets(nir_block *block, void *closure) nir_imul(&state->b, nir_ssa_for_src(&state->b, *vertex, 1), nir_imm_int(&state->b, - state->vue_map.num_per_vertex_slots)); + 
state->vue_map->num_per_vertex_slots)); /* Add it to the existing offset */ nir_src *offset = nir_get_io_offset_src(intrin); @@ -202,110 +202,61 @@ remap_patch_urb_offsets(nir_block *block, void *closure) return true; } -static void -brw_nir_lower_inputs(nir_shader *nir, - const struct brw_device_info *devinfo, - bool is_scalar, - bool use_legacy_snorm_formula, - const uint8_t *vs_attrib_wa_flags) +void +brw_nir_lower_vs_inputs(nir_shader *nir, + const struct brw_device_info *devinfo, + bool is_scalar, + bool use_legacy_snorm_formula, + const uint8_t *vs_attrib_wa_flags) { - switch (nir->stage) { - case MESA_SHADER_VERTEX: - /* Start with the location of the variable's base. */ - foreach_list_typed(nir_variable, var, node, &nir->inputs) { - var->data.driver_location = var->data.location; - } - - /* Now use nir_lower_io to walk dereference chains. Attribute arrays - * are loaded as one vec4 per element (or matrix column), so we use - * type_size_vec4 here. - */ - nir_lower_io(nir, nir_var_shader_in, type_size_vec4); - - /* This pass needs actual constants */ - nir_opt_constant_folding(nir); - - add_const_offset_to_base(nir, nir_var_shader_in); - - brw_nir_apply_attribute_workarounds(nir, use_legacy_snorm_formula, - vs_attrib_wa_flags); + /* Start with the location of the variable's base. */ + foreach_list_typed(nir_variable, var, node, &nir->inputs) { + var->data.driver_location = var->data.location; + } - if (is_scalar) { - /* Finally, translate VERT_ATTRIB_* values into the actual registers. - * - * Note that we can use nir->info.inputs_read instead of - * key->inputs_read since the two are identical aside from Gen4-5 - * edge flag differences. - */ - GLbitfield64 inputs_read = nir->info.inputs_read; + /* Now use nir_lower_io to walk dereference chains. Attribute arrays + * are loaded as one vec4 per element (or matrix column), so we use + * type_size_vec4 here. 
+ */ + nir_lower_io(nir, nir_var_shader_in, type_size_vec4); - nir_foreach_function(nir, function) { - if (function->impl) { - nir_foreach_block(function->impl, remap_vs_attrs, &inputs_read); - } - } - } - break; - case MESA_SHADER_TESS_CTRL: - case MESA_SHADER_GEOMETRY: { - if (!is_scalar && nir->stage == MESA_SHADER_GEOMETRY) { - foreach_list_typed(nir_variable, var, node, &nir->inputs) { - var->data.driver_location = var->data.location; - } - } else { - /* The GLSL linker will have already matched up GS inputs and - * the outputs of prior stages. The driver does extend VS outputs - * in some cases, but only for legacy OpenGL or Gen4-5 hardware, - * neither of which offer geometry shader support. So we can - * safely ignore that. - * - * For SSO pipelines, we use a fixed VUE map layout based on variable - * locations, so we can rely on rendezvous-by-location to make this - * work. - * - * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not - * written by previous stages and shows up via payload magic. - */ - struct brw_vue_map input_vue_map; - GLbitfield64 inputs_read = - nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID; - brw_compute_vue_map(devinfo, &input_vue_map, inputs_read, - nir->info.separate_shader || - nir->stage == MESA_SHADER_TESS_CTRL); - - foreach_list_typed(nir_variable, var, node, &nir->inputs) { - var->data.driver_location = var->data.location; - } + /* This pass needs actual constants */ + nir_opt_constant_folding(nir); - /* Inputs are stored in vec4 slots, so use type_size_vec4(). */ - nir_lower_io(nir, nir_var_shader_in, type_size_vec4); + add_const_offset_to_base(nir, nir_var_shader_in); - /* This pass needs actual constants */ - nir_opt_constant_folding(nir); + brw_nir_apply_attribute_workarounds(nir, use_legacy_snorm_formula, + vs_attrib_wa_flags); - add_const_offset_to_base(nir, nir_var_shader_in); + if (is_scalar) { + /* Finally, translate VERT_ATTRIB_* values into the actual registers. 
+ * + * Note that we can use nir->info.inputs_read instead of + * key->inputs_read since the two are identical aside from Gen4-5 + * edge flag differences. + */ + GLbitfield64 inputs_read = nir->info.inputs_read; - nir_foreach_function(nir, function) { - if (function->impl) { - nir_foreach_block(function->impl, remap_inputs_with_vue_map, - &input_vue_map); - } + nir_foreach_function(nir, function) { + if (function->impl) { + nir_foreach_block(function->impl, remap_vs_attrs, &inputs_read); } } - break; } - case MESA_SHADER_TESS_EVAL: { - struct remap_patch_urb_offsets_state state; - brw_compute_tess_vue_map(&state.vue_map, - nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID, - nir->info.patch_inputs_read); +} - foreach_list_typed(nir_variable, var, node, &nir->inputs) { - var->data.driver_location = var->data.location; - } +void +brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar, + const struct brw_vue_map *vue_map) +{ + foreach_list_typed(nir_variable, var, node, &nir->inputs) { + var->data.driver_location = var->data.location; + } - nir_lower_io(nir, nir_var_shader_in, type_size_vec4); + /* Inputs are stored in vec4 slots, so use type_size_vec4(). */ + nir_lower_io(nir, nir_var_shader_in, type_size_vec4); + if (is_scalar || nir->stage != MESA_SHADER_GEOMETRY) { /* This pass needs actual constants */ nir_opt_constant_folding(nir); @@ -313,81 +264,93 @@ brw_nir_lower_inputs(nir_shader *nir, nir_foreach_function(nir, function) { if (function->impl) { - nir_builder_init(&state.b, function->impl); - nir_foreach_block(function->impl, remap_patch_urb_offsets, &state); + nir_foreach_block(function->impl, remap_inputs_with_vue_map, + (void *) vue_map); } } - break; - } - case MESA_SHADER_FRAGMENT: - assert(is_scalar); - nir_assign_var_locations(&nir->inputs, &nir->num_inputs, - type_size_scalar); - break; - case MESA_SHADER_COMPUTE: - /* Compute shaders have no inputs. 
*/ - assert(exec_list_is_empty(&nir->inputs)); - break; - default: - unreachable("unsupported shader stage"); } } -static void -brw_nir_lower_outputs(nir_shader *nir, - const struct brw_device_info *devinfo, - bool is_scalar) +void +brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue_map) { - switch (nir->stage) { - case MESA_SHADER_VERTEX: - case MESA_SHADER_TESS_EVAL: - case MESA_SHADER_GEOMETRY: - if (is_scalar) { - nir_assign_var_locations(&nir->outputs, &nir->num_outputs, - type_size_vec4_times_4); - nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4); - } else { - nir_foreach_variable(var, &nir->outputs) - var->data.driver_location = var->data.location; - } - break; - case MESA_SHADER_TESS_CTRL: { - struct remap_patch_urb_offsets_state state; - brw_compute_tess_vue_map(&state.vue_map, nir->info.outputs_written, - nir->info.patch_outputs_written); + struct remap_patch_urb_offsets_state state; + state.vue_map = vue_map; - nir_foreach_variable(var, &nir->outputs) { - var->data.driver_location = var->data.location; - } + foreach_list_typed(nir_variable, var, node, &nir->inputs) { + var->data.driver_location = var->data.location; + } - nir_lower_io(nir, nir_var_shader_out, type_size_vec4); + nir_lower_io(nir, nir_var_shader_in, type_size_vec4); - /* This pass needs actual constants */ - nir_opt_constant_folding(nir); + /* This pass needs actual constants */ + nir_opt_constant_folding(nir); - add_const_offset_to_base(nir, nir_var_shader_out); + add_const_offset_to_base(nir, nir_var_shader_in); - nir_foreach_function(nir, function) { - if (function->impl) { - nir_builder_init(&state.b, function->impl); - nir_foreach_block(function->impl, remap_patch_urb_offsets, &state); - } + nir_foreach_function(nir, function) { + if (function->impl) { + nir_builder_init(&state.b, function->impl); + nir_foreach_block(function->impl, remap_patch_urb_offsets, &state); } - break; } - case MESA_SHADER_FRAGMENT: +} + +void 
+brw_nir_lower_fs_inputs(nir_shader *nir) +{ + nir_assign_var_locations(&nir->inputs, &nir->num_inputs, type_size_scalar); + nir_lower_io(nir, nir_var_shader_in, type_size_scalar); +} + +void +brw_nir_lower_vue_outputs(nir_shader *nir, + bool is_scalar) +{ + if (is_scalar) { nir_assign_var_locations(&nir->outputs, &nir->num_outputs, - type_size_scalar); - break; - case MESA_SHADER_COMPUTE: - /* Compute shaders have no outputs. */ - assert(exec_list_is_empty(&nir->outputs)); - break; - default: - unreachable("unsupported shader stage"); + type_size_vec4_times_4); + nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4); + } else { + nir_foreach_variable(var, &nir->outputs) + var->data.driver_location = var->data.location; + nir_lower_io(nir, nir_var_shader_out, type_size_vec4); } } +void +brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue_map) +{ + struct remap_patch_urb_offsets_state state; + state.vue_map = vue_map; + + nir_foreach_variable(var, &nir->outputs) { + var->data.driver_location = var->data.location; + } + + nir_lower_io(nir, nir_var_shader_out, type_size_vec4); + + /* This pass needs actual constants */ + nir_opt_constant_folding(nir); + + add_const_offset_to_base(nir, nir_var_shader_out); + + nir_foreach_function(nir, function) { + if (function->impl) { + nir_builder_init(&state.b, function->impl); + nir_foreach_block(function->impl, remap_patch_urb_offsets, &state); + } + } +} + +void +brw_nir_lower_fs_outputs(nir_shader *nir) +{ + nir_assign_var_locations(&nir->outputs, &nir->num_outputs, + type_size_scalar); + nir_lower_io(nir, nir_var_shader_out, type_size_scalar); +} + static int type_size_scalar_bytes(const struct glsl_type *type) { @@ -414,7 +377,7 @@ brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar) } } -static void +void brw_nir_lower_shared(nir_shader *nir) { nir_assign_var_locations(&nir->shared, &nir->num_shared, @@ -510,27 +473,6 @@ brw_preprocess_nir(nir_shader *nir, bool is_scalar) return nir; } -/** 
Lower input and output loads and stores for i965. */ -nir_shader * -brw_nir_lower_io(nir_shader *nir, - const struct brw_device_info *devinfo, - bool is_scalar, - bool use_legacy_snorm_formula, - const uint8_t *vs_attrib_wa_flags) -{ - bool progress; /* Written by OPT and OPT_V */ - (void)progress; - - OPT_V(brw_nir_lower_inputs, devinfo, is_scalar, - use_legacy_snorm_formula, vs_attrib_wa_flags); - OPT_V(brw_nir_lower_outputs, devinfo, is_scalar); - if (nir->stage == MESA_SHADER_COMPUTE) - OPT_V(brw_nir_lower_shared); - OPT_V(nir_lower_io, nir_var_all, is_scalar ? type_size_scalar : type_size_vec4); - - return nir_optimize(nir, is_scalar); -} - /* Prepare the given shader for codegen * * This function is intended to be called right before going into the actual @@ -549,6 +491,8 @@ brw_postprocess_nir(nir_shader *nir, bool progress; /* Written by OPT and OPT_V */ (void)progress; + nir = nir_optimize(nir, is_scalar); + if (devinfo->gen >= 6) { /* Try and fuse multiply-adds */ // OPT(brw_nir_opt_peephole_ffma); @@ -608,7 +552,6 @@ brw_create_nir(struct brw_context *brw, bool is_scalar) { struct gl_context *ctx = &brw->ctx; - const struct brw_device_info *devinfo = brw->intelScreen->devinfo; const nir_shader_compiler_options *options = ctx->Const.ShaderCompilerOptions[stage].NirOptions; bool progress; @@ -635,12 +578,8 @@ brw_create_nir(struct brw_context *brw, OPT_V(nir_lower_atomics, shader_prog); } - if (nir->stage != MESA_SHADER_VERTEX && - nir->stage != MESA_SHADER_TESS_CTRL && - nir->stage != MESA_SHADER_TESS_EVAL && - nir->stage != MESA_SHADER_FRAGMENT) { - nir = brw_nir_lower_io(nir, devinfo, is_scalar, false, NULL); - } + if (nir->stage == MESA_SHADER_COMPUTE) + OPT_V(brw_nir_lower_shared); return nir; } diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h index 9a90e36964b..0ef34735561 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.h +++ b/src/mesa/drivers/dri/i965/brw_nir.h @@ -82,11 +82,21 @@ nir_shader 
*brw_create_nir(struct brw_context *brw, bool is_scalar); nir_shader *brw_preprocess_nir(nir_shader *nir, bool is_scalar); -nir_shader *brw_nir_lower_io(nir_shader *nir, - const struct brw_device_info *devinfo, - bool is_scalar, - bool use_legacy_snorm_formula, - const uint8_t *vs_attrib_wa_flags); + +void brw_nir_lower_shared(nir_shader *nir); +void brw_nir_lower_vs_inputs(nir_shader *nir, + const struct brw_device_info *devinfo, + bool is_scalar, + bool use_legacy_snorm_formula, + const uint8_t *vs_attrib_wa_flags); +void brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar, + const struct brw_vue_map *vue_map); +void brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue); +void brw_nir_lower_fs_inputs(nir_shader *nir); +void brw_nir_lower_vue_outputs(nir_shader *nir, bool is_scalar); +void brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue); +void brw_nir_lower_fs_outputs(nir_shader *nir); + nir_shader *brw_postprocess_nir(nir_shader *nir, const struct brw_device_info *devinfo, bool is_scalar); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 91e47800e1f..dfe6afcf6d0 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -1227,10 +1227,17 @@ brw_compile_tes(const struct brw_compiler *compiler, const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL]; nir_shader *nir = nir_shader_clone(mem_ctx, src_shader); - nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar); nir->info.inputs_read = key->inputs_read; nir->info.patch_inputs_read = key->patch_inputs_read; - nir = brw_nir_lower_io(nir, compiler->devinfo, is_scalar, false, NULL); + + struct brw_vue_map input_vue_map; + brw_compute_tess_vue_map(&input_vue_map, + nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID, + nir->info.patch_inputs_read); + + nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar); + 
brw_nir_lower_tes_inputs(nir, &input_vue_map); + brw_nir_lower_vue_outputs(nir, is_scalar); nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar); brw_compute_vue_map(devinfo, &prog_data->base.vue_map, @@ -1249,11 +1256,6 @@ brw_compile_tes(const struct brw_compiler *compiler, /* URB entry sizes are stored as a multiple of 64 bytes. */ prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; - struct brw_vue_map input_vue_map; - brw_compute_tess_vue_map(&input_vue_map, - nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID, - nir->info.patch_inputs_read); - bool need_patch_header = nir->info.system_values_read & (BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_OUTER) | BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_INNER)); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 8136339332b..0032634f023 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -2006,9 +2006,9 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex, is_scalar); - shader = brw_nir_lower_io(shader, compiler->devinfo, is_scalar, - use_legacy_snorm_formula, - key->gl_attrib_wa_flags); + brw_nir_lower_vs_inputs(shader, compiler->devinfo, is_scalar, + use_legacy_snorm_formula, key->gl_attrib_wa_flags); + brw_nir_lower_vue_outputs(shader, is_scalar); shader = brw_postprocess_nir(shader, compiler->devinfo, is_scalar); const unsigned *assembly = NULL; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 3f30f5b92d1..7df6c721430 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -594,11 +594,31 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, memset(&c, 0, sizeof(c)); c.key = *key; + const bool is_scalar = 
compiler->scalar_stage[MESA_SHADER_GEOMETRY]; nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); + + /* The GLSL linker will have already matched up GS inputs and the outputs + * of prior stages. The driver does extend VS outputs in some cases, but + * only for legacy OpenGL or Gen4-5 hardware, neither of which offer + * geometry shader support. So we can safely ignore that. + * + * For SSO pipelines, we use a fixed VUE map layout based on variable + * locations, so we can rely on rendezvous-by-location making this work. + * + * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not + * written by previous stages and shows up via payload magic. + */ + GLbitfield64 inputs_read = + shader->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID; + brw_compute_vue_map(compiler->devinfo, + &c.input_vue_map, inputs_read, + shader->info.separate_shader); + shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex, - compiler->scalar_stage[MESA_SHADER_GEOMETRY]); - shader = brw_postprocess_nir(shader, compiler->devinfo, - compiler->scalar_stage[MESA_SHADER_GEOMETRY]); + is_scalar); + brw_nir_lower_vue_inputs(shader, is_scalar, &c.input_vue_map); + brw_nir_lower_vue_outputs(shader, is_scalar); + shader = brw_postprocess_nir(shader, compiler->devinfo, is_scalar); prog_data->include_primitive_id = (shader->info.inputs_read & VARYING_BIT_PRIMITIVE_ID) != 0; @@ -775,23 +795,6 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, prog_data->vertices_in = shader->info.gs.vertices_in; - /* The GLSL linker will have already matched up GS inputs and the outputs - * of prior stages. The driver does extend VS outputs in some cases, but - * only for legacy OpenGL or Gen4-5 hardware, neither of which offer - * geometry shader support. So we can safely ignore that. - * - * For SSO pipelines, we use a fixed VUE map layout based on variable - * locations, so we can rely on rendezvous-by-location making this work. 
- * - * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not - * written by previous stages and shows up via payload magic. - */ - GLbitfield64 inputs_read = - shader->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID; - brw_compute_vue_map(compiler->devinfo, - &c.input_vue_map, inputs_read, - shader->info.separate_shader); - /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we * need to program a URB read length of ceiling(num_slots / 2). */ @@ -807,7 +810,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, brw_print_vue_map(stderr, &prog_data->base.vue_map); } - if (compiler->scalar_stage[MESA_SHADER_GEOMETRY]) { + if (is_scalar) { /* TODO: Support instanced GS. We have basically no tests... */ assert(prog_data->invocations == 1); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 2207909e710..17d5f2aeff4 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -676,9 +676,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) case nir_intrinsic_load_instance_id: case nir_intrinsic_load_base_instance: case nir_intrinsic_load_draw_id: - case nir_intrinsic_load_invocation_id: - case nir_intrinsic_load_tess_level_inner: - case nir_intrinsic_load_tess_level_outer: { + case nir_intrinsic_load_invocation_id: { gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic); src_reg val = src_reg(nir_system_values[sv]); assert(val.file != BAD_FILE); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp index df6b44dde14..f344eaad664 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp @@ -511,18 +511,25 @@ brw_compile_tcs(const struct brw_compiler *compiler, const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_CTRL]; nir_shader *nir = nir_shader_clone(mem_ctx, src_shader); - nir = 
brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar); nir->info.outputs_written = key->outputs_written; nir->info.patch_outputs_written = key->patch_outputs_written; - nir = brw_nir_lower_io(nir, compiler->devinfo, is_scalar, false, NULL); - nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar); - prog_data->instances = DIV_ROUND_UP(nir->info.tcs.vertices_out, 2); + struct brw_vue_map input_vue_map; + brw_compute_vue_map(devinfo, &input_vue_map, + nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID, + true); brw_compute_tess_vue_map(&vue_prog_data->vue_map, nir->info.outputs_written, nir->info.patch_outputs_written); + nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar); + brw_nir_lower_vue_inputs(nir, is_scalar, &input_vue_map); + brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map); + nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar); + + prog_data->instances = DIV_ROUND_UP(nir->info.tcs.vertices_out, 2); + /* Compute URB entry size. The maximum allowed URB entry size is 32k. * That divides up as follows: * @@ -549,11 +556,6 @@ brw_compile_tcs(const struct brw_compiler *compiler, /* URB entry sizes are stored as a multiple of 64 bytes. */ vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64; - struct brw_vue_map input_vue_map; - brw_compute_vue_map(devinfo, &input_vue_map, - nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID, - true); - /* HS does not use the usual payload pushing from URB to GRFs, * because we don't have enough registers for a full-size payload, and * the hardware is broken on Haswell anyway. 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp index ce5fefc75a9..e3c23f1a52f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp @@ -28,6 +28,7 @@ */ #include "brw_vec4_tes.h" +#include "brw_cfg.h" namespace brw { @@ -53,39 +54,10 @@ vec4_tes_visitor::make_reg_for_system_value(int location, const glsl_type *type) void vec4_tes_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr) { - const struct brw_tes_prog_data *tes_prog_data = - (const struct brw_tes_prog_data *) prog_data; - switch (instr->intrinsic) { - case nir_intrinsic_load_tess_level_outer: { - dst_reg dst(this, glsl_type::vec4_type); - nir_system_values[SYSTEM_VALUE_TESS_LEVEL_OUTER] = dst; - - dst_reg temp(this, glsl_type::vec4_type); - vec4_instruction *read = - emit(VEC4_OPCODE_URB_READ, temp, input_read_header); - read->offset = 1; - read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; - emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX))); - break; - } - case nir_intrinsic_load_tess_level_inner: { - dst_reg dst(this, glsl_type::vec2_type); - nir_system_values[SYSTEM_VALUE_TESS_LEVEL_INNER] = dst; - - /* Set up the message header to reference the proper parts of the URB */ - dst_reg temp(this, glsl_type::vec4_type); - vec4_instruction *read = - emit(VEC4_OPCODE_URB_READ, temp, input_read_header); - read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; - if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) { - emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX))); - } else { - read->offset = 1; - emit(MOV(dst, src_reg(temp))); - } + case nir_intrinsic_load_tess_level_outer: + case nir_intrinsic_load_tess_level_inner: break; - } default: vec4_visitor::nir_setup_system_value_intrinsic(instr); } @@ -105,6 +77,25 @@ vec4_tes_visitor::setup_payload() reg = setup_uniforms(reg); + foreach_block_and_inst(block, vec4_instruction, inst, cfg) { + for (int i = 0; i < 3; i++) { + if 
(inst->src[i].file != ATTR) + continue; + + struct brw_reg grf = + brw_vec4_grf(reg + inst->src[i].nr / 2, 4 * (inst->src[i].nr % 2)); + grf = stride(grf, 0, 4, 1); + grf.swizzle = inst->src[i].swizzle; + grf.type = inst->src[i].type; + grf.abs = inst->src[i].abs; + grf.negate = inst->src[i].negate; + + inst->src[i] = grf; + } + } + + reg += 8 * prog_data->urb_read_length; + this->first_non_payload_grf = reg; } @@ -148,12 +139,30 @@ vec4_tes_visitor::emit_urb_write_opcode(bool complete) void vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) { + const struct brw_tes_prog_data *tes_prog_data = + (const struct brw_tes_prog_data *) prog_data; + switch (instr->intrinsic) { case nir_intrinsic_load_tess_coord: /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */ emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), src_reg(brw_vec8_grf(1, 0)))); break; + case nir_intrinsic_load_tess_level_outer: + emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), + swizzle(src_reg(ATTR, 1, glsl_type::vec4_type), + BRW_SWIZZLE_WZYX))); + break; + case nir_intrinsic_load_tess_level_inner: + if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) { + emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), + swizzle(src_reg(ATTR, 0, glsl_type::vec4_type), + BRW_SWIZZLE_WZYX))); + } else { + emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), + src_reg(ATTR, 1, glsl_type::float_type))); + } + break; case nir_intrinsic_load_primitive_id: emit(TES_OPCODE_GET_PRIMITIVE_ID, get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD)); @@ -169,6 +178,19 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) header = src_reg(this, glsl_type::uvec4_type); emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header), input_read_header, indirect_offset); + } else { + /* Arbitrarily only push up to 24 vec4 slots worth of data, + * which is 12 registers (since each holds 2 vec4 slots). 
+ */ + const unsigned max_push_slots = 24; + if (imm_offset < max_push_slots) { + emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D), + src_reg(ATTR, imm_offset, glsl_type::ivec4_type))); + prog_data->urb_read_length = + MAX2(prog_data->urb_read_length, + DIV_ROUND_UP(imm_offset + 1, 2)); + break; + } } dst_reg temp(this, glsl_type::ivec4_type); diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c index 2383401d14c..31354582964 100644 --- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c +++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c @@ -757,7 +757,9 @@ bool intel_get_memcpy(mesa_format tiledFormat, GLenum format, *cpp = 1; *mem_copy = memcpy; } else if ((tiledFormat == MESA_FORMAT_B8G8R8A8_UNORM) || - (tiledFormat == MESA_FORMAT_B8G8R8X8_UNORM)) { + (tiledFormat == MESA_FORMAT_B8G8R8X8_UNORM) || + (tiledFormat == MESA_FORMAT_B8G8R8A8_SRGB) || + (tiledFormat == MESA_FORMAT_B8G8R8X8_SRGB)) { *cpp = 4; if (format == GL_BGRA) { *mem_copy = memcpy; @@ -766,7 +768,9 @@ bool intel_get_memcpy(mesa_format tiledFormat, GLenum format, : rgba8_copy_aligned_src; } } else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) || - (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM)) { + (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM) || + (tiledFormat == MESA_FORMAT_R8G8B8A8_SRGB) || + (tiledFormat == MESA_FORMAT_R8G8B8X8_SRGB)) { *cpp = 4; if (format == GL_BGRA) { /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can diff --git a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp index 32e8b8f8867..7b3b9e534b3 100644 --- a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp @@ -215,6 +215,40 @@ TEST_F(saturate_propagation_test, neg_mov_sat) fs_reg dst0 = v->vgrf(glsl_type::float_type); fs_reg dst1 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); + 
bld.RNDU(dst0, src0); + dst0.negate = true; + set_saturate(true, bld.MOV(dst1, dst0)); + + /* = Before = + * + * 0: rndu(8) dst0 src0 + * 1: mov.sat(8) dst1 -dst0 + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(saturate_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_RNDU, instruction(block0, 0)->opcode); + EXPECT_FALSE(instruction(block0, 0)->saturate); + EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 1)->opcode); + EXPECT_TRUE(instruction(block0, 1)->saturate); +} + +TEST_F(saturate_propagation_test, add_neg_mov_sat) +{ + const fs_builder &bld = v->bld; + fs_reg dst0 = v->vgrf(glsl_type::float_type); + fs_reg dst1 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); bld.ADD(dst0, src0, src1); dst0.negate = true; @@ -226,7 +260,8 @@ TEST_F(saturate_propagation_test, neg_mov_sat) * 1: mov.sat(8) dst1 -dst0 * * = After = - * (no changes) + * 0: add.sat(8) dst0 -src0 -src1 + * 1: mov(8) dst1 dst0 */ v->calculate_cfg(); @@ -235,13 +270,138 @@ TEST_F(saturate_propagation_test, neg_mov_sat) EXPECT_EQ(0, block0->start_ip); EXPECT_EQ(1, block0->end_ip); - EXPECT_FALSE(saturate_propagation(v)); + EXPECT_TRUE(saturate_propagation(v)); EXPECT_EQ(0, block0->start_ip); EXPECT_EQ(1, block0->end_ip); EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_TRUE(instruction(block0, 0)->saturate); + EXPECT_TRUE(instruction(block0, 0)->src[0].negate); + EXPECT_TRUE(instruction(block0, 0)->src[1].negate); + EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 1)->opcode); + EXPECT_FALSE(instruction(block0, 1)->saturate); +} + +TEST_F(saturate_propagation_test, mul_neg_mov_sat) +{ + const fs_builder &bld = v->bld; + fs_reg dst0 = v->vgrf(glsl_type::float_type); + fs_reg dst1 = v->vgrf(glsl_type::float_type); + 
fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + bld.MUL(dst0, src0, src1); + dst0.negate = true; + set_saturate(true, bld.MOV(dst1, dst0)); + + /* = Before = + * + * 0: mul(8) dst0 src0 src1 + * 1: mov.sat(8) dst1 -dst0 + * + * = After = + * 0: mul.sat(8) dst0 -src0 src1 + * 1: mov(8) dst1 dst0 + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(saturate_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode); + EXPECT_TRUE(instruction(block0, 0)->saturate); + EXPECT_TRUE(instruction(block0, 0)->src[0].negate); + EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 1)->opcode); + EXPECT_FALSE(instruction(block0, 1)->saturate); + EXPECT_FALSE(instruction(block0, 1)->src[0].negate); +} + +TEST_F(saturate_propagation_test, mul_mov_sat_neg_mov_sat) +{ + const fs_builder &bld = v->bld; + fs_reg dst0 = v->vgrf(glsl_type::float_type); + fs_reg dst1 = v->vgrf(glsl_type::float_type); + fs_reg dst2 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + bld.MUL(dst0, src0, src1); + set_saturate(true, bld.MOV(dst1, dst0)); + dst0.negate = true; + set_saturate(true, bld.MOV(dst2, dst0)); + + /* = Before = + * + * 0: mul(8) dst0 src0 src1 + * 1: mov.sat(8) dst1 dst0 + * 2: mov.sat(8) dst2 -dst0 + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_FALSE(saturate_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode); EXPECT_FALSE(instruction(block0, 0)->saturate); + EXPECT_FALSE(instruction(block0, 0)->src[1].negate); EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0,
1)->opcode); EXPECT_TRUE(instruction(block0, 1)->saturate); + EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 2)->opcode); + EXPECT_TRUE(instruction(block0, 2)->src[0].negate); + EXPECT_TRUE(instruction(block0, 2)->saturate); +} + +TEST_F(saturate_propagation_test, mul_neg_mov_sat_neg_mov_sat) +{ + const fs_builder &bld = v->bld; + fs_reg dst0 = v->vgrf(glsl_type::float_type); + fs_reg dst1 = v->vgrf(glsl_type::float_type); + fs_reg dst2 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + bld.MUL(dst0, src0, src1); + dst0.negate = true; + set_saturate(true, bld.MOV(dst1, dst0)); + set_saturate(true, bld.MOV(dst2, dst0)); + + /* = Before = + * + * 0: mul(8) dst0 src0 src1 + * 1: mov.sat(8) dst1 -dst0 + * 2: mov.sat(8) dst2 -dst0 + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_FALSE(saturate_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode); + EXPECT_FALSE(instruction(block0, 0)->saturate); + EXPECT_FALSE(instruction(block0, 0)->src[1].negate); + EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 1)->opcode); + EXPECT_TRUE(instruction(block0, 1)->src[0].negate); + EXPECT_TRUE(instruction(block0, 1)->saturate); + EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 2)->opcode); + EXPECT_TRUE(instruction(block0, 2)->src[0].negate); + EXPECT_TRUE(instruction(block0, 2)->saturate); } TEST_F(saturate_propagation_test, abs_mov_sat) |