From fb5ff51f422e1718c09da01f3c5bb5baecc9d68e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 19 Jul 2011 12:20:14 -0700 Subject: i965: Fix regression in 29a911c50e4443dfebef0a2e32c39b64992fa3cc. The previous define was the full 32-bit header, while the new define was just the top 16 bits. --- src/mesa/drivers/dri/i965/brw_misc_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 03cebbb824b..f7e6e7c81d1 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -46,7 +46,7 @@ static void upload_drawing_rect(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE); + OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); OUT_BATCH(0); /* xmin, ymin */ OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) | ((ctx->DrawBuffer->Height - 1) << 16)); -- cgit v1.2.3 From f7dbcba280e4397cadb14f230aa925b4143cdde4 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 18 Jul 2011 00:37:45 -0700 Subject: intel: Fix stencil buffer to be W tiled Until now, the stencil buffer was allocated as a Y tiled buffer, because in several locations the PRM states that it is. However, it is actually W tiled. From the PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Format: W-Major Tile Format is used for separate stencil. The GTT is incapable of W fencing, so we allocate the stencil buffer with I915_TILING_NONE and decode the tile's layout in software. This fix touches the following portions of code: - In intel_allocate_renderbuffer_storage(), allocate the stencil buffer with I915_TILING_NONE. - In intel_verify_dri2_has_hiz(), verify that the stencil buffer is not tiled. - In the stencil buffer's span functions, the tile's layout must be decoded in software. This commit mutually depends on the xf86-video-intel commit dri: Do not tile stencil buffer Author: Chad Versace Date: Mon Jul 18 00:38:00 2011 -0700 On Gen6 with separate stencil enabled, fixes the following Piglit tests: bugs/fdo23670-drawpix_stencil general/stencil-drawpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX16-copypixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX16-drawpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX16-readpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX1-copypixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX1-drawpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX1-readpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX4-copypixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX4-drawpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX4-readpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX8-copypixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX8-drawpixels spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX8-readpixels spec/EXT_packed_depth_stencil/fbo-stencil-GL_DEPTH24_STENCIL8-copypixels spec/EXT_packed_depth_stencil/fbo-stencil-GL_DEPTH24_STENCIL8-readpixels spec/EXT_packed_depth_stencil/readpixels-24_8 Note: This is a candidate for the 7.11 branch. Signed-off-by: Chad Versace Reviewed-by: Eric Anholt Reviewed-by: Ian Romanick Acked-by: Kenneth Graunke --- src/mesa/drivers/dri/intel/intel_clear.c | 6 ++ src/mesa/drivers/dri/intel/intel_context.c | 9 ++- src/mesa/drivers/dri/intel/intel_fbo.c | 12 ++-- src/mesa/drivers/dri/intel/intel_screen.h | 9 ++- src/mesa/drivers/dri/intel/intel_span.c | 88 +++++++++++++++++++++++------- 5 files changed, 93 insertions(+), 31 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index dfca03c14bf..5ab98734cfc 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -143,6 +143,12 @@ intelClear(struct gl_context *ctx, GLbitfield mask) */ tri_mask |= BUFFER_BIT_STENCIL; } + else if (intel->has_separate_stencil && + stencilRegion->tiling == I915_TILING_NONE) { + /* The stencil buffer is actually W tiled, which the hardware + * cannot blit to. */ + tri_mask |= BUFFER_BIT_STENCIL; + } else { /* clearing all stencil bits, use blitting */ blit_mask |= BUFFER_BIT_STENCIL; diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 2ba13632569..fe8be082dfc 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -1439,7 +1439,12 @@ intel_verify_dri2_has_hiz(struct intel_context *intel, assert(stencil_rb->Base.Format == MESA_FORMAT_S8); assert(depth_rb && depth_rb->Base.Format == MESA_FORMAT_X8_Z24); - if (stencil_rb->region->tiling == I915_TILING_Y) { + if (stencil_rb->region->tiling == I915_TILING_NONE) { + /* + * The stencil buffer is actually W tiled. The region's tiling is + * I915_TILING_NONE, however, because the GTT is incapable of W + * fencing. + */ intel->intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_TRUE; return; } else { @@ -1527,7 +1532,7 @@ intel_verify_dri2_has_hiz(struct intel_context *intel, * Presently, however, no verification or clean up is necessary, and * execution should not reach here. If the framebuffer still has a hiz * region, then we have already set dri2_has_hiz to true after - * confirming above that the stencil buffer is Y tiled. + * confirming above that the stencil buffer is W tiled. */ assert(0); } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 55bcc757873..35be3257ab3 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -173,6 +173,9 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer if (irb->Base.Format == MESA_FORMAT_S8) { /* + * The stencil buffer is W tiled. However, we request from the kernel a + * non-tiled buffer because the GTT is incapable of W fencing. + * * The stencil buffer has quirky pitch requirements. From Vol 2a, * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch": * The pitch must be set to 2x the value computed based on width, as @@ -180,14 +183,13 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer * To accomplish this, we resort to the nasty hack of doubling the drm * region's cpp and halving its height. * - * If we neglect to double the pitch, then drm_intel_gem_bo_map_gtt() - * maps the memory incorrectly. + * If we neglect to double the pitch, then render corruption occurs. */ irb->region = intel_region_alloc(intel->intelScreen, - I915_TILING_Y, + I915_TILING_NONE, cpp * 2, - width, - height / 2, + ALIGN(width, 64), + ALIGN((height + 1) / 2, 64), GL_TRUE); if (!irb->region) return false; diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h index b2013af1a29..9dd6a525566 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.h +++ b/src/mesa/drivers/dri/intel/intel_screen.h @@ -63,9 +63,12 @@ * x8_z24 and s8). * * Eventually, intel_update_renderbuffers() makes a DRI2 request for - * DRI2BufferStencil and DRI2BufferHiz. If the returned buffers are Y tiled, - * then we joyfully set intel_screen.dri2_has_hiz to true and continue as if - * nothing happend. + * DRI2BufferStencil and DRI2BufferHiz. If the stencil buffer's tiling is + * I915_TILING_NONE [1], then we joyfully set intel_screen.dri2_has_hiz to + * true and continue as if nothing happend. + * + * [1] The stencil buffer is actually W tiled. However, we request from the + * kernel a non-tiled buffer because the GTT is incapable of W fencing. * * If the buffers are X tiled, however, the handshake has failed and we must * clean up. diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 153803fba09..2e1c80c4766 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -131,38 +131,84 @@ intel_set_span_functions(struct intel_context *intel, int miny = 0; \ int maxx = rb->Width; \ int maxy = rb->Height; \ - int stride = rb->RowStride; \ - uint8_t *buf = rb->Data; \ + \ + /* \ + * Here we ignore rb->Data and rb->RowStride as set by \ + * intelSpanRenderStart. Since intel_offset_S8 decodes the W tile \ + * manually, the region's *real* base address and stride is \ + * required. \ + */ \ + struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ + uint8_t *buf = irb->region->buffer->virtual; \ + unsigned stride = irb->region->pitch; \ + unsigned height = 2 * irb->region->height; \ + bool flip = rb->Name == 0; \ + int y_scale = flip ? -1 : 1; \ + int y_bias = flip ? (height - 1) : 0; \ -/* Don't flip y. */ #undef Y_FLIP -#define Y_FLIP(y) y +#define Y_FLIP(y) (y_scale * (y) + y_bias) /** * \brief Get pointer offset into stencil buffer. * - * The stencil buffer interleaves two rows into one. Yay for crazy hardware. - * The table below demonstrates how the pointer arithmetic behaves for a buffer - * with positive stride (s=stride). - * - * x | y | byte offset - * -------------------------- - * 0 | 0 | 0 - * 0 | 1 | 1 - * 1 | 0 | 2 - * 1 | 1 | 3 - * ... | ... | ... - * 0 | 2 | s - * 0 | 3 | s + 1 - * 1 | 2 | s + 2 - * 1 | 3 | s + 3 + * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we + * must decode the tile's layout in software. * + * See + * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile + * Format. + * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm * + * Even though the returned offset is always positive, the return type is + * signed due to + * commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137 + * mesa: Fix return type of _mesa_get_format_bytes() (#37351) */ static inline intptr_t -intel_offset_S8(int stride, GLint x, GLint y) +intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y) { - return 2 * ((y / 2) * stride + x) + y % 2; + uint32_t tile_size = 4096; + uint32_t tile_width = 64; + uint32_t tile_height = 64; + uint32_t row_size = 64 * stride; + + uint32_t tile_x = x / tile_width; + uint32_t tile_y = y / tile_height; + + /* The byte's address relative to the tile's base addres. */ + uint32_t byte_x = x % tile_width; + uint32_t byte_y = y % tile_height; + + uintptr_t u = tile_y * row_size + + tile_x * tile_size + + 512 * (byte_x / 8) + + 64 * (byte_y / 8) + + 32 * ((byte_y / 4) % 2) + + 16 * ((byte_x / 4) % 2) + + 8 * ((byte_y / 2) % 2) + + 4 * ((byte_x / 2) % 2) + + 2 * (byte_y % 2) + + 1 * (byte_x % 2); + + /* + * Errata for Gen5: + * + * An additional offset is needed which is not documented in the PRM. + * + * if ((byte_x / 8) % 2 == 1) { + * if ((byte_y / 8) % 2) == 0) { + * u += 64; + * } else { + * u -= 64; + * } + * } + * + * The offset is expressed more tersely as + * u += ((int) x & 0x8) * (8 - (((int) y & 0x8) << 1)); + */ + + return u; } #define WRITE_STENCIL(x, y, src) buf[intel_offset_S8(stride, x, y)] = src; -- cgit v1.2.3 From f0e306c3430e4d8f6c8e085537807007a488f1e2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jul 2011 15:24:47 -0600 Subject: mesa: update, shorten some comments in dd.h --- src/mesa/main/dd.h | 51 +++++++++++++-------------------------------------- 1 file changed, 13 insertions(+), 38 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 9fe6d527f92..e1ae30fe4d4 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -189,31 +189,22 @@ struct dd_function_table { /*@{*/ /** - * Choose texture format. - * - * This is called by the \c _mesa_store_tex[sub]image[123]d() fallback - * functions. The driver should examine \p internalFormat and return a - * gl_format value. + * Choose actual hardware texture format given the user-provided source + * image format and type and the desired internal format. In some + * cases, srcFormat and srcType can be GL_NONE. + * Called by glTexImage(), etc. */ GLuint (*ChooseTextureFormat)( struct gl_context *ctx, GLint internalFormat, GLenum srcFormat, GLenum srcType ); /** - * Called by glTexImage1D(). - * - * \param target user specified. - * \param format user specified. - * \param type user specified. - * \param pixels user specified. - * \param packing indicates the image packing of pixels. + * Called by glTexImage1D(). Simply copy the source texture data into the + * destination texture memory. The gl_texture_image fields, etc. will be + * fully initialized. + * The parameters are the same as glTexImage1D(), plus: + * \param packing describes how to unpack the source data. * \param texObj is the target texture object. - * \param texImage is the target texture image. It will have the texture \p - * width, \p height, \p depth, \p border and \p internalFormat information. - * - * \p retainInternalCopy is returned by this function and indicates whether - * core Mesa should keep an internal copy of the texture image. - * - * Drivers should call a fallback routine from texstore.c if needed. + * \param texImage is the target texture image. */ void (*TexImage1D)( struct gl_context *ctx, GLenum target, GLint level, GLint internalFormat, @@ -250,25 +241,9 @@ struct dd_function_table { struct gl_texture_image *texImage ); /** - * Called by glTexSubImage1D(). - * - * \param target user specified. - * \param level user specified. - * \param xoffset user specified. - * \param yoffset user specified. - * \param zoffset user specified. - * \param width user specified. - * \param height user specified. - * \param depth user specified. - * \param format user specified. - * \param type user specified. - * \param pixels user specified. - * \param packing indicates the image packing of pixels. - * \param texObj is the target texture object. - * \param texImage is the target texture image. It will have the texture \p - * width, \p height, \p border and \p internalFormat information. - * - * The driver should use a fallback routine from texstore.c if needed. + * Called by glTexSubImage1D(). Replace a subset of the target texture + * with new texel data. + * \sa dd_function_table::TexImage1D. */ void (*TexSubImage1D)( struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, GLsizei width, -- cgit v1.2.3 From d84791a72b33f96fab54ff2399e8053c50205454 Mon Sep 17 00:00:00 2001 From: Fredrik Höglund Date: Tue, 19 Jul 2011 15:25:32 -0600 Subject: st/mesa: fix the texture format in st_context_teximage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 1a339b6c71ebab6e1a64f05b2e133022d3bbcd15 made st_ChooseTextureFormat map GL_RGBA with type GL_UNSIGNED_BYTE to PIPE_FORMAT_A8B8G8R8_UNORM. The image format for ARGB pixmaps is PIPE_FORMAT_B8G8R8A8_UNORM however. This mismatch caused the texture to be recreated in st_finalize_texture. NOTE: This is a candidate for the 7.11 branch. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=39209 Signed-off-by: Fredrik Höglund Reviewed-by: Stéphane Marchesin Signed-off-by: Brian Paul --- src/mesa/state_tracker/st_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index 7bd82aae206..d5228d387f7 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -587,7 +587,7 @@ st_context_teximage(struct st_context_iface *stctxi, internalFormat = GL_RGB; texFormat = st_ChooseTextureFormat(ctx, internalFormat, - GL_RGBA, GL_UNSIGNED_BYTE); + GL_BGRA, GL_UNSIGNED_BYTE); _mesa_init_teximage_fields(ctx, target, texImage, tex->width0, tex->height0, 1, 0, -- cgit v1.2.3 From 5874890c26f434f54e9218b83fae4eb8175c24e9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jul 2011 20:03:05 -0600 Subject: mesa: stop using ctx->Driver.CopyTexImage1D/2D() hooks --- src/mesa/main/teximage.c | 49 +++++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 17 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 6f53686e7ff..302fd65cb27 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -2797,29 +2797,43 @@ copyteximage(struct gl_context *ctx, GLuint dims, _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims); } else { - gl_format texFormat; - - if (texImage->Data) { - ctx->Driver.FreeTexImageData( ctx, texImage ); - } + /* choose actual hw format */ + gl_format texFormat = _mesa_choose_texture_format(ctx, texObj, + target, level, + internalFormat, + GL_NONE, GL_NONE); - ASSERT(texImage->Data == NULL); + if (legal_texture_size(ctx, texFormat, width, height, 1)) { + GLint srcX = x, srcY = y, dstX = 0, dstY = 0; - texFormat = _mesa_choose_texture_format(ctx, texObj, target, level, - internalFormat, GL_NONE, - GL_NONE); + /* Free old texture image */ + ctx->Driver.FreeTexImageData(ctx, texImage); - if (legal_texture_size(ctx, texFormat, width, height, 1)) { _mesa_init_teximage_fields(ctx, target, texImage, width, height, 1, border, internalFormat, texFormat); - ASSERT(ctx->Driver.CopyTexImage2D); - if (dims == 1) - ctx->Driver.CopyTexImage1D(ctx, target, level, internalFormat, - x, y, width, border); - else - ctx->Driver.CopyTexImage2D(ctx, target, level, internalFormat, - x, y, width, height, border); + /* Allocate texture memory (no pixel data yet) */ + if (dims == 1) { + ctx->Driver.TexImage1D(ctx, target, level, internalFormat, + width, border, GL_NONE, GL_NONE, NULL, + &ctx->Unpack, texObj, texImage); + } + else { + ctx->Driver.TexImage2D(ctx, target, level, internalFormat, + width, height, border, GL_NONE, GL_NONE, + NULL, &ctx->Unpack, texObj, texImage); + } + + if (_mesa_clip_copytexsubimage(ctx, &dstX, &dstY, &srcX, &srcY, + &width, &height)) { + if (dims == 1) + ctx->Driver.CopyTexSubImage1D(ctx, target, level, dstX, + srcX, srcY, width); + + else + ctx->Driver.CopyTexSubImage2D(ctx, target, level, dstX, dstY, + srcX, srcY, width, height); + } check_gen_mipmap(ctx, target, texObj, level); @@ -2830,6 +2844,7 @@ copyteximage(struct gl_context *ctx, GLuint dims, ctx->NewState |= _NEW_TEXTURE; } else { + /* probably too large of image */ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims); } } -- cgit v1.2.3 From 1da28fa959e80610ebc9b7a28bfb83e3cad3aee4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jul 2011 20:03:05 -0600 Subject: mesa: remove comments referring to Driver.TexImage1D/2D --- src/mesa/main/texstore.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index 6e1e63bdfb0..c4aeaa8f16d 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -4577,8 +4577,7 @@ texture_row_stride(const struct gl_texture_image *texImage) /** - * This is the software fallback for Driver.TexImage1D() - * and Driver.CopyTexImage1D(). + * This is the software fallback for Driver.TexImage1D(). * \sa _mesa_store_teximage2d() */ void @@ -4629,8 +4628,7 @@ _mesa_store_teximage1d(struct gl_context *ctx, GLenum target, GLint level, /** - * This is the software fallback for Driver.TexImage2D() - * and Driver.CopyTexImage2D(). + * This is the software fallback for Driver.TexImage2D(). * * This function is oriented toward storing images in main memory, rather * than VRAM. Device driver's can easily plug in their own replacement. @@ -4684,8 +4682,7 @@ _mesa_store_teximage2d(struct gl_context *ctx, GLenum target, GLint level, /** - * This is the software fallback for Driver.TexImage3D() - * and Driver.CopyTexImage3D(). + * This is the software fallback for Driver.TexImage3D(). * \sa _mesa_store_teximage2d() */ void -- cgit v1.2.3 From fbe6836043dff2798571b838096ed59c60ec4438 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jul 2011 20:03:05 -0600 Subject: intel: remove intelCopyTexImage1D/2D() --- src/mesa/drivers/dri/intel/intel_tex_copy.c | 97 ----------------------------- 1 file changed, 97 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 1a3643da593..e89e91dee3e 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -163,101 +163,6 @@ intel_copy_texsubimage(struct intel_context *intel, } -static void -intelCopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLint border) -{ - struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - int srcx, srcy, dstx, dsty, height; - - if (border) - goto fail; - - /* Setup or redefine the texture object, mipmap tree and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage1D(ctx, target, level, internalFormat, - width, border, - GL_RGBA, CHAN_TYPE, NULL, - &ctx->DefaultPacking, texObj, texImage); - srcx = x; - srcy = y; - dstx = 0; - dsty = 0; - height = 1; - if (!_mesa_clip_copytexsubimage(ctx, - &dstx, &dsty, - &srcx, &srcy, - &width, &height)) - return; - - if (!intel_copy_texsubimage(intel_context(ctx), target, - intel_texture_image(texImage), - internalFormat, 0, 0, x, y, width, height)) - goto fail; - - return; - - fail: - fallback_debug("%s - fallback to swrast\n", __FUNCTION__); - _mesa_meta_CopyTexImage1D(ctx, target, level, internalFormat, x, y, - width, border); -} - - -static void -intelCopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border) -{ - struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - int srcx, srcy, dstx, dsty; - - if (border) - goto fail; - - /* Setup or redefine the texture object, mipmap tree and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, - GL_RGBA, GL_UNSIGNED_BYTE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - srcx = x; - srcy = y; - dstx = 0; - dsty = 0; - if (!_mesa_clip_copytexsubimage(ctx, - &dstx, &dsty, - &srcx, &srcy, - &width, &height)) - return; - - if (!intel_copy_texsubimage(intel_context(ctx), target, - intel_texture_image(texImage), - internalFormat, 0, 0, x, y, width, height)) - goto fail; - - return; - - fail: - fallback_debug("%s - fallback to swrast\n", __FUNCTION__); - _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y, - width, height, border); -} - - static void intelCopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width) @@ -312,8 +217,6 @@ intelCopyTexSubImage2D(struct gl_context * ctx, GLenum target, GLint level, void intelInitTextureCopyImageFuncs(struct dd_function_table *functions) { - functions->CopyTexImage1D = intelCopyTexImage1D; - functions->CopyTexImage2D = intelCopyTexImage2D; functions->CopyTexSubImage1D = intelCopyTexSubImage1D; functions->CopyTexSubImage2D = intelCopyTexSubImage2D; } -- cgit v1.2.3 From 9ed87c4463cf265b06566d15ba86bf20661c70de Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jul 2011 20:03:05 -0600 Subject: radeon: remove radeonCopyTexImage2D() --- src/mesa/drivers/dri/r200/r200_tex.c | 1 - src/mesa/drivers/dri/r300/r300_tex.c | 1 - src/mesa/drivers/dri/r600/evergreen_tex.c | 1 - src/mesa/drivers/dri/r600/r600_tex.c | 1 - src/mesa/drivers/dri/radeon/radeon_tex.c | 1 - src/mesa/drivers/dri/radeon/radeon_tex_copy.c | 55 --------------------------- src/mesa/drivers/dri/radeon/radeon_texture.h | 5 --- 7 files changed, 65 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c index d42e8f12041..91e77f9f7da 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.c +++ b/src/mesa/drivers/dri/r200/r200_tex.c @@ -527,7 +527,6 @@ void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *fu functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 590d9afe14a..93d8fe185ef 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -379,7 +379,6 @@ void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r600/evergreen_tex.c b/src/mesa/drivers/dri/r600/evergreen_tex.c index 33a5f277683..9784a8484f2 100644 --- a/src/mesa/drivers/dri/r600/evergreen_tex.c +++ b/src/mesa/drivers/dri/r600/evergreen_tex.c @@ -1688,7 +1688,6 @@ void evergreenInitTextureFuncs(radeonContextPtr radeon, struct dd_function_table functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c index eb7ed30c7a3..3efa1d197fa 100644 --- a/src/mesa/drivers/dri/r600/r600_tex.c +++ b/src/mesa/drivers/dri/r600/r600_tex.c @@ -470,7 +470,6 @@ void r600InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c index 25a8ddf7b6a..a0b5506ae76 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex.c @@ -455,7 +455,6 @@ void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table * functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c index f14dfa25d40..94ff3c4a727 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c @@ -140,61 +140,6 @@ do_copy_texsubimage(struct gl_context *ctx, dstx, dsty, width, height, flip_y); } -void -radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border) -{ - struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - int srcx, srcy, dstx, dsty; - - radeonContextPtr radeon = RADEON_CONTEXT(ctx); - radeon_prepare_render(radeon); - - if (border) - goto fail; - - /* Setup or redefine the texture object, mipmap tree and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, - GL_RGBA, GL_UNSIGNED_BYTE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - srcx = x; - srcy = y; - dstx = 0; - dsty = 0; - if (!_mesa_clip_copytexsubimage(ctx, - &dstx, &dsty, - &srcx, &srcy, - &width, &height)) { - return; - } - - if (!do_copy_texsubimage(ctx, target, level, - radeon_tex_obj(texObj), (radeon_texture_image *)texImage, - 0, 0, x, y, width, height)) { - goto fail; - } - - return; - -fail: - radeon_print(RADEON_FALLBACKS, RADEON_NORMAL, - "Falling back to sw for glCopyTexImage2D (internalFormat %s, border %d)\n", - _mesa_lookup_enum_by_nr(internalFormat), border); - - _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y, - width, height, border); -} - void radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h index 538a07fbba8..6fc06d967dd 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.h +++ b/src/mesa/drivers/dri/radeon/radeon_texture.h @@ -126,11 +126,6 @@ void radeonGetCompressedTexImage(struct gl_context *ctx, GLenum target, GLint le struct gl_texture_object *texObj, struct gl_texture_image *texImage); -void radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border); - void radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, -- cgit v1.2.3 From 0823ef84a5c3a6332ea76d0001febf6aaa440dc3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jul 2011 20:03:05 -0600 Subject: st/mesa: remove st_CopyTexImage1D/2D() --- src/mesa/state_tracker/st_cb_texture.c | 55 ---------------------------------- 1 file changed, 55 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 6907cfc03cf..83e83695aae 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -1609,59 +1609,6 @@ st_copy_texsubimage(struct gl_context *ctx, -static void -st_CopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLint border) -{ - struct gl_texture_unit *texUnit = - &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - - /* Setup or redefine the texture object, texture and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage1D(ctx, target, level, internalFormat, - width, border, - GL_RGBA, CHAN_TYPE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - st_copy_texsubimage(ctx, target, level, - 0, 0, 0, /* destX,Y,Z */ - x, y, width, 1); /* src X, Y, size */ -} - - -static void -st_CopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border) -{ - struct gl_texture_unit *texUnit = - &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - - /* Setup or redefine the texture object, texture and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, - GL_RGBA, CHAN_TYPE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - st_copy_texsubimage(ctx, target, level, - 0, 0, 0, /* destX,Y,Z */ - x, y, width, height); /* src X, Y, size */ -} - - static void st_CopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width) @@ -1947,8 +1894,6 @@ st_init_texture_functions(struct dd_function_table *functions) functions->CompressedTexSubImage1D = st_CompressedTexSubImage1D; functions->CompressedTexSubImage2D = st_CompressedTexSubImage2D; functions->CompressedTexSubImage3D = st_CompressedTexSubImage3D; - functions->CopyTexImage1D = st_CopyTexImage1D; - functions->CopyTexImage2D = st_CopyTexImage2D; functions->CopyTexSubImage1D = st_CopyTexSubImage1D; functions->CopyTexSubImage2D = st_CopyTexSubImage2D; functions->CopyTexSubImage3D = st_CopyTexSubImage3D; -- cgit v1.2.3 From 774311fb5403e3da7ff0197199ffad8f34089e6a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jul 2011 20:03:05 -0600 Subject: meta: remove _mesa_meta_CopyTexImage1D/2D() --- src/mesa/drivers/common/driverfuncs.c | 2 - src/mesa/drivers/common/meta.c | 113 ---------------------------------- src/mesa/drivers/common/meta.h | 10 --- 3 files changed, 125 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 8ab129dd73d..76630264bf7 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -95,8 +95,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->TexSubImage2D = _mesa_store_texsubimage2d; driver->TexSubImage3D = _mesa_store_texsubimage3d; driver->GetTexImage = _mesa_get_teximage; - driver->CopyTexImage1D = _mesa_meta_CopyTexImage1D; - driver->CopyTexImage2D = _mesa_meta_CopyTexImage2D; driver->CopyTexSubImage1D = _mesa_meta_CopyTexSubImage1D; driver->CopyTexSubImage2D = _mesa_meta_CopyTexSubImage2D; driver->CopyTexSubImage3D = _mesa_meta_CopyTexSubImage3D; diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 0e58aeca3f5..706239c7736 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -2677,119 +2677,6 @@ get_temp_image_type(struct gl_context *ctx, GLenum baseFormat) } -/** - * Helper for _mesa_meta_CopyTexImage1/2D() functions. - * Have to be careful with locking and meta state for pixel transfer. - */ -static void -copy_tex_image(struct gl_context *ctx, GLuint dims, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLsizei height, GLint border) -{ - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; - GLenum format, type; - GLint bpp; - void *buf; - struct gl_renderbuffer *read_rb = ctx->ReadBuffer->_ColorReadBuffer; - - texObj = _mesa_get_current_tex_object(ctx, target); - texImage = _mesa_get_tex_image(ctx, texObj, target, level); - - /* Choose format/type for temporary image buffer */ - format = _mesa_base_tex_format(ctx, internalFormat); - - if (format == GL_LUMINANCE && - _mesa_get_format_base_format(read_rb->Format) != GL_LUMINANCE) { - /* The glReadPixels() path will convert RGB to luminance by - * summing R+G+B. glCopyTexImage() is supposed to behave as - * glCopyPixels, which doesn't do that change, and instead - * leaves it up to glTexImage which converts RGB to luminance by - * just taking the R channel. To avoid glReadPixels() trashing - * our data, use RGBA for our temporary image. - */ - format = GL_RGBA; - } - - type = get_temp_image_type(ctx, format); - bpp = _mesa_bytes_per_pixel(format, type); - if (bpp <= 0) { - _mesa_problem(ctx, "Bad bpp in meta copy_tex_image()"); - return; - } - - /* - * Alloc image buffer (XXX could use a PBO) - */ - buf = malloc(width * height * bpp); - if (!buf) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims); - return; - } - - _mesa_unlock_texture(ctx, texObj); /* need to unlock first */ - - /* - * Read image from framebuffer (disable pixel transfer ops) - */ - _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER); - ctx->Driver.ReadPixels(ctx, x, y, width, height, - format, type, &ctx->Pack, buf); - _mesa_meta_end(ctx); - - if (texImage->Data) { - ctx->Driver.FreeTexImageData(ctx, texImage); - } - - /* The texture's format was already chosen in _mesa_CopyTexImage() */ - ASSERT(texImage->TexFormat != MESA_FORMAT_NONE); - - /* - * Store texture data (with pixel transfer ops) - */ - _mesa_meta_begin(ctx, META_PIXEL_STORE); - - _mesa_update_state(ctx); /* to update pixel transfer state */ - - if (target == GL_TEXTURE_1D) { - ctx->Driver.TexImage1D(ctx, target, level, internalFormat, - width, border, format, type, - buf, &ctx->Unpack, texObj, texImage); - } - else { - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, format, type, - buf, &ctx->Unpack, texObj, texImage); - } - _mesa_meta_end(ctx); - - _mesa_lock_texture(ctx, texObj); /* re-lock */ - - free(buf); -} - - -void -_mesa_meta_CopyTexImage1D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLint border) -{ - copy_tex_image(ctx, 1, target, level, internalFormat, x, y, - width, 1, border); -} - - -void -_mesa_meta_CopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLsizei height, GLint border) -{ - copy_tex_image(ctx, 2, target, level, internalFormat, x, y, - width, height, border); -} - - - /** * Helper for _mesa_meta_CopyTexSubImage1/2/3D() functions. * Have to be careful with locking and meta state for pixel transfer. diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h index b0797d3d91a..7190dee768a 100644 --- a/src/mesa/drivers/common/meta.h +++ b/src/mesa/drivers/common/meta.h @@ -68,16 +68,6 @@ extern void _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, struct gl_texture_object *texObj); -extern void -_mesa_meta_CopyTexImage1D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLint border); - -extern void -_mesa_meta_CopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLsizei height, GLint border); - extern void _mesa_meta_CopyTexSubImage1D(struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, -- cgit v1.2.3 From 1c1fc62e388534b6c0751fc9f8ab34a89e25efd0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jul 2011 20:03:05 -0600 Subject: mesa: remove unused dd_function_table::CopyTexImage1D/2D() hooks --- src/mesa/main/dd.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index e1ae30fe4d4..e0c5844e193 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -289,24 +289,6 @@ struct dd_function_table { struct gl_texture_object *texObj, struct gl_texture_image *texImage ); - /** - * Called by glCopyTexImage1D(). - * - * Drivers should use a fallback routine from texstore.c if needed. - */ - void (*CopyTexImage1D)( struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLint border ); - - /** - * Called by glCopyTexImage2D(). - * - * Drivers should use a fallback routine from texstore.c if needed. - */ - void (*CopyTexImage2D)( struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, GLint x, GLint y, - GLsizei width, GLsizei height, GLint border ); - /** * Called by glCopyTexSubImage1D(). * -- cgit v1.2.3 From d5e32397762a3bd55fa69ad6332351512083f9c6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Jul 2011 20:03:05 -0600 Subject: st/mesa: get rid of redundant clipping code in st_copy_texsubimage() --- src/mesa/state_tracker/st_cb_texture.c | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 83e83695aae..25f08aa4d09 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -1466,34 +1466,6 @@ st_copy_texsubimage(struct gl_context *ctx, depth/stencil samples per pixel? Need some transfer clarifications. */ assert(sample_count < 2); - if (srcX < 0) { - width -= -srcX; - destX += -srcX; - srcX = 0; - } - - if (srcY < 0) { - height -= -srcY; - destY += -srcY; - srcY = 0; - } - - if (destX < 0) { - width -= -destX; - srcX += -destX; - destX = 0; - } - - if (destY < 0) { - height -= -destY; - srcY += -destY; - destY = 0; - } - - if (width < 0 || height < 0) - return; - - assert(strb); assert(strb->surface); assert(stImage->pt); -- cgit v1.2.3 From eee570290aebc8a339acd063033e3daefcef2bc6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 17 Jul 2011 14:53:16 -0700 Subject: meta: Add a GLSL-based _mesa_meta_Clear() variant. This cuts out a large portion of the overhead of glClear() from resetting the texenv state and recomputing the fixed function programs. It also means less use of fixed function internally in our GLES2 drivers, which is rather bogus. Reviewed-by: Brian Paul --- src/mesa/drivers/common/meta.c | 160 ++++++++++++++++++++++++++++++++++++++++- src/mesa/drivers/common/meta.h | 3 + 2 files changed, 162 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 0e58aeca3f5..887118b9417 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -62,6 +62,7 @@ #include "main/teximage.h" #include "main/texparam.h" #include "main/texstate.h" +#include "main/uniforms.h" #include "main/varray.h" #include "main/viewport.h" #include "program/program.h" @@ -235,6 +236,8 @@ struct clear_state { GLuint ArrayObj; GLuint VBO; + GLuint ShaderProg; + GLint ColorLocation; }; @@ -1589,10 +1592,165 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers) _mesa_meta_end(ctx); } +static void +meta_glsl_clear_init(struct gl_context *ctx, struct clear_state *clear) +{ + const char *vs_source = + "attribute vec4 position;\n" + "void main()\n" + "{\n" + " gl_Position = position;\n" + "}\n"; + const char *fs_source = + "uniform vec4 color;\n" + "void main()\n" + "{\n" + " gl_FragColor = color;\n" + "}\n"; + GLuint vs, fs; + + if (clear->ArrayObj != 0) + return; + + /* create vertex array object */ + _mesa_GenVertexArrays(1, &clear->ArrayObj); + _mesa_BindVertexArray(clear->ArrayObj); + + /* create vertex array buffer */ + _mesa_GenBuffersARB(1, &clear->VBO); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO); + + /* setup vertex arrays */ + _mesa_VertexAttribPointerARB(0, 3, GL_FLOAT, GL_FALSE, 0, (void *)0); + _mesa_EnableVertexAttribArrayARB(0); + + vs = _mesa_CreateShaderObjectARB(GL_VERTEX_SHADER); + _mesa_ShaderSourceARB(vs, 1, &vs_source, NULL); + _mesa_CompileShaderARB(vs); + + fs = _mesa_CreateShaderObjectARB(GL_FRAGMENT_SHADER); + _mesa_ShaderSourceARB(fs, 1, &fs_source, NULL); + _mesa_CompileShaderARB(fs); + + clear->ShaderProg = _mesa_CreateProgramObjectARB(); + _mesa_AttachShader(clear->ShaderProg, fs); + _mesa_AttachShader(clear->ShaderProg, vs); + _mesa_BindAttribLocationARB(clear->ShaderProg, 0, "position"); + _mesa_LinkProgramARB(clear->ShaderProg); + + clear->ColorLocation = _mesa_GetUniformLocationARB(clear->ShaderProg, + "color"); +} + +/** + * Meta implementation of ctx->Driver.Clear() in terms of polygon rendering. + */ +void +_mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers) +{ + struct clear_state *clear = &ctx->Meta->Clear; + GLbitfield metaSave; + const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1; + struct gl_framebuffer *fb = ctx->DrawBuffer; + const float x0 = ((float)fb->_Xmin / fb->Width) * 2.0f - 1.0f; + const float y0 = ((float)fb->_Ymin / fb->Height) * 2.0f - 1.0f; + const float x1 = ((float)fb->_Xmax / fb->Width) * 2.0f - 1.0f; + const float y1 = ((float)fb->_Ymax / fb->Height) * 2.0f - 1.0f; + const float z = -invert_z(ctx->Depth.Clear); + struct vertex { + GLfloat x, y, z; + } verts[4]; + + metaSave = (META_ALPHA_TEST | + META_BLEND | + META_DEPTH_TEST | + META_RASTERIZATION | + META_SHADER | + META_STENCIL_TEST | + META_VERTEX | + META_VIEWPORT | + META_CLAMP_FRAGMENT_COLOR); + + if (!(buffers & BUFFER_BITS_COLOR)) { + /* We'll use colormask to disable color writes. Otherwise, + * respect color mask + */ + metaSave |= META_COLOR_MASK; + } + + _mesa_meta_begin(ctx, metaSave); + + meta_glsl_clear_init(ctx, clear); + + _mesa_UseProgramObjectARB(clear->ShaderProg); + _mesa_Uniform4fvARB(clear->ColorLocation, 1, + ctx->Color.ClearColorUnclamped); + + _mesa_BindVertexArray(clear->ArrayObj); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO); + + /* GL_COLOR_BUFFER_BIT */ + if (buffers & BUFFER_BITS_COLOR) { + /* leave colormask, glDrawBuffer state as-is */ + + /* Clears never have the color clamped. */ + _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE); + } + else { + ASSERT(metaSave & META_COLOR_MASK); + _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + } + + /* GL_DEPTH_BUFFER_BIT */ + if (buffers & BUFFER_BIT_DEPTH) { + _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_TRUE); + _mesa_DepthFunc(GL_ALWAYS); + _mesa_DepthMask(GL_TRUE); + } + else { + assert(!ctx->Depth.Test); + } + + /* GL_STENCIL_BUFFER_BIT */ + if (buffers & BUFFER_BIT_STENCIL) { + _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_TRUE); + _mesa_StencilOpSeparate(GL_FRONT_AND_BACK, + GL_REPLACE, GL_REPLACE, GL_REPLACE); + _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS, + ctx->Stencil.Clear & stencilMax, + ctx->Stencil.WriteMask[0]); + } + else { + assert(!ctx->Stencil.Enabled); + } + + /* vertex positions */ + verts[0].x = x0; + verts[0].y = y0; + verts[0].z = z; + verts[1].x = x1; + verts[1].y = y0; + verts[1].z = z; + verts[2].x = x1; + verts[2].y = y1; + verts[2].z = z; + verts[3].x = x0; + verts[3].y = y1; + verts[3].z = z; + + /* upload new vertex data */ + _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts), verts, + GL_DYNAMIC_DRAW_ARB); + + /* draw quad */ + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + + _mesa_meta_end(ctx); +} /** * Meta implementation of ctx->Driver.CopyPixels() in terms - * of texture mapping and polygon rendering. + * of texture mapping and polygon rendering and GLSL shaders. */ void _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcX, GLint srcY, diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h index b0797d3d91a..32c789ea638 100644 --- a/src/mesa/drivers/common/meta.h +++ b/src/mesa/drivers/common/meta.h @@ -42,6 +42,9 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx, extern void _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers); +extern void +_mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers); + extern void _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, GLsizei width, GLsizei height, -- cgit v1.2.3 From 540e66b3bebb5ae82422e386aa178147ea14a39e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 17 Jul 2011 14:55:10 -0700 Subject: intel: Use the GLSL-based meta clear when available. Improves firefox-talos-gfx performance under GL when 3D clears are enabled: [ 0] gl-before firefox-talos-gfx 20.193 20.251 0.27% 3/3 [ 0] gl-after firefox-talos-gfx 18.013 18.040 0.19% 3/3 --- src/mesa/drivers/dri/intel/intel_clear.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index 5ab98734cfc..81c062fba53 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -188,7 +188,10 @@ intelClear(struct gl_context *ctx, GLbitfield mask) if (tri_mask) { debug_mask("tri", tri_mask); - _mesa_meta_Clear(&intel->ctx, tri_mask); + if (ctx->Extensions.ARB_fragment_shader) + _mesa_meta_glsl_Clear(&intel->ctx, tri_mask); + else + _mesa_meta_Clear(&intel->ctx, tri_mask); } } -- cgit v1.2.3 From dc7422405f6f3c201993251e4665bb9ec1b59db0 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 18 Jul 2011 15:25:10 -0700 Subject: i965: Avoid kernel BUG_ON if we happen to wait on the pipe_control w/a BO. For this and occlusion queries, we're trying to avoid setting I915_GEM_DOMAIN_RENDER for the write domain, because the data written is definitely not going through the render cache, but we do need to tell the kernel that the object has been written. However, with using I915_GEM_DOMAIN_GTT, the kernel on retiring the batchbuffer sees that the w/a BO has a write domain of GTT, and puts it on the flushing list. If something tries to wait for that BO to finish rendering (such as the AUB dumper reading the contents of BOs), we get into wait_request (since obj->active) but with a 0 seqno (since the object is on the flushing list, not actually on a ringbuffer), and BUG_ONs. To avoid the kernel bug (which I'm hoping to delete soon anyway), just use I915_GEM_DOMAIN_INSTRUCTION like occlusion queries do. This doesn't result in more flushing, because we invalidate INSTRUCTION on every batchbuffer now that we're state streaming, anyway. Reviewed-by: Kenneth Graunke Tested-by: Kenneth Graunke --- src/mesa/drivers/dri/intel/intel_batchbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index b61a2ffef19..9c97ef22888 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -325,7 +325,7 @@ intel_emit_post_sync_nonzero_flush(struct intel_context *intel) OUT_BATCH(_3DSTATE_PIPE_CONTROL); OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); OUT_RELOC(intel->batch.workaround_bo, - I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT, 0); + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); OUT_BATCH(0); /* write data */ ADVANCE_BATCH(); -- cgit v1.2.3 From 407785d0e97abd0cc51a6e360089111973748e7c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 18 Jul 2011 17:17:03 -0700 Subject: i965: Enable the PIPE_CONTROL workaround workaround out of paranoia. There's scary stuff going on in PIPE_CONTROL internals, and if the BSpec says to do this to make PIPE_CONTROL work, I'll go ahead and do it because we'll probably never be able to debug it after the fact. v2: Use stall at scoreboard instead of depth stall, as noted by Ken. --- src/mesa/drivers/dri/intel/intel_batchbuffer.c | 31 +++++++++++++++++++++++--- src/mesa/drivers/dri/intel/intel_reg.h | 1 + 2 files changed, 29 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 9c97ef22888..97cc219ce6d 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -308,12 +308,29 @@ emit: * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. * - * XXX: There is also a workaround that would appear to apply to this - * workaround, but it doesn't appear to be necessary so far: + * And the workaround for these two requires this workaround first: * - * Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent + * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent * BEFORE the pipe-control with a post-sync op and no write-cache * flushes. + * + * And this last workaround is tricky because of the requirements on + * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM + * volume 2 part 1: + * + * "1 of the following must also be set: + * - Render Target Cache Flush Enable ([12] of DW1) + * - Depth Cache Flush Enable ([0] of DW1) + * - Stall at Pixel Scoreboard ([1] of DW1) + * - Depth Stall ([13] of DW1) + * - Post-Sync Operation ([13] of DW1) + * - Notify Enable ([8] of DW1)" + * + * The cache flushes require the workaround flush that triggered this + * one, so we can't use it. Depth stall would trigger the same. + * Post-sync nonzero is what triggered this second workaround, so we + * can't use that one either. Notify enable is IRQs, which aren't + * really our business. That leaves only stall at scoreboard. */ void intel_emit_post_sync_nonzero_flush(struct intel_context *intel) @@ -321,6 +338,14 @@ intel_emit_post_sync_nonzero_flush(struct intel_context *intel) if (!intel->batch.need_workaround_flush) return; + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_PIPE_CONTROL); OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h index 5aa629150cf..a98a669af21 100644 --- a/src/mesa/drivers/dri/intel/intel_reg.h +++ b/src/mesa/drivers/dri/intel/intel_reg.h @@ -75,6 +75,7 @@ #define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4) #define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3) #define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2) +#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) #define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) #define PIPE_CONTROL_PPGTT_WRITE (0 << 2) #define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2) -- cgit v1.2.3 From 3e5d36267d8c9536490c902f785137a7fa0637fc Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 19 Jul 2011 15:06:15 -0700 Subject: i965: Apply a homebrew workaround for GPU hang in OGLC api-texcoord. The behavior of flushes in the hardware is a maze of twisty passages, and strangely the VS constants appear to be loaded during a pipeline flush instead of at the time of the packet emit according to the simulator. On moving the STATE_BASE_ADDRESS packet to where it really needed to live (in order for data loads by other packets to be correct), we sometimes no longer got a flush between those packets where we apparently needed it. This replicates the flushes implied by a STATE_BASE_ADDRESS update, fixing the GPU hangs in OGLC and the "engine" demo. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=36821 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=39257 Tested-by: Keith Packard (bzflag and etracer fixed) Acked-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/gen6_vs_state.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index fb4cdbaadf9..e70454416bf 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -160,6 +160,32 @@ upload_vs_state(struct brw_context *brw) GEN6_VS_STATISTICS_ENABLE | GEN6_VS_ENABLE); ADVANCE_BATCH(); + + /* Based on my reading of the simulator, the VS constants don't get + * pulled into the VS FF unit until an appropriate pipeline flush + * happens, and instead the 3DSTATE_CONSTANT_VS packet just adds + * references to them into a little FIFO. The flushes are common, + * but don't reliably happen between this and a 3DPRIMITIVE, causing + * the primitive to use the wrong constants. Then the FIFO + * containing the constant setup gets added to again on the next + * constants change, and eventually when a flush does happen the + * unit is overwhelmed by constant changes and dies. + * + * To avoid this, send a PIPE_CONTROL down the line that will + * update the unit immediately loading the constants. The flush + * type bits here were those set by the STATE_BASE_ADDRESS whose + * move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the + * bug reports that led to this workaround, and may be more than + * what is strictly required to avoid the issue. + */ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_INSTRUCTION_FLUSH | + PIPE_CONTROL_STATE_CACHE_INVALIDATE); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); } const struct brw_tracked_state gen6_vs_state = { -- cgit v1.2.3 From 000896c0bb99f356e52854608a29476d3ade387c Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 19 Jul 2011 03:05:07 +0200 Subject: mesa: GLES2 should return different error enums for invalid fbo queries ES 2.0.25 page 127 says: If the value of FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE is NONE, then querying any other pname will generate INVALID_ENUM. See also: b9e9df78a03edb35472c2e231aef4747e09db792 NOTE: This is a candidate for the 7.10 and 7.11 branches. Reviewed-by: Ian Romanick --- src/mesa/main/fbobject.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 84969360d92..82eb7fb718d 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2134,10 +2134,14 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, { const struct gl_renderbuffer_attachment *att; struct gl_framebuffer *buffer; + GLenum err; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); + /* The error differs in GL andd GLES. */ + err = ctx->API == API_OPENGL ? GL_INVALID_OPERATION : GL_INVALID_ENUM; + buffer = get_framebuffer_target(ctx, target); if (!buffer) { _mesa_error(ctx, GL_INVALID_ENUM, @@ -2188,7 +2192,12 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, } else { assert(att->Type == GL_NONE); - *params = 0; + if (ctx->API == API_OPENGL) { + *params = 0; + } else { + _mesa_error(ctx, GL_INVALID_ENUM, + "glGetFramebufferAttachmentParameterivEXT(pname)"); + } } return; case GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL_EXT: @@ -2196,7 +2205,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, *params = att->TextureLevel; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2214,7 +2223,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, } } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2232,7 +2241,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, } } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2246,7 +2255,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2267,7 +2276,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, return; } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else { @@ -2301,7 +2310,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else if (att->Type == GL_NONE) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, err, "glGetFramebufferAttachmentParameterivEXT(pname)"); } else if (att->Texture) { -- cgit v1.2.3 From 12c22cab77f35a887d9f6790e0de4a8fa4b3b575 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Thu, 7 Jul 2011 13:03:45 -0700 Subject: mesa: Add an ifndef guard around the definition of the INLINE macro Several Mesa headers redundantly define the INLINE macro. Adding this guard prevents the compiler from complaining about macro redefinition. Reviewed-by: Brian Paul Reviewed-by: Kenneth Graunke Reviewed-by: Chad Versace --- src/mesa/main/compiler.h | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h index 743841be4ef..d736fdfc58a 100644 --- a/src/mesa/main/compiler.h +++ b/src/mesa/main/compiler.h @@ -139,26 +139,28 @@ extern "C" { /** * Function inlining */ -#if defined(__GNUC__) -# define INLINE __inline__ -#elif defined(__MSC__) -# define INLINE __inline -#elif defined(_MSC_VER) -# define INLINE __inline -#elif defined(__ICL) -# define INLINE __inline -#elif defined(__INTEL_COMPILER) -# define INLINE inline -#elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100) -# define INLINE __inline -#elif defined(__SUNPRO_C) && defined(__C99FEATURES__) -# define INLINE inline -# define __inline inline -# define __inline__ inline -#elif (__STDC_VERSION__ >= 199901L) /* C99 */ -# define INLINE inline -#else -# define INLINE +#ifndef INLINE +# if defined(__GNUC__) +# define INLINE __inline__ +# elif defined(__MSC__) +# define INLINE __inline +# elif defined(_MSC_VER) +# define INLINE __inline +# elif defined(__ICL) +# define INLINE __inline +# elif defined(__INTEL_COMPILER) +# define INLINE inline +# elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100) +# define INLINE __inline +# elif defined(__SUNPRO_C) && defined(__C99FEATURES__) +# define INLINE inline +# define __inline inline +# define __inline__ inline +# elif (__STDC_VERSION__ >= 199901L) /* C99 */ +# define INLINE inline +# else +# define INLINE +# endif #endif -- cgit v1.2.3 From d6e1a8f71437d4a65e65f93271b2892dd62b0d23 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Sun, 17 Jul 2011 23:15:54 -0700 Subject: ir_to_mesa: Add each relative address to the previous This fixes many cases of accessing arrays of matrices using non-constant indices at each level. Fixes i965 piglit: vs-temp-array-mat[234]-index-col-rd vs-temp-array-mat[234]-index-col-row-rd vs-temp-array-mat[234]-index-col-wr vs-uniform-array-mat[234]-index-col-rd Fixes swrast piglit: fs-temp-array-mat[234]-index-col-rd fs-temp-array-mat[234]-index-col-row-rd fs-temp-array-mat[234]-index-col-wr fs-uniform-array-mat[234]-index-col-rd fs-uniform-array-mat[234]-index-col-row-rd fs-varying-array-mat[234]-index-col-rd fs-varying-array-mat[234]-index-col-row-rd vs-temp-array-mat[234]-index-col-rd vs-temp-array-mat[234]-index-col-row-rd vs-temp-array-mat[234]-index-col-wr vs-uniform-array-mat[234]-index-col-rd vs-uniform-array-mat[234]-index-col-row-rd vs-varying-array-mat[234]-index-col-rd vs-varying-array-mat[234]-index-col-row-rd vs-varying-array-mat[234]-index-col-wr Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index d8e5a3a9772..beb481b3a3b 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1496,6 +1496,18 @@ ir_to_mesa_visitor::visit(ir_dereference_array *ir) this->result, src_reg_for_float(element_size)); } + /* If there was already a relative address register involved, add the + * new and the old together to get the new offset. + */ + if (src.reladdr != NULL) { + src_reg accum_reg = get_temp(glsl_type::float_type); + + emit(ir, OPCODE_ADD, dst_reg(accum_reg), + index_reg, *src.reladdr); + + index_reg = accum_reg; + } + src.reladdr = ralloc(mem_ctx, src_reg); memcpy(src.reladdr, &index_reg, sizeof(index_reg)); } -- cgit v1.2.3 From f7cd9a858c043e609fcdbf9ac9dfc1ef7ad002bf Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Sun, 17 Jul 2011 23:35:26 -0700 Subject: ir_to_mesa: Copy reladdr in src_reg(dst_reg) constructor Fixes i965 piglit: vs-temp-array-mat[234]-col-row-wr vs-temp-array-mat[234]-index-col-row-wr vs-temp-array-mat[234]-index-row-wr vs-temp-mat[234]-col-row-wr Fixes swrast piglit: fs-temp-array-mat[234]-col-row-wr fs-temp-array-mat[234]-index-col-row-wr fs-temp-array-mat[234]-index-row-wr fs-temp-mat[234]-col-row-wr vs-temp-array-mat[234]-col-row-wr vs-temp-array-mat[234]-index-col-row-wr vs-temp-array-mat[234]-index-row-wr vs-temp-mat[234]-col-row-wr Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index beb481b3a3b..8b4a535b75f 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -134,7 +134,7 @@ src_reg::src_reg(dst_reg reg) this->index = reg.index; this->swizzle = SWIZZLE_XYZW; this->negate = 0; - this->reladdr = NULL; + this->reladdr = reg.reladdr; } dst_reg::dst_reg(src_reg reg) -- cgit v1.2.3 From fbeb68e880318808f90c779cd3f8b8c4160eecf8 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 20 Jul 2011 18:02:17 -0700 Subject: prog_optimize: Set unused regs to PROGRAM_UNDEFINED after CMP->MOV conversion Leaving the unused registers with other values caused assertion failures and other problems in places that blindly iterate over all sources. brw_vs_emit.c:1381: get_src_reg: Assertion `c->regs[file][index].nr != 0' failed. Fixes i965 piglit: vs-uniform-array-mat[234]-col-row-rd vs-uniform-array-mat[234]-index-col-row-rd vs-uniform-array-mat[234]-index-row-rd vs-uniform-mat[234]-col-row-rd Reviewed-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/program/prog_optimize.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c index 8a40fa69eca..f4a7a638d5f 100644 --- a/src/mesa/program/prog_optimize.c +++ b/src/mesa/program/prog_optimize.c @@ -1319,6 +1319,15 @@ _mesa_simplify_cmp(struct gl_program * program) inst->Opcode = OPCODE_MOV; inst->SrcReg[0] = inst->SrcReg[1]; + + /* Unused operands are expected to have the file set to + * PROGRAM_UNDEFINED. This is how _mesa_init_instructions initializes + * all of the sources. + */ + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + inst->SrcReg[2].File = PROGRAM_UNDEFINED; + inst->SrcReg[2].Swizzle = SWIZZLE_NOOP; } } if (dbg) { -- cgit v1.2.3 From 337e2dfad0bcd567755272271abd2593a1d0fd1f Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 20 Jul 2011 16:04:17 -0700 Subject: i965: When emitting a src/dst write of an output, keep the write mask Fixes i965 piglit: vs-varying-array-mat[234]-col-row-wr vs-varying-array-mat[234]-index-col-row-wr vs-varying-array-mat[234]-index-row-wr vs-varying-array-mat[234]-row-wr vs-varying-mat[234]-col-row-wr vs-varying-mat[234]-row-wr Reviewed-by: Eric Anholt Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_vs_emit.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 9d733344a26..5ef8b0720ba 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1993,7 +1993,11 @@ void brw_vs_emit(struct brw_vs_compile *c ) index = inst->DstReg.Index; file = inst->DstReg.File; if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) - dst = c->output_regs[index].reg; + /* Can't just make get_dst "do the right thing" here because other + * callers of get_dst don't expect any special behavior for the + * c->output_regs[index].used_in_src case. + */ + dst = brw_writemask(c->output_regs[index].reg, inst->DstReg.WriteMask); else dst = get_dst(c, inst->DstReg); -- cgit v1.2.3 From 1d3f09f15998c60326bf6c53a8d32c82496264ae Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 20 Jul 2011 18:07:50 -0700 Subject: i965: When emitting a src/dst read of an output, keep the swizzle and neg Fixes i965 piglit vs-varying-array-mat[234]-row-rd. Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_vs_emit.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 5ef8b0720ba..d8cb0f7cb69 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1980,9 +1980,22 @@ void brw_vs_emit(struct brw_vs_compile *c ) const struct prog_src_register *src = &inst->SrcReg[i]; index = src->Index; file = src->File; - if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) - args[i] = c->output_regs[index].reg; - else + if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) { + /* Can't just make get_arg "do the right thing" here because + * other callers of get_arg and get_src_reg don't expect any + * special behavior for the c->output_regs[index].used_in_src + * case. + */ + args[i] = c->output_regs[index].reg; + args[i].dw1.bits.swizzle = + BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0), + GET_SWZ(src->Swizzle, 1), + GET_SWZ(src->Swizzle, 2), + GET_SWZ(src->Swizzle, 3)); + + /* Note this is ok for non-swizzle ARB_vp instructions */ + args[i].negate = src->Negate ? 1 : 0; + } else args[i] = get_arg(c, inst, i); } -- cgit v1.2.3 From 156cef0fbacf242e8fc67e39ab964e5f8f3739cb Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 21 Jul 2011 21:17:10 -0700 Subject: i965/fs: Clear result before visiting shadow comparitor and LOD info. Commit 53c89c67f33639afef951e178f93f4e29acc5d53 ("i965: Avoid generating MOVs for assignments of expressions.") added the line "this->result = reg_undef" all over the code. Unfortunately, since Eric developed his patch before I landed Ivybridge support, he missed adding it to fs_visitor::emit_texture_gen7() after rebasing. Furthermore, since I developed TXD support before Eric's patch, I neglected to add it to the gradient handling when I rebased. Neglecting to set this causes the visitor to use this->result as storage rather than generating a new temporary. These missing statements resulted in the same register being used to store several different values. Fixes the following piglit tests on Ivybridge: - glsl-fs-shadow2dproj.shader_test - glsl-fs-shadow2dproj-bias.shader_test NOTE: This is a candidate for the 7.11 branch. Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index cbe5cf428c5..9632aae64b0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -603,9 +603,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ mlen += 3; } else if (ir->op == ir_txd) { + this->result = reg_undef; ir->lod_info.grad.dPdx->accept(this); fs_reg dPdx = this->result; + this->result = reg_undef; ir->lod_info.grad.dPdy->accept(this); fs_reg dPdy = this->result; @@ -786,9 +788,11 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, inst = emit(FS_OPCODE_TXL, dst); break; case ir_txd: { + this->result = reg_undef; ir->lod_info.grad.dPdx->accept(this); fs_reg dPdx = this->result; + this->result = reg_undef; ir->lod_info.grad.dPdy->accept(this); fs_reg dPdy = this->result; @@ -850,6 +854,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, } if (ir->shadow_comparitor && ir->op != ir_txd) { + this->result = reg_undef; ir->shadow_comparitor->accept(this); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen += reg_width; @@ -860,11 +865,13 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_tex: break; case ir_txb: + this->result = reg_undef; ir->lod_info.bias->accept(this); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen += reg_width; break; case ir_txl: + this->result = reg_undef; ir->lod_info.lod->accept(this); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen += reg_width; @@ -873,9 +880,11 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, if (c->dispatch_width == 16) fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); + this->result = reg_undef; ir->lod_info.grad.dPdx->accept(this); fs_reg dPdx = this->result; + this->result = reg_undef; ir->lod_info.grad.dPdy->accept(this); fs_reg dPdy = this->result; @@ -1070,6 +1079,7 @@ fs_visitor::visit(ir_texture *ir) if (hw_compare_supported) { inst->shadow_compare = true; } else { + this->result = reg_undef; ir->shadow_comparitor->accept(this); fs_reg ref = this->result; -- cgit v1.2.3 From 572f6318954f31fcf3d396ac5df8e9eff3f37c74 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 12 May 2011 04:02:32 -0700 Subject: i965/gen7: Fix shadow sampling in the old brw_wm_emit backend. On Ivybridge, the shadow comparitor goes in the first slot, rather than at the end. It's not necessary to send u, v, and r. Fixes tests texturing/texdepth and glean/fbo. NOTE: This is a candidate for the 7.11 branch. Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_wm_emit.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index f61757a8cac..6ea4a7d6e50 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1094,9 +1094,16 @@ void emit_tex(struct brw_wm_compile *c, if (intel->gen < 5 && c->dispatch_width == 8) nr_texcoords = 3; - /* For shadow comparisons, we have to supply u,v,r. */ - if (shadow) - nr_texcoords = 3; + if (shadow) { + if (intel->gen < 7) { + /* For shadow comparisons, we have to supply u,v,r. */ + nr_texcoords = 3; + } else { + /* On Ivybridge, the shadow comparitor comes first. Just load it. */ + brw_MOV(p, brw_message_reg(cur_mrf), arg[2]); + cur_mrf += mrf_per_channel; + } + } /* Emit the texcoords. */ for (i = 0; i < nr_texcoords; i++) { @@ -1113,7 +1120,7 @@ void emit_tex(struct brw_wm_compile *c, } /* Fill in the shadow comparison reference value. */ - if (shadow) { + if (shadow && intel->gen < 7) { if (intel->gen >= 5) { /* Fill in the cube map array index value. */ brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); -- cgit v1.2.3 From d92463d5dc42aca09a54588c322fc60582cf9131 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Fri, 22 Jul 2011 14:05:52 -0700 Subject: i965: vs optimization fix: Check val.{negate,abs} in accumulator_contains() When emitting a MAC instruction in a vertex shader, brw_vs_emit() calls accumulator_contains() to determine whether the accumulator already contains the appropriate addend; if it does, then we can avoid emitting an unnecessary MOV instruction. However, accumulator_contains() wasn't checking the val.negate or val.abs flags. As a result, if the desired value was the negation, or the absolute value, of what was already in the accumulator, we would generate an incorrect shader. Fixes piglit test vs-refract-vec4-vec4-float. Tested on Gen5 and Gen6. Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_vs_emit.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index d8cb0f7cb69..674a994bace 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1821,6 +1821,9 @@ accumulator_contains(struct brw_vs_compile *c, struct brw_reg val) if (val.address_mode != BRW_ADDRESS_DIRECT) return GL_FALSE; + if (val.negate || val.abs) + return GL_FALSE; + switch (prev_insn->header.opcode) { case BRW_OPCODE_MOV: case BRW_OPCODE_MAC: -- cgit v1.2.3 From 185868c9c2e6a31a7313df2dbe29490547b65f61 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 25 Jul 2011 11:50:27 -0700 Subject: i965: Emit texture cache flushes on gen6 along with render cache flushes. It turns out that internally the texture cache gets flushed in a couple of cases, particularly around 2D operations mixed with 3D. In almost all cases one of those happens between rendering to an FBO-attached texture and rendering from that texture. However, as of the next patch, glean tfbo (and the new fbo-flushing-2 test) would manage to get stale texture values because one of those flushes didn't occur. The intention of this code was always to get the render cache cleared and ready to be used from the sampler cache (and it does on <= gen4), so this just catches gen5 up. This patch was also tested to fix fbo-flushing on gen7. --- src/mesa/drivers/dri/intel/intel_batchbuffer.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 97cc219ce6d..db4343be10c 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -390,6 +390,7 @@ intel_batchbuffer_emit_mi_flush(struct intel_context *intel) OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH | PIPE_CONTROL_WRITE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_TC_FLUSH | PIPE_CONTROL_NO_WRITE); OUT_BATCH(0); /* write address */ OUT_BATCH(0); /* write data */ -- cgit v1.2.3 From 808024689247561d3de225856fb6ef17430fd39e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 21 Jul 2011 09:15:05 -0700 Subject: meta: Also save/restore clip planes for GLSL. Fixes user-clip on 965 with 3D clears enabled. I created a separate flag because I wanted to avoid the overhead of the matrix operations in this path. Reviewed-by: Brian Paul --- src/mesa/drivers/common/meta.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index fa78674e4eb..26c89519679 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -90,13 +90,14 @@ #define META_SCISSOR 0x100 #define META_SHADER 0x200 #define META_STENCIL_TEST 0x400 -#define META_TRANSFORM 0x800 /**< modelview, projection, clip planes */ +#define META_TRANSFORM 0x800 /**< modelview/projection matrix state */ #define META_TEXTURE 0x1000 #define META_VERTEX 0x2000 #define META_VIEWPORT 0x4000 #define META_CLAMP_FRAGMENT_COLOR 0x8000 #define META_CLAMP_VERTEX_COLOR 0x10000 #define META_CONDITIONAL_RENDER 0x20000 +#define META_CLIP 0x40000 /*@}*/ @@ -165,6 +166,8 @@ struct save_state GLfloat ModelviewMatrix[16]; GLfloat ProjectionMatrix[16]; GLfloat TextureMatrix[16]; + + /** META_CLIP */ GLbitfield ClipPlanesEnabled; /** META_TEXTURE */ @@ -547,6 +550,9 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) _mesa_Ortho(0.0, ctx->DrawBuffer->Width, 0.0, ctx->DrawBuffer->Height, -1.0, 1.0); + } + + if (state & META_CLIP) { save->ClipPlanesEnabled = ctx->Transform.ClipPlanesEnabled; if (ctx->Transform.ClipPlanesEnabled) { GLuint i; @@ -846,7 +852,9 @@ _mesa_meta_end(struct gl_context *ctx) _mesa_LoadMatrixf(save->ProjectionMatrix); _mesa_MatrixMode(save->MatrixMode); + } + if (state & META_CLIP) { if (save->ClipPlanesEnabled) { GLuint i; for (i = 0; i < ctx->Const.MaxClipPlanes; i++) { @@ -1669,6 +1677,7 @@ _mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers) META_STENCIL_TEST | META_VERTEX | META_VIEWPORT | + META_CLIP | META_CLAMP_FRAGMENT_COLOR); if (!(buffers & BUFFER_BITS_COLOR)) { @@ -1783,6 +1792,7 @@ _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcX, GLint srcY, META_SHADER | META_TEXTURE | META_TRANSFORM | + META_CLIP | META_VERTEX | META_VIEWPORT)); @@ -2104,6 +2114,7 @@ _mesa_meta_DrawPixels(struct gl_context *ctx, META_SHADER | META_TEXTURE | META_TRANSFORM | + META_CLIP | META_VERTEX | META_VIEWPORT | META_CLAMP_FRAGMENT_COLOR | @@ -2313,6 +2324,7 @@ _mesa_meta_Bitmap(struct gl_context *ctx, META_SHADER | META_TEXTURE | META_TRANSFORM | + META_CLIP | META_VERTEX | META_VIEWPORT)); -- cgit v1.2.3 From a0e5affb22da50aeb30262f5ba0912b059d858ea Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 19 May 2011 11:02:14 -0700 Subject: i965: Use 3D clears on gen6+ to avoid inter-ring synchronization. Improves firefox-talos-gfx around 5%. --- src/mesa/drivers/dri/intel/intel_clear.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index 81c062fba53..76d33f9b37e 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -116,13 +116,13 @@ intelClear(struct gl_context *ctx, GLbitfield mask) } /* HW color buffers (front, back, aux, generic FBO, etc) */ - if (colorMask == ~0) { + if (intel->gen < 6 && colorMask == ~0) { /* clear all R,G,B,A */ blit_mask |= (mask & BUFFER_BITS_COLOR); } else { /* glColorMask in effect */ - tri_mask |= (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT)); + tri_mask |= (mask & BUFFER_BITS_COLOR); } /* Make sure we have up to date buffers before we start looking at -- cgit v1.2.3 From 818db3848bfaa002d0e7cf6b9b615a31eb82ba25 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Jul 2011 10:56:10 -0700 Subject: i965: Fix many of the trivial WebGL demos that broke due to IB optimization. The index buffer state emit only occurred if there was an IB in place and we were in either a new batch or a new IB state. But because we only flagged new IB state if IB state changed from the last IB state we calculated, we could simply never emit IB state after batchbuffer wraps if the first draw didn't use the IB and we didn't actually change the IB. Fixes piglit glx-multi-context-ib-1. --- src/mesa/drivers/dri/i965/brw_vtbl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 55dbd4fa8b0..40360b23fff 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -213,6 +213,7 @@ static void brw_new_batch( struct intel_context *intel ) brw->state_batch_count = 0; brw->vb.nr_current_buffers = 0; + brw->ib.type = -1; /* Mark that the current program cache BO has been used by the GPU. * It will be reallocated if we need to put new programs in for the -- cgit v1.2.3 From 84f8548dfcc7de55e162359e2e39af2614903cbe Mon Sep 17 00:00:00 2001 From: Tobias Droste Date: Mon, 18 Jul 2011 07:14:06 +0200 Subject: r300/compiler: simplify code in peephole_add_presub_add MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Tobias Droste Signed-off-by: Marek Olšák --- .../drivers/dri/r300/compiler/radeon_optimize.c | 35 +++++++++++----------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index b24274259f4..39dcb21d4f4 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -561,28 +561,29 @@ static int peephole_add_presub_add( struct rc_instruction * inst_add) { unsigned dstmask = inst_add->U.I.DstReg.WriteMask; - struct rc_src_register * src1 = NULL; - unsigned int i; - - if (!is_presub_candidate(c, inst_add)) - return 0; + unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; + unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) return 0; - /* XXX This isn't fully implemented, is it? */ - /* src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */ - for (i = 0; i < 2; i++) { - if (inst_add->U.I.SrcReg[i].Abs) - return 0; + /* src0 and src1 can't have absolute values */ + if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) + return 0; - /* XXX This looks weird, but it's basically what was here before this commit (see git blame): */ - if ((inst_add->U.I.SrcReg[i].Negate & dstmask) != dstmask && !src1) { - src1 = &inst_add->U.I.SrcReg[i]; - } - } + /* presub_replace_add() assumes only one is negative */ + if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) + return 0; + + /* if src0 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) + return 0; - if (!src1) + /* if src1 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) + return 0; + + if (!is_presub_candidate(c, inst_add)) return 0; if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { @@ -615,7 +616,7 @@ static void presub_replace_inv( * of the add instruction must have the constatnt 1 swizzle. This function * does not check const registers to see if their value is 1.0, so it should * be called after the constant_folding optimization. - * @return + * @return * 0 if the ADD instruction is still part of the program. * 1 if the ADD instruction is no longer part of the program. */ -- cgit v1.2.3 From 3daa2d97eb13f41de4cbab9301a167be85d48642 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 25 Jul 2011 15:39:03 -0700 Subject: i965/fs: Fix MRT drawing since the m0->m2 move for shader debug. Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 1d89b8f1d11..eecfc92eb5b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -59,7 +59,8 @@ fs_visitor::generate_fb_write(fs_inst *inst) if (inst->target > 0) { /* Set the render target index for choosing BLEND_STATE. */ - brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2), + brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, + inst->base_mrf, 2), BRW_REGISTER_TYPE_UD), brw_imm_ud(inst->target)); } -- cgit v1.2.3 From 09916e877fc14723d7950f892e181df9f7d7f36f Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 22 Jul 2011 15:25:55 -0700 Subject: mesa: Add utility function to get base format from a GL compressed format Reviewed-by: Brian Paul --- src/mesa/main/texcompress.c | 88 +++++++++++++++++++++++++++++++++++++++++++++ src/mesa/main/texcompress.h | 3 ++ 2 files changed, 91 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c index d820ae92747..040be943e82 100644 --- a/src/mesa/main/texcompress.c +++ b/src/mesa/main/texcompress.c @@ -39,6 +39,94 @@ #include "texcompress.h" +/** + * Get the GL base format of a specified GL compressed texture format + * + * From page 232 of the OpenGL 3.3 (Compatiblity Profile) spec: + * + * "Compressed Internal Format Base Internal Format Type + * --------------------------- -------------------- --------- + * COMPRESSED_ALPHA ALPHA Generic + * COMPRESSED_LUMINANCE LUMINANCE Generic + * COMPRESSED_LUMINANCE_ALPHA LUMINANCE_ALPHA Generic + * COMPRESSED_INTENSITY INTENSITY Generic + * COMPRESSED_RED RED Generic + * COMPRESSED_RG RG Generic + * COMPRESSED_RGB RGB Generic + * COMPRESSED_RGBA RGBA Generic + * COMPRESSED_SRGB RGB Generic + * COMPRESSED_SRGB_ALPHA RGBA Generic + * COMPRESSED_SLUMINANCE LUMINANCE Generic + * COMPRESSED_SLUMINANCE_ALPHA LUMINANCE_ALPHA Generic + * COMPRESSED_RED_RGTC1 RED Specific + * COMPRESSED_SIGNED_RED_RGTC1 RED Specific + * COMPRESSED_RG_RGTC2 RG Specific + * COMPRESSED_SIGNED_RG_RGTC2 RG Specific" + * + * \return + * The base format of \c format if \c format is a compressed format (either + * generic or specific. Otherwise 0 is returned. + */ +GLenum +_mesa_gl_compressed_format_base_format(GLenum format) +{ + switch (format) { + case GL_COMPRESSED_RED: + case GL_COMPRESSED_RED_RGTC1: + case GL_COMPRESSED_SIGNED_RED_RGTC1: + return GL_RED; + + case GL_COMPRESSED_RG: + case GL_COMPRESSED_RG_RGTC2: + case GL_COMPRESSED_SIGNED_RG_RGTC2: + return GL_RG; + + case GL_COMPRESSED_RGB: + case GL_COMPRESSED_SRGB: + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + case GL_COMPRESSED_RGB_FXT1_3DFX: + case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT: + return GL_RGB; + + case GL_COMPRESSED_RGBA: + case GL_COMPRESSED_SRGB_ALPHA: + case GL_COMPRESSED_RGBA_BPTC_UNORM_ARB: + case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB: + case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB: + case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB: + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + case GL_COMPRESSED_RGBA_FXT1_3DFX: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: + return GL_RGBA; + + case GL_COMPRESSED_ALPHA: + return GL_ALPHA; + + case GL_COMPRESSED_LUMINANCE: + case GL_COMPRESSED_SLUMINANCE: + case GL_COMPRESSED_LUMINANCE_LATC1_EXT: + case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT: + return GL_LUMINANCE; + + case GL_COMPRESSED_LUMINANCE_ALPHA: + case GL_COMPRESSED_SLUMINANCE_ALPHA: + case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT: + case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT: + case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI: + return GL_LUMINANCE_ALPHA; + + case GL_COMPRESSED_INTENSITY: + return GL_INTENSITY; + + default: + return 0; + } +} + /** * Return list of (and count of) all specific texture compression * formats that are supported. diff --git a/src/mesa/main/texcompress.h b/src/mesa/main/texcompress.h index 19b08bbadf6..922da00912d 100644 --- a/src/mesa/main/texcompress.h +++ b/src/mesa/main/texcompress.h @@ -33,6 +33,9 @@ struct gl_context; #if _HAVE_FULL_GL +extern GLenum +_mesa_gl_compressed_format_base_format(GLenum format); + extern GLuint _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all); -- cgit v1.2.3 From 143b65f7612c255f29d08392192098b1c2bf4b62 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 22 Jul 2011 15:26:24 -0700 Subject: mesa: Return the correct internal fmt when a generic compressed fmt was used If an application requests a generic compressed format for a texture and the driver does not pick a specific compressed format, return the generic base format (e.g., GL_RGBA) for the GL_TEXTURE_INTERNAL_FORMAT query. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=3165 Reviewed-by: Brian Paul --- src/mesa/main/texparam.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 4b9dcb5d3b5..c4ec29533e2 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -915,9 +915,23 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, *params = _mesa_compressed_format_to_glenum(ctx, texFormat); } else { - /* return the user's requested internal format */ - *params = img->InternalFormat; - } + /* If the true internal format is not compressed but the user + * requested a generic compressed format, we have to return the + * generic base format that matches. + * + * From page 119 (page 129 of the PDF) of the OpenGL 1.3 spec: + * + * "If no specific compressed format is available, + * internalformat is instead replaced by the corresponding base + * internal format." + * + * Otherwise just return the user's requested internal format + */ + const GLenum f = + _mesa_gl_compressed_format_base_format(img->InternalFormat); + + *params = (f != 0) ? f : img->InternalFormat; + } break; case GL_TEXTURE_BORDER: *params = img->Border; -- cgit v1.2.3 From b189d1635d89cd7d900e8f9a5eed88d7dc0b46cb Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 22 Jul 2011 16:45:50 -0700 Subject: mesa: Make _mesa_get_compressed_formats match the texture compression specs The implementation deviated slightly from the GL_EXT_texture_sRGB spec and from other implementations. A giant comment block was added to justify the somewhat odd behavior of this function. In addition, the interface had unnecessary cruft. The 'all' parameter was false at all callers, so it has been removed. Reviewed-by: Brian Paul --- src/mesa/main/get.c | 4 +- src/mesa/main/texcompress.c | 117 ++++++++++++++++++++++++++++++++++---------- src/mesa/main/texcompress.h | 2 +- 3 files changed, 93 insertions(+), 30 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index 0492e1585c3..d32c68a53a4 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -1569,11 +1569,11 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu break; case GL_NUM_COMPRESSED_TEXTURE_FORMATS_ARB: - v->value_int = _mesa_get_compressed_formats(ctx, NULL, GL_FALSE); + v->value_int = _mesa_get_compressed_formats(ctx, NULL); break; case GL_COMPRESSED_TEXTURE_FORMATS_ARB: v->value_int_n.n = - _mesa_get_compressed_formats(ctx, v->value_int_n.ints, GL_FALSE); + _mesa_get_compressed_formats(ctx, v->value_int_n.ints); ASSERT(v->value_int_n.n <= 100); break; diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c index 040be943e82..42bd1eee5ca 100644 --- a/src/mesa/main/texcompress.c +++ b/src/mesa/main/texcompress.c @@ -131,16 +131,101 @@ _mesa_gl_compressed_format_base_format(GLenum format) * Return list of (and count of) all specific texture compression * formats that are supported. * + * Some formats are \b not returned by this function. The + * \c GL_COMPRESSED_TEXTURE_FORMATS query only returns formats that are + * "suitable for general-purpose usage." All texture compression extensions + * have taken this to mean either linear RGB or linear RGBA. + * + * The GL_ARB_texture_compress_rgtc spec says: + * + * "19) Should the GL_NUM_COMPRESSED_TEXTURE_FORMATS and + * GL_COMPRESSED_TEXTURE_FORMATS queries return the RGTC formats? + * + * RESOLVED: No. + * + * The OpenGL 2.1 specification says "The only values returned + * by this query [GL_COMPRESSED_TEXTURE_FORMATS"] are those + * corresponding to formats suitable for general-purpose usage. + * The renderer will not enumerate formats with restrictions that + * need to be specifically understood prior to use." + * + * Compressed textures with just red or red-green components are + * not general-purpose so should not be returned by these queries + * because they have restrictions. + * + * Applications that seek to use the RGTC formats should do so + * by looking for this extension's name in the string returned by + * glGetString(GL_EXTENSIONS) rather than + * what GL_NUM_COMPRESSED_TEXTURE_FORMATS and + * GL_COMPRESSED_TEXTURE_FORMATS return." + * + * There is nearly identical wording in the GL_EXT_texture_compression_rgtc + * spec. + * + * The GL_EXT_texture_rRGB spec says: + * + * "22) Should the new COMPRESSED_SRGB_* formats be listed in an + * implementation's GL_COMPRESSED_TEXTURE_FORMATS list? + * + * RESOLVED: No. Section 3.8.1 says formats listed by + * GL_COMPRESSED_TEXTURE_FORMATS are "suitable for general-purpose + * usage." The non-linear distribution of red, green, and + * blue for these sRGB compressed formats makes them not really + * general-purpose." + * + * The GL_EXT_texture_compression_latc spec says: + * + * "16) Should the GL_NUM_COMPRESSED_TEXTURE_FORMATS and + * GL_COMPRESSED_TEXTURE_FORMATS queries return the LATC formats? + * + * RESOLVED: No. + * + * The OpenGL 2.1 specification says "The only values returned + * by this query [GL_COMPRESSED_TEXTURE_FORMATS"] are those + * corresponding to formats suitable for general-purpose usage. + * The renderer will not enumerate formats with restrictions that + * need to be specifically understood prior to use." + * + * Historically, OpenGL implementation have advertised the RGB and + * RGBA versions of the S3TC extensions compressed format tokens + * through this mechanism. + * + * The specification is not sufficiently clear about what "suitable + * for general-purpose usage" means. Historically that seems to mean + * unsigned RGB or unsigned RGBA. The DXT1 format supporting alpha + * (GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) is not exposed in the list (at + * least for NVIDIA drivers) because the alpha is always 1.0 expect + * when it is 0.0 when RGB is required to be black. NVIDIA's even + * limits itself to true linear RGB or RGBA formats, specifically + * not including EXT_texture_sRGB's sRGB S3TC compressed formats. + * + * Adding luminance and luminance-alpha texture formats (and + * certainly signed versions of luminance and luminance-alpha + * formats!) invites potential comptaibility problems with old + * applications using this mechanism since old applications are + * unlikely to expect non-RGB or non-RGBA formats to be advertised + * through this mechanism. However no specific misinteractions + * with old applications is known. + * + * Applications that seek to use the LATC formats should do so + * by looking for this extension's name in the string returned by + * glGetString(GL_EXTENSIONS) rather than + * what GL_NUM_COMPRESSED_TEXTURE_FORMATS and + * GL_COMPRESSED_TEXTURE_FORMATS return." + * + * There is no formal spec for GL_ATI_texture_compression_3dc. Since the + * formats added by this extension are luminance-alpha formats, it is + * reasonable to expect them to follow the same rules as + * GL_EXT_texture_compression_latc. At the very least, Catalyst 11.6 does not + * expose the 3dc formats through this mechanism. + * * \param ctx the GL context * \param formats the resulting format list (may be NULL). - * \param all if true return all formats, even those with some kind - * of restrictions/limitations (See GL_ARB_texture_compression - * spec for more info). * * \return number of formats. */ GLuint -_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all) +_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats) { GLuint n = 0; if (ctx->Extensions.TDFX_texture_compression_FXT1) { @@ -152,24 +237,15 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean a n += 2; } } - /* don't return RGTC - ARB_texture_compression_rgtc query 19 */ + if (ctx->Extensions.EXT_texture_compression_s3tc) { if (formats) { formats[n++] = GL_COMPRESSED_RGB_S3TC_DXT1_EXT; - /* This format has some restrictions/limitations and so should - * not be returned via the GL_COMPRESSED_TEXTURE_FORMATS query. - * Specifically, all transparent pixels become black. NVIDIA - * omits this format too. - */ - if (all) - formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT; formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT; formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT; } else { n += 3; - if (all) - n += 1; } } if (ctx->Extensions.S3_s3tc) { @@ -183,19 +259,6 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean a n += 4; } } -#if FEATURE_EXT_texture_sRGB - if (ctx->Extensions.EXT_texture_sRGB) { - if (formats) { - formats[n++] = GL_COMPRESSED_SRGB_S3TC_DXT1_EXT; - formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT; - formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT; - formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT; - } - else { - n += 4; - } - } -#endif /* FEATURE_EXT_texture_sRGB */ return n; #if FEATURE_ES1 || FEATURE_ES2 diff --git a/src/mesa/main/texcompress.h b/src/mesa/main/texcompress.h index 922da00912d..375cf90c8a2 100644 --- a/src/mesa/main/texcompress.h +++ b/src/mesa/main/texcompress.h @@ -37,7 +37,7 @@ extern GLenum _mesa_gl_compressed_format_base_format(GLenum format); extern GLuint -_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all); +_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats); extern gl_format _mesa_glenum_to_compressed_format(GLenum format); -- cgit v1.2.3 From 95739f19ccc8d3915c437238ca057ddbecd193c6 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 25 Jul 2011 13:30:17 -0500 Subject: st/mesa: respect force_s3tc_enable environment variable NOTE: This is a candidate for the 7.10 and 7.11 branches. --- src/mesa/state_tracker/st_extensions.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 99b231d9706..b5f6d356eb0 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -208,6 +208,15 @@ void st_init_limits(struct st_context *st) } +static GLboolean st_get_s3tc_override(void) +{ + const char *override = _mesa_getenv("force_s3tc_enable"); + if (override && !strcmp(override, "true")) + return GL_TRUE; + return GL_FALSE; +} + + /** * Use pipe_screen::get_param() to query PIPE_CAP_ values to determine * which GL extensions are supported. @@ -426,7 +435,7 @@ void st_init_extensions(struct st_context *st) if (screen->is_format_supported(screen, PIPE_FORMAT_DXT5_RGBA, PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW) && - ctx->Mesa_DXTn) { + (ctx->Mesa_DXTn || st_get_s3tc_override())) { ctx->Extensions.EXT_texture_compression_s3tc = GL_TRUE; ctx->Extensions.S3_s3tc = GL_TRUE; } -- cgit v1.2.3 From 58c04435b12a104b1996fac4d3a3d345f31bd4e7 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 27 Jul 2011 12:13:37 +0200 Subject: mesa: don't forget about sampleBuffers in framebuffer visual update Otherwise multisample will never been enabled for multisample renderbuffers. Reviewed-by: Brian Paul --- src/mesa/main/framebuffer.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa') diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index e27569a6fac..23fa1b2c11e 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -548,6 +548,7 @@ _mesa_update_framebuffer_visual(struct gl_context *ctx, fb->Visual.rgbBits = fb->Visual.redBits + fb->Visual.greenBits + fb->Visual.blueBits; fb->Visual.samples = rb->NumSamples; + fb->Visual.sampleBuffers = rb->NumSamples > 0 ? 1 : 0; if (_mesa_get_format_color_encoding(fmt) == GL_SRGB) fb->Visual.sRGBCapable = ctx->Const.sRGBCapable; break; -- cgit v1.2.3 From 3e1fd13f605f16e8b48f3a9b71910a3c66eb84b5 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 25 Jul 2011 14:27:07 -0700 Subject: i965/gen4: Fix message parameter loading for 1D TXD sampling. We were neglecting to load dvdx and dvdy. v is not optional. Fixes glslparsertests tex-grad-0[12345].frag on Broadwater/Crestline. (We still need an execution test using sampler1D.) NOTE: This is a candidate for the 7.11 branch. Reviewed-by: Eric Anholt Reviewed-by: Ian Romanick Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 9632aae64b0..b82dfd5ead4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -622,6 +622,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, * dPdx = dudx, dvdx, drdx * dPdy = dudy, dvdy, drdy * + * 1-arg: Does not exist. + * * 2-arg: dudx dvdx dudy dvdy * dPdx.x dPdx.y dPdy.x dPdy.y * m4 m5 m6 m7 @@ -633,14 +635,14 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); dPdx.reg_offset++; - mlen++; } + mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2); for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) { emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); dPdy.reg_offset++; - mlen++; } + mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2); } else { /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod * instructions. We'll need to do SIMD16 here. -- cgit v1.2.3 From 15c0bc5eefc89bec537e412c02965f201fb1c011 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 25 Jul 2011 17:06:13 -0700 Subject: i965: Check actual tile offsets in Gen4 miptree workaround. The purpose of the (irb->draw_offset & 4095) != 0 check was to ensure that we don't have XYy offsets into a tile, since Gen4 hardware doesn't support that. However, it's insufficient: there are cases where draw_offset & 4095 is 0 but we still have a Y-offset. This leads to an assertion failure in brw_update_renderbuffer_surface with tile_y != 0. Instead, simply call intel_renderbuffer_tile_offsets to compute the actual X/Y offsets and check if either are non-zero. This makes both the workaround and the assertion check the same things. Fixes piglit test fbo-generatemipmap-formats, and should also fix bugs #34009 and #39487. NOTE: This is a candidate for stable release branches. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=34009 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=39487 Reviewed-by: Eric Anholt Reviewed-by: Chad Versace Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/intel/intel_fbo.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 35be3257ab3..7d6d9f271e6 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -647,6 +647,22 @@ intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb, } } +#ifndef I915 +static bool +need_tile_offset_workaround(struct brw_context *brw, + struct intel_renderbuffer *irb) +{ + uint32_t tile_x, tile_y; + + if (brw->has_surface_tile_offset) + return false; + + intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y); + + return tile_x != 0 || tile_y != 0; +} +#endif + /** * Called by glFramebufferTexture[123]DEXT() (and other places) to * prepare for rendering into texture memory. This might be called @@ -700,8 +716,7 @@ intel_render_texture(struct gl_context * ctx, intel_image->used_as_render_target = GL_TRUE; #ifndef I915 - if (!brw_context(ctx)->has_surface_tile_offset && - (irb->draw_offset & 4095) != 0) { + if (need_tile_offset_workaround(brw_context(ctx), irb)) { /* Original gen4 hardware couldn't draw to a non-tile-aligned * destination in a miptree unless you actually setup your * renderbuffer as a miptree and used the fragile -- cgit v1.2.3 From f73caddd3339d284556036d031ab30ce8057a510 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 25 Jul 2011 21:13:43 -0700 Subject: i965: Remove the now unused intel_renderbuffer::draw_offset field. The previous commit removed the last use of this field. Reviewed-by: Eric Anholt Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/intel/intel_fbo.c | 1 - src/mesa/drivers/dri/intel/intel_fbo.h | 1 - 2 files changed, 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 7d6d9f271e6..e48d6ef9cbd 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -606,7 +606,6 @@ intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb, zoffset, &dst_x, &dst_y); - irb->draw_offset = (dst_y * mt->region->pitch + dst_x) * mt->cpp; irb->draw_x = dst_x; irb->draw_y = dst_y; } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index f7f99a4f00c..2487994fde5 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -58,7 +58,6 @@ struct intel_renderbuffer /** \} */ - GLuint draw_offset; /**< Offset of drawing address within the region */ GLuint draw_x, draw_y; /**< Offset of drawing within the region */ }; -- cgit v1.2.3 From 58d6aa82878fc901d4dadd39e308a5d88b064997 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 27 Jul 2011 15:49:39 -0600 Subject: st/mesa: fix comment language --- src/mesa/state_tracker/st_atom_texture.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c index 800a9f1f0e0..3115a2511ce 100644 --- a/src/mesa/state_tracker/st_atom_texture.c +++ b/src/mesa/state_tracker/st_atom_texture.c @@ -221,9 +221,9 @@ update_single_texture(struct st_context *st, struct pipe_sampler_view **sampler_ if ((samp->sRGBDecode == GL_SKIP_DECODE_EXT) && (_mesa_get_format_color_encoding(texFormat) == GL_SRGB)) { - /* don't do sRGB->RGB conversion. Interpret the texture - * texture data as linear values. - */ + /* Don't do sRGB->RGB conversion. Interpret the texture data as + * linear values. + */ const gl_format linearFormat = _mesa_get_srgb_format_linear(texFormat); firstImageFormat = st_mesa_format_to_pipe_format(linearFormat); -- cgit v1.2.3 From 26684e0b1a857cc16a2c6f2b542e5ccf3da5acf5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 28 Jul 2011 09:43:09 -0600 Subject: mesa: test against MESA_FORMAT_NONE in _mesa_GetTexLevelParameteriv() --- src/mesa/main/texparam.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index c4ec29533e2..3f771f08bc6 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -888,7 +888,7 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, texObj = _mesa_select_tex_object(ctx, texUnit, target); img = _mesa_select_tex_image(ctx, texObj, target, level); - if (!img || !img->TexFormat) { + if (!img || img->TexFormat == MESA_FORMAT_NONE) { /* undefined texture image */ if (pname == GL_TEXTURE_COMPONENTS) *params = 1; -- cgit v1.2.3 From e4fdc95277bd323d8945e20635d3a1702a2e695d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 28 Jul 2011 09:51:30 -0600 Subject: mesa: fix format selection for meta CopyTexSubImage() When we do a glReadPixels into the temporary buffer, we don't want to use GL_LUMINANCE, GL_LUMINANCE_ALPHA or GL_INTENSITY since they will compute L=R+G+B which is not what we want. This bug has existed all along but was only exposed by the elimination of the driver hook for glCopyTexImage() in 5874890c26f434f54e9218b83fae4eb8175c24e9. Fixes https://bugs.freedesktop.org/show_bug.cgi?id=39604 Tested-by: Ian Romanick --- src/mesa/drivers/common/meta.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 26c89519679..f9b4755988b 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -2869,6 +2869,16 @@ copy_tex_sub_image(struct gl_context *ctx, /* Choose format/type for temporary image buffer */ format = _mesa_get_format_base_format(texImage->TexFormat); + if (format == GL_LUMINANCE || + format == GL_LUMINANCE_ALPHA || + format == GL_INTENSITY) { + /* We don't want to use GL_LUMINANCE, GL_INTENSITY, etc. for the + * temp image buffer because glReadPixels will do L=R+G+B which is + * not what we want (should be L=R). + */ + format = GL_RGBA; + } + type = get_temp_image_type(ctx, format); bpp = _mesa_bytes_per_pixel(format, type); if (bpp <= 0) { -- cgit v1.2.3 From 83f5d5e6aa58754f52c3579c27d810c497fe13a3 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Jul 2011 18:42:21 -0700 Subject: Add dependency generation for Mesa and GLSL dricore objects. Reviewed-By: Christopher James Halse Rogers --- src/glsl/Makefile | 1 + src/mesa/Makefile | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/glsl/Makefile b/src/glsl/Makefile index 005b51d724b..c20a6c9edd9 100644 --- a/src/glsl/Makefile +++ b/src/glsl/Makefile @@ -164,6 +164,7 @@ depend: $(ALL_SOURCES) Makefile rm -f depend touch depend $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(ALL_SOURCES) 2> /dev/null + $(MKDEP) $(MKDEP_OPTIONS) -a -p $(DRICORE_OBJ_DIR)/ $(INCLUDES) $(ALL_SOURCES) 2> /dev/null # Remove .o and backup files clean: clean-dricore diff --git a/src/mesa/Makefile b/src/mesa/Makefile index a903a260ac9..88f31b68695 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -12,11 +12,10 @@ DRICORE_OBJ_DIR := objs-dricore include sources.mak # adjust object dirs +DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS)) MESA_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_OBJECTS)) MESA_GALLIUM_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_GALLIUM_OBJECTS)) -DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS)) - # define preprocessor flags MESA_CPPFLAGS := $(API_DEFINES) $(DEFINES) @@ -124,6 +123,8 @@ depend: $(ALL_SOURCES) @ touch depend @$(MKDEP) $(MKDEP_OPTIONS) -p$(MESA_OBJ_DIR)/ $(MESA_CPPFLAGS) \ $(ALL_SOURCES) > /dev/null 2>/dev/null + @$(MKDEP) $(MKDEP_OPTIONS) -a -p$(DRICORE_OBJ_DIR)/ $(MESA_CPPFLAGS) \ + $(ALL_SOURCES) > /dev/null 2>/dev/null ###################################################################### # Installation rules -- cgit v1.2.3 From ef1854d09021b6601e59e39fcb71a88fb5e5efb2 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 13 Jul 2011 14:24:41 -0700 Subject: mesa: Fix ff fragment shader inputs calculation when enabling a VS. The FF VS generation happens just after the FF FS generation in state.c, so the ctx->VP._Current value is for the previous state update's vertex shader, not the one that will be chosen as a result of this state update. The vertexShader and vertexProgram variables should be accurately telling us whether there's going to be a ctx->VP._Current (except on _MaintainTnlProgram drivers, where it's always true). The glsl-vs-statechange-1 test was created to test for this, but it turns out that the bug is hidden by the fact that we call _mesa_update_state() twice per draw call -- once from _mesa_valid_to_render() and once from vbo_draw_arrays(), and the second one was fixing up the first one. Reviewed-by: Brian Paul --- src/mesa/main/ff_fragment_shader.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp index 0b53c28f7ae..dbfa6b57d4d 100644 --- a/src/mesa/main/ff_fragment_shader.cpp +++ b/src/mesa/main/ff_fragment_shader.cpp @@ -330,8 +330,7 @@ static GLbitfield get_fp_input_mask( struct gl_context *ctx ) /* _NEW_RENDERMODE */ fp_inputs = (FRAG_BIT_COL0 | FRAG_BIT_TEX0); } - else if (!(vertexProgram || vertexShader) || - !ctx->VertexProgram._Current) { + else if (!(vertexProgram || vertexShader)) { /* Fixed function vertex logic */ /* _NEW_ARRAY */ GLbitfield varying_inputs = ctx->varying_vp_inputs; -- cgit v1.2.3 From 4fdd289805d14d4f7a234f88cd375be1b3b96764 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 25 Jul 2011 18:50:43 -0700 Subject: i965/fs: Respect ARB_color_buffer_float clamping. This was done in the old codegen path, but not the new one. Caught by piglit fbo tests after the conversion to GLSL ff_fragment_shader. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index b82dfd5ead4..4f599fb477e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1745,6 +1745,7 @@ void fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color) { int reg_width = c->dispatch_width / 8; + fs_inst *inst; if (c->dispatch_width == 8 || intel->gen == 6) { /* SIMD8 write looks like: @@ -1763,8 +1764,10 @@ fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color) * m + 6: a0 * m + 7: a1 */ - emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index * reg_width), - color); + inst = emit(BRW_OPCODE_MOV, + fs_reg(MRF, first_color_mrf + index * reg_width), + color); + inst->saturate = c->key.clamp_fragment_color; } else { /* pre-gen6 SIMD16 single source DP write looks like: * m + 0: r0 @@ -1782,16 +1785,22 @@ fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color) * usual destination + 1 for the second half we get * destination + 4. */ - emit(BRW_OPCODE_MOV, - fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), color); + inst = emit(BRW_OPCODE_MOV, + fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), + color); + inst->saturate = c->key.clamp_fragment_color; } else { push_force_uncompressed(); - emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), color); + inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), + color); + inst->saturate = c->key.clamp_fragment_color; pop_force_uncompressed(); push_force_sechalf(); color.sechalf = true; - emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), color); + inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), + color); + inst->saturate = c->key.clamp_fragment_color; pop_force_sechalf(); color.sechalf = false; } -- cgit v1.2.3 From 44ffb4ae207e48f78fae55925601b8708ed09c1d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 29 Jul 2011 11:52:39 -0700 Subject: i965/fs: Stop using the exec_list iterator. The old style has gone out of favor in the project, but I kept copy and pasting from existing iterator code. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 70 ++++++++++------------ src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 4 +- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 16 ++--- .../dri/i965/brw_fs_schedule_instructions.cpp | 16 ++--- .../drivers/dri/i965/brw_fs_vector_splitting.cpp | 16 ++--- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 16 ++--- 6 files changed, 67 insertions(+), 71 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index b5ea943387d..15475fbae2f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -621,8 +621,8 @@ fs_visitor::assign_curb_setup() } /* Map the offsets in the UNIFORM file to fixed HW regs. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == UNIFORM) { @@ -684,8 +684,8 @@ fs_visitor::assign_urb_setup() /* Offset all the urb_setup[] index by the actual position of the * setup regs, now that the location of the constants has been chosen. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->opcode == FS_OPCODE_LINTERP) { assert(inst->src[2].file == FIXED_HW_REG); @@ -739,8 +739,8 @@ fs_visitor::split_virtual_grfs() split_grf[this->delta_x.reg] = false; } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; /* Texturing produces 4 contiguous registers, so no splitting. */ if (inst->is_tex()) { @@ -763,8 +763,8 @@ fs_visitor::split_virtual_grfs() } } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->dst.file == GRF && split_grf[inst->dst.reg] && @@ -815,8 +815,8 @@ fs_visitor::setup_pull_constants() int pull_uniform_base = max_uniform_components; int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base; - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (int i = 0; i < 3; i++) { if (inst->src[i].file != UNIFORM) @@ -871,8 +871,8 @@ fs_visitor::calculate_live_intervals() } int ip = 0; - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->opcode == BRW_OPCODE_DO) { if (loop_depth++ == 0) @@ -945,8 +945,8 @@ fs_visitor::propagate_constants() calculate_live_intervals(); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->opcode != BRW_OPCODE_MOV || inst->predicated || @@ -965,11 +965,9 @@ fs_visitor::propagate_constants() /* Found a move of a constant to a GRF. Find anything else using the GRF * before it's written, and replace it with the constant if we can. */ - exec_list_iterator scan_iter = iter; - scan_iter.next(); - for (; scan_iter.has_next(); scan_iter.next()) { - fs_inst *scan_inst = (fs_inst *)scan_iter.get(); - + for (fs_inst *scan_inst = (fs_inst *)inst->next; + !scan_inst->is_tail_sentinel(); + scan_inst = (fs_inst *)scan_inst->next) { if (scan_inst->opcode == BRW_OPCODE_DO || scan_inst->opcode == BRW_OPCODE_WHILE || scan_inst->opcode == BRW_OPCODE_ELSE || @@ -1077,8 +1075,8 @@ fs_visitor::dead_code_eliminate() calculate_live_intervals(); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) { inst->remove(); @@ -1101,8 +1099,8 @@ fs_visitor::register_coalesce() int if_depth = 0; int loop_depth = 0; - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; /* Make sure that we dominate the instructions we're going to * scan for interfering with our coalescing, or we won't have @@ -1141,11 +1139,10 @@ fs_visitor::register_coalesce() * program. */ bool interfered = false; - exec_list_iterator scan_iter = iter; - scan_iter.next(); - for (; scan_iter.has_next(); scan_iter.next()) { - fs_inst *scan_inst = (fs_inst *)scan_iter.get(); + for (fs_inst *scan_inst = (fs_inst *)inst->next; + !scan_inst->is_tail_sentinel(); + scan_inst = (fs_inst *)scan_inst->next) { if (scan_inst->dst.file == GRF) { if (scan_inst->dst.reg == inst->dst.reg && (scan_inst->dst.reg_offset == inst->dst.reg_offset || @@ -1176,10 +1173,9 @@ fs_visitor::register_coalesce() /* Rewrite the later usage to point at the source of the move to * be removed. */ - for (exec_list_iterator scan_iter = iter; scan_iter.has_next(); - scan_iter.next()) { - fs_inst *scan_inst = (fs_inst *)scan_iter.get(); - + for (fs_inst *scan_inst = inst; + !scan_inst->is_tail_sentinel(); + scan_inst = (fs_inst *)scan_inst->next) { for (int i = 0; i < 3; i++) { if (scan_inst->src[i].file == GRF && scan_inst->src[i].reg == inst->dst.reg && @@ -1212,8 +1208,8 @@ fs_visitor::compute_to_mrf() calculate_live_intervals(); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; int ip = next_ip; next_ip++; @@ -1392,8 +1388,8 @@ fs_visitor::remove_duplicate_mrf_writes() memset(last_mrf_move, 0, sizeof(last_mrf_move)); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; switch (inst->opcode) { case BRW_OPCODE_DO: @@ -1527,8 +1523,8 @@ fs_visitor::run() /* Generate FS IR for main(). (the visitor only descends into * functions called "main"). */ - foreach_iter(exec_list_iterator, iter, *shader->ir) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &*shader->ir) { + ir_instruction *ir = (ir_instruction *)node; base_ir = ir; this->result = reg_undef; ir->accept(this); diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index eecfc92eb5b..9fb0153d1f8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -609,8 +609,8 @@ fs_visitor::generate_code() prog->Name, c->dispatch_width); } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; struct brw_reg src[3], dst; if (unlikely(INTEL_DEBUG & DEBUG_WM)) { diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index b4689d2c293..78daa491156 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -75,8 +75,8 @@ fs_visitor::assign_regs_trivial() last_grf = hw_reg_mapping[i - 1] + (this->virtual_grf_sizes[i - 1] * reg_width); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; assign_reg(hw_reg_mapping, &inst->dst, reg_width); assign_reg(hw_reg_mapping, &inst->src[0], reg_width); @@ -283,8 +283,8 @@ fs_visitor::assign_regs() reg_width); } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; assign_reg(hw_reg_mapping, &inst->dst, reg_width); assign_reg(hw_reg_mapping, &inst->src[0], reg_width); @@ -336,8 +336,8 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) * spill/unspill we'll have to do, and guess that the insides of * loops run 10 times. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == GRF) { @@ -394,8 +394,8 @@ fs_visitor::spill_reg(int spill_reg) * virtual grf of the same size. For most instructions, though, we * could just spill/unspill the GRF being accessed. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == GRF && diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index d8218c26edb..9ec3f502764 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -283,8 +283,8 @@ instruction_scheduler::calculate_deps() memset(last_mrf_write, 0, sizeof(last_mrf_write)); /* top-to-bottom dependencies: RAW and WAW. */ - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list(node, &instructions) { + schedule_node *n = (schedule_node *)node; fs_inst *inst = n->inst; /* read-after-write deps. */ @@ -437,8 +437,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header) int time = 0; /* Remove non-DAG heads from the list. */ - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list_safe(node, &instructions) { + schedule_node *n = (schedule_node *)node; if (n->parent_count != 0) n->remove(); } @@ -447,8 +447,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header) schedule_node *chosen = NULL; int chosen_time = 0; - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list(node, &instructions) { + schedule_node *n = (schedule_node *)node; if (!chosen || n->unblocked_time < chosen_time) { chosen = n; @@ -490,8 +490,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header) * progress until the first is done. */ if (chosen->inst->is_math()) { - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list(node, &instructions) { + schedule_node *n = (schedule_node *)node; if (n->inst->is_math()) n->unblocked_time = MAX2(n->unblocked_time, diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp index 530ffa26580..a9a60c2fd8a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp @@ -122,8 +122,8 @@ ir_vector_reference_visitor::get_variable_entry(ir_variable *var) break; } - foreach_iter(exec_list_iterator, iter, this->variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list(node, &this->variable_list) { + variable_entry *entry = (variable_entry *)node; if (entry->var == var) return entry; } @@ -222,8 +222,8 @@ ir_vector_splitting_visitor::get_splitting_entry(ir_variable *var) if (!var->type->is_vector()) return NULL; - foreach_iter(exec_list_iterator, iter, *this->variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list(node, &*this->variable_list) { + variable_entry *entry = (variable_entry *)node; if (entry->var == var) { return entry; } @@ -341,8 +341,8 @@ brw_do_vector_splitting(exec_list *instructions) visit_list_elements(&refs, instructions); /* Trim out variables we can't split. */ - foreach_iter(exec_list_iterator, iter, refs.variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list_safe(node, &refs.variable_list) { + variable_entry *entry = (variable_entry *)node; if (debug) { printf("vector %s@%p: decl %d, whole_access %d\n", @@ -363,8 +363,8 @@ brw_do_vector_splitting(exec_list *instructions) /* Replace the decls of the vectors to be split with their split * components. */ - foreach_iter(exec_list_iterator, iter, refs.variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list(node, &refs.variable_list) { + variable_entry *entry = (variable_entry *)node; const struct glsl_type *type; type = glsl_type::get_instance(entry->var->type->base_type, 1, 1); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 4f599fb477e..2b769ccbba1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1477,8 +1477,8 @@ fs_visitor::visit(ir_if *ir) inst->predicated = true; } - foreach_iter(exec_list_iterator, iter, ir->then_instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &ir->then_instructions) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; ir->accept(this); @@ -1487,8 +1487,8 @@ fs_visitor::visit(ir_if *ir) if (!ir->else_instructions.is_empty()) { emit(BRW_OPCODE_ELSE); - foreach_iter(exec_list_iterator, iter, ir->else_instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &ir->else_instructions) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; ir->accept(this); @@ -1538,8 +1538,8 @@ fs_visitor::visit(ir_loop *ir) inst->predicated = true; } - foreach_iter(exec_list_iterator, iter, ir->body_instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &ir->body_instructions) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; @@ -1595,8 +1595,8 @@ fs_visitor::visit(ir_function *ir) assert(sig); - foreach_iter(exec_list_iterator, iter, sig->body) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &sig->body) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; ir->accept(this); -- cgit v1.2.3 From 652ef8569c923cf8e1e254dddc160c7995d258aa Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Jul 2011 15:48:53 -0700 Subject: Revert "i965: Don't compute brw->wm.input_size_masks when it's unused." This reverts commit 3412069e23b7fa5656262f3dd1aa86f66980594d. We're about to start using it in fragment shaders to handle avoiding projection for fixed function. --- src/mesa/drivers/dri/i965/brw_vs_constval.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c index 9fdfebe9f76..47cc0a7da7a 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_constval.c +++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c @@ -194,19 +194,11 @@ static void calc_wm_input_sizes( struct brw_context *brw ) /* BRW_NEW_VERTEX_PROGRAM */ const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); - /* BRW_NEW_FRAGMENT_PROGRAM */ - struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram; /* BRW_NEW_INPUT_DIMENSIONS */ struct tracker t; GLuint insn; GLuint i; - /* If we're going to go through brw_fs.cpp, we don't end up using - * brw->wm.input_size_masks. - */ - if (prog && prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) - return; - memset(&t, 0, sizeof(t)); /* _NEW_LIGHT */ @@ -246,9 +238,7 @@ static void calc_wm_input_sizes( struct brw_context *brw ) const struct brw_tracked_state brw_wm_input_sizes = { .dirty = { .mesa = _NEW_LIGHT, - .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_INPUT_DIMENSIONS), + .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS, .cache = 0 }, .prepare = calc_wm_input_sizes -- cgit v1.2.3 From eb30820f268608cf451da32de69723036dddbc62 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Jul 2011 15:56:46 -0700 Subject: i965/fs: Port texture projection avoidance optimization from the old backend. This is part of fixing a ~1% performance regression in OpenArena when changing the fixed function fragment shader to using the new backend. Right now this just avoids the LINTERP of the projector, not the math using it. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 15475fbae2f..9c3180fbc1c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -463,9 +463,21 @@ fs_visitor::emit_general_interpolation(ir_variable *ir) } else { /* Perspective interpolation case. */ for (unsigned int k = 0; k < type->vector_elements; k++) { - struct brw_reg interp = interp_reg(location, k); - emit(FS_OPCODE_LINTERP, attr, - this->delta_x, this->delta_y, fs_reg(interp)); + /* FINISHME: At some point we probably want to push + * this farther by giving similar treatment to the + * other potentially constant components of the + * attribute, as well as making brw_vs_constval.c + * handle varyings other than gl_TexCoord. + */ + if (location >= FRAG_ATTRIB_TEX0 && + location <= FRAG_ATTRIB_TEX7 && + k == 3 && !(c->key.proj_attrib_mask & (1 << location))) { + emit(BRW_OPCODE_MOV, attr, fs_reg(1.0f)); + } else { + struct brw_reg interp = interp_reg(location, k); + emit(FS_OPCODE_LINTERP, attr, + this->delta_x, this->delta_y, fs_reg(interp)); + } attr.reg_offset++; } -- cgit v1.2.3 From 6d8d6b41b85a18685351f3023a4cd41266ba9e68 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Jul 2011 16:18:39 -0700 Subject: i965/fs: If we see a RCP of a constant, try to constant fold it. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 9c3180fbc1c..351d1dd283e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1056,6 +1056,20 @@ fs_visitor::propagate_constants() progress = true; } break; + + case FS_OPCODE_RCP: + /* The hardware doesn't do math on immediate values + * (because why are you doing that, seriously?), but + * the correct answer is to just constant fold it + * anyway. + */ + assert(i == 0); + if (inst->src[0].imm.f != 0.0f) { + scan_inst->opcode = BRW_OPCODE_MOV; + scan_inst->src[0] = inst->src[0]; + progress = true; + } + break; } } -- cgit v1.2.3 From a8b86459a1bb74cfdf0d63572a9fe194b2b5b53f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Jul 2011 16:45:15 -0700 Subject: i965/fs: Optimize a * 1.0 -> a. This appears in our instruction stream as a result of the brw_vs_constval.c handling. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 43 ++++++++++++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs.h | 1 + 2 files changed, 44 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 351d1dd283e..a9617c56e12 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1067,6 +1067,7 @@ fs_visitor::propagate_constants() if (inst->src[0].imm.f != 0.0f) { scan_inst->opcode = BRW_OPCODE_MOV; scan_inst->src[0] = inst->src[0]; + scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f; progress = true; } break; @@ -1087,6 +1088,47 @@ fs_visitor::propagate_constants() return progress; } + + +/** + * Attempts to move immediate constants into the immediate + * constant slot of following instructions. + * + * Immediate constants are a bit tricky -- they have to be in the last + * operand slot, you can't do abs/negate on them, + */ + +bool +fs_visitor::opt_algebraic() +{ + bool progress = false; + + calculate_live_intervals(); + + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; + + switch (inst->opcode) { + case BRW_OPCODE_MUL: + if (inst->src[1].file != IMM) + continue; + + /* a * 1.0 = a */ + if (inst->src[1].type == BRW_REGISTER_TYPE_F && + inst->src[1].imm.f == 1.0) { + inst->opcode = BRW_OPCODE_MOV; + inst->src[1] = reg_undef; + progress = true; + break; + } + + break; + } + } + + return progress; +} + /** * Must be called after calculate_live_intervales() to remove unused * writes to registers -- register allocation will fail otherwise @@ -1572,6 +1614,7 @@ fs_visitor::run() progress = remove_duplicate_mrf_writes() || progress; progress = propagate_constants() || progress; + progress = opt_algebraic() || progress; progress = register_coalesce() || progress; progress = compute_to_mrf() || progress; progress = dead_code_eliminate() || progress; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 2bf850e5dea..89d6cda7e4f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -485,6 +485,7 @@ public: void setup_pull_constants(); void calculate_live_intervals(); bool propagate_constants(); + bool opt_algebraic(); bool register_coalesce(); bool compute_to_mrf(); bool dead_code_eliminate(); -- cgit v1.2.3 From f710b8c7501f29f5f8941e757ea1066cbeb03305 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Jul 2011 16:52:54 -0700 Subject: i965/fs: Allow register coalescing where the source is a uniform. Removes 0.8% of the fragment shader instructions on Unigine Tropics. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index a9617c56e12..e07798cebc1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1196,7 +1196,8 @@ fs_visitor::register_coalesce() if (inst->opcode != BRW_OPCODE_MOV || inst->predicated || inst->saturate || - inst->dst.file != GRF || inst->src[0].file != GRF || + inst->dst.file != GRF || (inst->src[0].file != GRF && + inst->src[0].file != UNIFORM)|| inst->dst.type != inst->src[0].type) continue; @@ -1218,7 +1219,8 @@ fs_visitor::register_coalesce() interfered = true; break; } - if (scan_inst->dst.reg == inst->src[0].reg && + if (inst->src[0].file == GRF && + scan_inst->dst.reg == inst->src[0].reg && (scan_inst->dst.reg_offset == inst->src[0].reg_offset || scan_inst->is_tex())) { interfered = true; @@ -1226,10 +1228,13 @@ fs_visitor::register_coalesce() } } - /* The gen6 MATH instruction can't handle source modifiers, so avoid - * coalescing those for now. We should do something more specific. + /* The gen6 MATH instruction can't handle source modifiers or + * unusual register regions, so avoid coalescing those for + * now. We should do something more specific. */ - if (intel->gen >= 6 && scan_inst->is_math() && has_source_modifiers) { + if (intel->gen >= 6 && + scan_inst->is_math() && + (has_source_modifiers || inst->src[0].file == UNIFORM)) { interfered = true; break; } @@ -1248,11 +1253,10 @@ fs_visitor::register_coalesce() if (scan_inst->src[i].file == GRF && scan_inst->src[i].reg == inst->dst.reg && scan_inst->src[i].reg_offset == inst->dst.reg_offset) { - scan_inst->src[i].reg = inst->src[0].reg; - scan_inst->src[i].reg_offset = inst->src[0].reg_offset; - scan_inst->src[i].abs |= inst->src[0].abs; - scan_inst->src[i].negate ^= inst->src[0].negate; - scan_inst->src[i].smear = inst->src[0].smear; + fs_reg new_src = inst->src[0]; + new_src.negate ^= scan_inst->src[i].negate; + new_src.abs |= scan_inst->src[i].abs; + scan_inst->src[i] = new_src; } } } -- cgit v1.2.3 From dc1f32deae1ab7366792fe5c7d654e02757985c0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 29 Jul 2011 16:49:55 -0600 Subject: mesa: add missing breaks for GL_TEXTURE_CUBE_MAP_SEAMLESS queries And fix indentation. NOTE: This is a candidate for the 7.11 branch. --- src/mesa/main/texparam.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 3f771f08bc6..134f15346e8 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1266,12 +1266,13 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) break; case GL_TEXTURE_CUBE_MAP_SEAMLESS: - if (ctx->Extensions.AMD_seamless_cubemap_per_texture) { - *params = (GLfloat) obj->Sampler.CubeMapSeamless; - } - else { - error = GL_TRUE; - } + if (ctx->Extensions.AMD_seamless_cubemap_per_texture) { + *params = (GLfloat) obj->Sampler.CubeMapSeamless; + } + else { + error = GL_TRUE; + } + break; default: error = GL_TRUE; @@ -1441,6 +1442,7 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) else { error = GL_TRUE; } + break; default: ; /* silence warnings */ -- cgit v1.2.3 From 120d71a45cfda1edfa8cd6b1732e209eb98b53d8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 29 Jul 2011 16:49:55 -0600 Subject: mesa: minor comment changes in teximage.c --- src/mesa/main/teximage.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 302fd65cb27..27717cfb0f5 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -1,6 +1,5 @@ /* - * mesa 3-D graphics library - * Version: 7.6 + * Mesa 3-D graphics library * * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. * Copyright (C) 2009 VMware, Inc. All Rights Reserved. @@ -556,8 +555,6 @@ _mesa_tex_target_to_face(GLenum target) * \param target texture target. * \param level image level. * \param texImage texture image. - * - * This was basically prompted by the introduction of cube maps. */ void _mesa_set_tex_image(struct gl_texture_object *tObj, @@ -709,15 +706,13 @@ get_proxy_target(GLenum target) /** * Get the texture object that corresponds to the target of the given - * texture unit. + * texture unit. The target should have already been checked for validity. * * \param ctx GL context. * \param texUnit texture unit. * \param target texture target. * * \return pointer to the texture object on success, or NULL on failure. - * - * \sa gl_texture_unit. */ struct gl_texture_object * _mesa_select_tex_object(struct gl_context *ctx, -- cgit v1.2.3 From f379d8f73063a4c4d6cf379318c6b37118d46bfa Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 25 Apr 2011 23:37:47 -0500 Subject: st/mesa: Add a GLSL IR to TGSI translator. It is still a work in progress at this point, but it produces working and reasonably well-optimized code. Originally based on ir_to_mesa and st_mesa_to_tgsi, but does not directly use Mesa IR instructions in TGSI generation, instead generating TGSI from the intermediate class glsl_to_tgsi_instruction. It also has new optimization passes to replace _mesa_optimize_program. --- src/mesa/sources.mak | 3 +- src/mesa/state_tracker/st_cb_program.c | 14 + src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 4431 ++++++++++++++++++++++++++++ src/mesa/state_tracker/st_glsl_to_tgsi.h | 66 + src/mesa/state_tracker/st_mesa_to_tgsi.c | 4 +- src/mesa/state_tracker/st_mesa_to_tgsi.h | 6 + src/mesa/state_tracker/st_program.c | 399 +-- src/mesa/state_tracker/st_program.h | 27 + 8 files changed, 4767 insertions(+), 183 deletions(-) create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi.cpp create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi.h (limited to 'src/mesa') diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak index 4b2ec08bbb0..ed008f8813e 100644 --- a/src/mesa/sources.mak +++ b/src/mesa/sources.mak @@ -336,7 +336,8 @@ MESA_GALLIUM_SOURCES = \ MESA_GALLIUM_CXX_SOURCES = \ $(MAIN_CXX_SOURCES) \ - $(SHADER_CXX_SOURCES) + $(SHADER_CXX_SOURCES) \ + state_tracker/st_glsl_to_tgsi.cpp # All the core C sources, for dependency checking ALL_SOURCES = \ diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index 32694975d17..2abb4d8f082 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -44,6 +44,7 @@ #include "st_program.h" #include "st_mesa_to_tgsi.h" #include "st_cb_program.h" +#include "st_glsl_to_tgsi.h" @@ -129,6 +130,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog) { struct st_vertex_program *stvp = (struct st_vertex_program *) prog; st_release_vp_variants( st, stvp ); + + if (stvp->glsl_to_tgsi) + free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi); } break; case MESA_GEOMETRY_PROGRAM: @@ -137,6 +141,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog) (struct st_geometry_program *) prog; st_release_gp_variants(st, stgp); + + if (stgp->glsl_to_tgsi) + free_glsl_to_tgsi_visitor(stgp->glsl_to_tgsi); if (stgp->tgsi.tokens) { st_free_tokens((void *) stgp->tgsi.tokens); @@ -151,6 +158,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog) st_release_fp_variants(st, stfp); + if (stfp->glsl_to_tgsi) + free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi); + if (stfp->tgsi.tokens) { st_free_tokens(stfp->tgsi.tokens); stfp->tgsi.tokens = NULL; @@ -242,4 +252,8 @@ st_init_program_functions(struct dd_function_table *functions) functions->DeleteProgram = st_delete_program; functions->IsProgramNative = st_is_program_native; functions->ProgramStringNotify = st_program_string_notify; + + functions->NewShader = st_new_shader; + functions->NewShaderProgram = st_new_shader_program; + functions->LinkShader = st_link_shader; } diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp new file mode 100644 index 00000000000..e1102503ee0 --- /dev/null +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -0,0 +1,4431 @@ +/* + * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. + * Copyright (C) 2008 VMware, Inc. All Rights Reserved. + * Copyright © 2010 Intel Corporation + * Copyright © 2011 Bryan Cain + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file glsl_to_tgsi.cpp + * + * Translate GLSL IR to Mesa's gl_program representation and to TGSI. + */ + +#include +#include "main/compiler.h" +#include "ir.h" +#include "ir_visitor.h" +#include "ir_print_visitor.h" +#include "ir_expression_flattening.h" +#include "glsl_types.h" +#include "glsl_parser_extras.h" +#include "../glsl/program.h" +#include "ir_optimization.h" +#include "ast.h" + +extern "C" { +#include "main/mtypes.h" +#include "main/shaderapi.h" +#include "main/shaderobj.h" +#include "main/uniforms.h" +#include "program/hash_table.h" +#include "program/prog_instruction.h" +#include "program/prog_optimize.h" +#include "program/prog_print.h" +#include "program/program.h" +#include "program/prog_uniform.h" +#include "program/prog_parameter.h" +#include "program/sampler.h" + +#include "pipe/p_compiler.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_state.h" +#include "util/u_math.h" +#include "tgsi/tgsi_ureg.h" +#include "tgsi/tgsi_dump.h" +#include "st_context.h" +#include "st_program.h" +#include "st_glsl_to_tgsi.h" +#include "st_mesa_to_tgsi.h" + +#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ + (1 << PROGRAM_ENV_PARAM) | \ + (1 << PROGRAM_STATE_VAR) | \ + (1 << PROGRAM_NAMED_PARAM) | \ + (1 << PROGRAM_CONSTANT) | \ + (1 << PROGRAM_UNIFORM)) +} + +class st_src_reg; +class st_dst_reg; + +static int swizzle_for_size(int size); + +/** + * This struct is a corresponding struct to Mesa prog_src_register, with + * wider fields. + */ +class st_src_reg { +public: + st_src_reg(gl_register_file file, int index, const glsl_type *type) + { + this->file = file; + this->index = index; + if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) + this->swizzle = swizzle_for_size(type->vector_elements); + else + this->swizzle = SWIZZLE_XYZW; + this->negate = 0; + this->reladdr = NULL; + } + + st_src_reg() + { + this->file = PROGRAM_UNDEFINED; + this->index = 0; + this->swizzle = 0; + this->negate = 0; + this->reladdr = NULL; + } + + explicit st_src_reg(st_dst_reg reg); + + gl_register_file file; /**< PROGRAM_* from Mesa */ + int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ + GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ + int negate; /**< NEGATE_XYZW mask from mesa */ + /** Register index should be offset by the integer in this reg. */ + st_src_reg *reladdr; +}; + +class st_dst_reg { +public: + st_dst_reg(gl_register_file file, int writemask) + { + this->file = file; + this->index = 0; + this->writemask = writemask; + this->cond_mask = COND_TR; + this->reladdr = NULL; + } + + st_dst_reg() + { + this->file = PROGRAM_UNDEFINED; + this->index = 0; + this->writemask = 0; + this->cond_mask = COND_TR; + this->reladdr = NULL; + } + + explicit st_dst_reg(st_src_reg reg); + + gl_register_file file; /**< PROGRAM_* from Mesa */ + int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ + int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ + GLuint cond_mask:4; + /** Register index should be offset by the integer in this reg. */ + st_src_reg *reladdr; +}; + +st_src_reg::st_src_reg(st_dst_reg reg) +{ + this->file = reg.file; + this->index = reg.index; + this->swizzle = SWIZZLE_XYZW; + this->negate = 0; + this->reladdr = NULL; +} + +st_dst_reg::st_dst_reg(st_src_reg reg) +{ + this->file = reg.file; + this->index = reg.index; + this->writemask = WRITEMASK_XYZW; + this->cond_mask = COND_TR; + this->reladdr = reg.reladdr; +} + +class glsl_to_tgsi_instruction : public exec_node { +public: + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = rzalloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + enum prog_opcode op; + st_dst_reg dst; + st_src_reg src[3]; + /** Pointer to the ir source this tree came from for debugging */ + ir_instruction *ir; + GLboolean cond_update; + bool saturate; + int sampler; /**< sampler index */ + int tex_target; /**< One of TEXTURE_*_INDEX */ + GLboolean tex_shadow; + + class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */ +}; + +class variable_storage : public exec_node { +public: + variable_storage(ir_variable *var, gl_register_file file, int index) + : file(file), index(index), var(var) + { + /* empty */ + } + + gl_register_file file; + int index; + ir_variable *var; /* variable that maps to this, if any */ +}; + +class function_entry : public exec_node { +public: + ir_function_signature *sig; + + /** + * identifier of this function signature used by the program. + * + * At the point that Mesa instructions for function calls are + * generated, we don't know the address of the first instruction of + * the function body. So we make the BranchTarget that is called a + * small integer and rewrite them during set_branchtargets(). + */ + int sig_id; + + /** + * Pointer to first instruction of the function body. + * + * Set during function body emits after main() is processed. + */ + glsl_to_tgsi_instruction *bgn_inst; + + /** + * Index of the first instruction of the function body in actual + * Mesa IR. + * + * Set after convertion from glsl_to_tgsi_instruction to prog_instruction. + */ + int inst; + + /** Storage for the return value. */ + st_src_reg return_reg; +}; + +class glsl_to_tgsi_visitor : public ir_visitor { +public: + glsl_to_tgsi_visitor(); + ~glsl_to_tgsi_visitor(); + + function_entry *current_function; + + struct gl_context *ctx; + struct gl_program *prog; + struct gl_shader_program *shader_program; + struct gl_shader_compiler_options *options; + + int next_temp; + + int num_address_regs; + bool indirect_addr_temps; + bool indirect_addr_consts; + + variable_storage *find_variable_storage(ir_variable *var); + + function_entry *get_function_signature(ir_function_signature *sig); + + st_src_reg get_temp(const glsl_type *type); + void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); + + st_src_reg st_src_reg_for_float(float val); + + /** + * \name Visit methods + * + * As typical for the visitor pattern, there must be one \c visit method for + * each concrete subclass of \c ir_instruction. Virtual base classes within + * the hierarchy should not have \c visit methods. + */ + /*@{*/ + virtual void visit(ir_variable *); + virtual void visit(ir_loop *); + virtual void visit(ir_loop_jump *); + virtual void visit(ir_function_signature *); + virtual void visit(ir_function *); + virtual void visit(ir_expression *); + virtual void visit(ir_swizzle *); + virtual void visit(ir_dereference_variable *); + virtual void visit(ir_dereference_array *); + virtual void visit(ir_dereference_record *); + virtual void visit(ir_assignment *); + virtual void visit(ir_constant *); + virtual void visit(ir_call *); + virtual void visit(ir_return *); + virtual void visit(ir_discard *); + virtual void visit(ir_texture *); + virtual void visit(ir_if *); + /*@}*/ + + st_src_reg result; + + /** List of variable_storage */ + exec_list variables; + + /** List of function_entry */ + exec_list function_signatures; + int next_signature_id; + + /** List of glsl_to_tgsi_instruction */ + exec_list instructions; + + glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op); + + glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, st_src_reg src0); + + glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, st_src_reg src0, st_src_reg src1); + + glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1, st_src_reg src2); + + /** + * Emit the correct dot-product instruction for the type of arguments + */ + void emit_dp(ir_instruction *ir, + st_dst_reg dst, + st_src_reg src0, + st_src_reg src1, + unsigned elements); + + void emit_scalar(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, st_src_reg src0); + + void emit_scalar(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, st_src_reg src0, st_src_reg src1); + + void emit_scs(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, const st_src_reg &src); + + GLboolean try_emit_mad(ir_expression *ir, + int mul_operand); + GLboolean try_emit_sat(ir_expression *ir); + + void emit_swz(ir_expression *ir); + + bool process_move_condition(ir_rvalue *ir); + + void rename_temp_register(int index, int new_index); + int get_first_temp_read(int index); + int get_first_temp_write(int index); + int get_last_temp_read(int index); + int get_last_temp_write(int index); + + void copy_propagate(void); + void eliminate_dead_code(void); + void merge_registers(void); + void renumber_registers(void); + + void *mem_ctx; +}; + +static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, NULL); + +static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP); + +static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X); + +static void +fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); + +static void +fail_link(struct gl_shader_program *prog, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + ralloc_vasprintf_append(&prog->InfoLog, fmt, args); + va_end(args); + + prog->LinkStatus = GL_FALSE; +} + +static int +swizzle_for_size(int size) +{ + int size_swizzles[4] = { + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), + }; + + assert((size >= 1) && (size <= 4)); + return size_swizzles[size - 1]; +} + +glsl_to_tgsi_instruction * +glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1, st_src_reg src2) +{ + glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); + int num_reladdr = 0, i; + + /* If we have to do relative addressing, we want to load the ARL + * reg directly for one of the regs, and preload the other reladdr + * sources into temps. + */ + num_reladdr += dst.reladdr != NULL; + num_reladdr += src0.reladdr != NULL; + num_reladdr += src1.reladdr != NULL; + num_reladdr += src2.reladdr != NULL; + + reladdr_to_temp(ir, &src2, &num_reladdr); + reladdr_to_temp(ir, &src1, &num_reladdr); + reladdr_to_temp(ir, &src0, &num_reladdr); + + if (dst.reladdr) { + emit(ir, OPCODE_ARL, address_reg, *dst.reladdr); + num_reladdr--; + } + assert(num_reladdr == 0); + + inst->op = op; + inst->dst = dst; + inst->src[0] = src0; + inst->src[1] = src1; + inst->src[2] = src2; + inst->ir = ir; + + inst->function = NULL; + + if (op == OPCODE_ARL) + this->num_address_regs = 1; + + /* Update indirect addressing status used by TGSI */ + if (dst.reladdr) { + switch(dst.file) { + case PROGRAM_TEMPORARY: + this->indirect_addr_temps = true; + break; + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + this->indirect_addr_consts = true; + break; + default: + break; + } + } + else { + for (i=0; i<3; i++) { + if(inst->src[i].reladdr) { + switch(dst.file) { + case PROGRAM_TEMPORARY: + this->indirect_addr_temps = true; + break; + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + this->indirect_addr_consts = true; + break; + default: + break; + } + } + } + } + + this->instructions.push_tail(inst); + + return inst; +} + + +glsl_to_tgsi_instruction * +glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, st_src_reg src0, st_src_reg src1) +{ + return emit(ir, op, dst, src0, src1, undef_src); +} + +glsl_to_tgsi_instruction * +glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, st_src_reg src0) +{ + assert(dst.writemask != 0); + return emit(ir, op, dst, src0, undef_src, undef_src); +} + +glsl_to_tgsi_instruction * +glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op) +{ + return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); +} + +void +glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, + st_dst_reg dst, st_src_reg src0, st_src_reg src1, + unsigned elements) +{ + static const gl_inst_opcode dot_opcodes[] = { + OPCODE_DP2, OPCODE_DP3, OPCODE_DP4 + }; + + emit(ir, dot_opcodes[elements - 2], dst, src0, src1); +} + +/** + * Emits Mesa scalar opcodes to produce unique answers across channels. + * + * Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X + * channel determines the result across all channels. So to do a vec4 + * of this operation, we want to emit a scalar per source channel used + * to produce dest channels. + */ +void +glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, + st_src_reg orig_src0, st_src_reg orig_src1) +{ + int i, j; + int done_mask = ~dst.writemask; + + /* Mesa RCP is a scalar operation splatting results to all channels, + * like ARB_fp/vp. So emit as many RCPs as necessary to cover our + * dst channels. + */ + for (i = 0; i < 4; i++) { + GLuint this_mask = (1 << i); + glsl_to_tgsi_instruction *inst; + st_src_reg src0 = orig_src0; + st_src_reg src1 = orig_src1; + + if (done_mask & this_mask) + continue; + + GLuint src0_swiz = GET_SWZ(src0.swizzle, i); + GLuint src1_swiz = GET_SWZ(src1.swizzle, i); + for (j = i + 1; j < 4; j++) { + /* If there is another enabled component in the destination that is + * derived from the same inputs, generate its value on this pass as + * well. + */ + if (!(done_mask & (1 << j)) && + GET_SWZ(src0.swizzle, j) == src0_swiz && + GET_SWZ(src1.swizzle, j) == src1_swiz) { + this_mask |= (1 << j); + } + } + src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, + src0_swiz, src0_swiz); + src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, + src1_swiz, src1_swiz); + + inst = emit(ir, op, dst, src0, src1); + inst->dst.writemask = this_mask; + done_mask |= this_mask; + } +} + +void +glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, st_src_reg src0) +{ + st_src_reg undef = undef_src; + + undef.swizzle = SWIZZLE_XXXX; + + emit_scalar(ir, op, dst, src0, undef); +} + +/** + * Emit an OPCODE_SCS instruction + * + * The \c SCS opcode functions a bit differently than the other Mesa (or + * ARB_fragment_program) opcodes. Instead of splatting its result across all + * four components of the destination, it writes one value to the \c x + * component and another value to the \c y component. + * + * \param ir IR instruction being processed + * \param op Either \c OPCODE_SIN or \c OPCODE_COS depending on which + * value is desired. + * \param dst Destination register + * \param src Source register + */ +void +glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op, + st_dst_reg dst, + const st_src_reg &src) +{ + /* Vertex programs cannot use the SCS opcode. + */ + if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) { + emit_scalar(ir, op, dst, src); + return; + } + + const unsigned component = (op == OPCODE_SIN) ? 0 : 1; + const unsigned scs_mask = (1U << component); + int done_mask = ~dst.writemask; + st_src_reg tmp; + + assert(op == OPCODE_SIN || op == OPCODE_COS); + + /* If there are compnents in the destination that differ from the component + * that will be written by the SCS instrution, we'll need a temporary. + */ + if (scs_mask != unsigned(dst.writemask)) { + tmp = get_temp(glsl_type::vec4_type); + } + + for (unsigned i = 0; i < 4; i++) { + unsigned this_mask = (1U << i); + st_src_reg src0 = src; + + if ((done_mask & this_mask) != 0) + continue; + + /* The source swizzle specified which component of the source generates + * sine / cosine for the current component in the destination. The SCS + * instruction requires that this value be swizzle to the X component. + * Replace the current swizzle with a swizzle that puts the source in + * the X component. + */ + unsigned src0_swiz = GET_SWZ(src.swizzle, i); + + src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, + src0_swiz, src0_swiz); + for (unsigned j = i + 1; j < 4; j++) { + /* If there is another enabled component in the destination that is + * derived from the same inputs, generate its value on this pass as + * well. + */ + if (!(done_mask & (1 << j)) && + GET_SWZ(src0.swizzle, j) == src0_swiz) { + this_mask |= (1 << j); + } + } + + if (this_mask != scs_mask) { + glsl_to_tgsi_instruction *inst; + st_dst_reg tmp_dst = st_dst_reg(tmp); + + /* Emit the SCS instruction. + */ + inst = emit(ir, OPCODE_SCS, tmp_dst, src0); + inst->dst.writemask = scs_mask; + + /* Move the result of the SCS instruction to the desired location in + * the destination. + */ + tmp.swizzle = MAKE_SWIZZLE4(component, component, + component, component); + inst = emit(ir, OPCODE_SCS, dst, tmp); + inst->dst.writemask = this_mask; + } else { + /* Emit the SCS instruction to write directly to the destination. + */ + glsl_to_tgsi_instruction *inst = emit(ir, OPCODE_SCS, dst, src0); + inst->dst.writemask = scs_mask; + } + + done_mask |= this_mask; + } +} + +struct st_src_reg +glsl_to_tgsi_visitor::st_src_reg_for_float(float val) +{ + st_src_reg src(PROGRAM_CONSTANT, -1, NULL); + + src.index = _mesa_add_unnamed_constant(this->prog->Parameters, + &val, 1, &src.swizzle); + + return src; +} + +static int +type_size(const struct glsl_type *type) +{ + unsigned int i; + int size; + + switch (type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + if (type->is_matrix()) { + return type->matrix_columns; + } else { + /* Regardless of size of vector, it gets a vec4. This is bad + * packing for things like floats, but otherwise arrays become a + * mess. Hopefully a later pass over the code can pack scalars + * down if appropriate. + */ + return 1; + } + case GLSL_TYPE_ARRAY: + assert(type->length > 0); + return type_size(type->fields.array) * type->length; + case GLSL_TYPE_STRUCT: + size = 0; + for (i = 0; i < type->length; i++) { + size += type_size(type->fields.structure[i].type); + } + return size; + case GLSL_TYPE_SAMPLER: + /* Samplers take up one slot in UNIFORMS[], but they're baked in + * at link time. + */ + return 1; + default: + assert(0); + return 0; + } +} + +/** + * In the initial pass of codegen, we assign temporary numbers to + * intermediate results. (not SSA -- variable assignments will reuse + * storage). Actual register allocation for the Mesa VM occurs in a + * pass over the Mesa IR later. + */ +st_src_reg +glsl_to_tgsi_visitor::get_temp(const glsl_type *type) +{ + st_src_reg src; + int swizzle[4]; + int i; + + src.file = PROGRAM_TEMPORARY; + src.index = next_temp; + src.reladdr = NULL; + next_temp += type_size(type); + + if (type->is_array() || type->is_record()) { + src.swizzle = SWIZZLE_NOOP; + } else { + for (i = 0; i < type->vector_elements; i++) + swizzle[i] = i; + for (; i < 4; i++) + swizzle[i] = type->vector_elements - 1; + src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], + swizzle[2], swizzle[3]); + } + src.negate = 0; + + return src; +} + +variable_storage * +glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var) +{ + + variable_storage *entry; + + foreach_iter(exec_list_iterator, iter, this->variables) { + entry = (variable_storage *)iter.get(); + + if (entry->var == var) + return entry; + } + + return NULL; +} + +void +glsl_to_tgsi_visitor::visit(ir_variable *ir) +{ + if (strcmp(ir->name, "gl_FragCoord") == 0) { + struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; + + fp->OriginUpperLeft = ir->origin_upper_left; + fp->PixelCenterInteger = ir->pixel_center_integer; + + } else if (strcmp(ir->name, "gl_FragDepth") == 0) { + struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; + switch (ir->depth_layout) { + case ir_depth_layout_none: + fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE; + break; + case ir_depth_layout_any: + fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY; + break; + case ir_depth_layout_greater: + fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER; + break; + case ir_depth_layout_less: + fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS; + break; + case ir_depth_layout_unchanged: + fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED; + break; + default: + assert(0); + break; + } + } + + if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { + unsigned int i; + const ir_state_slot *const slots = ir->state_slots; + assert(ir->state_slots != NULL); + + /* Check if this statevar's setup in the STATE file exactly + * matches how we'll want to reference it as a + * struct/array/whatever. If not, then we need to move it into + * temporary storage and hope that it'll get copy-propagated + * out. + */ + for (i = 0; i < ir->num_state_slots; i++) { + if (slots[i].swizzle != SWIZZLE_XYZW) { + break; + } + } + + struct variable_storage *storage; + st_dst_reg dst; + if (i == ir->num_state_slots) { + /* We'll set the index later. */ + storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); + this->variables.push_tail(storage); + + dst = undef_dst; + } else { + /* The variable_storage constructor allocates slots based on the size + * of the type. However, this had better match the number of state + * elements that we're going to copy into the new temporary. + */ + assert((int) ir->num_state_slots == type_size(ir->type)); + + storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, + this->next_temp); + this->variables.push_tail(storage); + this->next_temp += type_size(ir->type); + + dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, NULL)); + } + + + for (unsigned int i = 0; i < ir->num_state_slots; i++) { + int index = _mesa_add_state_reference(this->prog->Parameters, + (gl_state_index *)slots[i].tokens); + + if (storage->file == PROGRAM_STATE_VAR) { + if (storage->index == -1) { + storage->index = index; + } else { + assert(index == storage->index + (int)i); + } + } else { + st_src_reg src(PROGRAM_STATE_VAR, index, NULL); + src.swizzle = slots[i].swizzle; + emit(ir, OPCODE_MOV, dst, src); + /* even a float takes up a whole vec4 reg in a struct/array. */ + dst.index++; + } + } + + if (storage->file == PROGRAM_TEMPORARY && + dst.index != storage->index + (int) ir->num_state_slots) { + fail_link(this->shader_program, + "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", + ir->name, dst.index - storage->index, + type_size(ir->type)); + } + } +} + +void +glsl_to_tgsi_visitor::visit(ir_loop *ir) +{ + ir_dereference_variable *counter = NULL; + + if (ir->counter != NULL) + counter = new(ir) ir_dereference_variable(ir->counter); + + if (ir->from != NULL) { + assert(ir->counter != NULL); + + ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); + + a->accept(this); + delete a; + } + + emit(NULL, OPCODE_BGNLOOP); + + if (ir->to) { + ir_expression *e = + new(ir) ir_expression(ir->cmp, glsl_type::bool_type, + counter, ir->to); + ir_if *if_stmt = new(ir) ir_if(e); + + ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); + + if_stmt->then_instructions.push_tail(brk); + + if_stmt->accept(this); + + delete if_stmt; + delete e; + delete brk; + } + + visit_exec_list(&ir->body_instructions, this); + + if (ir->increment) { + ir_expression *e = + new(ir) ir_expression(ir_binop_add, counter->type, + counter, ir->increment); + + ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); + + a->accept(this); + delete a; + delete e; + } + + emit(NULL, OPCODE_ENDLOOP); +} + +void +glsl_to_tgsi_visitor::visit(ir_loop_jump *ir) +{ + switch (ir->mode) { + case ir_loop_jump::jump_break: + emit(NULL, OPCODE_BRK); + break; + case ir_loop_jump::jump_continue: + emit(NULL, OPCODE_CONT); + break; + } +} + + +void +glsl_to_tgsi_visitor::visit(ir_function_signature *ir) +{ + assert(0); + (void)ir; +} + +void +glsl_to_tgsi_visitor::visit(ir_function *ir) +{ + /* Ignore function bodies other than main() -- we shouldn't see calls to + * them since they should all be inlined before we get to glsl_to_tgsi. + */ + if (strcmp(ir->name, "main") == 0) { + const ir_function_signature *sig; + exec_list empty; + + sig = ir->matching_signature(&empty); + + assert(sig); + + foreach_iter(exec_list_iterator, iter, sig->body) { + ir_instruction *ir = (ir_instruction *)iter.get(); + + ir->accept(this); + } + } +} + +GLboolean +glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) +{ + int nonmul_operand = 1 - mul_operand; + st_src_reg a, b, c; + + ir_expression *expr = ir->operands[mul_operand]->as_expression(); + if (!expr || expr->operation != ir_binop_mul) + return false; + + expr->operands[0]->accept(this); + a = this->result; + expr->operands[1]->accept(this); + b = this->result; + ir->operands[nonmul_operand]->accept(this); + c = this->result; + + this->result = get_temp(ir->type); + emit(ir, OPCODE_MAD, st_dst_reg(this->result), a, b, c); + + return true; +} + +GLboolean +glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) +{ + /* Saturates were only introduced to vertex programs in + * NV_vertex_program3, so don't give them to drivers in the VP. + */ + if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) + return false; + + ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); + if (!sat_src) + return false; + + sat_src->accept(this); + st_src_reg src = this->result; + + this->result = get_temp(ir->type); + glsl_to_tgsi_instruction *inst; + inst = emit(ir, OPCODE_MOV, st_dst_reg(this->result), src); + inst->saturate = true; + + return true; +} + +void +glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, + st_src_reg *reg, int *num_reladdr) +{ + if (!reg->reladdr) + return; + + emit(ir, OPCODE_ARL, address_reg, *reg->reladdr); + + if (*num_reladdr != 1) { + st_src_reg temp = get_temp(glsl_type::vec4_type); + + emit(ir, OPCODE_MOV, st_dst_reg(temp), *reg); + *reg = temp; + } + + (*num_reladdr)--; +} + +void +glsl_to_tgsi_visitor::emit_swz(ir_expression *ir) +{ + /* Assume that the vector operator is in a form compatible with OPCODE_SWZ. + * This means that each of the operands is either an immediate value of -1, + * 0, or 1, or is a component from one source register (possibly with + * negation). + */ + uint8_t components[4] = { 0 }; + bool negate[4] = { false }; + ir_variable *var = NULL; + + for (unsigned i = 0; i < ir->type->vector_elements; i++) { + ir_rvalue *op = ir->operands[i]; + + assert(op->type->is_scalar()); + + while (op != NULL) { + switch (op->ir_type) { + case ir_type_constant: { + + assert(op->type->is_scalar()); + + const ir_constant *const c = op->as_constant(); + if (c->is_one()) { + components[i] = SWIZZLE_ONE; + } else if (c->is_zero()) { + components[i] = SWIZZLE_ZERO; + } else if (c->is_negative_one()) { + components[i] = SWIZZLE_ONE; + negate[i] = true; + } else { + assert(!"SWZ constant must be 0.0 or 1.0."); + } + + op = NULL; + break; + } + + case ir_type_dereference_variable: { + ir_dereference_variable *const deref = + (ir_dereference_variable *) op; + + assert((var == NULL) || (deref->var == var)); + components[i] = SWIZZLE_X; + var = deref->var; + op = NULL; + break; + } + + case ir_type_expression: { + ir_expression *const expr = (ir_expression *) op; + + assert(expr->operation == ir_unop_neg); + negate[i] = true; + + op = expr->operands[0]; + break; + } + + case ir_type_swizzle: { + ir_swizzle *const swiz = (ir_swizzle *) op; + + components[i] = swiz->mask.x; + op = swiz->val; + break; + } + + default: + assert(!"Should not get here."); + return; + } + } + } + + assert(var != NULL); + + ir_dereference_variable *const deref = + new(mem_ctx) ir_dereference_variable(var); + + this->result.file = PROGRAM_UNDEFINED; + deref->accept(this); + if (this->result.file == PROGRAM_UNDEFINED) { + ir_print_visitor v; + printf("Failed to get tree for expression operand:\n"); + deref->accept(&v); + exit(1); + } + + st_src_reg src; + + src = this->result; + src.swizzle = MAKE_SWIZZLE4(components[0], + components[1], + components[2], + components[3]); + src.negate = ((unsigned(negate[0]) << 0) + | (unsigned(negate[1]) << 1) + | (unsigned(negate[2]) << 2) + | (unsigned(negate[3]) << 3)); + + /* Storage for our result. Ideally for an assignment we'd be using the + * actual storage for the result here, instead. + */ + const st_src_reg result_src = get_temp(ir->type); + st_dst_reg result_dst = st_dst_reg(result_src); + + /* Limit writes to the channels that will be used by result_src later. + * This does limit this temp's use as a temporary for multi-instruction + * sequences. + */ + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + + emit(ir, OPCODE_SWZ, result_dst, src); + this->result = result_src; +} + +void +glsl_to_tgsi_visitor::visit(ir_expression *ir) +{ + unsigned int operand; + st_src_reg op[Elements(ir->operands)]; + st_src_reg result_src; + st_dst_reg result_dst; + + /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c) + */ + if (ir->operation == ir_binop_add) { + if (try_emit_mad(ir, 1)) + return; + if (try_emit_mad(ir, 0)) + return; + } + if (try_emit_sat(ir)) + return; + + if (ir->operation == ir_quadop_vector) { + this->emit_swz(ir); + return; + } + + for (operand = 0; operand < ir->get_num_operands(); operand++) { + this->result.file = PROGRAM_UNDEFINED; + ir->operands[operand]->accept(this); + if (this->result.file == PROGRAM_UNDEFINED) { + ir_print_visitor v; + printf("Failed to get tree for expression operand:\n"); + ir->operands[operand]->accept(&v); + exit(1); + } + op[operand] = this->result; + + /* Matrix expression operands should have been broken down to vector + * operations already. + */ + assert(!ir->operands[operand]->type->is_matrix()); + } + + int vector_elements = ir->operands[0]->type->vector_elements; + if (ir->operands[1]) { + vector_elements = MAX2(vector_elements, + ir->operands[1]->type->vector_elements); + } + + this->result.file = PROGRAM_UNDEFINED; + + /* Storage for our result. Ideally for an assignment we'd be using + * the actual storage for the result here, instead. + */ + result_src = get_temp(ir->type); + /* convenience for the emit functions below. */ + result_dst = st_dst_reg(result_src); + /* Limit writes to the channels that will be used by result_src later. + * This does limit this temp's use as a temporary for multi-instruction + * sequences. + */ + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + + switch (ir->operation) { + case ir_unop_logic_not: + emit(ir, OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0)); + break; + case ir_unop_neg: + op[0].negate = ~op[0].negate; + result_src = op[0]; + break; + case ir_unop_abs: + emit(ir, OPCODE_ABS, result_dst, op[0]); + break; + case ir_unop_sign: + emit(ir, OPCODE_SSG, result_dst, op[0]); + break; + case ir_unop_rcp: + emit_scalar(ir, OPCODE_RCP, result_dst, op[0]); + break; + + case ir_unop_exp2: + emit_scalar(ir, OPCODE_EX2, result_dst, op[0]); + break; + case ir_unop_exp: + case ir_unop_log: + assert(!"not reached: should be handled by ir_explog_to_explog2"); + break; + case ir_unop_log2: + emit_scalar(ir, OPCODE_LG2, result_dst, op[0]); + break; + case ir_unop_sin: + emit_scalar(ir, OPCODE_SIN, result_dst, op[0]); + break; + case ir_unop_cos: + emit_scalar(ir, OPCODE_COS, result_dst, op[0]); + break; + case ir_unop_sin_reduced: + emit_scs(ir, OPCODE_SIN, result_dst, op[0]); + break; + case ir_unop_cos_reduced: + emit_scs(ir, OPCODE_COS, result_dst, op[0]); + break; + + case ir_unop_dFdx: + emit(ir, OPCODE_DDX, result_dst, op[0]); + break; + case ir_unop_dFdy: + emit(ir, OPCODE_DDY, result_dst, op[0]); + break; + + case ir_unop_noise: { + const enum prog_opcode opcode = + prog_opcode(OPCODE_NOISE1 + + (ir->operands[0]->type->vector_elements) - 1); + assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4)); + + emit(ir, opcode, result_dst, op[0]); + break; + } + + case ir_binop_add: + emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); + break; + case ir_binop_sub: + emit(ir, OPCODE_SUB, result_dst, op[0], op[1]); + break; + + case ir_binop_mul: + emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); + break; + case ir_binop_div: + assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + case ir_binop_mod: + assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + break; + + case ir_binop_less: + emit(ir, OPCODE_SLT, result_dst, op[0], op[1]); + break; + case ir_binop_greater: + emit(ir, OPCODE_SGT, result_dst, op[0], op[1]); + break; + case ir_binop_lequal: + emit(ir, OPCODE_SLE, result_dst, op[0], op[1]); + break; + case ir_binop_gequal: + emit(ir, OPCODE_SGE, result_dst, op[0], op[1]); + break; + case ir_binop_equal: + emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]); + break; + case ir_binop_nequal: + emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); + break; + case ir_binop_all_equal: + /* "==" operator producing a scalar boolean. */ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + st_src_reg temp = get_temp(glsl_type::vec4_type); + emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + emit_dp(ir, result_dst, temp, temp, vector_elements); + emit(ir, OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0)); + } else { + emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]); + } + break; + case ir_binop_any_nequal: + /* "!=" operator producing a scalar boolean. */ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + st_src_reg temp = get_temp(glsl_type::vec4_type); + emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + emit_dp(ir, result_dst, temp, temp, vector_elements); + emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + } else { + emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); + } + break; + + case ir_unop_any: + assert(ir->operands[0]->type->is_vector()); + emit_dp(ir, result_dst, op[0], op[0], + ir->operands[0]->type->vector_elements); + emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + break; + + case ir_binop_logic_xor: + emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); + break; + + case ir_binop_logic_or: + /* This could be a saturated add and skip the SNE. */ + emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); + emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + break; + + case ir_binop_logic_and: + /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ + emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); + break; + + case ir_binop_dot: + assert(ir->operands[0]->type->is_vector()); + assert(ir->operands[0]->type == ir->operands[1]->type); + emit_dp(ir, result_dst, op[0], op[1], + ir->operands[0]->type->vector_elements); + break; + + case ir_unop_sqrt: + /* sqrt(x) = x * rsq(x). */ + emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); + emit(ir, OPCODE_MUL, result_dst, result_src, op[0]); + /* For incoming channels <= 0, set the result to 0. */ + op[0].negate = ~op[0].negate; + emit(ir, OPCODE_CMP, result_dst, + op[0], result_src, st_src_reg_for_float(0.0)); + break; + case ir_unop_rsq: + emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); + break; + case ir_unop_i2f: + case ir_unop_b2f: + case ir_unop_b2i: + /* Mesa IR lacks types, ints are stored as truncated floats. */ + result_src = op[0]; + break; + case ir_unop_f2i: + emit(ir, OPCODE_TRUNC, result_dst, op[0]); + break; + case ir_unop_f2b: + case ir_unop_i2b: + emit(ir, OPCODE_SNE, result_dst, + op[0], st_src_reg_for_float(0.0)); + break; + case ir_unop_trunc: + emit(ir, OPCODE_TRUNC, result_dst, op[0]); + break; + case ir_unop_ceil: + op[0].negate = ~op[0].negate; + emit(ir, OPCODE_FLR, result_dst, op[0]); + result_src.negate = ~result_src.negate; + break; + case ir_unop_floor: + emit(ir, OPCODE_FLR, result_dst, op[0]); + break; + case ir_unop_fract: + emit(ir, OPCODE_FRC, result_dst, op[0]); + break; + + case ir_binop_min: + emit(ir, OPCODE_MIN, result_dst, op[0], op[1]); + break; + case ir_binop_max: + emit(ir, OPCODE_MAX, result_dst, op[0], op[1]); + break; + case ir_binop_pow: + emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]); + break; + + case ir_unop_bit_not: + case ir_unop_u2f: + case ir_binop_lshift: + case ir_binop_rshift: + case ir_binop_bit_and: + case ir_binop_bit_xor: + case ir_binop_bit_or: + case ir_unop_round_even: + assert(!"GLSL 1.30 features unsupported"); + break; + + case ir_quadop_vector: + /* This operation should have already been handled. + */ + assert(!"Should not get here."); + break; + } + + this->result = result_src; +} + + +void +glsl_to_tgsi_visitor::visit(ir_swizzle *ir) +{ + st_src_reg src; + int i; + int swizzle[4]; + + /* Note that this is only swizzles in expressions, not those on the left + * hand side of an assignment, which do write masking. See ir_assignment + * for that. + */ + + ir->val->accept(this); + src = this->result; + assert(src.file != PROGRAM_UNDEFINED); + + for (i = 0; i < 4; i++) { + if (i < ir->type->vector_elements) { + switch (i) { + case 0: + swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); + break; + case 1: + swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); + break; + case 2: + swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); + break; + case 3: + swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); + break; + } + } else { + /* If the type is smaller than a vec4, replicate the last + * channel out. + */ + swizzle[i] = swizzle[ir->type->vector_elements - 1]; + } + } + + src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); + + this->result = src; +} + +void +glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) +{ + variable_storage *entry = find_variable_storage(ir->var); + ir_variable *var = ir->var; + + if (!entry) { + switch (var->mode) { + case ir_var_uniform: + entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, + var->location); + this->variables.push_tail(entry); + break; + case ir_var_in: + case ir_var_inout: + /* The linker assigns locations for varyings and attributes, + * including deprecated builtins (like gl_Color), user-assign + * generic attributes (glBindVertexLocation), and + * user-defined varyings. + * + * FINISHME: We would hit this path for function arguments. Fix! + */ + assert(var->location != -1); + entry = new(mem_ctx) variable_storage(var, + PROGRAM_INPUT, + var->location); + if (this->prog->Target == GL_VERTEX_PROGRAM_ARB && + var->location >= VERT_ATTRIB_GENERIC0) { + _mesa_add_attribute(this->prog->Attributes, + var->name, + _mesa_sizeof_glsl_type(var->type->gl_type), + var->type->gl_type, + var->location - VERT_ATTRIB_GENERIC0); + } + break; + case ir_var_out: + assert(var->location != -1); + entry = new(mem_ctx) variable_storage(var, + PROGRAM_OUTPUT, + var->location); + break; + case ir_var_system_value: + entry = new(mem_ctx) variable_storage(var, + PROGRAM_SYSTEM_VALUE, + var->location); + break; + case ir_var_auto: + case ir_var_temporary: + entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, + this->next_temp); + this->variables.push_tail(entry); + + next_temp += type_size(var->type); + break; + } + + if (!entry) { + printf("Failed to make storage for %s\n", var->name); + exit(1); + } + } + + this->result = st_src_reg(entry->file, entry->index, var->type); +} + +void +glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) +{ + ir_constant *index; + st_src_reg src; + int element_size = type_size(ir->type); + + index = ir->array_index->constant_expression_value(); + + ir->array->accept(this); + src = this->result; + + if (index) { + src.index += index->value.i[0] * element_size; + } else { + st_src_reg array_base = this->result; + /* Variable index array dereference. It eats the "vec4" of the + * base of the array and an index that offsets the Mesa register + * index. + */ + ir->array_index->accept(this); + + st_src_reg index_reg; + + if (element_size == 1) { + index_reg = this->result; + } else { + index_reg = get_temp(glsl_type::float_type); + + emit(ir, OPCODE_MUL, st_dst_reg(index_reg), + this->result, st_src_reg_for_float(element_size)); + } + + src.reladdr = ralloc(mem_ctx, st_src_reg); + memcpy(src.reladdr, &index_reg, sizeof(index_reg)); + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + src.swizzle = swizzle_for_size(ir->type->vector_elements); + else + src.swizzle = SWIZZLE_NOOP; + + this->result = src; +} + +void +glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) +{ + unsigned int i; + const glsl_type *struct_type = ir->record->type; + int offset = 0; + + ir->record->accept(this); + + for (i = 0; i < struct_type->length; i++) { + if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) + break; + offset += type_size(struct_type->fields.structure[i].type); + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + this->result.swizzle = swizzle_for_size(ir->type->vector_elements); + else + this->result.swizzle = SWIZZLE_NOOP; + + this->result.index += offset; +} + +/** + * We want to be careful in assignment setup to hit the actual storage + * instead of potentially using a temporary like we might with the + * ir_dereference handler. + */ +static st_dst_reg +get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v) +{ + /* The LHS must be a dereference. If the LHS is a variable indexed array + * access of a vector, it must be separated into a series conditional moves + * before reaching this point (see ir_vec_index_to_cond_assign). + */ + assert(ir->as_dereference()); + ir_dereference_array *deref_array = ir->as_dereference_array(); + if (deref_array) { + assert(!deref_array->array->type->is_vector()); + } + + /* Use the rvalue deref handler for the most part. We'll ignore + * swizzles in it and write swizzles using writemask, though. + */ + ir->accept(v); + return st_dst_reg(v->result); +} + +/** + * Process the condition of a conditional assignment + * + * Examines the condition of a conditional assignment to generate the optimal + * first operand of a \c CMP instruction. If the condition is a relational + * operator with 0 (e.g., \c ir_binop_less), the value being compared will be + * used as the source for the \c CMP instruction. Otherwise the comparison + * is processed to a boolean result, and the boolean result is used as the + * operand to the CMP instruction. + */ +bool +glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) +{ + ir_rvalue *src_ir = ir; + bool negate = true; + bool switch_order = false; + + ir_expression *const expr = ir->as_expression(); + if ((expr != NULL) && (expr->get_num_operands() == 2)) { + bool zero_on_left = false; + + if (expr->operands[0]->is_zero()) { + src_ir = expr->operands[1]; + zero_on_left = true; + } else if (expr->operands[1]->is_zero()) { + src_ir = expr->operands[0]; + zero_on_left = false; + } + + /* a is - 0 + - 0 + + * (a < 0) T F F ( a < 0) T F F + * (0 < a) F F T (-a < 0) F F T + * (a <= 0) T T F (-a < 0) F F T (swap order of other operands) + * (0 <= a) F T T ( a < 0) T F F (swap order of other operands) + * (a > 0) F F T (-a < 0) F F T + * (0 > a) T F F ( a < 0) T F F + * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) + * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) + * + * Note that exchanging the order of 0 and 'a' in the comparison simply + * means that the value of 'a' should be negated. + */ + if (src_ir != ir) { + switch (expr->operation) { + case ir_binop_less: + switch_order = false; + negate = zero_on_left; + break; + + case ir_binop_greater: + switch_order = false; + negate = !zero_on_left; + break; + + case ir_binop_lequal: + switch_order = true; + negate = !zero_on_left; + break; + + case ir_binop_gequal: + switch_order = true; + negate = zero_on_left; + break; + + default: + /* This isn't the right kind of comparison afterall, so make sure + * the whole condition is visited. + */ + src_ir = ir; + break; + } + } + } + + src_ir->accept(this); + + /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the + * condition we produced is 0.0 or 1.0. By flipping the sign, we can + * choose which value OPCODE_CMP produces without an extra instruction + * computing the condition. + */ + if (negate) + this->result.negate = ~this->result.negate; + + return switch_order; +} + +void +glsl_to_tgsi_visitor::visit(ir_assignment *ir) +{ + st_dst_reg l; + st_src_reg r; + int i; + + ir->rhs->accept(this); + r = this->result; + + l = get_assignment_lhs(ir->lhs, this); + + /* FINISHME: This should really set to the correct maximal writemask for each + * FINISHME: component written (in the loops below). This case can only + * FINISHME: occur for matrices, arrays, and structures. + */ + if (ir->write_mask == 0) { + assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); + l.writemask = WRITEMASK_XYZW; + } else if (ir->lhs->type->is_scalar()) { + /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the + * FINISHME: W component of fragment shader output zero, work correctly. + */ + l.writemask = WRITEMASK_XYZW; + } else { + int swizzles[4]; + int first_enabled_chan = 0; + int rhs_chan = 0; + + assert(ir->lhs->type->is_vector()); + l.writemask = ir->write_mask; + + for (int i = 0; i < 4; i++) { + if (l.writemask & (1 << i)) { + first_enabled_chan = GET_SWZ(r.swizzle, i); + break; + } + } + + /* Swizzle a small RHS vector into the channels being written. + * + * glsl ir treats write_mask as dictating how many channels are + * present on the RHS while Mesa IR treats write_mask as just + * showing which channels of the vec4 RHS get written. + */ + for (int i = 0; i < 4; i++) { + if (l.writemask & (1 << i)) + swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); + else + swizzles[i] = first_enabled_chan; + } + r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], + swizzles[2], swizzles[3]); + } + + assert(l.file != PROGRAM_UNDEFINED); + assert(r.file != PROGRAM_UNDEFINED); + + if (ir->condition) { + const bool switch_order = this->process_move_condition(ir->condition); + st_src_reg condition = this->result; + + for (i = 0; i < type_size(ir->lhs->type); i++) { + if (switch_order) { + emit(ir, OPCODE_CMP, l, condition, st_src_reg(l), r); + } else { + emit(ir, OPCODE_CMP, l, condition, r, st_src_reg(l)); + } + + l.index++; + r.index++; + } + } else { + for (i = 0; i < type_size(ir->lhs->type); i++) { + emit(ir, OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } +} + + +void +glsl_to_tgsi_visitor::visit(ir_constant *ir) +{ + st_src_reg src; + GLfloat stack_vals[4] = { 0 }; + GLfloat *values = stack_vals; + unsigned int i; + + /* Unfortunately, 4 floats is all we can get into + * _mesa_add_unnamed_constant. So, make a temp to store an + * aggregate constant and move each constant value into it. If we + * get lucky, copy propagation will eliminate the extra moves. + */ + + if (ir->type->base_type == GLSL_TYPE_STRUCT) { + st_src_reg temp_base = get_temp(ir->type); + st_dst_reg temp = st_dst_reg(temp_base); + + foreach_iter(exec_list_iterator, iter, ir->components) { + ir_constant *field_value = (ir_constant *)iter.get(); + int size = type_size(field_value->type); + + assert(size > 0); + + field_value->accept(this); + src = this->result; + + for (i = 0; i < (unsigned int)size; i++) { + emit(ir, OPCODE_MOV, temp, src); + + src.index++; + temp.index++; + } + } + this->result = temp_base; + return; + } + + if (ir->type->is_array()) { + st_src_reg temp_base = get_temp(ir->type); + st_dst_reg temp = st_dst_reg(temp_base); + int size = type_size(ir->type->fields.array); + + assert(size > 0); + + for (i = 0; i < ir->type->length; i++) { + ir->array_elements[i]->accept(this); + src = this->result; + for (int j = 0; j < size; j++) { + emit(ir, OPCODE_MOV, temp, src); + + src.index++; + temp.index++; + } + } + this->result = temp_base; + return; + } + + if (ir->type->is_matrix()) { + st_src_reg mat = get_temp(ir->type); + st_dst_reg mat_column = st_dst_reg(mat); + + for (i = 0; i < ir->type->matrix_columns; i++) { + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + values = &ir->value.f[i * ir->type->vector_elements]; + + src = st_src_reg(PROGRAM_CONSTANT, -1, NULL); + src.index = _mesa_add_unnamed_constant(this->prog->Parameters, + values, + ir->type->vector_elements, + &src.swizzle); + emit(ir, OPCODE_MOV, mat_column, src); + + mat_column.index++; + } + + this->result = mat; + return; + } + + src.file = PROGRAM_CONSTANT; + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + values = &ir->value.f[0]; + break; + case GLSL_TYPE_UINT: + for (i = 0; i < ir->type->vector_elements; i++) { + values[i] = ir->value.u[i]; + } + break; + case GLSL_TYPE_INT: + for (i = 0; i < ir->type->vector_elements; i++) { + values[i] = ir->value.i[i]; + } + break; + case GLSL_TYPE_BOOL: + for (i = 0; i < ir->type->vector_elements; i++) { + values[i] = ir->value.b[i]; + } + break; + default: + assert(!"Non-float/uint/int/bool constant"); + } + + this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type); + this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, + values, + ir->type->vector_elements, + &this->result.swizzle); +} + +function_entry * +glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) +{ + function_entry *entry; + + foreach_iter(exec_list_iterator, iter, this->function_signatures) { + entry = (function_entry *)iter.get(); + + if (entry->sig == sig) + return entry; + } + + entry = ralloc(mem_ctx, function_entry); + entry->sig = sig; + entry->sig_id = this->next_signature_id++; + entry->bgn_inst = NULL; + + /* Allocate storage for all the parameters. */ + foreach_iter(exec_list_iterator, iter, sig->parameters) { + ir_variable *param = (ir_variable *)iter.get(); + variable_storage *storage; + + storage = find_variable_storage(param); + assert(!storage); + + storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY, + this->next_temp); + this->variables.push_tail(storage); + + this->next_temp += type_size(param->type); + } + + if (!sig->return_type->is_void()) { + entry->return_reg = get_temp(sig->return_type); + } else { + entry->return_reg = undef_src; + } + + this->function_signatures.push_tail(entry); + return entry; +} + +void +glsl_to_tgsi_visitor::visit(ir_call *ir) +{ + glsl_to_tgsi_instruction *call_inst; + ir_function_signature *sig = ir->get_callee(); + function_entry *entry = get_function_signature(sig); + int i; + + /* Process in parameters. */ + exec_list_iterator sig_iter = sig->parameters.iterator(); + foreach_iter(exec_list_iterator, iter, *ir) { + ir_rvalue *param_rval = (ir_rvalue *)iter.get(); + ir_variable *param = (ir_variable *)sig_iter.get(); + + if (param->mode == ir_var_in || + param->mode == ir_var_inout) { + variable_storage *storage = find_variable_storage(param); + assert(storage); + + param_rval->accept(this); + st_src_reg r = this->result; + + st_dst_reg l; + l.file = storage->file; + l.index = storage->index; + l.reladdr = NULL; + l.writemask = WRITEMASK_XYZW; + l.cond_mask = COND_TR; + + for (i = 0; i < type_size(param->type); i++) { + emit(ir, OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } + + sig_iter.next(); + } + assert(!sig_iter.has_next()); + + /* Emit call instruction */ + call_inst = emit(ir, OPCODE_CAL); + call_inst->function = entry; + + /* Process out parameters. */ + sig_iter = sig->parameters.iterator(); + foreach_iter(exec_list_iterator, iter, *ir) { + ir_rvalue *param_rval = (ir_rvalue *)iter.get(); + ir_variable *param = (ir_variable *)sig_iter.get(); + + if (param->mode == ir_var_out || + param->mode == ir_var_inout) { + variable_storage *storage = find_variable_storage(param); + assert(storage); + + st_src_reg r; + r.file = storage->file; + r.index = storage->index; + r.reladdr = NULL; + r.swizzle = SWIZZLE_NOOP; + r.negate = 0; + + param_rval->accept(this); + st_dst_reg l = st_dst_reg(this->result); + + for (i = 0; i < type_size(param->type); i++) { + emit(ir, OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } + + sig_iter.next(); + } + assert(!sig_iter.has_next()); + + /* Process return value. */ + this->result = entry->return_reg; +} + +void +glsl_to_tgsi_visitor::visit(ir_texture *ir) +{ + st_src_reg result_src, coord, lod_info, projector, dx, dy; + st_dst_reg result_dst, coord_dst; + glsl_to_tgsi_instruction *inst = NULL; + prog_opcode opcode = OPCODE_NOP; + + ir->coordinate->accept(this); + + /* Put our coords in a temp. We'll need to modify them for shadow, + * projection, or LOD, so the only case we'd use it as is is if + * we're doing plain old texturing. Mesa IR optimization should + * handle cleaning up our mess in that case. + */ + coord = get_temp(glsl_type::vec4_type); + coord_dst = st_dst_reg(coord); + emit(ir, OPCODE_MOV, coord_dst, this->result); + + if (ir->projector) { + ir->projector->accept(this); + projector = this->result; + } + + /* Storage for our result. Ideally for an assignment we'd be using + * the actual storage for the result here, instead. + */ + result_src = get_temp(glsl_type::vec4_type); + result_dst = st_dst_reg(result_src); + + switch (ir->op) { + case ir_tex: + opcode = OPCODE_TEX; + break; + case ir_txb: + opcode = OPCODE_TXB; + ir->lod_info.bias->accept(this); + lod_info = this->result; + break; + case ir_txl: + opcode = OPCODE_TXL; + ir->lod_info.lod->accept(this); + lod_info = this->result; + break; + case ir_txd: + opcode = OPCODE_TXD; + ir->lod_info.grad.dPdx->accept(this); + dx = this->result; + ir->lod_info.grad.dPdy->accept(this); + dy = this->result; + break; + case ir_txf: // TODO: use TGSI_OPCODE_TXF here + assert(!"GLSL 1.30 features unsupported"); + break; + } + + if (ir->projector) { + if (opcode == OPCODE_TEX) { + /* Slot the projector in as the last component of the coord. */ + coord_dst.writemask = WRITEMASK_W; + emit(ir, OPCODE_MOV, coord_dst, projector); + coord_dst.writemask = WRITEMASK_XYZW; + opcode = OPCODE_TXP; + } else { + st_src_reg coord_w = coord; + coord_w.swizzle = SWIZZLE_WWWW; + + /* For the other TEX opcodes there's no projective version + * since the last slot is taken up by lod info. Do the + * projective divide now. + */ + coord_dst.writemask = WRITEMASK_W; + emit(ir, OPCODE_RCP, coord_dst, projector); + + /* In the case where we have to project the coordinates "by hand," + * the shadow comparitor value must also be projected. + */ + st_src_reg tmp_src = coord; + if (ir->shadow_comparitor) { + /* Slot the shadow value in as the second to last component of the + * coord. + */ + ir->shadow_comparitor->accept(this); + + tmp_src = get_temp(glsl_type::vec4_type); + st_dst_reg tmp_dst = st_dst_reg(tmp_src); + + tmp_dst.writemask = WRITEMASK_Z; + emit(ir, OPCODE_MOV, tmp_dst, this->result); + + tmp_dst.writemask = WRITEMASK_XY; + emit(ir, OPCODE_MOV, tmp_dst, coord); + } + + coord_dst.writemask = WRITEMASK_XYZ; + emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w); + + coord_dst.writemask = WRITEMASK_XYZW; + coord.swizzle = SWIZZLE_XYZW; + } + } + + /* If projection is done and the opcode is not OPCODE_TXP, then the shadow + * comparitor was put in the correct place (and projected) by the code, + * above, that handles by-hand projection. + */ + if (ir->shadow_comparitor && (!ir->projector || opcode == OPCODE_TXP)) { + /* Slot the shadow value in as the second to last component of the + * coord. + */ + ir->shadow_comparitor->accept(this); + coord_dst.writemask = WRITEMASK_Z; + emit(ir, OPCODE_MOV, coord_dst, this->result); + coord_dst.writemask = WRITEMASK_XYZW; + } + + if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) { + /* Mesa IR stores lod or lod bias in the last channel of the coords. */ + coord_dst.writemask = WRITEMASK_W; + emit(ir, OPCODE_MOV, coord_dst, lod_info); + coord_dst.writemask = WRITEMASK_XYZW; + } + + if (opcode == OPCODE_TXD) + inst = emit(ir, opcode, result_dst, coord, dx, dy); + else + inst = emit(ir, opcode, result_dst, coord); + + if (ir->shadow_comparitor) + inst->tex_shadow = GL_TRUE; + + inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler, + this->shader_program, + this->prog); + + const glsl_type *sampler_type = ir->sampler->type; + + switch (sampler_type->sampler_dimensionality) { + case GLSL_SAMPLER_DIM_1D: + inst->tex_target = (sampler_type->sampler_array) + ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; + break; + case GLSL_SAMPLER_DIM_2D: + inst->tex_target = (sampler_type->sampler_array) + ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; + break; + case GLSL_SAMPLER_DIM_3D: + inst->tex_target = TEXTURE_3D_INDEX; + break; + case GLSL_SAMPLER_DIM_CUBE: + inst->tex_target = TEXTURE_CUBE_INDEX; + break; + case GLSL_SAMPLER_DIM_RECT: + inst->tex_target = TEXTURE_RECT_INDEX; + break; + case GLSL_SAMPLER_DIM_BUF: + assert(!"FINISHME: Implement ARB_texture_buffer_object"); + break; + default: + assert(!"Should not get here."); + } + + this->result = result_src; +} + +void +glsl_to_tgsi_visitor::visit(ir_return *ir) +{ + if (ir->get_value()) { + st_dst_reg l; + int i; + + assert(current_function); + + ir->get_value()->accept(this); + st_src_reg r = this->result; + + l = st_dst_reg(current_function->return_reg); + + for (i = 0; i < type_size(current_function->sig->return_type); i++) { + emit(ir, OPCODE_MOV, l, r); + l.index++; + r.index++; + } + } + + emit(ir, OPCODE_RET); +} + +void +glsl_to_tgsi_visitor::visit(ir_discard *ir) +{ + struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; + + if (ir->condition) { + ir->condition->accept(this); + this->result.negate = ~this->result.negate; + emit(ir, OPCODE_KIL, undef_dst, this->result); + } else { + emit(ir, OPCODE_KIL_NV); + } + + fp->UsesKill = GL_TRUE; +} + +void +glsl_to_tgsi_visitor::visit(ir_if *ir) +{ + glsl_to_tgsi_instruction *cond_inst, *if_inst, *else_inst = NULL; + glsl_to_tgsi_instruction *prev_inst; + + prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + + ir->condition->accept(this); + assert(this->result.file != PROGRAM_UNDEFINED); + + if (this->options->EmitCondCodes) { + cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + + /* See if we actually generated any instruction for generating + * the condition. If not, then cook up a move to a temp so we + * have something to set cond_update on. + */ + if (cond_inst == prev_inst) { + st_src_reg temp = get_temp(glsl_type::bool_type); + cond_inst = emit(ir->condition, OPCODE_MOV, st_dst_reg(temp), result); + } + cond_inst->cond_update = GL_TRUE; + + if_inst = emit(ir->condition, OPCODE_IF); + if_inst->dst.cond_mask = COND_NE; + } else { + if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result); + } + + this->instructions.push_tail(if_inst); + + visit_exec_list(&ir->then_instructions, this); + + if (!ir->else_instructions.is_empty()) { + else_inst = emit(ir->condition, OPCODE_ELSE); + visit_exec_list(&ir->else_instructions, this); + } + + if_inst = emit(ir->condition, OPCODE_ENDIF); +} + +glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() +{ + result.file = PROGRAM_UNDEFINED; + next_temp = 1; + next_signature_id = 1; + current_function = NULL; + num_address_regs = 0; + indirect_addr_temps = false; + indirect_addr_consts = false; + mem_ctx = ralloc_context(NULL); +} + +glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() +{ + ralloc_free(mem_ctx); +} + +extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) +{ + delete v; +} + +static struct prog_src_register +mesa_st_src_reg_from_ir_st_src_reg(st_src_reg reg) +{ + struct prog_src_register mesa_reg; + + mesa_reg.File = reg.file; + assert(reg.index < (1 << INST_INDEX_BITS)); + mesa_reg.Index = reg.index; + mesa_reg.Swizzle = reg.swizzle; + mesa_reg.RelAddr = reg.reladdr != NULL; + mesa_reg.Negate = reg.negate; + mesa_reg.Abs = 0; + mesa_reg.HasIndex2 = GL_FALSE; + mesa_reg.RelAddr2 = 0; + mesa_reg.Index2 = 0; + + return mesa_reg; +} + +static void +set_branchtargets(glsl_to_tgsi_visitor *v, + struct prog_instruction *mesa_instructions, + int num_instructions) +{ + int if_count = 0, loop_count = 0; + int *if_stack, *loop_stack; + int if_stack_pos = 0, loop_stack_pos = 0; + int i, j; + + for (i = 0; i < num_instructions; i++) { + switch (mesa_instructions[i].Opcode) { + case OPCODE_IF: + if_count++; + break; + case OPCODE_BGNLOOP: + loop_count++; + break; + case OPCODE_BRK: + case OPCODE_CONT: + mesa_instructions[i].BranchTarget = -1; + break; + default: + break; + } + } + + if_stack = rzalloc_array(v->mem_ctx, int, if_count); + loop_stack = rzalloc_array(v->mem_ctx, int, loop_count); + + for (i = 0; i < num_instructions; i++) { + switch (mesa_instructions[i].Opcode) { + case OPCODE_IF: + if_stack[if_stack_pos] = i; + if_stack_pos++; + break; + case OPCODE_ELSE: + mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; + if_stack[if_stack_pos - 1] = i; + break; + case OPCODE_ENDIF: + mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; + if_stack_pos--; + break; + case OPCODE_BGNLOOP: + loop_stack[loop_stack_pos] = i; + loop_stack_pos++; + break; + case OPCODE_ENDLOOP: + loop_stack_pos--; + /* Rewrite any breaks/conts at this nesting level (haven't + * already had a BranchTarget assigned) to point to the end + * of the loop. + */ + for (j = loop_stack[loop_stack_pos]; j < i; j++) { + if (mesa_instructions[j].Opcode == OPCODE_BRK || + mesa_instructions[j].Opcode == OPCODE_CONT) { + if (mesa_instructions[j].BranchTarget == -1) { + mesa_instructions[j].BranchTarget = i; + } + } + } + /* The loop ends point at each other. */ + mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos]; + mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i; + break; + case OPCODE_CAL: + foreach_iter(exec_list_iterator, iter, v->function_signatures) { + function_entry *entry = (function_entry *)iter.get(); + + if (entry->sig_id == mesa_instructions[i].BranchTarget) { + mesa_instructions[i].BranchTarget = entry->inst; + break; + } + } + break; + default: + break; + } + } +} + +static void +print_program(struct prog_instruction *mesa_instructions, + ir_instruction **mesa_instruction_annotation, + int num_instructions) +{ + /*ir_instruction *last_ir = NULL;*/ + int i; + int indent = 0; + + for (i = 0; i < num_instructions; i++) { + struct prog_instruction *mesa_inst = mesa_instructions + i; + + fprintf(stdout, "%3d: ", i); + +#if 0 +/* Disable this for now, since printing GLSL IR along with its corresponding + * Mesa IR makes the Mesa IR unreadable. */ + ir_instruction *ir = mesa_instruction_annotation[i]; + if (last_ir != ir && ir) { + int j; + + for (j = 0; j < indent; j++) { + fprintf(stdout, " "); + } + ir->print(); + printf("\n"); + last_ir = ir; + + fprintf(stdout, " "); /* line number spacing. */ + } +#endif + + indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent, + PROG_PRINT_DEBUG, NULL); + } +} + + +/** + * Count resources used by the given gpu program (number of texture + * samplers, etc). + */ +static void +count_resources(struct gl_program *prog) +{ + unsigned int i; + + prog->SamplersUsed = 0; + + for (i = 0; i < prog->NumInstructions; i++) { + struct prog_instruction *inst = &prog->Instructions[i]; + + if (_mesa_is_tex_instruction(inst->Opcode)) { + prog->SamplerTargets[inst->TexSrcUnit] = + (gl_texture_index)inst->TexSrcTarget; + prog->SamplersUsed |= 1 << inst->TexSrcUnit; + if (inst->TexShadow) { + prog->ShadowSamplers |= 1 << inst->TexSrcUnit; + } + } + } + + _mesa_update_shader_textures_used(prog); +} + + +/** + * Check if the given vertex/fragment/shader program is within the + * resource limits of the context (number of texture units, etc). + * If any of those checks fail, record a linker error. + * + * XXX more checks are needed... + */ +static void +check_resources(const struct gl_context *ctx, + struct gl_shader_program *shader_program, + struct gl_program *prog) +{ + switch (prog->Target) { + case GL_VERTEX_PROGRAM_ARB: + if (_mesa_bitcount(prog->SamplersUsed) > + ctx->Const.MaxVertexTextureImageUnits) { + fail_link(shader_program, "Too many vertex shader texture samplers"); + } + if (prog->Parameters->NumParameters > MAX_UNIFORMS) { + fail_link(shader_program, "Too many vertex shader constants"); + } + break; + case MESA_GEOMETRY_PROGRAM: + if (_mesa_bitcount(prog->SamplersUsed) > + ctx->Const.MaxGeometryTextureImageUnits) { + fail_link(shader_program, "Too many geometry shader texture samplers"); + } + if (prog->Parameters->NumParameters > + MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) { + fail_link(shader_program, "Too many geometry shader constants"); + } + break; + case GL_FRAGMENT_PROGRAM_ARB: + if (_mesa_bitcount(prog->SamplersUsed) > + ctx->Const.MaxTextureImageUnits) { + fail_link(shader_program, "Too many fragment shader texture samplers"); + } + if (prog->Parameters->NumParameters > MAX_UNIFORMS) { + fail_link(shader_program, "Too many fragment shader constants"); + } + break; + default: + _mesa_problem(ctx, "unexpected program type in check_resources()"); + } +} + + + +struct uniform_sort { + struct gl_uniform *u; + int pos; +}; + +/* The shader_program->Uniforms list is almost sorted in increasing + * uniform->{Frag,Vert}Pos locations, but not quite when there are + * uniforms shared between targets. We need to add parameters in + * increasing order for the targets. + */ +static int +sort_uniforms(const void *a, const void *b) +{ + struct uniform_sort *u1 = (struct uniform_sort *)a; + struct uniform_sort *u2 = (struct uniform_sort *)b; + + return u1->pos - u2->pos; +} + +/* Add the uniforms to the parameters. The linker chose locations + * in our parameters lists (which weren't created yet), which the + * uniforms code will use to poke values into our parameters list + * when uniforms are updated. + */ +static void +add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, + struct gl_shader *shader, + struct gl_program *prog) +{ + unsigned int i; + unsigned int next_sampler = 0, num_uniforms = 0; + struct uniform_sort *sorted_uniforms; + + sorted_uniforms = ralloc_array(NULL, struct uniform_sort, + shader_program->Uniforms->NumUniforms); + + for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) { + struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i; + int parameter_index = -1; + + switch (shader->Type) { + case GL_VERTEX_SHADER: + parameter_index = uniform->VertPos; + break; + case GL_FRAGMENT_SHADER: + parameter_index = uniform->FragPos; + break; + case GL_GEOMETRY_SHADER: + parameter_index = uniform->GeomPos; + break; + } + + /* Only add uniforms used in our target. */ + if (parameter_index != -1) { + sorted_uniforms[num_uniforms].pos = parameter_index; + sorted_uniforms[num_uniforms].u = uniform; + num_uniforms++; + } + } + + qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort), + sort_uniforms); + + for (i = 0; i < num_uniforms; i++) { + struct gl_uniform *uniform = sorted_uniforms[i].u; + int parameter_index = sorted_uniforms[i].pos; + const glsl_type *type = uniform->Type; + unsigned int size; + + if (type->is_vector() || + type->is_scalar()) { + size = type->vector_elements; + } else { + size = type_size(type) * 4; + } + + gl_register_file file; + if (type->is_sampler() || + (type->is_array() && type->fields.array->is_sampler())) { + file = PROGRAM_SAMPLER; + } else { + file = PROGRAM_UNIFORM; + } + + GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1, + uniform->Name); + + if (index < 0) { + index = _mesa_add_parameter(prog->Parameters, file, + uniform->Name, size, type->gl_type, + NULL, NULL, 0x0); + + /* Sampler uniform values are stored in prog->SamplerUnits, + * and the entry in that array is selected by this index we + * store in ParameterValues[]. + */ + if (file == PROGRAM_SAMPLER) { + for (unsigned int j = 0; j < size / 4; j++) + prog->Parameters->ParameterValues[index + j][0] = next_sampler++; + } + + /* The location chosen in the Parameters list here (returned + * from _mesa_add_uniform) has to match what the linker chose. + */ + if (index != parameter_index) { + fail_link(shader_program, "Allocation of uniform `%s' to target " + "failed (%d vs %d)\n", + uniform->Name, index, parameter_index); + } + } + } + + ralloc_free(sorted_uniforms); +} + +static void +set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, + struct gl_shader_program *shader_program, + const char *name, const glsl_type *type, + ir_constant *val) +{ + if (type->is_record()) { + ir_constant *field_constant; + + field_constant = (ir_constant *)val->components.get_head(); + + for (unsigned int i = 0; i < type->length; i++) { + const glsl_type *field_type = type->fields.structure[i].type; + const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name, + type->fields.structure[i].name); + set_uniform_initializer(ctx, mem_ctx, shader_program, field_name, + field_type, field_constant); + field_constant = (ir_constant *)field_constant->next; + } + return; + } + + int loc = _mesa_get_uniform_location(ctx, shader_program, name); + + if (loc == -1) { + fail_link(shader_program, + "Couldn't find uniform for initializer %s\n", name); + return; + } + + for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) { + ir_constant *element; + const glsl_type *element_type; + if (type->is_array()) { + element = val->array_elements[i]; + element_type = type->fields.array; + } else { + element = val; + element_type = type; + } + + void *values; + + if (element_type->base_type == GLSL_TYPE_BOOL) { + int *conv = ralloc_array(mem_ctx, int, element_type->components()); + for (unsigned int j = 0; j < element_type->components(); j++) { + conv[j] = element->value.b[j]; + } + values = (void *)conv; + element_type = glsl_type::get_instance(GLSL_TYPE_INT, + element_type->vector_elements, + 1); + } else { + values = &element->value; + } + + if (element_type->is_matrix()) { + _mesa_uniform_matrix(ctx, shader_program, + element_type->matrix_columns, + element_type->vector_elements, + loc, 1, GL_FALSE, (GLfloat *)values); + loc += element_type->matrix_columns; + } else { + _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns, + values, element_type->gl_type); + loc += type_size(element_type); + } + } +} + +static void +set_uniform_initializers(struct gl_context *ctx, + struct gl_shader_program *shader_program) +{ + void *mem_ctx = NULL; + + for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) { + struct gl_shader *shader = shader_program->_LinkedShaders[i]; + + if (shader == NULL) + continue; + + foreach_iter(exec_list_iterator, iter, *shader->ir) { + ir_instruction *ir = (ir_instruction *)iter.get(); + ir_variable *var = ir->as_variable(); + + if (!var || var->mode != ir_var_uniform || !var->constant_value) + continue; + + if (!mem_ctx) + mem_ctx = ralloc_context(NULL); + + set_uniform_initializer(ctx, mem_ctx, shader_program, var->name, + var->type, var->constant_value); + } + } + + ralloc_free(mem_ctx); +} + +/* Replaces all references to a temporary register index with another index. */ +void +glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) +{ + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + unsigned j; + + for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) { + if (inst->src[j].file == PROGRAM_TEMPORARY && + inst->src[j].index == index) { + inst->src[j].index = new_index; + } + } + + if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { + inst->dst.index = new_index; + } + } +} + +int +glsl_to_tgsi_visitor::get_first_temp_read(int index) +{ + int depth = 0; /* loop depth */ + int loop_start = -1; /* index of the first active BGNLOOP (if any) */ + unsigned i = 0, j; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) { + if (inst->src[j].file == PROGRAM_TEMPORARY && + inst->src[j].index == index) { + return (depth == 0) ? i : loop_start; + } + } + + if (inst->op == OPCODE_BGNLOOP) { + if(depth++ == 0) + loop_start = i; + } else if (inst->op == OPCODE_ENDLOOP) { + if (--depth == 0) + loop_start = -1; + } + assert(depth >= 0); + + i++; + } + + return -1; +} + +int +glsl_to_tgsi_visitor::get_first_temp_write(int index) +{ + int depth = 0; /* loop depth */ + int loop_start = -1; /* index of the first active BGNLOOP (if any) */ + int i = 0; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { + return (depth == 0) ? i : loop_start; + } + + if (inst->op == OPCODE_BGNLOOP) { + if(depth++ == 0) + loop_start = i; + } else if (inst->op == OPCODE_ENDLOOP) { + if (--depth == 0) + loop_start = -1; + } + assert(depth >= 0); + + i++; + } + + return -1; +} + +int +glsl_to_tgsi_visitor::get_last_temp_read(int index) +{ + int depth = 0; /* loop depth */ + int last = -1; /* index of last instruction that reads the temporary */ + unsigned i = 0, j; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) { + if (inst->src[j].file == PROGRAM_TEMPORARY && + inst->src[j].index == index) { + last = (depth == 0) ? i : -2; + } + } + + if (inst->op == OPCODE_BGNLOOP) + depth++; + else if (inst->op == OPCODE_ENDLOOP) + if (--depth == 0 && last == -2) + last = i; + assert(depth >= 0); + + i++; + } + + assert(last >= -1); + return last; +} + +int +glsl_to_tgsi_visitor::get_last_temp_write(int index) +{ + int depth = 0; /* loop depth */ + int last = -1; /* index of last instruction that writes to the temporary */ + int i = 0; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) + last = (depth == 0) ? i : -2; + + if (inst->op == OPCODE_BGNLOOP) + depth++; + else if (inst->op == OPCODE_ENDLOOP) + if (--depth == 0 && last == -2) + last = i; + assert(depth >= 0); + + i++; + } + + assert(last >= -1); + return last; +} + +/* + * On a basic block basis, tracks available PROGRAM_TEMPORARY register + * channels for copy propagation and updates following instructions to + * use the original versions. + * + * The glsl_to_tgsi_visitor lazily produces code assuming that this pass + * will occur. As an example, a TXP production before this pass: + * + * 0: MOV TEMP[1], INPUT[4].xyyy; + * 1: MOV TEMP[1].w, INPUT[4].wwww; + * 2: TXP TEMP[2], TEMP[1], texture[0], 2D; + * + * and after: + * + * 0: MOV TEMP[1], INPUT[4].xyyy; + * 1: MOV TEMP[1].w, INPUT[4].wwww; + * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; + * + * which allows for dead code elimination on TEMP[1]'s writes. + */ +void +glsl_to_tgsi_visitor::copy_propagate(void) +{ + glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx, + glsl_to_tgsi_instruction *, + this->next_temp * 4); + int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); + int level = 0; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + assert(inst->dst.file != PROGRAM_TEMPORARY + || inst->dst.index < this->next_temp); + + /* First, do any copy propagation possible into the src regs. */ + for (int r = 0; r < 3; r++) { + glsl_to_tgsi_instruction *first = NULL; + bool good = true; + int acp_base = inst->src[r].index * 4; + + if (inst->src[r].file != PROGRAM_TEMPORARY || + inst->src[r].reladdr) + continue; + + /* See if we can find entries in the ACP consisting of MOVs + * from the same src register for all the swizzled channels + * of this src register reference. + */ + for (int i = 0; i < 4; i++) { + int src_chan = GET_SWZ(inst->src[r].swizzle, i); + glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan]; + + if (!copy_chan) { + good = false; + break; + } + + assert(acp_level[acp_base + src_chan] <= level); + + if (!first) { + first = copy_chan; + } else { + if (first->src[0].file != copy_chan->src[0].file || + first->src[0].index != copy_chan->src[0].index) { + good = false; + break; + } + } + } + + if (good) { + /* We've now validated that we can copy-propagate to + * replace this src register reference. Do it. + */ + inst->src[r].file = first->src[0].file; + inst->src[r].index = first->src[0].index; + + int swizzle = 0; + for (int i = 0; i < 4; i++) { + int src_chan = GET_SWZ(inst->src[r].swizzle, i); + glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan]; + swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << + (3 * i)); + } + inst->src[r].swizzle = swizzle; + } + } + + switch (inst->op) { + case OPCODE_BGNLOOP: + case OPCODE_ENDLOOP: + /* End of a basic block, clear the ACP entirely. */ + memset(acp, 0, sizeof(*acp) * this->next_temp * 4); + break; + + case OPCODE_IF: + ++level; + break; + + case OPCODE_ENDIF: + case OPCODE_ELSE: + /* Clear all channels written inside the block from the ACP, but + * leaving those that were not touched. + */ + for (int r = 0; r < this->next_temp; r++) { + for (int c = 0; c < 4; c++) { + if (!acp[4 * r + c]) + continue; + + if (acp_level[4 * r + c] >= level) + acp[4 * r + c] = NULL; + } + } + if (inst->op == OPCODE_ENDIF) + --level; + break; + + default: + /* Continuing the block, clear any written channels from + * the ACP. + */ + if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) { + /* Any temporary might be written, so no copy propagation + * across this instruction. + */ + memset(acp, 0, sizeof(*acp) * this->next_temp * 4); + } else if (inst->dst.file == PROGRAM_OUTPUT && + inst->dst.reladdr) { + /* Any output might be written, so no copy propagation + * from outputs across this instruction. + */ + for (int r = 0; r < this->next_temp; r++) { + for (int c = 0; c < 4; c++) { + if (!acp[4 * r + c]) + continue; + + if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT) + acp[4 * r + c] = NULL; + } + } + } else if (inst->dst.file == PROGRAM_TEMPORARY || + inst->dst.file == PROGRAM_OUTPUT) { + /* Clear where it's used as dst. */ + if (inst->dst.file == PROGRAM_TEMPORARY) { + for (int c = 0; c < 4; c++) { + if (inst->dst.writemask & (1 << c)) { + acp[4 * inst->dst.index + c] = NULL; + } + } + } + + /* Clear where it's used as src. */ + for (int r = 0; r < this->next_temp; r++) { + for (int c = 0; c < 4; c++) { + if (!acp[4 * r + c]) + continue; + + int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c); + + if (acp[4 * r + c]->src[0].file == inst->dst.file && + acp[4 * r + c]->src[0].index == inst->dst.index && + inst->dst.writemask & (1 << src_chan)) + { + acp[4 * r + c] = NULL; + } + } + } + } + break; + } + + /* If this is a copy, add it to the ACP. */ + if (inst->op == OPCODE_MOV && + inst->dst.file == PROGRAM_TEMPORARY && + !inst->dst.reladdr && + !inst->saturate && + !inst->src[0].reladdr && + !inst->src[0].negate) { + for (int i = 0; i < 4; i++) { + if (inst->dst.writemask & (1 << i)) { + acp[4 * inst->dst.index + i] = inst; + acp_level[4 * inst->dst.index + i] = level; + } + } + } + } + + ralloc_free(acp_level); + ralloc_free(acp); +} + +/* + * Tracks available PROGRAM_TEMPORARY registers for dead code elimination. + * + * The glsl_to_tgsi_visitor lazily produces code assuming that this pass + * will occur. As an example, a TXP production after copy propagation but + * before this pass: + * + * 0: MOV TEMP[1], INPUT[4].xyyy; + * 1: MOV TEMP[1].w, INPUT[4].wwww; + * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; + * + * and after this pass: + * + * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; + * + * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB) + * FIXME: doesn't eliminate all dead code inside of loops; it steps around them + */ +void +glsl_to_tgsi_visitor::eliminate_dead_code(void) +{ + int i; + + for (i=0; i < this->next_temp; i++) { + int last_read = get_last_temp_read(i); + int j = 0; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i && + j > last_read) + { + iter.remove(); + delete inst; + } + + j++; + } + } +} + +/* Merges temporary registers together where possible to reduce the number of + * registers needed to run a program. + * + * Produces optimal code only after copy propagation and dead code elimination + * have been run. */ +void +glsl_to_tgsi_visitor::merge_registers(void) +{ + int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp); + int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp); + int i, j; + + /* Read the indices of the last read and first write to each temp register + * into an array so that we don't have to traverse the instruction list as + * much. */ + for (i=0; i < this->next_temp; i++) { + last_reads[i] = get_last_temp_read(i); + first_writes[i] = get_first_temp_write(i); + } + + /* Start looking for registers with non-overlapping usages that can be + * merged together. */ + for (i=0; i < this->next_temp - 1; i++) { + /* Don't touch unused registers. */ + if (last_reads[i] < 0 || first_writes[i] < 0) continue; + + for (j=i+1; j < this->next_temp; j++) { + /* Don't touch unused registers. */ + if (last_reads[j] < 0 || first_writes[j] < 0) continue; + + /* We can merge the two registers if the first write to j is after or + * in the same instruction as the last read from i. Note that the + * register at index i will always be used earlier or at the same time + * as the register at index j. */ + assert(first_writes[i] <= first_writes[j]); + if (last_reads[i] <= first_writes[j]) { + rename_temp_register(j, i); /* Replace all references to j with i.*/ + + /* Update the first_writes and last_reads arrays with the new + * values for the merged register index, and mark the newly unused + * register index as such. */ + last_reads[i] = last_reads[j]; + first_writes[j] = -1; + last_reads[j] = -1; + } + } + } + + ralloc_free(last_reads); + ralloc_free(first_writes); +} + +/* Reassign indices to temporary registers by reusing unused indices created + * by optimization passes. */ +void +glsl_to_tgsi_visitor::renumber_registers(void) +{ + int i = 0; + int new_index = 0; + + for (i=0; i < this->next_temp; i++) { + if (get_first_temp_read(i) < 0) continue; + if (i != new_index) + rename_temp_register(i, new_index); + new_index++; + } + + this->next_temp = new_index; +} + +/* ------------------------- TGSI conversion stuff -------------------------- */ +struct label { + unsigned branch_target; + unsigned token; +}; + +/** + * Intermediate state used during shader translation. + */ +struct st_translate { + struct ureg_program *ureg; + + struct ureg_dst temps[MAX_PROGRAM_TEMPS]; + struct ureg_src *constants; + struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; + struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; + struct ureg_dst address[1]; + struct ureg_src samplers[PIPE_MAX_SAMPLERS]; + struct ureg_src systemValues[SYSTEM_VALUE_MAX]; + + /* Extra info for handling point size clamping in vertex shader */ + struct ureg_dst pointSizeResult; /**< Actual point size output register */ + struct ureg_src pointSizeConst; /**< Point size range constant register */ + GLint pointSizeOutIndex; /**< Temp point size output register */ + GLboolean prevInstWrotePointSize; + + const GLuint *inputMapping; + const GLuint *outputMapping; + + /* For every instruction that contains a label (eg CALL), keep + * details so that we can go back afterwards and emit the correct + * tgsi instruction number for each label. + */ + struct label *labels; + unsigned labels_size; + unsigned labels_count; + + /* Keep a record of the tgsi instruction number that each mesa + * instruction starts at, will be used to fix up labels after + * translation. + */ + unsigned *insn; + unsigned insn_size; + unsigned insn_count; + + unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */ + + boolean error; +}; + +/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */ +static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { + TGSI_SEMANTIC_FACE, + TGSI_SEMANTIC_INSTANCEID +}; + +/** + * Make note of a branch to a label in the TGSI code. + * After we've emitted all instructions, we'll go over the list + * of labels built here and patch the TGSI code with the actual + * location of each label. + */ +static unsigned *get_label( struct st_translate *t, + unsigned branch_target ) +{ + unsigned i; + + if (t->labels_count + 1 >= t->labels_size) { + t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); + t->labels = (struct label *)realloc(t->labels, + t->labels_size * sizeof t->labels[0]); + if (t->labels == NULL) { + static unsigned dummy; + t->error = TRUE; + return &dummy; + } + } + + i = t->labels_count++; + t->labels[i].branch_target = branch_target; + return &t->labels[i].token; +} + +/** + * Called prior to emitting the TGSI code for each Mesa instruction. + * Allocate additional space for instructions if needed. + * Update the insn[] array so the next Mesa instruction points to + * the next TGSI instruction. + */ +static void set_insn_start( struct st_translate *t, + unsigned start ) +{ + if (t->insn_count + 1 >= t->insn_size) { + t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); + t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof t->insn[0]); + if (t->insn == NULL) { + t->error = TRUE; + return; + } + } + + t->insn[t->insn_count++] = start; +} + +/** + * Map a Mesa dst register to a TGSI ureg_dst register. + */ +static struct ureg_dst +dst_register( struct st_translate *t, + gl_register_file file, + GLuint index ) +{ + switch( file ) { + case PROGRAM_UNDEFINED: + return ureg_dst_undef(); + + case PROGRAM_TEMPORARY: + if (ureg_dst_is_undef(t->temps[index])) + t->temps[index] = ureg_DECL_temporary( t->ureg ); + + return t->temps[index]; + + case PROGRAM_OUTPUT: + if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ) + t->prevInstWrotePointSize = GL_TRUE; + + if (t->procType == TGSI_PROCESSOR_VERTEX) + assert(index < VERT_RESULT_MAX); + else if (t->procType == TGSI_PROCESSOR_FRAGMENT) + assert(index < FRAG_RESULT_MAX); + else + assert(index < GEOM_RESULT_MAX); + + assert(t->outputMapping[index] < Elements(t->outputs)); + + return t->outputs[t->outputMapping[index]]; + + case PROGRAM_ADDRESS: + return t->address[index]; + + default: + debug_assert( 0 ); + return ureg_dst_undef(); + } +} + +/** + * Map a Mesa src register to a TGSI ureg_src register. + */ +static struct ureg_src +src_register( struct st_translate *t, + gl_register_file file, + GLuint index ) +{ + switch( file ) { + case PROGRAM_UNDEFINED: + return ureg_src_undef(); + + case PROGRAM_TEMPORARY: + assert(index >= 0); + assert(index < Elements(t->temps)); + if (ureg_dst_is_undef(t->temps[index])) + t->temps[index] = ureg_DECL_temporary( t->ureg ); + return ureg_src(t->temps[index]); + + case PROGRAM_NAMED_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_UNIFORM: + assert(index >= 0); + return t->constants[index]; + case PROGRAM_STATE_VAR: + case PROGRAM_CONSTANT: /* ie, immediate */ + if (index < 0) + return ureg_DECL_constant( t->ureg, 0 ); + else + return t->constants[index]; + + case PROGRAM_INPUT: + assert(t->inputMapping[index] < Elements(t->inputs)); + return t->inputs[t->inputMapping[index]]; + + case PROGRAM_OUTPUT: + assert(t->outputMapping[index] < Elements(t->outputs)); + return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */ + + case PROGRAM_ADDRESS: + return ureg_src(t->address[index]); + + case PROGRAM_SYSTEM_VALUE: + assert(index < Elements(t->systemValues)); + return t->systemValues[index]; + + default: + debug_assert( 0 ); + return ureg_src_undef(); + } +} + +/** + * Create a TGSI ureg_dst register from a Mesa dest register. + */ +static struct ureg_dst +translate_dst( struct st_translate *t, + const st_dst_reg *dst_reg, //const struct prog_dst_register *DstReg, + boolean saturate ) +{ + struct ureg_dst dst = dst_register( t, + dst_reg->file, + dst_reg->index ); + + dst = ureg_writemask( dst, + dst_reg->writemask ); + + if (saturate) + dst = ureg_saturate( dst ); + + if (dst_reg->reladdr != NULL) + dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) ); + + return dst; +} + +/** + * Create a TGSI ureg_src register from a Mesa src register. + */ +static struct ureg_src +translate_src( struct st_translate *t, + const st_src_reg *src_reg ) +{ + struct ureg_src src = src_register( t, src_reg->file, src_reg->index ); + + src = ureg_swizzle( src, + GET_SWZ( src_reg->swizzle, 0 ) & 0x3, + GET_SWZ( src_reg->swizzle, 1 ) & 0x3, + GET_SWZ( src_reg->swizzle, 2 ) & 0x3, + GET_SWZ( src_reg->swizzle, 3 ) & 0x3); + + if ((src_reg->negate & 0xf) == NEGATE_XYZW) + src = ureg_negate(src); + +#if 0 + // src_reg currently does not have an equivalent to SrcReg->Abs in Mesa IR + if (src_reg->abs) + src = ureg_abs(src); +#endif + + if (src_reg->reladdr != NULL) { + /* Normally ureg_src_indirect() would be used here, but a stupid compiler + * bug in g++ makes ureg_src_indirect (an inline C function) erroneously + * set the bit for src.Negate. So we have to do the operation manually + * here to work around the compiler's problems. */ + /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/ + struct ureg_src addr = ureg_src(t->address[0]); + src.Indirect = 1; + src.IndirectFile = addr.File; + src.IndirectIndex = addr.Index; + src.IndirectSwizzle = addr.SwizzleX; + + if (src_reg->file != PROGRAM_INPUT && + src_reg->file != PROGRAM_OUTPUT) { + /* If src_reg->index was negative, it was set to zero in + * src_register(). Reassign it now. But don't do this + * for input/output regs since they get remapped while + * const buffers don't. + */ + src.Index = src_reg->index; + } + } + + return src; +} + +static void +compile_tgsi_instruction(struct st_translate *t, + const struct glsl_to_tgsi_instruction *inst) +{ + struct ureg_program *ureg = t->ureg; + GLuint i; + struct ureg_dst dst[1]; + struct ureg_src src[4]; + unsigned num_dst; + unsigned num_src; + + num_dst = _mesa_num_inst_dst_regs( inst->op ); + num_src = _mesa_num_inst_src_regs( inst->op ); + + if (num_dst) + dst[0] = translate_dst( t, + &inst->dst, + inst->saturate); // inst->SaturateMode + + for (i = 0; i < num_src; i++) + src[i] = translate_src( t, &inst->src[i] ); + + switch( inst->op ) { + case OPCODE_SWZ: + // TODO: copy emit_swz function from st_mesa_to_tgsi.c + //emit_swz( t, dst[0], &inst->src[0] ); + assert(!"OPCODE_SWZ"); + return; + + case OPCODE_BGNLOOP: + case OPCODE_CAL: + case OPCODE_ELSE: + case OPCODE_ENDLOOP: + case OPCODE_IF: + debug_assert(num_dst == 0); + ureg_label_insn( ureg, + translate_opcode( inst->op ), + src, num_src, + get_label( t, + inst->op == OPCODE_CAL ? inst->function->sig_id : 0 )); + return; + + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXD: + case OPCODE_TXL: + case OPCODE_TXP: + src[num_src++] = t->samplers[inst->sampler]; + ureg_tex_insn( ureg, + translate_opcode( inst->op ), + dst, num_dst, + translate_texture_target( inst->tex_target, + inst->tex_shadow ), + src, num_src ); + return; + + case OPCODE_SCS: + dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); + ureg_insn( ureg, + translate_opcode( inst->op ), + dst, num_dst, + src, num_src ); + break; + + case OPCODE_XPD: + dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ ); + ureg_insn( ureg, + translate_opcode( inst->op ), + dst, num_dst, + src, num_src ); + break; + + case OPCODE_NOISE1: + case OPCODE_NOISE2: + case OPCODE_NOISE3: + case OPCODE_NOISE4: + /* At some point, a motivated person could add a better + * implementation of noise. Currently not even the nvidia + * binary drivers do anything more than this. In any case, the + * place to do this is in the GL state tracker, not the poor + * driver. + */ + ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) ); + break; + + case OPCODE_DDY: + // TODO: copy emit_ddy() function from st_mesa_to_tgsi.c + assert(!"OPCODE_DDY"); + //emit_ddy( t, dst[0], &inst->src[0] ); + break; + + default: + ureg_insn( ureg, + translate_opcode( inst->op ), + dst, num_dst, + src, num_src ); + break; + } +} + +/** + * Emit the TGSI instructions to adjust the WPOS pixel center convention + * Basically, add (adjX, adjY) to the fragment position. + */ +static void +emit_adjusted_wpos( struct st_translate *t, + const struct gl_program *program, + GLfloat adjX, GLfloat adjY) +{ + struct ureg_program *ureg = t->ureg; + struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); + struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; + + /* Note that we bias X and Y and pass Z and W through unchanged. + * The shader might also use gl_FragCoord.w and .z. + */ + ureg_ADD(ureg, wpos_temp, wpos_input, + ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f)); + + t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); +} + + +/** + * Emit the TGSI instructions for inverting the WPOS y coordinate. + * This code is unavoidable because it also depends on whether + * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). + */ +static void +emit_wpos_inversion( struct st_translate *t, + const struct gl_program *program, + boolean invert) +{ + struct ureg_program *ureg = t->ureg; + + /* Fragment program uses fragment position input. + * Need to replace instances of INPUT[WPOS] with temp T + * where T = INPUT[WPOS] by y is inverted. + */ + static const gl_state_index wposTransformState[STATE_LENGTH] + = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, + (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; + + /* XXX: note we are modifying the incoming shader here! Need to + * do this before emitting the constant decls below, or this + * will be missed: + */ + unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, + wposTransformState); + + struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst ); + struct ureg_dst wpos_temp; + struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; + + /* MOV wpos_temp, input[wpos] + */ + if (wpos_input.File == TGSI_FILE_TEMPORARY) + wpos_temp = ureg_dst(wpos_input); + else { + wpos_temp = ureg_DECL_temporary( ureg ); + ureg_MOV( ureg, wpos_temp, wpos_input ); + } + + if (invert) { + /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy + */ + ureg_MAD( ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), + wpos_input, + ureg_scalar(wpostrans, 0), + ureg_scalar(wpostrans, 1)); + } else { + /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww + */ + ureg_MAD( ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), + wpos_input, + ureg_scalar(wpostrans, 2), + ureg_scalar(wpostrans, 3)); + } + + /* Use wpos_temp as position input from here on: + */ + t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); +} + + +/** + * Emit fragment position/ooordinate code. + */ +static void +emit_wpos(struct st_context *st, + struct st_translate *t, + const struct gl_program *program, + struct ureg_program *ureg) +{ + const struct gl_fragment_program *fp = + (const struct gl_fragment_program *) program; + struct pipe_screen *pscreen = st->pipe->screen; + boolean invert = FALSE; + + if (fp->OriginUpperLeft) { + /* Fragment shader wants origin in upper-left */ + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { + /* the driver supports upper-left origin */ + } + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { + /* the driver supports lower-left origin, need to invert Y */ + ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); + invert = TRUE; + } + else + assert(0); + } + else { + /* Fragment shader wants origin in lower-left */ + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) + /* the driver supports lower-left origin */ + ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) + /* the driver supports upper-left origin, need to invert Y */ + invert = TRUE; + else + assert(0); + } + + if (fp->PixelCenterInteger) { + /* Fragment shader wants pixel center integer */ + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) + /* the driver supports pixel center integer */ + ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) + /* the driver supports pixel center half integer, need to bias X,Y */ + emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f); + else + assert(0); + } + else { + /* Fragment shader wants pixel center half integer */ + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { + /* the driver supports pixel center half integer */ + } + else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { + /* the driver supports pixel center integer, need to bias X,Y */ + ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); + emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f); + } + else + assert(0); + } + + /* we invert after adjustment so that we avoid the MOV to temporary, + * and reuse the adjustment ADD instead */ + emit_wpos_inversion(t, program, invert); +} + +/** + * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. + * \param program the program to translate + * \param numInputs number of input registers used + * \param inputMapping maps Mesa fragment program inputs to TGSI generic + * input indexes + * \param inputSemanticName the TGSI_SEMANTIC flag for each input + * \param inputSemanticIndex the semantic index (ex: which texcoord) for + * each input + * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input + * \param numOutputs number of output registers used + * \param outputMapping maps Mesa fragment program outputs to TGSI + * generic outputs + * \param outputSemanticName the TGSI_SEMANTIC flag for each output + * \param outputSemanticIndex the semantic index (ex: which texcoord) for + * each output + * + * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY + */ +extern "C" enum pipe_error +st_translate_program( + struct gl_context *ctx, + uint procType, + struct ureg_program *ureg, + glsl_to_tgsi_visitor *program, + const struct gl_program *proginfo, + GLuint numInputs, + const GLuint inputMapping[], + const ubyte inputSemanticName[], + const ubyte inputSemanticIndex[], + const GLuint interpMode[], + GLuint numOutputs, + const GLuint outputMapping[], + const ubyte outputSemanticName[], + const ubyte outputSemanticIndex[], + boolean passthrough_edgeflags ) +{ + struct st_translate translate, *t; + unsigned i; + enum pipe_error ret = PIPE_OK; + + assert(numInputs <= Elements(t->inputs)); + assert(numOutputs <= Elements(t->outputs)); + + t = &translate; + memset(t, 0, sizeof *t); + + t->procType = procType; + t->inputMapping = inputMapping; + t->outputMapping = outputMapping; + t->ureg = ureg; + t->pointSizeOutIndex = -1; + t->prevInstWrotePointSize = GL_FALSE; + + /*_mesa_print_program(program);*/ + + /* + * Declare input attributes. + */ + if (procType == TGSI_PROCESSOR_FRAGMENT) { + for (i = 0; i < numInputs; i++) { + t->inputs[i] = ureg_DECL_fs_input(ureg, + inputSemanticName[i], + inputSemanticIndex[i], + interpMode[i]); + } + + if (proginfo->InputsRead & FRAG_BIT_WPOS) { + /* Must do this after setting up t->inputs, and before + * emitting constant references, below: + */ + printf("FRAG_BIT_WPOS\n"); + emit_wpos(st_context(ctx), t, proginfo, ureg); + } + + if (proginfo->InputsRead & FRAG_BIT_FACE) { + // TODO: uncomment + printf("FRAG_BIT_FACE\n"); + //emit_face_var( t, program ); + } + + /* + * Declare output attributes. + */ + for (i = 0; i < numOutputs; i++) { + switch (outputSemanticName[i]) { + case TGSI_SEMANTIC_POSITION: + t->outputs[i] = ureg_DECL_output( ureg, + TGSI_SEMANTIC_POSITION, /* Z / Depth */ + outputSemanticIndex[i] ); + + t->outputs[i] = ureg_writemask( t->outputs[i], + TGSI_WRITEMASK_Z ); + break; + case TGSI_SEMANTIC_STENCIL: + t->outputs[i] = ureg_DECL_output( ureg, + TGSI_SEMANTIC_STENCIL, /* Stencil */ + outputSemanticIndex[i] ); + t->outputs[i] = ureg_writemask( t->outputs[i], + TGSI_WRITEMASK_Y ); + break; + case TGSI_SEMANTIC_COLOR: + t->outputs[i] = ureg_DECL_output( ureg, + TGSI_SEMANTIC_COLOR, + outputSemanticIndex[i] ); + break; + default: + debug_assert(0); + return PIPE_ERROR_BAD_INPUT; + } + } + } + else if (procType == TGSI_PROCESSOR_GEOMETRY) { + for (i = 0; i < numInputs; i++) { + t->inputs[i] = ureg_DECL_gs_input(ureg, + i, + inputSemanticName[i], + inputSemanticIndex[i]); + } + + for (i = 0; i < numOutputs; i++) { + t->outputs[i] = ureg_DECL_output( ureg, + outputSemanticName[i], + outputSemanticIndex[i] ); + } + } + else { + assert(procType == TGSI_PROCESSOR_VERTEX); + + for (i = 0; i < numInputs; i++) { + t->inputs[i] = ureg_DECL_vs_input(ureg, i); + } + + for (i = 0; i < numOutputs; i++) { + t->outputs[i] = ureg_DECL_output( ureg, + outputSemanticName[i], + outputSemanticIndex[i] ); + if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) { + /* Writing to the point size result register requires special + * handling to implement clamping. + */ + static const gl_state_index pointSizeClampState[STATE_LENGTH] + = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; + /* XXX: note we are modifying the incoming shader here! Need to + * do this before emitting the constant decls below, or this + * will be missed. + * XXX: depends on "Parameters" field specific to Mesa IR + */ + unsigned pointSizeClampConst = + _mesa_add_state_reference(proginfo->Parameters, + pointSizeClampState); + struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg ); + t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst ); + t->pointSizeResult = t->outputs[i]; + t->pointSizeOutIndex = i; + t->outputs[i] = psizregtemp; + } + } + /*if (passthrough_edgeflags) + emit_edgeflags( t, program ); */ // TODO: uncomment + } + + /* Declare address register. + */ + if (program->num_address_regs > 0) { + debug_assert( program->num_address_regs == 1 ); + t->address[0] = ureg_DECL_address( ureg ); + } + + /* Declare misc input registers + */ + { + GLbitfield sysInputs = proginfo->SystemValuesRead; + unsigned numSys = 0; + for (i = 0; sysInputs; i++) { + if (sysInputs & (1 << i)) { + unsigned semName = mesa_sysval_to_semantic[i]; + t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0); + numSys++; + sysInputs &= ~(1 << i); + } + } + } + + if (program->indirect_addr_temps) { + /* If temps are accessed with indirect addressing, declare temporaries + * in sequential order. Else, we declare them on demand elsewhere. + * (Note: the number of temporaries is equal to program->next_temp) + */ + for (i = 0; i < (unsigned)program->next_temp; i++) { + /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ + t->temps[i] = ureg_DECL_temporary( t->ureg ); + } + } + + /* Emit constants and immediates. Mesa uses a single index space + * for these, so we put all the translated regs in t->constants. + * XXX: this entire if block depends on proginfo->Parameters from Mesa IR + */ + if (proginfo->Parameters) { + t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] ); + if (t->constants == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out; + } + + for (i = 0; i < proginfo->Parameters->NumParameters; i++) { + switch (proginfo->Parameters->Parameters[i].Type) { + case PROGRAM_ENV_PARAM: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_UNIFORM: + t->constants[i] = ureg_DECL_constant( ureg, i ); + break; + + /* Emit immediates only when there's no indirect addressing of + * the const buffer. + * FIXME: Be smarter and recognize param arrays: + * indirect addressing is only valid within the referenced + * array. + */ + case PROGRAM_CONSTANT: + if (program->indirect_addr_consts) + t->constants[i] = ureg_DECL_constant( ureg, i ); + else + t->constants[i] = + ureg_DECL_immediate( ureg, + proginfo->Parameters->ParameterValues[i], + 4 ); + break; + default: + break; + } + } + } + + /* texture samplers */ + for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { + // XXX: depends on SamplersUsed property generated by conversion to Mesa IR + if (proginfo->SamplersUsed & (1 << i)) { + t->samplers[i] = ureg_DECL_sampler( ureg, i ); + } + } + + /* Emit each instruction in turn: + */ + foreach_iter(exec_list_iterator, iter, program->instructions) { + set_insn_start( t, ureg_get_instruction_number( ureg )); + compile_tgsi_instruction( t, (glsl_to_tgsi_instruction *)iter.get() ); + + if (t->prevInstWrotePointSize && proginfo->Id) { + /* The previous instruction wrote to the (fake) vertex point size + * result register. Now we need to clamp that value to the min/max + * point size range, putting the result into the real point size + * register. + * Note that we can't do this easily at the end of program due to + * possible early return. + */ + set_insn_start( t, ureg_get_instruction_number( ureg )); + ureg_MAX( t->ureg, + ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), + ureg_src(t->outputs[t->pointSizeOutIndex]), + ureg_swizzle(t->pointSizeConst, 1,1,1,1)); + ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), + ureg_src(t->outputs[t->pointSizeOutIndex]), + ureg_swizzle(t->pointSizeConst, 2,2,2,2)); + } + t->prevInstWrotePointSize = GL_FALSE; + } + + /* Fix up all emitted labels: + */ + for (i = 0; i < t->labels_count; i++) { + ureg_fixup_label( ureg, + t->labels[i].token, + t->insn[t->labels[i].branch_target] ); + } + +out: + FREE(t->insn); + FREE(t->labels); + FREE(t->constants); + + if (t->error) { + debug_printf("%s: translate error flag set\n", __FUNCTION__); + } + + return ret; +} +/* ----------------------------- End TGSI code ------------------------------ */ + +/** + * Convert a shader's GLSL IR into both a Mesa gl_program and a TGSI shader. + */ +static struct gl_program * +get_mesa_program(struct gl_context *ctx, + struct gl_shader_program *shader_program, + struct gl_shader *shader) +{ + glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); + struct prog_instruction *mesa_instructions, *mesa_inst; + ir_instruction **mesa_instruction_annotation; + int i; + struct gl_program *prog; + GLenum target; + const char *target_string; + GLboolean progress; + struct gl_shader_compiler_options *options = + &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)]; + + switch (shader->Type) { + case GL_VERTEX_SHADER: + target = GL_VERTEX_PROGRAM_ARB; + target_string = "vertex"; + break; + case GL_FRAGMENT_SHADER: + target = GL_FRAGMENT_PROGRAM_ARB; + target_string = "fragment"; + break; + case GL_GEOMETRY_SHADER: + target = GL_GEOMETRY_PROGRAM_NV; + target_string = "geometry"; + break; + default: + assert(!"should not be reached"); + return NULL; + } + + validate_ir_tree(shader->ir); + + prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name); + if (!prog) + return NULL; + prog->Parameters = _mesa_new_parameter_list(); + prog->Varying = _mesa_new_parameter_list(); + prog->Attributes = _mesa_new_parameter_list(); + v->ctx = ctx; + v->prog = prog; + v->shader_program = shader_program; + v->options = options; + + add_uniforms_to_parameters_list(shader_program, shader, prog); + + /* Emit Mesa IR for main(). */ + visit_exec_list(shader->ir, v); + v->emit(NULL, OPCODE_END); + + /* Now emit bodies for any functions that were used. */ + do { + progress = GL_FALSE; + + foreach_iter(exec_list_iterator, iter, v->function_signatures) { + function_entry *entry = (function_entry *)iter.get(); + + if (!entry->bgn_inst) { + v->current_function = entry; + + entry->bgn_inst = v->emit(NULL, OPCODE_BGNSUB); + entry->bgn_inst->function = entry; + + visit_exec_list(&entry->sig->body, v); + + glsl_to_tgsi_instruction *last; + last = (glsl_to_tgsi_instruction *)v->instructions.get_tail(); + if (last->op != OPCODE_RET) + v->emit(NULL, OPCODE_RET); + + glsl_to_tgsi_instruction *end; + end = v->emit(NULL, OPCODE_ENDSUB); + end->function = entry; + + progress = GL_TRUE; + } + } + } while (progress); + +#if 0 + /* Print out some information (for debugging purposes) used by the + * optimization passes. */ + for (i=0; i < v->next_temp; i++) { + int fr = v->get_first_temp_read(i); + int fw = v->get_first_temp_write(i); + int lr = v->get_last_temp_read(i); + int lw = v->get_last_temp_write(i); + + printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw); + assert(fw <= fr); + } +#endif + + /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ + v->copy_propagate(); + v->eliminate_dead_code(); + v->merge_registers(); + v->renumber_registers(); + + prog->NumTemporaries = v->next_temp; + + int num_instructions = 0; + foreach_iter(exec_list_iterator, iter, v->instructions) { + num_instructions++; + } + + mesa_instructions = + (struct prog_instruction *)calloc(num_instructions, + sizeof(*mesa_instructions)); + mesa_instruction_annotation = ralloc_array(v->mem_ctx, ir_instruction *, + num_instructions); + + /* Convert glsl_to_tgsi_instructions into Mesa IR prog_instructions. + * TODO: remove + */ + mesa_inst = mesa_instructions; + i = 0; + foreach_iter(exec_list_iterator, iter, v->instructions) { + const glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + mesa_inst->Opcode = inst->op; + mesa_inst->CondUpdate = inst->cond_update; + if (inst->saturate) + mesa_inst->SaturateMode = SATURATE_ZERO_ONE; + mesa_inst->DstReg.File = inst->dst.file; + mesa_inst->DstReg.Index = inst->dst.index; + mesa_inst->DstReg.CondMask = inst->dst.cond_mask; + mesa_inst->DstReg.WriteMask = inst->dst.writemask; + mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL; + mesa_inst->SrcReg[0] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[0]); + mesa_inst->SrcReg[1] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[1]); + mesa_inst->SrcReg[2] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[2]); + mesa_inst->TexSrcUnit = inst->sampler; + mesa_inst->TexSrcTarget = inst->tex_target; + mesa_inst->TexShadow = inst->tex_shadow; + mesa_instruction_annotation[i] = inst->ir; + + /* Set IndirectRegisterFiles. */ + if (mesa_inst->DstReg.RelAddr) + prog->IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File; + + /* Update program's bitmask of indirectly accessed register files */ + for (unsigned src = 0; src < 3; src++) + if (mesa_inst->SrcReg[src].RelAddr) + prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File; + + if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) { + fail_link(shader_program, "Couldn't flatten if statement\n"); + } + + switch (mesa_inst->Opcode) { + case OPCODE_BGNSUB: + inst->function->inst = i; + mesa_inst->Comment = strdup(inst->function->sig->function_name()); + break; + case OPCODE_ENDSUB: + mesa_inst->Comment = strdup(inst->function->sig->function_name()); + break; + case OPCODE_CAL: + mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */ + break; + case OPCODE_ARL: + prog->NumAddressRegs = 1; + break; + default: + break; + } + + mesa_inst++; + i++; + + if (!shader_program->LinkStatus) + break; + } + + if (!shader_program->LinkStatus) { + free(mesa_instructions); + _mesa_reference_program(ctx, &shader->Program, NULL); + return NULL; + } + + set_branchtargets(v, mesa_instructions, num_instructions); + + if (ctx->Shader.Flags & GLSL_DUMP) { + printf("\n"); + printf("GLSL IR for linked %s program %d:\n", target_string, + shader_program->Name); + _mesa_print_ir(shader->ir, NULL); + printf("\n"); + printf("\n"); + printf("Mesa IR for linked %s program %d:\n", target_string, + shader_program->Name); + print_program(mesa_instructions, mesa_instruction_annotation, + num_instructions); + } + + prog->Instructions = mesa_instructions; + prog->NumInstructions = num_instructions; + + do_set_program_inouts(shader->ir, prog); + count_resources(prog); + + check_resources(ctx, shader_program, prog); + + _mesa_reference_program(ctx, &shader->Program, prog); + + if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) { + _mesa_optimize_program(ctx, prog); + } + + struct st_vertex_program *stvp; + struct st_fragment_program *stfp; + struct st_geometry_program *stgp; + + switch (shader->Type) { + case GL_VERTEX_SHADER: + stvp = (struct st_vertex_program *)prog; + stvp->glsl_to_tgsi = v; + break; + case GL_FRAGMENT_SHADER: + stfp = (struct st_fragment_program *)prog; + stfp->glsl_to_tgsi = v; + break; + case GL_GEOMETRY_SHADER: + stgp = (struct st_geometry_program *)prog; + stgp->glsl_to_tgsi = v; + break; + default: + assert(!"should not be reached"); + return NULL; + } + + return prog; +} + +extern "C" { + +struct gl_shader * +st_new_shader(struct gl_context *ctx, GLuint name, GLuint type) +{ + struct gl_shader *shader; + assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER || + type == GL_GEOMETRY_SHADER_ARB); + shader = rzalloc(NULL, struct gl_shader); + if (shader) { + shader->Type = type; + shader->Name = name; + _mesa_init_shader(ctx, shader); + } + return shader; +} + +struct gl_shader_program * +st_new_shader_program(struct gl_context *ctx, GLuint name) +{ + struct gl_shader_program *shProg; + shProg = rzalloc(NULL, struct gl_shader_program); + if (shProg) { + shProg->Name = name; + _mesa_init_shader_program(ctx, shProg); + } + return shProg; +} + +/** + * Link a shader. + * Called via ctx->Driver.LinkShader() + * This actually involves converting GLSL IR into Mesa gl_programs with + * code lowering and other optimizations. + */ +GLboolean +st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) +{ + assert(prog->LinkStatus); + + for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { + if (prog->_LinkedShaders[i] == NULL) + continue; + + bool progress; + exec_list *ir = prog->_LinkedShaders[i]->ir; + const struct gl_shader_compiler_options *options = + &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)]; + + do { + progress = false; + + /* Lowering */ + do_mat_op_to_vec(ir); + lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 + | LOG_TO_LOG2 + | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); + + progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; + + progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress; + + progress = lower_quadop_vector(ir, true) || progress; + + if (options->EmitNoIfs) { + progress = lower_discard(ir) || progress; + progress = lower_if_to_cond_assign(ir) || progress; + } + + if (options->EmitNoNoise) + progress = lower_noise(ir) || progress; + + /* If there are forms of indirect addressing that the driver + * cannot handle, perform the lowering pass. + */ + if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput + || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) + progress = + lower_variable_index_to_cond_assign(ir, + options->EmitNoIndirectInput, + options->EmitNoIndirectOutput, + options->EmitNoIndirectTemp, + options->EmitNoIndirectUniform) + || progress; + + progress = do_vec_index_to_cond_assign(ir) || progress; + } while (progress); + + validate_ir_tree(ir); + } + + for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { + struct gl_program *linked_prog; + + if (prog->_LinkedShaders[i] == NULL) + continue; + + linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]); + + if (linked_prog) { + bool ok = true; + + switch (prog->_LinkedShaders[i]->Type) { + case GL_VERTEX_SHADER: + _mesa_reference_vertprog(ctx, &prog->VertexProgram, + (struct gl_vertex_program *)linked_prog); + ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, + linked_prog); + break; + case GL_FRAGMENT_SHADER: + _mesa_reference_fragprog(ctx, &prog->FragmentProgram, + (struct gl_fragment_program *)linked_prog); + ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB, + linked_prog); + break; + case GL_GEOMETRY_SHADER: + _mesa_reference_geomprog(ctx, &prog->GeometryProgram, + (struct gl_geometry_program *)linked_prog); + ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV, + linked_prog); + break; + } + if (!ok) { + return GL_FALSE; + } + } + + _mesa_reference_program(ctx, &linked_prog, NULL); + } + + return GL_TRUE; +} + + +/** + * Link a GLSL shader program. Called via glLinkProgram(). + */ +void +st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) +{ + unsigned int i; + + _mesa_clear_shader_program_data(ctx, prog); + + prog->LinkStatus = GL_TRUE; + + for (i = 0; i < prog->NumShaders; i++) { + if (!prog->Shaders[i]->CompileStatus) { + fail_link(prog, "linking with uncompiled shader"); + prog->LinkStatus = GL_FALSE; + } + } + + prog->Varying = _mesa_new_parameter_list(); + _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL); + _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL); + _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL); + + if (prog->LinkStatus) { + link_shaders(ctx, prog); + } + + if (prog->LinkStatus) { + if (!ctx->Driver.LinkShader(ctx, prog)) { + prog->LinkStatus = GL_FALSE; + } + } + + set_uniform_initializers(ctx, prog); + + if (ctx->Shader.Flags & GLSL_DUMP) { + if (!prog->LinkStatus) { + printf("GLSL shader program %d failed to link\n", prog->Name); + } + + if (prog->InfoLog && prog->InfoLog[0] != 0) { + printf("GLSL shader program %d info log:\n", prog->Name); + printf("%s\n", prog->InfoLog); + } + } +} + +} /* extern "C" */ diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h new file mode 100644 index 00000000000..e21c0d1e0af --- /dev/null +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h @@ -0,0 +1,66 @@ +/* + * Copyright © 2010 Intel Corporation + * Copyright © 2011 Bryan Cain + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "main/glheader.h" +#include "tgsi/tgsi_ureg.h" + +struct gl_context; +struct gl_shader; +struct gl_shader_program; +struct glsl_to_tgsi_visitor; + +enum pipe_error st_translate_program( + struct gl_context *ctx, + uint procType, + struct ureg_program *ureg, + struct glsl_to_tgsi_visitor *program, + const struct gl_program *proginfo, + GLuint numInputs, + const GLuint inputMapping[], + const ubyte inputSemanticName[], + const ubyte inputSemanticIndex[], + const GLuint interpMode[], + GLuint numOutputs, + const GLuint outputMapping[], + const ubyte outputSemanticName[], + const ubyte outputSemanticIndex[], + boolean passthrough_edgeflags); + +void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v); + +struct gl_shader *st_new_shader(struct gl_context *ctx, GLuint name, GLuint type); + +struct gl_shader_program * +st_new_shader_program(struct gl_context *ctx, GLuint name); + +void st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog); +GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog); + +#ifdef __cplusplus +} +#endif diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index a41e5b16a85..75842286ba8 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -267,7 +267,7 @@ src_register( struct st_translate *t, /** * Map mesa texture target to TGSI texture target. */ -static unsigned +unsigned translate_texture_target( GLuint textarget, GLboolean shadow ) { @@ -511,7 +511,7 @@ static void emit_ddy( struct st_translate *t, -static unsigned +unsigned translate_opcode( unsigned op ) { switch( op ) { diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h index 0615e52ef62..0dbdf5f6159 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.h +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h @@ -64,6 +64,12 @@ st_translate_mesa_program( void st_free_tokens(const struct tgsi_token *tokens); +unsigned +translate_opcode(unsigned op); + +unsigned +translate_texture_target(GLuint textarget, GLboolean shadow); + #if defined __cplusplus } /* extern "C" */ diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 7a6d33d3fea..dd618424d66 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -174,8 +174,8 @@ st_release_gp_variants(struct st_context *st, struct st_geometry_program *stgp) * \param tokensOut destination for TGSI tokens * \return pointer to cached pipe_shader object. */ -static void -st_prepare_vertex_program(struct st_context *st, +void +st_prepare_vertex_program(struct gl_context *ctx, struct st_vertex_program *stvp) { GLuint attr; @@ -184,7 +184,7 @@ st_prepare_vertex_program(struct st_context *st, stvp->num_outputs = 0; if (stvp->Base.IsPositionInvariant) - _mesa_insert_mvp_code(st->ctx, &stvp->Base); + _mesa_insert_mvp_code(ctx, &stvp->Base); assert(stvp->Base.Base.NumInstructions > 1); @@ -292,7 +292,7 @@ st_translate_vertex_program(struct st_context *st, enum pipe_error error; unsigned num_outputs; - st_prepare_vertex_program( st, stvp ); + st_prepare_vertex_program(st->ctx, stvp); _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT); _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING); @@ -318,22 +318,41 @@ st_translate_vertex_program(struct st_context *st, debug_printf("\n"); } - error = st_translate_mesa_program(st->ctx, - TGSI_PROCESSOR_VERTEX, - ureg, - &stvp->Base.Base, - /* inputs */ - vpv->num_inputs, - stvp->input_to_index, - NULL, /* input semantic name */ - NULL, /* input semantic index */ - NULL, - /* outputs */ - num_outputs, - stvp->result_to_output, - stvp->output_semantic_name, - stvp->output_semantic_index, - key->passthrough_edgeflags ); + if (stvp->glsl_to_tgsi) + error = st_translate_program(st->ctx, + TGSI_PROCESSOR_VERTEX, + ureg, + stvp->glsl_to_tgsi, + &stvp->Base.Base, + /* inputs */ + stvp->num_inputs, + stvp->input_to_index, + NULL, /* input semantic name */ + NULL, /* input semantic index */ + NULL, /* interp mode */ + /* outputs */ + stvp->num_outputs, + stvp->result_to_output, + stvp->output_semantic_name, + stvp->output_semantic_index, + key->passthrough_edgeflags ); + else + error = st_translate_mesa_program(st->ctx, + TGSI_PROCESSOR_VERTEX, + ureg, + &stvp->Base.Base, + /* inputs */ + vpv->num_inputs, + stvp->input_to_index, + NULL, /* input semantic name */ + NULL, /* input semantic index */ + NULL, + /* outputs */ + num_outputs, + stvp->result_to_output, + stvp->output_semantic_name, + stvp->output_semantic_index, + key->passthrough_edgeflags ); if (error) goto fail; @@ -393,6 +412,151 @@ st_get_vp_variant(struct st_context *st, return vpv; } +/** + * Translate Mesa fragment shader attributes to TGSI attributes. + * \return GL_TRUE if color output should be written to all render targets, + * GL_FALSE if not + */ +GLboolean +st_prepare_fragment_program(struct gl_context *ctx, + struct st_fragment_program *stfp) +{ + GLuint attr; + const GLbitfield inputsRead = stfp->Base.Base.InputsRead; + GLboolean write_all = GL_FALSE; + + /* + * Convert Mesa program inputs to TGSI input register semantics. + */ + for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) { + if (inputsRead & (1 << attr)) { + const GLuint slot = stfp->num_inputs++; + + stfp->input_to_index[attr] = slot; + + switch (attr) { + case FRAG_ATTRIB_WPOS: + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_POSITION; + stfp->input_semantic_index[slot] = 0; + stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR; + break; + case FRAG_ATTRIB_COL0: + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + stfp->input_semantic_index[slot] = 0; + stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR; + break; + case FRAG_ATTRIB_COL1: + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + stfp->input_semantic_index[slot] = 1; + stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR; + break; + case FRAG_ATTRIB_FOGC: + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FOG; + stfp->input_semantic_index[slot] = 0; + stfp->interp_mode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; + break; + case FRAG_ATTRIB_FACE: + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FACE; + stfp->input_semantic_index[slot] = 0; + stfp->interp_mode[slot] = TGSI_INTERPOLATE_CONSTANT; + break; + /* In most cases, there is nothing special about these + * inputs, so adopt a convention to use the generic + * semantic name and the mesa FRAG_ATTRIB_ number as the + * index. + * + * All that is required is that the vertex shader labels + * its own outputs similarly, and that the vertex shader + * generates at least every output required by the + * fragment shader plus fixed-function hardware (such as + * BFC). + * + * There is no requirement that semantic indexes start at + * zero or be restricted to a particular range -- nobody + * should be building tables based on semantic index. + */ + case FRAG_ATTRIB_PNTC: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + case FRAG_ATTRIB_VAR0: + default: + /* Actually, let's try and zero-base this just for + * readability of the generated TGSI. + */ + assert(attr >= FRAG_ATTRIB_TEX0); + stfp->input_semantic_index[slot] = (attr - FRAG_ATTRIB_TEX0); + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + if (attr == FRAG_ATTRIB_PNTC) + stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR; + else + stfp->interp_mode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; + break; + } + } + else { + stfp->input_to_index[attr] = -1; + } + } + + /* + * Semantics and mapping for outputs + */ + { + uint numColors = 0; + GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten; + + /* if z is written, emit that first */ + if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_POSITION; + stfp->output_semantic_index[stfp->num_outputs] = 0; + stfp->result_to_output[FRAG_RESULT_DEPTH] = stfp->num_outputs; + stfp->num_outputs++; + outputsWritten &= ~(1 << FRAG_RESULT_DEPTH); + } + + if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) { + stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_STENCIL; + stfp->output_semantic_index[stfp->num_outputs] = 0; + stfp->result_to_output[FRAG_RESULT_STENCIL] = stfp->num_outputs; + stfp->num_outputs++; + outputsWritten &= ~(1 << FRAG_RESULT_STENCIL); + } + + /* handle remaning outputs (color) */ + for (attr = 0; attr < FRAG_RESULT_MAX; attr++) { + if (outputsWritten & BITFIELD64_BIT(attr)) { + switch (attr) { + case FRAG_RESULT_DEPTH: + case FRAG_RESULT_STENCIL: + /* handled above */ + assert(0); + break; + case FRAG_RESULT_COLOR: + write_all = GL_TRUE; /* fallthrough */ + default: + assert(attr == FRAG_RESULT_COLOR || + (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX)); + stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_COLOR; + stfp->output_semantic_index[stfp->num_outputs] = numColors; + stfp->result_to_output[attr] = stfp->num_outputs; + numColors++; + break; + } + + stfp->num_outputs++; + } + } + } + + return write_all; +} + /** * Translate a Mesa fragment shader into a TGSI shader using extra info in @@ -445,155 +609,12 @@ st_translate_fragment_program(struct st_context *st, if (!stfp->tgsi.tokens) { /* need to translate Mesa instructions to TGSI now */ - GLuint outputMapping[FRAG_RESULT_MAX]; - GLuint inputMapping[FRAG_ATTRIB_MAX]; - GLuint interpMode[PIPE_MAX_SHADER_INPUTS]; /* XXX size? */ - GLuint attr; enum pipe_error error; - const GLbitfield inputsRead = stfp->Base.Base.InputsRead; struct ureg_program *ureg; - GLboolean write_all = GL_FALSE; - - ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; - ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; - uint fs_num_inputs = 0; - - ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; - ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; - uint fs_num_outputs = 0; - - + GLboolean write_all = st_prepare_fragment_program(st->ctx, stfp); + _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT); - /* - * Convert Mesa program inputs to TGSI input register semantics. - */ - for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) { - if (inputsRead & (1 << attr)) { - const GLuint slot = fs_num_inputs++; - - inputMapping[attr] = slot; - - switch (attr) { - case FRAG_ATTRIB_WPOS: - input_semantic_name[slot] = TGSI_SEMANTIC_POSITION; - input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_LINEAR; - break; - case FRAG_ATTRIB_COL0: - input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_LINEAR; - break; - case FRAG_ATTRIB_COL1: - input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - input_semantic_index[slot] = 1; - interpMode[slot] = TGSI_INTERPOLATE_LINEAR; - break; - case FRAG_ATTRIB_FOGC: - input_semantic_name[slot] = TGSI_SEMANTIC_FOG; - input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; - break; - case FRAG_ATTRIB_FACE: - input_semantic_name[slot] = TGSI_SEMANTIC_FACE; - input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_CONSTANT; - break; - /* In most cases, there is nothing special about these - * inputs, so adopt a convention to use the generic - * semantic name and the mesa FRAG_ATTRIB_ number as the - * index. - * - * All that is required is that the vertex shader labels - * its own outputs similarly, and that the vertex shader - * generates at least every output required by the - * fragment shader plus fixed-function hardware (such as - * BFC). - * - * There is no requirement that semantic indexes start at - * zero or be restricted to a particular range -- nobody - * should be building tables based on semantic index. - */ - case FRAG_ATTRIB_PNTC: - case FRAG_ATTRIB_TEX0: - case FRAG_ATTRIB_TEX1: - case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: - case FRAG_ATTRIB_TEX4: - case FRAG_ATTRIB_TEX5: - case FRAG_ATTRIB_TEX6: - case FRAG_ATTRIB_TEX7: - case FRAG_ATTRIB_VAR0: - default: - /* Actually, let's try and zero-base this just for - * readability of the generated TGSI. - */ - assert(attr >= FRAG_ATTRIB_TEX0); - input_semantic_index[slot] = (attr - FRAG_ATTRIB_TEX0); - input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - if (attr == FRAG_ATTRIB_PNTC) - interpMode[slot] = TGSI_INTERPOLATE_LINEAR; - else - interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; - break; - } - } - else { - inputMapping[attr] = -1; - } - } - - /* - * Semantics and mapping for outputs - */ - { - uint numColors = 0; - GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten; - - /* if z is written, emit that first */ - if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { - fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION; - fs_output_semantic_index[fs_num_outputs] = 0; - outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs; - fs_num_outputs++; - outputsWritten &= ~(1 << FRAG_RESULT_DEPTH); - } - - if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) { - fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL; - fs_output_semantic_index[fs_num_outputs] = 0; - outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs; - fs_num_outputs++; - outputsWritten &= ~(1 << FRAG_RESULT_STENCIL); - } - - /* handle remaning outputs (color) */ - for (attr = 0; attr < FRAG_RESULT_MAX; attr++) { - if (outputsWritten & BITFIELD64_BIT(attr)) { - switch (attr) { - case FRAG_RESULT_DEPTH: - case FRAG_RESULT_STENCIL: - /* handled above */ - assert(0); - break; - case FRAG_RESULT_COLOR: - write_all = GL_TRUE; /* fallthrough */ - default: - assert(attr == FRAG_RESULT_COLOR || - (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX)); - fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR; - fs_output_semantic_index[fs_num_outputs] = numColors; - outputMapping[attr] = fs_num_outputs; - numColors++; - break; - } - - fs_num_outputs++; - } - } - } - ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT ); if (ureg == NULL) return NULL; @@ -606,21 +627,39 @@ st_translate_fragment_program(struct st_context *st, if (write_all == GL_TRUE) ureg_property_fs_color0_writes_all_cbufs(ureg, 1); - error = st_translate_mesa_program(st->ctx, - TGSI_PROCESSOR_FRAGMENT, - ureg, - &stfp->Base.Base, - /* inputs */ - fs_num_inputs, - inputMapping, - input_semantic_name, - input_semantic_index, - interpMode, - /* outputs */ - fs_num_outputs, - outputMapping, - fs_output_semantic_name, - fs_output_semantic_index, FALSE ); + if (stfp->glsl_to_tgsi) + error = st_translate_program(st->ctx, + TGSI_PROCESSOR_FRAGMENT, + ureg, + stfp->glsl_to_tgsi, + &stfp->Base.Base, + /* inputs */ + stfp->num_inputs, + stfp->input_to_index, + stfp->input_semantic_name, + stfp->input_semantic_index, + stfp->interp_mode, + /* outputs */ + stfp->num_outputs, + stfp->result_to_output, + stfp->output_semantic_name, + stfp->output_semantic_index, FALSE ); + else + error = st_translate_mesa_program(st->ctx, + TGSI_PROCESSOR_FRAGMENT, + ureg, + &stfp->Base.Base, + /* inputs */ + stfp->num_inputs, + stfp->input_to_index, + stfp->input_semantic_name, + stfp->input_semantic_index, + stfp->interp_mode, + /* outputs */ + stfp->num_outputs, + stfp->result_to_output, + stfp->output_semantic_name, + stfp->output_semantic_index, FALSE ); stfp->tgsi.tokens = ureg_get_tokens( ureg, NULL ); ureg_destroy( ureg ); diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index c4244df939e..67723de6d53 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -38,6 +38,7 @@ #include "program/program.h" #include "pipe/p_state.h" #include "st_context.h" +#include "st_glsl_to_tgsi.h" /** Fragment program variant key */ @@ -83,6 +84,22 @@ struct st_fp_variant struct st_fragment_program { struct gl_fragment_program Base; + struct glsl_to_tgsi_visitor* glsl_to_tgsi; + + /** maps a Mesa FRAG_ATTRIB_x to a packed TGSI input index */ + GLuint input_to_index[FRAG_ATTRIB_MAX]; + /** maps a TGSI input index back to a Mesa FRAG_ATTRIB_x */ + GLuint index_to_input[PIPE_MAX_SHADER_INPUTS]; + ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; + ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; + GLuint num_inputs; + GLuint interp_mode[PIPE_MAX_SHADER_INPUTS]; /* XXX size? */ + + /** Maps FRAG_RESULT_x to slot */ + GLuint result_to_output[FRAG_RESULT_MAX]; + ubyte output_semantic_name[FRAG_RESULT_MAX]; + ubyte output_semantic_index[FRAG_RESULT_MAX]; + GLuint num_outputs; struct pipe_shader_state tgsi; @@ -136,6 +153,7 @@ struct st_vp_variant struct st_vertex_program { struct gl_vertex_program Base; /**< The Mesa vertex program */ + struct glsl_to_tgsi_visitor* glsl_to_tgsi; /** maps a Mesa VERT_ATTRIB_x to a packed TGSI input index */ GLuint input_to_index[VERT_ATTRIB_MAX]; @@ -184,6 +202,7 @@ struct st_gp_variant struct st_geometry_program { struct gl_geometry_program Base; /**< The Mesa geometry program */ + struct glsl_to_tgsi_visitor* glsl_to_tgsi; /** map GP input back to VP output */ GLuint input_map[PIPE_MAX_SHADER_INPUTS]; @@ -276,6 +295,14 @@ st_get_gp_variant(struct st_context *st, const struct st_gp_variant_key *key); +extern void +st_prepare_vertex_program(struct gl_context *ctx, + struct st_vertex_program *stvp); + +extern GLboolean +st_prepare_fragment_program(struct gl_context *ctx, + struct st_fragment_program *stfp); + extern void st_release_vp_variants( struct st_context *st, -- cgit v1.2.3 From 1e5fd8e480b661c1ab748c2ded587650ea7f3d20 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 29 Apr 2011 19:00:24 -0500 Subject: mesa: fix segfault when no Mesa IR is generated --- src/mesa/program/program.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index 78efca9f122..224446a2683 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -388,8 +388,9 @@ _mesa_delete_program(struct gl_context *ctx, struct gl_program *prog) if (prog->String) free(prog->String); - _mesa_free_instructions(prog->Instructions, prog->NumInstructions); - + if (prog->Instructions) { + _mesa_free_instructions(prog->Instructions, prog->NumInstructions); + } if (prog->Parameters) { _mesa_free_parameter_list(prog->Parameters); } -- cgit v1.2.3 From 44867da3543ca54ef245695cef72a6e305451d93 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 29 Apr 2011 19:24:57 -0500 Subject: glsl_to_tgsi: stop generating Mesa IR Before, it was still generating unused Mesa IR as a remnant of ir_to_mesa, and depended on some of the information from it. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 290 +++-------------------------- src/mesa/state_tracker/st_program.c | 13 +- 2 files changed, 33 insertions(+), 270 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index e1102503ee0..c562abc96c9 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -254,8 +254,9 @@ public: struct gl_shader_compiler_options *options; int next_temp; - + int num_address_regs; + int samplers_used; bool indirect_addr_temps; bool indirect_addr_consts; @@ -2310,170 +2311,23 @@ extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) delete v; } -static struct prog_src_register -mesa_st_src_reg_from_ir_st_src_reg(st_src_reg reg) -{ - struct prog_src_register mesa_reg; - - mesa_reg.File = reg.file; - assert(reg.index < (1 << INST_INDEX_BITS)); - mesa_reg.Index = reg.index; - mesa_reg.Swizzle = reg.swizzle; - mesa_reg.RelAddr = reg.reladdr != NULL; - mesa_reg.Negate = reg.negate; - mesa_reg.Abs = 0; - mesa_reg.HasIndex2 = GL_FALSE; - mesa_reg.RelAddr2 = 0; - mesa_reg.Index2 = 0; - - return mesa_reg; -} - -static void -set_branchtargets(glsl_to_tgsi_visitor *v, - struct prog_instruction *mesa_instructions, - int num_instructions) -{ - int if_count = 0, loop_count = 0; - int *if_stack, *loop_stack; - int if_stack_pos = 0, loop_stack_pos = 0; - int i, j; - - for (i = 0; i < num_instructions; i++) { - switch (mesa_instructions[i].Opcode) { - case OPCODE_IF: - if_count++; - break; - case OPCODE_BGNLOOP: - loop_count++; - break; - case OPCODE_BRK: - case OPCODE_CONT: - mesa_instructions[i].BranchTarget = -1; - break; - default: - break; - } - } - - if_stack = rzalloc_array(v->mem_ctx, int, if_count); - loop_stack = rzalloc_array(v->mem_ctx, int, loop_count); - - for (i = 0; i < num_instructions; i++) { - switch (mesa_instructions[i].Opcode) { - case OPCODE_IF: - if_stack[if_stack_pos] = i; - if_stack_pos++; - break; - case OPCODE_ELSE: - mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; - if_stack[if_stack_pos - 1] = i; - break; - case OPCODE_ENDIF: - mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; - if_stack_pos--; - break; - case OPCODE_BGNLOOP: - loop_stack[loop_stack_pos] = i; - loop_stack_pos++; - break; - case OPCODE_ENDLOOP: - loop_stack_pos--; - /* Rewrite any breaks/conts at this nesting level (haven't - * already had a BranchTarget assigned) to point to the end - * of the loop. - */ - for (j = loop_stack[loop_stack_pos]; j < i; j++) { - if (mesa_instructions[j].Opcode == OPCODE_BRK || - mesa_instructions[j].Opcode == OPCODE_CONT) { - if (mesa_instructions[j].BranchTarget == -1) { - mesa_instructions[j].BranchTarget = i; - } - } - } - /* The loop ends point at each other. */ - mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos]; - mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i; - break; - case OPCODE_CAL: - foreach_iter(exec_list_iterator, iter, v->function_signatures) { - function_entry *entry = (function_entry *)iter.get(); - - if (entry->sig_id == mesa_instructions[i].BranchTarget) { - mesa_instructions[i].BranchTarget = entry->inst; - break; - } - } - break; - default: - break; - } - } -} - -static void -print_program(struct prog_instruction *mesa_instructions, - ir_instruction **mesa_instruction_annotation, - int num_instructions) -{ - /*ir_instruction *last_ir = NULL;*/ - int i; - int indent = 0; - - for (i = 0; i < num_instructions; i++) { - struct prog_instruction *mesa_inst = mesa_instructions + i; - - fprintf(stdout, "%3d: ", i); - -#if 0 -/* Disable this for now, since printing GLSL IR along with its corresponding - * Mesa IR makes the Mesa IR unreadable. */ - ir_instruction *ir = mesa_instruction_annotation[i]; - if (last_ir != ir && ir) { - int j; - - for (j = 0; j < indent; j++) { - fprintf(stdout, " "); - } - ir->print(); - printf("\n"); - last_ir = ir; - - fprintf(stdout, " "); /* line number spacing. */ - } -#endif - - indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent, - PROG_PRINT_DEBUG, NULL); - } -} - /** * Count resources used by the given gpu program (number of texture * samplers, etc). */ static void -count_resources(struct gl_program *prog) +count_resources(glsl_to_tgsi_visitor *v) { - unsigned int i; + v->samplers_used = 0; - prog->SamplersUsed = 0; - - for (i = 0; i < prog->NumInstructions; i++) { - struct prog_instruction *inst = &prog->Instructions[i]; + foreach_iter(exec_list_iterator, iter, v->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - if (_mesa_is_tex_instruction(inst->Opcode)) { - prog->SamplerTargets[inst->TexSrcUnit] = - (gl_texture_index)inst->TexSrcTarget; - prog->SamplersUsed |= 1 << inst->TexSrcUnit; - if (inst->TexShadow) { - prog->ShadowSamplers |= 1 << inst->TexSrcUnit; - } + if (_mesa_is_tex_instruction(inst->op)) { + v->samplers_used |= 1 << inst->sampler; } } - - _mesa_update_shader_textures_used(prog); } @@ -2487,34 +2341,35 @@ count_resources(struct gl_program *prog) static void check_resources(const struct gl_context *ctx, struct gl_shader_program *shader_program, - struct gl_program *prog) + glsl_to_tgsi_visitor *prog, + struct gl_program *proginfo) { - switch (prog->Target) { + switch (proginfo->Target) { case GL_VERTEX_PROGRAM_ARB: - if (_mesa_bitcount(prog->SamplersUsed) > + if (_mesa_bitcount(prog->samplers_used) > ctx->Const.MaxVertexTextureImageUnits) { fail_link(shader_program, "Too many vertex shader texture samplers"); } - if (prog->Parameters->NumParameters > MAX_UNIFORMS) { + if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { fail_link(shader_program, "Too many vertex shader constants"); } break; case MESA_GEOMETRY_PROGRAM: - if (_mesa_bitcount(prog->SamplersUsed) > + if (_mesa_bitcount(prog->samplers_used) > ctx->Const.MaxGeometryTextureImageUnits) { fail_link(shader_program, "Too many geometry shader texture samplers"); } - if (prog->Parameters->NumParameters > + if (proginfo->Parameters->NumParameters > MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) { fail_link(shader_program, "Too many geometry shader constants"); } break; case GL_FRAGMENT_PROGRAM_ARB: - if (_mesa_bitcount(prog->SamplersUsed) > + if (_mesa_bitcount(prog->samplers_used) > ctx->Const.MaxTextureImageUnits) { fail_link(shader_program, "Too many fragment shader texture samplers"); } - if (prog->Parameters->NumParameters > MAX_UNIFORMS) { + if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) { fail_link(shader_program, "Too many fragment shader constants"); } break; @@ -3767,8 +3622,6 @@ st_translate_program( t->pointSizeOutIndex = -1; t->prevInstWrotePointSize = GL_FALSE; - /*_mesa_print_program(program);*/ - /* * Declare input attributes. */ @@ -3952,8 +3805,7 @@ st_translate_program( /* texture samplers */ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { - // XXX: depends on SamplersUsed property generated by conversion to Mesa IR - if (proginfo->SamplersUsed & (1 << i)) { + if (program->samplers_used & (1 << i)) { t->samplers[i] = ureg_DECL_sampler( ureg, i ); } } @@ -4006,7 +3858,8 @@ out: /* ----------------------------- End TGSI code ------------------------------ */ /** - * Convert a shader's GLSL IR into both a Mesa gl_program and a TGSI shader. + * Convert a shader's GLSL IR into a Mesa gl_program, although without + * generating Mesa IR. */ static struct gl_program * get_mesa_program(struct gl_context *ctx, @@ -4014,9 +3867,6 @@ get_mesa_program(struct gl_context *ctx, struct gl_shader *shader) { glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor(); - struct prog_instruction *mesa_instructions, *mesa_inst; - ir_instruction **mesa_instruction_annotation; - int i; struct gl_program *prog; GLenum target; const char *target_string; @@ -4110,90 +3960,6 @@ get_mesa_program(struct gl_context *ctx, v->merge_registers(); v->renumber_registers(); - prog->NumTemporaries = v->next_temp; - - int num_instructions = 0; - foreach_iter(exec_list_iterator, iter, v->instructions) { - num_instructions++; - } - - mesa_instructions = - (struct prog_instruction *)calloc(num_instructions, - sizeof(*mesa_instructions)); - mesa_instruction_annotation = ralloc_array(v->mem_ctx, ir_instruction *, - num_instructions); - - /* Convert glsl_to_tgsi_instructions into Mesa IR prog_instructions. - * TODO: remove - */ - mesa_inst = mesa_instructions; - i = 0; - foreach_iter(exec_list_iterator, iter, v->instructions) { - const glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - - mesa_inst->Opcode = inst->op; - mesa_inst->CondUpdate = inst->cond_update; - if (inst->saturate) - mesa_inst->SaturateMode = SATURATE_ZERO_ONE; - mesa_inst->DstReg.File = inst->dst.file; - mesa_inst->DstReg.Index = inst->dst.index; - mesa_inst->DstReg.CondMask = inst->dst.cond_mask; - mesa_inst->DstReg.WriteMask = inst->dst.writemask; - mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL; - mesa_inst->SrcReg[0] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[0]); - mesa_inst->SrcReg[1] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[1]); - mesa_inst->SrcReg[2] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[2]); - mesa_inst->TexSrcUnit = inst->sampler; - mesa_inst->TexSrcTarget = inst->tex_target; - mesa_inst->TexShadow = inst->tex_shadow; - mesa_instruction_annotation[i] = inst->ir; - - /* Set IndirectRegisterFiles. */ - if (mesa_inst->DstReg.RelAddr) - prog->IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File; - - /* Update program's bitmask of indirectly accessed register files */ - for (unsigned src = 0; src < 3; src++) - if (mesa_inst->SrcReg[src].RelAddr) - prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File; - - if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) { - fail_link(shader_program, "Couldn't flatten if statement\n"); - } - - switch (mesa_inst->Opcode) { - case OPCODE_BGNSUB: - inst->function->inst = i; - mesa_inst->Comment = strdup(inst->function->sig->function_name()); - break; - case OPCODE_ENDSUB: - mesa_inst->Comment = strdup(inst->function->sig->function_name()); - break; - case OPCODE_CAL: - mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */ - break; - case OPCODE_ARL: - prog->NumAddressRegs = 1; - break; - default: - break; - } - - mesa_inst++; - i++; - - if (!shader_program->LinkStatus) - break; - } - - if (!shader_program->LinkStatus) { - free(mesa_instructions); - _mesa_reference_program(ctx, &shader->Program, NULL); - return NULL; - } - - set_branchtargets(v, mesa_instructions, num_instructions); - if (ctx->Shader.Flags & GLSL_DUMP) { printf("\n"); printf("GLSL IR for linked %s program %d:\n", target_string, @@ -4201,25 +3967,17 @@ get_mesa_program(struct gl_context *ctx, _mesa_print_ir(shader->ir, NULL); printf("\n"); printf("\n"); - printf("Mesa IR for linked %s program %d:\n", target_string, - shader_program->Name); - print_program(mesa_instructions, mesa_instruction_annotation, - num_instructions); } - prog->Instructions = mesa_instructions; - prog->NumInstructions = num_instructions; + prog->Instructions = NULL; + prog->NumInstructions = 0; do_set_program_inouts(shader->ir, prog); - count_resources(prog); + count_resources(v); - check_resources(ctx, shader_program, prog); + check_resources(ctx, shader_program, v, prog); _mesa_reference_program(ctx, &shader->Program, prog); - - if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) { - _mesa_optimize_program(ctx, prog); - } struct st_vertex_program *stvp; struct st_fragment_program *stfp; diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index dd618424d66..6d395128295 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -186,7 +186,8 @@ st_prepare_vertex_program(struct gl_context *ctx, if (stvp->Base.IsPositionInvariant) _mesa_insert_mvp_code(ctx, &stvp->Base); - assert(stvp->Base.Base.NumInstructions > 1); + if (!stvp->glsl_to_tgsi) + assert(stvp->Base.Base.NumInstructions > 1); /* * Determine number of inputs, the mappings between VERT_ATTRIB_x @@ -294,8 +295,11 @@ st_translate_vertex_program(struct st_context *st, st_prepare_vertex_program(st->ctx, stvp); - _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT); - _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING); + if (!stvp->glsl_to_tgsi) + { + _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT); + _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING); + } ureg = ureg_create( TGSI_PROCESSOR_VERTEX ); if (ureg == NULL) { @@ -613,7 +617,8 @@ st_translate_fragment_program(struct st_context *st, struct ureg_program *ureg; GLboolean write_all = st_prepare_fragment_program(st->ctx, stfp); - _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT); + if (!stfp->glsl_to_tgsi) + _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT); ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT ); if (ureg == NULL) -- cgit v1.2.3 From c341d3cfd0ddbabf6274212b7f0da1a25854a673 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sat, 30 Apr 2011 13:03:33 -0500 Subject: glsl_to_tgsi: remove reads to output registers Fixes a regression in 0 A.D. introduced by 809a11c77073e999fd47. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 91 ++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index c562abc96c9..5ea03b4424e 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -100,6 +100,15 @@ public: this->reladdr = NULL; } + st_src_reg(gl_register_file file, int index) + { + this->file = file; + this->index = index; + this->swizzle = SWIZZLE_XYZW; + this->negate = 0; + this->reladdr = NULL; + } + st_src_reg() { this->file = PROGRAM_UNDEFINED; @@ -346,6 +355,8 @@ public: bool process_move_condition(ir_rvalue *ir); + void remove_output_reads(gl_register_file type); + void rename_temp_register(int index, int new_index); int get_first_temp_read(int index); int get_first_temp_write(int index); @@ -2595,6 +2606,81 @@ set_uniform_initializers(struct gl_context *ctx, ralloc_free(mem_ctx); } +/* + * Scan/rewrite program to remove reads of custom (output) registers. + * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING + * (for vertex shaders). + * In GLSL shaders, varying vars can be read and written. + * On some hardware, trying to read an output register causes trouble. + * So, rewrite the program to use a temporary register in this case. + * + * Based on _mesa_remove_output_reads from programopt.c. + */ +void +glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) +{ + GLuint i; + GLint outputMap[VERT_RESULT_MAX]; + GLuint numVaryingReads = 0; + GLboolean usedTemps[MAX_PROGRAM_TEMPS]; + GLuint firstTemp = 0; + + _mesa_find_used_registers(prog, PROGRAM_TEMPORARY, + usedTemps, MAX_PROGRAM_TEMPS); + + assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT); + assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING); + + for (i = 0; i < VERT_RESULT_MAX; i++) + outputMap[i] = -1; + + /* look for instructions which read from varying vars */ + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + const GLuint numSrc = _mesa_num_inst_src_regs(inst->op); + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->src[j].file == type) { + /* replace the read with a temp reg */ + const GLuint var = inst->src[j].index; + if (outputMap[var] == -1) { + numVaryingReads++; + outputMap[var] = _mesa_find_free_register(usedTemps, + MAX_PROGRAM_TEMPS, + firstTemp); + firstTemp = outputMap[var] + 1; + } + inst->src[j].file = PROGRAM_TEMPORARY; + inst->src[j].index = outputMap[var]; + } + } + } + + if (numVaryingReads == 0) + return; /* nothing to be done */ + + /* look for instructions which write to the varying vars identified above */ + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) { + /* change inst to write to the temp reg, instead of the varying */ + inst->dst.file = PROGRAM_TEMPORARY; + inst->dst.index = outputMap[inst->dst.index]; + } + } + + /* insert new MOV instructions at the end */ + for (i = 0; i < VERT_RESULT_MAX; i++) { + if (outputMap[i] >= 0) { + /* MOV VAR[i], TEMP[tmp]; */ + st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i]); + st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW); + dst.index = i; + this->emit(NULL, OPCODE_MOV, dst, src); + } + } +} + /* Replaces all references to a temporary register index with another index. */ void glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) @@ -3954,6 +4040,11 @@ get_mesa_program(struct gl_context *ctx, } #endif + /* Remove reads to output registers, and to varyings in vertex shaders. */ + v->remove_output_reads(PROGRAM_OUTPUT); + if (target == GL_VERTEX_PROGRAM_ARB) + v->remove_output_reads(PROGRAM_VARYING); + /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ v->copy_propagate(); v->eliminate_dead_code(); -- cgit v1.2.3 From 556bd82ce1227a568d69dfa0c22841986267d39f Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sat, 30 Apr 2011 13:44:32 -0500 Subject: glsl_to_tgsi: remove a bad assertion It was triggered by Alien Arena. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 5ea03b4424e..aa63539e5e8 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3077,11 +3077,11 @@ glsl_to_tgsi_visitor::merge_registers(void) /* Start looking for registers with non-overlapping usages that can be * merged together. */ - for (i=0; i < this->next_temp - 1; i++) { + for (i=0; i < this->next_temp; i++) { /* Don't touch unused registers. */ if (last_reads[i] < 0 || first_writes[i] < 0) continue; - for (j=i+1; j < this->next_temp; j++) { + for (j=0; j < this->next_temp; j++) { /* Don't touch unused registers. */ if (last_reads[j] < 0 || first_writes[j] < 0) continue; @@ -3089,8 +3089,9 @@ glsl_to_tgsi_visitor::merge_registers(void) * in the same instruction as the last read from i. Note that the * register at index i will always be used earlier or at the same time * as the register at index j. */ - assert(first_writes[i] <= first_writes[j]); - if (last_reads[i] <= first_writes[j]) { + if (first_writes[i] <= first_writes[j] && + last_reads[i] <= first_writes[j]) + { rename_temp_register(j, i); /* Replace all references to j with i.*/ /* Update the first_writes and last_reads arrays with the new -- cgit v1.2.3 From 5768ed6429937940bd48f5de4f8383273952880a Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sat, 30 Apr 2011 21:17:38 -0500 Subject: glsl_to_tgsi: define the sampler objects used Fixes the Nexuiz title screen and the water in 0 A.D. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index aa63539e5e8..5f3f0ba295a 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -2328,7 +2328,7 @@ extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) * samplers, etc). */ static void -count_resources(glsl_to_tgsi_visitor *v) +count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) { v->samplers_used = 0; @@ -2337,8 +2337,17 @@ count_resources(glsl_to_tgsi_visitor *v) if (_mesa_is_tex_instruction(inst->op)) { v->samplers_used |= 1 << inst->sampler; + + prog->SamplerTargets[inst->sampler] = + (gl_texture_index)inst->tex_target; + if (inst->tex_shadow) { + prog->ShadowSamplers |= 1 << inst->sampler; + } } } + + prog->SamplersUsed = v->samplers_used; + _mesa_update_shader_textures_used(prog); } @@ -4065,7 +4074,7 @@ get_mesa_program(struct gl_context *ctx, prog->NumInstructions = 0; do_set_program_inouts(shader->ir, prog); - count_resources(v); + count_resources(v, prog); check_resources(ctx, shader_program, v, prog); -- cgit v1.2.3 From a6705aa5ca151278ed1e596b68a327afd1405b9e Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sat, 30 Apr 2011 23:17:11 -0500 Subject: glsl_to_tgsi: lower noise opcodes when converting from GLSL IR, not when generating TGSI --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 5f3f0ba295a..08c6a7b2dd3 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1275,12 +1275,13 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_unop_noise: { - const enum prog_opcode opcode = - prog_opcode(OPCODE_NOISE1 - + (ir->operands[0]->type->vector_elements) - 1); - assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4)); - - emit(ir, opcode, result_dst, op[0]); + /* At some point, a motivated person could add a better + * implementation of noise. Currently not even the nvidia + * binary drivers do anything more than this. In any case, the + * place to do this is in the GL state tracker, not the poor + * driver. + */ + emit(ir, OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); break; } @@ -3484,13 +3485,7 @@ compile_tgsi_instruction(struct st_translate *t, case OPCODE_NOISE2: case OPCODE_NOISE3: case OPCODE_NOISE4: - /* At some point, a motivated person could add a better - * implementation of noise. Currently not even the nvidia - * binary drivers do anything more than this. In any case, the - * place to do this is in the GL state tracker, not the poor - * driver. - */ - ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) ); + assert(!"OPCODE_NOISE should have been lowered\n"); break; case OPCODE_DDY: -- cgit v1.2.3 From 3b0858f1aed83e2d90449f042d625c86ac7b93ed Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sun, 1 May 2011 11:55:03 -0500 Subject: glsl_to_tgsi: support DDY (ir_unop_dFdy) --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 08c6a7b2dd3..eed9bb0819e 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1271,6 +1271,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) emit(ir, OPCODE_DDX, result_dst, op[0]); break; case ir_unop_dFdy: + op[0].negate = ~op[0].negate; emit(ir, OPCODE_DDY, result_dst, op[0]); break; @@ -3487,12 +3488,6 @@ compile_tgsi_instruction(struct st_translate *t, case OPCODE_NOISE4: assert(!"OPCODE_NOISE should have been lowered\n"); break; - - case OPCODE_DDY: - // TODO: copy emit_ddy() function from st_mesa_to_tgsi.c - assert(!"OPCODE_DDY"); - //emit_ddy( t, dst[0], &inst->src[0] ); - break; default: ureg_insn( ureg, -- cgit v1.2.3 From 56dc2c176c3ef0d4d5abea54ff4035b062262286 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sun, 1 May 2011 21:49:21 -0500 Subject: glsl_to_tgsi: use TGSI opcodes when converting from GLSL IR Before, the translator used Mesa IR opcodes (a holdover from ir_to_mesa) and converted them to TGSI opcodes during TGSI emission. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 550 ++++++++++++----------------- 1 file changed, 217 insertions(+), 333 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index eed9bb0819e..4cb2f377e98 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -27,7 +27,7 @@ /** * \file glsl_to_tgsi.cpp * - * Translate GLSL IR to Mesa's gl_program representation and to TGSI. + * Translate GLSL IR to TGSI. */ #include @@ -63,11 +63,12 @@ extern "C" { #include "pipe/p_state.h" #include "util/u_math.h" #include "tgsi/tgsi_ureg.h" -#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_info.h" #include "st_context.h" #include "st_program.h" #include "st_glsl_to_tgsi.h" #include "st_mesa_to_tgsi.h" +} #define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ (1 << PROGRAM_ENV_PARAM) | \ @@ -75,7 +76,6 @@ extern "C" { (1 << PROGRAM_NAMED_PARAM) | \ (1 << PROGRAM_CONSTANT) | \ (1 << PROGRAM_UNIFORM)) -} class st_src_reg; class st_dst_reg; @@ -83,8 +83,7 @@ class st_dst_reg; static int swizzle_for_size(int size); /** - * This struct is a corresponding struct to Mesa prog_src_register, with - * wider fields. + * This struct is a corresponding struct to TGSI ureg_src. */ class st_src_reg { public: @@ -190,7 +189,7 @@ public: return node; } - enum prog_opcode op; + unsigned op; st_dst_reg dst; st_src_reg src[3]; /** Pointer to the ir source this tree came from for debugging */ @@ -201,7 +200,7 @@ public: int tex_target; /**< One of TEXTURE_*_INDEX */ GLboolean tex_shadow; - class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */ + class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ }; class variable_storage : public exec_node { @@ -317,15 +316,15 @@ public: /** List of glsl_to_tgsi_instruction */ exec_list instructions; - glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op); + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op); - glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op, + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0); - glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op, + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1); - glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op, + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1, st_src_reg src2); @@ -338,13 +337,13 @@ public: st_src_reg src1, unsigned elements); - void emit_scalar(ir_instruction *ir, enum prog_opcode op, + void emit_scalar(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0); - void emit_scalar(ir_instruction *ir, enum prog_opcode op, + void emit_scalar(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1); - void emit_scs(ir_instruction *ir, enum prog_opcode op, + void emit_scs(ir_instruction *ir, unsigned op, st_dst_reg dst, const st_src_reg &src); GLboolean try_emit_mad(ir_expression *ir, @@ -405,8 +404,29 @@ swizzle_for_size(int size) return size_swizzles[size - 1]; } +static bool +is_tex_instruction(unsigned opcode) +{ + const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); + return info->is_tex; +} + +static unsigned +num_inst_dst_regs(unsigned opcode) +{ + const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); + return info->num_dst; +} + +static unsigned +num_inst_src_regs(unsigned opcode) +{ + const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode); + return info->is_tex ? info->num_src - 1 : info->num_src; +} + glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, +glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1, st_src_reg src2) { @@ -427,7 +447,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, reladdr_to_temp(ir, &src0, &num_reladdr); if (dst.reladdr) { - emit(ir, OPCODE_ARL, address_reg, *dst.reladdr); + emit(ir, TGSI_OPCODE_ARL, address_reg, *dst.reladdr); num_reladdr--; } assert(num_reladdr == 0); @@ -441,7 +461,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, inst->function = NULL; - if (op == OPCODE_ARL) + if (op == TGSI_OPCODE_ARL) this->num_address_regs = 1; /* Update indirect addressing status used by TGSI */ @@ -491,14 +511,14 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, +glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1) { return emit(ir, op, dst, src0, src1, undef_src); } glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, +glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0) { assert(dst.writemask != 0); @@ -506,7 +526,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op, } glsl_to_tgsi_instruction * -glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op) +glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op) { return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); } @@ -516,30 +536,30 @@ glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, st_dst_reg dst, st_src_reg src0, st_src_reg src1, unsigned elements) { - static const gl_inst_opcode dot_opcodes[] = { - OPCODE_DP2, OPCODE_DP3, OPCODE_DP4 + static const unsigned dot_opcodes[] = { + TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4 }; emit(ir, dot_opcodes[elements - 2], dst, src0, src1); } /** - * Emits Mesa scalar opcodes to produce unique answers across channels. + * Emits TGSI scalar opcodes to produce unique answers across channels. * - * Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X + * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X * channel determines the result across all channels. So to do a vec4 * of this operation, we want to emit a scalar per source channel used * to produce dest channels. */ void -glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, +glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg orig_src0, st_src_reg orig_src1) { int i, j; int done_mask = ~dst.writemask; - /* Mesa RCP is a scalar operation splatting results to all channels, + /* TGSI RCP is a scalar operation splatting results to all channels, * like ARB_fp/vp. So emit as many RCPs as necessary to cover our * dst channels. */ @@ -577,7 +597,7 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, } void -glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, +glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0) { st_src_reg undef = undef_src; @@ -588,21 +608,21 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, } /** - * Emit an OPCODE_SCS instruction + * Emit an TGSI_OPCODE_SCS instruction * - * The \c SCS opcode functions a bit differently than the other Mesa (or - * ARB_fragment_program) opcodes. Instead of splatting its result across all - * four components of the destination, it writes one value to the \c x - * component and another value to the \c y component. + * The \c SCS opcode functions a bit differently than the other TGSI opcodes. + * Instead of splatting its result across all four components of the + * destination, it writes one value to the \c x component and another value to + * the \c y component. * * \param ir IR instruction being processed - * \param op Either \c OPCODE_SIN or \c OPCODE_COS depending on which - * value is desired. + * \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending + * on which value is desired. * \param dst Destination register * \param src Source register */ void -glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op, +glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, st_dst_reg dst, const st_src_reg &src) { @@ -613,12 +633,12 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op, return; } - const unsigned component = (op == OPCODE_SIN) ? 0 : 1; + const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1; const unsigned scs_mask = (1U << component); int done_mask = ~dst.writemask; st_src_reg tmp; - assert(op == OPCODE_SIN || op == OPCODE_COS); + assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS); /* If there are compnents in the destination that differ from the component * that will be written by the SCS instrution, we'll need a temporary. @@ -661,7 +681,7 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op, /* Emit the SCS instruction. */ - inst = emit(ir, OPCODE_SCS, tmp_dst, src0); + inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0); inst->dst.writemask = scs_mask; /* Move the result of the SCS instruction to the desired location in @@ -669,12 +689,12 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op, */ tmp.swizzle = MAKE_SWIZZLE4(component, component, component, component); - inst = emit(ir, OPCODE_SCS, dst, tmp); + inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp); inst->dst.writemask = this_mask; } else { /* Emit the SCS instruction to write directly to the destination. */ - glsl_to_tgsi_instruction *inst = emit(ir, OPCODE_SCS, dst, src0); + glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0); inst->dst.writemask = scs_mask; } @@ -870,7 +890,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) } else { st_src_reg src(PROGRAM_STATE_VAR, index, NULL); src.swizzle = slots[i].swizzle; - emit(ir, OPCODE_MOV, dst, src); + emit(ir, TGSI_OPCODE_MOV, dst, src); /* even a float takes up a whole vec4 reg in a struct/array. */ dst.index++; } @@ -903,7 +923,7 @@ glsl_to_tgsi_visitor::visit(ir_loop *ir) delete a; } - emit(NULL, OPCODE_BGNLOOP); + emit(NULL, TGSI_OPCODE_BGNLOOP); if (ir->to) { ir_expression *e = @@ -936,7 +956,7 @@ glsl_to_tgsi_visitor::visit(ir_loop *ir) delete e; } - emit(NULL, OPCODE_ENDLOOP); + emit(NULL, TGSI_OPCODE_ENDLOOP); } void @@ -944,10 +964,10 @@ glsl_to_tgsi_visitor::visit(ir_loop_jump *ir) { switch (ir->mode) { case ir_loop_jump::jump_break: - emit(NULL, OPCODE_BRK); + emit(NULL, TGSI_OPCODE_BRK); break; case ir_loop_jump::jump_continue: - emit(NULL, OPCODE_CONT); + emit(NULL, TGSI_OPCODE_CONT); break; } } @@ -1000,7 +1020,7 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) c = this->result; this->result = get_temp(ir->type); - emit(ir, OPCODE_MAD, st_dst_reg(this->result), a, b, c); + emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, c); return true; } @@ -1023,7 +1043,7 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) this->result = get_temp(ir->type); glsl_to_tgsi_instruction *inst; - inst = emit(ir, OPCODE_MOV, st_dst_reg(this->result), src); + inst = emit(ir, TGSI_OPCODE_MOV, st_dst_reg(this->result), src); inst->saturate = true; return true; @@ -1036,135 +1056,18 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, if (!reg->reladdr) return; - emit(ir, OPCODE_ARL, address_reg, *reg->reladdr); + emit(ir, TGSI_OPCODE_ARL, address_reg, *reg->reladdr); if (*num_reladdr != 1) { st_src_reg temp = get_temp(glsl_type::vec4_type); - emit(ir, OPCODE_MOV, st_dst_reg(temp), *reg); + emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg); *reg = temp; } (*num_reladdr)--; } -void -glsl_to_tgsi_visitor::emit_swz(ir_expression *ir) -{ - /* Assume that the vector operator is in a form compatible with OPCODE_SWZ. - * This means that each of the operands is either an immediate value of -1, - * 0, or 1, or is a component from one source register (possibly with - * negation). - */ - uint8_t components[4] = { 0 }; - bool negate[4] = { false }; - ir_variable *var = NULL; - - for (unsigned i = 0; i < ir->type->vector_elements; i++) { - ir_rvalue *op = ir->operands[i]; - - assert(op->type->is_scalar()); - - while (op != NULL) { - switch (op->ir_type) { - case ir_type_constant: { - - assert(op->type->is_scalar()); - - const ir_constant *const c = op->as_constant(); - if (c->is_one()) { - components[i] = SWIZZLE_ONE; - } else if (c->is_zero()) { - components[i] = SWIZZLE_ZERO; - } else if (c->is_negative_one()) { - components[i] = SWIZZLE_ONE; - negate[i] = true; - } else { - assert(!"SWZ constant must be 0.0 or 1.0."); - } - - op = NULL; - break; - } - - case ir_type_dereference_variable: { - ir_dereference_variable *const deref = - (ir_dereference_variable *) op; - - assert((var == NULL) || (deref->var == var)); - components[i] = SWIZZLE_X; - var = deref->var; - op = NULL; - break; - } - - case ir_type_expression: { - ir_expression *const expr = (ir_expression *) op; - - assert(expr->operation == ir_unop_neg); - negate[i] = true; - - op = expr->operands[0]; - break; - } - - case ir_type_swizzle: { - ir_swizzle *const swiz = (ir_swizzle *) op; - - components[i] = swiz->mask.x; - op = swiz->val; - break; - } - - default: - assert(!"Should not get here."); - return; - } - } - } - - assert(var != NULL); - - ir_dereference_variable *const deref = - new(mem_ctx) ir_dereference_variable(var); - - this->result.file = PROGRAM_UNDEFINED; - deref->accept(this); - if (this->result.file == PROGRAM_UNDEFINED) { - ir_print_visitor v; - printf("Failed to get tree for expression operand:\n"); - deref->accept(&v); - exit(1); - } - - st_src_reg src; - - src = this->result; - src.swizzle = MAKE_SWIZZLE4(components[0], - components[1], - components[2], - components[3]); - src.negate = ((unsigned(negate[0]) << 0) - | (unsigned(negate[1]) << 1) - | (unsigned(negate[2]) << 2) - | (unsigned(negate[3]) << 3)); - - /* Storage for our result. Ideally for an assignment we'd be using the - * actual storage for the result here, instead. - */ - const st_src_reg result_src = get_temp(ir->type); - st_dst_reg result_dst = st_dst_reg(result_src); - - /* Limit writes to the channels that will be used by result_src later. - * This does limit this temp's use as a temporary for multi-instruction - * sequences. - */ - result_dst.writemask = (1 << ir->type->vector_elements) - 1; - - emit(ir, OPCODE_SWZ, result_dst, src); - this->result = result_src; -} - void glsl_to_tgsi_visitor::visit(ir_expression *ir) { @@ -1173,7 +1076,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) st_src_reg result_src; st_dst_reg result_dst; - /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c) + /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c) */ if (ir->operation == ir_binop_add) { if (try_emit_mad(ir, 1)) @@ -1184,10 +1087,8 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) if (try_emit_sat(ir)) return; - if (ir->operation == ir_quadop_vector) { - this->emit_swz(ir); - return; - } + if (ir->operation == ir_quadop_vector) + assert(!"ir_quadop_vector should have been lowered"); for (operand = 0; operand < ir->get_num_operands(); operand++) { this->result.file = PROGRAM_UNDEFINED; @@ -1228,51 +1129,51 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: - emit(ir, OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0)); + emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0)); break; case ir_unop_neg: op[0].negate = ~op[0].negate; result_src = op[0]; break; case ir_unop_abs: - emit(ir, OPCODE_ABS, result_dst, op[0]); + emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]); break; case ir_unop_sign: - emit(ir, OPCODE_SSG, result_dst, op[0]); + emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]); break; case ir_unop_rcp: - emit_scalar(ir, OPCODE_RCP, result_dst, op[0]); + emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]); break; case ir_unop_exp2: - emit_scalar(ir, OPCODE_EX2, result_dst, op[0]); + emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]); break; case ir_unop_exp: case ir_unop_log: assert(!"not reached: should be handled by ir_explog_to_explog2"); break; case ir_unop_log2: - emit_scalar(ir, OPCODE_LG2, result_dst, op[0]); + emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]); break; case ir_unop_sin: - emit_scalar(ir, OPCODE_SIN, result_dst, op[0]); + emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]); break; case ir_unop_cos: - emit_scalar(ir, OPCODE_COS, result_dst, op[0]); + emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]); break; case ir_unop_sin_reduced: - emit_scs(ir, OPCODE_SIN, result_dst, op[0]); + emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]); break; case ir_unop_cos_reduced: - emit_scs(ir, OPCODE_COS, result_dst, op[0]); + emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]); break; case ir_unop_dFdx: - emit(ir, OPCODE_DDX, result_dst, op[0]); + emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]); break; case ir_unop_dFdy: op[0].negate = ~op[0].negate; - emit(ir, OPCODE_DDY, result_dst, op[0]); + emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]); break; case ir_unop_noise: { @@ -1282,19 +1183,19 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) * place to do this is in the GL state tracker, not the poor * driver. */ - emit(ir, OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); + emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); break; } case ir_binop_add: - emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); break; case ir_binop_sub: - emit(ir, OPCODE_SUB, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]); break; case ir_binop_mul: - emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); break; case ir_binop_div: assert(!"not reached: should be handled by ir_div_to_mul_rcp"); @@ -1303,33 +1204,33 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_binop_less: - emit(ir, OPCODE_SLT, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); break; case ir_binop_greater: - emit(ir, OPCODE_SGT, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]); break; case ir_binop_lequal: - emit(ir, OPCODE_SLE, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]); break; case ir_binop_gequal: - emit(ir, OPCODE_SGE, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); break; case ir_binop_equal: - emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); break; case ir_binop_nequal: - emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); break; case ir_binop_all_equal: /* "==" operator producing a scalar boolean. */ if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { st_src_reg temp = get_temp(glsl_type::vec4_type); - emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); emit_dp(ir, result_dst, temp, temp, vector_elements); - emit(ir, OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0)); + emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0)); } else { - emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); } break; case ir_binop_any_nequal: @@ -1337,11 +1238,11 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { st_src_reg temp = get_temp(glsl_type::vec4_type); - emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); + emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); emit_dp(ir, result_dst, temp, temp, vector_elements); - emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); } else { - emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); } break; @@ -1349,22 +1250,22 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) assert(ir->operands[0]->type->is_vector()); emit_dp(ir, result_dst, op[0], op[0], ir->operands[0]->type->vector_elements); - emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); break; case ir_binop_logic_xor: - emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); break; case ir_binop_logic_or: /* This could be a saturated add and skip the SNE. */ - emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); - emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); + emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); break; case ir_binop_logic_and: /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ - emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); break; case ir_binop_dot: @@ -1376,15 +1277,15 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_unop_sqrt: /* sqrt(x) = x * rsq(x). */ - emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); - emit(ir, OPCODE_MUL, result_dst, result_src, op[0]); + emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); + emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]); /* For incoming channels <= 0, set the result to 0. */ op[0].negate = ~op[0].negate; - emit(ir, OPCODE_CMP, result_dst, + emit(ir, TGSI_OPCODE_CMP, result_dst, op[0], result_src, st_src_reg_for_float(0.0)); break; case ir_unop_rsq: - emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); + emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); break; case ir_unop_i2f: case ir_unop_b2f: @@ -1393,36 +1294,36 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) result_src = op[0]; break; case ir_unop_f2i: - emit(ir, OPCODE_TRUNC, result_dst, op[0]); + emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; case ir_unop_f2b: case ir_unop_i2b: - emit(ir, OPCODE_SNE, result_dst, + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); break; case ir_unop_trunc: - emit(ir, OPCODE_TRUNC, result_dst, op[0]); + emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; case ir_unop_ceil: op[0].negate = ~op[0].negate; - emit(ir, OPCODE_FLR, result_dst, op[0]); + emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); result_src.negate = ~result_src.negate; break; case ir_unop_floor: - emit(ir, OPCODE_FLR, result_dst, op[0]); + emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]); break; case ir_unop_fract: - emit(ir, OPCODE_FRC, result_dst, op[0]); + emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]); break; case ir_binop_min: - emit(ir, OPCODE_MIN, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]); break; case ir_binop_max: - emit(ir, OPCODE_MAX, result_dst, op[0], op[1]); + emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]); break; case ir_binop_pow: - emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]); + emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]); break; case ir_unop_bit_not: @@ -1586,7 +1487,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) } else { index_reg = get_temp(glsl_type::float_type); - emit(ir, OPCODE_MUL, st_dst_reg(index_reg), + emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), this->result, st_src_reg_for_float(element_size)); } @@ -1728,9 +1629,9 @@ glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) src_ir->accept(this); - /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the + /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the * condition we produced is 0.0 or 1.0. By flipping the sign, we can - * choose which value OPCODE_CMP produces without an extra instruction + * choose which value TGSI_OPCODE_CMP produces without an extra instruction * computing the condition. */ if (negate) @@ -1803,9 +1704,9 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) for (i = 0; i < type_size(ir->lhs->type); i++) { if (switch_order) { - emit(ir, OPCODE_CMP, l, condition, st_src_reg(l), r); + emit(ir, TGSI_OPCODE_CMP, l, condition, st_src_reg(l), r); } else { - emit(ir, OPCODE_CMP, l, condition, r, st_src_reg(l)); + emit(ir, TGSI_OPCODE_CMP, l, condition, r, st_src_reg(l)); } l.index++; @@ -1813,7 +1714,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) } } else { for (i = 0; i < type_size(ir->lhs->type); i++) { - emit(ir, OPCODE_MOV, l, r); + emit(ir, TGSI_OPCODE_MOV, l, r); l.index++; r.index++; } @@ -1849,7 +1750,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) src = this->result; for (i = 0; i < (unsigned int)size; i++) { - emit(ir, OPCODE_MOV, temp, src); + emit(ir, TGSI_OPCODE_MOV, temp, src); src.index++; temp.index++; @@ -1870,7 +1771,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) ir->array_elements[i]->accept(this); src = this->result; for (int j = 0; j < size; j++) { - emit(ir, OPCODE_MOV, temp, src); + emit(ir, TGSI_OPCODE_MOV, temp, src); src.index++; temp.index++; @@ -1893,7 +1794,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) values, ir->type->vector_elements, &src.swizzle); - emit(ir, OPCODE_MOV, mat_column, src); + emit(ir, TGSI_OPCODE_MOV, mat_column, src); mat_column.index++; } @@ -2005,7 +1906,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) l.cond_mask = COND_TR; for (i = 0; i < type_size(param->type); i++) { - emit(ir, OPCODE_MOV, l, r); + emit(ir, TGSI_OPCODE_MOV, l, r); l.index++; r.index++; } @@ -2016,7 +1917,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) assert(!sig_iter.has_next()); /* Emit call instruction */ - call_inst = emit(ir, OPCODE_CAL); + call_inst = emit(ir, TGSI_OPCODE_CAL); call_inst->function = entry; /* Process out parameters. */ @@ -2041,7 +1942,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) st_dst_reg l = st_dst_reg(this->result); for (i = 0; i < type_size(param->type); i++) { - emit(ir, OPCODE_MOV, l, r); + emit(ir, TGSI_OPCODE_MOV, l, r); l.index++; r.index++; } @@ -2061,7 +1962,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) st_src_reg result_src, coord, lod_info, projector, dx, dy; st_dst_reg result_dst, coord_dst; glsl_to_tgsi_instruction *inst = NULL; - prog_opcode opcode = OPCODE_NOP; + unsigned opcode = TGSI_OPCODE_NOP; ir->coordinate->accept(this); @@ -2072,7 +1973,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) */ coord = get_temp(glsl_type::vec4_type); coord_dst = st_dst_reg(coord); - emit(ir, OPCODE_MOV, coord_dst, this->result); + emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); if (ir->projector) { ir->projector->accept(this); @@ -2087,20 +1988,20 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) switch (ir->op) { case ir_tex: - opcode = OPCODE_TEX; + opcode = TGSI_OPCODE_TEX; break; case ir_txb: - opcode = OPCODE_TXB; + opcode = TGSI_OPCODE_TXB; ir->lod_info.bias->accept(this); lod_info = this->result; break; case ir_txl: - opcode = OPCODE_TXL; + opcode = TGSI_OPCODE_TXL; ir->lod_info.lod->accept(this); lod_info = this->result; break; case ir_txd: - opcode = OPCODE_TXD; + opcode = TGSI_OPCODE_TXD; ir->lod_info.grad.dPdx->accept(this); dx = this->result; ir->lod_info.grad.dPdy->accept(this); @@ -2112,25 +2013,25 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) } if (ir->projector) { - if (opcode == OPCODE_TEX) { + if (opcode == TGSI_OPCODE_TEX) { /* Slot the projector in as the last component of the coord. */ coord_dst.writemask = WRITEMASK_W; - emit(ir, OPCODE_MOV, coord_dst, projector); + emit(ir, TGSI_OPCODE_MOV, coord_dst, projector); coord_dst.writemask = WRITEMASK_XYZW; - opcode = OPCODE_TXP; + opcode = TGSI_OPCODE_TXP; } else { st_src_reg coord_w = coord; coord_w.swizzle = SWIZZLE_WWWW; /* For the other TEX opcodes there's no projective version - * since the last slot is taken up by lod info. Do the + * since the last slot is taken up by LOD info. Do the * projective divide now. */ coord_dst.writemask = WRITEMASK_W; - emit(ir, OPCODE_RCP, coord_dst, projector); + emit(ir, TGSI_OPCODE_RCP, coord_dst, projector); /* In the case where we have to project the coordinates "by hand," - * the shadow comparitor value must also be projected. + * the shadow comparator value must also be projected. */ st_src_reg tmp_src = coord; if (ir->shadow_comparitor) { @@ -2143,42 +2044,42 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) st_dst_reg tmp_dst = st_dst_reg(tmp_src); tmp_dst.writemask = WRITEMASK_Z; - emit(ir, OPCODE_MOV, tmp_dst, this->result); + emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); tmp_dst.writemask = WRITEMASK_XY; - emit(ir, OPCODE_MOV, tmp_dst, coord); + emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord); } coord_dst.writemask = WRITEMASK_XYZ; - emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w); + emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); coord_dst.writemask = WRITEMASK_XYZW; coord.swizzle = SWIZZLE_XYZW; } } - /* If projection is done and the opcode is not OPCODE_TXP, then the shadow - * comparitor was put in the correct place (and projected) by the code, + /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow + * comparator was put in the correct place (and projected) by the code, * above, that handles by-hand projection. */ - if (ir->shadow_comparitor && (!ir->projector || opcode == OPCODE_TXP)) { + if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { /* Slot the shadow value in as the second to last component of the * coord. */ ir->shadow_comparitor->accept(this); coord_dst.writemask = WRITEMASK_Z; - emit(ir, OPCODE_MOV, coord_dst, this->result); + emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); coord_dst.writemask = WRITEMASK_XYZW; } - if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) { - /* Mesa IR stores lod or lod bias in the last channel of the coords. */ + if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB) { + /* TGSI stores LOD or LOD bias in the last channel of the coords. */ coord_dst.writemask = WRITEMASK_W; - emit(ir, OPCODE_MOV, coord_dst, lod_info); + emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); coord_dst.writemask = WRITEMASK_XYZW; } - if (opcode == OPCODE_TXD) + if (opcode == TGSI_OPCODE_TXD) inst = emit(ir, opcode, result_dst, coord, dx, dy); else inst = emit(ir, opcode, result_dst, coord); @@ -2235,13 +2136,13 @@ glsl_to_tgsi_visitor::visit(ir_return *ir) l = st_dst_reg(current_function->return_reg); for (i = 0; i < type_size(current_function->sig->return_type); i++) { - emit(ir, OPCODE_MOV, l, r); + emit(ir, TGSI_OPCODE_MOV, l, r); l.index++; r.index++; } } - emit(ir, OPCODE_RET); + emit(ir, TGSI_OPCODE_RET); } void @@ -2252,9 +2153,9 @@ glsl_to_tgsi_visitor::visit(ir_discard *ir) if (ir->condition) { ir->condition->accept(this); this->result.negate = ~this->result.negate; - emit(ir, OPCODE_KIL, undef_dst, this->result); + emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result); } else { - emit(ir, OPCODE_KIL_NV); + emit(ir, TGSI_OPCODE_KILP); } fp->UsesKill = GL_TRUE; @@ -2280,14 +2181,14 @@ glsl_to_tgsi_visitor::visit(ir_if *ir) */ if (cond_inst == prev_inst) { st_src_reg temp = get_temp(glsl_type::bool_type); - cond_inst = emit(ir->condition, OPCODE_MOV, st_dst_reg(temp), result); + cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result); } cond_inst->cond_update = GL_TRUE; - if_inst = emit(ir->condition, OPCODE_IF); + if_inst = emit(ir->condition, TGSI_OPCODE_IF); if_inst->dst.cond_mask = COND_NE; } else { - if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result); + if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result); } this->instructions.push_tail(if_inst); @@ -2295,11 +2196,11 @@ glsl_to_tgsi_visitor::visit(ir_if *ir) visit_exec_list(&ir->then_instructions, this); if (!ir->else_instructions.is_empty()) { - else_inst = emit(ir->condition, OPCODE_ELSE); + else_inst = emit(ir->condition, TGSI_OPCODE_ELSE); visit_exec_list(&ir->else_instructions, this); } - if_inst = emit(ir->condition, OPCODE_ENDIF); + if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF); } glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() @@ -2337,7 +2238,7 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) foreach_iter(exec_list_iterator, iter, v->instructions) { glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - if (_mesa_is_tex_instruction(inst->op)) { + if (is_tex_instruction(inst->op)) { v->samplers_used |= 1 << inst->sampler; prog->SamplerTargets[inst->sampler] = @@ -2648,7 +2549,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) /* look for instructions which read from varying vars */ foreach_iter(exec_list_iterator, iter, this->instructions) { glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - const GLuint numSrc = _mesa_num_inst_src_regs(inst->op); + const GLuint numSrc = num_inst_src_regs(inst->op); GLuint j; for (j = 0; j < numSrc; j++) { if (inst->src[j].file == type) { @@ -2687,7 +2588,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i]); st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW); dst.index = i; - this->emit(NULL, OPCODE_MOV, dst, src); + this->emit(NULL, TGSI_OPCODE_MOV, dst, src); } } } @@ -2700,7 +2601,7 @@ glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); unsigned j; - for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) { + for (j=0; j < num_inst_src_regs(inst->op); j++) { if (inst->src[j].file == PROGRAM_TEMPORARY && inst->src[j].index == index) { inst->src[j].index = new_index; @@ -2723,17 +2624,17 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index) foreach_iter(exec_list_iterator, iter, this->instructions) { glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) { + for (j=0; j < num_inst_src_regs(inst->op); j++) { if (inst->src[j].file == PROGRAM_TEMPORARY && inst->src[j].index == index) { return (depth == 0) ? i : loop_start; } } - if (inst->op == OPCODE_BGNLOOP) { + if (inst->op == TGSI_OPCODE_BGNLOOP) { if(depth++ == 0) loop_start = i; - } else if (inst->op == OPCODE_ENDLOOP) { + } else if (inst->op == TGSI_OPCODE_ENDLOOP) { if (--depth == 0) loop_start = -1; } @@ -2759,10 +2660,10 @@ glsl_to_tgsi_visitor::get_first_temp_write(int index) return (depth == 0) ? i : loop_start; } - if (inst->op == OPCODE_BGNLOOP) { + if (inst->op == TGSI_OPCODE_BGNLOOP) { if(depth++ == 0) loop_start = i; - } else if (inst->op == OPCODE_ENDLOOP) { + } else if (inst->op == TGSI_OPCODE_ENDLOOP) { if (--depth == 0) loop_start = -1; } @@ -2784,16 +2685,16 @@ glsl_to_tgsi_visitor::get_last_temp_read(int index) foreach_iter(exec_list_iterator, iter, this->instructions) { glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); - for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) { + for (j=0; j < num_inst_src_regs(inst->op); j++) { if (inst->src[j].file == PROGRAM_TEMPORARY && inst->src[j].index == index) { last = (depth == 0) ? i : -2; } } - if (inst->op == OPCODE_BGNLOOP) + if (inst->op == TGSI_OPCODE_BGNLOOP) depth++; - else if (inst->op == OPCODE_ENDLOOP) + else if (inst->op == TGSI_OPCODE_ENDLOOP) if (--depth == 0 && last == -2) last = i; assert(depth >= 0); @@ -2818,9 +2719,9 @@ glsl_to_tgsi_visitor::get_last_temp_write(int index) if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) last = (depth == 0) ? i : -2; - if (inst->op == OPCODE_BGNLOOP) + if (inst->op == TGSI_OPCODE_BGNLOOP) depth++; - else if (inst->op == OPCODE_ENDLOOP) + else if (inst->op == TGSI_OPCODE_ENDLOOP) if (--depth == 0 && last == -2) last = i; assert(depth >= 0); @@ -2922,18 +2823,18 @@ glsl_to_tgsi_visitor::copy_propagate(void) } switch (inst->op) { - case OPCODE_BGNLOOP: - case OPCODE_ENDLOOP: + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_ENDLOOP: /* End of a basic block, clear the ACP entirely. */ memset(acp, 0, sizeof(*acp) * this->next_temp * 4); break; - case OPCODE_IF: + case TGSI_OPCODE_IF: ++level; break; - case OPCODE_ENDIF: - case OPCODE_ELSE: + case TGSI_OPCODE_ENDIF: + case TGSI_OPCODE_ELSE: /* Clear all channels written inside the block from the ACP, but * leaving those that were not touched. */ @@ -2946,7 +2847,7 @@ glsl_to_tgsi_visitor::copy_propagate(void) acp[4 * r + c] = NULL; } } - if (inst->op == OPCODE_ENDIF) + if (inst->op == TGSI_OPCODE_ENDIF) --level; break; @@ -3005,7 +2906,7 @@ glsl_to_tgsi_visitor::copy_propagate(void) } /* If this is a copy, add it to the ACP. */ - if (inst->op == OPCODE_MOV && + if (inst->op == TGSI_OPCODE_MOV && inst->dst.file == PROGRAM_TEMPORARY && !inst->dst.reladdr && !inst->saturate && @@ -3337,11 +3238,11 @@ src_register( struct st_translate *t, } /** - * Create a TGSI ureg_dst register from a Mesa dest register. + * Create a TGSI ureg_dst register from an st_dst_reg. */ static struct ureg_dst translate_dst( struct st_translate *t, - const st_dst_reg *dst_reg, //const struct prog_dst_register *DstReg, + const st_dst_reg *dst_reg, boolean saturate ) { struct ureg_dst dst = dst_register( t, @@ -3361,7 +3262,7 @@ translate_dst( struct st_translate *t, } /** - * Create a TGSI ureg_src register from a Mesa src register. + * Create a TGSI ureg_src register from an st_src_reg. */ static struct ureg_src translate_src( struct st_translate *t, @@ -3378,12 +3279,6 @@ translate_src( struct st_translate *t, if ((src_reg->negate & 0xf) == NEGATE_XYZW) src = ureg_negate(src); -#if 0 - // src_reg currently does not have an equivalent to SrcReg->Abs in Mesa IR - if (src_reg->abs) - src = ureg_abs(src); -#endif - if (src_reg->reladdr != NULL) { /* Normally ureg_src_indirect() would be used here, but a stupid compiler * bug in g++ makes ureg_src_indirect (an inline C function) erroneously @@ -3421,77 +3316,64 @@ compile_tgsi_instruction(struct st_translate *t, unsigned num_dst; unsigned num_src; - num_dst = _mesa_num_inst_dst_regs( inst->op ); - num_src = _mesa_num_inst_src_regs( inst->op ); + num_dst = num_inst_dst_regs( inst->op ); + num_src = num_inst_src_regs( inst->op ); if (num_dst) dst[0] = translate_dst( t, &inst->dst, - inst->saturate); // inst->SaturateMode + inst->saturate); for (i = 0; i < num_src; i++) src[i] = translate_src( t, &inst->src[i] ); switch( inst->op ) { - case OPCODE_SWZ: - // TODO: copy emit_swz function from st_mesa_to_tgsi.c - //emit_swz( t, dst[0], &inst->src[0] ); - assert(!"OPCODE_SWZ"); - return; - - case OPCODE_BGNLOOP: - case OPCODE_CAL: - case OPCODE_ELSE: - case OPCODE_ENDLOOP: - case OPCODE_IF: + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_CAL: + case TGSI_OPCODE_ELSE: + case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_IF: debug_assert(num_dst == 0); ureg_label_insn( ureg, - translate_opcode( inst->op ), + inst->op, src, num_src, get_label( t, - inst->op == OPCODE_CAL ? inst->function->sig_id : 0 )); + inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0 )); return; - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXD: - case OPCODE_TXL: - case OPCODE_TXP: + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXD: + case TGSI_OPCODE_TXL: + case TGSI_OPCODE_TXP: src[num_src++] = t->samplers[inst->sampler]; ureg_tex_insn( ureg, - translate_opcode( inst->op ), + inst->op, dst, num_dst, translate_texture_target( inst->tex_target, inst->tex_shadow ), src, num_src ); return; - case OPCODE_SCS: + case TGSI_OPCODE_SCS: dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); ureg_insn( ureg, - translate_opcode( inst->op ), + inst->op, dst, num_dst, src, num_src ); break; - case OPCODE_XPD: + case TGSI_OPCODE_XPD: dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ ); ureg_insn( ureg, - translate_opcode( inst->op ), + inst->op, dst, num_dst, src, num_src ); break; - case OPCODE_NOISE1: - case OPCODE_NOISE2: - case OPCODE_NOISE3: - case OPCODE_NOISE4: - assert(!"OPCODE_NOISE should have been lowered\n"); - break; - default: ureg_insn( ureg, - translate_opcode( inst->op ), + inst->op, dst, num_dst, src, num_src ); break; @@ -3993,9 +3875,8 @@ get_mesa_program(struct gl_context *ctx, add_uniforms_to_parameters_list(shader_program, shader, prog); - /* Emit Mesa IR for main(). */ + /* Emit intermediate IR for main(). */ visit_exec_list(shader->ir, v); - v->emit(NULL, OPCODE_END); /* Now emit bodies for any functions that were used. */ do { @@ -4007,18 +3888,18 @@ get_mesa_program(struct gl_context *ctx, if (!entry->bgn_inst) { v->current_function = entry; - entry->bgn_inst = v->emit(NULL, OPCODE_BGNSUB); + entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB); entry->bgn_inst->function = entry; visit_exec_list(&entry->sig->body, v); glsl_to_tgsi_instruction *last; last = (glsl_to_tgsi_instruction *)v->instructions.get_tail(); - if (last->op != OPCODE_RET) - v->emit(NULL, OPCODE_RET); + if (last->op != TGSI_OPCODE_RET) + v->emit(NULL, TGSI_OPCODE_RET); glsl_to_tgsi_instruction *end; - end = v->emit(NULL, OPCODE_ENDSUB); + end = v->emit(NULL, TGSI_OPCODE_ENDSUB); end->function = entry; progress = GL_TRUE; @@ -4050,6 +3931,9 @@ get_mesa_program(struct gl_context *ctx, v->eliminate_dead_code(); v->merge_registers(); v->renumber_registers(); + + /* Write the END instruction. */ + v->emit(NULL, TGSI_OPCODE_END); if (ctx->Shader.Flags & GLSL_DUMP) { printf("\n"); @@ -4127,8 +4011,8 @@ st_new_shader_program(struct gl_context *ctx, GLuint name) /** * Link a shader. * Called via ctx->Driver.LinkShader() - * This actually involves converting GLSL IR into Mesa gl_programs with - * code lowering and other optimizations. + * This actually involves converting GLSL IR into an intermediate TGSI-like IR + * with code lowering and other optimizations. */ GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) -- cgit v1.2.3 From 16d7a717d592524e4d62fec4173cb9523f7a1453 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 2 May 2011 23:12:18 -0500 Subject: glsl_to_tgsi: fix shaders with indirect addressing of temps Fixes several Piglit tests, although it's a step backwards for optimization. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 4cb2f377e98..75ab9c5de7c 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -485,7 +485,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, else { for (i=0; i<3; i++) { if(inst->src[i].reladdr) { - switch(dst.file) { + switch(inst->src[i].file) { case PROGRAM_TEMPORARY: this->indirect_addr_temps = true; break; @@ -3928,9 +3928,17 @@ get_mesa_program(struct gl_context *ctx, /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ v->copy_propagate(); - v->eliminate_dead_code(); - v->merge_registers(); - v->renumber_registers(); + + /* FIXME: These passes to optimize temporary registers don't work when there + * is indirect addressing of the temporary register space. We need proper + * array support so that we don't have to give up these passes in every + * shader that uses arrays. + */ + if (!v->indirect_addr_temps) { + v->merge_registers(); + v->eliminate_dead_code(); + v->renumber_registers(); + } /* Write the END instruction. */ v->emit(NULL, TGSI_OPCODE_END); -- cgit v1.2.3 From 6d89abadbcd68bbe9e08f041412549f8dc1fc73c Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Tue, 17 May 2011 17:13:20 -0500 Subject: mesa: support boolean and integer-based parameters in prog_parameter The functionality is not used by anything yet, and the glUniform functions will need to be reworked before this can reach its full usefulness. It is nonetheless a step towards integer support in the state tracker and classic drivers. --- src/mesa/main/ff_fragment_shader.cpp | 3 +- src/mesa/main/ffvertex_prog.c | 10 +++--- src/mesa/main/uniforms.c | 12 +++---- src/mesa/program/ir_to_mesa.cpp | 8 ++--- src/mesa/program/nvfragparse.c | 23 +++++++++----- src/mesa/program/prog_execute.c | 2 +- src/mesa/program/prog_parameter.c | 50 ++++++++++++++++-------------- src/mesa/program/prog_parameter.h | 25 ++++++++++----- src/mesa/program/prog_parameter_layout.c | 2 +- src/mesa/program/prog_print.c | 2 +- src/mesa/program/program.c | 3 +- src/mesa/program/sampler.cpp | 2 +- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 12 ++++--- 13 files changed, 88 insertions(+), 66 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp index 0b53c28f7ae..2ccbaf8f8c3 100644 --- a/src/mesa/main/ff_fragment_shader.cpp +++ b/src/mesa/main/ff_fragment_shader.cpp @@ -875,7 +875,8 @@ static struct ureg register_const4f( struct texenv_fragment_program *p, values[1] = s1; values[2] = s2; values[3] = s3; - idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, + idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, + (gl_constant_value *) values, 4, &swizzle ); r = make_ureg(PROGRAM_CONSTANT, idx); r.swz = swizzle; diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c index b8e49a3757f..2d2485c9e06 100644 --- a/src/mesa/main/ffvertex_prog.c +++ b/src/mesa/main/ffvertex_prog.c @@ -455,13 +455,13 @@ static struct ureg register_const4f( struct tnl_program *p, GLfloat s2, GLfloat s3) { - GLfloat values[4]; + gl_constant_value values[4]; GLint idx; GLuint swizzle; - values[0] = s0; - values[1] = s1; - values[2] = s2; - values[3] = s3; + values[0].f = s0; + values[1].f = s1; + values[2].f = s2; + values[3].f = s3; idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, &swizzle ); ASSERT(swizzle == SWIZZLE_NOOP); diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c index 1c4fd82baac..07d46c6404f 100644 --- a/src/mesa/main/uniforms.c +++ b/src/mesa/main/uniforms.c @@ -429,7 +429,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, for (i = 0; i < rows; i++) { const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { - params[k++] = prog->Parameters->ParameterValues[base][j]; + params[k++] = prog->Parameters->ParameterValues[base][j].f; } } } @@ -442,7 +442,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { params[k++] = (GLdouble) - prog->Parameters->ParameterValues[base][j]; + prog->Parameters->ParameterValues[base][j].f; } } } @@ -455,7 +455,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { params[k++] = (GLint) - prog->Parameters->ParameterValues[base][j]; + prog->Parameters->ParameterValues[base][j].f; } } } @@ -468,7 +468,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { params[k++] = (GLuint) - prog->Parameters->ParameterValues[base][j]; + prog->Parameters->ParameterValues[base][j].f; } } } @@ -670,7 +670,7 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program, /* loop over number of samplers to change */ for (i = 0; i < count; i++) { GLuint sampler = (GLuint) - program->Parameters->ParameterValues[index + offset + i][0]; + program->Parameters->ParameterValues[index+offset + i][0].f; GLuint texUnit = ((GLuint *) values)[i]; /* check that the sampler (tex unit index) is legal */ @@ -936,7 +936,7 @@ set_program_uniform_matrix(struct gl_context *ctx, struct gl_program *program, /* Ignore writes beyond the end of (the used part of) an array */ return; } - v = program->Parameters->ParameterValues[index + offset]; + v = (GLfloat *) program->Parameters->ParameterValues[index + offset]; for (row = 0; row < rows; row++) { if (transpose) { v[row] = values[src + row * cols + col]; diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 00869979dd8..f27492749bd 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -599,7 +599,7 @@ ir_to_mesa_visitor::src_reg_for_float(float val) src_reg src(PROGRAM_CONSTANT, -1, NULL); src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - &val, 1, &src.swizzle); + (const gl_constant_value *)&val, 1, &src.swizzle); return src; } @@ -1798,7 +1798,7 @@ ir_to_mesa_visitor::visit(ir_constant *ir) src = src_reg(PROGRAM_CONSTANT, -1, NULL); src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - values, + (gl_constant_value *) values, ir->type->vector_elements, &src.swizzle); emit(ir, OPCODE_MOV, mat_column, src); @@ -1836,7 +1836,7 @@ ir_to_mesa_visitor::visit(ir_constant *ir) this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type); this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, - values, + (gl_constant_value *) values, ir->type->vector_elements, &this->result.swizzle); } @@ -2533,7 +2533,7 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, */ if (file == PROGRAM_SAMPLER) { for (unsigned int j = 0; j < size / 4; j++) - prog->Parameters->ParameterValues[index + j][0] = next_sampler++; + prog->Parameters->ParameterValues[index + j][0].f = next_sampler++; } /* The location chosen in the Parameters list here (returned diff --git a/src/mesa/program/nvfragparse.c b/src/mesa/program/nvfragparse.c index 8516b5fc1ff..ce72c610d89 100644 --- a/src/mesa/program/nvfragparse.c +++ b/src/mesa/program/nvfragparse.c @@ -472,8 +472,9 @@ Parse_ScalarConstant(struct parse_state *parseState, GLfloat *number) const GLfloat *constant; if (!Parse_Identifier(parseState, ident)) RETURN_ERROR1("Expected an identifier"); - constant = _mesa_lookup_parameter_value(parseState->parameters, - -1, (const char *) ident); + constant = (GLfloat *)_mesa_lookup_parameter_value(parseState->parameters, + -1, + (const char *) ident); /* XXX Check that it's a constant and not a parameter */ if (!constant) { RETURN_ERROR1("Undefined symbol"); @@ -1039,7 +1040,8 @@ Parse_VectorSrc(struct parse_state *parseState, if (!Parse_ScalarConstant(parseState, values)) RETURN_ERROR; paramIndex = _mesa_add_unnamed_constant(parseState->parameters, - values, 4, NULL); + (gl_constant_value *) values, + 4, NULL); srcReg->File = PROGRAM_NAMED_PARAM; srcReg->Index = paramIndex; } @@ -1051,7 +1053,8 @@ Parse_VectorSrc(struct parse_state *parseState, if (!Parse_VectorConstant(parseState, values)) RETURN_ERROR; paramIndex = _mesa_add_unnamed_constant(parseState->parameters, - values, 4, NULL); + (gl_constant_value *) values, + 4, NULL); srcReg->File = PROGRAM_NAMED_PARAM; srcReg->Index = paramIndex; } @@ -1145,7 +1148,8 @@ Parse_ScalarSrcReg(struct parse_state *parseState, if (!Parse_VectorConstant(parseState, values)) RETURN_ERROR; paramIndex = _mesa_add_unnamed_constant(parseState->parameters, - values, 4, NULL); + (gl_constant_value *) values, + 4, NULL); srcReg->File = PROGRAM_NAMED_PARAM; srcReg->Index = paramIndex; } @@ -1170,7 +1174,8 @@ Parse_ScalarSrcReg(struct parse_state *parseState, if (!Parse_ScalarConstant(parseState, values)) RETURN_ERROR; paramIndex = _mesa_add_unnamed_constant(parseState->parameters, - values, 4, NULL); + (gl_constant_value *) values, + 4, NULL); srcReg->Index = paramIndex; srcReg->File = PROGRAM_NAMED_PARAM; needSuffix = GL_FALSE; @@ -1296,7 +1301,8 @@ Parse_InstructionSequence(struct parse_state *parseState, RETURN_ERROR2(id, "already defined"); } _mesa_add_named_parameter(parseState->parameters, - (const char *) id, value); + (const char *) id, + (gl_constant_value *) value); } else if (Parse_String(parseState, "DECLARE")) { GLubyte id[100]; @@ -1315,7 +1321,8 @@ Parse_InstructionSequence(struct parse_state *parseState, RETURN_ERROR2(id, "already declared"); } _mesa_add_named_parameter(parseState->parameters, - (const char *) id, value); + (const char *) id, + (gl_constant_value *) value); } else if (Parse_String(parseState, "END")) { inst->Opcode = OPCODE_END; diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c index e7553c69dbe..dbfd1b91875 100644 --- a/src/mesa/program/prog_execute.c +++ b/src/mesa/program/prog_execute.c @@ -157,7 +157,7 @@ get_src_register_pointer(const struct prog_src_register *source, case PROGRAM_NAMED_PARAM: if (reg >= (GLint) prog->Parameters->NumParameters) return ZeroVec; - return prog->Parameters->ParameterValues[reg]; + return (GLfloat *) prog->Parameters->ParameterValues[reg]; case PROGRAM_SYSTEM_VALUE: assert(reg < Elements(machine->SystemValues)); diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c index 3570cab118b..b1cdf8bf2c0 100644 --- a/src/mesa/program/prog_parameter.c +++ b/src/mesa/program/prog_parameter.c @@ -56,8 +56,8 @@ _mesa_new_parameter_list_sized(unsigned size) p->Parameters = (struct gl_program_parameter *) calloc(1, size * sizeof(struct gl_program_parameter)); - p->ParameterValues = (GLfloat (*)[4]) - _mesa_align_malloc(size * 4 *sizeof(GLfloat), 16); + p->ParameterValues = (gl_constant_value (*)[4]) + _mesa_align_malloc(size * 4 *sizeof(gl_constant_value), 16); if ((p->Parameters == NULL) || (p->ParameterValues == NULL)) { @@ -101,14 +101,15 @@ _mesa_free_parameter_list(struct gl_program_parameter_list *paramList) * \param name the parameter name, will be duplicated/copied! * \param size number of elements in 'values' vector (1..4, or more) * \param datatype GL_FLOAT, GL_FLOAT_VECx, GL_INT, GL_INT_VECx or GL_NONE. - * \param values initial parameter value, up to 4 GLfloats, or NULL + * \param values initial parameter value, up to 4 gl_constant_values, or NULL * \param state state indexes, or NULL * \return index of new parameter in the list, or -1 if error (out of mem) */ GLint _mesa_add_parameter(struct gl_program_parameter_list *paramList, gl_register_file type, const char *name, - GLuint size, GLenum datatype, const GLfloat *values, + GLuint size, GLenum datatype, + const gl_constant_value *values, const gl_state_index state[STATE_LENGTH], GLbitfield flags) { @@ -127,10 +128,10 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, oldNum * sizeof(struct gl_program_parameter), paramList->Size * sizeof(struct gl_program_parameter)); - paramList->ParameterValues = (GLfloat (*)[4]) + paramList->ParameterValues = (gl_constant_value (*)[4]) _mesa_align_realloc(paramList->ParameterValues, /* old buf */ - oldNum * 4 * sizeof(GLfloat), /* old size */ - paramList->Size * 4 *sizeof(GLfloat), /* new sz */ + oldNum * 4 * sizeof(gl_constant_value),/* old sz */ + paramList->Size*4*sizeof(gl_constant_value),/*new*/ 16); } @@ -142,7 +143,7 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, return -1; } else { - GLuint i; + GLuint i, j; paramList->NumParameters = oldNum + sz4; @@ -163,7 +164,8 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, } else { /* silence valgrind */ - ASSIGN_4V(paramList->ParameterValues[oldNum + i], 0, 0, 0, 0); + for (j = 0; j < 4; j++) + paramList->ParameterValues[oldNum + i][j].f = 0; } size -= 4; } @@ -184,7 +186,7 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, */ GLint _mesa_add_named_parameter(struct gl_program_parameter_list *paramList, - const char *name, const GLfloat values[4]) + const char *name, const gl_constant_value values[4]) { return _mesa_add_parameter(paramList, PROGRAM_NAMED_PARAM, name, 4, GL_NONE, values, NULL, 0x0); @@ -204,17 +206,17 @@ _mesa_add_named_parameter(struct gl_program_parameter_list *paramList, */ GLint _mesa_add_named_constant(struct gl_program_parameter_list *paramList, - const char *name, const GLfloat values[4], + const char *name, const gl_constant_value values[4], GLuint size) { /* first check if this is a duplicate constant */ GLint pos; for (pos = 0; pos < (GLint)paramList->NumParameters; pos++) { - const GLfloat *pvals = paramList->ParameterValues[pos]; - if (pvals[0] == values[0] && - pvals[1] == values[1] && - pvals[2] == values[2] && - pvals[3] == values[3] && + const gl_constant_value *pvals = paramList->ParameterValues[pos]; + if (pvals[0].u == values[0].u && + pvals[1].u == values[1].u && + pvals[2].u == values[2].u && + pvals[3].u == values[3].u && strcmp(paramList->Parameters[pos].Name, name) == 0) { /* Same name and value is already in the param list - reuse it */ return pos; @@ -240,7 +242,7 @@ _mesa_add_named_constant(struct gl_program_parameter_list *paramList, */ GLint _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, - const GLfloat values[4], GLuint size, + const gl_constant_value values[4], GLuint size, GLuint *swizzleOut) { GLint pos; @@ -262,7 +264,7 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, struct gl_program_parameter *p = paramList->Parameters + pos; if (p->Type == PROGRAM_CONSTANT && p->Size + size <= 4) { /* ok, found room */ - GLfloat *pVal = paramList->ParameterValues[pos]; + gl_constant_value *pVal = paramList->ParameterValues[pos]; GLuint swz = p->Size; /* 1, 2 or 3 for Y, Z, W */ pVal[p->Size] = values[0]; p->Size++; @@ -401,7 +403,7 @@ _mesa_add_state_reference(struct gl_program_parameter_list *paramList, * Lookup a parameter value by name in the given parameter list. * \return pointer to the float[4] values. */ -GLfloat * +gl_constant_value * _mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList, GLsizei nameLen, const char *name) { @@ -465,7 +467,7 @@ _mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList, */ GLboolean _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, - const GLfloat v[], GLuint vSize, + const gl_constant_value v[], GLuint vSize, GLint *posOut, GLuint *swizzleOut) { GLuint i; @@ -484,7 +486,7 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, /* swizzle not allowed */ GLuint j, match = 0; for (j = 0; j < vSize; j++) { - if (v[j] == list->ParameterValues[i][j]) + if (v[j].u == list->ParameterValues[i][j].u) match++; } if (match == vSize) { @@ -498,7 +500,7 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, /* look for v[0] anywhere within float[4] value */ GLuint j; for (j = 0; j < list->Parameters[i].Size; j++) { - if (list->ParameterValues[i][j] == v[0]) { + if (list->ParameterValues[i][j].u == v[0].u) { /* found it */ *posOut = i; *swizzleOut = MAKE_SWIZZLE4(j, j, j, j); @@ -511,13 +513,13 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, GLuint swz[4]; GLuint match = 0, j, k; for (j = 0; j < vSize; j++) { - if (v[j] == list->ParameterValues[i][j]) { + if (v[j].u == list->ParameterValues[i][j].u) { swz[j] = j; match++; } else { for (k = 0; k < list->Parameters[i].Size; k++) { - if (v[j] == list->ParameterValues[i][k]) { + if (v[j].u == list->ParameterValues[i][k].u) { swz[j] = k; match++; break; diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h index 10cbbe57a6c..dcc171ed745 100644 --- a/src/mesa/program/prog_parameter.h +++ b/src/mesa/program/prog_parameter.h @@ -46,7 +46,15 @@ #define PROG_PARAM_BIT_CYL_WRAP 0x10 /**< XXX gallium debug */ /*@}*/ - +/** + * Actual data for constant values of parameters. + */ +typedef union gl_constant_value { + GLfloat f; + GLboolean b; + GLint i; + GLuint u; +} gl_constant_value; /** * Program parameter. @@ -81,7 +89,7 @@ struct gl_program_parameter_list GLuint Size; /**< allocated size of Parameters, ParameterValues */ GLuint NumParameters; /**< number of parameters in arrays */ struct gl_program_parameter *Parameters; /**< Array [Size] */ - GLfloat (*ParameterValues)[4]; /**< Array [Size] of GLfloat[4] */ + gl_constant_value (*ParameterValues)[4]; /**< Array [Size] of constant[4] */ GLbitfield StateFlags; /**< _NEW_* flags indicating which state changes might invalidate ParameterValues[] */ }; @@ -112,22 +120,23 @@ _mesa_num_parameters(const struct gl_program_parameter_list *list) extern GLint _mesa_add_parameter(struct gl_program_parameter_list *paramList, gl_register_file type, const char *name, - GLuint size, GLenum datatype, const GLfloat *values, + GLuint size, GLenum datatype, + const gl_constant_value *values, const gl_state_index state[STATE_LENGTH], GLbitfield flags); extern GLint _mesa_add_named_parameter(struct gl_program_parameter_list *paramList, - const char *name, const GLfloat values[4]); + const char *name, const gl_constant_value values[4]); extern GLint _mesa_add_named_constant(struct gl_program_parameter_list *paramList, - const char *name, const GLfloat values[4], + const char *name, const gl_constant_value values[4], GLuint size); extern GLint _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, - const GLfloat values[4], GLuint size, + const gl_constant_value values[4], GLuint size, GLuint *swizzleOut); extern GLint @@ -143,7 +152,7 @@ extern GLint _mesa_add_state_reference(struct gl_program_parameter_list *paramList, const gl_state_index stateTokens[STATE_LENGTH]); -extern GLfloat * +extern gl_constant_value * _mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList, GLsizei nameLen, const char *name); @@ -153,7 +162,7 @@ _mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList, extern GLboolean _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list, - const GLfloat v[], GLuint vSize, + const gl_constant_value v[], GLuint vSize, GLint *posOut, GLuint *swizzleOut); extern GLuint diff --git a/src/mesa/program/prog_parameter_layout.c b/src/mesa/program/prog_parameter_layout.c index 90a9771080c..28fca3b92d9 100644 --- a/src/mesa/program/prog_parameter_layout.c +++ b/src/mesa/program/prog_parameter_layout.c @@ -182,7 +182,7 @@ _mesa_layout_parameters(struct asm_parser_state *state) switch (p->Type) { case PROGRAM_CONSTANT: { - const float *const v = + const gl_constant_value *const v = state->prog->Parameters->ParameterValues[idx]; inst->Base.SrcReg[i].Index = diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c index 7c3b4909e73..70412b1fa6a 100644 --- a/src/mesa/program/prog_print.c +++ b/src/mesa/program/prog_print.c @@ -985,7 +985,7 @@ _mesa_fprint_parameter_list(FILE *f, fprintf(f, "dirty state flags: 0x%x\n", list->StateFlags); for (i = 0; i < list->NumParameters; i++){ struct gl_program_parameter *param = list->Parameters + i; - const GLfloat *v = list->ParameterValues[i]; + const GLfloat *v = (GLfloat *) list->ParameterValues[i]; fprintf(f, "param[%d] sz=%d %s %s = {%.3g, %.3g, %.3g, %.3g}", i, param->Size, _mesa_register_file_name(list->Parameters[i].Type), diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index 224446a2683..4f2b6270501 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -1030,7 +1030,8 @@ _mesa_postprocess_program(struct gl_context *ctx, struct gl_program *prog) GLuint i; GLuint whiteSwizzle; GLint whiteIndex = _mesa_add_unnamed_constant(prog->Parameters, - white, 4, &whiteSwizzle); + (gl_constant_value *) white, + 4, &whiteSwizzle); (void) whiteIndex; diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp index 1457d1199fa..e8d34c670a9 100644 --- a/src/mesa/program/sampler.cpp +++ b/src/mesa/program/sampler.cpp @@ -132,6 +132,6 @@ _mesa_get_sampler_uniform_value(class ir_dereference *sampler, index += getname.offset; - return prog->Parameters->ParameterValues[index][0]; + return prog->Parameters->ParameterValues[index][0].f; } } diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 75ab9c5de7c..881b9e05de1 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -706,9 +706,11 @@ struct st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_float(float val) { st_src_reg src(PROGRAM_CONSTANT, -1, NULL); + union gl_constant_value uval; + uval.f = val; src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - &val, 1, &src.swizzle); + &uval, 1, &src.swizzle); return src; } @@ -1791,7 +1793,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) src = st_src_reg(PROGRAM_CONSTANT, -1, NULL); src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - values, + (gl_constant_value *) values, ir->type->vector_elements, &src.swizzle); emit(ir, TGSI_OPCODE_MOV, mat_column, src); @@ -1829,7 +1831,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type); this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, - values, + (gl_constant_value *) values, ir->type->vector_elements, &this->result.swizzle); } @@ -2401,7 +2403,7 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, */ if (file == PROGRAM_SAMPLER) { for (unsigned int j = 0; j < size / 4; j++) - prog->Parameters->ParameterValues[index + j][0] = next_sampler++; + prog->Parameters->ParameterValues[index + j][0].f = next_sampler++; } /* The location chosen in the Parameters list here (returned @@ -3762,7 +3764,7 @@ st_translate_program( else t->constants[i] = ureg_DECL_immediate( ureg, - proginfo->Parameters->ParameterValues[i], + (GLfloat *) proginfo->Parameters->ParameterValues[i], 4 ); break; default: -- cgit v1.2.3 From b191382c60bdcfeb7f424b23aa6ab63de81e2f08 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 13 Jun 2011 18:12:56 -0500 Subject: mesa, glsl_to_tgsi: add native support for integers in shaders Disabled by default on all drivers. To enable it, change ctx->GLSLVersion to 130 in st_extensions.c. Currently, softpipe is the only driver with integer support. --- src/glsl/glsl_types.h | 15 ++ src/mesa/main/uniforms.c | 38 ++-- src/mesa/program/prog_parameter.c | 28 ++- src/mesa/program/prog_parameter.h | 5 + src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 302 ++++++++++++++++++++++++----- 5 files changed, 328 insertions(+), 60 deletions(-) (limited to 'src/mesa') diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index 1b069df74fe..eb9d501858a 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -165,6 +165,21 @@ struct glsl_type { static const glsl_type *const mat4x3_type; static const glsl_type *const mat4_type; /*@}*/ + + /** + * Get the built-in instance of the vec4 type for a specific base type + */ + static const glsl_type *get_vec4_type(glsl_base_type base_type) + { + if (base_type == GLSL_TYPE_FLOAT) + return vec4_type; + else if (base_type == GLSL_TYPE_INT) + return ivec4_type; + else if (base_type == GLSL_TYPE_UINT) + return uvec4_type; + else + return NULL; + } /** diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c index 07d46c6404f..ce4863faf78 100644 --- a/src/mesa/main/uniforms.c +++ b/src/mesa/main/uniforms.c @@ -454,8 +454,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, for (i = 0; i < rows; i++) { const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { - params[k++] = (GLint) - prog->Parameters->ParameterValues[base][j].f; + params[k++] = ctx->Const.GLSLVersion <= 120 ? + (GLint) prog->Parameters->ParameterValues[base][j].f : + prog->Parameters->ParameterValues[base][j].i; } } } @@ -467,8 +468,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location, for (i = 0; i < rows; i++) { const int base = paramPos + offset + i; for (j = 0; j < cols; j++ ) { - params[k++] = (GLuint) - prog->Parameters->ParameterValues[base][j].f; + params[k++] = ctx->Const.GLSLVersion <= 120 ? + (GLuint) prog->Parameters->ParameterValues[base][j].f : + prog->Parameters->ParameterValues[base][j].u; } } } @@ -735,42 +737,52 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program, /* loop over number of array elements */ for (k = 0; k < count; k++) { - GLfloat *uniformVal; + gl_constant_value *uniformVal; if (offset + k >= slots) { /* Extra array data is ignored */ break; } - /* uniformVal (the destination) is always float[4] */ + /* uniformVal (the destination) is always gl_constant_value[4] */ uniformVal = program->Parameters->ParameterValues[index + offset + k]; if (basicType == GL_INT) { - /* convert user's ints to floats */ const GLint *iValues = ((const GLint *) values) + k * elems; for (i = 0; i < elems; i++) { - uniformVal[i] = (GLfloat) iValues[i]; + if (ctx->Const.GLSLVersion <= 120) + uniformVal[i].f = (GLfloat) iValues[i]; + else + uniformVal[i].i = iValues[i]; } } else if (basicType == GL_UNSIGNED_INT) { - /* convert user's uints to floats */ const GLuint *iValues = ((const GLuint *) values) + k * elems; for (i = 0; i < elems; i++) { - uniformVal[i] = (GLfloat) iValues[i]; + if (ctx->Const.GLSLVersion <= 120) + uniformVal[i].f = (GLfloat)(GLuint) iValues[i]; + else + uniformVal[i].u = iValues[i]; } } else { const GLfloat *fValues = ((const GLfloat *) values) + k * elems; assert(basicType == GL_FLOAT); for (i = 0; i < elems; i++) { - uniformVal[i] = fValues[i]; + uniformVal[i].f = fValues[i]; } } - /* if the uniform is bool-valued, convert to 1.0 or 0.0 */ + /* if the uniform is bool-valued, convert to 1 or 0 */ if (isUniformBool) { for (i = 0; i < elems; i++) { - uniformVal[i] = uniformVal[i] ? 1.0f : 0.0f; + if (basicType == GL_FLOAT) + uniformVal[i].b = uniformVal[i].f != 0.0f ? 1 : 0; + else + uniformVal[i].b = uniformVal[i].u ? 1 : 0; + + if (ctx->Const.GLSLVersion <= 120) + uniformVal[i].f = uniformVal[i].b ? 1.0f : 0.0f; } } } diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c index b1cdf8bf2c0..49b3ffbdd5c 100644 --- a/src/mesa/program/prog_parameter.c +++ b/src/mesa/program/prog_parameter.c @@ -241,9 +241,9 @@ _mesa_add_named_constant(struct gl_program_parameter_list *paramList, * \return index/position of the new parameter in the parameter list. */ GLint -_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, +_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList, const gl_constant_value values[4], GLuint size, - GLuint *swizzleOut) + GLenum datatype, GLuint *swizzleOut) { GLint pos; ASSERT(size >= 1); @@ -276,7 +276,7 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, /* add a new parameter to store this constant */ pos = _mesa_add_parameter(paramList, PROGRAM_CONSTANT, NULL, - size, GL_NONE, values, NULL, 0x0); + size, datatype, values, NULL, 0x0); if (pos >= 0 && swizzleOut) { if (size == 1) *swizzleOut = SWIZZLE_XXXX; @@ -286,6 +286,28 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, return pos; } +/** + * Add a new unnamed constant to the parameter list. This will be used + * when a fragment/vertex program contains something like this: + * MOV r, { 0, 1, 2, 3 }; + * If swizzleOut is non-null we'll search the parameter list for an + * existing instance of the constant which matches with a swizzle. + * + * \param paramList the parameter list + * \param values four float values + * \param swizzleOut returns swizzle mask for accessing the constant + * \return index/position of the new parameter in the parameter list. + * \sa _mesa_add_typed_unnamed_constant + */ +GLint +_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, + const gl_constant_value values[4], GLuint size, + GLuint *swizzleOut) +{ + return _mesa_add_typed_unnamed_constant(paramList, values, size, GL_NONE, + swizzleOut); +} + /** * Add parameter representing a varying variable. */ diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h index dcc171ed745..f858cf0fa0d 100644 --- a/src/mesa/program/prog_parameter.h +++ b/src/mesa/program/prog_parameter.h @@ -134,6 +134,11 @@ _mesa_add_named_constant(struct gl_program_parameter_list *paramList, const char *name, const gl_constant_value values[4], GLuint size); +extern GLint +_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList, + const gl_constant_value values[4], GLuint size, + GLenum datatype, GLuint *swizzleOut); + extern GLint _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, const gl_constant_value values[4], GLuint size, diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 881b9e05de1..3f5c0c60226 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -96,11 +96,13 @@ public: else this->swizzle = SWIZZLE_XYZW; this->negate = 0; + this->type = type ? type->base_type : GLSL_TYPE_ERROR; this->reladdr = NULL; } - st_src_reg(gl_register_file file, int index) + st_src_reg(gl_register_file file, int index, int type) { + this->type = type; this->file = file; this->index = index; this->swizzle = SWIZZLE_XYZW; @@ -110,6 +112,7 @@ public: st_src_reg() { + this->type = GLSL_TYPE_ERROR; this->file = PROGRAM_UNDEFINED; this->index = 0; this->swizzle = 0; @@ -123,23 +126,26 @@ public: int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ int negate; /**< NEGATE_XYZW mask from mesa */ + int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ /** Register index should be offset by the integer in this reg. */ st_src_reg *reladdr; }; class st_dst_reg { public: - st_dst_reg(gl_register_file file, int writemask) + st_dst_reg(gl_register_file file, int writemask, int type) { this->file = file; this->index = 0; this->writemask = writemask; this->cond_mask = COND_TR; this->reladdr = NULL; + this->type = type; } st_dst_reg() { + this->type = GLSL_TYPE_ERROR; this->file = PROGRAM_UNDEFINED; this->index = 0; this->writemask = 0; @@ -153,12 +159,14 @@ public: int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ GLuint cond_mask:4; + int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ /** Register index should be offset by the integer in this reg. */ st_src_reg *reladdr; }; st_src_reg::st_src_reg(st_dst_reg reg) { + this->type = reg.type; this->file = reg.file; this->index = reg.index; this->swizzle = SWIZZLE_XYZW; @@ -168,6 +176,7 @@ st_src_reg::st_src_reg(st_dst_reg reg) st_dst_reg::st_dst_reg(st_src_reg reg) { + this->type = reg.type; this->file = reg.file; this->index = reg.index; this->writemask = WRITEMASK_XYZW; @@ -267,6 +276,8 @@ public: int samplers_used; bool indirect_addr_temps; bool indirect_addr_consts; + + int glsl_version; variable_storage *find_variable_storage(ir_variable *var); @@ -276,6 +287,8 @@ public: void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); st_src_reg st_src_reg_for_float(float val); + st_src_reg st_src_reg_for_int(int val); + st_src_reg st_src_reg_for_type(int type, int val); /** * \name Visit methods @@ -327,6 +340,10 @@ public: glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1, st_src_reg src2); + + unsigned get_opcode(ir_instruction *ir, unsigned op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1); /** * Emit the correct dot-product instruction for the type of arguments @@ -343,6 +360,8 @@ public: void emit_scalar(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1); + void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0); + void emit_scs(ir_instruction *ir, unsigned op, st_dst_reg dst, const st_src_reg &src); @@ -372,9 +391,9 @@ public: static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, NULL); -static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP); +static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); -static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X); +static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT); static void fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); @@ -432,6 +451,8 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, { glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); int num_reladdr = 0, i; + + op = get_opcode(ir, op, dst, src0, src1); /* If we have to do relative addressing, we want to load the ARL * reg directly for one of the regs, and preload the other reladdr @@ -447,7 +468,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, reladdr_to_temp(ir, &src0, &num_reladdr); if (dst.reladdr) { - emit(ir, TGSI_OPCODE_ARL, address_reg, *dst.reladdr); + emit_arl(ir, address_reg, *dst.reladdr); num_reladdr--; } assert(num_reladdr == 0); @@ -531,6 +552,62 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op) return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); } +/** + * Determines whether to use an integer, unsigned integer, or float opcode + * based on the operands and input opcode, then emits the result. + * + * TODO: type checking for remaining TGSI opcodes + */ +unsigned +glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, + st_dst_reg dst, + st_src_reg src0, st_src_reg src1) +{ + int type = GLSL_TYPE_FLOAT; + + if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) + type = GLSL_TYPE_FLOAT; + else if (glsl_version >= 130) + type = src0.type; + +#define case4(c, f, i, u) \ + case TGSI_OPCODE_##c: \ + if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \ + else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \ + else op = TGSI_OPCODE_##f; \ + break; +#define case3(f, i, u) case4(f, f, i, u) +#define case2fi(f, i) case4(f, f, i, i) +#define case2iu(i, u) case4(i, LAST, i, u) + + switch(op) { + case2fi(ADD, UADD); + case2fi(MUL, UMUL); + case2fi(MAD, UMAD); + case3(DIV, IDIV, UDIV); + case3(MAX, IMAX, UMAX); + case3(MIN, IMIN, UMIN); + case2iu(MOD, UMOD); + + case2fi(SEQ, USEQ); + case2fi(SNE, USNE); + case3(SGE, ISGE, USGE); + case3(SLT, ISLT, USLT); + + case2iu(SHL, SHL); + case2iu(ISHR, USHR); + case2iu(NOT, NOT); + case2iu(AND, AND); + case2iu(OR, OR); + case2iu(XOR, XOR); + + default: break; + } + + assert(op != TGSI_OPCODE_LAST); + return op; +} + void glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, st_dst_reg dst, st_src_reg src0, st_src_reg src1, @@ -607,6 +684,22 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, emit_scalar(ir, op, dst, src0, undef); } +void +glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, + st_dst_reg dst, st_src_reg src0) +{ + st_src_reg tmp = get_temp(glsl_type::float_type); + + if (src0.type == GLSL_TYPE_INT) + emit(ir, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0); + else if (src0.type == GLSL_TYPE_UINT) + emit(ir, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0); + else + tmp = src0; + + emit(ir, TGSI_OPCODE_ARL, dst, tmp); +} + /** * Emit an TGSI_OPCODE_SCS instruction * @@ -705,16 +798,41 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, struct st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_float(float val) { - st_src_reg src(PROGRAM_CONSTANT, -1, NULL); + st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_FLOAT); union gl_constant_value uval; uval.f = val; - src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - &uval, 1, &src.swizzle); + src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, + &uval, 1, GL_FLOAT, &src.swizzle); + + return src; +} + +struct st_src_reg +glsl_to_tgsi_visitor::st_src_reg_for_int(int val) +{ + st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_INT); + union gl_constant_value uval; + + assert(glsl_version >= 130); + + uval.i = val; + src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, + &uval, 1, GL_INT, &src.swizzle); return src; } +struct st_src_reg +glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val) +{ + if (glsl_version >= 130) + return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : + st_src_reg_for_int(val); + else + return st_src_reg_for_float(val); +} + static int type_size(const struct glsl_type *type) { @@ -759,8 +877,7 @@ type_size(const struct glsl_type *type) /** * In the initial pass of codegen, we assign temporary numbers to * intermediate results. (not SSA -- variable assignments will reuse - * storage). Actual register allocation for the Mesa VM occurs in a - * pass over the Mesa IR later. + * storage). */ st_src_reg glsl_to_tgsi_visitor::get_temp(const glsl_type *type) @@ -769,6 +886,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) int swizzle[4]; int i; + src.type = type->base_type; src.file = PROGRAM_TEMPORARY; src.index = next_temp; src.reladdr = NULL; @@ -875,7 +993,8 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) this->variables.push_tail(storage); this->next_temp += type_size(ir->type); - dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, NULL)); + dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, + glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT)); } @@ -890,7 +1009,8 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) assert(index == storage->index + (int)i); } } else { - st_src_reg src(PROGRAM_STATE_VAR, index, NULL); + st_src_reg src(PROGRAM_STATE_VAR, index, + glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT); src.swizzle = slots[i].swizzle; emit(ir, TGSI_OPCODE_MOV, dst, src); /* even a float takes up a whole vec4 reg in a struct/array. */ @@ -1058,7 +1178,7 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, if (!reg->reladdr) return; - emit(ir, TGSI_OPCODE_ARL, address_reg, *reg->reladdr); + emit_arl(ir, address_reg, *reg->reladdr); if (*num_reladdr != 1) { st_src_reg temp = get_temp(glsl_type::vec4_type); @@ -1131,13 +1251,19 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: - emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0)); + emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0)); break; case ir_unop_neg: - op[0].negate = ~op[0].negate; - result_src = op[0]; + assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT); + if (result_dst.type == GLSL_TYPE_INT) + emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); + else { + op[0].negate = ~op[0].negate; + result_src = op[0]; + } break; case ir_unop_abs: + assert(result_dst.type == GLSL_TYPE_FLOAT); emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]); break; case ir_unop_sign: @@ -1200,9 +1326,16 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); break; case ir_binop_div: - assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + if (result_dst.type == GLSL_TYPE_FLOAT) + assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + else + emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); + break; case ir_binop_mod: - assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + if (result_dst.type == GLSL_TYPE_FLOAT) + assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + else + emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]); break; case ir_binop_less: @@ -1227,7 +1360,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) /* "==" operator producing a scalar boolean. */ if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { - st_src_reg temp = get_temp(glsl_type::vec4_type); + st_src_reg temp = get_temp(glsl_version >= 130 ? + glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : + glsl_type::vec4_type); + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); emit_dp(ir, result_dst, temp, temp, vector_elements); emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0)); @@ -1239,7 +1375,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) /* "!=" operator producing a scalar boolean. */ if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { - st_src_reg temp = get_temp(glsl_type::vec4_type); + st_src_reg temp = get_temp(glsl_version >= 130 ? + glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : + glsl_type::vec4_type); + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); emit_dp(ir, result_dst, temp, temp, vector_elements); emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0)); @@ -1291,17 +1430,24 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_unop_i2f: case ir_unop_b2f: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]); + break; + } case ir_unop_b2i: - /* Mesa IR lacks types, ints are stored as truncated floats. */ + /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */ result_src = op[0]; break; case ir_unop_f2i: - emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); + if (glsl_version >= 130) + emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]); + else + emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); break; case ir_unop_f2b: case ir_unop_i2b: - emit(ir, TGSI_OPCODE_SNE, result_dst, - op[0], st_src_reg_for_float(0.0)); + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], + st_src_reg_for_type(result_dst.type, 0)); break; case ir_unop_trunc: emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); @@ -1329,12 +1475,40 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) break; case ir_unop_bit_not: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); + break; + } case ir_unop_u2f: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]); + break; + } case ir_binop_lshift: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]); + break; + } case ir_binop_rshift: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]); + break; + } case ir_binop_bit_and: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_AND, result_dst, op[0]); + break; + } case ir_binop_bit_xor: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]); + break; + } case ir_binop_bit_or: + if (glsl_version >= 130) { + emit(ir, TGSI_OPCODE_OR, result_dst, op[0]); + break; + } case ir_unop_round_even: assert(!"GLSL 1.30 features unsupported"); break; @@ -1729,7 +1903,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) { st_src_reg src; GLfloat stack_vals[4] = { 0 }; - GLfloat *values = stack_vals; + gl_constant_value *values = (gl_constant_value *) stack_vals; + GLenum gl_type = GL_NONE; unsigned int i; /* Unfortunately, 4 floats is all we can get into @@ -1737,7 +1912,6 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) * aggregate constant and move each constant value into it. If we * get lucky, copy propagation will eliminate the extra moves. */ - if (ir->type->base_type == GLSL_TYPE_STRUCT) { st_src_reg temp_base = get_temp(ir->type); st_dst_reg temp = st_dst_reg(temp_base); @@ -1789,13 +1963,13 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) for (i = 0; i < ir->type->matrix_columns; i++) { assert(ir->type->base_type == GLSL_TYPE_FLOAT); - values = &ir->value.f[i * ir->type->vector_elements]; + values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; - src = st_src_reg(PROGRAM_CONSTANT, -1, NULL); + src = st_src_reg(PROGRAM_CONSTANT, -1, ir->type->base_type); src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - (gl_constant_value *) values, - ir->type->vector_elements, - &src.swizzle); + values, + ir->type->vector_elements, + &src.swizzle); emit(ir, TGSI_OPCODE_MOV, mat_column, src); mat_column.index++; @@ -1808,21 +1982,36 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) src.file = PROGRAM_CONSTANT; switch (ir->type->base_type) { case GLSL_TYPE_FLOAT: - values = &ir->value.f[0]; + gl_type = GL_FLOAT; + for (i = 0; i < ir->type->vector_elements; i++) { + values[i].f = ir->value.f[i]; + } break; case GLSL_TYPE_UINT: + gl_type = glsl_version >= 130 ? GL_UNSIGNED_INT : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { - values[i] = ir->value.u[i]; + if (glsl_version >= 130) + values[i].u = ir->value.u[i]; + else + values[i].f = ir->value.u[i]; } break; case GLSL_TYPE_INT: + gl_type = glsl_version >= 130 ? GL_INT : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { - values[i] = ir->value.i[i]; + if (glsl_version >= 130) + values[i].i = ir->value.i[i]; + else + values[i].f = ir->value.i[i]; } break; case GLSL_TYPE_BOOL: + gl_type = glsl_version >= 130 ? GL_BOOL : GL_FLOAT; for (i = 0; i < ir->type->vector_elements; i++) { - values[i] = ir->value.b[i]; + if (glsl_version >= 130) + values[i].b = ir->value.b[i]; + else + values[i].f = ir->value.b[i]; } break; default: @@ -1830,9 +2019,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) } this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type); - this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, - (gl_constant_value *) values, - ir->type->vector_elements, + this->result.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, + values, ir->type->vector_elements, gl_type, &this->result.swizzle); } @@ -2535,6 +2723,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) { GLuint i; GLint outputMap[VERT_RESULT_MAX]; + GLint outputTypes[VERT_RESULT_MAX]; GLuint numVaryingReads = 0; GLboolean usedTemps[MAX_PROGRAM_TEMPS]; GLuint firstTemp = 0; @@ -2562,6 +2751,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) outputMap[var] = _mesa_find_free_register(usedTemps, MAX_PROGRAM_TEMPS, firstTemp); + outputTypes[var] = inst->src[j].type; firstTemp = outputMap[var] + 1; } inst->src[j].file = PROGRAM_TEMPORARY; @@ -2587,8 +2777,8 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) for (i = 0; i < VERT_RESULT_MAX; i++) { if (outputMap[i] >= 0) { /* MOV VAR[i], TEMP[tmp]; */ - st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i]); - st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW); + st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]); + st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]); dst.index = i; this->emit(NULL, TGSI_OPCODE_MOV, dst, src); } @@ -3762,10 +3952,33 @@ st_translate_program( if (program->indirect_addr_consts) t->constants[i] = ureg_DECL_constant( ureg, i ); else - t->constants[i] = - ureg_DECL_immediate( ureg, - (GLfloat *) proginfo->Parameters->ParameterValues[i], - 4 ); + switch(proginfo->Parameters->Parameters[i].DataType) + { + case GL_FLOAT: + case GL_FLOAT_VEC2: + case GL_FLOAT_VEC3: + case GL_FLOAT_VEC4: + t->constants[i] = ureg_DECL_immediate(ureg, (float *)proginfo->Parameters->ParameterValues[i], 4); + break; + case GL_INT: + case GL_INT_VEC2: + case GL_INT_VEC3: + case GL_INT_VEC4: + t->constants[i] = ureg_DECL_immediate_int(ureg, (int *)proginfo->Parameters->ParameterValues[i], 4); + break; + case GL_UNSIGNED_INT: + case GL_UNSIGNED_INT_VEC2: + case GL_UNSIGNED_INT_VEC3: + case GL_UNSIGNED_INT_VEC4: + case GL_BOOL: + case GL_BOOL_VEC2: + case GL_BOOL_VEC3: + case GL_BOOL_VEC4: + t->constants[i] = ureg_DECL_immediate_uint(ureg, (unsigned *)proginfo->Parameters->ParameterValues[i], 4); + break; + default: + assert(!"should not get here"); + } break; default: break; @@ -3874,6 +4087,7 @@ get_mesa_program(struct gl_context *ctx, v->prog = prog; v->shader_program = shader_program; v->options = options; + v->glsl_version = ctx->Const.GLSLVersion; add_uniforms_to_parameters_list(shader_program, shader, prog); -- cgit v1.2.3 From b2c067e3075414703a7ebad439d4290c27cab46a Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Tue, 14 Jun 2011 17:38:14 -0500 Subject: glsl-to-tgsi: fix piglit tests This commit fixes all of the piglit tests regressed by "mesa, glsl_to_tgsi: add native support for integers in shaders" on softpipe. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 3f5c0c60226..49613fccda7 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -886,7 +886,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) int swizzle[4]; int i; - src.type = type->base_type; + src.type = glsl_version >= 130 ? type->base_type : GLSL_TYPE_FLOAT; src.file = PROGRAM_TEMPORARY; src.index = next_temp; src.reladdr = NULL; @@ -1632,6 +1632,8 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) } this->result = st_src_reg(entry->file, entry->index, var->type); + if (glsl_version <= 120) + this->result.type = GLSL_TYPE_FLOAT; } void @@ -1966,10 +1968,11 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; src = st_src_reg(PROGRAM_CONSTANT, -1, ir->type->base_type); - src.index = _mesa_add_unnamed_constant(this->prog->Parameters, - values, - ir->type->vector_elements, - &src.swizzle); + src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, + values, + ir->type->vector_elements, + GL_FLOAT, + &src.swizzle); emit(ir, TGSI_OPCODE_MOV, mat_column, src); mat_column.index++; @@ -4142,15 +4145,14 @@ get_mesa_program(struct gl_context *ctx, if (target == GL_VERTEX_PROGRAM_ARB) v->remove_output_reads(PROGRAM_VARYING); - /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ - v->copy_propagate(); - - /* FIXME: These passes to optimize temporary registers don't work when there + /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. + * FIXME: These passes to optimize temporary registers don't work when there * is indirect addressing of the temporary register space. We need proper * array support so that we don't have to give up these passes in every * shader that uses arrays. */ if (!v->indirect_addr_temps) { + v->copy_propagate(); v->merge_registers(); v->eliminate_dead_code(); v->renumber_registers(); -- cgit v1.2.3 From bf1cee9f24022e3da96d84fdc6baaa050d3eadf1 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Tue, 14 Jun 2011 18:17:40 -0500 Subject: glsl_to_tgsi: finish some loose ends --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 46 +++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 10 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 49613fccda7..438f21483c7 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -2200,7 +2200,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) ir->lod_info.grad.dPdy->accept(this); dy = this->result; break; - case ir_txf: // TODO: use TGSI_OPCODE_TXF here + case ir_txf: /* TODO: use TGSI_OPCODE_TXF here */ assert(!"GLSL 1.30 features unsupported"); break; } @@ -3731,6 +3731,37 @@ emit_wpos(struct st_context *st, emit_wpos_inversion(t, program, invert); } +/** + * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. + * TGSI uses +1 for front, -1 for back. + * This function converts the TGSI value to the GL value. Simply clamping/ + * saturating the value to [0,1] does the job. + */ +static void +emit_face_var(struct st_translate *t) +{ + struct ureg_program *ureg = t->ureg; + struct ureg_dst face_temp = ureg_DECL_temporary(ureg); + struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]]; + + /* MOV_SAT face_temp, input[face] */ + face_temp = ureg_saturate(face_temp); + ureg_MOV(ureg, face_temp, face_input); + + /* Use face_temp as face input from here on: */ + t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); +} + +static void +emit_edgeflags(struct st_translate *t) +{ + struct ureg_program *ureg = t->ureg; + struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]]; + struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; + + ureg_MOV(ureg, edge_dst, edge_src); +} + /** * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. * \param program the program to translate @@ -3800,15 +3831,11 @@ st_translate_program( /* Must do this after setting up t->inputs, and before * emitting constant references, below: */ - printf("FRAG_BIT_WPOS\n"); emit_wpos(st_context(ctx), t, proginfo, ureg); } - if (proginfo->InputsRead & FRAG_BIT_FACE) { - // TODO: uncomment - printf("FRAG_BIT_FACE\n"); - //emit_face_var( t, program ); - } + if (proginfo->InputsRead & FRAG_BIT_FACE) + emit_face_var(t); /* * Declare output attributes. @@ -3875,7 +3902,6 @@ st_translate_program( /* XXX: note we are modifying the incoming shader here! Need to * do this before emitting the constant decls below, or this * will be missed. - * XXX: depends on "Parameters" field specific to Mesa IR */ unsigned pointSizeClampConst = _mesa_add_state_reference(proginfo->Parameters, @@ -3887,8 +3913,8 @@ st_translate_program( t->outputs[i] = psizregtemp; } } - /*if (passthrough_edgeflags) - emit_edgeflags( t, program ); */ // TODO: uncomment + if (passthrough_edgeflags) + emit_edgeflags(t); } /* Declare address register. -- cgit v1.2.3 From b30bbd7436bdb9727d3766ba9c07abd610e6dda8 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Wed, 15 Jun 2011 14:45:03 -0500 Subject: glsl_to_tgsi: silence compiler warning --- src/mesa/state_tracker/st_mesa_to_tgsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 75842286ba8..656c985d78f 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -1207,7 +1207,7 @@ st_translate_mesa_program( else t->constants[i] = ureg_DECL_immediate( ureg, - program->Parameters->ParameterValues[i], + (const float*) program->Parameters->ParameterValues[i], 4 ); break; default: -- cgit v1.2.3 From 1141c3f4c4014e3c2834db65b96a3ba7cc78744a Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Wed, 15 Jun 2011 17:31:51 -0500 Subject: glsl: remove glsl_type::get_vec4_type() Thanks to Kenneth Graunke for pointing out that glsl_type::get_instance(base, 4, 1) is the same as glsl_type::get_vec4_type(base). The function was only used in st_glsl_to_tgsi, and this commit replaces that usage with get_instance. --- src/glsl/glsl_types.h | 15 --------------- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 4 ++-- 2 files changed, 2 insertions(+), 17 deletions(-) (limited to 'src/mesa') diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index eb9d501858a..1b069df74fe 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -165,21 +165,6 @@ struct glsl_type { static const glsl_type *const mat4x3_type; static const glsl_type *const mat4_type; /*@}*/ - - /** - * Get the built-in instance of the vec4 type for a specific base type - */ - static const glsl_type *get_vec4_type(glsl_base_type base_type) - { - if (base_type == GLSL_TYPE_FLOAT) - return vec4_type; - else if (base_type == GLSL_TYPE_INT) - return ivec4_type; - else if (base_type == GLSL_TYPE_UINT) - return uvec4_type; - else - return NULL; - } /** diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 438f21483c7..5fedf263090 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1361,7 +1361,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { st_src_reg temp = get_temp(glsl_version >= 130 ? - glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : + glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : glsl_type::vec4_type); assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); @@ -1376,7 +1376,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { st_src_reg temp = get_temp(glsl_version >= 130 ? - glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : + glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : glsl_type::vec4_type); assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); -- cgit v1.2.3 From 552cc48fca9b932fceb3d8fa7f9d0067f46b67c2 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 16 Jun 2011 13:42:57 -0500 Subject: glsl_to_tgsi: fix compile error with g++ 4.6 --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 5fedf263090..6c92441a105 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -389,7 +389,7 @@ public: void *mem_ctx; }; -static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, NULL); +static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); -- cgit v1.2.3 From 29d21417e38aed0f0710d3692df320728aef90b1 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 16 Jun 2011 18:36:16 -0500 Subject: glsl_to_tgsi: implement simplify_cmp pass needed by r300g --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 95 ++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 6c92441a105..322bfbbf1ab 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -374,6 +374,7 @@ public: bool process_move_condition(ir_rvalue *ir); void remove_output_reads(gl_register_file type); + void simplify_cmp(void); void rename_temp_register(int index, int new_index); int get_first_temp_read(int index); @@ -2788,6 +2789,97 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) } } +/** + * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which + * are read from the given src in this instruction + */ +static int +get_src_arg_mask(st_dst_reg dst, st_src_reg src) +{ + int read_mask = 0, comp; + + /* Now, given the src swizzle and the written channels, find which + * components are actually read + */ + for (comp = 0; comp < 4; ++comp) { + const unsigned coord = GET_SWZ(src.swizzle, comp); + ASSERT(coord < 4); + if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W) + read_mask |= 1 << coord; + } + + return read_mask; +} + +/** + * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP + * instruction is the first instruction to write to register T0. There are + * several lowering passes done in GLSL IR (e.g. branches and + * relative addressing) that create a large number of conditional assignments + * that ir_to_mesa converts to CMP instructions like the one mentioned above. + * + * Here is why this conversion is safe: + * CMP T0, T1 T2 T0 can be expanded to: + * if (T1 < 0.0) + * MOV T0, T2; + * else + * MOV T0, T0; + * + * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same + * as the original program. If (T1 < 0.0) evaluates to false, executing + * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. + * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 + * because any instruction that was going to read from T0 after this was going + * to read a garbage value anyway. + */ +void +glsl_to_tgsi_visitor::simplify_cmp(void) +{ + unsigned tempWrites[MAX_PROGRAM_TEMPS]; + unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; + + memset(tempWrites, 0, sizeof(tempWrites)); + memset(outputWrites, 0, sizeof(outputWrites)); + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + unsigned prevWriteMask = 0; + + /* Give up if we encounter relative addressing or flow control. */ + if (inst->dst.reladdr || + tgsi_get_opcode_info(inst->op)->is_branch || + inst->op == TGSI_OPCODE_BGNSUB || + inst->op == TGSI_OPCODE_CONT || + inst->op == TGSI_OPCODE_END || + inst->op == TGSI_OPCODE_ENDSUB || + inst->op == TGSI_OPCODE_RET) { + return; + } + + if (inst->dst.file == PROGRAM_OUTPUT) { + assert(inst->dst.index < MAX_PROGRAM_OUTPUTS); + prevWriteMask = outputWrites[inst->dst.index]; + outputWrites[inst->dst.index] |= inst->dst.writemask; + } else if (inst->dst.file == PROGRAM_TEMPORARY) { + assert(inst->dst.index < MAX_PROGRAM_TEMPS); + prevWriteMask = tempWrites[inst->dst.index]; + tempWrites[inst->dst.index] |= inst->dst.writemask; + } + + /* For a CMP to be considered a conditional write, the destination + * register and source register two must be the same. */ + if (inst->op == TGSI_OPCODE_CMP + && !(inst->dst.writemask & prevWriteMask) + && inst->src[2].file == inst->dst.file + && inst->src[2].index == inst->dst.index + && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) { + + inst->op = TGSI_OPCODE_MOV; + inst->src[0] = inst->src[1]; + } + } +} + /* Replaces all references to a temporary register index with another index. */ void glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) @@ -4170,6 +4262,9 @@ get_mesa_program(struct gl_context *ctx, v->remove_output_reads(PROGRAM_OUTPUT); if (target == GL_VERTEX_PROGRAM_ARB) v->remove_output_reads(PROGRAM_VARYING); + + /* Perform the simplify_cmp optimization, which is required by r300g. */ + v->simplify_cmp(); /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. * FIXME: These passes to optimize temporary registers don't work when there -- cgit v1.2.3 From 8c50f18b29637470539d05ccc32b0cae0092aeac Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Tue, 21 Jun 2011 21:52:19 +0100 Subject: glsl_to_tgsi: execute merge_registers() after eliminate_dead_code() Fixes a regression unintentionally introduced by "glsl_to_tgsi: fix shaders with indirect addressing of temps" that caused missing leaves in 3dmark01 test 4 (Nature) and missing/displaced textures on human models in Counter-Strike: Source. Signed-off-by: Emil Velikov Signed-off-by: Bryan Cain --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 322bfbbf1ab..abeb44a4083 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -4274,8 +4274,8 @@ get_mesa_program(struct gl_context *ctx, */ if (!v->indirect_addr_temps) { v->copy_propagate(); - v->merge_registers(); v->eliminate_dead_code(); + v->merge_registers(); v->renumber_registers(); } -- cgit v1.2.3 From 8b881ad1c3d9dd3c96afbdbb608a7240d40e9c92 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 23 Jun 2011 19:35:36 -0500 Subject: glsl_to_tgsi: use swizzle_for_size for src reg in conditional moves This prevents the copy propagation pass from being confused by undefined channels and thus missing optimization opportunities. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index abeb44a4083..6d76686ab5d 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1882,10 +1882,13 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) st_src_reg condition = this->result; for (i = 0; i < type_size(ir->lhs->type); i++) { + st_src_reg l_src = st_src_reg(l); + l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements); + if (switch_order) { - emit(ir, TGSI_OPCODE_CMP, l, condition, st_src_reg(l), r); + emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r); } else { - emit(ir, TGSI_OPCODE_CMP, l, condition, r, st_src_reg(l)); + emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src); } l.index++; -- cgit v1.2.3 From 7ec7dd4fb6ae6c8aa29988754476e1212eb986ef Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 23 Jun 2011 19:53:37 -0500 Subject: glsl_to_tgsi: remove handling of XPD opcode in compile_tgsi_instruction() The opcode is never emitted by the glsl_to_tgsi_visitor, so its special case in compile_tgsi_instruction() was dead code. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 8 -------- 1 file changed, 8 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 6d76686ab5d..721ba28d61f 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3653,14 +3653,6 @@ compile_tgsi_instruction(struct st_translate *t, src, num_src ); break; - case TGSI_OPCODE_XPD: - dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ ); - ureg_insn( ureg, - inst->op, - dst, num_dst, - src, num_src ); - break; - default: ureg_insn( ureg, inst->op, -- cgit v1.2.3 From 41472f7809dcff114223b8fadc5b97baff6060a9 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 24 Jun 2011 18:45:04 -0500 Subject: glsl_to_tgsi: add a better, more advanced dead code elimination pass --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 140 +++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 721ba28d61f..d47364fabb6 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -208,6 +208,7 @@ public: int sampler; /**< sampler index */ int tex_target; /**< One of TEXTURE_*_INDEX */ GLboolean tex_shadow; + int dead_mask; /**< Used in dead code elimination */ class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */ }; @@ -384,6 +385,7 @@ public: void copy_propagate(void); void eliminate_dead_code(void); + int eliminate_dead_code_advanced(void); void merge_registers(void); void renumber_registers(void); @@ -480,6 +482,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, inst->src[1] = src1; inst->src[2] = src2; inst->ir = ir; + inst->dead_mask = 0; inst->function = NULL; @@ -3257,6 +3260,142 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void) } } +/* + * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead + * code elimination. This is less primitive than eliminate_dead_code(), as it + * is per-channel and can detect consecutive writes without a read between them + * as dead code. However, there is some dead code that can be eliminated by + * eliminate_dead_code() but not this function - for example, this function + * cannot eliminate an instruction writing to a register that is never read and + * is the only instruction writing to that register. + * + * The glsl_to_tgsi_visitor lazily produces code assuming that this pass + * will occur. + */ +int +glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) +{ + glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx, + glsl_to_tgsi_instruction *, + this->next_temp * 4); + int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); + int level = 0; + int removed = 0; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + assert(inst->dst.file != PROGRAM_TEMPORARY + || inst->dst.index < this->next_temp); + + switch (inst->op) { + case TGSI_OPCODE_BGNLOOP: + case TGSI_OPCODE_ENDLOOP: + /* End of a basic block, clear the write array entirely. + * FIXME: This keeps us from killing dead code when the writes are + * on either side of a loop, even when the register isn't touched + * inside the loop. + */ + memset(writes, 0, sizeof(*writes) * this->next_temp * 4); + break; + + case TGSI_OPCODE_IF: + ++level; + break; + + case TGSI_OPCODE_ENDIF: + --level; + break; + + case TGSI_OPCODE_ELSE: + /* Clear all channels written inside the preceding if block from the + * write array, but leave those that were not touched. + * + * FIXME: This destroys opportunities to remove dead code inside of + * IF blocks that are followed by an ELSE block. + */ + for (int r = 0; r < this->next_temp; r++) { + for (int c = 0; c < 4; c++) { + if (!writes[4 * r + c]) + continue; + + if (write_level[4 * r + c] >= level) + writes[4 * r + c] = NULL; + } + } + break; + + default: + /* Continuing the block, clear any channels from the write array that + * are read by this instruction. + */ + for (int i = 0; i < 4; i++) { + if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){ + /* Any temporary might be read, so no dead code elimination + * across this instruction. + */ + memset(writes, 0, sizeof(*writes) * this->next_temp * 4); + } else if (inst->src[i].file == PROGRAM_TEMPORARY) { + /* Clear where it's used as src. */ + int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0); + src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1); + src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2); + src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3); + + for (int c = 0; c < 4; c++) { + if (src_chans & (1 << c)) { + writes[4 * inst->src[i].index + c] = NULL; + } + } + } + } + break; + } + + /* If this instruction writes to a temporary, add it to the write array. + * If there is already an instruction in the write array for one or more + * of the channels, flag that channel write as dead. + */ + if (inst->dst.file == PROGRAM_TEMPORARY && + !inst->dst.reladdr && + !inst->saturate) { + for (int c = 0; c < 4; c++) { + if (inst->dst.writemask & (1 << c)) { + if (writes[4 * inst->dst.index + c]) { + if (write_level[4 * inst->dst.index + c] < level) + continue; + else + writes[4 * inst->dst.index + c]->dead_mask |= (1 << c); + } + writes[4 * inst->dst.index + c] = inst; + write_level[4 * inst->dst.index + c] = level; + } + } + } + } + + /* Now actually remove the instructions that are completely dead and update + * the writemask of other instructions with dead channels. + */ + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + + if (!inst->dead_mask || !inst->dst.writemask) + continue; + else if (inst->dead_mask == inst->dst.writemask) { + iter.remove(); + delete inst; + removed++; + } else + inst->dst.writemask &= ~(inst->dead_mask); + } + + ralloc_free(write_level); + ralloc_free(writes); + + return removed; +} + /* Merges temporary registers together where possible to reduce the number of * registers needed to run a program. * @@ -4269,6 +4408,7 @@ get_mesa_program(struct gl_context *ctx, */ if (!v->indirect_addr_temps) { v->copy_propagate(); + while (v->eliminate_dead_code_advanced()); v->eliminate_dead_code(); v->merge_registers(); v->renumber_registers(); -- cgit v1.2.3 From 194732fd7299481dd57815f46a594d155260ce17 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 24 Jun 2011 20:37:53 -0500 Subject: glsl_to_tgsi: use a more specific condition for gl_FragDepth hack in generating assignments This reduces the number of instructions in the fragment shader of glsl-fs-atan-2 from 174 to 146 with EmitNoIfs enabled. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index d47364fabb6..5f22f7091d6 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1841,7 +1841,8 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) if (ir->write_mask == 0) { assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); l.writemask = WRITEMASK_XYZW; - } else if (ir->lhs->type->is_scalar()) { + } else if (ir->lhs->type->is_scalar() && + ir->lhs->variable_referenced()->mode == ir_var_out) { /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the * FINISHME: W component of fragment shader output zero, work correctly. */ @@ -1851,7 +1852,6 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) int first_enabled_chan = 0; int rhs_chan = 0; - assert(ir->lhs->type->is_vector()); l.writemask = ir->write_mask; for (int i = 0; i < 4; i++) { -- cgit v1.2.3 From 3bd06e5b82b438041f50e2469be9ea68bf3b4300 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 24 Jun 2011 22:32:26 -0500 Subject: glsl_to_tgsi: use the correct writemask in try_emit_mad() and try_emit_sat() --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 5f22f7091d6..13573fc1b94 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1133,6 +1133,7 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) { int nonmul_operand = 1 - mul_operand; st_src_reg a, b, c; + st_dst_reg result_dst; ir_expression *expr = ir->operands[mul_operand]->as_expression(); if (!expr || expr->operation != ir_binop_mul) @@ -1146,7 +1147,9 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) c = this->result; this->result = get_temp(ir->type); - emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, c); + result_dst = st_dst_reg(this->result); + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); return true; } @@ -1168,8 +1171,10 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) st_src_reg src = this->result; this->result = get_temp(ir->type); + st_dst_reg result_dst = st_dst_reg(this->result); + result_dst.writemask = (1 << ir->type->vector_elements) - 1; glsl_to_tgsi_instruction *inst; - inst = emit(ir, TGSI_OPCODE_MOV, st_dst_reg(this->result), src); + inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); inst->saturate = true; return true; -- cgit v1.2.3 From 71cbc9e3c4c9ef6090ee31e87601ae64af26321e Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 24 Jun 2011 23:17:30 -0500 Subject: glsl_to_tgsi: improve eliminate_dead_code_advanced() --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 13573fc1b94..15a1a3c51c4 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3379,6 +3379,15 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) } } + /* Anything still in the write array at this point is dead code. */ + for (int r = 0; r < this->next_temp; r++) { + for (int c = 0; c < 4; c++) { + glsl_to_tgsi_instruction *inst = writes[4 * r + c]; + if (inst) + inst->dead_mask |= (1 << c); + } + } + /* Now actually remove the instructions that are completely dead and update * the writemask of other instructions with dead channels. */ -- cgit v1.2.3 From f00406b68c07f97b11e873c04917cafdb1a67462 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 27 Jun 2011 17:11:07 -0500 Subject: glsl_to_tgsi: improve assignment handling This is a hack, but it's better than emitting an unnecessary MOV instruction and hoping the optimization passes clean it up. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 15a1a3c51c4..e38617ae9fe 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -695,13 +695,13 @@ glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, st_src_reg tmp = get_temp(glsl_type::float_type); if (src0.type == GLSL_TYPE_INT) - emit(ir, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0); + emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0); else if (src0.type == GLSL_TYPE_UINT) - emit(ir, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0); + emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0); else tmp = src0; - emit(ir, TGSI_OPCODE_ARL, dst, tmp); + emit(NULL, TGSI_OPCODE_ARL, dst, tmp); } /** @@ -1902,6 +1902,17 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) l.index++; r.index++; } + } else if (ir->rhs->as_expression() && + this->instructions.get_tail() && + ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && + type_size(ir->lhs->type) == 1) { + /* To avoid emitting an extra MOV when assigning an expression to a + * variable, change the destination register of the last instruction + * emitted as part of the expression to the assignment variable. + */ + glsl_to_tgsi_instruction *inst; + inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + inst->dst = l; } else { for (i = 0; i < type_size(ir->lhs->type); i++) { emit(ir, TGSI_OPCODE_MOV, l, r); -- cgit v1.2.3 From 4c8b6a286887628e5fc35306189a4c4a83c482ea Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 27 Jun 2011 17:25:50 -0500 Subject: glsl_to_tgsi: fix mistake in new dead code elimination pass The conditions of IF opcodes were not being counted as reads, which sometimes led to the condition register being wrong or undefined. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index e38617ae9fe..f87c64f62c7 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3315,10 +3315,6 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) memset(writes, 0, sizeof(*writes) * this->next_temp * 4); break; - case TGSI_OPCODE_IF: - ++level; - break; - case TGSI_OPCODE_ENDIF: --level; break; @@ -3341,6 +3337,10 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) } break; + case TGSI_OPCODE_IF: + ++level; + /* fallthrough to default case to mark the condition as read */ + default: /* Continuing the block, clear any channels from the write array that * are read by this instruction. -- cgit v1.2.3 From 9c2810103d107d1e5ef8bd8b57819d12264f664a Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 27 Jun 2011 17:40:10 -0500 Subject: glsl_to_tgsi: always run copy_propagate() and eliminate_dead_code_advanced() These two passes are written to handle indirect addressing properly. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index f87c64f62c7..e7d0af83a6b 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -4422,18 +4422,17 @@ get_mesa_program(struct gl_context *ctx, if (target == GL_VERTEX_PROGRAM_ARB) v->remove_output_reads(PROGRAM_VARYING); - /* Perform the simplify_cmp optimization, which is required by r300g. */ + /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ v->simplify_cmp(); + v->copy_propagate(); + while (v->eliminate_dead_code_advanced()); - /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. - * FIXME: These passes to optimize temporary registers don't work when there + /* FIXME: These passes to optimize temporary registers don't work when there * is indirect addressing of the temporary register space. We need proper * array support so that we don't have to give up these passes in every * shader that uses arrays. */ if (!v->indirect_addr_temps) { - v->copy_propagate(); - while (v->eliminate_dead_code_advanced()); v->eliminate_dead_code(); v->merge_registers(); v->renumber_registers(); -- cgit v1.2.3 From 54db6e618e43abbd69b59e0a03e2b6ec83d3120f Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 30 Jun 2011 13:42:37 -0500 Subject: r200, r600c, i965: fix build --- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vs_emit.c | 2 +- src/mesa/drivers/dri/r200/r200_vertprog.c | 8 ++++---- src/mesa/drivers/dri/r600/evergreen_fragprog.c | 8 ++++---- src/mesa/drivers/dri/r600/evergreen_vertprog.c | 16 ++++++++-------- src/mesa/drivers/dri/r600/r700_fragprog.c | 8 ++++---- src/mesa/drivers/dri/r600/r700_vertprog.c | 16 ++++++++-------- 7 files changed, 30 insertions(+), 30 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 7c73a8fbf02..31f76f8c939 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -605,7 +605,7 @@ fs_visitor::setup_paramvalues_refs() /* Set up the pointers to ParamValues now that that array is finalized. */ for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { c->prog_data.param[i] = - fp->Base.Parameters->ParameterValues[this->param_index[i]] + + (const float *)fp->Base.Parameters->ParameterValues[this->param_index[i]] + this->param_offset[i]; } } diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index b6c9e5a1ceb..2fa04a15a34 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1359,7 +1359,7 @@ get_src_reg( struct brw_vs_compile *c, if (component >= 0) { params = c->vp->program.Base.Parameters; - f = params->ParameterValues[src->Index][component]; + f = params->ParameterValues[src->Index][component].f; if (src->Abs) f = fabs(f); diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c index 63e03b0e0c7..cf44d7f459c 100644 --- a/src/mesa/drivers/dri/r200/r200_vertprog.c +++ b/src/mesa/drivers/dri/r200/r200_vertprog.c @@ -126,10 +126,10 @@ static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_ case PROGRAM_NAMED_PARAM: //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name); case PROGRAM_CONSTANT: - *fcmd++ = paramList->ParameterValues[pi][0]; - *fcmd++ = paramList->ParameterValues[pi][1]; - *fcmd++ = paramList->ParameterValues[pi][2]; - *fcmd++ = paramList->ParameterValues[pi][3]; + *fcmd++ = paramList->ParameterValues[pi][0].f; + *fcmd++ = paramList->ParameterValues[pi][1].f; + *fcmd++ = paramList->ParameterValues[pi][2].f; + *fcmd++ = paramList->ParameterValues[pi][3].f; break; default: _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__); diff --git a/src/mesa/drivers/dri/r600/evergreen_fragprog.c b/src/mesa/drivers/dri/r600/evergreen_fragprog.c index e527c379b62..cc584ca2b35 100644 --- a/src/mesa/drivers/dri/r600/evergreen_fragprog.c +++ b/src/mesa/drivers/dri/r600/evergreen_fragprog.c @@ -752,10 +752,10 @@ GLboolean evergreenSetupFPconstants(struct gl_context * ctx) unNumParamData = paramList->NumParameters; for(ui=0; uips.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } /* alloc multiple of 16 constants */ diff --git a/src/mesa/drivers/dri/r600/evergreen_vertprog.c b/src/mesa/drivers/dri/r600/evergreen_vertprog.c index 018869b9996..117916ac78f 100644 --- a/src/mesa/drivers/dri/r600/evergreen_vertprog.c +++ b/src/mesa/drivers/dri/r600/evergreen_vertprog.c @@ -684,17 +684,17 @@ GLboolean evergreenSetupVPconstants(struct gl_context * ctx) for(ui=0; uiParameters[ui].Type == PROGRAM_UNIFORM) { - evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0]; - evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1]; - evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2]; - evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3]; + evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f; + evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f; + evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f; + evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f; } else { - evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } } diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 40494cd6af0..6f9834e68fe 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -778,10 +778,10 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx) unNumParamData = paramList->NumParameters; for(ui=0; uips.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } /* Load fp constants to gpu */ diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 7d4be9180a0..b1e2742b27d 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -720,17 +720,17 @@ GLboolean r700SetupVertexProgram(struct gl_context * ctx) for(ui=0; uiParameters[ui].Type == PROGRAM_UNIFORM) { - r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0]; - r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1]; - r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2]; - r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3]; + r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f; + r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f; + r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f; + r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f; } else { - r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } } -- cgit v1.2.3 From 33e0c47b05c8fbae9d7af57ba65b612825b5db60 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Mon, 4 Jul 2011 08:44:12 -0500 Subject: glsl_to_tgsi: replace MAX_PROGRAM_TEMPS (256) with MAX_TEMPS (4096) --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index e7d0af83a6b..d7afc22c048 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -77,6 +77,8 @@ extern "C" { (1 << PROGRAM_CONSTANT) | \ (1 << PROGRAM_UNIFORM)) +#define MAX_TEMPS 4096 + class st_src_reg; class st_dst_reg; @@ -2751,11 +2753,11 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) GLint outputMap[VERT_RESULT_MAX]; GLint outputTypes[VERT_RESULT_MAX]; GLuint numVaryingReads = 0; - GLboolean usedTemps[MAX_PROGRAM_TEMPS]; + GLboolean usedTemps[MAX_TEMPS]; GLuint firstTemp = 0; _mesa_find_used_registers(prog, PROGRAM_TEMPORARY, - usedTemps, MAX_PROGRAM_TEMPS); + usedTemps, MAX_TEMPS); assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT); assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING); @@ -2775,7 +2777,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type) if (outputMap[var] == -1) { numVaryingReads++; outputMap[var] = _mesa_find_free_register(usedTemps, - MAX_PROGRAM_TEMPS, + MAX_TEMPS, firstTemp); outputTypes[var] = inst->src[j].type; firstTemp = outputMap[var] + 1; @@ -2857,7 +2859,7 @@ get_src_arg_mask(st_dst_reg dst, st_src_reg src) void glsl_to_tgsi_visitor::simplify_cmp(void) { - unsigned tempWrites[MAX_PROGRAM_TEMPS]; + unsigned tempWrites[MAX_TEMPS]; unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; memset(tempWrites, 0, sizeof(tempWrites)); @@ -2883,7 +2885,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void) prevWriteMask = outputWrites[inst->dst.index]; outputWrites[inst->dst.index] |= inst->dst.writemask; } else if (inst->dst.file == PROGRAM_TEMPORARY) { - assert(inst->dst.index < MAX_PROGRAM_TEMPS); + assert(inst->dst.index < MAX_TEMPS); prevWriteMask = tempWrites[inst->dst.index]; tempWrites[inst->dst.index] |= inst->dst.writemask; } @@ -3504,7 +3506,7 @@ struct label { struct st_translate { struct ureg_program *ureg; - struct ureg_dst temps[MAX_PROGRAM_TEMPS]; + struct ureg_dst temps[MAX_TEMPS]; struct ureg_src *constants; struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; -- cgit v1.2.3 From c0dcab2882a4731dccd363a40c3ebcabc88b9c5d Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 8 Jul 2011 21:12:08 -0500 Subject: st/mesa, glsl_to_tgsi: support glDrawPixels/glCopyPixels with a GLSL fragment shader active Since this was previously implemented using Mesa IR and _mesa_combine_programs, this commit adds a new code path that works with glsl_to_tgsi. --- src/mesa/state_tracker/st_cb_drawpixels.c | 65 +++++++++++++++ src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 126 +++++++++++++++++++++++++++++ src/mesa/state_tracker/st_glsl_to_tgsi.h | 3 + 3 files changed, 194 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 965fbcd1d9e..f4dd2a42847 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -94,6 +94,67 @@ is_passthrough_program(const struct gl_fragment_program *prog) } +/* XXX copied verbatim from st_atom_pixeltransfer.c */ +static struct pipe_resource * +create_color_map_texture(struct gl_context *ctx) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct pipe_resource *pt; + enum pipe_format format; + const uint texSize = 256; /* simple, and usually perfect */ + + /* find an RGBA texture format */ + format = st_choose_format(pipe->screen, GL_RGBA, + PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); + + /* create texture for color map/table */ + pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0, + texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW); + return pt; +} + + +/** + * Returns a fragment program which implements the current pixel transfer ops. + */ +static struct gl_fragment_program * +get_glsl_pixel_transfer_program(struct st_context *st, + struct st_fragment_program *orig) +{ + int pixelMaps = 0, scaleAndBias = 0; + struct gl_context *ctx = st->ctx; + struct st_fragment_program *fp = (struct st_fragment_program *) + ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); + + if (!fp) + return NULL; + + if (ctx->Pixel.RedBias != 0.0 || ctx->Pixel.RedScale != 1.0 || + ctx->Pixel.GreenBias != 0.0 || ctx->Pixel.GreenScale != 1.0 || + ctx->Pixel.BlueBias != 0.0 || ctx->Pixel.BlueScale != 1.0 || + ctx->Pixel.AlphaBias != 0.0 || ctx->Pixel.AlphaScale != 1.0) { + scaleAndBias = 1; + } + + pixelMaps = ctx->Pixel.MapColorFlag; + + if (pixelMaps) { + /* create the colormap/texture now if not already done */ + if (!st->pixel_xfer.pixelmap_texture) { + st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx); + st->pixel_xfer.pixelmap_sampler_view = + st_create_texture_sampler_view(st->pipe, + st->pixel_xfer.pixelmap_texture); + } + } + + get_pixel_transfer_visitor(fp, orig->glsl_to_tgsi, + scaleAndBias, pixelMaps); + + return &fp->Base; +} + /** * Make fragment shader for glDraw/CopyPixels. This shader is made @@ -107,11 +168,15 @@ st_make_drawpix_fragment_program(struct st_context *st, struct gl_fragment_program **fpOut) { struct gl_program *newProg; + struct st_fragment_program *stfp = (struct st_fragment_program *) fpIn; if (is_passthrough_program(fpIn)) { newProg = (struct gl_program *) _mesa_clone_fragment_program(st->ctx, &st->pixel_xfer.program->Base); } + else if (stfp->glsl_to_tgsi != NULL) { + newProg = (struct gl_program *) get_glsl_pixel_transfer_program(st, stfp); + } else { #if 0 /* debug */ diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index d7afc22c048..ae0c92f5f13 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3494,6 +3494,132 @@ glsl_to_tgsi_visitor::renumber_registers(void) this->next_temp = new_index; } +/** + * Returns a fragment program which implements the current pixel transfer ops. + * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c. + */ +extern "C" void +get_pixel_transfer_visitor(struct st_fragment_program *fp, + glsl_to_tgsi_visitor *original, + int scale_and_bias, int pixel_maps) +{ + glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); + struct st_context *st = st_context(original->ctx); + struct gl_program *prog = &fp->Base.Base; + struct gl_program_parameter_list *params = _mesa_new_parameter_list(); + st_src_reg coord, src0; + st_dst_reg dst0; + glsl_to_tgsi_instruction *inst; + + /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ + v->ctx = original->ctx; + v->prog = prog; + v->glsl_version = original->glsl_version; + v->options = original->options; + v->next_temp = original->next_temp; + v->num_address_regs = original->num_address_regs; + v->samplers_used = prog->SamplersUsed = original->samplers_used; + v->indirect_addr_temps = original->indirect_addr_temps; + v->indirect_addr_consts = original->indirect_addr_consts; + + /* + * Get initial pixel color from the texture. + * TEX colorTemp, fragment.texcoord[0], texture[0], 2D; + */ + coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); + src0 = v->get_temp(glsl_type::vec4_type); + dst0 = st_dst_reg(src0); + inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); + inst->sampler = 0; + inst->tex_target = TEXTURE_2D_INDEX; + + prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); + prog->OutputsWritten |= BITFIELD64_BIT(FRAG_RESULT_COLOR); + prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */ + v->samplers_used |= (1 << 0); + + if (scale_and_bias) { + static const gl_state_index scale_state[STATE_LENGTH] = + { STATE_INTERNAL, STATE_PT_SCALE, + (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; + static const gl_state_index bias_state[STATE_LENGTH] = + { STATE_INTERNAL, STATE_PT_BIAS, + (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; + GLint scale_p, bias_p; + st_src_reg scale, bias; + + scale_p = _mesa_add_state_reference(params, scale_state); + bias_p = _mesa_add_state_reference(params, bias_state); + + /* MAD colorTemp, colorTemp, scale, bias; */ + scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT); + bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT); + inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias); + } + + if (pixel_maps) { + st_src_reg temp = v->get_temp(glsl_type::vec4_type); + st_dst_reg temp_dst = st_dst_reg(temp); + + assert(st->pixel_xfer.pixelmap_texture); + + /* With a little effort, we can do four pixel map look-ups with + * two TEX instructions: + */ + + /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */ + temp_dst.writemask = WRITEMASK_XY; /* write R,G */ + inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); + inst->sampler = 1; + inst->tex_target = TEXTURE_2D_INDEX; + + /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */ + src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); + temp_dst.writemask = WRITEMASK_ZW; /* write B,A */ + inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); + inst->sampler = 1; + inst->tex_target = TEXTURE_2D_INDEX; + + prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */ + v->samplers_used |= (1 << 1); + + /* MOV colorTemp, temp; */ + inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp); + } + + /* Now copy the instructions from the original glsl_to_tgsi_visitor into the + * new visitor. */ + foreach_iter(exec_list_iterator, iter, original->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + st_src_reg src_regs[3]; + + for (int i=0; i<3; i++) { + src_regs[i] = inst->src[i]; + if (src_regs[i].file == PROGRAM_INPUT && + src_regs[i].index == FRAG_ATTRIB_COL0) + { + src_regs[i].file = PROGRAM_TEMPORARY; + src_regs[i].index = src0.index; + } + else if (src_regs[i].file == PROGRAM_INPUT) + prog->InputsRead |= (1 << src_regs[i].index); + else if (src_regs[i].file == PROGRAM_OUTPUT) + prog->OutputsWritten |= BITFIELD64_BIT(src_regs[i].index); + } + + v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); + } + + /* Make modifications to fragment program info. */ + prog->Parameters = _mesa_combine_parameter_lists(params, + original->prog->Parameters); + prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes); + prog->Varying = _mesa_clone_parameter_list(original->prog->Varying); + _mesa_free_parameter_list(params); + count_resources(v, prog); + fp->glsl_to_tgsi = v; +} + /* ------------------------- TGSI conversion stuff -------------------------- */ struct label { unsigned branch_target; diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h index e21c0d1e0af..7884a9feb71 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.h +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h @@ -52,6 +52,9 @@ enum pipe_error st_translate_program( boolean passthrough_edgeflags); void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v); +void get_pixel_transfer_visitor(struct st_fragment_program *fp, + struct glsl_to_tgsi_visitor *original, + int scale_and_bias, int pixel_maps); struct gl_shader *st_new_shader(struct gl_context *ctx, GLuint name, GLuint type); -- cgit v1.2.3 From 5f0b4b0e9d376f9ec1cb5ae08c36052f4f51ac37 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sun, 10 Jul 2011 17:17:38 -0500 Subject: st/mesa, glsl_to_tgsi: support glBitmap with a GLSL fragment shader active --- src/mesa/state_tracker/st_cb_bitmap.c | 35 +++++++++++--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 73 ++++++++++++++++++++++++++++++ src/mesa/state_tracker/st_glsl_to_tgsi.h | 3 ++ 3 files changed, 105 insertions(+), 6 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 49b196032b9..f0750b518ad 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -172,6 +172,23 @@ make_bitmap_fragment_program(struct gl_context *ctx, GLuint samplerIndex) } +static struct gl_program * +make_bitmap_fragment_program_glsl(struct st_context *st, + struct st_fragment_program *orig, + GLuint samplerIndex) +{ + struct gl_context *ctx = st->ctx; + struct st_fragment_program *fp = (struct st_fragment_program *) + ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); + + if (!fp) + return NULL; + + get_bitmap_visitor(fp, orig->glsl_to_tgsi, samplerIndex); + return &fp->Base.Base; +} + + static int find_free_bit(uint bitfield) { @@ -199,6 +216,7 @@ st_make_bitmap_fragment_program(struct st_context *st, GLuint *bitmap_sampler) { struct st_fragment_program *bitmap_prog; + struct st_fragment_program *stfpIn = (struct st_fragment_program *) fpIn; struct gl_program *newProg; uint sampler; @@ -207,13 +225,18 @@ st_make_bitmap_fragment_program(struct st_context *st, * with the bitmap sampler/kill instructions. */ sampler = find_free_bit(fpIn->Base.SamplersUsed); - bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler); + + if (stfpIn->glsl_to_tgsi) + newProg = make_bitmap_fragment_program_glsl(st, stfpIn, sampler); + else { + bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler); - newProg = _mesa_combine_programs(st->ctx, - &bitmap_prog->Base.Base, - &fpIn->Base); - /* done with this after combining */ - st_reference_fragprog(st, &bitmap_prog, NULL); + newProg = _mesa_combine_programs(st->ctx, + &bitmap_prog->Base.Base, + &fpIn->Base); + /* done with this after combining */ + st_reference_fragprog(st, &bitmap_prog, NULL); + } #if 0 { diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index ae0c92f5f13..74f15087947 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3620,6 +3620,79 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, fp->glsl_to_tgsi = v; } +/** + * Make fragment program for glBitmap: + * Sample the texture and kill the fragment if the bit is 0. + * This program will be combined with the user's fragment program. + * + * Based on make_bitmap_fragment_program in st_cb_bitmap.c. + */ +extern "C" void +get_bitmap_visitor(struct st_fragment_program *fp, + glsl_to_tgsi_visitor *original, int samplerIndex) +{ + glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); + struct st_context *st = st_context(original->ctx); + struct gl_program *prog = &fp->Base.Base; + st_src_reg coord, src0; + st_dst_reg dst0; + glsl_to_tgsi_instruction *inst; + + /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ + v->ctx = original->ctx; + v->prog = prog; + v->glsl_version = original->glsl_version; + v->options = original->options; + v->next_temp = original->next_temp; + v->num_address_regs = original->num_address_regs; + v->samplers_used = prog->SamplersUsed = original->samplers_used; + v->indirect_addr_temps = original->indirect_addr_temps; + v->indirect_addr_consts = original->indirect_addr_consts; + + /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ + coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); + src0 = v->get_temp(glsl_type::vec4_type); + dst0 = st_dst_reg(src0); + inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); + inst->sampler = samplerIndex; + inst->tex_target = TEXTURE_2D_INDEX; + + prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); + prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */ + v->samplers_used |= (1 << samplerIndex); + + /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */ + src0.negate = NEGATE_XYZW; + if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM) + src0.swizzle = SWIZZLE_XXXX; + inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0); + + /* Now copy the instructions from the original glsl_to_tgsi_visitor into the + * new visitor. */ + foreach_iter(exec_list_iterator, iter, original->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + st_src_reg src_regs[3]; + + if (inst->dst.file == PROGRAM_OUTPUT) + prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); + + for (int i=0; i<3; i++) { + src_regs[i] = inst->src[i]; + if (src_regs[i].file == PROGRAM_INPUT) + prog->InputsRead |= (1 << src_regs[i].index); + } + + v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); + } + + /* Make modifications to fragment program info. */ + prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters); + prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes); + prog->Varying = _mesa_clone_parameter_list(original->prog->Varying); + count_resources(v, prog); + fp->glsl_to_tgsi = v; +} + /* ------------------------- TGSI conversion stuff -------------------------- */ struct label { unsigned branch_target; diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h index 7884a9feb71..d877471785d 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.h +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h @@ -55,6 +55,9 @@ void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v); void get_pixel_transfer_visitor(struct st_fragment_program *fp, struct glsl_to_tgsi_visitor *original, int scale_and_bias, int pixel_maps); +void get_bitmap_visitor(struct st_fragment_program *fp, + struct glsl_to_tgsi_visitor *original, + int samplerIndex); struct gl_shader *st_new_shader(struct gl_context *ctx, GLuint name, GLuint type); -- cgit v1.2.3 From 87f8d8547db9b947ae847c509a464e06d0ac6c64 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sun, 10 Jul 2011 17:36:04 -0500 Subject: glsl_to_tgsi: fix mistakes in get_pixel_transfer_visitor() I noticed these issues while working on get_bitmap_visitor(). --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 74f15087947..3df22eae918 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3534,7 +3534,6 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, inst->tex_target = TEXTURE_2D_INDEX; prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0); - prog->OutputsWritten |= BITFIELD64_BIT(FRAG_RESULT_COLOR); prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */ v->samplers_used |= (1 << 0); @@ -3593,6 +3592,9 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); st_src_reg src_regs[3]; + if (inst->dst.file == PROGRAM_OUTPUT) + prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); + for (int i=0; i<3; i++) { src_regs[i] = inst->src[i]; if (src_regs[i].file == PROGRAM_INPUT && @@ -3603,8 +3605,6 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, } else if (src_regs[i].file == PROGRAM_INPUT) prog->InputsRead |= (1 << src_regs[i].index); - else if (src_regs[i].file == PROGRAM_OUTPUT) - prog->OutputsWritten |= BITFIELD64_BIT(src_regs[i].index); } v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); -- cgit v1.2.3 From 7732822c833ee22e259af3f8bd2bfb57c986612e Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 21 Jul 2011 15:49:26 -0500 Subject: glsl_to_tgsi: separate immediates from array constants during IR translation Before, if any uniform or constant array was accessed with indirect addressing, st_translate_program() would emit uniform constants in the place of immediates. This behavior was unavoidable with ir_to_mesa/mesa_to_tgsi, but glsl_to_tgsi can work around it since the GLSL IR backend and the TGSI emission are both inside the state tracker. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 143 +++++++++++++++++++---------- 1 file changed, 95 insertions(+), 48 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 3df22eae918..389e5d8e2ef 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -70,6 +70,7 @@ extern "C" { #include "st_mesa_to_tgsi.h" } +#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX #define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \ (1 << PROGRAM_ENV_PARAM) | \ (1 << PROGRAM_STATE_VAR) | \ @@ -272,6 +273,7 @@ public: struct gl_program *prog; struct gl_shader_program *shader_program; struct gl_shader_compiler_options *options; + struct gl_program_parameter_list *immediates; int next_temp; @@ -505,6 +507,9 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, case PROGRAM_UNIFORM: this->indirect_addr_consts = true; break; + case PROGRAM_IMMEDIATE: + assert(!"immediates should not have indirect addressing"); + break; default: break; } @@ -524,6 +529,9 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, case PROGRAM_UNIFORM: this->indirect_addr_consts = true; break; + case PROGRAM_IMMEDIATE: + assert(!"immediates should not have indirect addressing"); + break; default: break; } @@ -804,12 +812,12 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, struct st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_float(float val) { - st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_FLOAT); + st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT); union gl_constant_value uval; uval.f = val; - src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, - &uval, 1, GL_FLOAT, &src.swizzle); + src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1, + GL_FLOAT, &src.swizzle); return src; } @@ -817,14 +825,14 @@ glsl_to_tgsi_visitor::st_src_reg_for_float(float val) struct st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_int(int val) { - st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_INT); + st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT); union gl_constant_value uval; assert(glsl_version >= 130); uval.i = val; - src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, - &uval, 1, GL_INT, &src.swizzle); + src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1, + GL_INT, &src.swizzle); return src; } @@ -1933,9 +1941,15 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) gl_constant_value *values = (gl_constant_value *) stack_vals; GLenum gl_type = GL_NONE; unsigned int i; + gl_register_file file; + gl_program_parameter_list *param_list; + static int in_array = 0; + + file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; + param_list = in_array ? this->prog->Parameters : this->immediates; /* Unfortunately, 4 floats is all we can get into - * _mesa_add_unnamed_constant. So, make a temp to store an + * _mesa_add_typed_unnamed_constant. So, make a temp to store an * aggregate constant and move each constant value into it. If we * get lucky, copy propagation will eliminate the extra moves. */ @@ -1969,6 +1983,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) int size = type_size(ir->type->fields.array); assert(size > 0); + in_array++; for (i = 0; i < ir->type->length; i++) { ir->array_elements[i]->accept(this); @@ -1981,6 +1996,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) } } this->result = temp_base; + in_array--; return; } @@ -1992,8 +2008,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) assert(ir->type->base_type == GLSL_TYPE_FLOAT); values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; - src = st_src_reg(PROGRAM_CONSTANT, -1, ir->type->base_type); - src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, + src = st_src_reg(file, -1, ir->type->base_type); + src.index = _mesa_add_typed_unnamed_constant(param_list, values, ir->type->vector_elements, GL_FLOAT, @@ -2007,7 +2023,6 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) return; } - src.file = PROGRAM_CONSTANT; switch (ir->type->base_type) { case GLSL_TYPE_FLOAT: gl_type = GL_FLOAT; @@ -2046,8 +2061,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) assert(!"Non-float/uint/int/bool constant"); } - this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type); - this->result.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters, + this->result = st_src_reg(file, -1, ir->type); + this->result.index = _mesa_add_typed_unnamed_constant(param_list, values, ir->type->vector_elements, gl_type, &this->result.swizzle); } @@ -2430,11 +2445,13 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() num_address_regs = 0; indirect_addr_temps = false; indirect_addr_consts = false; + immediates = _mesa_new_parameter_list(); mem_ctx = ralloc_context(NULL); } glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() { + _mesa_free_parameter_list(immediates); ralloc_free(mem_ctx); } @@ -3521,6 +3538,8 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, v->samplers_used = prog->SamplersUsed = original->samplers_used; v->indirect_addr_temps = original->indirect_addr_temps; v->indirect_addr_consts = original->indirect_addr_consts; + _mesa_free_parameter_list(v->immediates); + v->immediates = _mesa_clone_parameter_list(original->immediates); /* * Get initial pixel color from the texture. @@ -3648,6 +3667,8 @@ get_bitmap_visitor(struct st_fragment_program *fp, v->samplers_used = prog->SamplersUsed = original->samplers_used; v->indirect_addr_temps = original->indirect_addr_temps; v->indirect_addr_consts = original->indirect_addr_consts; + _mesa_free_parameter_list(v->immediates); + v->immediates = _mesa_clone_parameter_list(original->immediates); /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); @@ -3707,6 +3728,7 @@ struct st_translate { struct ureg_dst temps[MAX_TEMPS]; struct ureg_src *constants; + struct ureg_src *immediates; struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; struct ureg_dst address[1]; @@ -3797,6 +3819,43 @@ static void set_insn_start( struct st_translate *t, t->insn[t->insn_count++] = start; } +/** + * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. + */ +static struct ureg_src +emit_immediate( struct st_translate *t, + struct gl_program_parameter_list *params, + int index) +{ + struct ureg_program *ureg = t->ureg; + + switch(params->Parameters[index].DataType) + { + case GL_FLOAT: + case GL_FLOAT_VEC2: + case GL_FLOAT_VEC3: + case GL_FLOAT_VEC4: + return ureg_DECL_immediate(ureg, (float *)params->ParameterValues[index], 4); + case GL_INT: + case GL_INT_VEC2: + case GL_INT_VEC3: + case GL_INT_VEC4: + return ureg_DECL_immediate_int(ureg, (int *)params->ParameterValues[index], 4); + case GL_UNSIGNED_INT: + case GL_UNSIGNED_INT_VEC2: + case GL_UNSIGNED_INT_VEC3: + case GL_UNSIGNED_INT_VEC4: + case GL_BOOL: + case GL_BOOL_VEC2: + case GL_BOOL_VEC3: + case GL_BOOL_VEC4: + return ureg_DECL_immediate_uint(ureg, (unsigned *)params->ParameterValues[index], 4); + default: + assert(!"should not get here - type must be float, int, uint, or bool"); + return ureg_src_undef(); + } +} + /** * Map a Mesa dst register to a TGSI ureg_dst register. */ @@ -3871,6 +3930,9 @@ src_register( struct st_translate *t, else return t->constants[index]; + case PROGRAM_IMMEDIATE: + return t->immediates[index]; + case PROGRAM_INPUT: assert(t->inputMapping[index] < Elements(t->inputs)); return t->inputs[t->inputMapping[index]]; @@ -4402,9 +4464,8 @@ st_translate_program( } } - /* Emit constants and immediates. Mesa uses a single index space - * for these, so we put all the translated regs in t->constants. - * XXX: this entire if block depends on proginfo->Parameters from Mesa IR + /* Emit constants and uniforms. TGSI uses a single index space for these, + * so we put all the translated regs in t->constants. */ if (proginfo->Parameters) { t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] ); @@ -4423,49 +4484,34 @@ st_translate_program( t->constants[i] = ureg_DECL_constant( ureg, i ); break; - /* Emit immediates only when there's no indirect addressing of - * the const buffer. - * FIXME: Be smarter and recognize param arrays: - * indirect addressing is only valid within the referenced - * array. - */ + /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect + * addressing of the const buffer. + * FIXME: Be smarter and recognize param arrays: + * indirect addressing is only valid within the referenced + * array. + */ case PROGRAM_CONSTANT: if (program->indirect_addr_consts) t->constants[i] = ureg_DECL_constant( ureg, i ); else - switch(proginfo->Parameters->Parameters[i].DataType) - { - case GL_FLOAT: - case GL_FLOAT_VEC2: - case GL_FLOAT_VEC3: - case GL_FLOAT_VEC4: - t->constants[i] = ureg_DECL_immediate(ureg, (float *)proginfo->Parameters->ParameterValues[i], 4); - break; - case GL_INT: - case GL_INT_VEC2: - case GL_INT_VEC3: - case GL_INT_VEC4: - t->constants[i] = ureg_DECL_immediate_int(ureg, (int *)proginfo->Parameters->ParameterValues[i], 4); - break; - case GL_UNSIGNED_INT: - case GL_UNSIGNED_INT_VEC2: - case GL_UNSIGNED_INT_VEC3: - case GL_UNSIGNED_INT_VEC4: - case GL_BOOL: - case GL_BOOL_VEC2: - case GL_BOOL_VEC3: - case GL_BOOL_VEC4: - t->constants[i] = ureg_DECL_immediate_uint(ureg, (unsigned *)proginfo->Parameters->ParameterValues[i], 4); - break; - default: - assert(!"should not get here"); - } + t->constants[i] = emit_immediate( t, proginfo->Parameters, i ); break; default: break; } } } + + /* Emit immediate values. + */ + t->immediates = (struct ureg_src *)CALLOC( program->immediates->NumParameters * sizeof(struct ureg_src) ); + if (t->immediates == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out; + } + for (i = 0; i < program->immediates->NumParameters; i++) { + t->immediates[i] = emit_immediate( t, program->immediates, i ); + } /* texture samplers */ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { @@ -4512,6 +4558,7 @@ out: FREE(t->insn); FREE(t->labels); FREE(t->constants); + FREE(t->immediates); if (t->error) { debug_printf("%s: translate error flag set\n", __FUNCTION__); -- cgit v1.2.3 From 0da994a9f15b461d16cf88ce16dc07e98dfada6f Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 21 Jul 2011 16:29:56 -0500 Subject: glsl_to_tgsi: make assignment hack safer Fixes an assertion failure in piglit test glsl-texcoord-array. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 389e5d8e2ef..6e01a44a733 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1917,12 +1917,13 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && type_size(ir->lhs->type) == 1) { /* To avoid emitting an extra MOV when assigning an expression to a - * variable, change the destination register of the last instruction - * emitted as part of the expression to the assignment variable. + * variable, emit the last instruction of the expression again, but + * replace the destination register with the target of the assignment. + * Dead code elimination will remove the original instruction. */ glsl_to_tgsi_instruction *inst; inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); - inst->dst = l; + emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); } else { for (i = 0; i < type_size(ir->lhs->type); i++) { emit(ir, TGSI_OPCODE_MOV, l, r); -- cgit v1.2.3 From a2c3b9f38d81f363bd62abc87dc3abef2beeba95 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 22 Jul 2011 13:23:26 -0500 Subject: glsl_to_tgsi: make coding style more consistent --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 263 ++++++++++++++--------------- 1 file changed, 126 insertions(+), 137 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 6e01a44a733..952900a1fb5 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3778,15 +3778,14 @@ static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { * of labels built here and patch the TGSI code with the actual * location of each label. */ -static unsigned *get_label( struct st_translate *t, - unsigned branch_target ) +static unsigned *get_label(struct st_translate *t, unsigned branch_target) { unsigned i; if (t->labels_count + 1 >= t->labels_size) { t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); t->labels = (struct label *)realloc(t->labels, - t->labels_size * sizeof t->labels[0]); + t->labels_size * sizeof(struct label)); if (t->labels == NULL) { static unsigned dummy; t->error = TRUE; @@ -3805,12 +3804,11 @@ static unsigned *get_label( struct st_translate *t, * Update the insn[] array so the next Mesa instruction points to * the next TGSI instruction. */ -static void set_insn_start( struct st_translate *t, - unsigned start ) +static void set_insn_start(struct st_translate *t, unsigned start) { if (t->insn_count + 1 >= t->insn_size) { t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); - t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof t->insn[0]); + t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0])); if (t->insn == NULL) { t->error = TRUE; return; @@ -3824,9 +3822,9 @@ static void set_insn_start( struct st_translate *t, * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. */ static struct ureg_src -emit_immediate( struct st_translate *t, - struct gl_program_parameter_list *params, - int index) +emit_immediate(struct st_translate *t, + struct gl_program_parameter_list *params, + int index) { struct ureg_program *ureg = t->ureg; @@ -3861,17 +3859,17 @@ emit_immediate( struct st_translate *t, * Map a Mesa dst register to a TGSI ureg_dst register. */ static struct ureg_dst -dst_register( struct st_translate *t, - gl_register_file file, - GLuint index ) +dst_register(struct st_translate *t, + gl_register_file file, + GLuint index) { - switch( file ) { + switch(file) { case PROGRAM_UNDEFINED: return ureg_dst_undef(); case PROGRAM_TEMPORARY: if (ureg_dst_is_undef(t->temps[index])) - t->temps[index] = ureg_DECL_temporary( t->ureg ); + t->temps[index] = ureg_DECL_temporary(t->ureg); return t->temps[index]; @@ -3894,7 +3892,7 @@ dst_register( struct st_translate *t, return t->address[index]; default: - debug_assert( 0 ); + assert(!"unknown dst register file"); return ureg_dst_undef(); } } @@ -3903,11 +3901,11 @@ dst_register( struct st_translate *t, * Map a Mesa src register to a TGSI ureg_src register. */ static struct ureg_src -src_register( struct st_translate *t, - gl_register_file file, - GLuint index ) +src_register(struct st_translate *t, + gl_register_file file, + GLuint index) { - switch( file ) { + switch(file) { case PROGRAM_UNDEFINED: return ureg_src_undef(); @@ -3915,7 +3913,7 @@ src_register( struct st_translate *t, assert(index >= 0); assert(index < Elements(t->temps)); if (ureg_dst_is_undef(t->temps[index])) - t->temps[index] = ureg_DECL_temporary( t->ureg ); + t->temps[index] = ureg_DECL_temporary(t->ureg); return ureg_src(t->temps[index]); case PROGRAM_NAMED_PARAM: @@ -3927,7 +3925,7 @@ src_register( struct st_translate *t, case PROGRAM_STATE_VAR: case PROGRAM_CONSTANT: /* ie, immediate */ if (index < 0) - return ureg_DECL_constant( t->ureg, 0 ); + return ureg_DECL_constant(t->ureg, 0); else return t->constants[index]; @@ -3950,7 +3948,7 @@ src_register( struct st_translate *t, return t->systemValues[index]; default: - debug_assert( 0 ); + assert(!"unknown src register file"); return ureg_src_undef(); } } @@ -3959,22 +3957,21 @@ src_register( struct st_translate *t, * Create a TGSI ureg_dst register from an st_dst_reg. */ static struct ureg_dst -translate_dst( struct st_translate *t, - const st_dst_reg *dst_reg, - boolean saturate ) +translate_dst(struct st_translate *t, + const st_dst_reg *dst_reg, + bool saturate) { - struct ureg_dst dst = dst_register( t, - dst_reg->file, - dst_reg->index ); + struct ureg_dst dst = dst_register(t, + dst_reg->file, + dst_reg->index); - dst = ureg_writemask( dst, - dst_reg->writemask ); + dst = ureg_writemask(dst, dst_reg->writemask); if (saturate) - dst = ureg_saturate( dst ); + dst = ureg_saturate(dst); if (dst_reg->reladdr != NULL) - dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) ); + dst = ureg_dst_indirect(dst, ureg_src(t->address[0])); return dst; } @@ -3983,16 +3980,15 @@ translate_dst( struct st_translate *t, * Create a TGSI ureg_src register from an st_src_reg. */ static struct ureg_src -translate_src( struct st_translate *t, - const st_src_reg *src_reg ) +translate_src(struct st_translate *t, const st_src_reg *src_reg) { - struct ureg_src src = src_register( t, src_reg->file, src_reg->index ); + struct ureg_src src = src_register(t, src_reg->file, src_reg->index); - src = ureg_swizzle( src, - GET_SWZ( src_reg->swizzle, 0 ) & 0x3, - GET_SWZ( src_reg->swizzle, 1 ) & 0x3, - GET_SWZ( src_reg->swizzle, 2 ) & 0x3, - GET_SWZ( src_reg->swizzle, 3 ) & 0x3); + src = ureg_swizzle(src, + GET_SWZ(src_reg->swizzle, 0) & 0x3, + GET_SWZ(src_reg->swizzle, 1) & 0x3, + GET_SWZ(src_reg->swizzle, 2) & 0x3, + GET_SWZ(src_reg->swizzle, 3) & 0x3); if ((src_reg->negate & 0xf) == NEGATE_XYZW) src = ureg_negate(src); @@ -4024,8 +4020,8 @@ translate_src( struct st_translate *t, } static void -compile_tgsi_instruction(struct st_translate *t, - const struct glsl_to_tgsi_instruction *inst) +compile_tgsi_instruction(struct st_translate *t, + const struct glsl_to_tgsi_instruction *inst) { struct ureg_program *ureg = t->ureg; GLuint i; @@ -4034,29 +4030,29 @@ compile_tgsi_instruction(struct st_translate *t, unsigned num_dst; unsigned num_src; - num_dst = num_inst_dst_regs( inst->op ); - num_src = num_inst_src_regs( inst->op ); + num_dst = num_inst_dst_regs(inst->op); + num_src = num_inst_src_regs(inst->op); if (num_dst) - dst[0] = translate_dst( t, - &inst->dst, - inst->saturate); + dst[0] = translate_dst(t, + &inst->dst, + inst->saturate); for (i = 0; i < num_src; i++) - src[i] = translate_src( t, &inst->src[i] ); + src[i] = translate_src(t, &inst->src[i]); - switch( inst->op ) { + switch(inst->op) { case TGSI_OPCODE_BGNLOOP: case TGSI_OPCODE_CAL: case TGSI_OPCODE_ELSE: case TGSI_OPCODE_ENDLOOP: case TGSI_OPCODE_IF: - debug_assert(num_dst == 0); - ureg_label_insn( ureg, - inst->op, - src, num_src, - get_label( t, - inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0 )); + assert(num_dst == 0); + ureg_label_insn(ureg, + inst->op, + src, num_src, + get_label(t, + inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0)); return; case TGSI_OPCODE_TEX: @@ -4065,27 +4061,23 @@ compile_tgsi_instruction(struct st_translate *t, case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXP: src[num_src++] = t->samplers[inst->sampler]; - ureg_tex_insn( ureg, - inst->op, - dst, num_dst, - translate_texture_target( inst->tex_target, - inst->tex_shadow ), - src, num_src ); + ureg_tex_insn(ureg, + inst->op, + dst, num_dst, + translate_texture_target(inst->tex_target, inst->tex_shadow), + src, num_src); return; case TGSI_OPCODE_SCS: - dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); - ureg_insn( ureg, - inst->op, - dst, num_dst, - src, num_src ); + dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY); + ureg_insn(ureg, inst->op, dst, num_dst, src, num_src); break; default: - ureg_insn( ureg, - inst->op, - dst, num_dst, - src, num_src ); + ureg_insn(ureg, + inst->op, + dst, num_dst, + src, num_src); break; } } @@ -4095,9 +4087,9 @@ compile_tgsi_instruction(struct st_translate *t, * Basically, add (adjX, adjY) to the fragment position. */ static void -emit_adjusted_wpos( struct st_translate *t, - const struct gl_program *program, - GLfloat adjX, GLfloat adjY) +emit_adjusted_wpos(struct st_translate *t, + const struct gl_program *program, + float adjX, float adjY) { struct ureg_program *ureg = t->ureg; struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); @@ -4119,9 +4111,9 @@ emit_adjusted_wpos( struct st_translate *t, * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). */ static void -emit_wpos_inversion( struct st_translate *t, - const struct gl_program *program, - boolean invert) +emit_wpos_inversion(struct st_translate *t, + const struct gl_program *program, + bool invert) { struct ureg_program *ureg = t->ureg; @@ -4140,7 +4132,7 @@ emit_wpos_inversion( struct st_translate *t, unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, wposTransformState); - struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst ); + struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst); struct ureg_dst wpos_temp; struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; @@ -4149,26 +4141,26 @@ emit_wpos_inversion( struct st_translate *t, if (wpos_input.File == TGSI_FILE_TEMPORARY) wpos_temp = ureg_dst(wpos_input); else { - wpos_temp = ureg_DECL_temporary( ureg ); - ureg_MOV( ureg, wpos_temp, wpos_input ); + wpos_temp = ureg_DECL_temporary(ureg); + ureg_MOV(ureg, wpos_temp, wpos_input); } if (invert) { /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy */ - ureg_MAD( ureg, - ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), - wpos_input, - ureg_scalar(wpostrans, 0), - ureg_scalar(wpostrans, 1)); + ureg_MAD(ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), + wpos_input, + ureg_scalar(wpostrans, 0), + ureg_scalar(wpostrans, 1)); } else { /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww */ - ureg_MAD( ureg, - ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), - wpos_input, - ureg_scalar(wpostrans, 2), - ureg_scalar(wpostrans, 3)); + ureg_MAD(ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), + wpos_input, + ureg_scalar(wpostrans, 2), + ureg_scalar(wpostrans, 3)); } /* Use wpos_temp as position input from here on: @@ -4312,7 +4304,7 @@ st_translate_program( const GLuint outputMapping[], const ubyte outputSemanticName[], const ubyte outputSemanticIndex[], - boolean passthrough_edgeflags ) + boolean passthrough_edgeflags) { struct st_translate translate, *t; unsigned i; @@ -4358,27 +4350,24 @@ st_translate_program( for (i = 0; i < numOutputs; i++) { switch (outputSemanticName[i]) { case TGSI_SEMANTIC_POSITION: - t->outputs[i] = ureg_DECL_output( ureg, - TGSI_SEMANTIC_POSITION, /* Z / Depth */ - outputSemanticIndex[i] ); - - t->outputs[i] = ureg_writemask( t->outputs[i], - TGSI_WRITEMASK_Z ); + t->outputs[i] = ureg_DECL_output(ureg, + TGSI_SEMANTIC_POSITION, /* Z/Depth */ + outputSemanticIndex[i]); + t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z); break; case TGSI_SEMANTIC_STENCIL: - t->outputs[i] = ureg_DECL_output( ureg, - TGSI_SEMANTIC_STENCIL, /* Stencil */ - outputSemanticIndex[i] ); - t->outputs[i] = ureg_writemask( t->outputs[i], - TGSI_WRITEMASK_Y ); + t->outputs[i] = ureg_DECL_output(ureg, + TGSI_SEMANTIC_STENCIL, /* Stencil */ + outputSemanticIndex[i]); + t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y); break; case TGSI_SEMANTIC_COLOR: - t->outputs[i] = ureg_DECL_output( ureg, - TGSI_SEMANTIC_COLOR, - outputSemanticIndex[i] ); + t->outputs[i] = ureg_DECL_output(ureg, + TGSI_SEMANTIC_COLOR, + outputSemanticIndex[i]); break; default: - debug_assert(0); + assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR"); return PIPE_ERROR_BAD_INPUT; } } @@ -4392,9 +4381,9 @@ st_translate_program( } for (i = 0; i < numOutputs; i++) { - t->outputs[i] = ureg_DECL_output( ureg, - outputSemanticName[i], - outputSemanticIndex[i] ); + t->outputs[i] = ureg_DECL_output(ureg, + outputSemanticName[i], + outputSemanticIndex[i]); } } else { @@ -4405,9 +4394,9 @@ st_translate_program( } for (i = 0; i < numOutputs; i++) { - t->outputs[i] = ureg_DECL_output( ureg, - outputSemanticName[i], - outputSemanticIndex[i] ); + t->outputs[i] = ureg_DECL_output(ureg, + outputSemanticName[i], + outputSemanticIndex[i]); if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) { /* Writing to the point size result register requires special * handling to implement clamping. @@ -4421,8 +4410,8 @@ st_translate_program( unsigned pointSizeClampConst = _mesa_add_state_reference(proginfo->Parameters, pointSizeClampState); - struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg ); - t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst ); + struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg); + t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst); t->pointSizeResult = t->outputs[i]; t->pointSizeOutIndex = i; t->outputs[i] = psizregtemp; @@ -4435,8 +4424,8 @@ st_translate_program( /* Declare address register. */ if (program->num_address_regs > 0) { - debug_assert( program->num_address_regs == 1 ); - t->address[0] = ureg_DECL_address( ureg ); + assert(program->num_address_regs == 1); + t->address[0] = ureg_DECL_address(ureg); } /* Declare misc input registers @@ -4461,7 +4450,7 @@ st_translate_program( */ for (i = 0; i < (unsigned)program->next_temp; i++) { /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ - t->temps[i] = ureg_DECL_temporary( t->ureg ); + t->temps[i] = ureg_DECL_temporary(t->ureg); } } @@ -4469,7 +4458,7 @@ st_translate_program( * so we put all the translated regs in t->constants. */ if (proginfo->Parameters) { - t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] ); + t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0])); if (t->constants == NULL) { ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; @@ -4482,7 +4471,7 @@ st_translate_program( case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: case PROGRAM_UNIFORM: - t->constants[i] = ureg_DECL_constant( ureg, i ); + t->constants[i] = ureg_DECL_constant(ureg, i); break; /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect @@ -4493,9 +4482,9 @@ st_translate_program( */ case PROGRAM_CONSTANT: if (program->indirect_addr_consts) - t->constants[i] = ureg_DECL_constant( ureg, i ); + t->constants[i] = ureg_DECL_constant(ureg, i); else - t->constants[i] = emit_immediate( t, proginfo->Parameters, i ); + t->constants[i] = emit_immediate(t, proginfo->Parameters, i); break; default: break; @@ -4505,27 +4494,28 @@ st_translate_program( /* Emit immediate values. */ - t->immediates = (struct ureg_src *)CALLOC( program->immediates->NumParameters * sizeof(struct ureg_src) ); + t->immediates = (struct ureg_src *)CALLOC(program->immediates->NumParameters * sizeof(struct ureg_src)); if (t->immediates == NULL) { ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; } for (i = 0; i < program->immediates->NumParameters; i++) { - t->immediates[i] = emit_immediate( t, program->immediates, i ); + assert(program->immediates->Parameters[i].Type == PROGRAM_IMMEDIATE); + t->immediates[i] = emit_immediate(t, program->immediates, i); } /* texture samplers */ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { if (program->samplers_used & (1 << i)) { - t->samplers[i] = ureg_DECL_sampler( ureg, i ); + t->samplers[i] = ureg_DECL_sampler(ureg, i); } } /* Emit each instruction in turn: */ foreach_iter(exec_list_iterator, iter, program->instructions) { - set_insn_start( t, ureg_get_instruction_number( ureg )); - compile_tgsi_instruction( t, (glsl_to_tgsi_instruction *)iter.get() ); + set_insn_start(t, ureg_get_instruction_number(ureg)); + compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get()); if (t->prevInstWrotePointSize && proginfo->Id) { /* The previous instruction wrote to the (fake) vertex point size @@ -4535,14 +4525,14 @@ st_translate_program( * Note that we can't do this easily at the end of program due to * possible early return. */ - set_insn_start( t, ureg_get_instruction_number( ureg )); - ureg_MAX( t->ureg, - ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), - ureg_src(t->outputs[t->pointSizeOutIndex]), - ureg_swizzle(t->pointSizeConst, 1,1,1,1)); - ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), - ureg_src(t->outputs[t->pointSizeOutIndex]), - ureg_swizzle(t->pointSizeConst, 2,2,2,2)); + set_insn_start(t, ureg_get_instruction_number(ureg)); + ureg_MAX(t->ureg, + ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), + ureg_src(t->outputs[t->pointSizeOutIndex]), + ureg_swizzle(t->pointSizeConst, 1,1,1,1)); + ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), + ureg_src(t->outputs[t->pointSizeOutIndex]), + ureg_swizzle(t->pointSizeConst, 2,2,2,2)); } t->prevInstWrotePointSize = GL_FALSE; } @@ -4550,9 +4540,8 @@ st_translate_program( /* Fix up all emitted labels: */ for (i = 0; i < t->labels_count; i++) { - ureg_fixup_label( ureg, - t->labels[i].token, - t->insn[t->labels[i].branch_target] ); + ureg_fixup_label(ureg, t->labels[i].token, + t->insn[t->labels[i].branch_target]); } out: @@ -4582,7 +4571,7 @@ get_mesa_program(struct gl_context *ctx, struct gl_program *prog; GLenum target; const char *target_string; - GLboolean progress; + bool progress; struct gl_shader_compiler_options *options = &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)]; -- cgit v1.2.3 From f751730ad003bb19ce85bc4d0abddaf40edde6c1 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 22 Jul 2011 13:24:42 -0500 Subject: glsl_to_tgsi: update comments --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 952900a1fb5..3a69a439822 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -236,7 +236,7 @@ public: /** * identifier of this function signature used by the program. * - * At the point that Mesa instructions for function calls are + * At the point that TGSI instructions for function calls are * generated, we don't know the address of the first instruction of * the function body. So we make the BranchTarget that is called a * small integer and rewrite them during set_branchtargets(). @@ -251,10 +251,9 @@ public: glsl_to_tgsi_instruction *bgn_inst; /** - * Index of the first instruction of the function body in actual - * Mesa IR. + * Index of the first instruction of the function body in actual TGSI. * - * Set after convertion from glsl_to_tgsi_instruction to prog_instruction. + * Set after conversion from glsl_to_tgsi_instruction to TGSI. */ int inst; @@ -1672,7 +1671,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) } else { st_src_reg array_base = this->result; /* Variable index array dereference. It eats the "vec4" of the - * base of the array and an index that offsets the Mesa register + * base of the array and an index that offsets the TGSI register * index. */ ir->array_index->accept(this); @@ -1879,7 +1878,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) /* Swizzle a small RHS vector into the channels being written. * * glsl ir treats write_mask as dictating how many channels are - * present on the RHS while Mesa IR treats write_mask as just + * present on the RHS while TGSI treats write_mask as just * showing which channels of the vec4 RHS get written. */ for (int i = 0; i < 4; i++) { @@ -2202,8 +2201,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) /* Put our coords in a temp. We'll need to modify them for shadow, * projection, or LOD, so the only case we'd use it as is is if - * we're doing plain old texturing. Mesa IR optimization should - * handle cleaning up our mess in that case. + * we're doing plain old texturing. The optimization passes on + * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. */ coord = get_temp(glsl_type::vec4_type); coord_dst = st_dst_reg(coord); @@ -3799,9 +3798,9 @@ static unsigned *get_label(struct st_translate *t, unsigned branch_target) } /** - * Called prior to emitting the TGSI code for each Mesa instruction. + * Called prior to emitting the TGSI code for each instruction. * Allocate additional space for instructions if needed. - * Update the insn[] array so the next Mesa instruction points to + * Update the insn[] array so the next glsl_to_tgsi_instruction points to * the next TGSI instruction. */ static void set_insn_start(struct st_translate *t, unsigned start) @@ -3856,7 +3855,7 @@ emit_immediate(struct st_translate *t, } /** - * Map a Mesa dst register to a TGSI ureg_dst register. + * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register. */ static struct ureg_dst dst_register(struct st_translate *t, @@ -3898,7 +3897,7 @@ dst_register(struct st_translate *t, } /** - * Map a Mesa src register to a TGSI ureg_src register. + * Map a glsl_to_tgsi src register to a TGSI ureg_src register. */ static struct ureg_src src_register(struct st_translate *t, -- cgit v1.2.3 From 3354a5b56398f90fc36ab14b6444aae27b50e859 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Wed, 27 Jul 2011 15:20:19 -0500 Subject: glsl_to_tgsi: rework immediate tracking to not use gl_program_parameter_list --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 135 +++++++++++++++++++---------- 1 file changed, 88 insertions(+), 47 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 3a69a439822..6039488f26b 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -229,6 +229,20 @@ public: ir_variable *var; /* variable that maps to this, if any */ }; +class immediate_storage : public exec_node { +public: + immediate_storage(gl_constant_value *values, int size, int type) + { + memcpy(this->values, values, size * sizeof(gl_constant_value)); + this->size = size; + this->type = type; + } + + gl_constant_value values[4]; + int size; /**< Number of components (1-4) */ + int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ +}; + class function_entry : public exec_node { public: ir_function_signature *sig; @@ -272,7 +286,6 @@ public: struct gl_program *prog; struct gl_shader_program *shader_program; struct gl_shader_compiler_options *options; - struct gl_program_parameter_list *immediates; int next_temp; @@ -285,6 +298,9 @@ public: variable_storage *find_variable_storage(ir_variable *var); + int add_constant(gl_register_file file, gl_constant_value values[4], + int size, int datatype, GLuint *swizzle_out); + function_entry *get_function_signature(ir_function_signature *sig); st_src_reg get_temp(const glsl_type *type); @@ -326,6 +342,10 @@ public: /** List of variable_storage */ exec_list variables; + /** List of immediate_storage */ + exec_list immediates; + int num_immediates; + /** List of function_entry */ exec_list function_signatures; int next_signature_id; @@ -808,6 +828,42 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, } } +int +glsl_to_tgsi_visitor::add_constant(gl_register_file file, + gl_constant_value values[4], int size, int datatype, + GLuint *swizzle_out) +{ + if (file == PROGRAM_CONSTANT) { + return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values, + size, datatype, swizzle_out); + } else { + int index = 0; + immediate_storage *entry; + assert(file == PROGRAM_IMMEDIATE); + fprintf(stderr, "adding immediate\n"); + + /* Search immediate storage to see if we already have an identical + * immediate that we can use instead of adding a duplicate entry. + */ + foreach_iter(exec_list_iterator, iter, this->immediates) { + entry = (immediate_storage *)iter.get(); + + if (entry->size == size && + entry->type == datatype && + !memcmp(entry->values, values, size * sizeof(gl_constant_value))) { + return index; + } + index++; + } + + /* Add this immediate to the list. */ + entry = new(mem_ctx) immediate_storage(values, size, datatype); + this->immediates.push_tail(entry); + this->num_immediates++; + return index; + } +} + struct st_src_reg glsl_to_tgsi_visitor::st_src_reg_for_float(float val) { @@ -815,8 +871,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_float(float val) union gl_constant_value uval; uval.f = val; - src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1, - GL_FLOAT, &src.swizzle); + src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle); return src; } @@ -830,8 +885,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_int(int val) assert(glsl_version >= 130); uval.i = val; - src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1, - GL_INT, &src.swizzle); + src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle); return src; } @@ -1941,12 +1995,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) gl_constant_value *values = (gl_constant_value *) stack_vals; GLenum gl_type = GL_NONE; unsigned int i; - gl_register_file file; - gl_program_parameter_list *param_list; static int in_array = 0; - - file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; - param_list = in_array ? this->prog->Parameters : this->immediates; + gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; /* Unfortunately, 4 floats is all we can get into * _mesa_add_typed_unnamed_constant. So, make a temp to store an @@ -2009,11 +2059,11 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; src = st_src_reg(file, -1, ir->type->base_type); - src.index = _mesa_add_typed_unnamed_constant(param_list, - values, - ir->type->vector_elements, - GL_FLOAT, - &src.swizzle); + src.index = add_constant(file, + values, + ir->type->vector_elements, + GL_FLOAT, + &src.swizzle); emit(ir, TGSI_OPCODE_MOV, mat_column, src); mat_column.index++; @@ -2062,9 +2112,11 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) } this->result = st_src_reg(file, -1, ir->type); - this->result.index = _mesa_add_typed_unnamed_constant(param_list, - values, ir->type->vector_elements, gl_type, - &this->result.swizzle); + this->result.index = add_constant(file, + values, + ir->type->vector_elements, + gl_type, + &this->result.swizzle); } function_entry * @@ -2441,17 +2493,16 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() result.file = PROGRAM_UNDEFINED; next_temp = 1; next_signature_id = 1; + num_immediates = 0; current_function = NULL; num_address_regs = 0; indirect_addr_temps = false; indirect_addr_consts = false; - immediates = _mesa_new_parameter_list(); mem_ctx = ralloc_context(NULL); } glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() { - _mesa_free_parameter_list(immediates); ralloc_free(mem_ctx); } @@ -3538,8 +3589,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, v->samplers_used = prog->SamplersUsed = original->samplers_used; v->indirect_addr_temps = original->indirect_addr_temps; v->indirect_addr_consts = original->indirect_addr_consts; - _mesa_free_parameter_list(v->immediates); - v->immediates = _mesa_clone_parameter_list(original->immediates); + memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); /* * Get initial pixel color from the texture. @@ -3667,8 +3717,7 @@ get_bitmap_visitor(struct st_fragment_program *fp, v->samplers_used = prog->SamplersUsed = original->samplers_used; v->indirect_addr_temps = original->indirect_addr_temps; v->indirect_addr_consts = original->indirect_addr_consts; - _mesa_free_parameter_list(v->immediates); - v->immediates = _mesa_clone_parameter_list(original->immediates); + memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); @@ -3822,32 +3871,20 @@ static void set_insn_start(struct st_translate *t, unsigned start) */ static struct ureg_src emit_immediate(struct st_translate *t, - struct gl_program_parameter_list *params, - int index) + gl_constant_value values[4], + int type, int size) { struct ureg_program *ureg = t->ureg; - switch(params->Parameters[index].DataType) + switch(type) { case GL_FLOAT: - case GL_FLOAT_VEC2: - case GL_FLOAT_VEC3: - case GL_FLOAT_VEC4: - return ureg_DECL_immediate(ureg, (float *)params->ParameterValues[index], 4); + return ureg_DECL_immediate(ureg, &values[0].f, size); case GL_INT: - case GL_INT_VEC2: - case GL_INT_VEC3: - case GL_INT_VEC4: - return ureg_DECL_immediate_int(ureg, (int *)params->ParameterValues[index], 4); + return ureg_DECL_immediate_int(ureg, &values[0].i, size); case GL_UNSIGNED_INT: - case GL_UNSIGNED_INT_VEC2: - case GL_UNSIGNED_INT_VEC3: - case GL_UNSIGNED_INT_VEC4: case GL_BOOL: - case GL_BOOL_VEC2: - case GL_BOOL_VEC3: - case GL_BOOL_VEC4: - return ureg_DECL_immediate_uint(ureg, (unsigned *)params->ParameterValues[index], 4); + return ureg_DECL_immediate_uint(ureg, &values[0].u, size); default: assert(!"should not get here - type must be float, int, uint, or bool"); return ureg_src_undef(); @@ -4483,7 +4520,10 @@ st_translate_program( if (program->indirect_addr_consts) t->constants[i] = ureg_DECL_constant(ureg, i); else - t->constants[i] = emit_immediate(t, proginfo->Parameters, i); + t->constants[i] = emit_immediate(t, + proginfo->Parameters->ParameterValues[i], + proginfo->Parameters->Parameters[i].DataType, + 4); break; default: break; @@ -4493,14 +4533,15 @@ st_translate_program( /* Emit immediate values. */ - t->immediates = (struct ureg_src *)CALLOC(program->immediates->NumParameters * sizeof(struct ureg_src)); + t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src)); if (t->immediates == NULL) { ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; } - for (i = 0; i < program->immediates->NumParameters; i++) { - assert(program->immediates->Parameters[i].Type == PROGRAM_IMMEDIATE); - t->immediates[i] = emit_immediate(t, program->immediates, i); + i = 0; + foreach_iter(exec_list_iterator, iter, program->immediates) { + immediate_storage *imm = (immediate_storage *)iter.get(); + t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size); } /* texture samplers */ -- cgit v1.2.3 From 10d31cb307f90a08fafed5c67945ffe53d279940 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Wed, 27 Jul 2011 15:45:16 -0500 Subject: glsl_to_tgsi: lower all ir_quadop_vector expressions Unlike Mesa IR, TGSI doesn't have a SWZ opcode. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 6039488f26b..0cbfc943a05 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -4825,7 +4825,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress; - progress = lower_quadop_vector(ir, true) || progress; + progress = lower_quadop_vector(ir, false) || progress; if (options->EmitNoIfs) { progress = lower_discard(ir) || progress; -- cgit v1.2.3 From 3e7fce9773ec332665326a785b6ed1fcf5bd578e Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Wed, 27 Jul 2011 16:36:10 -0500 Subject: glsl_to_tgsi: add each relative address to the previous This is a glsl_to_tgsi port of commit d6e1a8f71437. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 0cbfc943a05..f66e240a177 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1741,6 +1741,18 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) this->result, st_src_reg_for_float(element_size)); } + /* If there was already a relative address register involved, add the + * new and the old together to get the new offset. + */ + if (src.reladdr != NULL) { + st_src_reg accum_reg = get_temp(glsl_type::float_type); + + emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), + index_reg, *src.reladdr); + + index_reg = accum_reg; + } + src.reladdr = ralloc(mem_ctx, st_src_reg); memcpy(src.reladdr, &index_reg, sizeof(index_reg)); } -- cgit v1.2.3 From 189e9f12c7d3a82d7dd28695935a83e4319bb267 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Wed, 27 Jul 2011 16:39:40 -0500 Subject: glsl_to_tgsi: copy reladdr in st_src_reg(st_dst_reg) constructor This is a glsl_to_tgsi port of commit f7cd9a858c04. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index f66e240a177..ba4074eecd5 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -174,7 +174,7 @@ st_src_reg::st_src_reg(st_dst_reg reg) this->index = reg.index; this->swizzle = SWIZZLE_XYZW; this->negate = 0; - this->reladdr = NULL; + this->reladdr = reg.reladdr; } st_dst_reg::st_dst_reg(st_src_reg reg) -- cgit v1.2.3 From 81b036b4d79423c194596461b098a525af0102c2 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sat, 30 Jul 2011 16:44:49 -0700 Subject: i965/gen5+: Fix incorrect miptree layout for non-power-of-two cubemaps. For power-of-two sizes, h0 == mt->height0 since it's already a multiple of two. However, for NPOT, they're different; h1 should be computed based on the original size. Fixes piglit test "cubemap npot" and oglconform test "textureNPOT". NOTE: This is a candidate for stable release branches. Reviewed-by: Eric Anholt Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_tex_layout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index f462f32b19a..46a417a08ed 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -60,7 +60,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel, * given in Volume 1 of the BSpec. */ h0 = ALIGN(mt->height0, align_h); - h1 = ALIGN(minify(h0), align_h); + h1 = ALIGN(minify(mt->height0), align_h); qpitch = (h0 + h1 + (intel->gen >= 7 ? 12 : 11) * align_h); if (mt->compressed) qpitch /= 4; -- cgit v1.2.3 From 89193933cbd322cd08fb54232411a8a9221fcca8 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 28 Jul 2011 15:10:17 -0700 Subject: mesa: Ensure that gl_shader_program::InfoLog is never NULL This prevents assertion failures in ralloc_strcat. The ralloc_free in _mesa_free_shader_program_data can be omitted because freeing the gl_shader_program in _mesa_delete_shader_program will take care of this automatically. A bunch of this code could use a refactor to use ralloc a bit more effectively. A bunch of the things that are allocated with malloc and owned by the gl_shader_program should be allocated with ralloc (using the gl_shader_program as the context). Signed-off-by: Ian Romanick Reviewed-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/glsl/main.cpp | 1 + src/mesa/main/shaderobj.c | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp index 9f85096e1a1..9b8a50738ac 100644 --- a/src/glsl/main.cpp +++ b/src/glsl/main.cpp @@ -221,6 +221,7 @@ main(int argc, char **argv) whole_program = rzalloc (NULL, struct gl_shader_program); assert(whole_program != NULL); + whole_program->InfoLog = ralloc_strdup(whole_program, ""); for (/* empty */; argc > optind; optind++) { whole_program->Shaders = diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c index 33d91ad594d..f128648f477 100644 --- a/src/mesa/main/shaderobj.c +++ b/src/mesa/main/shaderobj.c @@ -244,6 +244,8 @@ _mesa_init_shader_program(struct gl_context *ctx, struct gl_shader_program *prog prog->Geom.InputType = GL_TRIANGLES; prog->Geom.OutputType = GL_TRIANGLE_STRIP; #endif + + prog->InfoLog = ralloc_strdup(prog, ""); } /** @@ -283,6 +285,10 @@ _mesa_clear_shader_program_data(struct gl_context *ctx, _mesa_free_parameter_list(shProg->Varying); shProg->Varying = NULL; } + + assert(shProg->InfoLog != NULL); + ralloc_free(shProg->InfoLog); + shProg->InfoLog = ralloc_strdup(shProg, ""); } @@ -317,11 +323,6 @@ _mesa_free_shader_program_data(struct gl_context *ctx, shProg->Shaders = NULL; } - if (shProg->InfoLog) { - ralloc_free(shProg->InfoLog); - shProg->InfoLog = NULL; - } - /* Transform feedback varying vars */ for (i = 0; i < shProg->TransformFeedback.NumVarying; i++) { free(shProg->TransformFeedback.VaryingNames[i]); -- cgit v1.2.3 From 8aadd89d07d750aadd10989fa9c81f8a2fdd98e2 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 25 Jul 2011 15:55:59 -0700 Subject: ir_to_mesa: Use Add linker_error instead of fail_link The functions were almost identical. Signed-off-by: Ian Romanick Reviewed-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 53 +++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 31 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 8b4a535b75f..a0188128e2a 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -331,20 +331,6 @@ dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP); dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X); -static void -fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3); - -static void -fail_link(struct gl_shader_program *prog, const char *fmt, ...) -{ - va_list args; - va_start(args, fmt); - ralloc_vasprintf_append(&prog->InfoLog, fmt, args); - va_end(args); - - prog->LinkStatus = GL_FALSE; -} - static int swizzle_for_size(int size) { @@ -789,10 +775,11 @@ ir_to_mesa_visitor::visit(ir_variable *ir) if (storage->file == PROGRAM_TEMPORARY && dst.index != storage->index + (int) ir->num_state_slots) { - fail_link(this->shader_program, - "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", - ir->name, dst.index - storage->index, - type_size(ir->type)); + linker_error(this->shader_program, + "failed to load builtin uniform `%s' " + "(%d/%d regs loaded)\n", + ir->name, dst.index - storage->index, + type_size(ir->type)); } } } @@ -2413,29 +2400,32 @@ check_resources(const struct gl_context *ctx, case GL_VERTEX_PROGRAM_ARB: if (_mesa_bitcount(prog->SamplersUsed) > ctx->Const.MaxVertexTextureImageUnits) { - fail_link(shader_program, "Too many vertex shader texture samplers"); + linker_error(shader_program, + "Too many vertex shader texture samplers"); } if (prog->Parameters->NumParameters > MAX_UNIFORMS) { - fail_link(shader_program, "Too many vertex shader constants"); + linker_error(shader_program, "Too many vertex shader constants"); } break; case MESA_GEOMETRY_PROGRAM: if (_mesa_bitcount(prog->SamplersUsed) > ctx->Const.MaxGeometryTextureImageUnits) { - fail_link(shader_program, "Too many geometry shader texture samplers"); + linker_error(shader_program, + "Too many geometry shader texture samplers"); } if (prog->Parameters->NumParameters > MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) { - fail_link(shader_program, "Too many geometry shader constants"); + linker_error(shader_program, "Too many geometry shader constants"); } break; case GL_FRAGMENT_PROGRAM_ARB: if (_mesa_bitcount(prog->SamplersUsed) > ctx->Const.MaxTextureImageUnits) { - fail_link(shader_program, "Too many fragment shader texture samplers"); + linker_error(shader_program, + "Too many fragment shader texture samplers"); } if (prog->Parameters->NumParameters > MAX_UNIFORMS) { - fail_link(shader_program, "Too many fragment shader constants"); + linker_error(shader_program, "Too many fragment shader constants"); } break; default: @@ -2550,9 +2540,10 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program, * from _mesa_add_uniform) has to match what the linker chose. */ if (index != parameter_index) { - fail_link(shader_program, "Allocation of uniform `%s' to target " - "failed (%d vs %d)\n", - uniform->Name, index, parameter_index); + linker_error(shader_program, + "Allocation of uniform `%s' to target failed " + "(%d vs %d)\n", + uniform->Name, index, parameter_index); } } } @@ -2585,8 +2576,8 @@ set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, int loc = _mesa_get_uniform_location(ctx, shader_program, name); if (loc == -1) { - fail_link(shader_program, - "Couldn't find uniform for initializer %s\n", name); + linker_error(shader_program, + "Couldn't find uniform for initializer %s\n", name); return; } @@ -2987,7 +2978,7 @@ get_mesa_program(struct gl_context *ctx, prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File; if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) { - fail_link(shader_program, "Couldn't flatten if statement\n"); + linker_error(shader_program, "Couldn't flatten if statement\n"); } switch (mesa_inst->Opcode) { @@ -3258,7 +3249,7 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) for (i = 0; i < prog->NumShaders; i++) { if (!prog->Shaders[i]->CompileStatus) { - fail_link(prog, "linking with uncompiled shader"); + linker_error(prog, "linking with uncompiled shader"); prog->LinkStatus = GL_FALSE; } } -- cgit v1.2.3 From 322c3bf9dc4c6edbf5a8793475ce1307e1c0186b Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 25 Jul 2011 15:58:07 -0700 Subject: ir_to_mesa: Emit warnings instead of errors for IR that can't be lowered Rely on the driver to do the right thing. This probably means falling back to software. Page 88 of the OpenGL 2.1 spec specifically says: "A shader should not fail to compile, and a program object should not fail to link due to lack of instruction space or lack of temporary variables. Implementations should ensure that all valid shaders and program objects may be successfully compiled, linked and executed." There is no provision for saying "No" to a valid shader that is difficult for the hardware to handle, so stop doing that. On i915 this causes a large number of piglit tests to change from FAIL to WARN. The warning is because the driver still emits messages to stderr like "i915_program_error: Unsupported opcode: BGNLOOP". It also fixes ES2 conformance CorrectFull_frag and CorrectParse1_frag on i915 (and probably other hardware that can't handle loops). Signed-off-by: Ian Romanick Reviewed-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index a0188128e2a..382cda0c703 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2977,11 +2977,31 @@ get_mesa_program(struct gl_context *ctx, if (mesa_inst->SrcReg[src].RelAddr) prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File; - if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) { - linker_error(shader_program, "Couldn't flatten if statement\n"); - } - switch (mesa_inst->Opcode) { + case OPCODE_IF: + if (options->EmitNoIfs) { + linker_warning(shader_program, + "Couldn't flatten if-statement. " + "This will likely result in software " + "rasterization.\n"); + } + break; + case OPCODE_BGNLOOP: + if (options->EmitNoLoops) { + linker_warning(shader_program, + "Couldn't unroll loop. " + "This will likely result in software " + "rasterization.\n"); + } + break; + case OPCODE_CONT: + if (options->EmitNoCont) { + linker_warning(shader_program, + "Couldn't lower continue-statement. " + "This will likely result in software " + "rasterization.\n"); + } + break; case OPCODE_BGNSUB: inst->function->inst = i; mesa_inst->Comment = strdup(inst->function->sig->function_name()); -- cgit v1.2.3 From 3bb2f0dde1cd813a0b5e0b45be376f4d6606aeb8 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 25 Jul 2011 16:41:39 -0700 Subject: i915: Fail without crashing if a Mesa IR program uses too many registers This can only happen in GLSL shaders because assembly shaders that use too many temps are rejected by core Mesa. It is easiest to make this happen with shaders that contain flow-control that could not be lowered. Signed-off-by: Ian Romanick Reviewed-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i915/i915_fragprog.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 6e1d7092237..32050cebf33 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -303,7 +303,7 @@ do { \ /* * TODO: consider moving this into core */ -static void calc_live_regs( struct i915_fragment_program *p ) +static bool calc_live_regs( struct i915_fragment_program *p ) { const struct gl_fragment_program *program = &p->FragProg; GLuint regsUsed = 0xffff0000; @@ -317,6 +317,9 @@ static void calc_live_regs( struct i915_fragment_program *p ) /* Register is written to: unmark as live for this and preceeding ops */ if (inst->DstReg.File == PROGRAM_TEMPORARY) { + if (inst->DstReg.Index > 16) + return false; + live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask; if (live_components[inst->DstReg.Index] == 0) regsUsed &= ~(1 << inst->DstReg.Index); @@ -327,6 +330,9 @@ static void calc_live_regs( struct i915_fragment_program *p ) if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) { unsigned c; + if (inst->SrcReg[a].Index > 16) + return false; + regsUsed |= 1 << inst->SrcReg[a].Index; for (c = 0; c < 4; c++) { @@ -340,6 +346,8 @@ static void calc_live_regs( struct i915_fragment_program *p ) p->usedRegs[i] = regsUsed; } + + return true; } static GLuint get_live_regs( struct i915_fragment_program *p, @@ -394,7 +402,10 @@ upload_program(struct i915_fragment_program *p) /* Not always needed: */ - calc_live_regs(p); + if (!calc_live_regs(p)) { + i915_program_error(p, "Could not allocate registers"); + return; + } while (1) { GLuint src0, src1, src2, flags; -- cgit v1.2.3 From 0290a018a50bd4a3180af3233f145f4de7b63706 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 10 Feb 2011 13:20:26 -0800 Subject: i915: Only emit program errors when INTEL_DEBUG=wm or INTEL_DEBUG=fallbacks This makes piglit a lot more happy. The errors are logged when INTEL_DEBUG=fallbacks because the application is about to hit a big software fallback. We frequently ask people to run applications that are hitting software fallbacks with INTEL_DEBUG=fallbacks so the we can help them debug the reason for the software fallback. Signed-off-by: Ian Romanick Reviewed-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i915/i915_program.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c index ca1949b223e..0a600d30bef 100644 --- a/src/mesa/drivers/dri/i915/i915_program.c +++ b/src/mesa/drivers/dri/i915/i915_program.c @@ -442,14 +442,16 @@ i915_emit_param4fv(struct i915_fragment_program * p, const GLfloat * values) void i915_program_error(struct i915_fragment_program *p, const char *fmt, ...) { - va_list args; + if (unlikely((INTEL_DEBUG & (DEBUG_WM | DEBUG_FALLBACKS)) != 0)) { + va_list args; - fprintf(stderr, "i915_program_error: "); - va_start(args, fmt); - vfprintf(stderr, fmt, args); - va_end(args); + fprintf(stderr, "i915_program_error: "); + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); - fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + } p->error = 1; } -- cgit v1.2.3 From 0f1aae3ae7cef051f87dae056c46fcfd0afaab20 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 1 Aug 2011 16:06:59 -0700 Subject: intel: Fix unused variable warning. --- src/mesa/drivers/dri/intel/intel_fbo.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index e48d6ef9cbd..65ad621e770 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -596,7 +596,6 @@ intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb, struct intel_texture_image *intel_image, int zoffset) { - struct intel_mipmap_tree *mt = intel_image->mt; unsigned int dst_x, dst_y; /* compute offset of the particular 2D image within the texture region */ -- cgit v1.2.3 From 7cf799d47269ce01d3e5981709744a16b7c2756c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 2 Aug 2011 13:36:57 -0700 Subject: radeon: Remove set-but-unused color_mask variable. This has been around since the initial import in 2003 and never used. --- src/mesa/drivers/dri/r200/r200_ioctl.c | 3 --- src/mesa/drivers/dri/radeon/radeon_ioctl.c | 3 --- 2 files changed, 6 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c index 02201cb53d6..44a794da396 100644 --- a/src/mesa/drivers/dri/r200/r200_ioctl.c +++ b/src/mesa/drivers/dri/r200/r200_ioctl.c @@ -185,7 +185,6 @@ static void r200Clear( struct gl_context *ctx, GLbitfield mask ) r200ContextPtr rmesa = R200_CONTEXT(ctx); __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLuint flags = 0; - GLuint color_mask = 0; GLuint orig_mask = mask; if ( R200_DEBUG & RADEON_IOCTL ) { @@ -206,13 +205,11 @@ static void r200Clear( struct gl_context *ctx, GLbitfield mask ) if ( mask & BUFFER_BIT_FRONT_LEFT ) { flags |= RADEON_FRONT; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_FRONT_LEFT; } if ( mask & BUFFER_BIT_BACK_LEFT ) { flags |= RADEON_BACK; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_BACK_LEFT; } diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c index a91d8727792..c23e9c2d2a2 100644 --- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c +++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c @@ -560,7 +560,6 @@ static void radeonClear( struct gl_context *ctx, GLbitfield mask ) r100ContextPtr rmesa = R100_CONTEXT(ctx); __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLuint flags = 0; - GLuint color_mask = 0; GLuint orig_mask = mask; if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) { @@ -582,13 +581,11 @@ static void radeonClear( struct gl_context *ctx, GLbitfield mask ) if ( mask & BUFFER_BIT_FRONT_LEFT ) { flags |= RADEON_FRONT; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_FRONT_LEFT; } if ( mask & BUFFER_BIT_BACK_LEFT ) { flags |= RADEON_BACK; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_BACK_LEFT; } -- cgit v1.2.3 From 25fffa9364baef76a7e7e875be1fb3c4f10aadfd Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 2 Aug 2011 13:39:43 -0700 Subject: radeon: Remove set-but-unused log2depth variable. r100 doesn't support 3D GL_EXT_texture3D. --- src/mesa/drivers/dri/radeon/radeon_texstate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index 9ba98e303a7..3abaa1504a4 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -1018,7 +1018,7 @@ static GLboolean radeon_validate_texgen( struct gl_context *ctx, GLuint unit ) static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int unit) { const struct gl_texture_image *firstImage; - GLint log2Width, log2Height, log2Depth, texelBytes; + GLint log2Width, log2Height, texelBytes; if ( t->bo ) { return GL_TRUE; @@ -1033,7 +1033,6 @@ static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int log2Width = firstImage->WidthLog2; log2Height = firstImage->HeightLog2; - log2Depth = firstImage->DepthLog2; texelBytes = _mesa_get_format_bytes(firstImage->TexFormat); if (!t->image_override) { -- cgit v1.2.3 From f5e612ab594689c7736f8af082e88c107bd7582c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 2 Aug 2011 13:41:59 -0700 Subject: radeon: Remove set-but-unused variables in radeonSetTexBuffer2() variants. These have been unused since 2009. --- src/mesa/drivers/dri/r200/r200_texstate.c | 6 ------ src/mesa/drivers/dri/r300/r300_texstate.c | 5 ----- src/mesa/drivers/dri/r600/evergreen_tex.c | 7 ------- src/mesa/drivers/dri/r600/r600_texstate.c | 5 ----- src/mesa/drivers/dri/radeon/radeon_texstate.c | 6 ------ 5 files changed, 29 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c index 7adf9ad73ed..8c9bd6d00b2 100644 --- a/src/mesa/drivers/dri/r200/r200_texstate.c +++ b/src/mesa/drivers/dri/r200/r200_texstate.c @@ -773,18 +773,12 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format struct radeon_renderbuffer *rb; radeon_texture_image *rImage; radeonContextPtr radeon; - r200ContextPtr rmesa; struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, format; gl_format texFormat; - format = GL_UNSIGNED_BYTE; - internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); - radeon = pDRICtx->driverPrivate; - rmesa = pDRICtx->driverPrivate; rfb = dPriv->driverPrivate; texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit]; diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index e24ad6f088d..e4388a021ed 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -427,13 +427,8 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, type, format; gl_format texFormat; - type = GL_BGRA; - format = GL_UNSIGNED_BYTE; - internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); - radeon = pDRICtx->driverPrivate; rmesa = pDRICtx->driverPrivate; diff --git a/src/mesa/drivers/dri/r600/evergreen_tex.c b/src/mesa/drivers/dri/r600/evergreen_tex.c index 9784a8484f2..d240a216817 100644 --- a/src/mesa/drivers/dri/r600/evergreen_tex.c +++ b/src/mesa/drivers/dri/r600/evergreen_tex.c @@ -1288,19 +1288,12 @@ void evergreenSetTexBuffer(__DRIcontext *pDRICtx, GLint target, GLint glx_textur struct radeon_renderbuffer *rb; radeon_texture_image *rImage; radeonContextPtr radeon; - context_t *rmesa; struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, type, format; gl_format texFormat; - type = GL_BGRA; - format = GL_UNSIGNED_BYTE; - internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); - radeon = pDRICtx->driverPrivate; - rmesa = pDRICtx->driverPrivate; rfb = dPriv->driverPrivate; texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit]; diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 949db29c189..65fae7195fd 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -1141,13 +1141,8 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, type, format; gl_format texFormat; - type = GL_BGRA; - format = GL_UNSIGNED_BYTE; - internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); - radeon = pDRICtx->driverPrivate; rmesa = pDRICtx->driverPrivate; diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index 3abaa1504a4..430309392a0 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -648,18 +648,12 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_form struct radeon_renderbuffer *rb; radeon_texture_image *rImage; radeonContextPtr radeon; - r100ContextPtr rmesa; struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, format; gl_format texFormat; - format = GL_UNSIGNED_BYTE; - internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? GL_RGB : GL_RGBA); - radeon = pDRICtx->driverPrivate; - rmesa = pDRICtx->driverPrivate; rfb = dPriv->driverPrivate; texUnit = _mesa_get_current_tex_unit(radeon->glCtx); -- cgit v1.2.3 From 8de1d42f244f6315c471b01ef52a61f61d227c6d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 2 Aug 2011 13:47:18 -0700 Subject: radeon: Remove set-but-unused variables in radeon_lock.c These have been unused since this function's introduction in the FBO support development around 2009. --- src/mesa/drivers/dri/radeon/radeon_lock.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c index 7b6bd36dcf7..ae8a212f806 100644 --- a/src/mesa/drivers/dri/radeon/radeon_lock.c +++ b/src/mesa/drivers/dri/radeon/radeon_lock.c @@ -114,16 +114,6 @@ void radeon_lock_hardware(radeonContextPtr radeon ) { char ret = 0; - struct radeon_framebuffer *rfb = NULL; - struct radeon_renderbuffer *rrb = NULL; - - if (radeon_get_drawable(radeon)) { - rfb = radeon_get_drawable(radeon)->driverPrivate; - - if (rfb) - rrb = radeon_get_renderbuffer(&rfb->base, - rfb->base._ColorDrawBufferIndexes[0]); - } if (!radeon->radeonScreen->driScreen->dri2.enabled) { if (ATOMIC_INC_AND_FETCH(radeon->dri.hwLockCount) > 1) -- cgit v1.2.3 From e0e4c2e30552e524c91b2eb98a2dabdcd4666169 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 2 Aug 2011 13:49:05 -0700 Subject: radeon: Remove some remaining set-but-unused variables. These looked more like copy-and-paste to me than the others (which looked more like possibly someone forgot to write some code in a refactor), so I didn't verify where they came from. --- src/mesa/drivers/dri/r600/r600_cmdbuf.c | 2 -- src/mesa/drivers/dri/radeon/radeon_common.c | 8 -------- src/mesa/drivers/dri/radeon/radeon_common_context.c | 2 -- src/mesa/drivers/dri/radeon/radeon_cs_legacy.c | 2 -- 4 files changed, 14 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c index ce2f7779563..74f048b1062 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c @@ -259,13 +259,11 @@ static int r600_cs_process_relocs(struct radeon_cs_int *csi, uint32_t * reloc_chunk, uint32_t * length_dw_reloc_chunk) { - struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm; struct r600_cs_reloc_legacy *relocs; int i, j, r; uint32_t offset_dw = 0; - csm = (struct r600_cs_manager_legacy*)csi->csm; relocs = (struct r600_cs_reloc_legacy *)csi->relocs; restart: for (i = 0; i < csi->crelocs; i++) { diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index bfc307ca987..e7a6623cf84 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -436,7 +436,6 @@ void radeonCopyBuffer( __DRIdrawable *dPriv, const drm_clip_rect_t *rect) { radeonContextPtr rmesa; - struct radeon_framebuffer *rfb; GLint nbox, i, ret; assert(dPriv); @@ -447,8 +446,6 @@ void radeonCopyBuffer( __DRIdrawable *dPriv, LOCK_HARDWARE(rmesa); - rfb = dPriv->driverPrivate; - if ( RADEON_DEBUG & RADEON_IOCTL ) { fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx ); } @@ -527,8 +524,6 @@ static GLboolean radeonPageFlip( __DRIdrawable *dPriv ) { radeonContextPtr radeon; GLint ret; - __DRIscreen *psp; - struct radeon_renderbuffer *rrb; struct radeon_framebuffer *rfb; assert(dPriv); @@ -537,9 +532,6 @@ static GLboolean radeonPageFlip( __DRIdrawable *dPriv ) radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; rfb = dPriv->driverPrivate; - rrb = (void *)rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer; - - psp = dPriv->driScreenPriv; LOCK_HARDWARE(radeon); diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index bf8925f61d0..c08b79484af 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -515,7 +515,6 @@ void radeon_prepare_render(radeonContextPtr radeon) __DRIcontext *driContext = radeon->dri.context; __DRIdrawable *drawable; __DRIscreen *screen; - struct radeon_framebuffer *draw; screen = driContext->driScreenPriv; if (!screen->dri2.loader) @@ -527,7 +526,6 @@ void radeon_prepare_render(radeonContextPtr radeon) radeon_update_renderbuffers(driContext, drawable, GL_FALSE); /* Intel driver does the equivalent of this, no clue if it is needed:*/ - draw = drawable->driverPrivate; radeon_draw_buffer(radeon->glCtx, radeon->glCtx->DrawBuffer); driContext->dri2.draw_stamp = drawable->dri2.stamp; diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c index c2722a4e195..5595b705b15 100644 --- a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c +++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c @@ -218,11 +218,9 @@ static int cs_end(struct radeon_cs_int *cs, static int cs_process_relocs(struct radeon_cs_int *cs) { - struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm; struct cs_reloc_legacy *relocs; int i, j, r; - csm = (struct cs_manager_legacy*)cs->csm; relocs = (struct cs_reloc_legacy *)cs->relocs; restart: for (i = 0; i < cs->crelocs; i++) -- cgit v1.2.3 From 57590e173b6f421b1015190aa3c0011ea55f31d8 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 28 Jul 2011 15:26:01 +0200 Subject: st/mesa: determine Const.MaxSamples in init_extensions v2: Check for non-pow2 sample counts as well. --- src/mesa/state_tracker/st_extensions.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index b5f6d356eb0..8e900934054 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -228,6 +228,7 @@ void st_init_extensions(struct st_context *st) { struct pipe_screen *screen = st->pipe->screen; struct gl_context *ctx = st->ctx; + int i; /* * Extensions that are supported by all Gallium drivers: @@ -605,6 +606,16 @@ void st_init_extensions(struct st_context *st) ctx->Extensions.EXT_packed_float = GL_TRUE; } + /* Maximum sample count. */ + for (i = 16; i > 0; --i) { + if (screen->is_format_supported(screen, PIPE_FORMAT_B8G8R8A8_UNORM, + PIPE_TEXTURE_2D, i, + PIPE_BIND_RENDER_TARGET)) { + ctx->Const.MaxSamples = i; + break; + } + } + if (screen->get_param(screen, PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE)) { ctx->Extensions.ARB_seamless_cube_map = GL_TRUE; ctx->Extensions.AMD_seamless_cubemap_per_texture = GL_TRUE; -- cgit v1.2.3 From f253d83bc72e7d26df8cd3a04747b3d46a8543e6 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 3 Aug 2011 16:01:41 +0200 Subject: st/mesa: implement multisample resolve via BlitFramebuffer --- src/mesa/state_tracker/st_cb_blit.c | 116 ++++++++++++++++++++++++++++++++++-- 1 file changed, 111 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c index 416be194d11..276d10fb557 100644 --- a/src/mesa/state_tracker/st_cb_blit.c +++ b/src/mesa/state_tracker/st_cb_blit.c @@ -61,6 +61,81 @@ st_destroy_blit(struct st_context *st) #if FEATURE_EXT_framebuffer_blit +static void +st_BlitFramebuffer_resolve(struct gl_context *ctx, + GLbitfield mask, + struct pipe_resolve_info *info) +{ + const GLbitfield depthStencil = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + + struct st_context *st = st_context(ctx); + + struct st_renderbuffer *srcRb, *dstRb; + + if (mask & GL_COLOR_BUFFER_BIT) { + srcRb = st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer); + dstRb = st_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]); + + info->mask = PIPE_MASK_RGBA; + + info->src.res = srcRb->texture; + info->src.layer = srcRb->surface->u.tex.first_layer; + info->dst.res = dstRb->texture; + info->dst.level = dstRb->surface->u.tex.level; + info->dst.layer = dstRb->surface->u.tex.first_layer; + + st->pipe->resource_resolve(st->pipe, info); + } + + if (mask & depthStencil) { + struct gl_renderbuffer_attachment *srcDepth, *srcStencil; + struct gl_renderbuffer_attachment *dstDepth, *dstStencil; + + srcDepth = &ctx->ReadBuffer->Attachment[BUFFER_DEPTH]; + dstDepth = &ctx->DrawBuffer->Attachment[BUFFER_DEPTH]; + srcStencil = &ctx->ReadBuffer->Attachment[BUFFER_STENCIL]; + dstStencil = &ctx->DrawBuffer->Attachment[BUFFER_STENCIL]; + + const boolean combined = + st_is_depth_stencil_combined(srcDepth, srcStencil) && + st_is_depth_stencil_combined(dstDepth, dstStencil); + + if ((mask & GL_DEPTH_BUFFER_BIT) || combined) { + /* resolve depth and, if combined and requested, stencil as well */ + srcRb = st_renderbuffer(srcDepth->Renderbuffer); + dstRb = st_renderbuffer(dstDepth->Renderbuffer); + + info->mask = (mask & GL_DEPTH_BUFFER_BIT) ? PIPE_MASK_Z : 0; + if (combined && (mask & GL_STENCIL_BUFFER_BIT)) + info->mask |= PIPE_MASK_S; + + info->src.res = srcRb->texture; + info->src.layer = srcRb->surface->u.tex.first_layer; + info->dst.res = dstRb->texture; + info->dst.level = dstRb->surface->u.tex.level; + info->dst.layer = dstRb->surface->u.tex.first_layer; + + st->pipe->resource_resolve(st->pipe, info); + } + + if (mask & GL_STENCIL_BUFFER_BIT) { + /* resolve separate stencil buffer */ + srcRb = st_renderbuffer(srcStencil->Renderbuffer); + dstRb = st_renderbuffer(dstStencil->Renderbuffer); + + info->mask = PIPE_MASK_S; + + info->src.res = srcRb->texture; + info->src.layer = srcRb->surface->u.tex.first_layer; + info->dst.res = dstRb->texture; + info->dst.level = dstRb->surface->u.tex.level; + info->dst.layer = dstRb->surface->u.tex.first_layer; + + st->pipe->resource_resolve(st->pipe, info); + } + } +} + static void st_BlitFramebuffer(struct gl_context *ctx, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, @@ -95,6 +170,42 @@ st_BlitFramebuffer(struct gl_context *ctx, srcY1 = readFB->Height - srcY1; } + /* Disable conditional rendering. */ + if (st->render_condition) { + st->pipe->render_condition(st->pipe, NULL, 0); + } + + if (readFB->Visual.sampleBuffers > drawFB->Visual.sampleBuffers) { + struct pipe_resolve_info info; + + if (dstX0 < dstX1) { + info.dst.x0 = dstX0; + info.dst.x1 = dstX1; + info.src.x0 = srcX0; + info.src.x1 = srcX1; + } else { + info.dst.x0 = dstX1; + info.dst.x1 = dstX0; + info.src.x0 = srcX1; + info.src.x1 = srcX0; + } + if (dstY0 < dstY1) { + info.dst.y0 = dstY0; + info.dst.y1 = dstY1; + info.src.y0 = srcY0; + info.src.y1 = srcY1; + } else { + info.dst.y0 = dstY1; + info.dst.y1 = dstY0; + info.src.y0 = srcY1; + info.src.y1 = srcY0; + } + + st_BlitFramebuffer_resolve(ctx, mask, &info); /* filter doesn't apply */ + + goto done; + } + if (srcY0 > srcY1 && dstY0 > dstY1) { /* Both src and dst are upside down. Swap Y to make it * right-side up to increase odds of using a fast path. @@ -109,11 +220,6 @@ st_BlitFramebuffer(struct gl_context *ctx, dstY1 = tmp; } - /* Disable conditional rendering. */ - if (st->render_condition) { - st->pipe->render_condition(st->pipe, NULL, 0); - } - if (mask & GL_COLOR_BUFFER_BIT) { struct gl_renderbuffer_attachment *srcAtt = &readFB->Attachment[readFB->_ColorReadBufferIndex]; -- cgit v1.2.3 From 88a4f2fe543d7c394c0ad732ae60f8cf94c0d357 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 08:22:30 -0600 Subject: mesa: make error handling in glGetTexParameter() a bit more concise --- src/mesa/main/texparam.c | 266 +++++++++++++++++------------------------------ 1 file changed, 96 insertions(+), 170 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 134f15346e8..78dcc5dccea 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -994,28 +994,21 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, *params = 0; break; case GL_TEXTURE_DEPTH_SIZE_ARB: - if (ctx->Extensions.ARB_depth_texture) - *params = _mesa_get_format_bits(texFormat, pname); - else + if (!ctx->Extensions.ARB_depth_texture) goto invalid_pname; + *params = _mesa_get_format_bits(texFormat, pname); break; case GL_TEXTURE_STENCIL_SIZE_EXT: - if (ctx->Extensions.EXT_packed_depth_stencil || - ctx->Extensions.ARB_framebuffer_object) { - *params = _mesa_get_format_bits(texFormat, pname); - } - else { + if (!ctx->Extensions.EXT_packed_depth_stencil && + !ctx->Extensions.ARB_framebuffer_object) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, pname); break; case GL_TEXTURE_SHARED_SIZE: - if (ctx->VersionMajor >= 3 || - ctx->Extensions.EXT_texture_shared_exponent) { - *params = texFormat == MESA_FORMAT_RGB9_E5_FLOAT ? 5 : 0; - } - else { + if (ctx->VersionMajor < 3 && + !ctx->Extensions.EXT_texture_shared_exponent) goto invalid_pname; - } + *params = texFormat == MESA_FORMAT_RGB9_E5_FLOAT ? 5 : 0; break; /* GL_ARB_texture_compression */ @@ -1036,67 +1029,46 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level, /* GL_ARB_texture_float */ case GL_TEXTURE_RED_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_RED_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_RED_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_GREEN_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_GREEN_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_GREEN_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_BLUE_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_BLUE_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_BLUE_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_ALPHA_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_ALPHA_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_ALPHA_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_LUMINANCE_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_LUMINANCE_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_LUMINANCE_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_INTENSITY_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_INTENSITY_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_INTENSITY_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; case GL_TEXTURE_DEPTH_TYPE_ARB: - if (ctx->Extensions.ARB_texture_float) { - *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_DEPTH_SIZE) ? - _mesa_get_format_datatype(texFormat) : GL_NONE; - } - else { + if (!ctx->Extensions.ARB_texture_float) goto invalid_pname; - } + *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_DEPTH_SIZE) ? + _mesa_get_format_datatype(texFormat) : GL_NONE; break; default: @@ -1118,7 +1090,6 @@ void GLAPIENTRY _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) { struct gl_texture_object *obj; - GLboolean error = GL_FALSE; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); @@ -1187,49 +1158,37 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) *params = (GLfloat) obj->MaxLevel; break; case GL_TEXTURE_MAX_ANISOTROPY_EXT: - if (ctx->Extensions.EXT_texture_filter_anisotropic) { - *params = obj->Sampler.MaxAnisotropy; - } - else - error = GL_TRUE; + if (!ctx->Extensions.EXT_texture_filter_anisotropic) + goto invalid_pname; + *params = obj->Sampler.MaxAnisotropy; break; case GL_TEXTURE_COMPARE_FAIL_VALUE_ARB: - if (ctx->Extensions.ARB_shadow_ambient) { - *params = obj->Sampler.CompareFailValue; - } - else - error = GL_TRUE; + if (!ctx->Extensions.ARB_shadow_ambient) + goto invalid_pname; + *params = obj->Sampler.CompareFailValue; break; case GL_GENERATE_MIPMAP_SGIS: *params = (GLfloat) obj->GenerateMipmap; break; case GL_TEXTURE_COMPARE_MODE_ARB: - if (ctx->Extensions.ARB_shadow) { - *params = (GLfloat) obj->Sampler.CompareMode; - } - else - error = GL_TRUE; + if (!ctx->Extensions.ARB_shadow) + goto invalid_pname; + *params = (GLfloat) obj->Sampler.CompareMode; break; case GL_TEXTURE_COMPARE_FUNC_ARB: - if (ctx->Extensions.ARB_shadow) { - *params = (GLfloat) obj->Sampler.CompareFunc; - } - else - error = GL_TRUE; + if (!ctx->Extensions.ARB_shadow) + goto invalid_pname; + *params = (GLfloat) obj->Sampler.CompareFunc; break; case GL_DEPTH_TEXTURE_MODE_ARB: - if (ctx->Extensions.ARB_depth_texture) { - *params = (GLfloat) obj->Sampler.DepthMode; - } - else - error = GL_TRUE; + if (!ctx->Extensions.ARB_depth_texture) + goto invalid_pname; + *params = (GLfloat) obj->Sampler.DepthMode; break; case GL_TEXTURE_LOD_BIAS: - if (ctx->Extensions.EXT_texture_lod_bias) { - *params = obj->Sampler.LodBias; - } - else - error = GL_TRUE; + if (!ctx->Extensions.EXT_texture_lod_bias) + goto invalid_pname; + *params = obj->Sampler.LodBias; break; #if FEATURE_OES_draw_texture case GL_TEXTURE_CROP_RECT_OES: @@ -1244,46 +1203,40 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) case GL_TEXTURE_SWIZZLE_G_EXT: case GL_TEXTURE_SWIZZLE_B_EXT: case GL_TEXTURE_SWIZZLE_A_EXT: - if (ctx->Extensions.EXT_texture_swizzle) { - GLuint comp = pname - GL_TEXTURE_SWIZZLE_R_EXT; - *params = (GLfloat) obj->Swizzle[comp]; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_swizzle) + goto invalid_pname; + *params = (GLfloat) obj->Swizzle[pname - GL_TEXTURE_SWIZZLE_R_EXT]; break; case GL_TEXTURE_SWIZZLE_RGBA_EXT: - if (ctx->Extensions.EXT_texture_swizzle) { + if (!ctx->Extensions.EXT_texture_swizzle) { + goto invalid_pname; + } + else { GLuint comp; for (comp = 0; comp < 4; comp++) { params[comp] = (GLfloat) obj->Swizzle[comp]; } } - else { - error = GL_TRUE; - } break; case GL_TEXTURE_CUBE_MAP_SEAMLESS: - if (ctx->Extensions.AMD_seamless_cubemap_per_texture) { - *params = (GLfloat) obj->Sampler.CubeMapSeamless; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.AMD_seamless_cubemap_per_texture) + goto invalid_pname; + *params = (GLfloat) obj->Sampler.CubeMapSeamless; break; default: - error = GL_TRUE; - break; + goto invalid_pname; } - if (error) - _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameterfv(pname=0x%x)", - pname); + /* no error if we get here */ + _mesa_unlock_texture(ctx, obj); + return; +invalid_pname: _mesa_unlock_texture(ctx, obj); + _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameterfv(pname=0x%x)", pname); } @@ -1291,7 +1244,6 @@ void GLAPIENTRY _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) { struct gl_texture_object *obj; - GLboolean error = GL_FALSE; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); @@ -1355,55 +1307,37 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) *params = obj->MaxLevel; break;; case GL_TEXTURE_MAX_ANISOTROPY_EXT: - if (ctx->Extensions.EXT_texture_filter_anisotropic) { - *params = (GLint) obj->Sampler.MaxAnisotropy; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_filter_anisotropic) + goto invalid_pname; + *params = (GLint) obj->Sampler.MaxAnisotropy; break; case GL_TEXTURE_COMPARE_FAIL_VALUE_ARB: - if (ctx->Extensions.ARB_shadow_ambient) { - *params = (GLint) FLOAT_TO_INT(obj->Sampler.CompareFailValue); - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.ARB_shadow_ambient) + goto invalid_pname; + *params = (GLint) FLOAT_TO_INT(obj->Sampler.CompareFailValue); break; case GL_GENERATE_MIPMAP_SGIS: *params = (GLint) obj->GenerateMipmap; break; case GL_TEXTURE_COMPARE_MODE_ARB: - if (ctx->Extensions.ARB_shadow) { - *params = (GLint) obj->Sampler.CompareMode; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.ARB_shadow) + goto invalid_pname; + *params = (GLint) obj->Sampler.CompareMode; break; case GL_TEXTURE_COMPARE_FUNC_ARB: - if (ctx->Extensions.ARB_shadow) { - *params = (GLint) obj->Sampler.CompareFunc; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.ARB_shadow) + goto invalid_pname; + *params = (GLint) obj->Sampler.CompareFunc; break; case GL_DEPTH_TEXTURE_MODE_ARB: - if (ctx->Extensions.ARB_depth_texture) { - *params = (GLint) obj->Sampler.DepthMode; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.ARB_depth_texture) + goto invalid_pname; + *params = (GLint) obj->Sampler.DepthMode; break; case GL_TEXTURE_LOD_BIAS: - if (ctx->Extensions.EXT_texture_lod_bias) { - *params = (GLint) obj->Sampler.LodBias; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_lod_bias) + goto invalid_pname; + *params = (GLint) obj->Sampler.LodBias; break; #if FEATURE_OES_draw_texture case GL_TEXTURE_CROP_RECT_OES: @@ -1417,42 +1351,34 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) case GL_TEXTURE_SWIZZLE_G_EXT: case GL_TEXTURE_SWIZZLE_B_EXT: case GL_TEXTURE_SWIZZLE_A_EXT: - if (ctx->Extensions.EXT_texture_swizzle) { - GLuint comp = pname - GL_TEXTURE_SWIZZLE_R_EXT; - *params = obj->Swizzle[comp]; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_swizzle) + goto invalid_pname; + *params = obj->Swizzle[pname - GL_TEXTURE_SWIZZLE_R_EXT]; break; case GL_TEXTURE_SWIZZLE_RGBA_EXT: - if (ctx->Extensions.EXT_texture_swizzle) { - COPY_4V(params, obj->Swizzle); - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.EXT_texture_swizzle) + goto invalid_pname; + COPY_4V(params, obj->Swizzle); break; case GL_TEXTURE_CUBE_MAP_SEAMLESS: - if (ctx->Extensions.AMD_seamless_cubemap_per_texture) { - *params = (GLint) obj->Sampler.CubeMapSeamless; - } - else { - error = GL_TRUE; - } + if (!ctx->Extensions.AMD_seamless_cubemap_per_texture) + goto invalid_pname; + *params = (GLint) obj->Sampler.CubeMapSeamless; break; default: - ; /* silence warnings */ + goto invalid_pname; } - if (error) - _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameteriv(pname=0x%x)", - pname); + /* no error if we get here */ + _mesa_unlock_texture(ctx, obj); + return; +invalid_pname: _mesa_unlock_texture(ctx, obj); + _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameteriv(pname=0x%x)", pname); } -- cgit v1.2.3 From 1254a2b2e45c6961a57d9c60f561907183ef7de7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 08:22:31 -0600 Subject: mesa: condense GL_TEXTURE_RESIDENT query code --- src/mesa/main/texparam.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 78dcc5dccea..0dec0172989 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1133,14 +1133,8 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) } break; case GL_TEXTURE_RESIDENT: - { - GLboolean resident; - if (ctx->Driver.IsTextureResident) - resident = ctx->Driver.IsTextureResident(ctx, obj); - else - resident = GL_TRUE; - *params = ENUM_TO_FLOAT(resident); - } + *params = ctx->Driver.IsTextureResident ? + ctx->Driver.IsTextureResident(ctx, obj) : 1.0F; break; case GL_TEXTURE_PRIORITY: *params = obj->Priority; @@ -1282,14 +1276,8 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) } break;; case GL_TEXTURE_RESIDENT: - { - GLboolean resident; - if (ctx->Driver.IsTextureResident) - resident = ctx->Driver.IsTextureResident(ctx, obj); - else - resident = GL_TRUE; - *params = (GLint) resident; - } + *params = ctx->Driver.IsTextureResident ? + ctx->Driver.IsTextureResident(ctx, obj) : 1; break;; case GL_TEXTURE_PRIORITY: *params = FLOAT_TO_INT(obj->Priority); -- cgit v1.2.3 From 02d81dfcaf073b5f7073d405e931b3d3e9f577ef Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 08:22:31 -0600 Subject: mesa: add null ptr checks in GetTexParameterI[u]iv() functions --- src/mesa/main/texparam.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 0dec0172989..97d0359f170 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1379,6 +1379,8 @@ _mesa_GetTexParameterIiv(GLenum target, GLenum pname, GLint *params) ASSERT_OUTSIDE_BEGIN_END(ctx); texObj = get_texobj(ctx, target, GL_TRUE); + if (!texObj) + return; switch (pname) { case GL_TEXTURE_BORDER_COLOR: @@ -1399,6 +1401,8 @@ _mesa_GetTexParameterIuiv(GLenum target, GLenum pname, GLuint *params) ASSERT_OUTSIDE_BEGIN_END(ctx); texObj = get_texobj(ctx, target, GL_TRUE); + if (!texObj) + return; switch (pname) { case GL_TEXTURE_BORDER_COLOR: -- cgit v1.2.3 From 1e89a526c6cd21852b440904711c5ee733ce1ad2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 08:22:31 -0600 Subject: mesa: whitespace, formatting fixes in GetTexParameter() code --- src/mesa/main/texparam.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 97d0359f170..bbbb306b2d9 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1115,17 +1115,15 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) *params = ENUM_TO_FLOAT(obj->Sampler.WrapR); break; case GL_TEXTURE_BORDER_COLOR: - if(ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP)) + if (ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP)) _mesa_update_state_locked(ctx); - if(ctx->Color._ClampFragmentColor) - { + if (ctx->Color._ClampFragmentColor) { params[0] = CLAMP(obj->Sampler.BorderColor.f[0], 0.0F, 1.0F); params[1] = CLAMP(obj->Sampler.BorderColor.f[1], 0.0F, 1.0F); params[2] = CLAMP(obj->Sampler.BorderColor.f[2], 0.0F, 1.0F); params[3] = CLAMP(obj->Sampler.BorderColor.f[3], 0.0F, 1.0F); } - else - { + else { params[0] = obj->Sampler.BorderColor.f[0]; params[1] = obj->Sampler.BorderColor.f[1]; params[2] = obj->Sampler.BorderColor.f[2]; @@ -1241,9 +1239,9 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); - obj = get_texobj(ctx, target, GL_TRUE); - if (!obj) - return; + obj = get_texobj(ctx, target, GL_TRUE); + if (!obj) + return; _mesa_lock_texture(ctx, obj); switch (pname) { -- cgit v1.2.3 From 192baaac0fc4701e82dcc3e19b3033f81dd82a62 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 08:22:31 -0600 Subject: mesa: minor comment updates in enable.c --- src/mesa/main/enable.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c index aac8b9c5eaf..3ba4df6342f 100644 --- a/src/mesa/main/enable.c +++ b/src/mesa/main/enable.c @@ -5,7 +5,6 @@ /* * Mesa 3-D graphics library - * Version: 7.0.3 * * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. * @@ -560,7 +559,6 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) ctx->Polygon.OffsetLine = state; break; case GL_POLYGON_OFFSET_FILL: - /*case GL_POLYGON_OFFSET_EXT:*/ if (ctx->Polygon.OffsetFill == state) return; FLUSH_VERTICES(ctx, _NEW_POLYGON); @@ -643,9 +641,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) break; #endif - /* - * CLIENT STATE!!! - */ + /* client-side state */ case GL_VERTEX_ARRAY: case GL_NORMAL_ARRAY: case GL_COLOR_ARRAY: @@ -1174,7 +1170,6 @@ _mesa_IsEnabled( GLenum cap ) case GL_POLYGON_OFFSET_LINE: return ctx->Polygon.OffsetLine; case GL_POLYGON_OFFSET_FILL: - /*case GL_POLYGON_OFFSET_EXT:*/ return ctx->Polygon.OffsetFill; case GL_RESCALE_NORMAL_EXT: return ctx->Transform.RescaleNormals; @@ -1213,9 +1208,7 @@ _mesa_IsEnabled( GLenum cap ) } #endif - /* - * CLIENT STATE!!! - */ + /* client-side state */ case GL_VERTEX_ARRAY: return (ctx->Array.ArrayObj->Vertex.Enabled != 0); case GL_NORMAL_ARRAY: -- cgit v1.2.3 From 09ba2527e885f6134002205716a44d01d83638c3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 08:22:31 -0600 Subject: st/mesa: move declaration before code --- src/mesa/state_tracker/st_cb_blit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c index 276d10fb557..626db12431d 100644 --- a/src/mesa/state_tracker/st_cb_blit.c +++ b/src/mesa/state_tracker/st_cb_blit.c @@ -90,13 +90,14 @@ st_BlitFramebuffer_resolve(struct gl_context *ctx, if (mask & depthStencil) { struct gl_renderbuffer_attachment *srcDepth, *srcStencil; struct gl_renderbuffer_attachment *dstDepth, *dstStencil; + boolean combined; srcDepth = &ctx->ReadBuffer->Attachment[BUFFER_DEPTH]; dstDepth = &ctx->DrawBuffer->Attachment[BUFFER_DEPTH]; srcStencil = &ctx->ReadBuffer->Attachment[BUFFER_STENCIL]; dstStencil = &ctx->DrawBuffer->Attachment[BUFFER_STENCIL]; - const boolean combined = + combined = st_is_depth_stencil_combined(srcDepth, srcStencil) && st_is_depth_stencil_combined(dstDepth, dstStencil); -- cgit v1.2.3 From 50073563b2bfe3716b3dc8b1ed2f91381ba24305 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 08:22:31 -0600 Subject: st/mesa: silence int/float and double/float conversion warnings --- src/mesa/state_tracker/st_cb_bitmap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 49b196032b9..067403f396b 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -328,8 +328,8 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized, if(!normalized) { - sRight = width; - tBot = height; + sRight = (GLfloat) width; + tBot = (GLfloat) height; } /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as @@ -381,7 +381,7 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized, /* same for all verts: */ for (i = 0; i < 4; i++) { st->bitmap.vertices[i][0][2] = z; - st->bitmap.vertices[i][0][3] = 1.0; + st->bitmap.vertices[i][0][3] = 1.0f; st->bitmap.vertices[i][1][0] = color[0]; st->bitmap.vertices[i][1][1] = color[1]; st->bitmap.vertices[i][1][2] = color[2]; @@ -513,7 +513,7 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, cso_set_vertex_elements(cso, 3, st->velems_util_draw); /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */ - z = z * 2.0 - 1.0; + z = z * 2.0f - 1.0f; /* draw textured quad */ offset = setup_bitmap_vertex_data(st, -- cgit v1.2.3 From b7e89115310628310bf458a33f2df2bf23384cf3 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Tue, 2 Aug 2011 11:36:44 -0500 Subject: glsl_to_tgsi: remove debugging printf --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index ba4074eecd5..b5f4253ea64 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -840,7 +840,6 @@ glsl_to_tgsi_visitor::add_constant(gl_register_file file, int index = 0; immediate_storage *entry; assert(file == PROGRAM_IMMEDIATE); - fprintf(stderr, "adding immediate\n"); /* Search immediate storage to see if we already have an identical * immediate that we can use instead of adding a duplicate entry. -- cgit v1.2.3 From 9adcab9cd464d659288e31e6767efb5dee3894ff Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Thu, 4 Aug 2011 10:15:54 -0500 Subject: st/mesa: replace duplicated create_color_map_texture() function with shared function --- src/mesa/state_tracker/st_atom_pixeltransfer.c | 22 +--------------------- src/mesa/state_tracker/st_cb_drawpixels.c | 23 +---------------------- src/mesa/state_tracker/st_texture.c | 20 ++++++++++++++++++++ src/mesa/state_tracker/st_texture.h | 4 ++++ 4 files changed, 26 insertions(+), 43 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c index 95b706cb96c..12b5bc5ba79 100644 --- a/src/mesa/state_tracker/st_atom_pixeltransfer.c +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c @@ -84,26 +84,6 @@ make_state_key(struct gl_context *ctx, struct state_key *key) } -static struct pipe_resource * -create_color_map_texture(struct gl_context *ctx) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - struct pipe_resource *pt; - enum pipe_format format; - const uint texSize = 256; /* simple, and usually perfect */ - - /* find an RGBA texture format */ - format = st_choose_format(pipe->screen, GL_RGBA, - PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); - - /* create texture for color map/table */ - pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0, - texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW); - return pt; -} - - /** * Update the pixelmap texture with the contents of the R/G/B/A pixel maps. */ @@ -219,7 +199,7 @@ get_pixel_transfer_program(struct gl_context *ctx, const struct state_key *key) /* create the colormap/texture now if not already done */ if (!st->pixel_xfer.pixelmap_texture) { - st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx); + st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx); st->pixel_xfer.pixelmap_sampler_view = st_create_texture_sampler_view(st->pipe, st->pixel_xfer.pixelmap_texture); diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index f4dd2a42847..0c4dc23ccf7 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -94,27 +94,6 @@ is_passthrough_program(const struct gl_fragment_program *prog) } -/* XXX copied verbatim from st_atom_pixeltransfer.c */ -static struct pipe_resource * -create_color_map_texture(struct gl_context *ctx) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - struct pipe_resource *pt; - enum pipe_format format; - const uint texSize = 256; /* simple, and usually perfect */ - - /* find an RGBA texture format */ - format = st_choose_format(pipe->screen, GL_RGBA, - PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); - - /* create texture for color map/table */ - pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0, - texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW); - return pt; -} - - /** * Returns a fragment program which implements the current pixel transfer ops. */ @@ -142,7 +121,7 @@ get_glsl_pixel_transfer_program(struct st_context *st, if (pixelMaps) { /* create the colormap/texture now if not already done */ if (!st->pixel_xfer.pixelmap_texture) { - st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx); + st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx); st->pixel_xfer.pixelmap_sampler_view = st_create_texture_sampler_view(st->pipe, st->pixel_xfer.pixelmap_texture); diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index ffe7e256a56..d8ba3ac9252 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -396,3 +396,23 @@ st_texture_image_copy(struct pipe_context *pipe, } } + +struct pipe_resource * +st_create_color_map_texture(struct gl_context *ctx) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct pipe_resource *pt; + enum pipe_format format; + const uint texSize = 256; /* simple, and usually perfect */ + + /* find an RGBA texture format */ + format = st_choose_format(pipe->screen, GL_RGBA, + PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW); + + /* create texture for color map/table */ + pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0, + texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW); + return pt; +} + diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h index d50c3c9af79..b822f47cf9e 100644 --- a/src/mesa/state_tracker/st_texture.h +++ b/src/mesa/state_tracker/st_texture.h @@ -232,4 +232,8 @@ st_texture_image_copy(struct pipe_context *pipe, struct pipe_resource *src, GLuint srcLevel, GLuint face); + +extern struct pipe_resource * +st_create_color_map_texture(struct gl_context *ctx); + #endif -- cgit v1.2.3 From d6a0692f9dc055c5e5f0e7c806537ad24aa13709 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 13:07:50 -0600 Subject: mesa: don't use K&R style function pointer calls --- src/mesa/main/texobj.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index 3021716a0b6..078a43ab153 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -842,7 +842,7 @@ _mesa_GenTextures( GLsizei n, GLuint *textures ) struct gl_texture_object *texObj; GLuint name = first + i; GLenum target = 0; - texObj = (*ctx->Driver.NewTextureObject)( ctx, name, target); + texObj = ctx->Driver.NewTextureObject(ctx, name, target); if (!texObj) { _glthread_UNLOCK_MUTEX(ctx->Shared->Mutex); _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGenTextures"); @@ -1066,7 +1066,7 @@ _mesa_BindTexture( GLenum target, GLuint texName ) } else { /* if this is a new texture id, allocate a texture object now */ - newTexObj = (*ctx->Driver.NewTextureObject)(ctx, texName, target); + newTexObj = ctx->Driver.NewTextureObject(ctx, texName, target); if (!newTexObj) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBindTexture"); return; @@ -1108,7 +1108,7 @@ _mesa_BindTexture( GLenum target, GLuint texName ) /* Pass BindTexture call to device driver */ if (ctx->Driver.BindTexture) - (*ctx->Driver.BindTexture)( ctx, target, newTexObj ); + ctx->Driver.BindTexture(ctx, target, newTexObj); } -- cgit v1.2.3 From 1c8d079e205919b24e04efdc2421c18d03f078ff Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 15:32:09 -0600 Subject: mesa: fix out of bounds array access in rtgc debug code Fixes https://bugs.freedesktop.org/show_bug.cgi?id=39841 This would only be hit if someone set RGTC_DEBUG=1. --- src/mesa/main/texcompress_rgtc_tmp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/main/texcompress_rgtc_tmp.h b/src/mesa/main/texcompress_rgtc_tmp.h index c8bf082a158..48bbd374e08 100644 --- a/src/mesa/main/texcompress_rgtc_tmp.h +++ b/src/mesa/main/texcompress_rgtc_tmp.h @@ -181,7 +181,7 @@ static void TAG(encode_rgtc_chan)(TYPE *blkaddr, TYPE srccolors[4][4], fprintf(stderr, "%d ", alphaenc1[i]); } fprintf(stderr, "cutVals "); - for (i = 0; i < 8; i++) { + for (i = 0; i < 7; i++) { fprintf(stderr, "%d ", acutValues[i]); } fprintf(stderr, "srcVals "); -- cgit v1.2.3 From d7f2e38fca38a5521e930242be46be5a70a9cbd3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 15:55:13 -0600 Subject: mesa: add st_glsl_to_tgsi.cpp to Sconscript --- src/mesa/SConscript | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa') diff --git a/src/mesa/SConscript b/src/mesa/SConscript index 24e2155c387..cbd16625186 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -264,6 +264,7 @@ statetracker_sources = [ 'state_tracker/st_draw_feedback.c', 'state_tracker/st_extensions.c', 'state_tracker/st_format.c', + 'state_tracker/st_glsl_to_tgsi.cpp', 'state_tracker/st_gen_mipmap.c', 'state_tracker/st_manager.c', 'state_tracker/st_mesa_to_tgsi.c', -- cgit v1.2.3 From a0eb83401ef599e597b72e70c8856e1bc0f59dcc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 15:55:50 -0600 Subject: mesa: use gl_constant_value type in _mesa_[Get]ProgramNamedParameter4fNV() --- src/mesa/main/nvprogram.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/nvprogram.c b/src/mesa/main/nvprogram.c index dd198b8141a..7ff7645b7b7 100644 --- a/src/mesa/main/nvprogram.c +++ b/src/mesa/main/nvprogram.c @@ -812,7 +812,7 @@ _mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name, { struct gl_program *prog; struct gl_fragment_program *fragProg; - GLfloat *v; + gl_constant_value *v; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); @@ -834,10 +834,10 @@ _mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name, v = _mesa_lookup_parameter_value(fragProg->Base.Parameters, len, (char *) name); if (v) { - v[0] = x; - v[1] = y; - v[2] = z; - v[3] = w; + v[0].f = x; + v[1].f = y; + v[2].f = z; + v[3].f = w; return; } @@ -878,7 +878,7 @@ _mesa_GetProgramNamedParameterfvNV(GLuint id, GLsizei len, const GLubyte *name, { struct gl_program *prog; struct gl_fragment_program *fragProg; - const GLfloat *v; + const gl_constant_value *v; GET_CURRENT_CONTEXT(ctx); @@ -899,10 +899,10 @@ _mesa_GetProgramNamedParameterfvNV(GLuint id, GLsizei len, const GLubyte *name, v = _mesa_lookup_parameter_value(fragProg->Base.Parameters, len, (char *) name); if (v) { - params[0] = v[0]; - params[1] = v[1]; - params[2] = v[2]; - params[3] = v[3]; + params[0] = v[0].f; + params[1] = v[1].f; + params[2] = v[2].f; + params[3] = v[3].f; return; } -- cgit v1.2.3 From 324857599b2a4735c86e54da9a1776c034dadf72 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 16:00:06 -0600 Subject: mesa: use gl_constant_value type in ARB program parser --- src/mesa/program/program_parse.y | 56 +++++++++++++++++++-------------------- src/mesa/program/program_parser.h | 3 ++- 2 files changed, 30 insertions(+), 29 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/program/program_parse.y b/src/mesa/program/program_parse.y index dbf5abaa617..dec35038be5 100644 --- a/src/mesa/program/program_parse.y +++ b/src/mesa/program/program_parse.y @@ -1854,64 +1854,64 @@ paramConstUse: paramConstScalarUse | paramConstVector; paramConstScalarDecl: signedFloatConstant { $$.count = 4; - $$.data[0] = $1; - $$.data[1] = $1; - $$.data[2] = $1; - $$.data[3] = $1; + $$.data[0].f = $1; + $$.data[1].f = $1; + $$.data[2].f = $1; + $$.data[3].f = $1; } ; paramConstScalarUse: REAL { $$.count = 1; - $$.data[0] = $1; - $$.data[1] = $1; - $$.data[2] = $1; - $$.data[3] = $1; + $$.data[0].f = $1; + $$.data[1].f = $1; + $$.data[2].f = $1; + $$.data[3].f = $1; } | INTEGER { $$.count = 1; - $$.data[0] = (float) $1; - $$.data[1] = (float) $1; - $$.data[2] = (float) $1; - $$.data[3] = (float) $1; + $$.data[0].f = (float) $1; + $$.data[1].f = (float) $1; + $$.data[2].f = (float) $1; + $$.data[3].f = (float) $1; } ; paramConstVector: '{' signedFloatConstant '}' { $$.count = 4; - $$.data[0] = $2; - $$.data[1] = 0.0f; - $$.data[2] = 0.0f; - $$.data[3] = 1.0f; + $$.data[0].f = $2; + $$.data[1].f = 0.0f; + $$.data[2].f = 0.0f; + $$.data[3].f = 1.0f; } | '{' signedFloatConstant ',' signedFloatConstant '}' { $$.count = 4; - $$.data[0] = $2; - $$.data[1] = $4; - $$.data[2] = 0.0f; - $$.data[3] = 1.0f; + $$.data[0].f = $2; + $$.data[1].f = $4; + $$.data[2].f = 0.0f; + $$.data[3].f = 1.0f; } | '{' signedFloatConstant ',' signedFloatConstant ',' signedFloatConstant '}' { $$.count = 4; - $$.data[0] = $2; - $$.data[1] = $4; - $$.data[2] = $6; - $$.data[3] = 1.0f; + $$.data[0].f = $2; + $$.data[1].f = $4; + $$.data[2].f = $6; + $$.data[3].f = 1.0f; } | '{' signedFloatConstant ',' signedFloatConstant ',' signedFloatConstant ',' signedFloatConstant '}' { $$.count = 4; - $$.data[0] = $2; - $$.data[1] = $4; - $$.data[2] = $6; - $$.data[3] = $8; + $$.data[0].f = $2; + $$.data[1].f = $4; + $$.data[2].f = $6; + $$.data[3].f = $8; } ; diff --git a/src/mesa/program/program_parser.h b/src/mesa/program/program_parser.h index 8e5aaee95e5..5637598f3b3 100644 --- a/src/mesa/program/program_parser.h +++ b/src/mesa/program/program_parser.h @@ -23,6 +23,7 @@ #pragma once #include "main/config.h" +#include "program/prog_parameter.h" struct gl_context; @@ -96,7 +97,7 @@ struct asm_symbol { struct asm_vector { unsigned count; - float data[4]; + gl_constant_value data[4]; }; -- cgit v1.2.3 From bf8d06c518a8e17e485b18ba03be3e1b45cc7327 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Aug 2011 16:01:27 -0600 Subject: mesa: pass correct constant type to _mesa_fetch_state() Fixes assorted warnings about float vs. gl_constant_value pointers. --- src/mesa/program/prog_statevars.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c index 16f9690e865..6aa2409e85e 100644 --- a/src/mesa/program/prog_statevars.c +++ b/src/mesa/program/prog_statevars.c @@ -1111,7 +1111,7 @@ _mesa_load_state_parameters(struct gl_context *ctx, if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) { _mesa_fetch_state(ctx, paramList->Parameters[i].StateIndexes, - paramList->ParameterValues[i]); + ¶mList->ParameterValues[i][0].f); } } } -- cgit v1.2.3 From a48118e510fcbb57634a7869cb628123fa8c3f2e Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Thu, 4 Aug 2011 18:04:44 -0700 Subject: mesa: Remove MSVC stdint typedefs from compiler.h. MSVC can now include the stdint.h at include/c99/stdint.h. --- src/mesa/main/compiler.h | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h index d736fdfc58a..ee7d0b2f880 100644 --- a/src/mesa/main/compiler.h +++ b/src/mesa/main/compiler.h @@ -60,29 +60,7 @@ extern "C" { /** * Get standard integer types */ -#if defined(_MSC_VER) - typedef __int8 int8_t; - typedef unsigned __int8 uint8_t; - typedef __int16 int16_t; - typedef unsigned __int16 uint16_t; - typedef __int32 int32_t; - typedef unsigned __int32 uint32_t; - typedef __int64 int64_t; - typedef unsigned __int64 uint64_t; - -# if defined(_WIN64) - typedef __int64 intptr_t; - typedef unsigned __int64 uintptr_t; -# else - typedef __int32 intptr_t; - typedef unsigned __int32 uintptr_t; -# endif - -# define INT64_C(__val) __val##i64 -# define UINT64_C(__val) __val##ui64 -#else -# include -#endif +#include /** -- cgit v1.2.3 From c251d83d916336f95109363e919920a024947230 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Thu, 4 Aug 2011 07:38:13 +0200 Subject: vbo: do not call _mesa_max_buffer_index in debug builds That code drops performance in Unigine Heaven and Tropics by a factor of 10. That's too crazy even for a debug build. NOTE: This is a candidate for the 7.11 branch. Reviewed-by: Brian Paul --- src/mesa/vbo/vbo_exec_array.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index b908d5aea7e..32ce0e4a8ff 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -909,11 +909,10 @@ vbo_exec_DrawRangeElementsBaseVertex(GLenum mode, if (0) _mesa_print_arrays(ctx); -#ifdef DEBUG /* 'end' was out of bounds, but now let's check the actual array * indexes to see if any of them are out of bounds. */ - { + if (0) { GLuint max = _mesa_max_buffer_index(ctx, count, type, indices, ctx->Array.ElementArrayBufferObj); if (max >= ctx->Array.ArrayObj->_MaxElement) { @@ -934,7 +933,6 @@ vbo_exec_DrawRangeElementsBaseVertex(GLenum mode, * upper bound wrong. */ } -#endif /* Set 'end' to the max possible legal value */ assert(ctx->Array.ArrayObj->_MaxElement >= 1); -- cgit v1.2.3 From 6e7942936c5de59f509779b6f7620d80d2fbc21a Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 5 Aug 2011 06:57:07 +0200 Subject: st/mesa: remove unused-but-set variables in st_glsl_to_tgsi.cpp --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 9c6a7ed738a..460bafb3821 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1725,7 +1725,6 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) if (index) { src.index += index->value.i[0] * element_size; } else { - st_src_reg array_base = this->result; /* Variable index array dereference. It eats the "vec4" of the * base of the array and an index that offsets the TGSI register * index. @@ -2463,7 +2462,7 @@ glsl_to_tgsi_visitor::visit(ir_discard *ir) void glsl_to_tgsi_visitor::visit(ir_if *ir) { - glsl_to_tgsi_instruction *cond_inst, *if_inst, *else_inst = NULL; + glsl_to_tgsi_instruction *cond_inst, *if_inst; glsl_to_tgsi_instruction *prev_inst; prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); @@ -2495,7 +2494,7 @@ glsl_to_tgsi_visitor::visit(ir_if *ir) visit_exec_list(&ir->then_instructions, this); if (!ir->else_instructions.is_empty()) { - else_inst = emit(ir->condition, TGSI_OPCODE_ELSE); + emit(ir->condition, TGSI_OPCODE_ELSE); visit_exec_list(&ir->else_instructions, this); } -- cgit v1.2.3 From 0722edc59cd526437c2d4bad474b934dad84d789 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 28 Jul 2011 09:57:19 -0700 Subject: i965/fs: Don't allocate the old backend's compile structs for our compile. This saves some 35MB when the program only uses GLSL shaders. --- src/mesa/drivers/dri/i965/brw_wm.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index b0dfdd536aa..d13ac6124c8 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -206,10 +206,6 @@ bool do_wm_prog(struct brw_context *brw, */ return false; } - c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN); - c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN); - c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG); - c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF); } else { void *instruction = c->instruction; void *prog_instructions = c->prog_instructions; @@ -232,6 +228,13 @@ bool do_wm_prog(struct brw_context *brw, if (!brw_wm_fs_emit(brw, c, prog)) return false; } else { + if (!c->instruction) { + c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN); + c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN); + c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG); + c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF); + } + /* Fallback for fixed function and ARB_fp shaders. */ c->dispatch_width = 16; brw_wm_payload_setup(brw, c); -- cgit v1.2.3 From ee0373b833155804bb8846c6f05f897b9ee5afa6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 25 Jul 2011 18:13:04 -0700 Subject: i965/fs: Don't upload unused uniform components. This saves both register space and upload bandwidth for unused values. Note that previously we were relying on the visitor not initially generating references to different sets of uniforms between the 8-wide and 16-wide code generation, and now we're relying on them dead-code eliminating the same stuff, too. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 89 ++++++++++++++++++++++++++++++++++-- src/mesa/drivers/dri/i965/brw_fs.h | 10 +++- 2 files changed, 95 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 02041b3bc03..f55be022f72 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -242,11 +242,12 @@ import_uniforms_callback(const void *key, * This brings in those uniform definitions */ void -fs_visitor::import_uniforms(struct hash_table *src_variable_ht) +fs_visitor::import_uniforms(fs_visitor *v) { - hash_table_call_foreach(src_variable_ht, + hash_table_call_foreach(v->variable_ht, import_uniforms_callback, variable_ht); + this->params_remap = v->params_remap; } /* Our support for uniforms is piggy-backed on the struct @@ -798,6 +799,86 @@ fs_visitor::split_virtual_grfs() this->live_intervals_valid = false; } +bool +fs_visitor::remove_dead_constants() +{ + if (c->dispatch_width == 8) { + this->params_remap = ralloc_array(mem_ctx, int, c->prog_data.nr_params); + + for (unsigned int i = 0; i < c->prog_data.nr_params; i++) + this->params_remap[i] = -1; + + /* Find which params are still in use. */ + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; + + for (int i = 0; i < 3; i++) { + int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + + if (inst->src[i].file != UNIFORM) + continue; + + assert(constant_nr < (int)c->prog_data.nr_params); + + /* For now, set this to non-negative. We'll give it the + * actual new number in a moment, in order to keep the + * register numbers nicely ordered. + */ + this->params_remap[constant_nr] = 0; + } + } + + /* Figure out what the new numbers for the params will be. At some + * point when we're doing uniform array access, we're going to want + * to keep the distinction between .reg and .reg_offset, but for + * now we don't care. + */ + unsigned int new_nr_params = 0; + for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { + if (this->params_remap[i] != -1) { + this->params_remap[i] = new_nr_params++; + } + } + + /* Update the list of params to be uploaded to match our new numbering. */ + for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { + int remapped = this->params_remap[i]; + + if (remapped == -1) + continue; + + /* We've already done setup_paramvalues_refs() so no need to worry + * about param_index and param_offset. + */ + c->prog_data.param[remapped] = c->prog_data.param[i]; + c->prog_data.param_convert[remapped] = c->prog_data.param_convert[i]; + } + + c->prog_data.nr_params = new_nr_params; + } else { + /* This should have been generated in the 8-wide pass already. */ + assert(this->params_remap); + } + + /* Now do the renumbering of the shader to remove unused params. */ + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; + + for (int i = 0; i < 3; i++) { + int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + + if (inst->src[i].file != UNIFORM) + continue; + + assert(this->params_remap[constant_nr] != -1); + inst->src[i].hw_reg = this->params_remap[constant_nr]; + inst->src[i].reg_offset = 0; + } + } + + return true; +} + /** * Choose accesses from the UNIFORM file to demote to using the pull * constant buffer. @@ -1624,6 +1705,8 @@ fs_visitor::run() progress = dead_code_eliminate() || progress; } while (progress); + remove_dead_constants(); + schedule_instructions(); assign_curb_setup(); @@ -1702,7 +1785,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c, if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) { c->dispatch_width = 16; fs_visitor v2(c, prog, shader); - v2.import_uniforms(v.variable_ht); + v2.import_uniforms(&v); v2.run(); } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 89d6cda7e4f..96e1420038f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -421,7 +421,7 @@ public: fs_reg *variable_storage(ir_variable *var); int virtual_grf_alloc(int size); - void import_uniforms(struct hash_table *src_variable_ht); + void import_uniforms(fs_visitor *v); void visit(ir_variable *ir); void visit(ir_assignment *ir); @@ -489,6 +489,7 @@ public: bool register_coalesce(); bool compute_to_mrf(); bool dead_code_eliminate(); + bool remove_dead_constants(); bool remove_duplicate_mrf_writes(); bool virtual_grf_interferes(int a, int b); void schedule_instructions(); @@ -566,6 +567,13 @@ public: int *virtual_grf_use; bool live_intervals_valid; + /* This is the map from UNIFORM hw_reg + reg_offset as generated by + * the visitor to the packed uniform number after + * remove_dead_constants() that represents the actual uploaded + * uniform index. + */ + int *params_remap; + struct hash_table *variable_ht; ir_variable *frag_color, *frag_data, *frag_depth; int first_non_payload_grf; -- cgit v1.2.3 From 69dc529da241747888efefdf0d3e58479dd6248c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 28 Jul 2011 09:52:03 -0700 Subject: mesa: Remove dead "MemPool" field of gl_shader_state. --- src/mesa/main/mtypes.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index b88118366b2..2d5f44c1e7b 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2252,8 +2252,6 @@ struct gl_shader_state */ struct gl_shader_program *ActiveProgram; - void *MemPool; - GLbitfield Flags; /**< Mask of GLSL_x flags */ }; -- cgit v1.2.3 From 9998df36c271810ecf20041bf6bed28f3952a94f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 25 Jul 2011 18:15:25 -0700 Subject: i965: Add dumping for gen6 WM constants too. This looks just like the VS dump for now. --- src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/brw_state_dump.c | 20 ++++++++++++++++++++ src/mesa/drivers/dri/i965/gen6_wm_state.c | 2 +- 3 files changed, 22 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 471015cf9d0..22baf978ad4 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -212,6 +212,7 @@ enum state_struct_type { AUB_TRACE_BINDING_TABLE = 0x101, AUB_TRACE_SURFACE_STATE = 0x102, AUB_TRACE_VS_CONSTANTS = 0x103, + AUB_TRACE_WM_CONSTANTS = 0x104, }; /** Subclass of Mesa vertex program */ diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index b9e5cc1a534..cb7a3ef73d3 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -455,6 +455,23 @@ dump_vs_constants(struct brw_context *brw, uint32_t offset, uint32_t size) } } +static void +dump_wm_constants(struct brw_context *brw, uint32_t offset, uint32_t size) +{ + const char *name = "WM_CONST"; + struct intel_context *intel = &brw->intel; + uint32_t *as_uint = intel->batch.bo->virtual + offset; + float *as_float = intel->batch.bo->virtual + offset; + int i; + + for (i = 0; i < size / 4; i += 4) { + batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n", + i / 4, + as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3], + as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]); + } +} + static void dump_binding_table(struct brw_context *brw, uint32_t offset, uint32_t size) { @@ -602,6 +619,9 @@ dump_state_batch(struct brw_context *brw) case AUB_TRACE_VS_CONSTANTS: dump_vs_constants(brw, offset, size); break; + case AUB_TRACE_WM_CONSTANTS: + dump_wm_constants(brw, offset, size); + break; default: break; } diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 185da9c355f..3d525248f25 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -54,7 +54,7 @@ gen6_prepare_wm_push_constants(struct brw_context *brw) float *constants; unsigned int i; - constants = brw_state_batch(brw, AUB_TRACE_NO_TYPE, + constants = brw_state_batch(brw, AUB_TRACE_WM_CONSTANTS, brw->wm.prog_data->nr_params * sizeof(float), 32, &brw->wm.push_const_offset); -- cgit v1.2.3 From 6bd5f43f212962a054a41290b0f8e350dae2f40d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Jul 2011 15:13:08 -0700 Subject: prog_optimize: Add support for saturates to _mesa_merge_mov_into_inst. This fixes the remaining regression from ff_fragment_shader in Mesa IR instruction count, to now being a 1.9% win overall. --- src/mesa/program/prog_optimize.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c index f4a7a638d5f..3340ce0498b 100644 --- a/src/mesa/program/prog_optimize.c +++ b/src/mesa/program/prog_optimize.c @@ -472,8 +472,7 @@ can_downward_mov_be_modifed(const struct prog_instruction *mov) mov->SrcReg[0].HasIndex2 == 0 && mov->SrcReg[0].RelAddr2 == 0 && mov->DstReg.RelAddr == 0 && - mov->DstReg.CondMask == COND_TR && - mov->SaturateMode == SATURATE_OFF; + mov->DstReg.CondMask == COND_TR; } @@ -482,7 +481,8 @@ can_upward_mov_be_modifed(const struct prog_instruction *mov) { return can_downward_mov_be_modifed(mov) && - mov->DstReg.File == PROGRAM_TEMPORARY; + mov->DstReg.File == PROGRAM_TEMPORARY && + mov->SaturateMode == SATURATE_OFF; } @@ -657,6 +657,8 @@ _mesa_merge_mov_into_inst(struct prog_instruction *inst, if (mask != (inst->DstReg.WriteMask & mask)) return GL_FALSE; + inst->SaturateMode |= mov->SaturateMode; + /* Depending on the instruction, we may need to recompute the swizzles. * Also, some other instructions (like TEX) are not linear. We will only * consider completely active sources and destinations -- cgit v1.2.3 From 62722d90af9d43d889af33b080a682f2004e049c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Jul 2011 13:54:15 -0700 Subject: ir_to_mesa: Try to avoid emitting a MOV_SAT to saturate an expression tree. Fixes a regression in codegen quality for ff_fragment_shader conversion to GLSL -- glean texCombine produces 7.5% fewer Mesa IR instructions. --- src/mesa/program/ir_to_mesa.cpp | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index debadb9a398..9b615b68a23 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -915,10 +915,30 @@ ir_to_mesa_visitor::try_emit_sat(ir_expression *ir) sat_src->accept(this); src_reg src = this->result; - this->result = get_temp(ir->type); - ir_to_mesa_instruction *inst; - inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src); - inst->saturate = true; + /* If we generated an expression instruction into a temporary in + * processing the saturate's operand, apply the saturate to that + * instruction. Otherwise, generate a MOV to do the saturate. + * + * Note that we have to be careful to only do this optimization if + * the instruction in question was what generated src->result. For + * example, ir_dereference_array might generate a MUL instruction + * to create the reladdr, and return us a src reg using that + * reladdr. That MUL result is not the value we're trying to + * saturate. + */ + ir_expression *sat_src_expr = sat_src->as_expression(); + ir_to_mesa_instruction *new_inst; + new_inst = (ir_to_mesa_instruction *)this->instructions.get_tail(); + if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul || + sat_src_expr->operation == ir_binop_add || + sat_src_expr->operation == ir_binop_dot)) { + new_inst->saturate = true; + } else { + this->result = get_temp(ir->type); + ir_to_mesa_instruction *inst; + inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src); + inst->saturate = true; + } return true; } -- cgit v1.2.3 From 4c7e215c7bb09f827df630cbfc80e87869351f18 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 2 May 2011 16:27:46 -0700 Subject: ir_to_mesa: Replace open-coded swizzle_for_size() --- src/mesa/program/ir_to_mesa.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 9b615b68a23..1ef609fe15d 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -641,8 +641,6 @@ src_reg ir_to_mesa_visitor::get_temp(const glsl_type *type) { src_reg src; - int swizzle[4]; - int i; src.file = PROGRAM_TEMPORARY; src.index = next_temp; @@ -652,12 +650,7 @@ ir_to_mesa_visitor::get_temp(const glsl_type *type) if (type->is_array() || type->is_record()) { src.swizzle = SWIZZLE_NOOP; } else { - for (i = 0; i < type->vector_elements; i++) - swizzle[i] = i; - for (; i < 4; i++) - swizzle[i] = type->vector_elements - 1; - src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], - swizzle[2], swizzle[3]); + src.swizzle = swizzle_for_size(type->vector_elements); } src.negate = 0; -- cgit v1.2.3 From b44648c9186d403abaeeeb3190d6759f951a49e4 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 5 Aug 2011 14:09:37 -0500 Subject: glsl_to_tgsi: try to avoid emitting a MOV_SAT to saturate an expression tree This is a port of commit 62722d9 to glsl_to_tgsi, with minor aesthetic changes (moved the declaration and assignment of new_inst inside the if block). --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 32 ++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 460bafb3821..e10243add8a 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1232,12 +1232,32 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) sat_src->accept(this); st_src_reg src = this->result; - this->result = get_temp(ir->type); - st_dst_reg result_dst = st_dst_reg(this->result); - result_dst.writemask = (1 << ir->type->vector_elements) - 1; - glsl_to_tgsi_instruction *inst; - inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); - inst->saturate = true; + /* If we generated an expression instruction into a temporary in + * processing the saturate's operand, apply the saturate to that + * instruction. Otherwise, generate a MOV to do the saturate. + * + * Note that we have to be careful to only do this optimization if + * the instruction in question was what generated src->result. For + * example, ir_dereference_array might generate a MUL instruction + * to create the reladdr, and return us a src reg using that + * reladdr. That MUL result is not the value we're trying to + * saturate. + */ + ir_expression *sat_src_expr = sat_src->as_expression(); + if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul || + sat_src_expr->operation == ir_binop_add || + sat_src_expr->operation == ir_binop_dot)) { + glsl_to_tgsi_instruction *new_inst; + new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); + new_inst->saturate = true; + } else { + this->result = get_temp(ir->type); + st_dst_reg result_dst = st_dst_reg(this->result); + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + glsl_to_tgsi_instruction *inst; + inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); + inst->saturate = true; + } return true; } -- cgit v1.2.3 From 5164244df02f33d6ad9e0a286f4b6d6af2dfbc75 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Fri, 5 Aug 2011 14:37:33 -0500 Subject: glsl_to_tgsi: replace open-coded swizzle_for_size() This is a port of commit 4c7e215c7bb to glsl_to_tgsi. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index e10243add8a..d7a1ba80e1d 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -949,8 +949,6 @@ st_src_reg glsl_to_tgsi_visitor::get_temp(const glsl_type *type) { st_src_reg src; - int swizzle[4]; - int i; src.type = glsl_version >= 130 ? type->base_type : GLSL_TYPE_FLOAT; src.file = PROGRAM_TEMPORARY; @@ -961,12 +959,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) if (type->is_array() || type->is_record()) { src.swizzle = SWIZZLE_NOOP; } else { - for (i = 0; i < type->vector_elements; i++) - swizzle[i] = i; - for (; i < 4; i++) - swizzle[i] = type->vector_elements - 1; - src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], - swizzle[2], swizzle[3]); + src.swizzle = swizzle_for_size(type->vector_elements); } src.negate = 0; -- cgit v1.2.3 From a9e97d022cb68266639eb54947517454c8ffe45e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 12:47:25 -0700 Subject: intel: Fix warnings from gl_constant_parameter changes. --- src/mesa/drivers/dri/i915/i915_fragprog.c | 6 ++---- src/mesa/drivers/dri/i965/brw_wm_fp.c | 10 +++++----- src/mesa/drivers/dri/i965/brw_wm_pass0.c | 4 ++-- 3 files changed, 9 insertions(+), 11 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 32050cebf33..d155b85ffca 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -175,10 +175,8 @@ src_vector(struct i915_fragment_program *p, case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: case PROGRAM_UNIFORM: - src = - i915_emit_param4fv(p, - program->Base.Parameters->ParameterValues[source-> - Index]); + src = i915_emit_param4fv(p, + &program->Base.Parameters->ParameterValues[source->Index][0].f); break; default: diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 7cd3edad235..d52a9581f5e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -535,15 +535,15 @@ static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, GLfloat s3) { struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; - GLfloat values[4]; + gl_constant_value values[4]; GLuint idx; GLuint swizzle; struct prog_src_register reg; - values[0] = s0; - values[1] = s1; - values[2] = s2; - values[3] = s3; + values[0].f = s0; + values[1].f = s1; + values[2].f = s2; + values[3].f = s3; idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle ); reg = src_reg(PROGRAM_STATE_VAR, idx); diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index f78bdc31866..ccf9dc2bc18 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -205,14 +205,14 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, case PROGRAM_CONSTANT: /* These are invarient: */ - ref = get_const_ref(c, &plist->ParameterValues[idx][component]); + ref = get_const_ref(c, &plist->ParameterValues[idx][component].f); break; case PROGRAM_STATE_VAR: case PROGRAM_UNIFORM: /* These may change from run to run: */ - ref = get_param_ref(c, &plist->ParameterValues[idx][component] ); + ref = get_param_ref(c, &plist->ParameterValues[idx][component].f ); break; default: -- cgit v1.2.3 From db726b048e8858af226dbd0f0fda72d0be01394e Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sat, 30 Jul 2011 21:26:26 -0700 Subject: mesa: In validate_program(), initialize errMsg for safety. validate_program relies on validate_shader_program to fill in errMsg; empirically, there exist cases where that doesn't happen. While tracking those down may be worthwhile, initializing the string so we don't try to ralloc_strdup random garbage also seems wise. Fixes issues caught by valgrind while running some test case. NOTE: This is a candidate for stable release branches. Reviewed-by: Chad Versace Signed-off-by: Kenneth Graunke --- src/mesa/main/shaderapi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 8df25c3f988..74997eaaa77 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -1125,7 +1125,7 @@ static void validate_program(struct gl_context *ctx, GLuint program) { struct gl_shader_program *shProg; - char errMsg[100]; + char errMsg[100] = ""; shProg = _mesa_lookup_shader_program_err(ctx, program, "glValidateProgram"); if (!shProg) { -- cgit v1.2.3 From 425b179fafe93ddf4abacbccb67ed6aecbef6a7e Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 5 Aug 2011 20:10:04 +0200 Subject: st/mesa: don't resolve stencil twice --- src/mesa/state_tracker/st_cb_blit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c index 626db12431d..750f541b5dd 100644 --- a/src/mesa/state_tracker/st_cb_blit.c +++ b/src/mesa/state_tracker/st_cb_blit.c @@ -107,8 +107,10 @@ st_BlitFramebuffer_resolve(struct gl_context *ctx, dstRb = st_renderbuffer(dstDepth->Renderbuffer); info->mask = (mask & GL_DEPTH_BUFFER_BIT) ? PIPE_MASK_Z : 0; - if (combined && (mask & GL_STENCIL_BUFFER_BIT)) + if (combined && (mask & GL_STENCIL_BUFFER_BIT)) { + mask &= ~GL_STENCIL_BUFFER_BIT; info->mask |= PIPE_MASK_S; + } info->src.res = srcRb->texture; info->src.layer = srcRb->surface->u.tex.first_layer; -- cgit v1.2.3 From 8488112d20d49d3dc7fefef19c6e550e4b71661c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 5 Aug 2011 15:01:41 -0600 Subject: mesa: whitespace changes --- src/mesa/program/prog_parameter.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h index f858cf0fa0d..1a5ed343937 100644 --- a/src/mesa/program/prog_parameter.h +++ b/src/mesa/program/prog_parameter.h @@ -46,16 +46,19 @@ #define PROG_PARAM_BIT_CYL_WRAP 0x10 /**< XXX gallium debug */ /*@}*/ + /** * Actual data for constant values of parameters. */ -typedef union gl_constant_value { - GLfloat f; - GLboolean b; - GLint i; - GLuint u; +typedef union gl_constant_value +{ + GLfloat f; + GLboolean b; + GLint i; + GLuint u; } gl_constant_value; + /** * Program parameter. * Used by shaders/programs for uniforms, constants, varying vars, etc. -- cgit v1.2.3 From ffb7d02154186402f64e0b628998485309774bb8 Mon Sep 17 00:00:00 2001 From: Bryan Cain Date: Sun, 7 Aug 2011 14:15:35 -0500 Subject: st/mesa: inline st_prepare_fragment_program in st_translate_fragment_program This reverts an unnecessary part of commit 4683529048ee and fixes misrendering and an assertion failure in Cogs. Fixes freedesktop.org bug 39888. Reviewed-by: Brian Paul --- src/mesa/state_tracker/st_program.c | 326 ++++++++++++++++++------------------ src/mesa/state_tracker/st_program.h | 15 -- 2 files changed, 162 insertions(+), 179 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index ca01d2e1976..a4f47edfcd3 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -416,151 +416,6 @@ st_get_vp_variant(struct st_context *st, return vpv; } -/** - * Translate Mesa fragment shader attributes to TGSI attributes. - * \return GL_TRUE if color output should be written to all render targets, - * GL_FALSE if not - */ -GLboolean -st_prepare_fragment_program(struct gl_context *ctx, - struct st_fragment_program *stfp) -{ - GLuint attr; - const GLbitfield inputsRead = stfp->Base.Base.InputsRead; - GLboolean write_all = GL_FALSE; - - /* - * Convert Mesa program inputs to TGSI input register semantics. - */ - for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) { - if (inputsRead & (1 << attr)) { - const GLuint slot = stfp->num_inputs++; - - stfp->input_to_index[attr] = slot; - - switch (attr) { - case FRAG_ATTRIB_WPOS: - stfp->input_semantic_name[slot] = TGSI_SEMANTIC_POSITION; - stfp->input_semantic_index[slot] = 0; - stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR; - break; - case FRAG_ATTRIB_COL0: - stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - stfp->input_semantic_index[slot] = 0; - stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR; - break; - case FRAG_ATTRIB_COL1: - stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - stfp->input_semantic_index[slot] = 1; - stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR; - break; - case FRAG_ATTRIB_FOGC: - stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FOG; - stfp->input_semantic_index[slot] = 0; - stfp->interp_mode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; - break; - case FRAG_ATTRIB_FACE: - stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FACE; - stfp->input_semantic_index[slot] = 0; - stfp->interp_mode[slot] = TGSI_INTERPOLATE_CONSTANT; - break; - /* In most cases, there is nothing special about these - * inputs, so adopt a convention to use the generic - * semantic name and the mesa FRAG_ATTRIB_ number as the - * index. - * - * All that is required is that the vertex shader labels - * its own outputs similarly, and that the vertex shader - * generates at least every output required by the - * fragment shader plus fixed-function hardware (such as - * BFC). - * - * There is no requirement that semantic indexes start at - * zero or be restricted to a particular range -- nobody - * should be building tables based on semantic index. - */ - case FRAG_ATTRIB_PNTC: - case FRAG_ATTRIB_TEX0: - case FRAG_ATTRIB_TEX1: - case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: - case FRAG_ATTRIB_TEX4: - case FRAG_ATTRIB_TEX5: - case FRAG_ATTRIB_TEX6: - case FRAG_ATTRIB_TEX7: - case FRAG_ATTRIB_VAR0: - default: - /* Actually, let's try and zero-base this just for - * readability of the generated TGSI. - */ - assert(attr >= FRAG_ATTRIB_TEX0); - stfp->input_semantic_index[slot] = (attr - FRAG_ATTRIB_TEX0); - stfp->input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - if (attr == FRAG_ATTRIB_PNTC) - stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR; - else - stfp->interp_mode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; - break; - } - } - else { - stfp->input_to_index[attr] = -1; - } - } - - /* - * Semantics and mapping for outputs - */ - { - uint numColors = 0; - GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten; - - /* if z is written, emit that first */ - if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { - stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_POSITION; - stfp->output_semantic_index[stfp->num_outputs] = 0; - stfp->result_to_output[FRAG_RESULT_DEPTH] = stfp->num_outputs; - stfp->num_outputs++; - outputsWritten &= ~(1 << FRAG_RESULT_DEPTH); - } - - if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) { - stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_STENCIL; - stfp->output_semantic_index[stfp->num_outputs] = 0; - stfp->result_to_output[FRAG_RESULT_STENCIL] = stfp->num_outputs; - stfp->num_outputs++; - outputsWritten &= ~(1 << FRAG_RESULT_STENCIL); - } - - /* handle remaning outputs (color) */ - for (attr = 0; attr < FRAG_RESULT_MAX; attr++) { - if (outputsWritten & BITFIELD64_BIT(attr)) { - switch (attr) { - case FRAG_RESULT_DEPTH: - case FRAG_RESULT_STENCIL: - /* handled above */ - assert(0); - break; - case FRAG_RESULT_COLOR: - write_all = GL_TRUE; /* fallthrough */ - default: - assert(attr == FRAG_RESULT_COLOR || - (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX)); - stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_COLOR; - stfp->output_semantic_index[stfp->num_outputs] = numColors; - stfp->result_to_output[attr] = stfp->num_outputs; - numColors++; - break; - } - - stfp->num_outputs++; - } - } - } - - return write_all; -} - /** * Translate a Mesa fragment shader into a TGSI shader using extra info in @@ -613,12 +468,155 @@ st_translate_fragment_program(struct st_context *st, if (!stfp->tgsi.tokens) { /* need to translate Mesa instructions to TGSI now */ + GLuint outputMapping[FRAG_RESULT_MAX]; + GLuint inputMapping[FRAG_ATTRIB_MAX]; + GLuint interpMode[PIPE_MAX_SHADER_INPUTS]; /* XXX size? */ + GLuint attr; + const GLbitfield inputsRead = stfp->Base.Base.InputsRead; struct ureg_program *ureg; - GLboolean write_all = st_prepare_fragment_program(st->ctx, stfp); + + GLboolean write_all = GL_FALSE; + + ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; + ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; + uint fs_num_inputs = 0; + + ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; + ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; + uint fs_num_outputs = 0; if (!stfp->glsl_to_tgsi) _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT); + /* + * Convert Mesa program inputs to TGSI input register semantics. + */ + for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) { + if (inputsRead & (1 << attr)) { + const GLuint slot = fs_num_inputs++; + + inputMapping[attr] = slot; + + switch (attr) { + case FRAG_ATTRIB_WPOS: + input_semantic_name[slot] = TGSI_SEMANTIC_POSITION; + input_semantic_index[slot] = 0; + interpMode[slot] = TGSI_INTERPOLATE_LINEAR; + break; + case FRAG_ATTRIB_COL0: + input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + input_semantic_index[slot] = 0; + interpMode[slot] = TGSI_INTERPOLATE_LINEAR; + break; + case FRAG_ATTRIB_COL1: + input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + input_semantic_index[slot] = 1; + interpMode[slot] = TGSI_INTERPOLATE_LINEAR; + break; + case FRAG_ATTRIB_FOGC: + input_semantic_name[slot] = TGSI_SEMANTIC_FOG; + input_semantic_index[slot] = 0; + interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; + break; + case FRAG_ATTRIB_FACE: + input_semantic_name[slot] = TGSI_SEMANTIC_FACE; + input_semantic_index[slot] = 0; + interpMode[slot] = TGSI_INTERPOLATE_CONSTANT; + break; + /* In most cases, there is nothing special about these + * inputs, so adopt a convention to use the generic + * semantic name and the mesa FRAG_ATTRIB_ number as the + * index. + * + * All that is required is that the vertex shader labels + * its own outputs similarly, and that the vertex shader + * generates at least every output required by the + * fragment shader plus fixed-function hardware (such as + * BFC). + * + * There is no requirement that semantic indexes start at + * zero or be restricted to a particular range -- nobody + * should be building tables based on semantic index. + */ + case FRAG_ATTRIB_PNTC: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + case FRAG_ATTRIB_VAR0: + default: + /* Actually, let's try and zero-base this just for + * readability of the generated TGSI. + */ + assert(attr >= FRAG_ATTRIB_TEX0); + input_semantic_index[slot] = (attr - FRAG_ATTRIB_TEX0); + input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + if (attr == FRAG_ATTRIB_PNTC) + interpMode[slot] = TGSI_INTERPOLATE_LINEAR; + else + interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; + break; + } + } + else { + inputMapping[attr] = -1; + } + } + + /* + * Semantics and mapping for outputs + */ + { + uint numColors = 0; + GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten; + + /* if z is written, emit that first */ + if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION; + fs_output_semantic_index[fs_num_outputs] = 0; + outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs; + fs_num_outputs++; + outputsWritten &= ~(1 << FRAG_RESULT_DEPTH); + } + + if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) { + fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL; + fs_output_semantic_index[fs_num_outputs] = 0; + outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs; + fs_num_outputs++; + outputsWritten &= ~(1 << FRAG_RESULT_STENCIL); + } + + /* handle remaning outputs (color) */ + for (attr = 0; attr < FRAG_RESULT_MAX; attr++) { + if (outputsWritten & BITFIELD64_BIT(attr)) { + switch (attr) { + case FRAG_RESULT_DEPTH: + case FRAG_RESULT_STENCIL: + /* handled above */ + assert(0); + break; + case FRAG_RESULT_COLOR: + write_all = GL_TRUE; /* fallthrough */ + default: + assert(attr == FRAG_RESULT_COLOR || + (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX)); + fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR; + fs_output_semantic_index[fs_num_outputs] = numColors; + outputMapping[attr] = fs_num_outputs; + numColors++; + break; + } + + fs_num_outputs++; + } + } + } + ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT ); if (ureg == NULL) return NULL; @@ -638,32 +636,32 @@ st_translate_fragment_program(struct st_context *st, stfp->glsl_to_tgsi, &stfp->Base.Base, /* inputs */ - stfp->num_inputs, - stfp->input_to_index, - stfp->input_semantic_name, - stfp->input_semantic_index, - stfp->interp_mode, + fs_num_inputs, + inputMapping, + input_semantic_name, + input_semantic_index, + interpMode, /* outputs */ - stfp->num_outputs, - stfp->result_to_output, - stfp->output_semantic_name, - stfp->output_semantic_index, FALSE ); + fs_num_outputs, + outputMapping, + fs_output_semantic_name, + fs_output_semantic_index, FALSE ); else st_translate_mesa_program(st->ctx, TGSI_PROCESSOR_FRAGMENT, ureg, &stfp->Base.Base, /* inputs */ - stfp->num_inputs, - stfp->input_to_index, - stfp->input_semantic_name, - stfp->input_semantic_index, - stfp->interp_mode, + fs_num_inputs, + inputMapping, + input_semantic_name, + input_semantic_index, + interpMode, /* outputs */ - stfp->num_outputs, - stfp->result_to_output, - stfp->output_semantic_name, - stfp->output_semantic_index, FALSE ); + fs_num_outputs, + outputMapping, + fs_output_semantic_name, + fs_output_semantic_index, FALSE ); stfp->tgsi.tokens = ureg_get_tokens( ureg, NULL ); ureg_destroy( ureg ); diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index 67723de6d53..699b6e8ccb7 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -85,21 +85,6 @@ struct st_fragment_program { struct gl_fragment_program Base; struct glsl_to_tgsi_visitor* glsl_to_tgsi; - - /** maps a Mesa FRAG_ATTRIB_x to a packed TGSI input index */ - GLuint input_to_index[FRAG_ATTRIB_MAX]; - /** maps a TGSI input index back to a Mesa FRAG_ATTRIB_x */ - GLuint index_to_input[PIPE_MAX_SHADER_INPUTS]; - ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; - ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; - GLuint num_inputs; - GLuint interp_mode[PIPE_MAX_SHADER_INPUTS]; /* XXX size? */ - - /** Maps FRAG_RESULT_x to slot */ - GLuint result_to_output[FRAG_RESULT_MAX]; - ubyte output_semantic_name[FRAG_RESULT_MAX]; - ubyte output_semantic_index[FRAG_RESULT_MAX]; - GLuint num_outputs; struct pipe_shader_state tgsi; -- cgit v1.2.3 From afd1d857752b5c30a3082068f8bb9002e0c69699 Mon Sep 17 00:00:00 2001 From: Fabio Pedretti Date: Tue, 9 Aug 2011 08:08:59 -0600 Subject: swrast: silence unused var warnings Signed-off-by: Brian Paul --- src/mesa/swrast/s_span.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c index db102ac7946..9a91be39970 100644 --- a/src/mesa/swrast/s_span.c +++ b/src/mesa/swrast/s_span.c @@ -212,10 +212,10 @@ interpolate_active_attribs(struct gl_context *ctx, SWspan *span, GLbitfield attr static INLINE void interpolate_int_colors(struct gl_context *ctx, SWspan *span) { +#if CHAN_BITS != 32 const GLuint n = span->end; GLuint i; -#if CHAN_BITS != 32 ASSERT(!(span->arrayMask & SPAN_RGBA)); #endif -- cgit v1.2.3 From e6c64800cc8833fb4083a556c839b51e8ac84a8b Mon Sep 17 00:00:00 2001 From: Henri Verbeet Date: Tue, 9 Aug 2011 12:23:47 -0500 Subject: glsl_to_tgsi: improve assignment hack Fixes StarCraft 2 and Fallout 3 in Wine. --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index d7a1ba80e1d..aef23e7d207 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1994,15 +1994,17 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) } else if (ir->rhs->as_expression() && this->instructions.get_tail() && ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && - type_size(ir->lhs->type) == 1) { + type_size(ir->lhs->type) == 1 && + l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) { /* To avoid emitting an extra MOV when assigning an expression to a * variable, emit the last instruction of the expression again, but * replace the destination register with the target of the assignment. * Dead code elimination will remove the original instruction. */ - glsl_to_tgsi_instruction *inst; + glsl_to_tgsi_instruction *inst, *new_inst; inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); - emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); + new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); + new_inst->saturate = inst->saturate; } else { for (i = 0; i < type_size(ir->lhs->type); i++) { emit(ir, TGSI_OPCODE_MOV, l, r); -- cgit v1.2.3 From fa43477fa33c068915283d511b64e3d6470ccd73 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 4 May 2011 13:27:33 -0700 Subject: mesa: Add a convenience interface for register allocator conflicts setup. --- src/mesa/program/register_allocate.c | 21 +++++++++++++++++++++ src/mesa/program/register_allocate.h | 2 ++ 2 files changed, 23 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/program/register_allocate.c b/src/mesa/program/register_allocate.c index de96eb42c9b..f5b5174fc18 100644 --- a/src/mesa/program/register_allocate.c +++ b/src/mesa/program/register_allocate.c @@ -200,6 +200,27 @@ ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2) } } +/** + * Adds a conflict between base_reg and reg, and also between reg and + * anything that base_reg conflicts with. + * + * This can simplify code for setting up multiple register classes + * which are aggregates of some base hardware registers, compared to + * explicitly using ra_add_reg_conflict. + */ +void +ra_add_transitive_reg_conflict(struct ra_regs *regs, + unsigned int base_reg, unsigned int reg) +{ + int i; + + ra_add_reg_conflict(regs, reg, base_reg); + + for (i = 0; i < regs->regs[base_reg].num_conflicts; i++) { + ra_add_reg_conflict(regs, reg, regs->regs[base_reg].conflict_list[i]); + } +} + unsigned int ra_alloc_reg_class(struct ra_regs *regs) { diff --git a/src/mesa/program/register_allocate.h b/src/mesa/program/register_allocate.h index 5b95833f394..ee2e58a4756 100644 --- a/src/mesa/program/register_allocate.h +++ b/src/mesa/program/register_allocate.h @@ -40,6 +40,8 @@ struct ra_regs *ra_alloc_reg_set(unsigned int count); unsigned int ra_alloc_reg_class(struct ra_regs *regs); void ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2); +void ra_add_transitive_reg_conflict(struct ra_regs *regs, + unsigned int base_reg, unsigned int reg); void ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int reg); void ra_set_finalize(struct ra_regs *regs); /** @} */ -- cgit v1.2.3 From bbcf13adbe525bd389a65ba15dd7831a56b8b13c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 4 May 2011 13:31:01 -0700 Subject: i965/fs: Use the new convenience interface for setting up reg conflicts. That code I wrote was impenetrable, and hard to write the first time. This makes things a lot more obvious. --- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 29 ++++++----------------- 1 file changed, 7 insertions(+), 22 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 78daa491156..f246ac49660 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -157,29 +157,14 @@ fs_visitor::assign_regs() classes[i] = ra_alloc_reg_class(regs); for (int i_r = 0; i_r < class_reg_count[i]; i_r++) { - ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r); - } + int class_reg = class_base_reg[i] + i_r; - /* Add conflicts between our contiguous registers aliasing - * base regs and other register classes' contiguous registers - * that alias base regs, or the base regs themselves for classes[0]. - */ - for (int c = 0; c <= i; c++) { - for (int i_r = 0; i_r < class_reg_count[i]; i_r++) { - for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1)); - c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]); - c_r++) { - - if (0) { - printf("%d/%d conflicts %d/%d\n", - class_sizes[i], first_assigned_grf + i_r, - class_sizes[c], first_assigned_grf + c_r); - } - - ra_add_reg_conflict(regs, - class_base_reg[i] + i_r, - class_base_reg[c] + c_r); - } + ra_class_add_reg(regs, classes[i], class_reg); + + for (int base_reg = i_r; + base_reg < i_r + class_sizes[i]; + base_reg++) { + ra_add_transitive_reg_conflict(regs, base_reg, class_reg); } } } -- cgit v1.2.3 From b76378d46a211521582cfab56dc05031a57502a6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 4 May 2011 13:50:13 -0700 Subject: i965/fs: Eliminate the magic nature of virtual GRF 0. This was a debugging aid at one point -- virtual grf 0 should never be allocated, and it would be used if undefined register access occurred in codegen. However, it made the confusing register allocation code even more confusing by indexing things off of 1 all over. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 9 +++---- src/mesa/drivers/dri/i965/brw_fs.h | 2 +- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 33 +++++++++-------------- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 4 +-- 4 files changed, 17 insertions(+), 31 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index f55be022f72..d57a67cc4fc 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -181,9 +181,6 @@ fs_visitor::virtual_grf_alloc(int size) virtual_grf_array_size *= 2; virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, virtual_grf_array_size); - - /* This slot is always unused. */ - virtual_grf_sizes[0] = 0; } virtual_grf_sizes[virtual_grf_next] = size; return virtual_grf_next++; @@ -985,7 +982,7 @@ fs_visitor::calculate_live_intervals() } } else { for (unsigned int i = 0; i < 3; i++) { - if (inst->src[i].file == GRF && inst->src[i].reg != 0) { + if (inst->src[i].file == GRF) { int reg = inst->src[i].reg; if (!loop_depth) { @@ -1001,7 +998,7 @@ fs_visitor::calculate_live_intervals() } } } - if (inst->dst.file == GRF && inst->dst.reg != 0) { + if (inst->dst.file == GRF) { int reg = inst->dst.reg; if (!loop_depth) { @@ -1715,7 +1712,7 @@ fs_visitor::run() if (0) { /* Debug of register spilling: Go spill everything. */ int virtual_grf_count = virtual_grf_next; - for (int i = 1; i < virtual_grf_count; i++) { + for (int i = 0; i < virtual_grf_count; i++) { spill_reg(i); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 96e1420038f..0375f672bec 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -402,7 +402,7 @@ public: this->base_ir = NULL; this->virtual_grf_sizes = NULL; - this->virtual_grf_next = 1; + this->virtual_grf_next = 0; this->virtual_grf_array_size = 0; this->virtual_grf_def = NULL; this->virtual_grf_use = NULL; diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index f246ac49660..83dd629aafb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -50,7 +50,7 @@ extern "C" { static void assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width) { - if (reg->file == GRF && reg->reg != 0) { + if (reg->file == GRF) { assert(reg->reg_offset >= 0); reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width; reg->reg = 0; @@ -60,20 +60,17 @@ assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width) void fs_visitor::assign_regs_trivial() { - int last_grf = 0; - int hw_reg_mapping[this->virtual_grf_next]; + int hw_reg_mapping[this->virtual_grf_next + 1]; int i; int reg_width = c->dispatch_width / 8; - hw_reg_mapping[0] = 0; /* Note that compressed instructions require alignment to 2 registers. */ - hw_reg_mapping[1] = ALIGN(this->first_non_payload_grf, reg_width); - for (i = 2; i < this->virtual_grf_next; i++) { + hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width); + for (i = 1; i <= this->virtual_grf_next; i++) { hw_reg_mapping[i] = (hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1] * reg_width); } - last_grf = hw_reg_mapping[i - 1] + (this->virtual_grf_sizes[i - 1] * - reg_width); + this->grf_used = hw_reg_mapping[this->virtual_grf_next]; foreach_list(node, &this->instructions) { fs_inst *inst = (fs_inst *)node; @@ -83,12 +80,11 @@ fs_visitor::assign_regs_trivial() assign_reg(hw_reg_mapping, &inst->src[1], reg_width); } - if (last_grf >= BRW_MAX_GRF) { + if (this->grf_used >= BRW_MAX_GRF) { fail("Ran out of regs on trivial allocator (%d/%d)\n", - last_grf, BRW_MAX_GRF); + this->grf_used, BRW_MAX_GRF); } - this->grf_used = last_grf + reg_width; } bool @@ -101,7 +97,7 @@ fs_visitor::assign_regs() * for reg_width == 2. */ int reg_width = c->dispatch_width / 8; - int hw_reg_mapping[this->virtual_grf_next + 1]; + int hw_reg_mapping[this->virtual_grf_next]; int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width); int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width; int class_sizes[base_reg_count]; @@ -125,7 +121,7 @@ fs_visitor::assign_regs() */ class_sizes[class_count++] = 2; } - for (int r = 1; r < this->virtual_grf_next; r++) { + for (int r = 0; r < this->virtual_grf_next; r++) { int i; for (i = 0; i < class_count; i++) { @@ -195,12 +191,8 @@ fs_visitor::assign_regs() struct ra_graph *g = ra_alloc_interference_graph(regs, this->virtual_grf_next); - /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1 - * with nodes. - */ - ra_set_node_class(g, 0, classes[0]); - for (int i = 1; i < this->virtual_grf_next; i++) { + for (int i = 0; i < this->virtual_grf_next; i++) { for (int c = 0; c < class_count; c++) { if (class_sizes[c] == this->virtual_grf_sizes[i]) { if (aligned_pair_class >= 0 && @@ -213,7 +205,7 @@ fs_visitor::assign_regs() } } - for (int j = 1; j < i; j++) { + for (int j = 0; j < i; j++) { if (virtual_grf_interferes(i, j)) { ra_add_node_interference(g, i, j); } @@ -248,8 +240,7 @@ fs_visitor::assign_regs() * numbers. */ this->grf_used = first_assigned_grf; - hw_reg_mapping[0] = 0; /* unused */ - for (int i = 1; i < this->virtual_grf_next; i++) { + for (int i = 0; i < this->virtual_grf_next; i++) { int reg = ra_get_node_reg(g, i); int hw_reg = -1; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 2b769ccbba1..2e3f9be75b4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -142,9 +142,7 @@ fs_visitor::visit(ir_dereference_array *ir) this->result.type = brw_type_for_base_type(ir->type); if (index) { - assert(this->result.file == UNIFORM || - (this->result.file == GRF && - this->result.reg != 0)); + assert(this->result.file == UNIFORM || this->result.file == GRF); this->result.reg_offset += index->value.i[0] * element_size; } else { assert(!"FINISHME: non-constant array element"); -- cgit v1.2.3 From 4e10d5825b31d2c58c0af3e29b7fc2eacb2b4709 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 5 May 2011 19:37:10 -0700 Subject: i965/fs: Simplify the register allocator using a map from RA reg to GRF. It's fewer pointers to track, and when we start caching the register set, should be algorithmically better in the cache hit case (lookup in a byte-per-register array, instead of a linear walk through desctiption of register classes to find how to translate that class). --- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 79 +++++++++++------------ 1 file changed, 38 insertions(+), 41 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 83dd629aafb..42ab66df6d8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -102,7 +102,7 @@ fs_visitor::assign_regs() int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width; int class_sizes[base_reg_count]; int class_count = 0; - int aligned_pair_class = -1; + int aligned_pairs_class = -1; calculate_live_intervals(); @@ -137,52 +137,59 @@ fs_visitor::assign_regs() } } + /* Compute the total number of registers across all classes. */ int ra_reg_count = 0; - int class_base_reg[class_count]; - int class_reg_count[class_count]; - int classes[class_count + 1]; - for (int i = 0; i < class_count; i++) { - class_base_reg[i] = ra_reg_count; - class_reg_count[i] = base_reg_count - (class_sizes[i] - 1); - ra_reg_count += class_reg_count[i]; + ra_reg_count += base_reg_count - (class_sizes[i] - 1); } struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count); + uint8_t ra_reg_to_grf[ra_reg_count]; + int classes[class_count + 1]; + + /* Now, add the registers to their classes, and add the conflicts + * between them and the base GRF registers (and also each other). + */ + int reg = 0; + int pairs_base_reg = 0; + int pairs_reg_count = 0; for (int i = 0; i < class_count; i++) { + int class_reg_count = base_reg_count - (class_sizes[i] - 1); classes[i] = ra_alloc_reg_class(regs); - for (int i_r = 0; i_r < class_reg_count[i]; i_r++) { - int class_reg = class_base_reg[i] + i_r; + /* Save this off for the aligned pair class at the end. */ + if (class_sizes[i] == 2) { + pairs_base_reg = reg; + pairs_reg_count = class_reg_count; + } + + for (int j = 0; j < class_reg_count; j++) { + ra_class_add_reg(regs, classes[i], reg); - ra_class_add_reg(regs, classes[i], class_reg); + ra_reg_to_grf[reg] = j; - for (int base_reg = i_r; - base_reg < i_r + class_sizes[i]; + for (int base_reg = j; + base_reg < j + class_sizes[i]; base_reg++) { - ra_add_transitive_reg_conflict(regs, base_reg, class_reg); + ra_add_transitive_reg_conflict(regs, base_reg, reg); } + + reg++; } } + assert(reg == ra_reg_count); /* Add a special class for aligned pairs, which we'll put delta_x/y * in on gen5 so that we can do PLN. */ if (brw->has_pln && reg_width == 1 && intel->gen < 6) { - int reg_count = (base_reg_count - 1) / 2; - int unaligned_pair_class = 1; - assert(class_sizes[unaligned_pair_class] == 2); - - aligned_pair_class = class_count; - classes[aligned_pair_class] = ra_alloc_reg_class(regs); - class_sizes[aligned_pair_class] = 2; - class_base_reg[aligned_pair_class] = 0; - class_reg_count[aligned_pair_class] = 0; - int start = (first_assigned_grf & 1) ? 1 : 0; - - for (int i = 0; i < reg_count; i++) { - ra_class_add_reg(regs, classes[aligned_pair_class], - class_base_reg[unaligned_pair_class] + i * 2 + start); + aligned_pairs_class = ra_alloc_reg_class(regs); + + for (int i = 0; i < pairs_reg_count; i++) { + if ((ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) { + ra_class_add_reg(regs, aligned_pairs_class, + pairs_base_reg + i); + } } class_count++; } @@ -195,9 +202,9 @@ fs_visitor::assign_regs() for (int i = 0; i < this->virtual_grf_next; i++) { for (int c = 0; c < class_count; c++) { if (class_sizes[c] == this->virtual_grf_sizes[i]) { - if (aligned_pair_class >= 0 && + if (aligned_pairs_class >= 0 && this->delta_x.reg == i) { - ra_set_node_class(g, i, classes[aligned_pair_class]); + ra_set_node_class(g, i, aligned_pairs_class); } else { ra_set_node_class(g, i, classes[c]); } @@ -242,18 +249,8 @@ fs_visitor::assign_regs() this->grf_used = first_assigned_grf; for (int i = 0; i < this->virtual_grf_next; i++) { int reg = ra_get_node_reg(g, i); - int hw_reg = -1; - - for (int c = 0; c < class_count; c++) { - if (reg >= class_base_reg[c] && - reg < class_base_reg[c] + class_reg_count[c]) { - hw_reg = reg - class_base_reg[c]; - break; - } - } - assert(hw_reg >= 0); - hw_reg_mapping[i] = first_assigned_grf + hw_reg * reg_width; + hw_reg_mapping[i] = first_assigned_grf + ra_reg_to_grf[reg] * reg_width; this->grf_used = MAX2(this->grf_used, hw_reg_mapping[i] + this->virtual_grf_sizes[i] * reg_width); -- cgit v1.2.3 From b1f0bffd399f377a19b0541e1d834afad8b9dad0 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 9 May 2011 09:56:18 -0700 Subject: i965/fs: Factor out the register allocator setup to a separate function. Besides separating out a logical step of the giant register allocator function, this now communicates a bunch of the allocator information through entries in brw_context, which will make this code partially reusable for caching the expensive allocator setup. --- src/mesa/drivers/dri/i965/brw_context.h | 23 ++++ src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 148 ++++++++++++---------- 2 files changed, 105 insertions(+), 66 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 22baf978ad4..cc11d06874d 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -748,6 +748,29 @@ struct brw_context * Pre-gen6, push constants live in the CURBE. */ uint32_t push_const_offset; + + /** @{ register allocator */ + + struct ra_regs *regs; + + /** Array of the ra classes for the unaligned contiguous + * register block sizes used. + */ + int *classes; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + + /** + * ra class for the aligned pairs we use for PLN, which doesn't + * appear in *classes. + */ + int aligned_pairs_class; + + /** @} */ } wm; diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 42ab66df6d8..8e44a010576 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -87,55 +87,14 @@ fs_visitor::assign_regs_trivial() } -bool -fs_visitor::assign_regs() +static void +brw_alloc_reg_set_for_classes(struct brw_context *brw, + int *class_sizes, + int class_count, + int reg_width, + int base_reg_count) { - /* Most of this allocation was written for a reg_width of 1 - * (dispatch_width == 8). In extending to 16-wide, the code was - * left in place and it was converted to have the hardware - * registers it's allocating be contiguous physical pairs of regs - * for reg_width == 2. - */ - int reg_width = c->dispatch_width / 8; - int hw_reg_mapping[this->virtual_grf_next]; - int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width); - int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width; - int class_sizes[base_reg_count]; - int class_count = 0; - int aligned_pairs_class = -1; - - calculate_live_intervals(); - - /* Set up the register classes. - * - * The base registers store a scalar value. For texture samples, - * we get virtual GRFs composed of 4 contiguous hw register. For - * structures and arrays, we store them as contiguous larger things - * than that, though we should be able to do better most of the - * time. - */ - class_sizes[class_count++] = 1; - if (brw->has_pln && intel->gen < 6) { - /* Always set up the (unaligned) pairs for gen5, so we can find - * them for making the aligned pair class. - */ - class_sizes[class_count++] = 2; - } - for (int r = 0; r < this->virtual_grf_next; r++) { - int i; - - for (i = 0; i < class_count; i++) { - if (class_sizes[i] == this->virtual_grf_sizes[r]) - break; - } - if (i == class_count) { - if (this->virtual_grf_sizes[r] >= base_reg_count) { - fail("Object too large to register allocate.\n"); - } - - class_sizes[class_count++] = this->virtual_grf_sizes[r]; - } - } + struct intel_context *intel = &brw->intel; /* Compute the total number of registers across all classes. */ int ra_reg_count = 0; @@ -143,9 +102,14 @@ fs_visitor::assign_regs() ra_reg_count += base_reg_count - (class_sizes[i] - 1); } - struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count); - uint8_t ra_reg_to_grf[ra_reg_count]; - int classes[class_count + 1]; + ralloc_free(brw->wm.ra_reg_to_grf); + brw->wm.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count); + ralloc_free(brw->wm.regs); + brw->wm.regs = ra_alloc_reg_set(ra_reg_count); + ralloc_free(brw->wm.classes); + brw->wm.classes = ralloc_array(brw, int, class_count + 1); + + brw->wm.aligned_pairs_class = -1; /* Now, add the registers to their classes, and add the conflicts * between them and the base GRF registers (and also each other). @@ -155,7 +119,7 @@ fs_visitor::assign_regs() int pairs_reg_count = 0; for (int i = 0; i < class_count; i++) { int class_reg_count = base_reg_count - (class_sizes[i] - 1); - classes[i] = ra_alloc_reg_class(regs); + brw->wm.classes[i] = ra_alloc_reg_class(brw->wm.regs); /* Save this off for the aligned pair class at the end. */ if (class_sizes[i] == 2) { @@ -164,14 +128,14 @@ fs_visitor::assign_regs() } for (int j = 0; j < class_reg_count; j++) { - ra_class_add_reg(regs, classes[i], reg); + ra_class_add_reg(brw->wm.regs, brw->wm.classes[i], reg); - ra_reg_to_grf[reg] = j; + brw->wm.ra_reg_to_grf[reg] = j; for (int base_reg = j; base_reg < j + class_sizes[i]; base_reg++) { - ra_add_transitive_reg_conflict(regs, base_reg, reg); + ra_add_transitive_reg_conflict(brw->wm.regs, base_reg, reg); } reg++; @@ -183,30 +147,83 @@ fs_visitor::assign_regs() * in on gen5 so that we can do PLN. */ if (brw->has_pln && reg_width == 1 && intel->gen < 6) { - aligned_pairs_class = ra_alloc_reg_class(regs); + brw->wm.aligned_pairs_class = ra_alloc_reg_class(brw->wm.regs); for (int i = 0; i < pairs_reg_count; i++) { - if ((ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) { - ra_class_add_reg(regs, aligned_pairs_class, + if ((brw->wm.ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) { + ra_class_add_reg(brw->wm.regs, brw->wm.aligned_pairs_class, pairs_base_reg + i); } } class_count++; } - ra_set_finalize(regs); + ra_set_finalize(brw->wm.regs); +} + +bool +fs_visitor::assign_regs() +{ + /* Most of this allocation was written for a reg_width of 1 + * (dispatch_width == 8). In extending to 16-wide, the code was + * left in place and it was converted to have the hardware + * registers it's allocating be contiguous physical pairs of regs + * for reg_width == 2. + */ + int reg_width = c->dispatch_width / 8; + int hw_reg_mapping[this->virtual_grf_next]; + int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width); + int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width; + int class_sizes[base_reg_count]; + int class_count = 0; + + calculate_live_intervals(); + + /* Set up the register classes. + * + * The base registers store a scalar value. For texture samples, + * we get virtual GRFs composed of 4 contiguous hw register. For + * structures and arrays, we store them as contiguous larger things + * than that, though we should be able to do better most of the + * time. + */ + class_sizes[class_count++] = 1; + if (brw->has_pln && intel->gen < 6) { + /* Always set up the (unaligned) pairs for gen5, so we can find + * them for making the aligned pair class. + */ + class_sizes[class_count++] = 2; + } + for (int r = 0; r < this->virtual_grf_next; r++) { + int i; + + for (i = 0; i < class_count; i++) { + if (class_sizes[i] == this->virtual_grf_sizes[r]) + break; + } + if (i == class_count) { + if (this->virtual_grf_sizes[r] >= base_reg_count) { + fail("Object too large to register allocate.\n"); + } + + class_sizes[class_count++] = this->virtual_grf_sizes[r]; + } + } + + brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, + reg_width, base_reg_count); - struct ra_graph *g = ra_alloc_interference_graph(regs, + struct ra_graph *g = ra_alloc_interference_graph(brw->wm.regs, this->virtual_grf_next); for (int i = 0; i < this->virtual_grf_next; i++) { for (int c = 0; c < class_count; c++) { if (class_sizes[c] == this->virtual_grf_sizes[i]) { - if (aligned_pairs_class >= 0 && + if (brw->wm.aligned_pairs_class >= 0 && this->delta_x.reg == i) { - ra_set_node_class(g, i, aligned_pairs_class); + ra_set_node_class(g, i, brw->wm.aligned_pairs_class); } else { - ra_set_node_class(g, i, classes[c]); + ra_set_node_class(g, i, brw->wm.classes[c]); } break; } @@ -237,7 +254,6 @@ fs_visitor::assign_regs() ralloc_free(g); - ralloc_free(regs); return false; } @@ -250,7 +266,8 @@ fs_visitor::assign_regs() for (int i = 0; i < this->virtual_grf_next; i++) { int reg = ra_get_node_reg(g, i); - hw_reg_mapping[i] = first_assigned_grf + ra_reg_to_grf[reg] * reg_width; + hw_reg_mapping[i] = (first_assigned_grf + + brw->wm.ra_reg_to_grf[reg] * reg_width); this->grf_used = MAX2(this->grf_used, hw_reg_mapping[i] + this->virtual_grf_sizes[i] * reg_width); @@ -265,7 +282,6 @@ fs_visitor::assign_regs() } ralloc_free(g); - ralloc_free(regs); return true; } -- cgit v1.2.3 From c9e81fe14f36933617c862efb15ae09194485eab Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 15 May 2011 09:36:19 -0700 Subject: i965: Drop the reg/hw_reg distinction. "reg" was set in only one case, virtual GRFs pre register allocation, and would be unset and have hw_reg set after allocation. Since we never bothered with looking at virtual GRF number after allocation anyway, just use the same storage and avoid confusion. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 34 +++++++++++----------- src/mesa/drivers/dri/i965/brw_fs.h | 21 +++++++------ src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 6 ++-- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 4 +-- .../dri/i965/brw_fs_schedule_instructions.cpp | 8 ++--- 5 files changed, 37 insertions(+), 36 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index d57a67cc4fc..cafb7092ac8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -187,20 +187,20 @@ fs_visitor::virtual_grf_alloc(int size) } /** Fixed HW reg constructor. */ -fs_reg::fs_reg(enum register_file file, int hw_reg) +fs_reg::fs_reg(enum register_file file, int reg) { init(); this->file = file; - this->hw_reg = hw_reg; + this->reg = reg; this->type = BRW_REGISTER_TYPE_F; } /** Fixed HW reg constructor. */ -fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type) +fs_reg::fs_reg(enum register_file file, int reg, uint32_t type) { init(); this->file = file; - this->hw_reg = hw_reg; + this->reg = reg; this->type = type; } @@ -636,7 +636,7 @@ fs_visitor::assign_curb_setup() for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == UNIFORM) { - int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs + constant_nr / 8, constant_nr % 8); @@ -810,7 +810,7 @@ fs_visitor::remove_dead_constants() fs_inst *inst = (fs_inst *)node; for (int i = 0; i < 3; i++) { - int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; if (inst->src[i].file != UNIFORM) continue; @@ -862,13 +862,13 @@ fs_visitor::remove_dead_constants() fs_inst *inst = (fs_inst *)node; for (int i = 0; i < 3; i++) { - int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; if (inst->src[i].file != UNIFORM) continue; assert(this->params_remap[constant_nr] != -1); - inst->src[i].hw_reg = this->params_remap[constant_nr]; + inst->src[i].reg = this->params_remap[constant_nr]; inst->src[i].reg_offset = 0; } } @@ -912,7 +912,7 @@ fs_visitor::setup_pull_constants() if (inst->src[i].file != UNIFORM) continue; - int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset; if (uniform_nr < pull_uniform_base) continue; @@ -1374,9 +1374,9 @@ fs_visitor::compute_to_mrf() /* Work out which hardware MRF registers are written by this * instruction. */ - int mrf_low = inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int mrf_low = inst->dst.reg & ~BRW_MRF_COMPR4; int mrf_high; - if (inst->dst.hw_reg & BRW_MRF_COMPR4) { + if (inst->dst.reg & BRW_MRF_COMPR4) { mrf_high = mrf_low + 4; } else if (c->dispatch_width == 16 && (!inst->force_uncompressed && !inst->force_sechalf)) { @@ -1443,7 +1443,7 @@ fs_visitor::compute_to_mrf() if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) { /* Found the creator of our MRF's source value. */ scan_inst->dst.file = MRF; - scan_inst->dst.hw_reg = inst->dst.hw_reg; + scan_inst->dst.reg = inst->dst.reg; scan_inst->saturate |= inst->saturate; inst->remove(); progress = true; @@ -1480,10 +1480,10 @@ fs_visitor::compute_to_mrf() /* If somebody else writes our MRF here, we can't * compute-to-MRF before that. */ - int scan_mrf_low = scan_inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int scan_mrf_low = scan_inst->dst.reg & ~BRW_MRF_COMPR4; int scan_mrf_high; - if (scan_inst->dst.hw_reg & BRW_MRF_COMPR4) { + if (scan_inst->dst.reg & BRW_MRF_COMPR4) { scan_mrf_high = scan_mrf_low + 4; } else if (c->dispatch_width == 16 && (!scan_inst->force_uncompressed && @@ -1555,7 +1555,7 @@ fs_visitor::remove_duplicate_mrf_writes() if (inst->opcode == BRW_OPCODE_MOV && inst->dst.file == MRF) { - fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg]; + fs_inst *prev_inst = last_mrf_move[inst->dst.reg]; if (prev_inst && inst->equals(prev_inst)) { inst->remove(); progress = true; @@ -1565,7 +1565,7 @@ fs_visitor::remove_duplicate_mrf_writes() /* Clear out the last-write records for MRFs that were overwritten. */ if (inst->dst.file == MRF) { - last_mrf_move[inst->dst.hw_reg] = NULL; + last_mrf_move[inst->dst.reg] = NULL; } if (inst->mlen > 0) { @@ -1591,7 +1591,7 @@ fs_visitor::remove_duplicate_mrf_writes() inst->dst.file == MRF && inst->src[0].file == GRF && !inst->predicated) { - last_mrf_move[inst->dst.hw_reg] = inst; + last_mrf_move[inst->dst.reg] = inst; } } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 0375f672bec..4ec649014de 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -51,7 +51,7 @@ enum register_file { MRF = BRW_MESSAGE_REGISTER_FILE, IMM = BRW_IMMEDIATE_VALUE, FIXED_HW_REG, /* a struct brw_reg */ - UNIFORM, /* prog_data->params[hw_reg] */ + UNIFORM, /* prog_data->params[reg] */ BAD_FILE }; @@ -99,7 +99,6 @@ public: void init() { memset(this, 0, sizeof(*this)); - this->hw_reg = -1; this->smear = -1; } @@ -146,8 +145,8 @@ public: this->type = fixed_hw_reg.type; } - fs_reg(enum register_file file, int hw_reg); - fs_reg(enum register_file file, int hw_reg, uint32_t type); + fs_reg(enum register_file file, int reg); + fs_reg(enum register_file file, int reg, uint32_t type); fs_reg(class fs_visitor *v, const struct glsl_type *type); bool equals(fs_reg *r) @@ -155,7 +154,6 @@ public: return (file == r->file && reg == r->reg && reg_offset == r->reg_offset && - hw_reg == r->hw_reg && type == r->type && negate == r->negate && abs == r->abs && @@ -167,12 +165,17 @@ public: /** Register file: ARF, GRF, MRF, IMM. */ enum register_file file; - /** virtual register number. 0 = fixed hw reg */ + /** + * Register number. For ARF/MRF, it's the hardware register. For + * GRF, it's a virtual register number until register allocation + */ int reg; - /** Offset within the virtual register. */ + /** + * For virtual registers, this is a hardware register offset from + * the start of the register block (for example, a constant index + * in an array access). + */ int reg_offset; - /** HW register number. Generally unset until register allocation. */ - int hw_reg; /** Register type. BRW_REGISTER_TYPE_* */ int type; bool negate; diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 9fb0153d1f8..e168e541bef 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -538,11 +538,9 @@ brw_reg_from_fs_reg(fs_reg *reg) case ARF: case MRF: if (reg->smear == -1) { - brw_reg = brw_vec8_reg(reg->file, - reg->hw_reg, 0); + brw_reg = brw_vec8_reg(reg->file, reg->reg, 0); } else { - brw_reg = brw_vec1_reg(reg->file, - reg->hw_reg, reg->smear); + brw_reg = brw_vec1_reg(reg->file, reg->reg, reg->smear); } brw_reg = retype(brw_reg, reg->type); if (reg->sechalf) diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 8e44a010576..5c9cba99ae5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -52,8 +52,8 @@ assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width) { if (reg->file == GRF) { assert(reg->reg_offset >= 0); - reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width; - reg->reg = 0; + reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width; + reg->reg_offset = 0; } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index 9ec3f502764..f1a88fcfa79 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -321,12 +321,12 @@ instruction_scheduler::calculate_deps() add_dep(last_grf_write[inst->dst.reg], n); last_grf_write[inst->dst.reg] = n; } else if (inst->dst.file == MRF) { - int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int reg = inst->dst.reg & ~BRW_MRF_COMPR4; add_dep(last_mrf_write[reg], n); last_mrf_write[reg] = n; if (is_compressed(inst)) { - if (inst->dst.hw_reg & BRW_MRF_COMPR4) + if (inst->dst.reg & BRW_MRF_COMPR4) reg += 4; else reg++; @@ -401,12 +401,12 @@ instruction_scheduler::calculate_deps() if (inst->dst.file == GRF) { last_grf_write[inst->dst.reg] = n; } else if (inst->dst.file == MRF) { - int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int reg = inst->dst.reg & ~BRW_MRF_COMPR4; last_mrf_write[reg] = n; if (is_compressed(inst)) { - if (inst->dst.hw_reg & BRW_MRF_COMPR4) + if (inst->dst.reg & BRW_MRF_COMPR4) reg += 4; else reg++; -- cgit v1.2.3 From 09eeb0ff27005c0ffccd5cdbe46862e181a4ee6c Mon Sep 17 00:00:00 2001 From: Carl Simonson Date: Wed, 10 Aug 2011 11:10:43 -0700 Subject: i830: Add missing vtable entry for i830 from the hiz work. --- src/mesa/drivers/dri/i915/i830_vtbl.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 6d43726beb1..ed5286fd7d9 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -881,6 +881,12 @@ i830_invalidate_state(struct intel_context *intel, GLuint new_state) i830_update_provoking_vertex(&intel->ctx); } +static bool +i830_is_hiz_depth_format(struct intel_context *intel, gl_format format) +{ + return false; +} + void i830InitVtbl(struct i830_context *i830) { @@ -898,4 +904,5 @@ i830InitVtbl(struct i830_context *i830) i830->intel.vtbl.finish_batch = intel_finish_vb; i830->intel.vtbl.invalidate_state = i830_invalidate_state; i830->intel.vtbl.render_target_supported = i830_render_target_supported; + i830->intel.vtbl.is_hiz_depth_format = i830_is_hiz_depth_format; } -- cgit v1.2.3 From fa351bd2e0aecccd5ed6ef8744d5ba4a6dbf5d2c Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sun, 7 Aug 2011 17:04:04 -0700 Subject: intel: GetBuffer fix After copy buffer on preGEN6, it is necessary to wait for the blit to complete before returning data to the user. This should fix the piglit test: copy_buffer_coherency (pre-GEN6). Signed-off-by: Ben Widawsky Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/intel/intel_buffer_objects.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index 439d6fc8247..703300b31af 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -282,12 +282,17 @@ intel_bufferobj_get_subdata(struct gl_context * ctx, GLvoid * data, struct gl_buffer_object *obj) { struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + struct intel_context *intel = intel_context(ctx); assert(intel_obj); if (intel_obj->sys_buffer) memcpy(data, (char *)intel_obj->sys_buffer + offset, size); - else + else { + if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) { + intel_batchbuffer_flush(intel); + } drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data); + } } -- cgit v1.2.3 From e411cd7b0a54d2f9b9f4cda4918aa7742ed5c2a6 Mon Sep 17 00:00:00 2001 From: Andreas Fänger Date: Wed, 10 Aug 2011 08:07:29 +0000 Subject: swrast: initial multi-threaded span rendering Optional parallel rendering of spans using OpenMP. Initial implementation for aa triangles. A new option for scons is also provided to activate the openmp support (off by default). Signed-off-by: Brian Paul --- common.py | 1 + scons/gallium.py | 12 +++++++ src/mesa/swrast/s_aatritemp.h | 72 ++++++++++++++++++++++++------------------ src/mesa/swrast/s_context.c | 26 +++++++++++---- src/mesa/swrast/s_texcombine.c | 4 +++ src/mesa/tnl/t_pipeline.c | 12 +++++++ 6 files changed, 91 insertions(+), 36 deletions(-) (limited to 'src/mesa') diff --git a/common.py b/common.py index 8657030ea3f..cfee1b5dc2e 100644 --- a/common.py +++ b/common.py @@ -88,6 +88,7 @@ def AddOptions(opts): opts.Add('toolchain', 'compiler toolchain', default_toolchain) opts.Add(BoolOption('gles', 'EXPERIMENTAL: enable OpenGL ES support', 'no')) opts.Add(BoolOption('llvm', 'use LLVM', default_llvm)) + opts.Add(BoolOption('openmp', 'EXPERIMENTAL: compile with openmp (swrast)', 'no')) opts.Add(BoolOption('debug', 'DEPRECATED: debug build', 'yes')) opts.Add(BoolOption('profile', 'DEPRECATED: profile build', 'no')) opts.Add(BoolOption('quiet', 'DEPRECATED: profile build', 'yes')) diff --git a/scons/gallium.py b/scons/gallium.py index 8cd3bc7f6e0..7135251d7a3 100755 --- a/scons/gallium.py +++ b/scons/gallium.py @@ -596,6 +596,18 @@ def generate(env): libs += ['m', 'pthread', 'dl'] env.Append(LIBS = libs) + # OpenMP + if env['openmp']: + if env['msvc']: + env.Append(CCFLAGS = ['/openmp']) + # When building openmp release VS2008 link.exe crashes with LNK1103 error. + # Workaround: overwrite PDB flags with empty value as it isn't required anyways + if env['build'] == 'release': + env['PDB'] = '' + if env['gcc']: + env.Append(CCFLAGS = ['-fopenmp']) + env.Append(LIBS = ['gomp']) + # Load tools env.Tool('lex') env.Tool('yacc') diff --git a/src/mesa/swrast/s_aatritemp.h b/src/mesa/swrast/s_aatritemp.h index 91d4f7a10ab..77b3ae6ec7a 100644 --- a/src/mesa/swrast/s_aatritemp.h +++ b/src/mesa/swrast/s_aatritemp.h @@ -181,13 +181,20 @@ const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS]; const GLfloat dxdy = majDx / majDy; const GLfloat xAdj = dxdy < 0.0F ? -dxdy : 0.0F; - GLfloat x = pMin[0] - (yMin - iyMin) * dxdy; GLint iy; - for (iy = iyMin; iy < iyMax; iy++, x += dxdy) { +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) private(iy) firstprivate(span) +#endif + for (iy = iyMin; iy < iyMax; iy++) { + GLfloat x = pMin[0] - (yMin - iy) * dxdy; GLint ix, startX = (GLint) (x - xAdj); GLuint count; GLfloat coverage = 0.0F; +#ifdef _OPENMP + /* each thread needs to use a different (global) SpanArrays variable */ + span.array = SWRAST_CONTEXT(ctx)->SpanArrays + omp_get_thread_num(); +#endif /* skip over fragments with zero coverage */ while (startX < MAX_WIDTH) { coverage = compute_coveragef(pMin, pMid, pMax, startX, iy); @@ -228,13 +235,12 @@ coverage = compute_coveragef(pMin, pMid, pMax, ix, iy); } - if (ix <= startX) - continue; - - span.x = startX; - span.y = iy; - span.end = (GLuint) ix - (GLuint) startX; - _swrast_write_rgba_span(ctx, &span); + if (ix > startX) { + span.x = startX; + span.y = iy; + span.end = (GLuint) ix - (GLuint) startX; + _swrast_write_rgba_span(ctx, &span); + } } } else { @@ -244,13 +250,20 @@ const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS]; const GLfloat dxdy = majDx / majDy; const GLfloat xAdj = dxdy > 0 ? dxdy : 0.0F; - GLfloat x = pMin[0] - (yMin - iyMin) * dxdy; GLint iy; - for (iy = iyMin; iy < iyMax; iy++, x += dxdy) { +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) private(iy) firstprivate(span) +#endif + for (iy = iyMin; iy < iyMax; iy++) { + GLfloat x = pMin[0] - (yMin - iy) * dxdy; GLint ix, left, startX = (GLint) (x + xAdj); GLuint count, n; GLfloat coverage = 0.0F; +#ifdef _OPENMP + /* each thread needs to use a different (global) SpanArrays variable */ + span.array = SWRAST_CONTEXT(ctx)->SpanArrays + omp_get_thread_num(); +#endif /* make sure we're not past the window edge */ if (startX >= ctx->DrawBuffer->_Xmax) { startX = ctx->DrawBuffer->_Xmax - 1; @@ -296,31 +309,30 @@ ATTRIB_LOOP_END #endif - if (startX <= ix) - continue; - - n = (GLuint) startX - (GLuint) ix; + if (startX > ix) { + n = (GLuint) startX - (GLuint) ix; - left = ix + 1; + left = ix + 1; - /* shift all values to the left */ - /* XXX this is temporary */ - { - SWspanarrays *array = span.array; - GLint j; - for (j = 0; j < (GLint) n; j++) { - array->coverage[j] = array->coverage[j + left]; - COPY_CHAN4(array->rgba[j], array->rgba[j + left]); + /* shift all values to the left */ + /* XXX this is temporary */ + { + SWspanarrays *array = span.array; + GLint j; + for (j = 0; j < (GLint) n; j++) { + array->coverage[j] = array->coverage[j + left]; + COPY_CHAN4(array->rgba[j], array->rgba[j + left]); #ifdef DO_Z - array->z[j] = array->z[j + left]; + array->z[j] = array->z[j + left]; #endif + } } - } - span.x = left; - span.y = iy; - span.end = n; - _swrast_write_rgba_span(ctx, &span); + span.x = left; + span.y = iy; + span.end = n; + _swrast_write_rgba_span(ctx, &span); + } } } } diff --git a/src/mesa/swrast/s_context.c b/src/mesa/swrast/s_context.c index def1531d7ff..4434f11b990 100644 --- a/src/mesa/swrast/s_context.c +++ b/src/mesa/swrast/s_context.c @@ -772,6 +772,11 @@ _swrast_CreateContext( struct gl_context *ctx ) { GLuint i; SWcontext *swrast = (SWcontext *)CALLOC(sizeof(SWcontext)); +#ifdef _OPENMP + const GLint maxThreads = omp_get_max_threads(); +#else + const GLint maxThreads = 1; +#endif if (SWRAST_DEBUG) { _mesa_debug(ctx, "_swrast_CreateContext\n"); @@ -806,19 +811,25 @@ _swrast_CreateContext( struct gl_context *ctx ) for (i = 0; i < MAX_TEXTURE_IMAGE_UNITS; i++) swrast->TextureSample[i] = NULL; - swrast->SpanArrays = MALLOC_STRUCT(sw_span_arrays); + /* SpanArrays is global and shared by all SWspan instances. However, when + * using multiple threads, it is necessary to have one SpanArrays instance + * per thread. + */ + swrast->SpanArrays = (SWspanarrays *) MALLOC(maxThreads * sizeof(SWspanarrays)); if (!swrast->SpanArrays) { FREE(swrast); return GL_FALSE; } - swrast->SpanArrays->ChanType = CHAN_TYPE; + for(i = 0; i < maxThreads; i++) { + swrast->SpanArrays[i].ChanType = CHAN_TYPE; #if CHAN_TYPE == GL_UNSIGNED_BYTE - swrast->SpanArrays->rgba = swrast->SpanArrays->rgba8; + swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].rgba8; #elif CHAN_TYPE == GL_UNSIGNED_SHORT - swrast->SpanArrays->rgba = swrast->SpanArrays->rgba16; + swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].rgba16; #else - swrast->SpanArrays->rgba = swrast->SpanArrays->attribs[FRAG_ATTRIB_COL0]; + swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].attribs[FRAG_ATTRIB_COL0]; #endif + } /* init point span buffer */ swrast->PointSpan.primitive = GL_POINT; @@ -826,7 +837,10 @@ _swrast_CreateContext( struct gl_context *ctx ) swrast->PointSpan.facing = 0; swrast->PointSpan.array = swrast->SpanArrays; - swrast->TexelBuffer = (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * + /* TexelBuffer is also global and normally shared by all SWspan instances; + * when running with multiple threads, create one per thread. + */ + swrast->TexelBuffer = (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * maxThreads * MAX_WIDTH * 4 * sizeof(GLfloat)); if (!swrast->TexelBuffer) { FREE(swrast->SpanArrays); diff --git a/src/mesa/swrast/s_texcombine.c b/src/mesa/swrast/s_texcombine.c index 086ed0b33d7..80b9dff3cc2 100644 --- a/src/mesa/swrast/s_texcombine.c +++ b/src/mesa/swrast/s_texcombine.c @@ -48,7 +48,11 @@ typedef float (*float4_array)[4]; static INLINE float4_array get_texel_array(SWcontext *swrast, GLuint unit) { +#ifdef _OPENMP + return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4 * omp_get_num_threads() + (MAX_WIDTH * 4 * omp_get_thread_num())); +#else return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4); +#endif } diff --git a/src/mesa/tnl/t_pipeline.c b/src/mesa/tnl/t_pipeline.c index 18f095f0d4b..881d5d5f535 100644 --- a/src/mesa/tnl/t_pipeline.c +++ b/src/mesa/tnl/t_pipeline.c @@ -146,7 +146,17 @@ void _tnl_run_pipeline( struct gl_context *ctx ) _tnl_notify_pipeline_output_change( ctx ); } +#ifndef _OPENMP + /* Don't adjust FPU precision mode in case multiple threads are to be used. + * This would require that the additional threads also changed the FPU mode + * which is quite a mess as this had to be done in all parallelized sections; + * otherwise the master thread and all other threads are running in different + * modes, producing inconsistent results. + * Note that all x64 implementations don't define/use START_FAST_MATH, so + * this is "hack" is only used in i386 mode + */ START_FAST_MATH(__tmp); +#endif for (i = 0; i < tnl->pipeline.nr_stages ; i++) { struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i]; @@ -154,7 +164,9 @@ void _tnl_run_pipeline( struct gl_context *ctx ) break; } +#ifndef _OPENMP END_FAST_MATH(__tmp); +#endif } -- cgit v1.2.3 From 37a64baea87c470a68f9b2582af86783eb3509c4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 11 Aug 2011 08:52:41 -0600 Subject: swrast: don't try to do depth testing if there's no depth buffer Fixes piglit hiz-depth-stencil-test-fbo-d0-s8 crash. See http://bugs.freedesktop.org/show_bug.cgi?id=37907 NOTE: This is a candidate for the 7.11 branch. --- src/mesa/swrast/s_stencil.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/swrast/s_stencil.c b/src/mesa/swrast/s_stencil.c index 5bec71c057b..fa5093a3407 100644 --- a/src/mesa/swrast/s_stencil.c +++ b/src/mesa/swrast/s_stencil.c @@ -462,7 +462,8 @@ stencil_and_ztest_span(struct gl_context *ctx, SWspan *span, GLuint face) * Some fragments passed the stencil test, apply depth test to them * and apply Zpass and Zfail stencil ops. */ - if (ctx->Depth.Test == GL_FALSE) { + if (ctx->Depth.Test == GL_FALSE || + ctx->DrawBuffer->_DepthBuffer == NULL) { /* * No depth buffer, just apply zpass stencil function to active pixels. */ -- cgit v1.2.3 From 9b8287f8f5398647ced3a52885233d58e548c2b7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 11 Aug 2011 08:58:08 -0600 Subject: mesa: fix ColorMask array index in _mesa_init_driver_state() This doesn't really make any difference because all the colormasks are the same upon context set-up, but it makes more sense. --- src/mesa/drivers/common/driverfuncs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 76630264bf7..70f8727a092 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -248,10 +248,10 @@ _mesa_init_driver_state(struct gl_context *ctx) GLuint i; for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { ctx->Driver.ColorMaskIndexed(ctx, i, - ctx->Color.ColorMask[0][RCOMP], - ctx->Color.ColorMask[0][GCOMP], - ctx->Color.ColorMask[0][BCOMP], - ctx->Color.ColorMask[0][ACOMP]); + ctx->Color.ColorMask[i][RCOMP], + ctx->Color.ColorMask[i][GCOMP], + ctx->Color.ColorMask[i][BCOMP], + ctx->Color.ColorMask[i][ACOMP]); } } else { -- cgit v1.2.3 From 099aad2fb0dba8baff61dc7a6803c6c976c08069 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 11 Aug 2011 09:02:16 -0600 Subject: mesa: fix initialization of GL_FOG_MODE in _mesa_init_driver_state() --- src/mesa/drivers/common/driverfuncs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 70f8727a092..a6174ee2f56 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -286,7 +286,10 @@ _mesa_init_driver_state(struct gl_context *ctx) ctx->Driver.Enable(ctx, GL_TEXTURE_CUBE_MAP, GL_FALSE); ctx->Driver.Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); - ctx->Driver.Fogfv(ctx, GL_FOG_MODE, 0); + { + GLfloat mode = (GLfloat) ctx->Fog.Mode; + ctx->Driver.Fogfv(ctx, GL_FOG_MODE, &mode); + } ctx->Driver.Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density); ctx->Driver.Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start); ctx->Driver.Fogfv(ctx, GL_FOG_END, &ctx->Fog.End); -- cgit v1.2.3 From 9cd64ec35acd54cbe0be4d03236d2c5a9d4be6fe Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Wed, 10 Aug 2011 15:46:14 -0700 Subject: x86-64: Fix compile error with clang Remove the 'f' suffix from a float literal. - .float 0.0f+1.0 + .float 1.0 This fixes the following compile error with clang: error: unexpected token in directive .float 0.0f+1.0 ^ Note: This is a candidate for the stable branches. Reviewed-by: Ben Widawsky Signed-off-by: Chad Versace --- src/mesa/x86-64/xform4.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/x86-64/xform4.S b/src/mesa/x86-64/xform4.S index 6141e434679..5abd5a25de5 100644 --- a/src/mesa/x86-64/xform4.S +++ b/src/mesa/x86-64/xform4.S @@ -118,7 +118,7 @@ p4_constants: .byte 0x00, 0x00, 0x00, 0x00 .byte 0x00, 0x00, 0x00, 0x00 .byte 0x00, 0x00, 0x00, 0x00 -.float 0f+1.0 +.float 1.0 .text .align 16 -- cgit v1.2.3 From 5076561b35b9c2c78f277ab03bf1e642094ee20e Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 8 Aug 2011 10:14:44 +0900 Subject: glapi: use gl_and_es_API.xml to generate GLES headers glapi/gen-es/ defines two sets of GLAPI XMLs for OpenGL ES 1.1 (es1_API.xml) and 2.0 (es2_API.xml) respectively. They are used to generate dispatch.h and remap_helper.h for GLES. Together with gl_and_es_API.xml, we have to maintain three sets of GLAPI XMLs. This commit makes dispatch.h and remap_helper.h for GLES be generated from gl_and_es_API.xml. Reviewed-by: Brian Paul --- src/mapi/glapi/gen-es/Makefile | 10 ++++++---- src/mapi/glapi/gen/Makefile | 6 ++---- src/mapi/glapi/gen/gl_table.py | 20 +++++++++++++++----- src/mapi/glapi/gen/remap_helper.py | 18 ++++++++++++++++-- src/mesa/SConscript | 16 ++++++++-------- 5 files changed, 47 insertions(+), 23 deletions(-) (limited to 'src/mesa') diff --git a/src/mapi/glapi/gen-es/Makefile b/src/mapi/glapi/gen-es/Makefile index bf66ec037cf..3fd539d26d5 100644 --- a/src/mapi/glapi/gen-es/Makefile +++ b/src/mapi/glapi/gen-es/Makefile @@ -11,8 +11,8 @@ OUTPUTS := \ COMMON = gl_and_es_API.xml gl_XML.py glX_XML.py license.py typeexpr.py COMMON := $(addprefix $(GLAPI)/, $(COMMON)) -ES1_APIXML := es1_API.xml -ES2_APIXML := es2_API.xml +ES1_APIXML := $(GLAPI)/gl_and_es_API.xml +ES2_APIXML := $(GLAPI)/gl_and_es_API.xml ES1_OUTPUT_DIR := $(TOP)/src/mapi/es1api ES2_OUTPUT_DIR := $(TOP)/src/mapi/es2api @@ -37,10 +37,12 @@ shared-glapi: $(SHARED_GLAPI_OUTPUTS) $(ES1_OUTPUTS): APIXML := $(ES1_APIXML) $(ES1_OUTPUTS): PRINTER := es1api +$(ES1_OUTPUTS): GLES_VER := es1 $(ES1_OUTPUTS): $(ES1_DEPS) $(ES2_OUTPUTS): APIXML := $(ES2_APIXML) $(ES2_OUTPUTS): PRINTER := es2api +$(ES2_OUTPUTS): GLES_VER := es2 $(ES2_OUTPUTS): $(ES2_DEPS) $(SHARED_GLAPI_OUTPUTS): APIXML := $(SHARED_GLAPI_APIXML) @@ -49,7 +51,7 @@ $(SHARED_GLAPI_OUTPUTS): $(SHARED_GLAPI_DEPS) define gen-glapi @mkdir -p $(dir $@) - $(PYTHON2) $(PYTHON_FLAGS) $< -f $(APIXML) $(1) > $@ + $(PYTHON2) $(PYTHON_FLAGS) $< -f $(APIXML) -c $(GLES_VER) $(1) > $@ endef %/glapi_mapi_tmp.h: $(MAPI)/mapi_abi.py $(COMMON) @@ -58,7 +60,7 @@ endef --printer $(PRINTER) --mode lib $(GLAPI)/gl_and_es_API.xml > $@ %/main/dispatch.h: $(GLAPI)/gl_table.py $(COMMON) - $(call gen-glapi,-c -m remap_table) + $(call gen-glapi,-m remap_table) %/main/remap_helper.h: $(GLAPI)/remap_helper.py $(COMMON) $(call gen-glapi) diff --git a/src/mapi/glapi/gen/Makefile b/src/mapi/glapi/gen/Makefile index 3e101f3a10f..c386b8766c4 100644 --- a/src/mapi/glapi/gen/Makefile +++ b/src/mapi/glapi/gen/Makefile @@ -180,10 +180,8 @@ $(MESA_GLAPI_DIR)/glapi_sparc.S: gl_SPARC_asm.py $(COMMON) ###################################################################### -$(MESA_DIR)/main/enums.c: gl_enums.py $(COMMON) $(ES_API) - $(PYTHON2) $(PYTHON_FLAGS) $< -f gl_API.xml \ - -f $(MESA_GLAPI_DIR)/gen-es/es1_API.xml \ - -f $(MESA_GLAPI_DIR)/gen-es/es2_API.xml > $@ +$(MESA_DIR)/main/enums.c: gl_enums.py $(COMMON_ES) + $(PYTHON2) $(PYTHON_FLAGS) $< -f gl_and_es_API.xml > $@ $(MESA_DIR)/main/dispatch.h: gl_table.py $(COMMON) $(PYTHON2) $(PYTHON_FLAGS) $< -m remap_table > $@ diff --git a/src/mapi/glapi/gen/gl_table.py b/src/mapi/glapi/gen/gl_table.py index 05979e3813f..2cbbd971a86 100644 --- a/src/mapi/glapi/gen/gl_table.py +++ b/src/mapi/glapi/gen/gl_table.py @@ -211,28 +211,28 @@ class PrintRemapTable(gl_XML.gl_print_base): def show_usage(): - print "Usage: %s [-f input_file_name] [-m mode] [-c]" % sys.argv[0] + print "Usage: %s [-f input_file_name] [-m mode] [-c ver]" % sys.argv[0] print " -m mode Mode can be 'table' or 'remap_table'." - print " -c Enable compatibility with OpenGL ES." + print " -c ver Version can be 'es1' or 'es2'." sys.exit(1) if __name__ == '__main__': file_name = "gl_API.xml" try: - (args, trail) = getopt.getopt(sys.argv[1:], "f:m:c") + (args, trail) = getopt.getopt(sys.argv[1:], "f:m:c:") except Exception,e: show_usage() mode = "table" - es = False + es = None for (arg,val) in args: if arg == "-f": file_name = val elif arg == "-m": mode = val elif arg == "-c": - es = True + es = val if mode == "table": printer = PrintGlTable(es) @@ -243,4 +243,14 @@ if __name__ == '__main__': api = gl_XML.parse_GL_API( file_name ) + if es is not None: + import gles_api + + api_map = { + 'es1': gles_api.es1_api, + 'es2': gles_api.es2_api, + } + + api.filter_functions(api_map[es]) + printer.Print( api ) diff --git a/src/mapi/glapi/gen/remap_helper.py b/src/mapi/glapi/gen/remap_helper.py index 69b8e5e9d02..367ae24c75c 100644 --- a/src/mapi/glapi/gen/remap_helper.py +++ b/src/mapi/glapi/gen/remap_helper.py @@ -197,22 +197,36 @@ class PrintGlRemap(gl_XML.gl_print_base): def show_usage(): - print "Usage: %s [-f input_file_name]" % sys.argv[0] + print "Usage: %s [-f input_file_name] [-c ver]" % sys.argv[0] + print " -c ver Version can be 'es1' or 'es2'." sys.exit(1) if __name__ == '__main__': file_name = "gl_API.xml" try: - (args, trail) = getopt.getopt(sys.argv[1:], "f:") + (args, trail) = getopt.getopt(sys.argv[1:], "f:c:") except Exception,e: show_usage() + es = None for (arg,val) in args: if arg == "-f": file_name = val + elif arg == "-c": + es = val api = gl_XML.parse_GL_API( file_name ) + if es is not None: + import gles_api + + api_map = { + 'es1': gles_api.es1_api, + 'es2': gles_api.es2_api, + } + + api.filter_functions(api_map[es]) + printer = PrintGlRemap() printer.Print( api ) diff --git a/src/mesa/SConscript b/src/mesa/SConscript index cbd16625186..ac85a3eeb05 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -349,26 +349,26 @@ if env['gles']: gles_headers += env.CodeGenerate( target = 'es1api/main/dispatch.h', script = GLAPI + 'gen/gl_table.py', - source = GLAPI + 'gen-es/es1_API.xml', - command = python_cmd + ' $SCRIPT -c -m remap_table -f $SOURCE > $TARGET', + source = GLAPI + 'gen/gl_and_es_API.xml', + command = python_cmd + ' $SCRIPT -c es1 -m remap_table -f $SOURCE > $TARGET', ) gles_headers += env.CodeGenerate( target = 'es1api/main/remap_helper.h', script = GLAPI + 'gen/remap_helper.py', - source = GLAPI + 'gen-es/es1_API.xml', - command = python_cmd + ' $SCRIPT -f $SOURCE > $TARGET', + source = GLAPI + 'gen/gl_and_es_API.xml', + command = python_cmd + ' $SCRIPT -c es1 -f $SOURCE > $TARGET', ) gles_headers += env.CodeGenerate( target = 'es2api/main/dispatch.h', script = GLAPI + 'gen/gl_table.py', - source = GLAPI + 'gen-es/es2_API.xml', - command = python_cmd + ' $SCRIPT -c -m remap_table -f $SOURCE > $TARGET', + source = GLAPI + 'gen/gl_and_es_API.xml', + command = python_cmd + ' $SCRIPT -c es2 -m remap_table -f $SOURCE > $TARGET', ) gles_headers += env.CodeGenerate( target = 'es2api/main/remap_helper.h', script = GLAPI + 'gen/remap_helper.py', - source = GLAPI + 'gen-es/es2_API.xml', - command = python_cmd + ' $SCRIPT -f $SOURCE > $TARGET', + source = GLAPI + 'gen/gl_and_es_API.xml', + command = python_cmd + ' $SCRIPT -c es2 -f $SOURCE > $TARGET', ) env.Depends(gles_sources, gles_headers) -- cgit v1.2.3 From 6eff33dc7f2cd6e1430bd8dcaef4a7eb9fe3f6ee Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 11 Aug 2011 16:41:09 +0800 Subject: glapi: generate ES dispatch headers from core mesa GLESv1 and GLESv2 have their own dispatch.h and remap_helper.h. These headers are only used by api_exec_es1.c and api_exec_es2.c in core mesa. Move the rules to generate them from glapi to core mesa. Reviewed-by: Brian Paul [olv: updated after reviewing to fix SCons build] --- src/mapi/es1api/Makefile | 9 +-------- src/mesa/Makefile | 20 ++++++++++++++++++++ src/mesa/SConscript | 8 ++++---- src/mesa/main/es_generator.py | 4 ++-- 4 files changed, 27 insertions(+), 14 deletions(-) (limited to 'src/mesa') diff --git a/src/mapi/es1api/Makefile b/src/mapi/es1api/Makefile index da5aa45806c..0a0449b10a3 100644 --- a/src/mapi/es1api/Makefile +++ b/src/mapi/es1api/Makefile @@ -48,7 +48,7 @@ esapi_CPPFLAGS += -DMAPI_MODE_BRIDGE esapi_LIB_DEPS := -L$(TOP)/$(LIB_DIR) -l$(GLAPI_LIB) $(esapi_LIB_DEPS) .PHONY: default -default: depend $(TOP)/$(LIB_DIR)/$(esapi_LIB_NAME) main/dispatch.h main/remap_helper.h +default: depend $(TOP)/$(LIB_DIR)/$(esapi_LIB_NAME) $(TOP)/$(LIB_DIR)/$(esapi_LIB_NAME): $(esapi_OBJECTS) $(MKLIB) -o $(esapi_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ @@ -72,12 +72,6 @@ include $(GLAPI)/gen/glapi_gen.mk glapi_mapi_tmp.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps) $(call glapi_gen_mapi,$<,$(ESAPI)) -main/dispatch.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_dispatch_deps) - $(call glapi_gen_dispatch,$<,$(ES)) - -main/remap_helper.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_remap_deps) - $(call glapi_gen_remap,$<,$(ES)) - .PHONY: clean clean: -rm -f $(esapi_PC) @@ -86,7 +80,6 @@ clean: -rm -f $(esapi_OBJECTS) -rm -f depend depend.bak -rm -f glapi_mapi_tmp.h - -rm -rf main pcedit = \ -e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \ diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 88f31b68695..0e15d61bd8d 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -67,6 +67,26 @@ $(DRICORE_OBJ_DIR)/%.o: %.S # then convenience libs (.a) and finally the device drivers: default: $(DEPENDS) asm_subdirs $(MESA_LIBS) $(DRICORE_LIBS) driver_subdirs +# include glapi_gen.mk for generating glapi headers for GLES +GLAPI := $(TOP)/src/mapi/glapi/gen +include $(GLAPI)/glapi_gen.mk + +main/api_exec_es1_dispatch.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_dispatch_deps) + $(call glapi_gen_dispatch,$<,es1) + +main/api_exec_es1_remap_helper.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_remap_deps) + $(call glapi_gen_remap,$<,es1) + +main/api_exec_es1.o: main/api_exec_es1_dispatch.h main/api_exec_es1_remap_helper.h + +main/api_exec_es2_dispatch.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_dispatch_deps) + $(call glapi_gen_dispatch,$<,es2) + +main/api_exec_es2_remap_helper.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_remap_deps) + $(call glapi_gen_remap,$<,es2) + +main/api_exec_es2.o: main/api_exec_es2_dispatch.h main/api_exec_es2_remap_helper.h + main/api_exec_es1.c: main/APIspec.xml main/es_generator.py main/APIspecutil.py main/APIspec.py $(PYTHON2) $(PYTHON_FLAGS) main/es_generator.py -S main/APIspec.xml -V GLES1.1 > $@ diff --git a/src/mesa/SConscript b/src/mesa/SConscript index ac85a3eeb05..05aa0e8010e 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -347,25 +347,25 @@ if env['gles']: GLAPI = '#src/mapi/glapi/' gles_headers = [] gles_headers += env.CodeGenerate( - target = 'es1api/main/dispatch.h', + target = 'main/api_exec_es1_dispatch.h', script = GLAPI + 'gen/gl_table.py', source = GLAPI + 'gen/gl_and_es_API.xml', command = python_cmd + ' $SCRIPT -c es1 -m remap_table -f $SOURCE > $TARGET', ) gles_headers += env.CodeGenerate( - target = 'es1api/main/remap_helper.h', + target = 'main/api_exec_es1_remap_helper.h', script = GLAPI + 'gen/remap_helper.py', source = GLAPI + 'gen/gl_and_es_API.xml', command = python_cmd + ' $SCRIPT -c es1 -f $SOURCE > $TARGET', ) gles_headers += env.CodeGenerate( - target = 'es2api/main/dispatch.h', + target = 'main/api_exec_es2_dispatch.h', script = GLAPI + 'gen/gl_table.py', source = GLAPI + 'gen/gl_and_es_API.xml', command = python_cmd + ' $SCRIPT -c es2 -m remap_table -f $SOURCE > $TARGET', ) gles_headers += env.CodeGenerate( - target = 'es2api/main/remap_helper.h', + target = 'main/api_exec_es2_remap_helper.h', script = GLAPI + 'gen/remap_helper.py', source = GLAPI + 'gen/gl_and_es_API.xml', command = python_cmd + ' $SCRIPT -c es2 -f $SOURCE > $TARGET', diff --git a/src/mesa/main/es_generator.py b/src/mesa/main/es_generator.py index c0b0a445806..cad3deaef94 100644 --- a/src/mesa/main/es_generator.py +++ b/src/mesa/main/es_generator.py @@ -681,10 +681,10 @@ print """ #if FEATURE_remap_table /* define esLocalRemapTable */ -#include "%sapi/main/dispatch.h" +#include "main/api_exec_%s_dispatch.h" #define need_MESA_remap_table -#include "%sapi/main/remap_helper.h" +#include "main/api_exec_%s_remap_helper.h" static void init_remap_table(void) -- cgit v1.2.3 From 5880a9a4a7247e4c31df606bef089c45b4052aaa Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 3 Aug 2011 16:36:42 -0700 Subject: radeon: Explain to the user what went wrong when built without libdrm. Before this commit, even LIBGL_DEBUG=verbose would just fail with: libGL error: failed to create dri screen --- src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h index 607b7470d4b..a74c6c7a575 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h +++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h @@ -78,6 +78,9 @@ static inline uint32_t radeon_gem_name_bo(struct radeon_bo *dummy) static inline void *radeon_bo_manager_gem_ctor(int fd) { + fprintf(stderr, "[%s:%u] Mesa built without Radeon libdrm support.\n", + __func__, __LINE__); + return NULL; } @@ -87,6 +90,9 @@ static inline void radeon_bo_manager_gem_dtor(void *dummy) static inline void *radeon_cs_manager_gem_ctor(int fd) { + fprintf(stderr, "[%s:%u] Mesa built without Radeon libdrm support.\n", + __func__, __LINE__); + return NULL; } -- cgit v1.2.3 From 11e4ea0010c3a756cfdaf427c14e104c9a11a645 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 16 Aug 2011 13:05:26 -0600 Subject: mesa: ChooseTextureFormat() returns gl_format, not GLuint --- src/mesa/main/dd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index e0c5844e193..b5ed9a40c70 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -194,7 +194,7 @@ struct dd_function_table { * cases, srcFormat and srcType can be GL_NONE. * Called by glTexImage(), etc. */ - GLuint (*ChooseTextureFormat)( struct gl_context *ctx, GLint internalFormat, + gl_format (*ChooseTextureFormat)( struct gl_context *ctx, GLint internalFormat, GLenum srcFormat, GLenum srcType ); /** -- cgit v1.2.3 From c1f00731fd48dde68b67f157c27eb20982e82193 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 3 May 2011 15:27:38 -0700 Subject: i965: Generate driver-specific IR for non-fragment shaders as well. This will be used by the new vertex shader backend. The scalarizing passes are skipped for non-fragment, since vertex and geometry threads are based on vec4s. --- src/mesa/drivers/dri/i965/brw_shader.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 9471883fb2b..f4005f80055 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -75,10 +75,15 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) { struct brw_context *brw = brw_context(ctx); struct intel_context *intel = &brw->intel; + unsigned int stage; + + for (stage = 0; stage < ARRAY_SIZE(prog->_LinkedShaders); stage++) { + struct brw_shader *shader = + (struct brw_shader *)prog->_LinkedShaders[stage]; + + if (!shader) + continue; - struct brw_shader *shader = - (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; - if (shader != NULL) { void *mem_ctx = ralloc_context(NULL); bool progress; @@ -116,8 +121,10 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) do { progress = false; - brw_do_channel_expressions(shader->ir); - brw_do_vector_splitting(shader->ir); + if (stage == MESA_SHADER_FRAGMENT) { + brw_do_channel_expressions(shader->ir); + brw_do_vector_splitting(shader->ir); + } progress = do_lower_jumps(shader->ir, true, true, true, /* main return */ -- cgit v1.2.3 From 6034b9a5124475d300d0678bd2fb6160865fa972 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 3 May 2011 10:55:50 -0700 Subject: i965: Create a shared enum for hardware and compiler-internal opcodes. This should make gdbing more pleasant, and it might be used in sharing part of the codegen between the VS and FS backends. --- src/mesa/drivers/dri/i965/brw_defines.h | 134 +++++++++++++-------- src/mesa/drivers/dri/i965/brw_fs.cpp | 11 +- src/mesa/drivers/dri/i965/brw_fs.h | 56 +++------ src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 6 + src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 20 +-- .../dri/i965/brw_fs_schedule_instructions.cpp | 15 --- src/mesa/drivers/dri/i965/brw_shader.h | 4 + 7 files changed, 120 insertions(+), 126 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 0a3027d04ad..fe5d29c4328 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -557,58 +557,88 @@ #define BRW_WE_ALL 1 /** @} */ -#define BRW_OPCODE_MOV 1 -#define BRW_OPCODE_SEL 2 -#define BRW_OPCODE_NOT 4 -#define BRW_OPCODE_AND 5 -#define BRW_OPCODE_OR 6 -#define BRW_OPCODE_XOR 7 -#define BRW_OPCODE_SHR 8 -#define BRW_OPCODE_SHL 9 -#define BRW_OPCODE_RSR 10 -#define BRW_OPCODE_RSL 11 -#define BRW_OPCODE_ASR 12 -#define BRW_OPCODE_CMP 16 -#define BRW_OPCODE_CMPN 17 -#define BRW_OPCODE_JMPI 32 -#define BRW_OPCODE_IF 34 -#define BRW_OPCODE_IFF 35 -#define BRW_OPCODE_ELSE 36 -#define BRW_OPCODE_ENDIF 37 -#define BRW_OPCODE_DO 38 -#define BRW_OPCODE_WHILE 39 -#define BRW_OPCODE_BREAK 40 -#define BRW_OPCODE_CONTINUE 41 -#define BRW_OPCODE_HALT 42 -#define BRW_OPCODE_MSAVE 44 -#define BRW_OPCODE_MRESTORE 45 -#define BRW_OPCODE_PUSH 46 -#define BRW_OPCODE_POP 47 -#define BRW_OPCODE_WAIT 48 -#define BRW_OPCODE_SEND 49 -#define BRW_OPCODE_SENDC 50 -#define BRW_OPCODE_MATH 56 -#define BRW_OPCODE_ADD 64 -#define BRW_OPCODE_MUL 65 -#define BRW_OPCODE_AVG 66 -#define BRW_OPCODE_FRC 67 -#define BRW_OPCODE_RNDU 68 -#define BRW_OPCODE_RNDD 69 -#define BRW_OPCODE_RNDE 70 -#define BRW_OPCODE_RNDZ 71 -#define BRW_OPCODE_MAC 72 -#define BRW_OPCODE_MACH 73 -#define BRW_OPCODE_LZD 74 -#define BRW_OPCODE_SAD2 80 -#define BRW_OPCODE_SADA2 81 -#define BRW_OPCODE_DP4 84 -#define BRW_OPCODE_DPH 85 -#define BRW_OPCODE_DP3 86 -#define BRW_OPCODE_DP2 87 -#define BRW_OPCODE_DPA2 88 -#define BRW_OPCODE_LINE 89 -#define BRW_OPCODE_PLN 90 -#define BRW_OPCODE_NOP 126 +enum opcode { + /* These are the actual hardware opcodes. */ + BRW_OPCODE_MOV = 1, + BRW_OPCODE_SEL = 2, + BRW_OPCODE_NOT = 4, + BRW_OPCODE_AND = 5, + BRW_OPCODE_OR = 6, + BRW_OPCODE_XOR = 7, + BRW_OPCODE_SHR = 8, + BRW_OPCODE_SHL = 9, + BRW_OPCODE_RSR = 10, + BRW_OPCODE_RSL = 11, + BRW_OPCODE_ASR = 12, + BRW_OPCODE_CMP = 16, + BRW_OPCODE_CMPN = 17, + BRW_OPCODE_JMPI = 32, + BRW_OPCODE_IF = 34, + BRW_OPCODE_IFF = 35, + BRW_OPCODE_ELSE = 36, + BRW_OPCODE_ENDIF = 37, + BRW_OPCODE_DO = 38, + BRW_OPCODE_WHILE = 39, + BRW_OPCODE_BREAK = 40, + BRW_OPCODE_CONTINUE = 41, + BRW_OPCODE_HALT = 42, + BRW_OPCODE_MSAVE = 44, + BRW_OPCODE_MRESTORE = 45, + BRW_OPCODE_PUSH = 46, + BRW_OPCODE_POP = 47, + BRW_OPCODE_WAIT = 48, + BRW_OPCODE_SEND = 49, + BRW_OPCODE_SENDC = 50, + BRW_OPCODE_MATH = 56, + BRW_OPCODE_ADD = 64, + BRW_OPCODE_MUL = 65, + BRW_OPCODE_AVG = 66, + BRW_OPCODE_FRC = 67, + BRW_OPCODE_RNDU = 68, + BRW_OPCODE_RNDD = 69, + BRW_OPCODE_RNDE = 70, + BRW_OPCODE_RNDZ = 71, + BRW_OPCODE_MAC = 72, + BRW_OPCODE_MACH = 73, + BRW_OPCODE_LZD = 74, + BRW_OPCODE_SAD2 = 80, + BRW_OPCODE_SADA2 = 81, + BRW_OPCODE_DP4 = 84, + BRW_OPCODE_DPH = 85, + BRW_OPCODE_DP3 = 86, + BRW_OPCODE_DP2 = 87, + BRW_OPCODE_DPA2 = 88, + BRW_OPCODE_LINE = 89, + BRW_OPCODE_PLN = 90, + BRW_OPCODE_NOP = 126, + + /* These are compiler backend opcodes that get translated into other + * instructions. + */ + FS_OPCODE_FB_WRITE = 128, + FS_OPCODE_RCP, + FS_OPCODE_RSQ, + FS_OPCODE_SQRT, + FS_OPCODE_EXP2, + FS_OPCODE_LOG2, + FS_OPCODE_POW, + FS_OPCODE_SIN, + FS_OPCODE_COS, + FS_OPCODE_DDX, + FS_OPCODE_DDY, + FS_OPCODE_PIXEL_X, + FS_OPCODE_PIXEL_Y, + FS_OPCODE_CINTERP, + FS_OPCODE_LINTERP, + FS_OPCODE_TEX, + FS_OPCODE_TXB, + FS_OPCODE_TXD, + FS_OPCODE_TXL, + FS_OPCODE_DISCARD, + FS_OPCODE_SPILL, + FS_OPCODE_UNSPILL, + FS_OPCODE_PULL_CONSTANT_LOAD, +}; #define BRW_PREDICATE_NONE 0 #define BRW_PREDICATE_NORMAL 1 diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index cafb7092ac8..a0d75cc6f96 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -522,7 +522,7 @@ fs_visitor::emit_frontfacing_interpolation(ir_variable *ir) } fs_inst * -fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) +fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src) { switch (opcode) { case FS_OPCODE_RCP: @@ -565,7 +565,7 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) } fs_inst * -fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1) +fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) { int base_mrf = 2; fs_inst *inst; @@ -1149,6 +1149,9 @@ fs_visitor::propagate_constants() progress = true; } break; + + default: + break; } } @@ -1200,6 +1203,8 @@ fs_visitor::opt_algebraic() break; } + break; + default: break; } } @@ -1267,6 +1272,8 @@ fs_visitor::register_coalesce() case BRW_OPCODE_ENDIF: if_depth--; break; + default: + break; } if (loop_depth || if_depth) continue; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 4ec649014de..d207ac27aa2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -25,6 +25,8 @@ * */ +#include "brw_shader.h" + extern "C" { #include @@ -55,33 +57,6 @@ enum register_file { BAD_FILE }; -enum fs_opcodes { - FS_OPCODE_FB_WRITE = 256, - FS_OPCODE_RCP, - FS_OPCODE_RSQ, - FS_OPCODE_SQRT, - FS_OPCODE_EXP2, - FS_OPCODE_LOG2, - FS_OPCODE_POW, - FS_OPCODE_SIN, - FS_OPCODE_COS, - FS_OPCODE_DDX, - FS_OPCODE_DDY, - FS_OPCODE_PIXEL_X, - FS_OPCODE_PIXEL_Y, - FS_OPCODE_CINTERP, - FS_OPCODE_LINTERP, - FS_OPCODE_TEX, - FS_OPCODE_TXB, - FS_OPCODE_TXD, - FS_OPCODE_TXL, - FS_OPCODE_DISCARD, - FS_OPCODE_SPILL, - FS_OPCODE_UNSPILL, - FS_OPCODE_PULL_CONSTANT_LOAD, -}; - - class fs_reg { public: /* Callers of this ralloc-based new need not call delete. It's @@ -227,13 +202,13 @@ public: init(); } - fs_inst(int opcode) + fs_inst(enum opcode opcode) { init(); this->opcode = opcode; } - fs_inst(int opcode, fs_reg dst) + fs_inst(enum opcode opcode, fs_reg dst) { init(); this->opcode = opcode; @@ -243,7 +218,7 @@ public: assert(dst.reg_offset >= 0); } - fs_inst(int opcode, fs_reg dst, fs_reg src0) + fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0) { init(); this->opcode = opcode; @@ -256,7 +231,7 @@ public: assert(src[0].reg_offset >= 0); } - fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) + fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) { init(); this->opcode = opcode; @@ -272,7 +247,7 @@ public: assert(src[1].reg_offset >= 0); } - fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) + fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) { init(); this->opcode = opcode; @@ -331,7 +306,7 @@ public: opcode == FS_OPCODE_POW); } - int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ + enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ fs_reg dst; fs_reg src[3]; bool saturate; @@ -448,27 +423,28 @@ public: fs_inst *emit(fs_inst inst); - fs_inst *emit(int opcode) + fs_inst *emit(enum opcode opcode) { return emit(fs_inst(opcode)); } - fs_inst *emit(int opcode, fs_reg dst) + fs_inst *emit(enum opcode opcode, fs_reg dst) { return emit(fs_inst(opcode, dst)); } - fs_inst *emit(int opcode, fs_reg dst, fs_reg src0) + fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0) { return emit(fs_inst(opcode, dst, src0)); } - fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) + fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) { return emit(fs_inst(opcode, dst, src0, src1)); } - fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) + fs_inst *emit(enum opcode opcode, fs_reg dst, + fs_reg src0, fs_reg src1, fs_reg src2) { return emit(fs_inst(opcode, dst, src0, src1, src2)); } @@ -529,8 +505,8 @@ public: int sampler); fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, int sampler); - fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0); - fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1); + fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0); + fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); bool try_emit_saturate(ir_expression *ir); void emit_bool_to_cond_code(ir_rvalue *condition); void emit_if_gen6(ir_if *ir); diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index e168e541bef..529df0880f0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -277,6 +277,9 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) /* There is no sample_d_c message; comparisons are done manually */ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; break; + default: + assert(!"not reached"); + break; } } else { switch (inst->opcode) { @@ -317,6 +320,9 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) assert(inst->mlen == 7 || inst->mlen == 10); msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS; break; + default: + assert(!"not reached"); + break; } } assert(msg_type != -1); diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 5c9cba99ae5..7c5414ac26c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -25,23 +25,6 @@ * */ -extern "C" { - -#include - -#include "main/macros.h" -#include "main/shaderobj.h" -#include "main/uniforms.h" -#include "program/prog_parameter.h" -#include "program/prog_print.h" -#include "program/prog_optimize.h" -#include "program/register_allocate.h" -#include "program/sampler.h" -#include "program/hash_table.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_wm.h" -} #include "brw_fs.h" #include "../glsl/glsl_types.h" #include "../glsl/ir_optimization.h" @@ -359,6 +342,9 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) if (inst->dst.file == GRF) no_spill[inst->dst.reg] = true; break; + + default: + break; } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index f1a88fcfa79..965a5b333a2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -25,21 +25,6 @@ * */ -extern "C" { - -#include - -#include "main/macros.h" -#include "main/shaderobj.h" -#include "main/uniforms.h" -#include "program/prog_optimize.h" -#include "program/register_allocate.h" -#include "program/sampler.h" -#include "program/hash_table.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_wm.h" -} #include "brw_fs.h" #include "../glsl/glsl_types.h" #include "../glsl/ir_optimization.h" diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 4c568a26caa..21671d1c8d6 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -21,5 +21,9 @@ * IN THE SOFTWARE. */ +#include + +#pragma once + int brw_type_for_base_type(const struct glsl_type *type); uint32_t brw_conditional_for_comparison(unsigned int op); -- cgit v1.2.3 From 65b5cbbcf783f6c668ab5b31a0734680dd396794 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 12:38:58 -0700 Subject: i965: Rename math FS_OPCODE_* to SHADER_OPCODE_*. I want to just use the same enums in the VS. --- src/mesa/drivers/dri/i965/brw_defines.h | 16 +++++----- src/mesa/drivers/dri/i965/brw_fs.cpp | 34 +++++++++++----------- src/mesa/drivers/dri/i965/brw_fs.h | 16 +++++----- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 34 +++++++++++----------- .../dri/i965/brw_fs_schedule_instructions.cpp | 16 +++++----- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 20 ++++++------- 6 files changed, 68 insertions(+), 68 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index fe5d29c4328..da8d016da42 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -616,14 +616,14 @@ enum opcode { * instructions. */ FS_OPCODE_FB_WRITE = 128, - FS_OPCODE_RCP, - FS_OPCODE_RSQ, - FS_OPCODE_SQRT, - FS_OPCODE_EXP2, - FS_OPCODE_LOG2, - FS_OPCODE_POW, - FS_OPCODE_SIN, - FS_OPCODE_COS, + SHADER_OPCODE_RCP, + SHADER_OPCODE_RSQ, + SHADER_OPCODE_SQRT, + SHADER_OPCODE_EXP2, + SHADER_OPCODE_LOG2, + SHADER_OPCODE_POW, + SHADER_OPCODE_SIN, + SHADER_OPCODE_COS, FS_OPCODE_DDX, FS_OPCODE_DDY, FS_OPCODE_PIXEL_X, diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index a0d75cc6f96..693ef0ce31a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -143,15 +143,15 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) return 0; switch (inst->opcode) { - case FS_OPCODE_RCP: - case FS_OPCODE_RSQ: - case FS_OPCODE_SQRT: - case FS_OPCODE_EXP2: - case FS_OPCODE_LOG2: - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: return 1 * c->dispatch_width / 8; - case FS_OPCODE_POW: + case SHADER_OPCODE_POW: return 2 * c->dispatch_width / 8; case FS_OPCODE_TEX: case FS_OPCODE_TXB: @@ -525,13 +525,13 @@ fs_inst * fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src) { switch (opcode) { - case FS_OPCODE_RCP: - case FS_OPCODE_RSQ: - case FS_OPCODE_SQRT: - case FS_OPCODE_EXP2: - case FS_OPCODE_LOG2: - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: break; default: assert(!"not reached: bad math opcode"); @@ -570,7 +570,7 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) int base_mrf = 2; fs_inst *inst; - assert(opcode == FS_OPCODE_POW); + assert(opcode == SHADER_OPCODE_POW); if (intel->gen >= 6) { /* Can't do hstride == 0 args to gen6 math, so expand it out. @@ -1135,7 +1135,7 @@ fs_visitor::propagate_constants() } break; - case FS_OPCODE_RCP: + case SHADER_OPCODE_RCP: /* The hardware doesn't do math on immediate values * (because why are you doing that, seriously?), but * the correct answer is to just constant fold it diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index d207ac27aa2..94af0e1af16 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -296,14 +296,14 @@ public: bool is_math() { - return (opcode == FS_OPCODE_RCP || - opcode == FS_OPCODE_RSQ || - opcode == FS_OPCODE_SQRT || - opcode == FS_OPCODE_EXP2 || - opcode == FS_OPCODE_LOG2 || - opcode == FS_OPCODE_SIN || - opcode == FS_OPCODE_COS || - opcode == FS_OPCODE_POW); + return (opcode == SHADER_OPCODE_RCP || + opcode == SHADER_OPCODE_RSQ || + opcode == SHADER_OPCODE_SQRT || + opcode == SHADER_OPCODE_EXP2 || + opcode == SHADER_OPCODE_LOG2 || + opcode == SHADER_OPCODE_SIN || + opcode == SHADER_OPCODE_COS || + opcode == SHADER_OPCODE_POW); } enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 529df0880f0..285ba46bd46 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -149,28 +149,28 @@ fs_visitor::generate_math(fs_inst *inst, int op; switch (inst->opcode) { - case FS_OPCODE_RCP: + case SHADER_OPCODE_RCP: op = BRW_MATH_FUNCTION_INV; break; - case FS_OPCODE_RSQ: + case SHADER_OPCODE_RSQ: op = BRW_MATH_FUNCTION_RSQ; break; - case FS_OPCODE_SQRT: + case SHADER_OPCODE_SQRT: op = BRW_MATH_FUNCTION_SQRT; break; - case FS_OPCODE_EXP2: + case SHADER_OPCODE_EXP2: op = BRW_MATH_FUNCTION_EXP; break; - case FS_OPCODE_LOG2: + case SHADER_OPCODE_LOG2: op = BRW_MATH_FUNCTION_LOG; break; - case FS_OPCODE_POW: + case SHADER_OPCODE_POW: op = BRW_MATH_FUNCTION_POW; break; - case FS_OPCODE_SIN: + case SHADER_OPCODE_SIN: op = BRW_MATH_FUNCTION_SIN; break; - case FS_OPCODE_COS: + case SHADER_OPCODE_COS: op = BRW_MATH_FUNCTION_COS; break; default: @@ -182,7 +182,7 @@ fs_visitor::generate_math(fs_inst *inst, if (intel->gen >= 6) { assert(inst->mlen == 0); - if (inst->opcode == FS_OPCODE_POW) { + if (inst->opcode == SHADER_OPCODE_POW) { brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math2(p, dst, op, src[0], src[1]); @@ -775,14 +775,14 @@ fs_visitor::generate_code() } break; - case FS_OPCODE_RCP: - case FS_OPCODE_RSQ: - case FS_OPCODE_SQRT: - case FS_OPCODE_EXP2: - case FS_OPCODE_LOG2: - case FS_OPCODE_POW: - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_POW: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: generate_math(inst, dst, src); break; case FS_OPCODE_PIXEL_X: diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index 965a5b333a2..0ea4e5c36f0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -69,26 +69,26 @@ public: int math_latency = 22; switch (inst->opcode) { - case FS_OPCODE_RCP: + case SHADER_OPCODE_RCP: this->latency = 1 * chans * math_latency; break; - case FS_OPCODE_RSQ: + case SHADER_OPCODE_RSQ: this->latency = 2 * chans * math_latency; break; - case FS_OPCODE_SQRT: - case FS_OPCODE_LOG2: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_LOG2: /* full precision log. partial is 2. */ this->latency = 3 * chans * math_latency; break; - case FS_OPCODE_EXP2: + case SHADER_OPCODE_EXP2: /* full precision. partial is 3, same throughput. */ this->latency = 4 * chans * math_latency; break; - case FS_OPCODE_POW: + case SHADER_OPCODE_POW: this->latency = 8 * chans * math_latency; break; - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: /* minimum latency, max is 12 rounds. */ this->latency = 5 * chans * math_latency; break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 2e3f9be75b4..8b4f5bbac15 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -250,14 +250,14 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_rcp: - emit_math(FS_OPCODE_RCP, this->result, op[0]); + emit_math(SHADER_OPCODE_RCP, this->result, op[0]); break; case ir_unop_exp2: - emit_math(FS_OPCODE_EXP2, this->result, op[0]); + emit_math(SHADER_OPCODE_EXP2, this->result, op[0]); break; case ir_unop_log2: - emit_math(FS_OPCODE_LOG2, this->result, op[0]); + emit_math(SHADER_OPCODE_LOG2, this->result, op[0]); break; case ir_unop_exp: case ir_unop_log: @@ -265,11 +265,11 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_sin: case ir_unop_sin_reduced: - emit_math(FS_OPCODE_SIN, this->result, op[0]); + emit_math(SHADER_OPCODE_SIN, this->result, op[0]); break; case ir_unop_cos: case ir_unop_cos_reduced: - emit_math(FS_OPCODE_COS, this->result, op[0]); + emit_math(SHADER_OPCODE_COS, this->result, op[0]); break; case ir_unop_dFdx: @@ -340,11 +340,11 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_sqrt: - emit_math(FS_OPCODE_SQRT, this->result, op[0]); + emit_math(SHADER_OPCODE_SQRT, this->result, op[0]); break; case ir_unop_rsq: - emit_math(FS_OPCODE_RSQ, this->result, op[0]); + emit_math(SHADER_OPCODE_RSQ, this->result, op[0]); break; case ir_unop_i2u: @@ -423,7 +423,7 @@ fs_visitor::visit(ir_expression *ir) break; case ir_binop_pow: - emit_math(FS_OPCODE_POW, this->result, op[0], op[1]); + emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]); break; case ir_unop_bit_not: @@ -1694,7 +1694,7 @@ fs_visitor::emit_interpolation_setup_gen4() interp_reg(FRAG_ATTRIB_WPOS, 3)); /* Compute the pixel 1/W value from wpos.w. */ this->pixel_w = fs_reg(this, glsl_type::float_type); - emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); + emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w); this->current_annotation = NULL; } @@ -1731,7 +1731,7 @@ fs_visitor::emit_interpolation_setup_gen6() this->current_annotation = "compute pos.w"; this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); this->wpos_w = fs_reg(this, glsl_type::float_type); - emit_math(FS_OPCODE_RCP, this->wpos_w, this->pixel_w); + emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w); this->delta_x = fs_reg(brw_vec8_grf(2, 0)); this->delta_y = fs_reg(brw_vec8_grf(3, 0)); -- cgit v1.2.3 From af3c9803d818fd33139f1247a387d64b967b8992 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 2 May 2011 09:45:40 -0700 Subject: i965: Start adding the VS visitor and codegen. The low-level IR is a mashup of brw_fs.cpp and ir_to_mesa.cpp. It's currently controlled by the INTEL_NEW_VS=1 environment variable, and only tested for the trivial "gl_Position = gl_Vertex;" shader so far. --- src/mesa/drivers/dri/i965/Makefile | 5 +- src/mesa/drivers/dri/i965/brw_context.h | 2 +- src/mesa/drivers/dri/i965/brw_defines.h | 2 + src/mesa/drivers/dri/i965/brw_eu.h | 3 + src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 33 +- src/mesa/drivers/dri/i965/brw_shader.cpp | 26 + src/mesa/drivers/dri/i965/brw_shader.h | 2 + src/mesa/drivers/dri/i965/brw_vec4.h | 434 ++++++ src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 568 +++++++ .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 77 + src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 1649 ++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_vs.c | 12 +- src/mesa/drivers/dri/i965/brw_vs.h | 3 +- src/mesa/drivers/dri/i965/brw_vs_emit.c | 2 +- 14 files changed, 2781 insertions(+), 37 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_vec4.h create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 44f28cd9d15..45a5350a383 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -124,7 +124,10 @@ CXX_SOURCES = \ brw_fs_reg_allocate.cpp \ brw_fs_schedule_instructions.cpp \ brw_fs_vector_splitting.cpp \ - brw_shader.cpp + brw_shader.cpp \ + brw_vec4_emit.cpp \ + brw_vec4_reg_allocate.cpp \ + brw_vec4_visitor.cpp ASM_SOURCES = diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index cc11d06874d..7b6b64c1a5c 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -529,7 +529,7 @@ struct brw_context * the CURBE, the depth buffer, and a query BO. */ drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16]; - int validated_bo_count; + unsigned int validated_bo_count; } state; struct brw_cache cache; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index da8d016da42..e3823c65d1a 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -638,6 +638,8 @@ enum opcode { FS_OPCODE_SPILL, FS_OPCODE_UNSPILL, FS_OPCODE_PULL_CONSTANT_LOAD, + + VS_OPCODE_URB_WRITE, }; #define BRW_PREDICATE_NONE 0 diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 72d50eadbce..38dd99b693d 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -44,6 +44,9 @@ #define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) #define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) #define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) +#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1) +#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2) +#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3) #define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 285ba46bd46..7367ccaa7e0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -146,38 +146,7 @@ void fs_visitor::generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src) { - int op; - - switch (inst->opcode) { - case SHADER_OPCODE_RCP: - op = BRW_MATH_FUNCTION_INV; - break; - case SHADER_OPCODE_RSQ: - op = BRW_MATH_FUNCTION_RSQ; - break; - case SHADER_OPCODE_SQRT: - op = BRW_MATH_FUNCTION_SQRT; - break; - case SHADER_OPCODE_EXP2: - op = BRW_MATH_FUNCTION_EXP; - break; - case SHADER_OPCODE_LOG2: - op = BRW_MATH_FUNCTION_LOG; - break; - case SHADER_OPCODE_POW: - op = BRW_MATH_FUNCTION_POW; - break; - case SHADER_OPCODE_SIN: - op = BRW_MATH_FUNCTION_SIN; - break; - case SHADER_OPCODE_COS: - op = BRW_MATH_FUNCTION_COS; - break; - default: - assert(!"not reached: unknown math function"); - op = 0; - break; - } + int op = brw_math_function(inst->opcode); if (intel->gen >= 6) { assert(inst->mlen == 0); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index f4005f80055..2eeeec25cac 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -199,3 +199,29 @@ brw_conditional_for_comparison(unsigned int op) return BRW_CONDITIONAL_NZ; } } + +uint32_t +brw_math_function(enum opcode op) +{ + switch (op) { + case SHADER_OPCODE_RCP: + return BRW_MATH_FUNCTION_INV; + case SHADER_OPCODE_RSQ: + return BRW_MATH_FUNCTION_RSQ; + case SHADER_OPCODE_SQRT: + return BRW_MATH_FUNCTION_SQRT; + case SHADER_OPCODE_EXP2: + return BRW_MATH_FUNCTION_EXP; + case SHADER_OPCODE_LOG2: + return BRW_MATH_FUNCTION_LOG; + case SHADER_OPCODE_POW: + return BRW_MATH_FUNCTION_POW; + case SHADER_OPCODE_SIN: + return BRW_MATH_FUNCTION_SIN; + case SHADER_OPCODE_COS: + return BRW_MATH_FUNCTION_COS; + default: + assert(!"not reached: unknown math function"); + return 0; + } +} diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 21671d1c8d6..1054d7a589e 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -22,8 +22,10 @@ */ #include +#include "brw_defines.h" #pragma once int brw_type_for_base_type(const struct glsl_type *type); uint32_t brw_conditional_for_comparison(unsigned int op); +uint32_t brw_math_function(enum opcode op); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h new file mode 100644 index 00000000000..10168fc1cb0 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -0,0 +1,434 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef BRW_VEC4_H +#define BRW_VEC4_H + +#include +#include "brw_shader.h" +#include "main/compiler.h" +#include "program/hash_table.h" + +extern "C" { +#include "brw_vs.h" +#include "brw_context.h" +#include "brw_eu.h" +}; + +#include "../glsl/ir.h" + +namespace brw { + +class dst_reg; + +/** + * Common helper for constructing swizzles. When only a subset of + * channels of a vec4 are used, we don't want to reference the other + * channels, as that will tell optimization passes that those other + * channels are used. + */ +static int +swizzle_for_size(int size) +{ + int size_swizzles[4] = { + BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), + BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), + }; + + assert((size >= 1) && (size <= 4)); + return size_swizzles[size - 1]; +} + +enum register_file { + ARF = BRW_ARCHITECTURE_REGISTER_FILE, + GRF = BRW_GENERAL_REGISTER_FILE, + MRF = BRW_MESSAGE_REGISTER_FILE, + IMM = BRW_IMMEDIATE_VALUE, + HW_REG, /* a struct brw_reg */ + ATTR, + UNIFORM, /* prog_data->params[hw_reg] */ + BAD_FILE +}; + +class reg +{ +public: + /** Register file: ARF, GRF, MRF, IMM. */ + enum register_file file; + /** virtual register number. 0 = fixed hw reg */ + int reg; + /** Offset within the virtual register. */ + int reg_offset; + /** Register type. BRW_REGISTER_TYPE_* */ + int type; + bool sechalf; + struct brw_reg fixed_hw_reg; + int smear; /* -1, or a channel of the reg to smear to all channels. */ + + /** Value for file == BRW_IMMMEDIATE_FILE */ + union { + int32_t i; + uint32_t u; + float f; + } imm; +}; + +class src_reg : public reg +{ +public: + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = ralloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + void init() + { + memset(this, 0, sizeof(*this)); + + this->file = BAD_FILE; + } + + src_reg(register_file file, int reg, const glsl_type *type) + { + init(); + + this->file = file; + this->reg = reg; + if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) + this->swizzle = swizzle_for_size(type->vector_elements); + else + this->swizzle = SWIZZLE_XYZW; + } + + /** Generic unset register constructor. */ + src_reg() + { + init(); + } + + src_reg(float f) + { + init(); + + this->file = IMM; + this->type = BRW_REGISTER_TYPE_F; + this->imm.f = f; + } + + src_reg(uint32_t u) + { + init(); + + this->file = IMM; + this->type = BRW_REGISTER_TYPE_UD; + this->imm.f = u; + } + + src_reg(int32_t i) + { + init(); + + this->file = IMM; + this->type = BRW_REGISTER_TYPE_D; + this->imm.i = i; + } + + src_reg(class vec4_visitor *v, const struct glsl_type *type); + + explicit src_reg(dst_reg reg); + + GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */ + bool negate; + bool abs; +}; + +class dst_reg : public reg +{ +public: + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = ralloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + void init() + { + memset(this, 0, sizeof(*this)); + this->file = BAD_FILE; + this->writemask = WRITEMASK_XYZW; + } + + dst_reg() + { + init(); + } + + dst_reg(register_file file, int reg) + { + init(); + + this->file = file; + this->reg = reg; + } + + dst_reg(struct brw_reg reg) + { + init(); + + this->file = HW_REG; + this->fixed_hw_reg = reg; + } + + dst_reg(class vec4_visitor *v, const struct glsl_type *type); + + explicit dst_reg(src_reg reg); + + int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ +}; + +class vec4_instruction : public exec_node { +public: + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = rzalloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + struct brw_reg get_dst(void); + struct brw_reg get_src(int i); + + enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ + dst_reg dst; + src_reg src[3]; + + bool saturate; + bool predicate_inverse; + uint32_t predicate; + + int conditional_mod; /**< BRW_CONDITIONAL_* */ + + int sampler; + int target; /**< MRT target. */ + bool shadow_compare; + + bool eot; + bool header_present; + int mlen; /**< SEND message length */ + int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */ + + uint32_t offset; /* spill/unspill offset */ + /** @{ + * Annotation for the generated IR. One of the two can be set. + */ + ir_instruction *ir; + const char *annotation; +}; + +class vec4_visitor : public ir_visitor +{ +public: + vec4_visitor(struct brw_vs_compile *c, + struct gl_shader_program *prog, struct brw_shader *shader); + ~vec4_visitor(); + + dst_reg dst_null_f() + { + return dst_reg(brw_null_reg()); + } + + dst_reg dst_null_d() + { + return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + } + + dst_reg dst_null_cmp() + { + if (intel->gen > 4) + return dst_null_d(); + else + return dst_null_f(); + } + + struct brw_context *brw; + const struct gl_vertex_program *vp; + struct intel_context *intel; + struct gl_context *ctx; + struct brw_vs_compile *c; + struct brw_vs_prog_data *prog_data; + struct brw_compile *p; + struct brw_shader *shader; + struct gl_shader_program *prog; + void *mem_ctx; + exec_list instructions; + + char *fail_msg; + bool failed; + + /** + * GLSL IR currently being processed, which is associated with our + * driver IR instructions for debugging purposes. + */ + ir_instruction *base_ir; + const char *current_annotation; + + int *virtual_grf_sizes; + int virtual_grf_count; + int virtual_grf_array_size; + int first_non_payload_grf; + + dst_reg *variable_storage(ir_variable *var); + + void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr); + + src_reg src_reg_for_float(float val); + + /** + * \name Visit methods + * + * As typical for the visitor pattern, there must be one \c visit method for + * each concrete subclass of \c ir_instruction. Virtual base classes within + * the hierarchy should not have \c visit methods. + */ + /*@{*/ + virtual void visit(ir_variable *); + virtual void visit(ir_loop *); + virtual void visit(ir_loop_jump *); + virtual void visit(ir_function_signature *); + virtual void visit(ir_function *); + virtual void visit(ir_expression *); + virtual void visit(ir_swizzle *); + virtual void visit(ir_dereference_variable *); + virtual void visit(ir_dereference_array *); + virtual void visit(ir_dereference_record *); + virtual void visit(ir_assignment *); + virtual void visit(ir_constant *); + virtual void visit(ir_call *); + virtual void visit(ir_return *); + virtual void visit(ir_discard *); + virtual void visit(ir_texture *); + virtual void visit(ir_if *); + /*@}*/ + + src_reg result; + + /* Regs for vertex results. Generated at ir_variable visiting time + * for the ir->location's used. + */ + dst_reg output_reg[VERT_RESULT_MAX]; + + struct hash_table *variable_ht; + + bool run(void); + void fail(const char *msg, ...); + + int virtual_grf_alloc(int size); + int setup_attributes(int payload_reg); + void setup_payload(); + void reg_allocate_trivial(); + void reg_allocate(); + + vec4_instruction *emit(enum opcode opcode); + + vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0); + + vec4_instruction *emit(enum opcode opcode, dst_reg dst, + src_reg src0, src_reg src1); + + vec4_instruction *emit(enum opcode opcode, dst_reg dst, + src_reg src0, src_reg src1, src_reg src2); + + /** Walks an exec_list of ir_instruction and sends it through this visitor. */ + void visit_instructions(const exec_list *list); + + void emit_bool_to_cond_code(ir_rvalue *ir); + void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1); + void emit_if_gen6(ir_if *ir); + + void emit_block_move(ir_assignment *ir); + + /** + * Emit the correct dot-product instruction for the type of arguments + */ + void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements); + + void emit_scalar(ir_instruction *ir, enum prog_opcode op, + dst_reg dst, src_reg src0); + + void emit_scalar(ir_instruction *ir, enum prog_opcode op, + dst_reg dst, src_reg src0, src_reg src1); + + void emit_scs(ir_instruction *ir, enum prog_opcode op, + dst_reg dst, const src_reg &src); + + void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src); + void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src); + void emit_math(enum opcode opcode, dst_reg dst, src_reg src); + void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); + void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); + void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); + + int emit_vue_header_gen6(int header_mrf); + int emit_vue_header_gen4(int header_mrf); + void emit_urb_writes(void); + + GLboolean try_emit_sat(ir_expression *ir); + + bool process_move_condition(ir_rvalue *ir); + + void generate_code(); + void generate_vs_instruction(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg *src); + void generate_math1_gen4(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src); + void generate_math1_gen6(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src); + void generate_urb_write(vec4_instruction *inst); +}; + +} /* namespace brw */ + +#endif /* BRW_VEC4_H */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp new file mode 100644 index 00000000000..bdc7a79d83d --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -0,0 +1,568 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_vec4.h" +#include "../glsl/ir_print_visitor.h" + +extern "C" { +#include "brw_eu.h" +}; + +using namespace brw; + +namespace brw { + +int +vec4_visitor::setup_attributes(int payload_reg) +{ + int nr_attributes; + int attribute_map[VERT_ATTRIB_MAX]; + + nr_attributes = 0; + for (int i = 0; i < VERT_ATTRIB_MAX; i++) { + if (prog_data->inputs_read & BITFIELD64_BIT(i)) { + attribute_map[i] = payload_reg + nr_attributes; + nr_attributes++; + } + } + + foreach_iter(exec_list_iterator, iter, this->instructions) { + vec4_instruction *inst = (vec4_instruction *)iter.get(); + + for (int i = 0; i < 3; i++) { + if (inst->src[i].file != ATTR) + continue; + + inst->src[i].file = HW_REG; + inst->src[i].fixed_hw_reg = brw_vec8_grf(attribute_map[inst->src[i].reg], 0); + inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle; + } + } + + /* The BSpec says we always have to read at least one thing from + * the VF, and it appears that the hardware wedges otherwise. + */ + if (nr_attributes == 0) + nr_attributes = 1; + + prog_data->urb_read_length = (nr_attributes + 1) / 2; + + return nr_attributes; +} + +void +vec4_visitor::setup_payload(void) +{ + int reg = 0; + + /* r0 is always reserved, as it contains the payload with the URB + * handles that are passed on to the URB write at the end of the + * thread. + */ + reg++; + + /* User clip planes from curbe: + */ + if (c->key.nr_userclip) { + if (intel->gen >= 6) { + for (int i = 0; i < c->key.nr_userclip; i++) { + c->userplane[i] = stride(brw_vec4_grf(reg + i / 2, + (i % 2) * 4), 0, 4, 1); + } + reg += ALIGN(c->key.nr_userclip, 2) / 2; + } else { + for (int i = 0; i < c->key.nr_userclip; i++) { + c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2, + (i % 2) * 4), 0, 4, 1); + } + reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2; + } + } + + /* FINISHME: push constants */ + c->prog_data.curb_read_length = reg - 1; + c->prog_data.nr_params = 0; + /* XXX 0 causes a bug elsewhere... */ + if (intel->gen < 6 && c->prog_data.nr_params == 0) + c->prog_data.nr_params = 4; + + reg += setup_attributes(reg); + + this->first_non_payload_grf = reg; +} + +struct brw_reg +vec4_instruction::get_dst(void) +{ + struct brw_reg brw_reg; + + switch (dst.file) { + case GRF: + assert(dst.reg_offset == 0); + brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0); + brw_reg = retype(brw_reg, dst.type); + brw_reg.dw1.bits.writemask = dst.writemask; + break; + + case HW_REG: + brw_reg = dst.fixed_hw_reg; + break; + + case BAD_FILE: + brw_reg = brw_null_reg(); + break; + + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; + } + return brw_reg; +} + +struct brw_reg +vec4_instruction::get_src(int i) +{ + struct brw_reg brw_reg; + + switch (src[i].file) { + case GRF: + brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0); + brw_reg = retype(brw_reg, src[i].type); + brw_reg.dw1.bits.swizzle = src[i].swizzle; + if (src[i].abs) + brw_reg = brw_abs(brw_reg); + if (src[i].negate) + brw_reg = negate(brw_reg); + break; + + case IMM: + switch (src[i].type) { + case BRW_REGISTER_TYPE_F: + brw_reg = brw_imm_f(src[i].imm.f); + break; + case BRW_REGISTER_TYPE_D: + brw_reg = brw_imm_d(src[i].imm.i); + break; + case BRW_REGISTER_TYPE_UD: + brw_reg = brw_imm_ud(src[i].imm.u); + break; + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; + } + break; + + case HW_REG: + brw_reg = src[i].fixed_hw_reg; + break; + + case BAD_FILE: + /* Probably unused. */ + brw_reg = brw_null_reg(); + break; + case ATTR: + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; + } + + return brw_reg; +} + +void +vec4_visitor::generate_math1_gen4(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src) +{ + brw_math(p, + dst, + brw_math_function(inst->opcode), + BRW_MATH_SATURATE_NONE, + inst->base_mrf, + src, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); +} + +void +vec4_visitor::generate_math1_gen6(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src) +{ + brw_math(p, + dst, + brw_math_function(inst->opcode), + BRW_MATH_SATURATE_NONE, + inst->base_mrf, + src, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); +} + +void +vec4_visitor::generate_urb_write(vec4_instruction *inst) +{ + brw_urb_WRITE(p, + brw_null_reg(), /* dest */ + inst->base_mrf, /* starting mrf reg nr */ + brw_vec8_grf(0, 0), /* src */ + false, /* allocate */ + true, /* used */ + inst->mlen, + 0, /* response len */ + inst->eot, /* eot */ + inst->eot, /* writes complete */ + inst->offset, /* urb destination offset */ + BRW_URB_SWIZZLE_INTERLEAVE); +} + +void +vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, + struct brw_reg dst, + struct brw_reg *src) +{ + vec4_instruction *inst = (vec4_instruction *)instruction; + + switch (inst->opcode) { + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: + if (intel->gen >= 6) { + generate_math1_gen6(inst, dst, src[0]); + } else { + generate_math1_gen4(inst, dst, src[0]); + } + break; + + case SHADER_OPCODE_POW: + assert(!"finishme"); + break; + + case VS_OPCODE_URB_WRITE: + generate_urb_write(inst); + break; + + default: + if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { + fail("unsupported opcode in `%s' in VS\n", + brw_opcodes[inst->opcode].name); + } else { + fail("Unsupported opcode %d in VS", inst->opcode); + } + } +} + +bool +vec4_visitor::run() +{ + /* Generate FS IR for main(). (the visitor only descends into + * functions called "main"). + */ + foreach_iter(exec_list_iterator, iter, *shader->ir) { + ir_instruction *ir = (ir_instruction *)iter.get(); + base_ir = ir; + ir->accept(this); + } + + emit_urb_writes(); + + if (failed) + return false; + + setup_payload(); + reg_allocate(); + + brw_set_access_mode(p, BRW_ALIGN_16); + + generate_code(); + + return !failed; +} + +void +vec4_visitor::generate_code() +{ + int last_native_inst = p->nr_insn; + const char *last_annotation_string = NULL; + ir_instruction *last_annotation_ir = NULL; + + int loop_stack_array_size = 16; + int loop_stack_depth = 0; + brw_instruction **loop_stack = + rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size); + int *if_depth_in_loop = + rzalloc_array(this->mem_ctx, int, loop_stack_array_size); + + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + printf("Native code for vertex shader %d:\n", prog->Name); + } + + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + struct brw_reg src[3], dst; + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + if (last_annotation_ir != inst->ir) { + last_annotation_ir = inst->ir; + if (last_annotation_ir) { + printf(" "); + last_annotation_ir->print(); + printf("\n"); + } + } + if (last_annotation_string != inst->annotation) { + last_annotation_string = inst->annotation; + if (last_annotation_string) + printf(" %s\n", last_annotation_string); + } + } + + for (unsigned int i = 0; i < 3; i++) { + src[i] = inst->get_src(i); + } + dst = inst->get_dst(); + + brw_set_conditionalmod(p, inst->conditional_mod); + brw_set_predicate_control(p, inst->predicate); + brw_set_predicate_inverse(p, inst->predicate_inverse); + brw_set_saturate(p, inst->saturate); + + switch (inst->opcode) { + case BRW_OPCODE_MOV: + brw_MOV(p, dst, src[0]); + break; + case BRW_OPCODE_ADD: + brw_ADD(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_MUL: + brw_MUL(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_FRC: + brw_FRC(p, dst, src[0]); + break; + case BRW_OPCODE_RNDD: + brw_RNDD(p, dst, src[0]); + break; + case BRW_OPCODE_RNDE: + brw_RNDE(p, dst, src[0]); + break; + case BRW_OPCODE_RNDZ: + brw_RNDZ(p, dst, src[0]); + break; + + case BRW_OPCODE_AND: + brw_AND(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_OR: + brw_OR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_XOR: + brw_XOR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_NOT: + brw_NOT(p, dst, src[0]); + break; + case BRW_OPCODE_ASR: + brw_ASR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_SHR: + brw_SHR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_SHL: + brw_SHL(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_CMP: + brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); + break; + case BRW_OPCODE_SEL: + brw_SEL(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_IF: + if (inst->src[0].file != BAD_FILE) { + /* The instruction has an embedded compare (only allowed on gen6) */ + assert(intel->gen == 6); + gen6_IF(p, inst->conditional_mod, src[0], src[1]); + } else { + brw_IF(p, BRW_EXECUTE_8); + } + if_depth_in_loop[loop_stack_depth]++; + break; + + case BRW_OPCODE_ELSE: + brw_ELSE(p); + break; + case BRW_OPCODE_ENDIF: + brw_ENDIF(p); + if_depth_in_loop[loop_stack_depth]--; + break; + + case BRW_OPCODE_DO: + loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); + if (loop_stack_array_size <= loop_stack_depth) { + loop_stack_array_size *= 2; + loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *, + loop_stack_array_size); + if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int, + loop_stack_array_size); + } + if_depth_in_loop[loop_stack_depth] = 0; + break; + + case BRW_OPCODE_BREAK: + brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case BRW_OPCODE_CONTINUE: + /* FINISHME: We need to write the loop instruction support still. */ + if (intel->gen >= 6) + gen6_CONT(p, loop_stack[loop_stack_depth - 1]); + else + brw_CONT(p, if_depth_in_loop[loop_stack_depth]); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + + case BRW_OPCODE_WHILE: { + struct brw_instruction *inst0, *inst1; + GLuint br = 1; + + if (intel->gen >= 5) + br = 2; + + assert(loop_stack_depth > 0); + loop_stack_depth--; + inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); + if (intel->gen < 6) { + /* patch all the BREAK/CONT instructions from last BGNLOOP */ + while (inst0 > loop_stack[loop_stack_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + } + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + } + } + } + } + break; + + default: + generate_vs_instruction(inst, dst, src); + break; + } + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { + if (0) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *)&p->store[i])[3], + ((uint32_t *)&p->store[i])[2], + ((uint32_t *)&p->store[i])[1], + ((uint32_t *)&p->store[i])[0]); + } + brw_disasm(stdout, &p->store[i], intel->gen); + } + } + + last_native_inst = p->nr_insn; + } + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + printf("\n"); + } + + ralloc_free(loop_stack); + ralloc_free(if_depth_in_loop); + + brw_set_uip_jip(p); + + /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS + * emit issues, it doesn't get the jump distances into the output, + * which is often something we want to debug. So this is here in + * case you're doing that. + */ + if (0) { + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + for (unsigned int i = 0; i < p->nr_insn; i++) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *)&p->store[i])[3], + ((uint32_t *)&p->store[i])[2], + ((uint32_t *)&p->store[i])[1], + ((uint32_t *)&p->store[i])[0]); + brw_disasm(stdout, &p->store[i], intel->gen); + } + } + } +} + +extern "C" { + +bool +brw_vs_emit(struct brw_vs_compile *c) +{ + struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; + struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &intel->ctx; + struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram; + + if (!prog) + return false; + + struct brw_shader *shader = + (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; + if (!shader) + return false; + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + printf("GLSL IR for native vertex shader %d:\n", prog->Name); + _mesa_print_ir(shader->ir, NULL); + printf("\n\n"); + } + + vec4_visitor v(c, prog, shader); + if (!v.run()) { + /* FINISHME: Cleanly fail, test at link time, etc. */ + assert(!"not reached"); + return false; + } + + return true; +} + +} /* extern "C" */ + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp new file mode 100644 index 00000000000..e7f6b28a536 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -0,0 +1,77 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_vec4.h" +#include "../glsl/ir_print_visitor.h" + +using namespace brw; + +namespace brw { + +static void +assign(int *reg_hw_locations, reg *reg) +{ + if (reg->file == GRF) { + reg->reg = reg_hw_locations[reg->reg]; + } +} + +void +vec4_visitor::reg_allocate_trivial() +{ + int last_grf = 0; + int hw_reg_mapping[this->virtual_grf_count]; + int i; + int next; + + /* Note that compressed instructions require alignment to 2 registers. */ + hw_reg_mapping[0] = this->first_non_payload_grf; + next = hw_reg_mapping[0] + this->virtual_grf_sizes[0]; + for (i = 1; i < this->virtual_grf_count; i++) { + hw_reg_mapping[i] = next; + next += this->virtual_grf_sizes[i]; + } + prog_data->total_grf = next; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + vec4_instruction *inst = (vec4_instruction *)iter.get(); + + assign(hw_reg_mapping, &inst->dst); + assign(hw_reg_mapping, &inst->src[0]); + assign(hw_reg_mapping, &inst->src[1]); + assign(hw_reg_mapping, &inst->src[2]); + } + + if (last_grf >= BRW_MAX_GRF) { + fail("Ran out of regs on trivial allocator (%d/%d)\n", + last_grf, BRW_MAX_GRF); + } +} + +void +vec4_visitor::reg_allocate() +{ + reg_allocate_trivial(); +} + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp new file mode 100644 index 00000000000..bba1d810f19 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -0,0 +1,1649 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_vec4.h" +#include "main/macros.h" + +namespace brw { + +src_reg::src_reg(dst_reg reg) +{ + init(); + + this->file = reg.file; + this->reg = reg.reg; + this->reg_offset = reg.reg_offset; + this->type = reg.type; + + int swizzles[4]; + int next_chan = 0; + int last = 0; + + for (int i = 0; i < 4; i++) { + if (!(reg.writemask & (1 << i))) + continue; + + swizzles[next_chan++] = last = i; + } + + for (; next_chan < 4; next_chan++) { + swizzles[next_chan] = last; + } + + this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], + swizzles[2], swizzles[3]); +} + +dst_reg::dst_reg(src_reg reg) +{ + init(); + + this->file = reg.file; + this->reg = reg.reg; + this->reg_offset = reg.reg_offset; + this->type = reg.type; + this->writemask = WRITEMASK_XYZW; +} + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode, dst_reg dst, + src_reg src0, src_reg src1, src_reg src2) +{ + vec4_instruction *inst = new(mem_ctx) vec4_instruction(); + + inst->opcode = opcode; + inst->dst = dst; + inst->src[0] = src0; + inst->src[1] = src1; + inst->src[2] = src2; + inst->ir = this->base_ir; + inst->annotation = this->current_annotation; + + this->instructions.push_tail(inst); + + return inst; +} + + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1) +{ + return emit(opcode, dst, src0, src1, src_reg()); +} + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0) +{ + assert(dst.writemask != 0); + return emit(opcode, dst, src0, src_reg(), src_reg()); +} + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode) +{ + return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg()); +} + +void +vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements) +{ + static enum opcode dot_opcodes[] = { + BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4 + }; + + emit(dot_opcodes[elements - 2], dst, src0, src1); +} + +void +vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src) +{ + /* The gen6 math instruction ignores the source modifiers -- + * swizzle, abs, negate, and at least some parts of the register + * region description. Move the source to the corresponding slots + * of the destination generally work. + */ + src_reg expanded = src_reg(this, glsl_type::float_type); + emit(BRW_OPCODE_MOV, dst, src); + src = expanded; + + emit(opcode, dst, src); +} + +void +vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src) +{ + vec4_instruction *inst = emit(opcode, dst, src); + inst->base_mrf = 1; + inst->mlen = 1; +} + +void +vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src) +{ + switch (opcode) { + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: + break; + default: + assert(!"not reached: bad math opcode"); + return; + } + + if (intel->gen >= 6) { + return emit_math1_gen6(opcode, dst, src); + } else { + return emit_math1_gen4(opcode, dst, src); + } +} + +void +vec4_visitor::emit_math2_gen6(enum opcode opcode, + dst_reg dst, src_reg src0, src_reg src1) +{ + src_reg expanded; + + /* The gen6 math instruction ignores the source modifiers -- + * swizzle, abs, negate, and at least some parts of the register + * region description. Move the sources to temporaries to make it + * generally work. + */ + + expanded = src_reg(this, glsl_type::vec4_type); + emit(BRW_OPCODE_MOV, dst, src0); + src0 = expanded; + + expanded = src_reg(this, glsl_type::vec4_type); + emit(BRW_OPCODE_MOV, dst, src1); + src1 = expanded; + + emit(opcode, dst, src0, src1); +} + +void +vec4_visitor::emit_math2_gen4(enum opcode opcode, + dst_reg dst, src_reg src0, src_reg src1) +{ + vec4_instruction *inst = emit(opcode, dst, src0, src1); + inst->base_mrf = 1; + inst->mlen = 2; +} + +void +vec4_visitor::emit_math(enum opcode opcode, + dst_reg dst, src_reg src0, src_reg src1) +{ + assert(opcode == SHADER_OPCODE_POW); + + if (intel->gen >= 6) { + return emit_math2_gen6(opcode, dst, src0, src1); + } else { + return emit_math2_gen4(opcode, dst, src0, src1); + } +} + +void +vec4_visitor::visit_instructions(const exec_list *list) +{ + foreach_iter(exec_list_iterator, iter, *list) { + ir_instruction *ir = (ir_instruction *)iter.get(); + + base_ir = ir; + ir->accept(this); + } +} + + +static int +type_size(const struct glsl_type *type) +{ + unsigned int i; + int size; + + switch (type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + if (type->is_matrix()) { + return type->matrix_columns; + } else { + /* Regardless of size of vector, it gets a vec4. This is bad + * packing for things like floats, but otherwise arrays become a + * mess. Hopefully a later pass over the code can pack scalars + * down if appropriate. + */ + return 1; + } + case GLSL_TYPE_ARRAY: + assert(type->length > 0); + return type_size(type->fields.array) * type->length; + case GLSL_TYPE_STRUCT: + size = 0; + for (i = 0; i < type->length; i++) { + size += type_size(type->fields.structure[i].type); + } + return size; + case GLSL_TYPE_SAMPLER: + /* Samplers take up one slot in UNIFORMS[], but they're baked in + * at link time. + */ + return 1; + default: + assert(0); + return 0; + } +} + +int +vec4_visitor::virtual_grf_alloc(int size) +{ + if (virtual_grf_array_size <= virtual_grf_count) { + if (virtual_grf_array_size == 0) + virtual_grf_array_size = 16; + else + virtual_grf_array_size *= 2; + virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, + virtual_grf_array_size); + } + virtual_grf_sizes[virtual_grf_count] = size; + return virtual_grf_count++; +} + +src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) +{ + init(); + + this->file = GRF; + this->reg = v->virtual_grf_alloc(type_size(type)); + + if (type->is_array() || type->is_record()) { + this->swizzle = BRW_SWIZZLE_NOOP; + } else { + this->swizzle = swizzle_for_size(type->vector_elements); + } + + this->type = brw_type_for_base_type(type); +} + +dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) +{ + init(); + + this->file = GRF; + this->reg = v->virtual_grf_alloc(type_size(type)); + + if (type->is_array() || type->is_record()) { + this->writemask = WRITEMASK_XYZW; + } else { + this->writemask = (1 << type->vector_elements) - 1; + } + + this->type = brw_type_for_base_type(type); +} + +dst_reg * +vec4_visitor::variable_storage(ir_variable *var) +{ + return (dst_reg *)hash_table_find(this->variable_ht, var); +} + +void +vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir) +{ + ir_expression *expr = ir->as_expression(); + + if (expr) { + src_reg op[2]; + vec4_instruction *inst; + + assert(expr->get_num_operands() <= 2); + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + assert(expr->operands[i]->type->is_scalar()); + + expr->operands[i]->accept(this); + op[i] = this->result; + } + + switch (expr->operation) { + case ir_unop_logic_not: + inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1)); + inst->conditional_mod = BRW_CONDITIONAL_Z; + break; + + case ir_binop_logic_xor: + inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_binop_logic_or: + inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_binop_logic_and: + inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_unop_f2b: + if (intel->gen >= 6) { + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f)); + } else { + inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]); + } + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_unop_i2b: + if (intel->gen >= 6) { + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + } else { + inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]); + } + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_binop_greater: + case ir_binop_gequal: + case ir_binop_less: + case ir_binop_lequal: + case ir_binop_equal: + case ir_binop_all_equal: + case ir_binop_nequal: + case ir_binop_any_nequal: + inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); + break; + + default: + assert(!"not reached"); + break; + } + return; + } + + ir->accept(this); + + if (intel->gen >= 6) { + vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(), + this->result, src_reg(1)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } else { + vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } +} + +/** + * Emit a gen6 IF statement with the comparison folded into the IF + * instruction. + */ +void +vec4_visitor::emit_if_gen6(ir_if *ir) +{ + ir_expression *expr = ir->condition->as_expression(); + + if (expr) { + src_reg op[2]; + vec4_instruction *inst; + dst_reg temp; + + assert(expr->get_num_operands() <= 2); + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + assert(expr->operands[i]->type->is_scalar()); + + expr->operands[i]->accept(this); + op[i] = this->result; + } + + switch (expr->operation) { + case ir_unop_logic_not: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_Z; + return; + + case ir_binop_logic_xor: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_binop_logic_or: + temp = dst_reg(this, glsl_type::bool_type); + emit(BRW_OPCODE_OR, temp, op[0], op[1]); + inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_binop_logic_and: + temp = dst_reg(this, glsl_type::bool_type); + emit(BRW_OPCODE_AND, temp, op[0], op[1]); + inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_unop_f2b: + inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_unop_i2b: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_binop_greater: + case ir_binop_gequal: + case ir_binop_less: + case ir_binop_lequal: + case ir_binop_equal: + case ir_binop_all_equal: + case ir_binop_nequal: + case ir_binop_any_nequal: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); + return; + default: + assert(!"not reached"); + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + } + return; + } + + ir->condition->accept(this); + + vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(), + this->result, src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; +} + +void +vec4_visitor::visit(ir_variable *ir) +{ + dst_reg *reg = NULL; + + if (variable_storage(ir)) + return; + + switch (ir->mode) { + case ir_var_in: + reg = new(mem_ctx) dst_reg(ATTR, ir->location); + reg->type = brw_type_for_base_type(ir->type); + hash_table_insert(this->variable_ht, reg, ir); + break; + + case ir_var_out: + reg = new(mem_ctx) dst_reg(this, ir->type); + hash_table_insert(this->variable_ht, reg, ir); + + for (int i = 0; i < type_size(ir->type); i++) { + output_reg[ir->location + i] = *reg; + output_reg[ir->location + i].reg_offset = i; + } + break; + + case ir_var_temporary: + reg = new(mem_ctx) dst_reg(this, ir->type); + hash_table_insert(this->variable_ht, reg, ir); + + break; + + case ir_var_uniform: + /* FINISHME: uniforms */ + break; + } +} + +void +vec4_visitor::visit(ir_loop *ir) +{ + ir_dereference_variable *counter = NULL; + + /* We don't want debugging output to print the whole body of the + * loop as the annotation. + */ + this->base_ir = NULL; + + if (ir->counter != NULL) + counter = new(ir) ir_dereference_variable(ir->counter); + + if (ir->from != NULL) { + assert(ir->counter != NULL); + + ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); + + a->accept(this); + delete a; + } + + emit(BRW_OPCODE_DO); + + if (ir->to) { + ir_expression *e = + new(ir) ir_expression(ir->cmp, glsl_type::bool_type, + counter, ir->to); + ir_if *if_stmt = new(ir) ir_if(e); + + ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); + + if_stmt->then_instructions.push_tail(brk); + + if_stmt->accept(this); + + delete if_stmt; + delete e; + delete brk; + } + + visit_instructions(&ir->body_instructions); + + if (ir->increment) { + ir_expression *e = + new(ir) ir_expression(ir_binop_add, counter->type, + counter, ir->increment); + + ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); + + a->accept(this); + delete a; + delete e; + } + + emit(BRW_OPCODE_WHILE); +} + +void +vec4_visitor::visit(ir_loop_jump *ir) +{ + switch (ir->mode) { + case ir_loop_jump::jump_break: + emit(BRW_OPCODE_BREAK); + break; + case ir_loop_jump::jump_continue: + emit(BRW_OPCODE_CONTINUE); + break; + } +} + + +void +vec4_visitor::visit(ir_function_signature *ir) +{ + assert(0); + (void)ir; +} + +void +vec4_visitor::visit(ir_function *ir) +{ + /* Ignore function bodies other than main() -- we shouldn't see calls to + * them since they should all be inlined. + */ + if (strcmp(ir->name, "main") == 0) { + const ir_function_signature *sig; + exec_list empty; + + sig = ir->matching_signature(&empty); + + assert(sig); + + visit_instructions(&sig->body); + } +} + +GLboolean +vec4_visitor::try_emit_sat(ir_expression *ir) +{ + ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); + if (!sat_src) + return false; + + sat_src->accept(this); + src_reg src = this->result; + + this->result = src_reg(this, ir->type); + vec4_instruction *inst; + inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src); + inst->saturate = true; + + return true; +} + +void +vec4_visitor::emit_bool_comparison(unsigned int op, + dst_reg dst, src_reg src0, src_reg src1) +{ + /* original gen4 does destination conversion before comparison. */ + if (intel->gen < 5) + dst.type = src0.type; + + vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1); + inst->conditional_mod = brw_conditional_for_comparison(op); + + dst.type = BRW_REGISTER_TYPE_D; + emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1)); +} + +void +vec4_visitor::visit(ir_expression *ir) +{ + unsigned int operand; + src_reg op[Elements(ir->operands)]; + src_reg result_src; + dst_reg result_dst; + vec4_instruction *inst; + + if (try_emit_sat(ir)) + return; + + for (operand = 0; operand < ir->get_num_operands(); operand++) { + this->result.file = BAD_FILE; + ir->operands[operand]->accept(this); + if (this->result.file == BAD_FILE) { + printf("Failed to get tree for expression operand:\n"); + ir->operands[operand]->print(); + exit(1); + } + op[operand] = this->result; + + /* Matrix expression operands should have been broken down to vector + * operations already. + */ + assert(!ir->operands[operand]->type->is_matrix()); + } + + int vector_elements = ir->operands[0]->type->vector_elements; + if (ir->operands[1]) { + vector_elements = MAX2(vector_elements, + ir->operands[1]->type->vector_elements); + } + + this->result.file = BAD_FILE; + + /* Storage for our result. Ideally for an assignment we'd be using + * the actual storage for the result here, instead. + */ + result_src = src_reg(this, ir->type); + /* convenience for the emit functions below. */ + result_dst = dst_reg(result_src); + /* If nothing special happens, this is the result. */ + this->result = result_src; + /* Limit writes to the channels that will be used by result_src later. + * This does limit this temp's use as a temporary for multi-instruction + * sequences. + */ + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + + switch (ir->operation) { + case ir_unop_logic_not: + /* Note that BRW_OPCODE_NOT is not appropriate here, since it is + * ones complement of the whole register, not just bit 0. + */ + emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1)); + break; + case ir_unop_neg: + op[0].negate = !op[0].negate; + this->result = op[0]; + break; + case ir_unop_abs: + op[0].abs = true; + op[0].negate = false; + this->result = op[0]; + break; + + case ir_unop_sign: + emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f)); + + inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_G; + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f)); + inst->predicate = BRW_PREDICATE_NORMAL; + + inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_L; + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f)); + inst->predicate = BRW_PREDICATE_NORMAL; + + break; + + case ir_unop_rcp: + emit_math(SHADER_OPCODE_RCP, result_dst, op[0]); + break; + + case ir_unop_exp2: + emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]); + break; + case ir_unop_log2: + emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]); + break; + case ir_unop_exp: + case ir_unop_log: + assert(!"not reached: should be handled by ir_explog_to_explog2"); + break; + case ir_unop_sin: + case ir_unop_sin_reduced: + emit_math(SHADER_OPCODE_SIN, result_dst, op[0]); + break; + case ir_unop_cos: + case ir_unop_cos_reduced: + emit_math(SHADER_OPCODE_COS, result_dst, op[0]); + break; + + case ir_unop_dFdx: + case ir_unop_dFdy: + assert(!"derivatives not valid in vertex shader"); + break; + + case ir_unop_noise: + assert(!"not reached: should be handled by lower_noise"); + break; + + case ir_binop_add: + emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]); + break; + case ir_binop_sub: + assert(!"not reached: should be handled by ir_sub_to_add_neg"); + break; + + case ir_binop_mul: + emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]); + break; + case ir_binop_div: + assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + case ir_binop_mod: + assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + break; + + case ir_binop_less: + case ir_binop_greater: + case ir_binop_lequal: + case ir_binop_gequal: + case ir_binop_equal: + case ir_binop_nequal: { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); + inst->conditional_mod = brw_conditional_for_comparison(ir->operation); + emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1)); + break; + } + + case ir_binop_all_equal: + /* "==" operator producing a scalar boolean. */ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_Z; + + emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); + inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; + } else { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); + } + break; + case ir_binop_any_nequal: + /* "!=" operator producing a scalar boolean. */ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + } else { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); + } + break; + + case ir_unop_any: + emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); + + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + break; + + case ir_binop_logic_xor: + emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); + break; + + case ir_binop_logic_or: + emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); + break; + + case ir_binop_logic_and: + emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); + break; + + case ir_binop_dot: + assert(ir->operands[0]->type->is_vector()); + assert(ir->operands[0]->type == ir->operands[1]->type); + emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements); + break; + + case ir_unop_sqrt: + emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]); + break; + case ir_unop_rsq: + emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]); + break; + case ir_unop_i2f: + case ir_unop_i2u: + case ir_unop_u2i: + case ir_unop_u2f: + case ir_unop_b2f: + case ir_unop_b2i: + case ir_unop_f2i: + emit(BRW_OPCODE_MOV, result_dst, op[0]); + break; + case ir_unop_f2b: + case ir_unop_i2b: { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1)); + break; + } + + case ir_unop_trunc: + emit(BRW_OPCODE_RNDZ, result_dst, op[0]); + break; + case ir_unop_ceil: + op[0].negate = !op[0].negate; + inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); + this->result.negate = true; + break; + case ir_unop_floor: + inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); + break; + case ir_unop_fract: + inst = emit(BRW_OPCODE_FRC, result_dst, op[0]); + break; + case ir_unop_round_even: + emit(BRW_OPCODE_RNDE, result_dst, op[0]); + break; + + case ir_binop_min: + inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_L; + + inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); + inst->predicate = BRW_PREDICATE_NORMAL; + break; + case ir_binop_max: + inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_G; + + inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); + inst->predicate = BRW_PREDICATE_NORMAL; + break; + + case ir_binop_pow: + emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]); + break; + + case ir_unop_bit_not: + inst = emit(BRW_OPCODE_NOT, result_dst, op[0]); + break; + case ir_binop_bit_and: + inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); + break; + case ir_binop_bit_xor: + inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); + break; + case ir_binop_bit_or: + inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); + break; + + case ir_binop_lshift: + case ir_binop_rshift: + assert(!"GLSL 1.30 features unsupported"); + break; + + case ir_quadop_vector: + assert(!"not reached: should be handled by lower_quadop_vector"); + break; + } +} + + +void +vec4_visitor::visit(ir_swizzle *ir) +{ + src_reg src; + int i = 0; + int swizzle[4]; + + /* Note that this is only swizzles in expressions, not those on the left + * hand side of an assignment, which do write masking. See ir_assignment + * for that. + */ + + ir->val->accept(this); + src = this->result; + assert(src.file != BAD_FILE); + + if (i < ir->type->vector_elements) { + switch (i) { + case 0: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x); + break; + case 1: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y); + break; + case 2: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z); + break; + case 3: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w); + break; + } + } + for (; i < 4; i++) { + /* Replicate the last channel out. */ + swizzle[i] = swizzle[ir->type->vector_elements - 1]; + } + + src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); + + this->result = src; +} + +void +vec4_visitor::visit(ir_dereference_variable *ir) +{ + dst_reg *reg = variable_storage(ir->var); + + if (!reg) { + fail("Failed to find variable storage for %s\n", ir->var->name); + this->result = src_reg(brw_null_reg()); + return; + } + + this->result = src_reg(*reg); +} + +void +vec4_visitor::visit(ir_dereference_array *ir) +{ + ir_constant *constant_index; + src_reg src; + int element_size = type_size(ir->type); + + constant_index = ir->array_index->constant_expression_value(); + + ir->array->accept(this); + src = this->result; + + if (constant_index) { + src.reg_offset += constant_index->value.i[0] * element_size; + } else { +#if 0 /* Variable array index */ + /* Variable index array dereference. It eats the "vec4" of the + * base of the array and an index that offsets the Mesa register + * index. + */ + ir->array_index->accept(this); + + src_reg index_reg; + + if (element_size == 1) { + index_reg = this->result; + } else { + index_reg = src_reg(this, glsl_type::float_type); + + emit(BRW_OPCODE_MUL, dst_reg(index_reg), + this->result, src_reg_for_float(element_size)); + } + + src.reladdr = ralloc(mem_ctx, src_reg); + memcpy(src.reladdr, &index_reg, sizeof(index_reg)); +#endif + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + src.swizzle = swizzle_for_size(ir->type->vector_elements); + else + src.swizzle = BRW_SWIZZLE_NOOP; + + this->result = src; +} + +void +vec4_visitor::visit(ir_dereference_record *ir) +{ + unsigned int i; + const glsl_type *struct_type = ir->record->type; + int offset = 0; + + ir->record->accept(this); + + for (i = 0; i < struct_type->length; i++) { + if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) + break; + offset += type_size(struct_type->fields.structure[i].type); + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + this->result.swizzle = swizzle_for_size(ir->type->vector_elements); + else + this->result.swizzle = BRW_SWIZZLE_NOOP; + + this->result.reg_offset += offset; +} + +/** + * We want to be careful in assignment setup to hit the actual storage + * instead of potentially using a temporary like we might with the + * ir_dereference handler. + */ +static dst_reg +get_assignment_lhs(ir_dereference *ir, vec4_visitor *v) +{ + /* The LHS must be a dereference. If the LHS is a variable indexed array + * access of a vector, it must be separated into a series conditional moves + * before reaching this point (see ir_vec_index_to_cond_assign). + */ + assert(ir->as_dereference()); + ir_dereference_array *deref_array = ir->as_dereference_array(); + if (deref_array) { + assert(!deref_array->array->type->is_vector()); + } + + /* Use the rvalue deref handler for the most part. We'll ignore + * swizzles in it and write swizzles using writemask, though. + */ + ir->accept(v); + return dst_reg(v->result); +} + +void +vec4_visitor::emit_block_move(ir_assignment *ir) +{ + ir->rhs->accept(this); + src_reg src = this->result; + + dst_reg dst = get_assignment_lhs(ir->lhs, this); + + /* FINISHME: This should really set to the correct maximal writemask for each + * FINISHME: component written (in the loops below). + */ + dst.writemask = WRITEMASK_XYZW; + + for (int i = 0; i < type_size(ir->lhs->type); i++) { + vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src); + if (ir->condition) + inst->predicate = BRW_PREDICATE_NORMAL; + + dst.reg_offset++; + src.reg_offset++; + } +} + +void +vec4_visitor::visit(ir_assignment *ir) +{ + if (!ir->lhs->type->is_scalar() && + !ir->lhs->type->is_vector()) { + emit_block_move(ir); + return; + } + + /* Now we're down to just a scalar/vector with writemasks. */ + int i; + + ir->rhs->accept(this); + src_reg src = this->result; + + dst_reg dst = get_assignment_lhs(ir->lhs, this); + + int swizzles[4]; + int first_enabled_chan = 0; + int src_chan = 0; + + assert(ir->lhs->type->is_vector()); + dst.writemask = ir->write_mask; + + for (int i = 0; i < 4; i++) { + if (dst.writemask & (1 << i)) { + first_enabled_chan = BRW_GET_SWZ(src.swizzle, i); + break; + } + } + + /* Swizzle a small RHS vector into the channels being written. + * + * glsl ir treats write_mask as dictating how many channels are + * present on the RHS while in our instructions we need to make + * those channels appear in the slots of the vec4 they're written to. + */ + for (int i = 0; i < 4; i++) { + if (dst.writemask & (1 << i)) + swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++); + else + swizzles[i] = first_enabled_chan; + } + src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], + swizzles[2], swizzles[3]); + + if (ir->condition) { + emit_bool_to_cond_code(ir->condition); + } + + for (i = 0; i < type_size(ir->lhs->type); i++) { + vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src); + + if (ir->condition) + inst->predicate = BRW_PREDICATE_NORMAL; + + dst.reg_offset++; + src.reg_offset++; + } +} + + +void +vec4_visitor::visit(ir_constant *ir) +{ + if (ir->type->base_type == GLSL_TYPE_STRUCT) { + src_reg temp_base = src_reg(this, ir->type); + dst_reg temp = dst_reg(temp_base); + + foreach_iter(exec_list_iterator, iter, ir->components) { + ir_constant *field_value = (ir_constant *)iter.get(); + int size = type_size(field_value->type); + + assert(size > 0); + + field_value->accept(this); + src_reg src = this->result; + + for (int i = 0; i < (unsigned int)size; i++) { + emit(BRW_OPCODE_MOV, temp, src); + + src.reg_offset++; + temp.reg_offset++; + } + } + this->result = temp_base; + return; + } + + if (ir->type->is_array()) { + src_reg temp_base = src_reg(this, ir->type); + dst_reg temp = dst_reg(temp_base); + int size = type_size(ir->type->fields.array); + + assert(size > 0); + + for (unsigned int i = 0; i < ir->type->length; i++) { + ir->array_elements[i]->accept(this); + src_reg src = this->result; + for (int j = 0; j < size; j++) { + emit(BRW_OPCODE_MOV, temp, src); + + src.reg_offset++; + temp.reg_offset++; + } + } + this->result = temp_base; + return; + } + + if (ir->type->is_matrix()) { + this->result = src_reg(this, ir->type); + dst_reg dst = dst_reg(this->result); + + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + + for (int i = 0; i < ir->type->matrix_columns; i++) { + for (int j = 0; j < ir->type->vector_elements; j++) { + dst.writemask = 1 << j; + emit(BRW_OPCODE_MOV, dst, + src_reg(ir->value.f[i * ir->type->vector_elements + j])); + } + dst.reg_offset++; + } + return; + } + + for (int i = 0; i < ir->type->vector_elements; i++) { + this->result = src_reg(this, ir->type); + dst_reg dst = dst_reg(this->result); + + dst.writemask = 1 << i; + + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.f[i])); + break; + case GLSL_TYPE_INT: + emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.i[i])); + break; + case GLSL_TYPE_UINT: + emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.u[i])); + break; + case GLSL_TYPE_BOOL: + emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.b[i])); + break; + default: + assert(!"Non-float/uint/int/bool constant"); + break; + } + } +} + +void +vec4_visitor::visit(ir_call *ir) +{ + assert(!"not reached"); +} + +void +vec4_visitor::visit(ir_texture *ir) +{ + assert(!"not reached"); +} + +void +vec4_visitor::visit(ir_return *ir) +{ + assert(!"not reached"); +} + +void +vec4_visitor::visit(ir_discard *ir) +{ + assert(!"not reached"); +} + +void +vec4_visitor::visit(ir_if *ir) +{ + this->base_ir = ir->condition; + ir->condition->accept(this); + assert(this->result.file != BAD_FILE); + + /* FINISHME: condcode */ + emit(BRW_OPCODE_IF); + + visit_instructions(&ir->then_instructions); + + if (!ir->else_instructions.is_empty()) { + this->base_ir = ir->condition; + emit(BRW_OPCODE_ELSE); + + visit_instructions(&ir->else_instructions); + } + + this->base_ir = ir->condition; + emit(BRW_OPCODE_ENDIF); +} + +int +vec4_visitor::emit_vue_header_gen4(int header_mrf) +{ + /* Get the position */ + src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]); + + /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */ + dst_reg ndc = dst_reg(this, glsl_type::vec4_type); + + current_annotation = "NDC"; + dst_reg ndc_w = ndc; + ndc_w.writemask = WRITEMASK_W; + src_reg pos_w = pos; + pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); + emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w); + + dst_reg ndc_xyz = ndc; + ndc_xyz.writemask = WRITEMASK_XYZ; + + emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w)); + + if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || + c->key.nr_userclip || brw->has_negative_rhw_bug) { + dst_reg header1 = dst_reg(this, glsl_type::uvec4_type); + GLuint i; + + emit(BRW_OPCODE_MOV, header1, 0u); + + if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { + assert(!"finishme: psiz"); + src_reg psiz; + + header1.writemask = WRITEMASK_W; + emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11); + emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8); + } + + for (i = 0; i < c->key.nr_userclip; i++) { + vec4_instruction *inst; + + inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()), + pos, src_reg(c->userplane[i])); + inst->conditional_mod = BRW_CONDITIONAL_L; + + emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i); + inst->predicate = BRW_PREDICATE_NORMAL; + } + + /* i965 clipping workaround: + * 1) Test for -ve rhw + * 2) If set, + * set ndc = (0,0,0,0) + * set ucp[6] = 1 + * + * Later, clipping will detect ucp[6] and ensure the primitive is + * clipped against all fixed planes. + */ + if (brw->has_negative_rhw_bug) { +#if 0 + /* FINISHME */ + brw_CMP(p, + vec8(brw_null_reg()), + BRW_CONDITIONAL_L, + brw_swizzle1(ndc, 3), + brw_imm_f(0)); + + brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6)); + brw_MOV(p, ndc, brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +#endif + } + + header1.writemask = WRITEMASK_XYZW; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1)); + } else { + emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++), + BRW_REGISTER_TYPE_UD), 0u); + } + + if (intel->gen == 5) { + /* There are 20 DWs (D0-D19) in VUE header on Ironlake: + * dword 0-3 (m1) of the header is indices, point width, clip flags. + * dword 4-7 (m2) is the ndc position (set above) + * dword 8-11 (m3) of the vertex header is the 4D space position + * dword 12-19 (m4,m5) of the vertex header is the user clip distance. + * m6 is a pad so that the vertex element data is aligned + * m7 is the first vertex data we fill, which is the vertex position. + */ + current_annotation = "NDC"; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); + + current_annotation = "gl_Position"; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); + + /* user clip distance. */ + header_mrf += 2; + + /* Pad so that vertex element data (starts with position) is aligned. */ + header_mrf++; + } else { + /* There are 8 dwords in VUE header pre-Ironlake: + * dword 0-3 (m1) is indices, point width, clip flags. + * dword 4-7 (m2) is ndc position (set above) + * + * dword 8-11 (m3) is the first vertex data, which we always have be the + * vertex position. + */ + current_annotation = "NDC"; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); + + current_annotation = "gl_Position"; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); + } + + return header_mrf; +} + +int +vec4_visitor::emit_vue_header_gen6(int header_mrf) +{ + struct brw_reg reg; + + /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge: + * dword 0-3 (m2) of the header is indices, point width, clip flags. + * dword 4-7 (m3) is the 4D space position + * dword 8-15 (m4,m5) of the vertex header is the user clip distance if + * enabled. + * + * m4 or 6 is the first vertex element data we fill, which is + * the vertex position. + */ + + current_annotation = "indices, point width, clip flags"; + reg = brw_message_reg(header_mrf++); + emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)); + if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { + emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W), + src_reg(output_reg[VERT_RESULT_PSIZ])); + } + + current_annotation = "gl_Position"; + emit(BRW_OPCODE_MOV, + brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS])); + + current_annotation = "user clip distances"; + if (c->key.nr_userclip) { + for (int i = 0; i < c->key.nr_userclip; i++) { + struct brw_reg m; + if (i < 4) + m = brw_message_reg(header_mrf); + else + m = brw_message_reg(header_mrf + 1); + + emit(BRW_OPCODE_DP4, + dst_reg(brw_writemask(m, 1 << (i & 7))), + src_reg(c->userplane[i])); + } + header_mrf += 2; + } + + current_annotation = NULL; + + return header_mrf; +} + +static int +align_interleaved_urb_mlen(struct brw_context *brw, int mlen) +{ + struct intel_context *intel = &brw->intel; + + if (intel->gen >= 6) { + /* URB data written (does not include the message header reg) must + * be a multiple of 256 bits, or 2 VS registers. See vol5c.5, + * section 5.4.3.2.2: URB_INTERLEAVED. + * + * URB entries are allocated on a multiple of 1024 bits, so an + * extra 128 bits written here to make the end align to 256 is + * no problem. + */ + if ((mlen % 2) != 1) + mlen++; + } + + return mlen; +} + +/** + * Generates the VUE payload plus the 1 or 2 URB write instructions to + * complete the VS thread. + * + * The VUE layout is documented in Volume 2a. + */ +void +vec4_visitor::emit_urb_writes() +{ + int base_mrf = 1; + int mrf = base_mrf; + int urb_entry_size; + + /* FINISHME: edgeflag */ + + /* First mrf is the g0-based message header containing URB handles and such, + * which is implied in VS_OPCODE_URB_WRITE. + */ + mrf++; + + if (intel->gen >= 6) { + mrf = emit_vue_header_gen6(mrf); + } else { + mrf = emit_vue_header_gen4(mrf); + } + + int attr; + for (attr = 0; attr < VERT_RESULT_MAX; attr++) { + if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) + continue; + + /* This is loaded into the VUE header, and thus doesn't occupy + * an attribute slot. + */ + if (attr == VERT_RESULT_PSIZ) + continue; + + emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); + + /* If this is MRF 15, we can't fit anything more into this URB + * WRITE. Note that base_mrf of 1 means that MRF 15 is an + * even-numbered amount of URB write data, which will meet + * gen6's requirements for length alignment. + */ + if (mrf == 15) + break; + } + + vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); + inst->base_mrf = base_mrf; + inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); + inst->eot = true; + + urb_entry_size = mrf - base_mrf; + + for (; attr < VERT_RESULT_MAX; attr++) { + if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) + continue; + fail("Second URB write not supported.\n"); + break; + } + + if (intel->gen == 6) + c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8; + else + c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4; +} + +vec4_visitor::vec4_visitor(struct brw_vs_compile *c, + struct gl_shader_program *prog, + struct brw_shader *shader) +{ + this->c = c; + this->p = &c->func; + this->brw = p->brw; + this->intel = &brw->intel; + this->ctx = &intel->ctx; + this->prog = prog; + this->shader = shader; + + this->mem_ctx = ralloc_context(NULL); + this->failed = false; + + this->base_ir = NULL; + this->current_annotation = NULL; + + this->c = c; + this->prog_data = &c->prog_data; + + this->variable_ht = hash_table_ctor(0, + hash_table_pointer_hash, + hash_table_pointer_compare); + + this->virtual_grf_sizes = NULL; + this->virtual_grf_count = 0; + this->virtual_grf_array_size = 0; +} + +vec4_visitor::~vec4_visitor() +{ + hash_table_dtor(this->variable_ht); +} + + +void +vec4_visitor::fail(const char *format, ...) +{ + va_list va; + char *msg; + + if (failed) + return; + + failed = true; + + va_start(va, format); + msg = ralloc_vasprintf(mem_ctx, format, va); + va_end(va); + msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg); + + this->fail_msg = msg; + + if (INTEL_DEBUG & DEBUG_VS) { + fprintf(stderr, "%s", msg); + } +} + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index a9ad5311fe3..bd0677db151 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -30,6 +30,7 @@ */ +#include "main/compiler.h" #include "brw_context.h" #include "brw_vs.h" #include "brw_util.h" @@ -50,6 +51,7 @@ static void do_vs_prog( struct brw_context *brw, void *mem_ctx; int aux_size; int i; + static int new_vs = -1; memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); @@ -85,7 +87,15 @@ static void do_vs_prog( struct brw_context *brw, /* Emit GEN4 code. */ - brw_vs_emit(&c); + if (new_vs == -1) + new_vs = getenv("INTEL_NEW_VS") != NULL; + + if (new_vs) { + if (!brw_vs_emit(&c)) + brw_old_vs_emit(&c); + } else { + brw_old_vs_emit(&c); + } /* get the program */ diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 432994a8534..9f9fed33970 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -92,6 +92,7 @@ struct brw_vs_compile { GLboolean needs_stack; }; -void brw_vs_emit( struct brw_vs_compile *c ); +bool brw_vs_emit(struct brw_vs_compile *c); +void brw_old_vs_emit(struct brw_vs_compile *c); #endif diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index dbabb44e45c..a06a2bbec52 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1903,7 +1903,7 @@ brw_vs_rescale_gl_fixed(struct brw_vs_compile *c) /* Emit the vertex program instructions here. */ -void brw_vs_emit(struct brw_vs_compile *c ) +void brw_old_vs_emit(struct brw_vs_compile *c ) { #define MAX_IF_DEPTH 32 #define MAX_LOOP_DEPTH 32 -- cgit v1.2.3 From a070d5f363e99b0f846d555e9ca3a74ec807fdc0 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 4 May 2011 12:50:16 -0700 Subject: i965/vs: Start adding support for uniforms There's no clever packing here, no pull constants, and no array support. --- src/mesa/drivers/dri/i965/brw_context.h | 22 +++- src/mesa/drivers/dri/i965/brw_curbe.c | 27 ++-- src/mesa/drivers/dri/i965/brw_vec4.h | 5 + src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 73 ++++++++--- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 150 ++++++++++++++++++++++- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2 +- src/mesa/drivers/dri/i965/gen6_vs_state.c | 21 +++- src/mesa/drivers/dri/i965/gen6_wm_state.c | 2 +- src/mesa/drivers/dri/i965/gen7_wm_state.c | 2 +- 9 files changed, 256 insertions(+), 48 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 7b6b64c1a5c..4a1abd6252e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -248,6 +248,7 @@ enum param_conversion { PARAM_CONVERT_F2I, PARAM_CONVERT_F2U, PARAM_CONVERT_F2B, + PARAM_CONVERT_ZERO, }; /* Data about a particular attempt to compile a program. Note that @@ -317,6 +318,13 @@ struct brw_vs_prog_data { /* Used for calculating urb partitions: */ GLuint urb_entry_size; + + const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */ + enum param_conversion param_convert[MAX_UNIFORMS * 4]; + const float *pull_param[MAX_UNIFORMS * 4]; + enum param_conversion pull_param_convert[MAX_UNIFORMS * 4]; + + bool uses_new_param_layout; }; @@ -898,7 +906,7 @@ brw_fragment_program_const(const struct gl_fragment_program *p) } static inline -float convert_param(enum param_conversion conversion, float param) +float convert_param(enum param_conversion conversion, const float *param) { union { float f; @@ -908,21 +916,23 @@ float convert_param(enum param_conversion conversion, float param) switch (conversion) { case PARAM_NO_CONVERT: - return param; + return *param; case PARAM_CONVERT_F2I: - fi.i = param; + fi.i = *param; return fi.f; case PARAM_CONVERT_F2U: - fi.u = param; + fi.u = *param; return fi.f; case PARAM_CONVERT_F2B: - if (param != 0.0) + if (*param != 0.0) fi.i = 1; else fi.i = 0; return fi.f; + case PARAM_CONVERT_ZERO: + return 0.0; default: - return param; + return *param; } } diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index ae11c487a2c..960be10006e 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -203,7 +203,7 @@ static void prepare_constant_buffer(struct brw_context *brw) /* copy float constants */ for (i = 0; i < brw->wm.prog_data->nr_params; i++) { buf[offset + i] = convert_param(brw->wm.prog_data->param_convert[i], - *brw->wm.prog_data->param[i]); + brw->wm.prog_data->param[i]); } } @@ -244,15 +244,22 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; - /* Load the subset of push constants that will get used when - * we also have a pull constant buffer. - */ - for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { - if (brw->vs.constant_map[i] != -1) { - assert(brw->vs.constant_map[i] <= nr); - memcpy(buf + offset + brw->vs.constant_map[i] * 4, - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); + if (brw->vs.prog_data->uses_new_param_layout) { + for (i = 0; i < brw->vs.prog_data->nr_params; i++) { + buf[offset + i] = convert_param(brw->vs.prog_data->param_convert[i], + brw->vs.prog_data->param[i]); + } + } else { + /* Load the subset of push constants that will get used when + * we also have a pull constant buffer. + */ + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + assert(brw->vs.constant_map[i] <= nr); + memcpy(buf + offset + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + } } } } diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 10168fc1cb0..01058243f04 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -356,6 +356,8 @@ public: * for the ir->location's used. */ dst_reg output_reg[VERT_RESULT_MAX]; + int uniform_size[MAX_UNIFORMS]; + int uniforms; struct hash_table *variable_ht; @@ -363,7 +365,10 @@ public: void fail(const char *msg, ...); int virtual_grf_alloc(int size); + int setup_uniform_values(int loc, const glsl_type *type); + void setup_builtin_uniform_values(ir_variable *ir); int setup_attributes(int payload_reg); + int setup_uniforms(int payload_reg); void setup_payload(); void reg_allocate_trivial(); void reg_allocate(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index bdc7a79d83d..1f2853e1118 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -67,20 +67,12 @@ vec4_visitor::setup_attributes(int payload_reg) prog_data->urb_read_length = (nr_attributes + 1) / 2; - return nr_attributes; + return payload_reg + nr_attributes; } -void -vec4_visitor::setup_payload(void) +int +vec4_visitor::setup_uniforms(int reg) { - int reg = 0; - - /* r0 is always reserved, as it contains the payload with the URB - * handles that are passed on to the URB write at the end of the - * thread. - */ - reg++; - /* User clip planes from curbe: */ if (c->key.nr_userclip) { @@ -99,14 +91,49 @@ vec4_visitor::setup_payload(void) } } - /* FINISHME: push constants */ + /* The pre-gen6 VS requires that some push constants get loaded no + * matter what, or the GPU would hang. + */ + if (this->uniforms == 0) { + this->uniform_size[this->uniforms] = 1; + + for (unsigned int i = 0; i < 4; i++) { + unsigned int slot = this->uniforms * 4 + i; + + c->prog_data.param[slot] = NULL; + c->prog_data.param_convert[slot] = PARAM_CONVERT_ZERO; + } + + this->uniforms++; + } else { + reg += ALIGN(uniforms, 2) / 2; + } + + /* for now, we are not doing any elimination of unused slots, nor + * are we packing our uniforms. + */ + c->prog_data.nr_params = this->uniforms * 4; + c->prog_data.curb_read_length = reg - 1; - c->prog_data.nr_params = 0; - /* XXX 0 causes a bug elsewhere... */ - if (intel->gen < 6 && c->prog_data.nr_params == 0) - c->prog_data.nr_params = 4; + c->prog_data.uses_new_param_layout = true; + + return reg; +} + +void +vec4_visitor::setup_payload(void) +{ + int reg = 0; + + /* The payload always contains important data in g0, which contains + * the URB handles that are passed on to the URB write at the end + * of the thread. So, we always start push constants at g1. + */ + reg++; - reg += setup_attributes(reg); + reg = setup_uniforms(reg); + + reg = setup_attributes(reg); this->first_non_payload_grf = reg; } @@ -174,6 +201,18 @@ vec4_instruction::get_src(int i) } break; + case UNIFORM: + brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2, + ((src[i].reg + src[i].reg_offset) % 2) * 4), + 0, 4, 1); + brw_reg = retype(brw_reg, src[i].type); + brw_reg.dw1.bits.swizzle = src[i].swizzle; + if (src[i].abs) + brw_reg = brw_abs(brw_reg); + if (src[i].negate) + brw_reg = negate(brw_reg); + break; + case HW_REG: brw_reg = src[i].fixed_hw_reg; break; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index bba1d810f19..91abd40faad 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -22,7 +22,10 @@ */ #include "brw_vec4.h" +extern "C" { #include "main/macros.h" +#include "program/prog_parameter.h" +} namespace brw { @@ -306,6 +309,130 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) this->type = brw_type_for_base_type(type); } +/* Our support for uniforms is piggy-backed on the struct + * gl_fragment_program, because that's where the values actually + * get stored, rather than in some global gl_shader_program uniform + * store. + */ +int +vec4_visitor::setup_uniform_values(int loc, const glsl_type *type) +{ + unsigned int offset = 0; + float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f; + + if (type->is_matrix()) { + const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, + type->vector_elements, + 1); + + for (unsigned int i = 0; i < type->matrix_columns; i++) { + offset += setup_uniform_values(loc + offset, column); + } + + return offset; + } + + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_BOOL: + for (unsigned int i = 0; i < type->vector_elements; i++) { + int slot = this->uniforms * 4 + i; + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; + break; + case GLSL_TYPE_UINT: + c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U; + break; + case GLSL_TYPE_INT: + c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I; + break; + case GLSL_TYPE_BOOL: + c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B; + break; + default: + assert(!"not reached"); + c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; + break; + } + c->prog_data.param[slot] = &values[i]; + } + + for (unsigned int i = type->vector_elements; i < 4; i++) { + c->prog_data.param_convert[this->uniforms * 4 + i] = + PARAM_CONVERT_ZERO; + c->prog_data.param[this->uniforms * 4 + i] = NULL; + } + + this->uniform_size[this->uniforms] = type->vector_elements; + this->uniforms++; + + return 1; + + case GLSL_TYPE_STRUCT: + for (unsigned int i = 0; i < type->length; i++) { + offset += setup_uniform_values(loc + offset, + type->fields.structure[i].type); + } + return offset; + + case GLSL_TYPE_ARRAY: + for (unsigned int i = 0; i < type->length; i++) { + offset += setup_uniform_values(loc + offset, type->fields.array); + } + return offset; + + case GLSL_TYPE_SAMPLER: + /* The sampler takes up a slot, but we don't use any values from it. */ + return 1; + + default: + assert(!"not reached"); + return 0; + } +} + +/* Our support for builtin uniforms is even scarier than non-builtin. + * It sits on top of the PROG_STATE_VAR parameters that are + * automatically updated from GL context state. + */ +void +vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) +{ + const ir_state_slot *const slots = ir->state_slots; + assert(ir->state_slots != NULL); + + for (unsigned int i = 0; i < ir->num_state_slots; i++) { + /* This state reference has already been setup by ir_to_mesa, + * but we'll get the same index back here. We can reference + * ParameterValues directly, since unlike brw_fs.cpp, we never + * add new state references during compile. + */ + int index = _mesa_add_state_reference(this->vp->Base.Parameters, + (gl_state_index *)slots[i].tokens); + float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f; + + this->uniform_size[this->uniforms] = 0; + /* Add each of the unique swizzled channels of the element. + * This will end up matching the size of the glsl_type of this field. + */ + int last_swiz = -1; + for (unsigned int j = 0; j < 4; j++) { + int swiz = GET_SWZ(slots[i].swizzle, j); + if (swiz == last_swiz) + break; + last_swiz = swiz; + + c->prog_data.param[this->uniforms * 4 + j] = &values[swiz]; + c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT; + this->uniform_size[this->uniforms]++; + } + this->uniforms++; + } +} + dst_reg * vec4_visitor::variable_storage(ir_variable *var) { @@ -496,13 +623,10 @@ vec4_visitor::visit(ir_variable *ir) switch (ir->mode) { case ir_var_in: reg = new(mem_ctx) dst_reg(ATTR, ir->location); - reg->type = brw_type_for_base_type(ir->type); - hash_table_insert(this->variable_ht, reg, ir); break; case ir_var_out: reg = new(mem_ctx) dst_reg(this, ir->type); - hash_table_insert(this->variable_ht, reg, ir); for (int i = 0; i < type_size(ir->type); i++) { output_reg[ir->location + i] = *reg; @@ -512,14 +636,21 @@ vec4_visitor::visit(ir_variable *ir) case ir_var_temporary: reg = new(mem_ctx) dst_reg(this, ir->type); - hash_table_insert(this->variable_ht, reg, ir); - break; case ir_var_uniform: - /* FINISHME: uniforms */ + reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms); + + if (!strncmp(ir->name, "gl_", 3)) { + setup_builtin_uniform_values(ir); + } else { + setup_uniform_values(ir->location, ir->type); + } break; } + + reg->type = brw_type_for_base_type(ir->type); + hash_table_insert(this->variable_ht, reg, ir); } void @@ -1606,6 +1737,7 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c, this->current_annotation = NULL; this->c = c; + this->vp = brw->vertex_program; /* FINISHME: change for precompile */ this->prog_data = &c->prog_data; this->variable_ht = hash_table_ctor(0, @@ -1615,6 +1747,12 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c, this->virtual_grf_sizes = NULL; this->virtual_grf_count = 0; this->virtual_grf_array_size = 0; + + this->uniforms = 0; + + this->variable_ht = hash_table_ctor(0, + hash_table_pointer_hash, + hash_table_pointer_compare); } vec4_visitor::~vec4_visitor() diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index fb4fb146f8d..ad909789d82 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -342,7 +342,7 @@ prepare_wm_pull_constants(struct brw_context *brw) constants = brw->wm.const_bo->virtual; for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) { constants[i] = convert_param(brw->wm.prog_data->pull_param_convert[i], - *brw->wm.prog_data->pull_param[i]); + brw->wm.prog_data->pull_param[i]); } drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo); diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index e70454416bf..affa72c7324 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -81,12 +81,21 @@ gen6_prepare_vs_push_constants(struct brw_context *brw) params_uploaded++; } - for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { - if (brw->vs.constant_map[i] != -1) { - memcpy(param + brw->vs.constant_map[i] * 4, - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); - params_uploaded++; + if (brw->vs.prog_data->uses_new_param_layout) { + for (i = 0; i < brw->vs.prog_data->nr_params; i++) { + *param = convert_param(brw->vs.prog_data->param_convert[i], + brw->vs.prog_data->param[i]); + param++; + } + params_uploaded += brw->vs.prog_data->nr_params / 4; + } else { + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + memcpy(param + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + params_uploaded++; + } } } diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 3d525248f25..07e9995f53b 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -61,7 +61,7 @@ gen6_prepare_wm_push_constants(struct brw_context *brw) for (i = 0; i < brw->wm.prog_data->nr_params; i++) { constants[i] = convert_param(brw->wm.prog_data->param_convert[i], - *brw->wm.prog_data->param[i]); + brw->wm.prog_data->param[i]); } if (0) { diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index a102ca772b3..1d80e96778e 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -58,7 +58,7 @@ gen7_prepare_wm_constants(struct brw_context *brw) for (i = 0; i < brw->wm.prog_data->nr_params; i++) { constants[i] = convert_param(brw->wm.prog_data->param_convert[i], - *brw->wm.prog_data->param[i]); + brw->wm.prog_data->param[i]); } if (0) { -- cgit v1.2.3 From 83d5850518388202c5589d3181b84fb54c213fb1 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 16:18:00 -0700 Subject: i965/vs: Fix constant vector construction. Fixes some issues noticed in glsl-vs-all-01. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 91abd40faad..8ee4884098c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1391,10 +1391,10 @@ vec4_visitor::visit(ir_constant *ir) return; } - for (int i = 0; i < ir->type->vector_elements; i++) { - this->result = src_reg(this, ir->type); - dst_reg dst = dst_reg(this->result); + this->result = src_reg(this, ir->type); + dst_reg dst = dst_reg(this->result); + for (int i = 0; i < ir->type->vector_elements; i++) { dst.writemask = 1 << i; switch (ir->type->base_type) { -- cgit v1.2.3 From 4a4857246c79c42d918a84d7e28e9afff3a9ef6d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 16:23:42 -0700 Subject: i965/vs: Port the fix for clip plane writemasks from brw_vs_emit.c. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 8ee4884098c..439969ab7ea 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1618,7 +1618,7 @@ vec4_visitor::emit_vue_header_gen6(int header_mrf) m = brw_message_reg(header_mrf + 1); emit(BRW_OPCODE_DP4, - dst_reg(brw_writemask(m, 1 << (i & 7))), + dst_reg(brw_writemask(m, 1 << (i & 3))), src_reg(c->userplane[i])); } header_mrf += 2; -- cgit v1.2.3 From c0f334a3ed3c6645abd1812e39cd52f1dfa32fa1 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 16:29:48 -0700 Subject: i965/vs: Don't emit an extra copy of the vertex position. Fixes glsl-vs-abs-neg, glsl-vs-all-01, and probably many other tests. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 439969ab7ea..c4a3c8a8667 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1550,7 +1550,7 @@ vec4_visitor::emit_vue_header_gen4(int header_mrf) * dword 8-11 (m3) of the vertex header is the 4D space position * dword 12-19 (m4,m5) of the vertex header is the user clip distance. * m6 is a pad so that the vertex element data is aligned - * m7 is the first vertex data we fill, which is the vertex position. + * m7 is the first vertex data we fill. */ current_annotation = "NDC"; emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); @@ -1561,15 +1561,14 @@ vec4_visitor::emit_vue_header_gen4(int header_mrf) /* user clip distance. */ header_mrf += 2; - /* Pad so that vertex element data (starts with position) is aligned. */ + /* Pad so that vertex element data is aligned. */ header_mrf++; } else { /* There are 8 dwords in VUE header pre-Ironlake: * dword 0-3 (m1) is indices, point width, clip flags. * dword 4-7 (m2) is ndc position (set above) * - * dword 8-11 (m3) is the first vertex data, which we always have be the - * vertex position. + * dword 8-11 (m3) is the first vertex data. */ current_annotation = "NDC"; emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); @@ -1592,8 +1591,7 @@ vec4_visitor::emit_vue_header_gen6(int header_mrf) * dword 8-15 (m4,m5) of the vertex header is the user clip distance if * enabled. * - * m4 or 6 is the first vertex element data we fill, which is - * the vertex position. + * m4 or 6 is the first vertex element data we fill. */ current_annotation = "indices, point width, clip flags"; @@ -1681,6 +1679,10 @@ vec4_visitor::emit_urb_writes() if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) continue; + /* This is set up in the VUE header. */ + if (attr == VERT_RESULT_HPOS) + continue; + /* This is loaded into the VUE header, and thus doesn't occupy * an attribute slot. */ -- cgit v1.2.3 From 82aa9299fbfe92d2526fa9f7ffd2a1ebc7827ee9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 16:31:30 -0700 Subject: i965/vs: Allow scalar values in assignments, too. Fixes glsl-vs-all-02 and many other tests. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index c4a3c8a8667..e3779ab0444 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1285,7 +1285,8 @@ vec4_visitor::visit(ir_assignment *ir) int first_enabled_chan = 0; int src_chan = 0; - assert(ir->lhs->type->is_vector()); + assert(ir->lhs->type->is_vector() || + ir->lhs->type->is_scalar()); dst.writemask = ir->write_mask; for (int i = 0; i < 4; i++) { -- cgit v1.2.3 From 78fac1892a3a7a90eb7baf78903d70649028d27a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 16:35:24 -0700 Subject: i965/vs: Allocate storage for "auto" variables just like temps. Fixes segfault in glsl-vs-cross-2. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index e3779ab0444..3e62c9ebba8 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -634,6 +634,7 @@ vec4_visitor::visit(ir_variable *ir) } break; + case ir_var_auto: case ir_var_temporary: reg = new(mem_ctx) dst_reg(this, ir->type); break; @@ -647,6 +648,9 @@ vec4_visitor::visit(ir_variable *ir) setup_uniform_values(ir->location, ir->type); } break; + + default: + assert(!"not reached"); } reg->type = brw_type_for_base_type(ir->type); -- cgit v1.2.3 From bb468fc1ede9b0a5231ebfaa51df444502d33654 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 16:37:18 -0700 Subject: i965/vs: Fix ir_swizzle handling. I decided to refactor it a bit in adapting ir_to_mesa.cpp code, and mangled it. Fixes glsl-vs-cross-2. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 3e62c9ebba8..4f2a2011068 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1109,7 +1109,7 @@ vec4_visitor::visit(ir_swizzle *ir) src = this->result; assert(src.file != BAD_FILE); - if (i < ir->type->vector_elements) { + for (i = 0; i < ir->type->vector_elements; i++) { switch (i) { case 0: swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x); -- cgit v1.2.3 From aa753c5a14637ede804e8043762693122174bf8c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 19:05:42 -0700 Subject: i965/vs: Disable loops for now until rendering is generally correct. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 4f2a2011068..c3b55db4ac1 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -662,6 +662,8 @@ vec4_visitor::visit(ir_loop *ir) { ir_dereference_variable *counter = NULL; + fail("not yet\n"); + /* We don't want debugging output to print the whole body of the * loop as the annotation. */ -- cgit v1.2.3 From 164ccd27787e0df4ae6f85a7178aff0720d56ac9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 19:12:16 -0700 Subject: i965/vs: Fix support for "IF" instructions by copying brw_fs_visitor.cpp. Fixes glsl-vs-if-greater. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index c3b55db4ac1..014f7e62284 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1451,12 +1451,18 @@ vec4_visitor::visit(ir_discard *ir) void vec4_visitor::visit(ir_if *ir) { + /* Don't point the annotation at the if statement, because then it plus + * the then and else blocks get printed. + */ this->base_ir = ir->condition; - ir->condition->accept(this); - assert(this->result.file != BAD_FILE); - /* FINISHME: condcode */ - emit(BRW_OPCODE_IF); + if (intel->gen == 6) { + emit_if_gen6(ir); + } else { + emit_bool_to_cond_code(ir->condition); + vec4_instruction *inst = emit(BRW_OPCODE_IF); + inst->predicate = BRW_PREDICATE_NORMAL; + } visit_instructions(&ir->then_instructions); -- cgit v1.2.3 From eca762d831e099b549dafa0be896eac82b3fceb9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 19:18:31 -0700 Subject: i965/vs: Fix support for zero uniforms in use. We were looking for attributes in the wrong place, and pointlessly doing the work on gen6 at all. --- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 1f2853e1118..be089369bcf 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -94,7 +94,7 @@ vec4_visitor::setup_uniforms(int reg) /* The pre-gen6 VS requires that some push constants get loaded no * matter what, or the GPU would hang. */ - if (this->uniforms == 0) { + if (intel->gen < 6 && this->uniforms == 0) { this->uniform_size[this->uniforms] = 1; for (unsigned int i = 0; i < 4; i++) { @@ -105,6 +105,7 @@ vec4_visitor::setup_uniforms(int reg) } this->uniforms++; + reg++; } else { reg += ALIGN(uniforms, 2) / 2; } -- cgit v1.2.3 From e5363c7fd2ed6318e86ba4a62adc0c2377e51eef Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 19:29:41 -0700 Subject: i965/vs: Use an appropriate swizzle on src regs from variables. Fixes glsl-vs-if-bool. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 014f7e62284..734e2514536 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1140,6 +1140,7 @@ vec4_visitor::visit(ir_swizzle *ir) void vec4_visitor::visit(ir_dereference_variable *ir) { + const struct glsl_type *type = ir->type; dst_reg *reg = variable_storage(ir->var); if (!reg) { @@ -1149,6 +1150,9 @@ vec4_visitor::visit(ir_dereference_variable *ir) } this->result = src_reg(*reg); + + if (type->is_scalar() || type->is_vector() || type->is_matrix()) + this->result.swizzle = swizzle_for_size(type->vector_elements); } void -- cgit v1.2.3 From 814a9bef30beda427e8fbf6f3b8abb6a45f0e2e4 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 19:31:53 -0700 Subject: i965/vs: Drop the assertion about dst.reg_offset == 0. Adding the offset is the right thing to do here, and fixes glsl-vs-mat-add-1. --- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index be089369bcf..a41c58c7d52 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -146,7 +146,6 @@ vec4_instruction::get_dst(void) switch (dst.file) { case GRF: - assert(dst.reg_offset == 0); brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0); brw_reg = retype(brw_reg, dst.type); brw_reg.dw1.bits.writemask = dst.writemask; -- cgit v1.2.3 From 8e947c2546c25c0dfa93b538e54113af1bf582df Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 19:38:44 -0700 Subject: i965/vs: Fix the types of array/struct dereferences. Fixes glsl-vs-arrays-3. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 734e2514536..b6f3cbc265f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1198,6 +1198,7 @@ vec4_visitor::visit(ir_dereference_array *ir) src.swizzle = swizzle_for_size(ir->type->vector_elements); else src.swizzle = BRW_SWIZZLE_NOOP; + src.type = brw_type_for_base_type(ir->type); this->result = src; } @@ -1222,6 +1223,7 @@ vec4_visitor::visit(ir_dereference_record *ir) this->result.swizzle = swizzle_for_size(ir->type->vector_elements); else this->result.swizzle = BRW_SWIZZLE_NOOP; + this->result.type = brw_type_for_base_type(ir->type); this->result.reg_offset += offset; } -- cgit v1.2.3 From c3752b399ab376aa53392afb8f2d4b526054f0a8 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 19:40:46 -0700 Subject: i965/vs: Add support for dot product opcodes. Fixes glsl-vs-dot-vec2. --- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index a41c58c7d52..71caf907b38 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -447,6 +447,18 @@ vec4_visitor::generate_code() brw_SEL(p, dst, src[0], src[1]); break; + case BRW_OPCODE_DP4: + brw_DP4(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_DP3: + brw_DP3(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_DP2: + brw_DP2(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_IF: if (inst->src[0].file != BAD_FILE) { /* The instruction has an embedded compare (only allowed on gen6) */ -- cgit v1.2.3 From 2b7632aeaa5f8b4ab3da7d33a3c71c71023a072a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 20:03:31 -0700 Subject: i965/vs: Add support for if(any_nequal()) and if(all_equal()) on gen6. Fixes vs-temp-array-mat2-col-rd.shader_test. --- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 3 ++- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 23 ++++++++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 71caf907b38..bc3110b0458 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -465,7 +465,8 @@ vec4_visitor::generate_code() assert(intel->gen == 6); gen6_IF(p, inst->conditional_mod, src[0], src[1]); } else { - brw_IF(p, BRW_EXECUTE_8); + struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8); + brw_inst->header.predicate_control = inst->predicate; } if_depth_in_loop[loop_stack_depth]++; break; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index b6f3cbc265f..4237373c13d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -543,7 +543,9 @@ vec4_visitor::emit_if_gen6(ir_if *ir) assert(expr->get_num_operands() <= 2); for (unsigned int i = 0; i < expr->get_num_operands(); i++) { - assert(expr->operands[i]->type->is_scalar()); + assert(expr->operands[i]->type->is_scalar() || + expr->operation == ir_binop_any_nequal || + expr->operation == ir_binop_all_equal); expr->operands[i]->accept(this); op[i] = this->result; @@ -589,13 +591,28 @@ vec4_visitor::emit_if_gen6(ir_if *ir) case ir_binop_less: case ir_binop_lequal: case ir_binop_equal: - case ir_binop_all_equal: case ir_binop_nequal: - case ir_binop_any_nequal: inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); inst->conditional_mod = brw_conditional_for_comparison(expr->operation); return; + + case ir_binop_all_equal: + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_Z; + + inst = emit(BRW_OPCODE_IF); + inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; + return; + + case ir_binop_any_nequal: + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + inst = emit(BRW_OPCODE_IF); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + return; + default: assert(!"not reached"); inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); -- cgit v1.2.3 From cda28bca0d789c328d19bf90afd35a5ff74cfb77 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 20:16:21 -0700 Subject: i965/vs: Apply the gen6 math workaround for math1 instructions. Fixes glsl-vs-masked-cos. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 4237373c13d..7e0535b5c02 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -122,14 +122,12 @@ vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src) { /* The gen6 math instruction ignores the source modifiers -- * swizzle, abs, negate, and at least some parts of the register - * region description. Move the source to the corresponding slots - * of the destination generally work. + * region description. */ - src_reg expanded = src_reg(this, glsl_type::float_type); - emit(BRW_OPCODE_MOV, dst, src); - src = expanded; + src_reg temp_src = src_reg(this, glsl_type::vec4_type); + emit(BRW_OPCODE_MOV, dst_reg(temp_src), src); - emit(opcode, dst, src); + emit(opcode, dst, temp_src); } void -- cgit v1.2.3 From 930afd1774bdcd013bccbd7b5717ae0bb8e3dea3 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 20:26:48 -0700 Subject: i965/vs: Don't forget to set up assignment condition code for arrays/structs. Fixes vs-uniform-array-mat2-index-col-rd. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 7e0535b5c02..27620c47085 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1276,6 +1276,10 @@ vec4_visitor::emit_block_move(ir_assignment *ir) dst_reg dst = get_assignment_lhs(ir->lhs, this); + if (ir->condition) { + emit_bool_to_cond_code(ir->condition); + } + /* FINISHME: This should really set to the correct maximal writemask for each * FINISHME: component written (in the loops below). */ -- cgit v1.2.3 From 979072613139870f12e329e4b483c7f688b40560 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 20:46:03 -0700 Subject: i965/vs: Handle assignment of structures/arrays/matrices better. This gets the right types on the instructions, as well as emitting minimal swizzles/writemasks. --- src/mesa/drivers/dri/i965/brw_vec4.h | 3 +- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 69 ++++++++++++++++++-------- 2 files changed, 51 insertions(+), 21 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 01058243f04..1619c2e1ef6 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -390,7 +390,8 @@ public: void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1); void emit_if_gen6(ir_if *ir); - void emit_block_move(ir_assignment *ir); + void emit_block_move(dst_reg *dst, src_reg *src, + const struct glsl_type *type, bool predicated); /** * Emit the correct dot-product instruction for the type of arguments diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 27620c47085..4f7763d61bb 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1269,38 +1269,69 @@ get_assignment_lhs(ir_dereference *ir, vec4_visitor *v) } void -vec4_visitor::emit_block_move(ir_assignment *ir) +vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src, + const struct glsl_type *type, bool predicated) { - ir->rhs->accept(this); - src_reg src = this->result; - - dst_reg dst = get_assignment_lhs(ir->lhs, this); + if (type->base_type == GLSL_TYPE_STRUCT) { + for (unsigned int i = 0; i < type->length; i++) { + emit_block_move(dst, src, type->fields.structure[i].type, predicated); + } + return; + } - if (ir->condition) { - emit_bool_to_cond_code(ir->condition); + if (type->is_array()) { + for (unsigned int i = 0; i < type->length; i++) { + emit_block_move(dst, src, type->fields.array, predicated); + } + return; } - /* FINISHME: This should really set to the correct maximal writemask for each - * FINISHME: component written (in the loops below). - */ - dst.writemask = WRITEMASK_XYZW; + if (type->is_matrix()) { + const struct glsl_type *vec_type; - for (int i = 0; i < type_size(ir->lhs->type); i++) { - vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src); - if (ir->condition) - inst->predicate = BRW_PREDICATE_NORMAL; + vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, + type->vector_elements, 1); - dst.reg_offset++; - src.reg_offset++; + for (int i = 0; i < type->matrix_columns; i++) { + emit_block_move(dst, src, vec_type, predicated); + } + return; } + + assert(type->is_scalar() || type->is_vector()); + + dst->type = brw_type_for_base_type(type); + src->type = dst->type; + + dst->writemask = (1 << type->vector_elements) - 1; + + /* Do we need to worry about swizzling a swizzle? */ + assert(src->swizzle = BRW_SWIZZLE_NOOP); + src->swizzle = swizzle_for_size(type->vector_elements); + + vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src); + if (predicated) + inst->predicate = BRW_PREDICATE_NORMAL; + + dst->reg_offset++; + src->reg_offset++; } void vec4_visitor::visit(ir_assignment *ir) { + dst_reg dst = get_assignment_lhs(ir->lhs, this); + if (!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()) { - emit_block_move(ir); + ir->rhs->accept(this); + src_reg src = this->result; + + if (ir->condition) { + emit_bool_to_cond_code(ir->condition); + } + + emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL); return; } @@ -1310,8 +1341,6 @@ vec4_visitor::visit(ir_assignment *ir) ir->rhs->accept(this); src_reg src = this->result; - dst_reg dst = get_assignment_lhs(ir->lhs, this); - int swizzles[4]; int first_enabled_chan = 0; int src_chan = 0; -- cgit v1.2.3 From aba9801996f2f524a765df378c234a7645b3a5d1 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 20:54:25 -0700 Subject: i965/vs: Fix types of varying outputs. For structs/arrays/matrices, they were ending up as uint because we forgot to set them. All varyings in GLSL 1.20 are of base type float, so just force the matter here (which gets inherited at emit_urb_writes() time). Fixes vs-varying-array-mat2-col-rd. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 4f7763d61bb..5e2b3e5a5fe 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -646,6 +646,7 @@ vec4_visitor::visit(ir_variable *ir) for (int i = 0; i < type_size(ir->type); i++) { output_reg[ir->location + i] = *reg; output_reg[ir->location + i].reg_offset = i; + output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F; } break; -- cgit v1.2.3 From 31ef2e3ec2f5837eea0899b4bda5ea15e335a6a2 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 21:22:36 -0700 Subject: i965/vs: Avoid generating extra moves when setting up large ir_constants. We were also screwing up the types in the process, and just not emitting moves was easier. --- src/mesa/drivers/dri/i965/brw_vec4.h | 2 + src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 77 +++++++++----------------- 2 files changed, 28 insertions(+), 51 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 1619c2e1ef6..3e457fc61aa 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -393,6 +393,8 @@ public: void emit_block_move(dst_reg *dst, src_reg *src, const struct glsl_type *type, bool predicated); + void emit_constant_values(dst_reg *dst, ir_constant *value); + /** * Emit the correct dot-product instruction for the type of arguments */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 5e2b3e5a5fe..3562779413f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1387,96 +1387,71 @@ vec4_visitor::visit(ir_assignment *ir) } } - void -vec4_visitor::visit(ir_constant *ir) +vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir) { if (ir->type->base_type == GLSL_TYPE_STRUCT) { - src_reg temp_base = src_reg(this, ir->type); - dst_reg temp = dst_reg(temp_base); - - foreach_iter(exec_list_iterator, iter, ir->components) { - ir_constant *field_value = (ir_constant *)iter.get(); - int size = type_size(field_value->type); - - assert(size > 0); - - field_value->accept(this); - src_reg src = this->result; - - for (int i = 0; i < (unsigned int)size; i++) { - emit(BRW_OPCODE_MOV, temp, src); + foreach_list(node, &ir->components) { + ir_constant *field_value = (ir_constant *)node; - src.reg_offset++; - temp.reg_offset++; - } + emit_constant_values(dst, field_value); } - this->result = temp_base; return; } if (ir->type->is_array()) { - src_reg temp_base = src_reg(this, ir->type); - dst_reg temp = dst_reg(temp_base); - int size = type_size(ir->type->fields.array); - - assert(size > 0); - for (unsigned int i = 0; i < ir->type->length; i++) { - ir->array_elements[i]->accept(this); - src_reg src = this->result; - for (int j = 0; j < size; j++) { - emit(BRW_OPCODE_MOV, temp, src); - - src.reg_offset++; - temp.reg_offset++; - } + emit_constant_values(dst, ir->array_elements[i]); } - this->result = temp_base; return; } if (ir->type->is_matrix()) { - this->result = src_reg(this, ir->type); - dst_reg dst = dst_reg(this->result); - - assert(ir->type->base_type == GLSL_TYPE_FLOAT); - for (int i = 0; i < ir->type->matrix_columns; i++) { for (int j = 0; j < ir->type->vector_elements; j++) { - dst.writemask = 1 << j; - emit(BRW_OPCODE_MOV, dst, + dst->writemask = 1 << j; + dst->type = BRW_REGISTER_TYPE_F; + + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i * ir->type->vector_elements + j])); } - dst.reg_offset++; + dst->reg_offset++; } return; } - this->result = src_reg(this, ir->type); - dst_reg dst = dst_reg(this->result); - for (int i = 0; i < ir->type->vector_elements; i++) { - dst.writemask = 1 << i; + dst->writemask = 1 << i; + dst->type = brw_type_for_base_type(ir->type); switch (ir->type->base_type) { case GLSL_TYPE_FLOAT: - emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.f[i])); + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i])); break; case GLSL_TYPE_INT: - emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.i[i])); + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i])); break; case GLSL_TYPE_UINT: - emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.u[i])); + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i])); break; case GLSL_TYPE_BOOL: - emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.b[i])); + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i])); break; default: assert(!"Non-float/uint/int/bool constant"); break; } } + dst->reg_offset++; +} + +void +vec4_visitor::visit(ir_constant *ir) +{ + dst_reg dst = dst_reg(this, ir->type); + this->result = src_reg(dst); + + emit_constant_values(&dst, ir); } void -- cgit v1.2.3 From 160a5a3ff0fc826a2978c6bea6de21b445f612e9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 21:53:00 -0700 Subject: i965/vs: Add support for VUEs larger than a single URB write. Fixes glsl-max-varyings. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 42 +++++++++++++++++++++----- 1 file changed, 34 insertions(+), 8 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 3562779413f..f90025c8e7e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1703,6 +1703,7 @@ vec4_visitor::emit_urb_writes() int base_mrf = 1; int mrf = base_mrf; int urb_entry_size; + uint64_t outputs_remaining = c->prog_data.outputs_written; /* FINISHME: edgeflag */ @@ -1717,11 +1718,14 @@ vec4_visitor::emit_urb_writes() mrf = emit_vue_header_gen4(mrf); } + /* Set up the VUE data for the first URB write */ int attr; for (attr = 0; attr < VERT_RESULT_MAX; attr++) { if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) continue; + outputs_remaining &= ~BITFIELD64_BIT(attr); + /* This is set up in the VUE header. */ if (attr == VERT_RESULT_HPOS) continue; @@ -1734,27 +1738,49 @@ vec4_visitor::emit_urb_writes() emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); - /* If this is MRF 15, we can't fit anything more into this URB + /* If this was MRF 15, we can't fit anything more into this URB * WRITE. Note that base_mrf of 1 means that MRF 15 is an * even-numbered amount of URB write data, which will meet * gen6's requirements for length alignment. */ - if (mrf == 15) + if (mrf == 16) { + attr++; break; + } } vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); inst->base_mrf = base_mrf; inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); - inst->eot = true; + inst->eot = !outputs_remaining; urb_entry_size = mrf - base_mrf; - for (; attr < VERT_RESULT_MAX; attr++) { - if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) - continue; - fail("Second URB write not supported.\n"); - break; + /* Optional second URB write */ + if (outputs_remaining) { + mrf = base_mrf + 1; + + for (; attr < VERT_RESULT_MAX; attr++) { + if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) + continue; + + emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); + + assert(mrf != 16); + } + + inst = emit(VS_OPCODE_URB_WRITE); + inst->base_mrf = base_mrf; + inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); + inst->eot = true; + /* URB destination offset. In the previous write, we got MRFs 2- + * 15 MRFs minus the one header MRF, so 14 regs. URB offset is in + * URB row increments, and each of our MRFs is half of one of + * those, since we're doing interleaved writes. + */ + inst->offset = 14 / 2; + + urb_entry_size += mrf - base_mrf; } if (intel->gen == 6) -- cgit v1.2.3 From e355b179b2bd42a585464f17759764083fa3ef26 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Aug 2011 10:43:49 -0700 Subject: i965: Remove dead brw->wm.max_threads field. --- src/mesa/drivers/dri/i965/brw_context.h | 1 - 1 file changed, 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 4a1abd6252e..38b13098bc0 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -735,7 +735,6 @@ struct brw_context GLuint render_surf; GLuint nr_surfaces; - GLuint max_threads; drm_intel_bo *scratch_bo; GLuint sampler_count; -- cgit v1.2.3 From 2b224d66a01f3ce867fb05558b25749705bbfe7a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Aug 2011 10:44:15 -0700 Subject: i965: Set up allocation of a VS scratch space if required. --- src/mesa/drivers/dri/i965/brw_context.h | 6 ++++++ src/mesa/drivers/dri/i965/brw_program.c | 28 ++++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_vs.c | 9 +++++++++ src/mesa/drivers/dri/i965/brw_vs.h | 1 + src/mesa/drivers/dri/i965/brw_wm.c | 25 +++---------------------- 5 files changed, 47 insertions(+), 22 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 38b13098bc0..add8c568795 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -312,6 +312,7 @@ struct brw_vs_prog_data { GLuint total_grf; GLbitfield64 outputs_written; GLuint nr_params; /**< number of float params/constants */ + GLuint total_scratch; GLuint inputs_read; @@ -671,6 +672,7 @@ struct brw_context struct brw_vs_prog_data *prog_data; int8_t *constant_map; /* variable array following prog_data */ + drm_intel_bo *scratch_bo; drm_intel_bo *const_bo; /** Offset in the program cache to the VS program */ uint32_t prog_offset; @@ -858,6 +860,10 @@ void brw_validate_textures( struct brw_context *brw ); */ void brwInitFragProgFuncs( struct dd_function_table *functions ); +int brw_get_scratch_size(int size); +void brw_get_scratch_bo(struct intel_context *intel, + drm_intel_bo **scratch_bo, int size); + /* brw_urb.c */ diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 6674f1640c8..09b5be4c96e 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -226,6 +226,34 @@ static GLboolean brwProgramStringNotify( struct gl_context *ctx, return GL_TRUE; } +/* Per-thread scratch space is a power-of-two multiple of 1KB. */ +int +brw_get_scratch_size(int size) +{ + int i; + + for (i = 1024; i < size; i *= 2) + ; + + return i; +} + +void +brw_get_scratch_bo(struct intel_context *intel, + drm_intel_bo **scratch_bo, int size) +{ + drm_intel_bo *old_bo = *scratch_bo; + + if (old_bo && old_bo->size < size) { + drm_intel_bo_unreference(old_bo); + old_bo = NULL; + } + + if (!old_bo) { + *scratch_bo = drm_intel_bo_alloc(intel->bufmgr, "scratch bo", size, 4096); + } +} + void brwInitFragProgFuncs( struct dd_function_table *functions ) { assert(functions->ProgramStringNotify == _tnl_program_string); diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index bd0677db151..d389f602fba 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -45,6 +45,7 @@ static void do_vs_prog( struct brw_context *brw, struct brw_vs_prog_key *key ) { struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; GLuint program_size; const GLuint *program; struct brw_vs_compile c; @@ -97,6 +98,14 @@ static void do_vs_prog( struct brw_context *brw, brw_old_vs_emit(&c); } + /* Scratch space is used for register spilling */ + if (c.last_scratch) { + c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch); + + brw_get_scratch_bo(intel, &brw->vs.scratch_bo, + c.prog_data.total_scratch * brw->vs_max_threads); + } + /* get the program */ program = brw_get_program(&c.func, &program_size); diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 9f9fed33970..83a37f5b800 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -66,6 +66,7 @@ struct brw_vs_compile { GLuint first_output; GLuint nr_outputs; GLuint first_overflow_output; /**< VERT_ATTRIB_x */ + GLuint last_scratch; GLuint first_tmp; GLuint last_tmp; diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index d13ac6124c8..a4524fc7889 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -244,29 +244,10 @@ bool do_wm_prog(struct brw_context *brw, /* Scratch space is used for register spilling */ if (c->last_scratch) { - uint32_t total_scratch; + c->prog_data.total_scratch = brw_get_scratch_size(c->last_scratch); - /* Per-thread scratch space is power-of-two sized. */ - for (c->prog_data.total_scratch = 1024; - c->prog_data.total_scratch <= c->last_scratch; - c->prog_data.total_scratch *= 2) { - /* empty */ - } - total_scratch = c->prog_data.total_scratch * brw->wm_max_threads; - - if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) { - drm_intel_bo_unreference(brw->wm.scratch_bo); - brw->wm.scratch_bo = NULL; - } - if (brw->wm.scratch_bo == NULL) { - brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr, - "wm scratch", - total_scratch, - 4096); - } - } - else { - c->prog_data.total_scratch = 0; + brw_get_scratch_bo(intel, &brw->vs.scratch_bo, + c->prog_data.total_scratch * brw->wm_max_threads); } if (unlikely(INTEL_DEBUG & DEBUG_WM)) -- cgit v1.2.3 From 314c2574ff6e562a6cfc5fb84980f092e495a948 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Aug 2011 10:47:54 -0700 Subject: i965: Add remaining scratch space setup emit to unit states. --- src/mesa/drivers/dri/i965/brw_vs_state.c | 10 ++++++++++ src/mesa/drivers/dri/i965/gen6_vs_state.c | 10 +++++++++- src/mesa/drivers/dri/i965/gen7_vs_state.c | 10 +++++++++- src/mesa/drivers/dri/i965/gen7_wm_state.c | 8 +++++++- 4 files changed, 35 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index fc4373ab311..29b3e47ab0c 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -77,6 +77,16 @@ brw_prepare_vs_unit(struct brw_context *brw) else vs->thread1.binding_table_entry_count = brw->vs.nr_surfaces; + if (brw->vs.prog_data->total_scratch != 0) { + vs->thread2.scratch_space_base_pointer = + brw->vs.scratch_bo->offset >> 10; /* reloc */ + vs->thread2.per_thread_scratch_space = + ffs(brw->vs.prog_data->total_scratch) - 11; + } else { + vs->thread2.scratch_space_base_pointer = 0; + vs->thread2.per_thread_scratch_space = 0; + } + vs->thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; vs->thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; vs->thread3.dispatch_grf_start_reg = 1; diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index affa72c7324..b94121e8437 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -160,7 +160,15 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | GEN6_VS_FLOATING_POINT_MODE_ALT | (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - OUT_BATCH(0); /* scratch space base offset */ + + if (brw->vs.prog_data->total_scratch) { + OUT_RELOC(brw->vs.scratch_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + ffs(brw->vs.prog_data->total_scratch) - 11); + } else { + OUT_BATCH(0); + } + OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index 0fad3d2fb68..f3cd5d15bf0 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -71,7 +71,15 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | GEN6_VS_FLOATING_POINT_MODE_ALT | (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - OUT_BATCH(0); /* scratch space base offset */ + + if (brw->vs.prog_data->total_scratch) { + OUT_RELOC(brw->vs.scratch_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + ffs(brw->vs.prog_data->total_scratch) - 11); + } else { + OUT_BATCH(0); + } + OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 1d80e96778e..55a603e887a 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -228,7 +228,13 @@ upload_ps_state(struct brw_context *brw) OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); OUT_BATCH(brw->wm.prog_offset); OUT_BATCH(dw2); - OUT_BATCH(0); /* scratch space base offset */ + if (brw->wm.prog_data->total_scratch) { + OUT_RELOC(brw->wm.scratch_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + ffs(brw->wm.prog_data->total_scratch) - 11); + } else { + OUT_BATCH(0); + } OUT_BATCH(dw4); OUT_BATCH(dw5); OUT_BATCH(0); /* kernel 1 pointer */ -- cgit v1.2.3 From 1ff4f11dd94711a498cde0330101c58636ef2741 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Aug 2011 10:59:39 -0700 Subject: i965/vs: Track the variable index of array accesses. This isn't used currently, as we lower all array accesses. --- src/mesa/drivers/dri/i965/brw_vec4.h | 4 ++++ src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 16 ++++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 3e457fc61aa..bb40c71e4c9 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -169,6 +169,8 @@ public: GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */ bool negate; bool abs; + + src_reg *reladdr; }; class dst_reg : public reg @@ -219,6 +221,8 @@ public: explicit dst_reg(src_reg reg); int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ + + src_reg *reladdr; }; class vec4_instruction : public exec_node { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index f90025c8e7e..8bd048ff459 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -37,6 +37,7 @@ src_reg::src_reg(dst_reg reg) this->reg = reg.reg; this->reg_offset = reg.reg_offset; this->type = reg.type; + this->reladdr = reg.reladdr; int swizzles[4]; int next_chan = 0; @@ -66,6 +67,7 @@ dst_reg::dst_reg(src_reg reg) this->reg_offset = reg.reg_offset; this->type = reg.type; this->writemask = WRITEMASK_XYZW; + this->reladdr = reg.reladdr; } vec4_instruction * @@ -1186,7 +1188,6 @@ vec4_visitor::visit(ir_dereference_array *ir) if (constant_index) { src.reg_offset += constant_index->value.i[0] * element_size; } else { -#if 0 /* Variable array index */ /* Variable index array dereference. It eats the "vec4" of the * base of the array and an index that offsets the Mesa register * index. @@ -1198,15 +1199,22 @@ vec4_visitor::visit(ir_dereference_array *ir) if (element_size == 1) { index_reg = this->result; } else { - index_reg = src_reg(this, glsl_type::float_type); + index_reg = src_reg(this, glsl_type::int_type); emit(BRW_OPCODE_MUL, dst_reg(index_reg), - this->result, src_reg_for_float(element_size)); + this->result, src_reg(element_size)); + } + + if (src.reladdr) { + src_reg temp = src_reg(this, glsl_type::int_type); + + emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg); + + index_reg = temp; } src.reladdr = ralloc(mem_ctx, src_reg); memcpy(src.reladdr, &index_reg, sizeof(index_reg)); -#endif } /* If the type is smaller than a vec4, replicate the last channel out. */ -- cgit v1.2.3 From 758c3c2b4588f235def48b2f28c0479a70f7c194 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Aug 2011 15:21:25 -0700 Subject: i965/vs: Reserve MRF 14/15 for array loads/register unspilling. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 8bd048ff459..e01318af1ab 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1708,10 +1708,18 @@ align_interleaved_urb_mlen(struct brw_context *brw, int mlen) void vec4_visitor::emit_urb_writes() { + /* MRF 0 is reserved for the debugger, so start with message header + * in MRF 1. + */ int base_mrf = 1; int mrf = base_mrf; int urb_entry_size; uint64_t outputs_remaining = c->prog_data.outputs_written; + /* In the process of generating our URB write message contents, we + * may need to unspill a register or load from an array. Those + * reads would use MRFs 14-15. + */ + int max_usable_mrf = 13; /* FINISHME: edgeflag */ @@ -1751,7 +1759,7 @@ vec4_visitor::emit_urb_writes() * even-numbered amount of URB write data, which will meet * gen6's requirements for length alignment. */ - if (mrf == 16) { + if (mrf > max_usable_mrf) { attr++; break; } @@ -1772,21 +1780,21 @@ vec4_visitor::emit_urb_writes() if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) continue; - emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); + assert(mrf < max_usable_mrf); - assert(mrf != 16); + emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); } inst = emit(VS_OPCODE_URB_WRITE); inst->base_mrf = base_mrf; inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); inst->eot = true; - /* URB destination offset. In the previous write, we got MRFs 2- - * 15 MRFs minus the one header MRF, so 14 regs. URB offset is in + /* URB destination offset. In the previous write, we got MRFs + * 2-13 minus the one header MRF, so 12 regs. URB offset is in * URB row increments, and each of our MRFs is half of one of * those, since we're doing interleaved writes. */ - inst->offset = 14 / 2; + inst->offset = (max_usable_mrf - base_mrf) / 2; urb_entry_size += mrf - base_mrf; } -- cgit v1.2.3 From d0e4d71070cd7fa197ed98612782484ec1f27123 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Aug 2011 12:15:26 -0700 Subject: i965/vs: Move virtual GRFs with array accesses to them to scratch space. --- src/mesa/drivers/dri/i965/brw_defines.h | 2 + src/mesa/drivers/dri/i965/brw_vec4.h | 12 ++ src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 10 +- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 163 +++++++++++++++++++++++++ 4 files changed, 186 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index e3823c65d1a..b740d87c933 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -640,6 +640,8 @@ enum opcode { FS_OPCODE_PULL_CONSTANT_LOAD, VS_OPCODE_URB_WRITE, + VS_OPCODE_SCRATCH_READ, + VS_OPCODE_SCRATCH_WRITE, }; #define BRW_PREDICATE_NONE 0 diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index bb40c71e4c9..2f171b72049 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -376,6 +376,7 @@ public: void setup_payload(); void reg_allocate_trivial(); void reg_allocate(); + void move_grf_array_access_to_scratch(); vec4_instruction *emit(enum opcode opcode); @@ -424,6 +425,17 @@ public: int emit_vue_header_gen4(int header_mrf); void emit_urb_writes(void); + src_reg get_scratch_offset(vec4_instruction *inst, + src_reg *reladdr, int reg_offset); + void emit_scratch_read(vec4_instruction *inst, + dst_reg dst, + src_reg orig_src, + int base_offset); + void emit_scratch_write(vec4_instruction *inst, + src_reg temp, + dst_reg orig_dst, + int base_offset); + GLboolean try_emit_sat(ir_expression *ir); bool process_move_condition(ir_rvalue *ir); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index bc3110b0458..57eb467567e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -321,7 +321,7 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, bool vec4_visitor::run() { - /* Generate FS IR for main(). (the visitor only descends into + /* Generate VS IR for main(). (the visitor only descends into * functions called "main"). */ foreach_iter(exec_list_iterator, iter, *shader->ir) { @@ -332,6 +332,14 @@ vec4_visitor::run() emit_urb_writes(); + /* Before any optimization, push array accesses out to scratch + * space where we need them to be. This pass may allocate new + * virtual GRFs, so we want to do it early. It also makes sure + * that we have reladdr computations available for CSE, since we'll + * often do repeated subexpressions for those. + */ + move_grf_array_access_to_scratch(); + if (failed) return false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index e01318af1ab..049af6c3992 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1805,6 +1805,169 @@ vec4_visitor::emit_urb_writes() c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4; } +src_reg +vec4_visitor::get_scratch_offset(vec4_instruction *inst, + src_reg *reladdr, int reg_offset) +{ + /* Because we store the values to scratch interleaved like our + * vertex data, we need to scale the vec4 index by 2. + */ + int message_header_scale = 2; + + /* Pre-gen6, the message header uses byte offsets instead of vec4 + * (16-byte) offset units. + */ + if (intel->gen < 6) + message_header_scale *= 16; + + if (reladdr) { + src_reg index = src_reg(this, glsl_type::int_type); + + vec4_instruction *add = emit(BRW_OPCODE_ADD, + dst_reg(index), + *reladdr, + src_reg(reg_offset)); + /* Move our new instruction from the tail to its correct place. */ + add->remove(); + inst->insert_before(add); + + vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index), + index, src_reg(message_header_scale)); + mul->remove(); + inst->insert_before(mul); + + return index; + } else { + return src_reg(reg_offset * message_header_scale); + } +} + +/** + * Emits an instruction before @inst to load the value named by @orig_src + * from scratch space at @base_offset to @temp. + */ +void +vec4_visitor::emit_scratch_read(vec4_instruction *inst, + dst_reg temp, src_reg orig_src, + int base_offset) +{ + int reg_offset = base_offset + orig_src.reg_offset; + src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset); + + vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ, + temp, index); + + scratch_read_inst->base_mrf = 14; + scratch_read_inst->mlen = 1; + /* Move our instruction from the tail to its correct place. */ + scratch_read_inst->remove(); + inst->insert_before(scratch_read_inst); +} + +/** + * Emits an instruction after @inst to store the value to be written + * to @orig_dst to scratch space at @base_offset, from @temp. + */ +void +vec4_visitor::emit_scratch_write(vec4_instruction *inst, + src_reg temp, dst_reg orig_dst, + int base_offset) +{ + int reg_offset = base_offset + orig_dst.reg_offset; + src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset); + + dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0), + orig_dst.writemask)); + vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE, + dst, temp, index); + scratch_write_inst->base_mrf = 13; + scratch_write_inst->mlen = 2; + scratch_write_inst->predicate = inst->predicate; + /* Move our instruction from the tail to its correct place. */ + scratch_write_inst->remove(); + inst->insert_after(scratch_write_inst); +} + +/** + * We can't generally support array access in GRF space, because a + * single instruction's destination can only span 2 contiguous + * registers. So, we send all GRF arrays that get variable index + * access to scratch space. + */ +void +vec4_visitor::move_grf_array_access_to_scratch() +{ + int scratch_loc[this->virtual_grf_count]; + + for (int i = 0; i < this->virtual_grf_count; i++) { + scratch_loc[i] = -1; + } + + /* First, calculate the set of virtual GRFs that need to be punted + * to scratch due to having any array access on them, and where in + * scratch. + */ + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + if (inst->dst.file == GRF && inst->dst.reladdr && + scratch_loc[inst->dst.reg] == -1) { + scratch_loc[inst->dst.reg] = c->last_scratch; + c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4; + } + + for (int i = 0 ; i < 3; i++) { + src_reg *src = &inst->src[i]; + + if (src->file == GRF && src->reladdr && + scratch_loc[src->reg] == -1) { + scratch_loc[src->reg] = c->last_scratch; + c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4; + } + } + } + + /* Now, for anything that will be accessed through scratch, rewrite + * it to load/store. Note that this is a _safe list walk, because + * we may generate a new scratch_write instruction after the one + * we're processing. + */ + foreach_list_safe(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + /* Set up the annotation tracking for new generated instructions. */ + base_ir = inst->ir; + current_annotation = inst->annotation; + + if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) { + src_reg temp = src_reg(this, glsl_type::vec4_type); + + emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]); + + inst->dst.file = temp.file; + inst->dst.reg = temp.reg; + inst->dst.reg_offset = temp.reg_offset; + inst->dst.reladdr = NULL; + } + + for (int i = 0 ; i < 3; i++) { + if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1) + continue; + + dst_reg temp = dst_reg(this, glsl_type::vec4_type); + + emit_scratch_read(inst, temp, inst->src[i], + scratch_loc[inst->src[i].reg]); + + inst->src[i].file = temp.file; + inst->src[i].reg = temp.reg; + inst->src[i].reg_offset = temp.reg_offset; + inst->src[i].reladdr = NULL; + } + } +} + + vec4_visitor::vec4_visitor(struct brw_vs_compile *c, struct gl_shader_program *prog, struct brw_shader *shader) -- cgit v1.2.3 From 0f22f98ccd69bb5e8df3c78203bce9bc630965c1 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Aug 2011 13:16:06 -0700 Subject: i965: Make some EU emit code for DP read/write messages non-static. We keep building these strange interfaces for DP read/write where there's a helper function with some partially-specific, partially-general controls, which is used in exactly one place in code generation. Making these public will let us set up those instructions in the one place they're to be generated. --- src/mesa/drivers/dri/i965/brw_eu.h | 27 ++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_eu_emit.c | 44 ++++++++++++++++----------------- 2 files changed, 49 insertions(+), 22 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 38dd99b693d..af50305fc2b 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -801,6 +801,12 @@ void brw_init_compile(struct brw_context *, struct brw_compile *p, void *mem_ctx); const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz ); +struct brw_instruction *brw_next_insn(struct brw_compile *p, GLuint opcode); +void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg dest); +void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg reg); + /* Helpers for regular instructions: */ @@ -855,6 +861,27 @@ ROUND(RNDE) /* Helpers for SEND instruction: */ +void brw_set_dp_read_message(struct brw_compile *p, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint target_cache, + GLuint msg_length, + GLuint response_length); + +void brw_set_dp_write_message(struct brw_compile *p, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint msg_length, + GLboolean header_present, + GLuint pixel_scoreboard_clear, + GLuint response_length, + GLuint end_of_thread, + GLuint send_commit_msg); + void brw_urb_WRITE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index e7370f36064..b08906426e4 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -89,9 +89,9 @@ gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) } -static void brw_set_dest(struct brw_compile *p, - struct brw_instruction *insn, - struct brw_reg dest) +void +brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg dest) { if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && dest.file != BRW_MESSAGE_REGISTER_FILE) @@ -221,9 +221,9 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg) /* 10. Check destination issues. */ } -static void brw_set_src0(struct brw_compile *p, - struct brw_instruction *insn, - struct brw_reg reg) +void +brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg reg) { if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) assert(reg.nr < 128); @@ -504,17 +504,18 @@ static void brw_set_urb_message( struct brw_compile *p, } } -static void brw_set_dp_write_message( struct brw_compile *p, - struct brw_instruction *insn, - GLuint binding_table_index, - GLuint msg_control, - GLuint msg_type, - GLuint msg_length, - GLboolean header_present, - GLuint pixel_scoreboard_clear, - GLuint response_length, - GLuint end_of_thread, - GLuint send_commit_msg) +void +brw_set_dp_write_message(struct brw_compile *p, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint msg_length, + GLboolean header_present, + GLuint pixel_scoreboard_clear, + GLuint response_length, + GLuint end_of_thread, + GLuint send_commit_msg) { struct brw_context *brw = p->brw; struct intel_context *intel = &brw->intel; @@ -570,7 +571,7 @@ static void brw_set_dp_write_message( struct brw_compile *p, } } -static void +void brw_set_dp_read_message(struct brw_compile *p, struct brw_instruction *insn, GLuint binding_table_index, @@ -709,9 +710,9 @@ static void brw_set_sampler_message(struct brw_compile *p, } - -static struct brw_instruction *next_insn( struct brw_compile *p, - GLuint opcode ) +#define next_insn brw_next_insn +struct brw_instruction * +brw_next_insn(struct brw_compile *p, GLuint opcode) { struct brw_instruction *insn; @@ -732,7 +733,6 @@ static struct brw_instruction *next_insn( struct brw_compile *p, return insn; } - static struct brw_instruction *brw_alu1( struct brw_compile *p, GLuint opcode, struct brw_reg dest, -- cgit v1.2.3 From 584ff407482fd3baf5ce081dbbf9653eb76c40f1 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Aug 2011 13:36:11 -0700 Subject: i965/vs: Add support for scratch read/write codegen. --- src/mesa/drivers/dri/i965/brw_vec4.h | 9 ++ src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 144 +++++++++++++++++++++++++++- 2 files changed, 151 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 2f171b72049..b5f442e6d21 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -451,6 +451,15 @@ public: struct brw_reg dst, struct brw_reg src); void generate_urb_write(vec4_instruction *inst); + void generate_oword_dual_block_offsets(struct brw_reg m1, + struct brw_reg index); + void generate_scratch_write(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg index); + void generate_scratch_read(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg index); }; } /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 57eb467567e..21830f99fc2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -1,5 +1,4 @@ -/* - * Copyright © 2011 Intel Corporation +/* Copyright © 2011 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -278,6 +277,139 @@ vec4_visitor::generate_urb_write(vec4_instruction *inst) BRW_URB_SWIZZLE_INTERLEAVE); } +void +vec4_visitor::generate_oword_dual_block_offsets(struct brw_reg m1, + struct brw_reg index) +{ + int second_vertex_offset; + + if (intel->gen >= 6) + second_vertex_offset = 1; + else + second_vertex_offset = 16; + + m1 = retype(m1, BRW_REGISTER_TYPE_D); + + /* Set up M1 (message payload). Only the block offsets in M1.0 and + * M1.4 are used, and the rest are ignored. + */ + struct brw_reg m1_0 = suboffset(vec1(m1), 0); + struct brw_reg m1_4 = suboffset(vec1(m1), 4); + struct brw_reg index_0 = suboffset(vec1(index), 0); + struct brw_reg index_4 = suboffset(vec1(index), 4); + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_access_mode(p, BRW_ALIGN_1); + + brw_MOV(p, m1_0, index_0); + + brw_set_predicate_inverse(p, true); + if (index.file == BRW_IMMEDIATE_VALUE) { + index_4.dw1.ud++; + brw_MOV(p, m1_4, index_4); + } else { + brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset)); + } + + brw_pop_insn_state(p); +} + +void +vec4_visitor::generate_scratch_read(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg index) +{ + if (intel->gen >= 6) { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, + retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D), + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D)); + brw_pop_insn_state(p); + } + + generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), + index); + + uint32_t msg_type; + + if (intel->gen >= 6) + msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + else if (intel->gen == 5 || intel->is_g4x) + msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + else + msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + + /* Each of the 8 channel enables is considered for whether each + * dword is written. + */ + struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, send, dst); + brw_set_src0(p, send, brw_message_reg(inst->base_mrf)); + brw_set_dp_read_message(p, send, + 255, /* binding table index: stateless access */ + BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, + msg_type, + BRW_DATAPORT_READ_TARGET_RENDER_CACHE, + 2, /* mlen */ + 1 /* rlen */); +} + +void +vec4_visitor::generate_scratch_write(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg index) +{ + /* If the instruction is predicated, we'll predicate the send, not + * the header setup. + */ + brw_set_predicate_control(p, false); + + if (intel->gen >= 6) { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, + retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D), + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D)); + brw_pop_insn_state(p); + } + + generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), + index); + + brw_MOV(p, + retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D), + retype(src, BRW_REGISTER_TYPE_D)); + + uint32_t msg_type; + + if (intel->gen >= 6) + msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; + else + msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; + + brw_set_predicate_control(p, inst->predicate); + + /* Each of the 8 channel enables is considered for whether each + * dword is written. + */ + struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, send, dst); + brw_set_src0(p, send, brw_message_reg(inst->base_mrf)); + brw_set_dp_write_message(p, send, + 255, /* binding table index: stateless access */ + BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, + msg_type, + 3, /* mlen */ + true, /* header present */ + false, /* pixel scoreboard */ + 0, /* rlen */ + false, /* eot */ + false /* commit */); +} + void vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, struct brw_reg dst, @@ -308,6 +440,14 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, generate_urb_write(inst); break; + case VS_OPCODE_SCRATCH_READ: + generate_scratch_read(inst, dst, src[0]); + break; + + case VS_OPCODE_SCRATCH_WRITE: + generate_scratch_write(inst, dst, src[0], src[1]); + break; + default: if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { fail("unsupported opcode in `%s' in VS\n", -- cgit v1.2.3 From 54fa706d6f06955221cb6b452b5b170bfaaceef4 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Aug 2011 13:38:50 -0700 Subject: i965/vs: Enable variable array indexing in the VS. --- src/mesa/drivers/dri/i965/brw_shader.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 2eeeec25cac..2dc32c95610 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -111,12 +111,14 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) brw_do_cubemap_normalize(shader->ir); lower_noise(shader->ir); lower_quadop_vector(shader->ir, false); + + bool input = true; + bool output = stage == MESA_SHADER_FRAGMENT; + bool temp = stage == MESA_SHADER_FRAGMENT; + bool uniform = true; + lower_variable_index_to_cond_assign(shader->ir, - GL_TRUE, /* input */ - GL_TRUE, /* output */ - GL_TRUE, /* temp */ - GL_TRUE /* uniform */ - ); + input, output, temp, uniform); do { progress = false; -- cgit v1.2.3 From e94bdbe04a4f0adb73ab92153987f0c9f48814f7 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Aug 2011 17:09:12 -0700 Subject: i965: Add gen6 disassembly for DP render cache messages. --- src/mesa/drivers/dri/i965/brw_disasm.c | 49 +++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index af41c848308..927b0b4acc9 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -309,6 +309,35 @@ char *target_function[16] = { [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner" }; +char *target_function_gen6[16] = { + [BRW_MESSAGE_TARGET_NULL] = "null", + [BRW_MESSAGE_TARGET_MATH] = "math", + [BRW_MESSAGE_TARGET_SAMPLER] = "sampler", + [BRW_MESSAGE_TARGET_GATEWAY] = "gateway", + [GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE] = "sampler", + [GEN6_MESSAGE_TARGET_DP_RENDER_CACHE] = "render", + [GEN6_MESSAGE_TARGET_DP_CONST_CACHE] = "const", + [BRW_MESSAGE_TARGET_URB] = "urb", + [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner" +}; + +char *dp_rc_msg_type_gen6[16] = { + [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read", + [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read", + [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read", + [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read", + [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read", + [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read", + [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write", + [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write", + [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write", + [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write", + [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write", + [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write", + [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write", + [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write", +}; + char *math_function[16] = { [BRW_MATH_FUNCTION_INV] = "inv", [BRW_MATH_FUNCTION_LOG] = "log", @@ -927,8 +956,14 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) newline (file); pad (file, 16); space = 0; - err |= control (file, "target function", target_function, - target, &space); + + if (gen >= 6) { + err |= control (file, "target function", target_function_gen6, + target, &space); + } else { + err |= control (file, "target function", target_function, + target, &space); + } switch (target) { case BRW_MESSAGE_TARGET_MATH: @@ -985,9 +1020,16 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) inst->bits3.dp_read.msg_type); } break; + case BRW_MESSAGE_TARGET_DATAPORT_WRITE: if (gen >= 6) { - format (file, " (%d, %d, %d, %d, %d, %d)", + format (file, " ("); + + err |= control (file, "DP rc message type", + dp_rc_msg_type_gen6, + inst->bits3.gen6_dp.msg_type, &space); + + format (file, ", %d, %d, %d, %d, %d, %d)", inst->bits3.gen6_dp.binding_table_index, inst->bits3.gen6_dp.msg_control, inst->bits3.gen6_dp.msg_type, @@ -1003,6 +1045,7 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) inst->bits3.dp_write.send_commit_msg); } break; + case BRW_MESSAGE_TARGET_URB: if (gen >= 5) { format (file, " %d", inst->bits3.urb_gen5.offset); -- cgit v1.2.3 From 7b91eefe7cbe771397684b5970f7c04313baa2f0 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 8 Aug 2011 15:56:11 -0700 Subject: i965/vs: Slightly improve the trivial reg allocator to skip unused regs. This fixes most of the regressions in the vs array test set from the varying array indexing work, since the giant array that was originally allocated in virtual GRF space never gets used and is only ever read/stored from scratch space. --- .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 26 ++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index e7f6b28a536..1bfd84d76e8 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -41,15 +41,37 @@ vec4_visitor::reg_allocate_trivial() { int last_grf = 0; int hw_reg_mapping[this->virtual_grf_count]; + bool virtual_grf_used[this->virtual_grf_count]; int i; int next; + /* Calculate which virtual GRFs are actually in use after whatever + * optimization passes have occurred. + */ + for (int i = 0; i < this->virtual_grf_count; i++) { + virtual_grf_used[i] = false; + } + + foreach_iter(exec_list_iterator, iter, this->instructions) { + vec4_instruction *inst = (vec4_instruction *)iter.get(); + + if (inst->dst.file == GRF) + virtual_grf_used[inst->dst.reg] = true; + + for (int i = 0; i < 3; i++) { + if (inst->src[i].file == GRF) + virtual_grf_used[inst->src[i].reg] = true; + } + } + /* Note that compressed instructions require alignment to 2 registers. */ hw_reg_mapping[0] = this->first_non_payload_grf; next = hw_reg_mapping[0] + this->virtual_grf_sizes[0]; for (i = 1; i < this->virtual_grf_count; i++) { - hw_reg_mapping[i] = next; - next += this->virtual_grf_sizes[i]; + if (virtual_grf_used[i]) { + hw_reg_mapping[i] = next; + next += this->virtual_grf_sizes[i]; + } } prog_data->total_grf = next; -- cgit v1.2.3 From 6408b0295f5c8be6fea891a025d79752484721b6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 9 Aug 2011 10:57:09 -0700 Subject: i965/vs: Fix implementation of ir_unop_any. We were inheriting whatever previous predicate existed. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 049af6c3992..fde1d67759a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -998,7 +998,9 @@ vec4_visitor::visit(ir_expression *ir) break; case ir_unop_any: - emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); -- cgit v1.2.3 From 250770b74d33bb8625c780a74a89477af033d13a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 9 Aug 2011 11:00:28 -0700 Subject: i965/vs: Respect the gen6 limitation that math opcodes can't be align16. Fixes vs-acos-vec3 and friends. --- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 9 +++++++++ src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 26 ++++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 21830f99fc2..effc82a8004 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -250,6 +250,14 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src) { + /* Can't do writemask because math can't be align16. */ + assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); + /* Source swizzles are ignored. */ + assert(!src.abs); + assert(!src.negate); + assert(src.dw1.bits.swizzle = BRW_SWIZZLE_XYZW); + + brw_set_access_mode(p, BRW_ALIGN_1); brw_math(p, dst, brw_math_function(inst->opcode), @@ -258,6 +266,7 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst, src, BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL); + brw_set_access_mode(p, BRW_ALIGN_16); } void diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index fde1d67759a..f4756a9a1a8 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -129,7 +129,18 @@ vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src) src_reg temp_src = src_reg(this, glsl_type::vec4_type); emit(BRW_OPCODE_MOV, dst_reg(temp_src), src); - emit(opcode, dst, temp_src); + if (dst.writemask != WRITEMASK_XYZW) { + /* The gen6 math instruction must be align1, so we can't do + * writemasks. + */ + dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type); + + emit(opcode, temp_dst, temp_src); + + emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst)); + } else { + emit(opcode, dst, temp_src); + } } void @@ -184,7 +195,18 @@ vec4_visitor::emit_math2_gen6(enum opcode opcode, emit(BRW_OPCODE_MOV, dst, src1); src1 = expanded; - emit(opcode, dst, src0, src1); + if (dst.writemask != WRITEMASK_XYZW) { + /* The gen6 math instruction must be align1, so we can't do + * writemasks. + */ + dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type); + + emit(opcode, temp_dst, src0, src1); + + emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst)); + } else { + emit(opcode, dst, src0, src1); + } } void -- cgit v1.2.3 From abf843a797876b5e3c5c91dbec25b6553d2cc281 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 9 Aug 2011 12:30:41 -0700 Subject: i965/vs: Add support for ir_binop_pow. Fixes vs-pow-float-float. --- src/mesa/drivers/dri/i965/brw_vec4.h | 10 ++++ src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 63 ++++++++++++++++++++++++-- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 +- 3 files changed, 70 insertions(+), 7 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index b5f442e6d21..082021513d2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -444,12 +444,22 @@ public: void generate_vs_instruction(vec4_instruction *inst, struct brw_reg dst, struct brw_reg *src); + void generate_math1_gen4(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src); void generate_math1_gen6(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src); + void generate_math2_gen4(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1); + void generate_math2_gen6(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1); + void generate_urb_write(vec4_instruction *inst); void generate_oword_dual_block_offsets(struct brw_reg m1, struct brw_reg index); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index effc82a8004..df9521cd04e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -245,6 +245,15 @@ vec4_visitor::generate_math1_gen4(vec4_instruction *inst, BRW_MATH_PRECISION_FULL); } +static void +check_gen6_math_src_arg(struct brw_reg src) +{ + /* Source swizzles are ignored. */ + assert(!src.abs); + assert(!src.negate); + assert(src.dw1.bits.swizzle = BRW_SWIZZLE_XYZW); +} + void vec4_visitor::generate_math1_gen6(vec4_instruction *inst, struct brw_reg dst, @@ -252,10 +261,7 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst, { /* Can't do writemask because math can't be align16. */ assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); - /* Source swizzles are ignored. */ - assert(!src.abs); - assert(!src.negate); - assert(src.dw1.bits.swizzle = BRW_SWIZZLE_XYZW); + check_gen6_math_src_arg(src); brw_set_access_mode(p, BRW_ALIGN_1); brw_math(p, @@ -269,6 +275,49 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst, brw_set_access_mode(p, BRW_ALIGN_16); } +void +vec4_visitor::generate_math2_gen6(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) +{ + /* Can't do writemask because math can't be align16. */ + assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); + /* Source swizzles are ignored. */ + check_gen6_math_src_arg(src0); + check_gen6_math_src_arg(src1); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_math2(p, + dst, + brw_math_function(inst->opcode), + src0, src1); + brw_set_access_mode(p, BRW_ALIGN_16); +} + +void +vec4_visitor::generate_math2_gen4(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) +{ + /* Can't do writemask because math can't be align16. */ + assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); + + brw_MOV(p, brw_message_reg(inst->base_mrf + 1), src1); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_math(p, + dst, + brw_math_function(inst->opcode), + BRW_MATH_SATURATE_NONE, + inst->base_mrf, + src0, + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + brw_set_access_mode(p, BRW_ALIGN_16); +} + void vec4_visitor::generate_urb_write(vec4_instruction *inst) { @@ -442,7 +491,11 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, break; case SHADER_OPCODE_POW: - assert(!"finishme"); + if (intel->gen >= 6) { + generate_math2_gen6(inst, dst, src[0], src[1]); + } else { + generate_math2_gen4(inst, dst, src[0], src[1]); + } break; case VS_OPCODE_URB_WRITE: diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index f4756a9a1a8..f9447d7c391 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -188,11 +188,11 @@ vec4_visitor::emit_math2_gen6(enum opcode opcode, */ expanded = src_reg(this, glsl_type::vec4_type); - emit(BRW_OPCODE_MOV, dst, src0); + emit(BRW_OPCODE_MOV, dst_reg(expanded), src0); src0 = expanded; expanded = src_reg(this, glsl_type::vec4_type); - emit(BRW_OPCODE_MOV, dst, src1); + emit(BRW_OPCODE_MOV, dst_reg(expanded), src1); src1 = expanded; if (dst.writemask != WRITEMASK_XYZW) { -- cgit v1.2.3 From 0b359e3ea015576d0e75bf5ec19aceef337311a3 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 9 Aug 2011 14:35:38 -0700 Subject: i965/vs: Add support for loops. This is copied from brw_fs.cpp, instead of doing the temporary IR generation that ir_to_mesa does. Fixes glsl-vs-loop and friends. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 53 ++++++++++---------------- 1 file changed, 21 insertions(+), 32 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index f9447d7c391..e11ec40cc7b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -700,58 +700,47 @@ vec4_visitor::visit(ir_variable *ir) void vec4_visitor::visit(ir_loop *ir) { - ir_dereference_variable *counter = NULL; - - fail("not yet\n"); + dst_reg counter; /* We don't want debugging output to print the whole body of the * loop as the annotation. */ this->base_ir = NULL; - if (ir->counter != NULL) - counter = new(ir) ir_dereference_variable(ir->counter); - - if (ir->from != NULL) { - assert(ir->counter != NULL); + if (ir->counter != NULL) { + this->base_ir = ir->counter; + ir->counter->accept(this); + counter = *(variable_storage(ir->counter)); - ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL); + if (ir->from != NULL) { + this->base_ir = ir->from; + ir->from->accept(this); - a->accept(this); - delete a; + emit(BRW_OPCODE_MOV, counter, this->result); + } } emit(BRW_OPCODE_DO); if (ir->to) { - ir_expression *e = - new(ir) ir_expression(ir->cmp, glsl_type::bool_type, - counter, ir->to); - ir_if *if_stmt = new(ir) ir_if(e); - - ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break); - - if_stmt->then_instructions.push_tail(brk); + this->base_ir = ir->to; + ir->to->accept(this); - if_stmt->accept(this); + vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst_null_d(), + src_reg(counter), this->result); + inst->conditional_mod = brw_conditional_for_comparison(ir->cmp); - delete if_stmt; - delete e; - delete brk; + inst = emit(BRW_OPCODE_BREAK); + inst->predicate = BRW_PREDICATE_NORMAL; } visit_instructions(&ir->body_instructions); - if (ir->increment) { - ir_expression *e = - new(ir) ir_expression(ir_binop_add, counter->type, - counter, ir->increment); - - ir_assignment *a = new(ir) ir_assignment(counter, e, NULL); - a->accept(this); - delete a; - delete e; + if (ir->increment) { + this->base_ir = ir->increment; + ir->increment->accept(this); + emit(BRW_OPCODE_ADD, counter, src_reg(counter), this->result); } emit(BRW_OPCODE_WHILE); -- cgit v1.2.3 From fea7d34b3545878ce00914f388e1eeebf55f7748 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 9 Aug 2011 14:49:29 -0700 Subject: i965/vs: Fix builtin uniform setup. I want to intelligently pack them at some point, but for now we have the params set up in groups of 4. Fixes glsl-vs-normalscale. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index e11ec40cc7b..93252f73285 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -443,13 +443,12 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) int last_swiz = -1; for (unsigned int j = 0; j < 4; j++) { int swiz = GET_SWZ(slots[i].swizzle, j); - if (swiz == last_swiz) - break; last_swiz = swiz; c->prog_data.param[this->uniforms * 4 + j] = &values[swiz]; c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT; - this->uniform_size[this->uniforms]++; + if (swiz <= last_swiz) + this->uniform_size[this->uniforms]++; } this->uniforms++; } -- cgit v1.2.3 From a55fbbc1a2b579aed1e80036367b521ef6928f66 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 9 Aug 2011 15:08:47 -0700 Subject: i965/vs: Fix access of attribute arrays. By leaving out the column index, we were reading an unallocated attribute on glsl-mat-attribute. --- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index df9521cd04e..517a3e3c75b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -52,8 +52,9 @@ vec4_visitor::setup_attributes(int payload_reg) if (inst->src[i].file != ATTR) continue; + int grf = attribute_map[inst->src[i].reg + inst->src[i].reg_offset]; inst->src[i].file = HW_REG; - inst->src[i].fixed_hw_reg = brw_vec8_grf(attribute_map[inst->src[i].reg], 0); + inst->src[i].fixed_hw_reg = brw_vec8_grf(grf, 0); inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle; } } -- cgit v1.2.3 From aed5e353e95f47773864c6e61c506b9ddad0e2e9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 9 Aug 2011 15:19:26 -0700 Subject: i965/vs: Clamp vertex color outputs when required by ARB_color_buffer_float. Fixes glsl-vs-vertex-color. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 93252f73285..2a1f003b5ce 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1764,7 +1764,16 @@ vec4_visitor::emit_urb_writes() if (attr == VERT_RESULT_PSIZ) continue; - emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); + vec4_instruction *inst = emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), + src_reg(output_reg[attr])); + + if ((attr == VERT_RESULT_COL0 || + attr == VERT_RESULT_COL1 || + attr == VERT_RESULT_BFC0 || + attr == VERT_RESULT_BFC1) && + c->key.clamp_vertex_color) { + inst->saturate = true; + } /* If this was MRF 15, we can't fit anything more into this URB * WRITE. Note that base_mrf of 1 means that MRF 15 is an -- cgit v1.2.3 From 072d64121e13ad6bcb9b703090de1ee4a59f7096 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 10 Aug 2011 11:38:42 -0700 Subject: i965/vs: Add support for GL_FIXED attributes. Fixes arb_es2_compatibility-fixed-type --- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 517a3e3c75b..350d544aba3 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -42,6 +42,18 @@ vec4_visitor::setup_attributes(int payload_reg) if (prog_data->inputs_read & BITFIELD64_BIT(i)) { attribute_map[i] = payload_reg + nr_attributes; nr_attributes++; + + /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED + * attributes come in as floating point conversions of the + * integer values. + */ + if (c->key.gl_fixed_input_size[i] != 0) { + struct brw_reg reg = brw_vec8_grf(attribute_map[i], 0); + + brw_MUL(p, + brw_writemask(reg, (1 << c->key.gl_fixed_input_size[i]) - 1), + reg, brw_imm_f(1.0 / 65536.0)); + } } } -- cgit v1.2.3 From 193a9a209d5121e2c20f1d20c61587b1e3d0603d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 10 Aug 2011 14:13:23 -0700 Subject: i965/vs: Add support for if(any(bvec)) on gen6. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 2a1f003b5ce..d1888579597 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -564,10 +564,6 @@ vec4_visitor::emit_if_gen6(ir_if *ir) assert(expr->get_num_operands() <= 2); for (unsigned int i = 0; i < expr->get_num_operands(); i++) { - assert(expr->operands[i]->type->is_scalar() || - expr->operation == ir_binop_any_nequal || - expr->operation == ir_binop_all_equal); - expr->operands[i]->accept(this); op[i] = this->result; } @@ -634,6 +630,14 @@ vec4_visitor::emit_if_gen6(ir_if *ir) inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; return; + case ir_unop_any: + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + inst = emit(BRW_OPCODE_IF); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + return; + default: assert(!"not reached"); inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); -- cgit v1.2.3 From e8980c61b2932cd4c8791fcc5afdb54fa033c224 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 11 Aug 2011 09:17:18 -0700 Subject: i965/vs: Fix the trivial register allocator's failure path. --- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 3 +++ src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp | 5 ++--- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 350d544aba3..27160fb40d4 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -561,6 +561,9 @@ vec4_visitor::run() setup_payload(); reg_allocate(); + if (failed) + return false; + brw_set_access_mode(p, BRW_ALIGN_16); generate_code(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index 1bfd84d76e8..d5fd21d99a4 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -39,7 +39,6 @@ assign(int *reg_hw_locations, reg *reg) void vec4_visitor::reg_allocate_trivial() { - int last_grf = 0; int hw_reg_mapping[this->virtual_grf_count]; bool virtual_grf_used[this->virtual_grf_count]; int i; @@ -84,9 +83,9 @@ vec4_visitor::reg_allocate_trivial() assign(hw_reg_mapping, &inst->src[2]); } - if (last_grf >= BRW_MAX_GRF) { + if (prog_data->total_grf > BRW_MAX_GRF) { fail("Ran out of regs on trivial allocator (%d/%d)\n", - last_grf, BRW_MAX_GRF); + prog_data->total_grf, BRW_MAX_GRF); } } -- cgit v1.2.3 From d376fa8e84b044ead47586d1b56a10742bcbdac7 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 15 Aug 2011 18:40:14 -0700 Subject: i965: Fix assertion failure on a loop consisting of while (true) { break }. On enabling the precompile step in the VS, we tripped over this assertion failure in glsl-link-bug-30552. --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index b08906426e4..f5cc09dd49b 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2311,7 +2311,7 @@ brw_find_loop_end(struct brw_compile *p, int start) if (insn->header.opcode == BRW_OPCODE_WHILE) { int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count : insn->bits3.break_cont.jip; - if (ip + jip / br < start) + if (ip + jip / br <= start) return ip; } } -- cgit v1.2.3 From 7fbe7fe13359d3f349664410ec73d7bd48824ed6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 11 Aug 2011 09:52:08 -0700 Subject: i965/vs: Run the shader backend at link time and return compile failures. Link failure is something that shouldn't happen, but we sometimes want it during development. The precompile also allows analysis of shader codegen with shader-db. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_shader.cpp | 4 ++ src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 12 ++---- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vs.c | 51 ++++++++++++++++++++++---- src/mesa/drivers/dri/i965/brw_vs.h | 3 +- 6 files changed, 54 insertions(+), 20 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 693ef0ce31a..b19c6e72fa6 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1781,7 +1781,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c, fs_visitor v(c, prog, shader); if (!v.run()) { prog->LinkStatus = GL_FALSE; - prog->InfoLog = ralloc_strdup(prog, v.fail_msg); + ralloc_strcat(&prog->InfoLog, v.fail_msg); return false; } diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 2dc32c95610..3ff6bbaed47 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -24,6 +24,7 @@ extern "C" { #include "main/macros.h" #include "brw_context.h" +#include "brw_vs.h" } #include "brw_fs.h" #include "../glsl/ir_optimization.h" @@ -67,6 +68,9 @@ brw_shader_precompile(struct gl_context *ctx, struct gl_shader_program *prog) if (!brw_fs_precompile(ctx, prog)) return false; + if (!brw_vs_precompile(ctx, prog)) + return false; + return true; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 27160fb40d4..9ef6ab6de90 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -809,14 +809,8 @@ vec4_visitor::generate_code() extern "C" { bool -brw_vs_emit(struct brw_vs_compile *c) +brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c) { - struct brw_compile *p = &c->func; - struct brw_context *brw = p->brw; - struct intel_context *intel = &brw->intel; - struct gl_context *ctx = &intel->ctx; - struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram; - if (!prog) return false; @@ -833,8 +827,8 @@ brw_vs_emit(struct brw_vs_compile *c) vec4_visitor v(c, prog, shader); if (!v.run()) { - /* FINISHME: Cleanly fail, test at link time, etc. */ - assert(!"not reached"); + prog->LinkStatus = GL_FALSE; + ralloc_strcat(&prog->InfoLog, v.fail_msg); return false; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index d1888579597..b1792a8ee16 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -2012,7 +2012,7 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c, this->current_annotation = NULL; this->c = c; - this->vp = brw->vertex_program; /* FINISHME: change for precompile */ + this->vp = prog->VertexProgram; this->prog_data = &c->prog_data; this->variable_ht = hash_table_ctor(0, diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index d389f602fba..3373e707d98 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -40,9 +40,11 @@ #include "../glsl/ralloc.h" -static void do_vs_prog( struct brw_context *brw, - struct brw_vertex_program *vp, - struct brw_vs_prog_key *key ) +static bool +do_vs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_vertex_program *vp, + struct brw_vs_prog_key *key) { struct gl_context *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; @@ -91,9 +93,11 @@ static void do_vs_prog( struct brw_context *brw, if (new_vs == -1) new_vs = getenv("INTEL_NEW_VS") != NULL; - if (new_vs) { - if (!brw_vs_emit(&c)) - brw_old_vs_emit(&c); + if (new_vs && prog) { + if (!brw_vs_emit(prog, &c)) { + ralloc_free(mem_ctx); + return false; + } } else { brw_old_vs_emit(&c); } @@ -130,6 +134,8 @@ static void do_vs_prog( struct brw_context *brw, &c.prog_data, aux_size, &brw->vs.prog_offset, &brw->vs.prog_data); ralloc_free(mem_ctx); + + return true; } @@ -174,13 +180,15 @@ static void brw_upload_vs_prog(struct brw_context *brw) if (!brw_search_cache(&brw->cache, BRW_VS_PROG, &key, sizeof(key), &brw->vs.prog_offset, &brw->vs.prog_data)) { - do_vs_prog(brw, vp, &key); + bool success = do_vs_prog(brw, ctx->Shader.CurrentVertexProgram, + vp, &key); + + assert(success); } brw->vs.constant_map = ((int8_t *)brw->vs.prog_data + sizeof(*brw->vs.prog_data)); } - /* See brw_vs.c: */ const struct brw_tracked_state brw_vs_prog = { @@ -193,3 +201,30 @@ const struct brw_tracked_state brw_vs_prog = { }, .prepare = brw_upload_vs_prog }; + +bool +brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_vs_prog_key key; + struct gl_vertex_program *vp = prog->VertexProgram; + struct brw_vertex_program *bvp = brw_vertex_program(vp); + uint32_t old_prog_offset = brw->vs.prog_offset; + struct brw_vs_prog_data *old_prog_data = brw->vs.prog_data; + bool success; + + if (!vp) + return true; + + memset(&key, 0, sizeof(key)); + + key.program_string_id = bvp->id; + key.clamp_vertex_color = true; + + success = do_vs_prog(brw, prog, bvp, &key); + + brw->vs.prog_offset = old_prog_offset; + brw->vs.prog_data = old_prog_data; + + return success; +} diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 83a37f5b800..beccb381ee2 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -93,7 +93,8 @@ struct brw_vs_compile { GLboolean needs_stack; }; -bool brw_vs_emit(struct brw_vs_compile *c); +bool brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c); void brw_old_vs_emit(struct brw_vs_compile *c); +bool brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog); #endif -- cgit v1.2.3 From 7642c1de6b65b7dfd9e39904291cc9737cd54b56 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 11 Aug 2011 08:09:10 -0700 Subject: i965/vs: Avoid generating a MOV for most ir_assignment handling. Removes an average of 11.5% of instructions in 54% of vertex shaders in shader-db. --- src/mesa/drivers/dri/i965/brw_vec4.h | 6 +++ src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 67 ++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 082021513d2..620b05570a6 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -388,6 +388,12 @@ public: vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1, src_reg src2); + bool try_rewrite_rhs_to_dst(ir_assignment *ir, + dst_reg dst, + src_reg src, + vec4_instruction *pre_rhs_inst, + vec4_instruction *last_rhs_inst); + /** Walks an exec_list of ir_instruction and sends it through this visitor. */ void visit_instructions(const exec_list *list); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index b1792a8ee16..ae733810757 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1342,6 +1342,63 @@ vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src, src->reg_offset++; } + +/* If the RHS processing resulted in an instruction generating a + * temporary value, and it would be easy to rewrite the instruction to + * generate its result right into the LHS instead, do so. This ends + * up reliably removing instructions where it can be tricky to do so + * later without real UD chain information. + */ +bool +vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, + dst_reg dst, + src_reg src, + vec4_instruction *pre_rhs_inst, + vec4_instruction *last_rhs_inst) +{ + /* This could be supported, but it would take more smarts. */ + if (ir->condition) + return false; + + if (pre_rhs_inst == last_rhs_inst) + return false; /* No instructions generated to work with. */ + + /* Make sure the last instruction generated our source reg. */ + if (src.file != GRF || + src.file != last_rhs_inst->dst.file || + src.reg != last_rhs_inst->dst.reg || + src.reg_offset != last_rhs_inst->dst.reg_offset || + src.reladdr || + src.abs || + src.negate || + last_rhs_inst->predicate != BRW_PREDICATE_NONE) + return false; + + /* Check that that last instruction fully initialized the channels + * we want to use, in the order we want to use them. We could + * potentially reswizzle the operands of many instructions so that + * we could handle out of order channels, but don't yet. + */ + for (int i = 0; i < 4; i++) { + if (dst.writemask & (1 << i)) { + if (!(last_rhs_inst->dst.writemask & (1 << i))) + return false; + + if (BRW_GET_SWZ(src.swizzle, i) != i) + return false; + } + } + + /* Success! Rewrite the instruction. */ + last_rhs_inst->dst.file = dst.file; + last_rhs_inst->dst.reg = dst.reg; + last_rhs_inst->dst.reg_offset = dst.reg_offset; + last_rhs_inst->dst.reladdr = dst.reladdr; + last_rhs_inst->dst.writemask &= dst.writemask; + + return true; +} + void vec4_visitor::visit(ir_assignment *ir) { @@ -1363,7 +1420,13 @@ vec4_visitor::visit(ir_assignment *ir) /* Now we're down to just a scalar/vector with writemasks. */ int i; + vec4_instruction *pre_rhs_inst, *last_rhs_inst; + pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail(); + ir->rhs->accept(this); + + last_rhs_inst = (vec4_instruction *)this->instructions.get_tail(); + src_reg src = this->result; int swizzles[4]; @@ -1396,6 +1459,10 @@ vec4_visitor::visit(ir_assignment *ir) src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], swizzles[2], swizzles[3]); + if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) { + return; + } + if (ir->condition) { emit_bool_to_cond_code(ir->condition); } -- cgit v1.2.3 From 54e66a0a6327b55f15a7c641ec68da505ff19a35 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 11 Aug 2011 16:27:41 -0700 Subject: i965/vs: Fix abs/negate handling on attributes. Fixes glsl-vs-neg-attribute and glsl-vs-abs-attribute. --- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 9ef6ab6de90..6b0ae42e0e0 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -65,9 +65,16 @@ vec4_visitor::setup_attributes(int payload_reg) continue; int grf = attribute_map[inst->src[i].reg + inst->src[i].reg_offset]; + + struct brw_reg reg = brw_vec8_grf(grf, 0); + reg.dw1.bits.swizzle = inst->src[i].swizzle; + if (inst->src[i].abs) + reg = brw_abs(reg); + if (inst->src[i].negate) + reg = negate(reg); + inst->src[i].file = HW_REG; - inst->src[i].fixed_hw_reg = brw_vec8_grf(grf, 0); - inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle; + inst->src[i].fixed_hw_reg = reg; } } -- cgit v1.2.3 From 905f3d03090c7b86e410959c5640054f5f6894ef Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 12 Aug 2011 05:15:50 -0700 Subject: i965/vs: Remove remaining use of foreach_iter. --- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 10 +++------- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++-- 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 6b0ae42e0e0..fca31b6dec9 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -57,8 +57,8 @@ vec4_visitor::setup_attributes(int payload_reg) } } - foreach_iter(exec_list_iterator, iter, this->instructions) { - vec4_instruction *inst = (vec4_instruction *)iter.get(); + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; for (int i = 0; i < 3; i++) { if (inst->src[i].file != ATTR) @@ -546,11 +546,7 @@ vec4_visitor::run() /* Generate VS IR for main(). (the visitor only descends into * functions called "main"). */ - foreach_iter(exec_list_iterator, iter, *shader->ir) { - ir_instruction *ir = (ir_instruction *)iter.get(); - base_ir = ir; - ir->accept(this); - } + visit_instructions(shader->ir); emit_urb_writes(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index ae733810757..fc75cc35172 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -234,8 +234,8 @@ vec4_visitor::emit_math(enum opcode opcode, void vec4_visitor::visit_instructions(const exec_list *list) { - foreach_iter(exec_list_iterator, iter, *list) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, list) { + ir_instruction *ir = (ir_instruction *)node; base_ir = ir; ir->accept(this); -- cgit v1.2.3 From d0c595ac8032aa9aed402a513870b8dc92e42903 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 12 Aug 2011 05:28:53 -0700 Subject: i965/gen6: Force WHILE exec size to 8. We can't just look at the instruction that happens to appear at the start of the loop, because it might be some other exec size and cause us to only loop on the first N channels. We always want 8 in our current code (since 16 doesn't work so we don't do 16-wide fragment in that case). Fixes loop-03.vert, which was triggering the assertions. --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index f5cc09dd49b..27e81306e9c 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1341,8 +1341,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, brw_set_src1(p, insn, brw_imm_ud(0)); insn->bits3.break_cont.jip = br * (do_insn - insn); - insn->header.execution_size = do_insn->header.execution_size; - assert(insn->header.execution_size == BRW_EXECUTE_8); + insn->header.execution_size = BRW_EXECUTE_8; } else if (intel->gen == 6) { insn = next_insn(p, BRW_OPCODE_WHILE); @@ -1351,8 +1350,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - insn->header.execution_size = do_insn->header.execution_size; - assert(insn->header.execution_size == BRW_EXECUTE_8); + insn->header.execution_size = BRW_EXECUTE_8; } else { if (p->single_program_flow) { insn = next_insn(p, BRW_OPCODE_ADD); -- cgit v1.2.3 From 8a649277cb57cc13fb38f8e8daf07e8a2b96223c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 12 Aug 2011 05:32:25 -0700 Subject: i965/vs: Don't assertion fail on vertex texturing. The linker will reject the program, but we need to survive until then. Fixes abort in glsl1-2D Texture lookup with explicit lod (Vertex shader) --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index fc75cc35172..d03fbff27fc 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1554,7 +1554,12 @@ vec4_visitor::visit(ir_call *ir) void vec4_visitor::visit(ir_texture *ir) { - assert(!"not reached"); + /* FINISHME: Implement vertex texturing. + * + * With 0 vertex samplers available, the linker will reject + * programs that do vertex texturing, but after our visitor has + * run. + */ } void -- cgit v1.2.3 From feff7c62ce446f4e3bb755a2f40dcbd0e70155e4 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 15 Aug 2011 20:13:53 -0700 Subject: i965/vs: Fix condition code for scalar expression all_equals. Fixes vs-op-eq-bool-bool. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index d03fbff27fc..3ae89dfbc45 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -985,7 +985,7 @@ vec4_visitor::visit(ir_expression *ir) temp.type = op[0].type; inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + inst->conditional_mod = BRW_CONDITIONAL_Z; emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); } break; -- cgit v1.2.3 From e9a86ae3370948acb1276e80fbbc421d7025db36 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 15 Aug 2011 20:43:42 -0700 Subject: i965/vs: Fix memory leak of ralloc context for the visitor. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 3ae89dfbc45..185a01e05f9 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -2104,6 +2104,7 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c, vec4_visitor::~vec4_visitor() { + ralloc_free(this->mem_ctx); hash_table_dtor(this->variable_ht); } -- cgit v1.2.3 From 7bf70c29adf175f51d0347d0187aecc0e9bbbcb8 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 15 Aug 2011 20:59:24 -0700 Subject: i965/vs: Add support for conversion of FIXED_HW_REG src_reg to/from dst_reg. This was quietly occurring in some emit code I produced, and failed. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 185a01e05f9..621cb53ff84 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -38,6 +38,7 @@ src_reg::src_reg(dst_reg reg) this->reg_offset = reg.reg_offset; this->type = reg.type; this->reladdr = reg.reladdr; + this->fixed_hw_reg = reg.fixed_hw_reg; int swizzles[4]; int next_chan = 0; @@ -68,6 +69,7 @@ dst_reg::dst_reg(src_reg reg) this->type = reg.type; this->writemask = WRITEMASK_XYZW; this->reladdr = reg.reladdr; + this->fixed_hw_reg = reg.fixed_hw_reg; } vec4_instruction * -- cgit v1.2.3 From 0ddf0f1c3451eef8a7c7f46afca623dc4f7c5af6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 15 Aug 2011 21:02:10 -0700 Subject: i965/vs: Fix multiplies to actually do 32-bit multiplies. Fixes vs-op-mult-int-int and friends. --- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 5 +++++ src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 18 +++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index fca31b6dec9..011af6f2d3e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -633,6 +633,11 @@ vec4_visitor::generate_code() case BRW_OPCODE_MUL: brw_MUL(p, dst, src[0], src[1]); break; + case BRW_OPCODE_MACH: + brw_set_acc_write_control(p, 1); + brw_MACH(p, dst, src[0], src[1]); + brw_set_acc_write_control(p, 0); + break; case BRW_OPCODE_FRC: brw_FRC(p, dst, src[0]); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 621cb53ff84..a60fc5f6ada 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -945,7 +945,23 @@ vec4_visitor::visit(ir_expression *ir) break; case ir_binop_mul: - emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]); + if (ir->type->is_integer()) { + /* For integer multiplication, the MUL uses the low 16 bits + * of one of the operands (src0 on gen6, src1 on gen7). The + * MACH accumulates in the contribution of the upper 16 bits + * of that operand. + * + * FINISHME: Emit just the MUL if we know an operand is small + * enough. + */ + struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D); + + emit(BRW_OPCODE_MUL, acc, op[0], op[1]); + emit(BRW_OPCODE_MACH, dst_null_d(), op[0], op[1]); + emit(BRW_OPCODE_MOV, result_dst, src_reg(acc)); + } else { + emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]); + } break; case ir_binop_div: assert(!"not reached: should be handled by ir_div_to_mul_rcp"); -- cgit v1.2.3 From eb0ff1a1c0f1978d867c748bf2525f717a56bfce Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 15 Aug 2011 10:58:25 -0700 Subject: mesa: Remove use of fpu_control.h Remove the inclusion of fpu_control.h from compiler.h. Since Bionic lacks fpu_control.h, this fixes the Android build. Also remove the sole use of the fpu_control bits, which was in debug.c. Those were brianp's debug bits, and he approved of their removal. Reviewed-by: Eric Anholt Signed-off-by: Chad Versace --- src/mesa/main/compiler.h | 3 --- src/mesa/main/debug.c | 11 ----------- 2 files changed, 14 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h index ee7d0b2f880..8ed1c6fa61f 100644 --- a/src/mesa/main/compiler.h +++ b/src/mesa/main/compiler.h @@ -45,9 +45,6 @@ #include #include #include -#if defined(__linux__) && defined(__i386__) -#include -#endif #include #include diff --git a/src/mesa/main/debug.c b/src/mesa/main/debug.c index e7f6be99481..b1fc096f296 100644 --- a/src/mesa/main/debug.c +++ b/src/mesa/main/debug.c @@ -192,17 +192,6 @@ static void add_debug_flags( const char *debug ) if (strstr(debug, "flush")) MESA_DEBUG_FLAGS |= DEBUG_ALWAYS_FLUSH; -#if defined(_FPU_GETCW) && defined(_FPU_SETCW) - if (strstr(debug, "fpexceptions")) { - /* raise FP exceptions */ - fpu_control_t mask; - _FPU_GETCW(mask); - mask &= ~(_FPU_MASK_IM | _FPU_MASK_DM | _FPU_MASK_ZM - | _FPU_MASK_OM | _FPU_MASK_UM); - _FPU_SETCW(mask); - } -#endif - #else (void) debug; #endif -- cgit v1.2.3 From bd064a49f119d126623c0e85702801e4cee62187 Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 15 Aug 2011 13:26:21 -0700 Subject: mesa: Fix Android build by #ifdef'ing out locale support Bionic does not support locales. This commit #ifdef's out the locale usage in _mesa_strtof(). Signed-off-by: Chad Versace --- src/mesa/main/imports.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c index 0a572ec225d..8f097195922 100644 --- a/src/mesa/main/imports.c +++ b/src/mesa/main/imports.c @@ -753,7 +753,8 @@ _mesa_strdup( const char *s ) float _mesa_strtof( const char *s, char **end ) { -#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__) +#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__) && \ + !defined(ANDROID) static locale_t loc = NULL; if (!loc) { loc = newlocale(LC_CTYPE_MASK, "C", NULL); -- cgit v1.2.3 From 3c9f172fe801a8e954a40affc38942b628b81bda Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Mon, 15 Aug 2011 13:29:15 -0700 Subject: mesa: Add Android to list of platforms that define fpclassify() This is a fix for the Android build. Signed-off-by: Chad Versace --- src/mesa/main/querymatrix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/main/querymatrix.c b/src/mesa/main/querymatrix.c index 944ad435f7a..eaedf7cd238 100644 --- a/src/mesa/main/querymatrix.c +++ b/src/mesa/main/querymatrix.c @@ -73,7 +73,7 @@ fpclassify(double x) #elif defined(__APPLE__) || defined(__CYGWIN__) || defined(__FreeBSD__) || \ defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \ (defined(__sun) && defined(__C99FEATURES__)) || defined(__MINGW32__) || \ - (defined(__sun) && defined(__GNUC__)) + (defined(__sun) && defined(__GNUC__)) || defined(ANDROID) /* fpclassify is available. */ -- cgit v1.2.3 From 6ad08989d7c10892919ce1cb9c88c4cf8b73e1dc Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Sat, 30 Jul 2011 10:48:10 -0700 Subject: ir_to_mesa: Implement ir_unop_logic_not using 1-x Since our logic values are 0.0 (false) and 1.0 (true), 1.0 - x accurately implements logical not. Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 1ef609fe15d..f03ea7a95e0 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1135,7 +1135,13 @@ ir_to_mesa_visitor::visit(ir_expression *ir) switch (ir->operation) { case ir_unop_logic_not: - emit(ir, OPCODE_SEQ, result_dst, op[0], src_reg_for_float(0.0)); + /* Previously 'SEQ dst, src, 0.0' was used for this. However, many + * older GPUs implement SEQ using multiple instructions (i915 uses two + * SGE instructions and a MUL instruction). Since our logic values are + * 0.0 and 1.0, 1-x also implements !x. + */ + op[0].negate = ~op[0].negate; + emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0)); break; case ir_unop_neg: op[0].negate = ~op[0].negate; -- cgit v1.2.3 From 41f8ffe5e07c4f389eb13d17ecf0ff776890e9bc Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Sat, 30 Jul 2011 10:49:49 -0700 Subject: ir_to_mesa: Implement ir_binop_logic_or using an add w/saturate or add w/SLT Logical-or is implemented using addition (followed by clampling to [0,1]) on values of 0.0 and 1.0. Replacing the logical-or operators with addition gives a + b which has a result on the range [0, 2]. Previously a SNE instruction was used to clamp the resulting logic value to [0,1]. In a fragment shader, using a saturate on the add has the same effect. Adding the saturate to the add is free, so (at least) one instruction is saved. In a vertex shader, using an SLT on the negation of the add result has the same effect. Many older shader architectures do not support the SNE instruction. It must be emulated using two SLT instructions and an ADD. On these architectures, the single SLT saves two instructions. Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index f03ea7a95e0..fcd14c89cd7 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1267,11 +1267,28 @@ ir_to_mesa_visitor::visit(ir_expression *ir) emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); break; - case ir_binop_logic_or: - /* This could be a saturated add and skip the SNE. */ - emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); - emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0)); + case ir_binop_logic_or: { + /* After the addition, the value will be an integer on the + * range [0,2]. Zero stays zero, and positive values become 1.0. + */ + ir_to_mesa_instruction *add = + emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + add->saturate = true; + } else { + /* Negating the result of the addition gives values on the range + * [-2, 0]. Zero stays zero, and negative values become 1.0. This + * is achieved using SLT. + */ + src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0)); + } break; + } case ir_binop_logic_and: /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ -- cgit v1.2.3 From 7f4c65256cc3f4d9f6a214424beabe688a5dd6a2 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Sat, 30 Jul 2011 10:45:35 -0700 Subject: ir_to_mesa: Make ir_to_mesa_visitor::emit_dp return the instruction Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index fcd14c89cd7..60d498bd9e3 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -297,11 +297,11 @@ public: /** * Emit the correct dot-product instruction for the type of arguments */ - void emit_dp(ir_instruction *ir, - dst_reg dst, - src_reg src0, - src_reg src1, - unsigned elements); + ir_to_mesa_instruction * emit_dp(ir_instruction *ir, + dst_reg dst, + src_reg src0, + src_reg src1, + unsigned elements); void emit_scalar(ir_instruction *ir, enum prog_opcode op, dst_reg dst, src_reg src0); @@ -408,7 +408,7 @@ ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op) return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); } -void +ir_to_mesa_instruction * ir_to_mesa_visitor::emit_dp(ir_instruction *ir, dst_reg dst, src_reg src0, src_reg src1, unsigned elements) @@ -417,7 +417,7 @@ ir_to_mesa_visitor::emit_dp(ir_instruction *ir, OPCODE_DP2, OPCODE_DP3, OPCODE_DP4 }; - emit(ir, dot_opcodes[elements - 2], dst, src0, src1); + return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); } /** -- cgit v1.2.3 From 92ca560d68e8a6b532998707afcf4f60c0ce2806 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 3 Aug 2011 15:27:43 -0700 Subject: ir_to_mesa: Implement ir_unop_any using DP4 w/saturate or DP4 w/SLT This is just like the ir_binop_logic_or case. The operation ir_unop_any is (a.x || a.y || a.z || a.w). Logical-or is implemented using addition (followed by clampling to [0,1]) on values of 0.0 and 1.0. Replacing the logical-or operators with addition gives (a.x + a.y + a.z + a.w). This can be implemented using a dot-product with a vector of all 1.0. Previously a SNE instruction was used to clamp the resulting logic value to [0,1]. In a fragment shader, using a saturate on the dot-product has the same effect. Adding the saturate to the dot-product is free, so (at least) one instruction is saved. In a vertex shader, using an SLT on the negation of the dot-product result has the same effect. Many older shader architectures do not support the SNE instruction. It must be emulated using two SLT instructions and an ADD. On these architectures, the single SLT saves two instructions. Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 60d498bd9e3..1bd9a2eee1b 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1256,12 +1256,31 @@ ir_to_mesa_visitor::visit(ir_expression *ir) } break; - case ir_unop_any: + case ir_unop_any: { assert(ir->operands[0]->type->is_vector()); - emit_dp(ir, result_dst, op[0], op[0], - ir->operands[0]->type->vector_elements); - emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0)); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero stays zero, and positive values become 1.0. + */ + ir_to_mesa_instruction *const dp = + emit_dp(ir, result_dst, op[0], op[0], + ir->operands[0]->type->vector_elements); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + dp->saturate = true; + } else { + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero stays zero, and negative values become 1.0. This + * is achieved using SLT. + */ + src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0)); + } break; + } case ir_binop_logic_xor: emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); -- cgit v1.2.3 From e7bf096e8b04931996c8c56548ce0b2c0af3a0dc Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 3 Aug 2011 15:35:01 -0700 Subject: ir_to_mesa: Implement ir_binop_any_nequal using DP4 w/saturate or DP4 w/SLT The operation ir_binop_any_nequal is (a.x != b.x) || (a.y != b.y) || (a.z != b.z) || (a.w != b.w), and that is the same as any(bvec4(a.x != b.x, a.y != b.y, a.z != b.z, a.w != b.w)). Implement the any() part the same way the regular ir_unop_any is implemented. Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 1bd9a2eee1b..1c674ea8756 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1249,8 +1249,26 @@ ir_to_mesa_visitor::visit(ir_expression *ir) ir->operands[1]->type->is_vector()) { src_reg temp = get_temp(glsl_type::vec4_type); emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]); - emit_dp(ir, result_dst, temp, temp, vector_elements); - emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0)); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero stays zero, and positive values become 1.0. + */ + ir_to_mesa_instruction *const dp = + emit_dp(ir, result_dst, temp, temp, vector_elements); + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + /* The clamping to [0,1] can be done for free in the fragment + * shader with a saturate. + */ + dp->saturate = true; + } else { + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero stays zero, and negative values become 1.0. This + * achieved using SLT. + */ + src_reg slt_src = result_src; + slt_src.negate = ~slt_src.negate; + emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0)); + } } else { emit(ir, OPCODE_SNE, result_dst, op[0], op[1]); } -- cgit v1.2.3 From ba01df11c4d09c65514a8522cb319e29034ab5a8 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 3 Aug 2011 15:42:05 -0700 Subject: ir_to_mesa: Implement ir_binop_all_equal using DP4 w/SGE The operation ir_binop_all_equal is !(a.x != b.x || a.y != b.y || a.z != b.z || a.w != b.w). Logical-or is implemented using addition (followed by clampling to [0,1]) on values of 0.0 and 1.0. Replacing the logical-or operators with addition gives !bool((int(a.x != b.x) + int(a.y == b.y) + int(a.z == b.z) + int(a.w == b.w)). This can be implemented using a dot-product with a vector of all 1.0. After the dot-product, the value will be an integer on the range [0,4]. Previously a SEQ instruction was used to clamp the resulting logic value to [0,1] and invert the result. Using an SGE instruction on the negation of the dot-product result has the same effect. Many older shader architectures do not support the SEQ instruction. It must be emulated using two SGE instructions and a MUL. On these architectures, the single SGE saves two instructions. Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 1c674ea8756..4c8b097de6b 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1237,8 +1237,19 @@ ir_to_mesa_visitor::visit(ir_expression *ir) ir->operands[1]->type->is_vector()) { src_reg temp = get_temp(glsl_type::vec4_type); emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]); + + /* After the dot-product, the value will be an integer on the + * range [0,4]. Zero becomes 1.0, and positive values become zero. + */ emit_dp(ir, result_dst, temp, temp, vector_elements); - emit(ir, OPCODE_SEQ, result_dst, result_src, src_reg_for_float(0.0)); + + /* Negating the result of the dot-product gives values on the range + * [-4, 0]. Zero becomes 1.0, and negative values become zero. This + * achieved using SGE. + */ + src_reg sge_src = result_src; + sge_src.negate = ~sge_src.negate; + emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0)); } else { emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]); } -- cgit v1.2.3 From ff2cfb8989cd79218dfe2cd8c3de20f1ca7418e6 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 2 Aug 2011 12:17:20 -0700 Subject: ir_to_mesa: Emit a MAD(b, -a, b) for !a && b !a && b occurs frequently when nexted if-statements have been flattened. It should also be possible use a MAD for (a && b) || c, though that would require a MAD_SAT. Reviewed-by: Eric Anholt --- src/mesa/program/ir_to_mesa.cpp | 52 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 4c8b097de6b..b1211c1145c 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -314,6 +314,8 @@ public: GLboolean try_emit_mad(ir_expression *ir, int mul_operand); + bool try_emit_mad_for_and_not(ir_expression *ir, + int mul_operand); GLboolean try_emit_sat(ir_expression *ir); void emit_swz(ir_expression *ir); @@ -892,6 +894,46 @@ ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand) return true; } +/** + * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b)) + * + * The logic values are 1.0 for true and 0.0 for false. Logical-and is + * implemented using multiplication, and logical-or is implemented using + * addition. Logical-not can be implemented as (true - x), or (1.0 - x). + * As result, the logical expression (a & !b) can be rewritten as: + * + * - a * !b + * - a * (1 - b) + * - (a * 1) - (a * b) + * - a + -(a * b) + * - a + (a * -b) + * + * This final expression can be implemented as a single MAD(a, -b, a) + * instruction. + */ +bool +ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) +{ + const int other_operand = 1 - try_operand; + src_reg a, b; + + ir_expression *expr = ir->operands[try_operand]->as_expression(); + if (!expr || expr->operation != ir_unop_logic_not) + return false; + + ir->operands[other_operand]->accept(this); + a = this->result; + expr->operands[0]->accept(this); + b = this->result; + + b.negate = ~b.negate; + + this->result = get_temp(ir->type); + emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a); + + return true; +} + GLboolean ir_to_mesa_visitor::try_emit_sat(ir_expression *ir) { @@ -1088,6 +1130,16 @@ ir_to_mesa_visitor::visit(ir_expression *ir) if (try_emit_mad(ir, 0)) return; } + + /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) + */ + if (ir->operation == ir_binop_logic_and) { + if (try_emit_mad_for_and_not(ir, 1)) + return; + if (try_emit_mad_for_and_not(ir, 0)) + return; + } + if (try_emit_sat(ir)) return; -- cgit v1.2.3 From 54c48a95e6e0573886433f94ac83293876ffe03d Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 10 Feb 2011 15:48:27 -0800 Subject: mesa: Add partial constant propagation pass for Mesa IR This cleans up some code generated by the IR-to-Mesa pass for i915. In particular, some shaders involving arrays of constant matrices result in really bad code. v2: Silence several warnings from merging the gl_constant_value work. Fix DP[23] folding. Add support for a bunch more opcodes that appear in piglit runs on i915. Reviewed-by: Eric Anholt --- src/mesa/SConscript | 1 + src/mesa/program/prog_opt_constant_fold.c | 451 ++++++++++++++++++++++++++++++ src/mesa/program/prog_optimize.c | 2 + src/mesa/program/prog_optimize.h | 3 + src/mesa/sources.mak | 1 + 5 files changed, 458 insertions(+) create mode 100644 src/mesa/program/prog_opt_constant_fold.c (limited to 'src/mesa') diff --git a/src/mesa/SConscript b/src/mesa/SConscript index 05aa0e8010e..b0c3334fa48 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -293,6 +293,7 @@ program_sources = [ 'program/prog_instruction.c', 'program/prog_noise.c', 'program/prog_optimize.c', + 'program/prog_opt_constant_fold.c', 'program/prog_parameter.c', 'program/prog_parameter_layout.c', 'program/prog_print.c', diff --git a/src/mesa/program/prog_opt_constant_fold.c b/src/mesa/program/prog_opt_constant_fold.c new file mode 100644 index 00000000000..e2418b55451 --- /dev/null +++ b/src/mesa/program/prog_opt_constant_fold.c @@ -0,0 +1,451 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "main/glheader.h" +#include "main/context.h" +#include "main/macros.h" +#include "program.h" +#include "prog_instruction.h" +#include "prog_optimize.h" +#include "prog_parameter.h" +#include + +static bool +src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs) +{ + unsigned i; + + for (i = 0; i < num_srcs; i++) { + if (inst->SrcReg[i].File != PROGRAM_CONSTANT) + return false; + } + + return true; +} + +static struct prog_src_register +src_reg_for_float(struct gl_program *prog, float val) +{ + struct prog_src_register src; + unsigned swiz; + + memset(&src, 0, sizeof(src)); + + src.File = PROGRAM_CONSTANT; + src.Index = _mesa_add_unnamed_constant(prog->Parameters, + (gl_constant_value *) &val, 1, &swiz); + src.Swizzle = swiz; + return src; +} + +static struct prog_src_register +src_reg_for_vec4(struct gl_program *prog, const float *val) +{ + struct prog_src_register src; + unsigned swiz; + + memset(&src, 0, sizeof(src)); + + src.File = PROGRAM_CONSTANT; + src.Index = _mesa_add_unnamed_constant(prog->Parameters, + (gl_constant_value *) val, 4, &swiz); + src.Swizzle = swiz; + return src; +} + +static bool +src_regs_are_same(const struct prog_src_register *a, + const struct prog_src_register *b) +{ + return (a->File == b->File) + && (a->Index == b->Index) + && (a->Swizzle == b->Swizzle) + && (a->Abs == b->Abs) + && (a->Negate == b->Negate) + && (a->RelAddr == 0) + && (b->RelAddr == 0); +} + +static void +get_value(struct gl_program *prog, struct prog_src_register *r, float *data) +{ + const gl_constant_value *const value = + prog->Parameters->ParameterValues[r->Index]; + + data[0] = value[GET_SWZ(r->Swizzle, 0)].f; + data[1] = value[GET_SWZ(r->Swizzle, 1)].f; + data[2] = value[GET_SWZ(r->Swizzle, 2)].f; + data[3] = value[GET_SWZ(r->Swizzle, 3)].f; + + if (r->Abs) { + data[0] = fabsf(data[0]); + data[1] = fabsf(data[1]); + data[2] = fabsf(data[2]); + data[3] = fabsf(data[3]); + } + + if (r->Negate & 0x01) { + data[0] = -data[0]; + } + + if (r->Negate & 0x02) { + data[1] = -data[1]; + } + + if (r->Negate & 0x04) { + data[2] = -data[2]; + } + + if (r->Negate & 0x08) { + data[3] = -data[3]; + } +} + +/** + * Try to replace instructions that produce a constant result with simple moves + * + * The hope is that a following copy propagation pass will eliminate the + * unnecessary move instructions. + */ +GLboolean +_mesa_constant_fold(struct gl_program *prog) +{ + bool progress = false; + unsigned i; + + for (i = 0; i < prog->NumInstructions; i++) { + struct prog_instruction *const inst = &prog->Instructions[i]; + + switch (inst->Opcode) { + case OPCODE_ADD: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = a[0] + b[0]; + result[1] = a[1] + b[1]; + result[2] = a[2] + b[2]; + result[3] = a[3] + b[3]; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_CMP: + /* FINISHME: We could also optimize CMP instructions where the first + * FINISHME: source is a constant that is either all < 0.0 or all + * FINISHME: >= 0.0. + */ + if (src_regs_are_constant(inst, 3)) { + float a[4]; + float b[4]; + float c[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + get_value(prog, &inst->SrcReg[2], c); + + result[0] = a[0] < 0.0f ? b[0] : c[0]; + result[1] = a[1] < 0.0f ? b[1] : c[1]; + result[2] = a[2] < 0.0f ? b[2] : c[2]; + result[3] = a[3] < 0.0f ? b[3] : c[3]; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + inst->SrcReg[2].File = PROGRAM_UNDEFINED; + inst->SrcReg[2].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_DP2: + case OPCODE_DP3: + case OPCODE_DP4: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + /* It seems like a loop could be used here, but we cleverly put + * DP2A between DP2 and DP3. Subtracting DP2 (or similar) from + * the opcode results in various failures of the loop control. + */ + result = (a[0] * b[0]) + (a[1] * b[1]); + + if (inst->Opcode >= OPCODE_DP3) + result += a[2] * b[2]; + + if (inst->Opcode == OPCODE_DP4) + result += a[3] * b[3]; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_MUL: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = a[0] * b[0]; + result[1] = a[1] * b[1]; + result[2] = a[2] * b[2]; + result[3] = a[3] * b[3]; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SEQ: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] == b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] == b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] == b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] == b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SGE: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SGT: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] > b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] > b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] > b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] > b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SLE: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] <= b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] <= b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] <= b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] <= b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SLT: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] < b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] < b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] < b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] < b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SNE: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] != b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] != b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] != b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] != b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + default: + break; + } + } + + return progress; +} diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c index 3340ce0498b..25d9684b137 100644 --- a/src/mesa/program/prog_optimize.c +++ b/src/mesa/program/prog_optimize.c @@ -1358,6 +1358,8 @@ _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program) any_change = GL_TRUE; if (_mesa_remove_dead_code_local(program)) any_change = GL_TRUE; + + any_change = _mesa_constant_fold(program) || any_change; _mesa_reallocate_registers(program); } while (any_change); } diff --git a/src/mesa/program/prog_optimize.h b/src/mesa/program/prog_optimize.h index 463f5fc51c4..9854fb7a491 100644 --- a/src/mesa/program/prog_optimize.h +++ b/src/mesa/program/prog_optimize.h @@ -44,4 +44,7 @@ _mesa_find_temp_intervals(const struct prog_instruction *instructions, extern void _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program); +extern GLboolean +_mesa_constant_fold(struct gl_program *prog); + #endif diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak index ed008f8813e..5e77e0f5919 100644 --- a/src/mesa/sources.mak +++ b/src/mesa/sources.mak @@ -251,6 +251,7 @@ PROGRAM_SOURCES = \ program/prog_instruction.c \ program/prog_noise.c \ program/prog_optimize.c \ + program/prog_opt_constant_fold.c \ program/prog_parameter.c \ program/prog_parameter_layout.c \ program/prog_print.c \ -- cgit v1.2.3 From 7125f1e87df359be4aad1d801b633146eeac7292 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 3 Aug 2011 17:12:29 -0700 Subject: mesa: Bump instruction execution limit to 65536 Shader Model 3.0[1] requires that shaders be able to execute at least 65536 instructions. Bump Mesa maxExec to that limit. This allows several vertex shaders in the OpenGL ES 2.0 conformance test suite to run to completion. 1: http://en.wikipedia.org/wiki/High_Level_Shader_Language Reviewed-by: Eric Anholt --- src/mesa/program/prog_execute.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c index dbfd1b91875..c70a1e344e5 100644 --- a/src/mesa/program/prog_execute.c +++ b/src/mesa/program/prog_execute.c @@ -639,7 +639,7 @@ _mesa_execute_program(struct gl_context * ctx, struct gl_program_machine *machine) { const GLuint numInst = program->NumInstructions; - const GLuint maxExec = 10000; + const GLuint maxExec = 65536; GLuint pc, numExec = 0; machine->CurProgram = program; -- cgit v1.2.3 From b629d5ba24f76ed6af35455a874d351fde1e5bbe Mon Sep 17 00:00:00 2001 From: Lauri Kasanen Date: Fri, 1 Jul 2011 13:49:18 +0300 Subject: xmlconfig: Make the error message more informative --- src/mesa/drivers/dri/common/xmlconfig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c index 77967ac2a43..12dd31bb162 100644 --- a/src/mesa/drivers/dri/common/xmlconfig.c +++ b/src/mesa/drivers/dri/common/xmlconfig.c @@ -567,7 +567,7 @@ static void parseOptInfoAttr (struct OptInfoData *data, const XML_Char **attr) { } else defaultVal = attrVal[OA_DEFAULT]; if (!parseValue (&cache->values[opt], cache->info[opt].type, defaultVal)) - XML_FATAL ("illegal default value: %s.", defaultVal); + XML_FATAL ("illegal default value for %s: %s.", cache->info[opt].name, defaultVal); if (attrVal[OA_VALID]) { if (cache->info[opt].type == DRI_BOOL) -- cgit v1.2.3 From 59e56957cce16e5d993974e4b7f339afc9cb949b Mon Sep 17 00:00:00 2001 From: Lauri Kasanen Date: Fri, 1 Jul 2011 13:01:00 +0300 Subject: xmlpool.h: fix a typo --- src/mesa/drivers/dri/common/xmlpool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/common/xmlpool.h b/src/mesa/drivers/dri/common/xmlpool.h index 587517ea10a..ffea430024d 100644 --- a/src/mesa/drivers/dri/common/xmlpool.h +++ b/src/mesa/drivers/dri/common/xmlpool.h @@ -60,7 +60,7 @@ #define DRI_CONF_OPT_BEGIN(name,type,def) \ "