Diffstat (limited to 'src/mesa')
373 files changed, 14809 insertions, 6163 deletions
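Editor's note: the largest meta.c change in the diff below reworks _mesa_meta_setup_texture_coords() to take an xoffset/yoffset and a sub-region size alongside the full mip dimensions, so that glGetTexSubImage can decompress only part of an image. A minimal standalone sketch of the normalization those hunks perform for 2D-style targets (function and parameter names here are illustrative, not the exact Mesa code):

/* Map a sub-rectangle of a total_width x total_height image to [0,1]
 * texture coordinates, as done for GL_TEXTURE_2D-like targets.
 * GL_TEXTURE_RECTANGLE instead uses unnormalized texel coordinates,
 * i.e. xoffset .. xoffset + width directly. */
static void
sub_region_texcoords(int xoffset, int yoffset, int width, int height,
                     int total_width, int total_height,
                     float st[4][2])
{
   const float s0 = (float) xoffset / (float) total_width;
   const float s1 = (float) (xoffset + width) / (float) total_width;
   const float t0 = (float) yoffset / (float) total_height;
   const float t1 = (float) (yoffset + height) / (float) total_height;

   /* Quad corners in counter-clockwise order:
    * (s0,t0), (s1,t0), (s1,t1), (s0,t1). */
   st[0][0] = s0; st[0][1] = t0;
   st[1][0] = s1; st[1][1] = t0;
   st[2][0] = s1; st[2][1] = t1;
   st[3][0] = s0; st[3][1] = t1;
}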
diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am index 4ba5b2fac29..eb4a3da3c84 100644 --- a/src/mesa/Makefile.am +++ b/src/mesa/Makefile.am @@ -19,8 +19,6 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. -AUTOMAKE_OPTIONS = subdir-objects - SUBDIRS = . main/tests if HAVE_X11_DRIVER diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index 83f500fbf20..ed9848c5454 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -407,6 +407,7 @@ STATETRACKER_FILES = \ state_tracker/st_atom_shader.c \ state_tracker/st_atom_shader.h \ state_tracker/st_atom_stipple.c \ + state_tracker/st_atom_tess.c \ state_tracker/st_atom_texture.c \ state_tracker/st_atom_viewport.c \ state_tracker/st_cache.h \ diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 71c1a763912..6fe42b1775c 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -94,14 +94,14 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->QuerySamplesForFormat = _mesa_query_samples_for_format; driver->TexImage = _mesa_store_teximage; driver->TexSubImage = _mesa_store_texsubimage; - driver->GetTexImage = _mesa_meta_GetTexImage; + driver->GetTexSubImage = _mesa_meta_GetTexSubImage; driver->ClearTexSubImage = _mesa_meta_ClearTexSubImage; driver->CopyTexSubImage = _mesa_meta_CopyTexSubImage; driver->GenerateMipmap = _mesa_meta_GenerateMipmap; driver->TestProxyTexImage = _mesa_test_proxy_teximage; driver->CompressedTexImage = _mesa_store_compressed_teximage; driver->CompressedTexSubImage = _mesa_store_compressed_texsubimage; - driver->GetCompressedTexImage = _mesa_GetCompressedTexImage_sw; + driver->GetCompressedTexSubImage = _mesa_GetCompressedTexSubImage_sw; driver->BindTexture = NULL; driver->NewTextureObject = _mesa_new_texture_object; driver->DeleteTexture = _mesa_delete_texture_object; diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 214a68a9129..bde544ef490 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -728,7 +728,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) save->DepthNear = ctx->ViewportArray[0].Near; save->DepthFar = ctx->ViewportArray[0].Far; /* set depth range to default */ - _mesa_DepthRange(0.0, 1.0); + _mesa_set_depth_range(ctx, 0, 0.0, 1.0); } if (state & MESA_META_CLAMP_FRAGMENT_COLOR) { @@ -945,6 +945,8 @@ _mesa_meta_end(struct gl_context *ctx) if (state & MESA_META_SHADER) { static const GLenum targets[] = { GL_VERTEX_SHADER, + GL_TESS_CONTROL_SHADER, + GL_TESS_EVALUATION_SHADER, GL_GEOMETRY_SHADER, GL_FRAGMENT_SHADER, }; @@ -1129,7 +1131,7 @@ _mesa_meta_end(struct gl_context *ctx) _mesa_set_viewport(ctx, 0, save->ViewportX, save->ViewportY, save->ViewportW, save->ViewportH); } - _mesa_DepthRange(save->DepthNear, save->DepthFar); + _mesa_set_depth_range(ctx, 0, save->DepthNear, save->DepthFar); } if (state & MESA_META_CLAMP_FRAGMENT_COLOR && @@ -2449,30 +2451,53 @@ _mesa_meta_Bitmap(struct gl_context *ctx, /** * Compute the texture coordinates for the four vertices of a quad for - * drawing a 2D texture image or slice of a cube/3D texture. + * drawing a 2D texture image or slice of a cube/3D texture. The offset + * and width, height specify a sub-region of the 2D image. 
+ * * \param faceTarget GL_TEXTURE_1D/2D/3D or cube face name * \param slice slice of a 1D/2D array texture or 3D texture - * \param width width of the texture image - * \param height height of the texture image + * \param xoffset X position of sub texture + * \param yoffset Y position of sub texture + * \param width width of the sub texture image + * \param height height of the sub texture image + * \param total_width total width of the texture image + * \param total_height total height of the texture image + * \param total_depth total depth of the texture image * \param coords0/1/2/3 returns the computed texcoords */ void _mesa_meta_setup_texture_coords(GLenum faceTarget, GLint slice, + GLint xoffset, + GLint yoffset, GLint width, GLint height, - GLint depth, + GLint total_width, + GLint total_height, + GLint total_depth, GLfloat coords0[4], GLfloat coords1[4], GLfloat coords2[4], GLfloat coords3[4]) { - static const GLfloat st[4][2] = { - {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f} - }; + float st[4][2]; GLuint i; + const float s0 = (float) xoffset / (float) total_width; + const float s1 = (float) (xoffset + width) / (float) total_width; + const float t0 = (float) yoffset / (float) total_height; + const float t1 = (float) (yoffset + height) / (float) total_height; GLfloat r; + /* setup the reference texcoords */ + st[0][0] = s0; + st[0][1] = t0; + st[1][0] = s1; + st[1][1] = t0; + st[2][0] = s1; + st[2][1] = t1; + st[3][0] = s0; + st[3][1] = t1; + if (faceTarget == GL_TEXTURE_CUBE_MAP_ARRAY) faceTarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + slice % 6; @@ -2489,52 +2514,52 @@ _mesa_meta_setup_texture_coords(GLenum faceTarget, case GL_TEXTURE_3D: case GL_TEXTURE_2D_ARRAY: if (faceTarget == GL_TEXTURE_3D) { - assert(slice < depth); - assert(depth >= 1); - r = (slice + 0.5f) / depth; + assert(slice < total_depth); + assert(total_depth >= 1); + r = (slice + 0.5f) / total_depth; } else if (faceTarget == GL_TEXTURE_2D_ARRAY) r = (float) slice; else r = 0.0F; - coords0[0] = 0.0F; /* s */ - coords0[1] = 0.0F; /* t */ + coords0[0] = st[0][0]; /* s */ + coords0[1] = st[0][1]; /* t */ coords0[2] = r; /* r */ - coords1[0] = 1.0F; - coords1[1] = 0.0F; + coords1[0] = st[1][0]; + coords1[1] = st[1][1]; coords1[2] = r; - coords2[0] = 1.0F; - coords2[1] = 1.0F; + coords2[0] = st[2][0]; + coords2[1] = st[2][1]; coords2[2] = r; - coords3[0] = 0.0F; - coords3[1] = 1.0F; + coords3[0] = st[3][0]; + coords3[1] = st[3][1]; coords3[2] = r; break; case GL_TEXTURE_RECTANGLE_ARB: - coords0[0] = 0.0F; /* s */ - coords0[1] = 0.0F; /* t */ + coords0[0] = (float) xoffset; /* s */ + coords0[1] = (float) yoffset; /* t */ coords0[2] = 0.0F; /* r */ - coords1[0] = (float) width; - coords1[1] = 0.0F; + coords1[0] = (float) (xoffset + width); + coords1[1] = (float) yoffset; coords1[2] = 0.0F; - coords2[0] = (float) width; - coords2[1] = (float) height; + coords2[0] = (float) (xoffset + width); + coords2[1] = (float) (yoffset + height); coords2[2] = 0.0F; - coords3[0] = 0.0F; - coords3[1] = (float) height; + coords3[0] = (float) xoffset; + coords3[1] = (float) (yoffset + height); coords3[2] = 0.0F; break; case GL_TEXTURE_1D_ARRAY: - coords0[0] = 0.0F; /* s */ + coords0[0] = st[0][0]; /* s */ coords0[1] = (float) slice; /* t */ coords0[2] = 0.0F; /* r */ - coords1[0] = 1.0f; + coords1[0] = st[1][0]; coords1[1] = (float) slice; coords1[2] = 0.0F; - coords2[0] = 1.0F; + coords2[0] = st[2][0]; coords2[1] = (float) slice; coords2[2] = 0.0F; - coords3[0] = 0.0F; + coords3[0] = st[3][0]; coords3[1] = (float) slice; coords3[2] 
= 0.0F; break; @@ -2943,15 +2968,14 @@ static bool decompress_texture_image(struct gl_context *ctx, struct gl_texture_image *texImage, GLuint slice, + GLint xoffset, GLint yoffset, + GLsizei width, GLsizei height, GLenum destFormat, GLenum destType, GLvoid *dest) { struct decompress_state *decompress = &ctx->Meta->Decompress; struct decompress_fbo_state *decompress_fbo; struct gl_texture_object *texObj = texImage->TexObject; - const GLint width = texImage->Width; - const GLint height = texImage->Height; - const GLint depth = texImage->Height; const GLenum target = texObj->Target; GLenum rbFormat; GLenum faceTarget; @@ -3069,7 +3093,10 @@ decompress_texture_image(struct gl_context *ctx, /* Silence valgrind warnings about reading uninitialized stack. */ memset(verts, 0, sizeof(verts)); - _mesa_meta_setup_texture_coords(faceTarget, slice, width, height, depth, + _mesa_meta_setup_texture_coords(faceTarget, slice, + xoffset, yoffset, width, height, + texImage->Width, texImage->Height, + texImage->Depth, verts[0].tex, verts[1].tex, verts[2].tex, @@ -3123,7 +3150,7 @@ decompress_texture_image(struct gl_context *ctx, /* read pixels from renderbuffer */ { GLenum baseTexFormat = texImage->_BaseFormat; - GLenum destBaseFormat = _mesa_base_tex_format(ctx, destFormat); + GLenum destBaseFormat = _mesa_unpack_format_to_base_format(destFormat); /* The pixel transfer state will be set to default values at this point * (see MESA_META_PIXEL_TRANSFER) so pixel transfer ops are effectively @@ -3132,19 +3159,13 @@ decompress_texture_image(struct gl_context *ctx, * returned as red and two-channel texture values are returned as * red/alpha. */ - if ((baseTexFormat == GL_LUMINANCE || - baseTexFormat == GL_LUMINANCE_ALPHA || - baseTexFormat == GL_INTENSITY) || + if (_mesa_need_luminance_to_rgb_conversion(baseTexFormat, + destBaseFormat) || /* If we're reading back an RGB(A) texture (using glGetTexImage) as * luminance then we need to return L=tex(R). */ - ((baseTexFormat == GL_RGBA || - baseTexFormat == GL_RGB || - baseTexFormat == GL_RG) && - (destBaseFormat == GL_LUMINANCE || - destBaseFormat == GL_LUMINANCE_ALPHA || - destBaseFormat == GL_LUMINANCE_INTEGER_EXT || - destBaseFormat == GL_LUMINANCE_ALPHA_INTEGER_EXT))) { + _mesa_need_rgb_to_luminance_conversion(baseTexFormat, + destBaseFormat)) { /* Green and blue must be zero */ _mesa_PixelTransferf(GL_GREEN_SCALE, 0.0f); _mesa_PixelTransferf(GL_BLUE_SCALE, 0.0f); @@ -3171,15 +3192,17 @@ decompress_texture_image(struct gl_context *ctx, * from core Mesa. 
*/ void -_mesa_meta_GetTexImage(struct gl_context *ctx, - GLenum format, GLenum type, GLvoid *pixels, - struct gl_texture_image *texImage) +_mesa_meta_GetTexSubImage(struct gl_context *ctx, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, GLvoid *pixels, + struct gl_texture_image *texImage) { if (_mesa_is_format_compressed(texImage->TexFormat)) { GLuint slice; bool result = true; - for (slice = 0; slice < texImage->Depth; slice++) { + for (slice = 0; slice < depth; slice++) { void *dst; if (texImage->TexObject->Target == GL_TEXTURE_2D_ARRAY || texImage->TexObject->Target == GL_TEXTURE_CUBE_MAP_ARRAY) { @@ -3191,14 +3214,14 @@ _mesa_meta_GetTexImage(struct gl_context *ctx, struct gl_pixelstore_attrib packing = ctx->Pack; packing.SkipPixels = 0; packing.SkipRows = 0; - dst = _mesa_image_address3d(&packing, pixels, texImage->Width, - texImage->Height, format, type, - slice, 0, 0); + dst = _mesa_image_address3d(&packing, pixels, width, height, + format, type, slice, 0, 0); } else { dst = pixels; } result = decompress_texture_image(ctx, texImage, slice, + xoffset, yoffset, width, height, format, type, dst); if (!result) break; @@ -3208,7 +3231,8 @@ _mesa_meta_GetTexImage(struct gl_context *ctx, return; } - _mesa_GetTexImage_sw(ctx, format, type, pixels, texImage); + _mesa_GetTexSubImage_sw(ctx, xoffset, yoffset, zoffset, + width, height, depth, format, type, pixels, texImage); } diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h index e7d894df1d7..fe439153aa0 100644 --- a/src/mesa/drivers/common/meta.h +++ b/src/mesa/drivers/common/meta.h @@ -560,9 +560,11 @@ _mesa_meta_ClearTexSubImage(struct gl_context *ctx, const GLvoid *clearValue); extern void -_mesa_meta_GetTexImage(struct gl_context *ctx, - GLenum format, GLenum type, GLvoid *pixels, - struct gl_texture_image *texImage); +_mesa_meta_GetTexSubImage(struct gl_context *ctx, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, GLvoid *pixels, + struct gl_texture_image *texImage); extern void _mesa_meta_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, @@ -594,9 +596,13 @@ _mesa_meta_alloc_texture(struct temp_texture *tex, void _mesa_meta_setup_texture_coords(GLenum faceTarget, GLint slice, + GLint xoffset, + GLint yoffset, GLint width, GLint height, - GLint depth, + GLint total_width, + GLint total_height, + GLint total_depth, GLfloat coords0[4], GLfloat coords1[4], GLfloat coords2[4], diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c index 9cace2b245a..71d18de87db 100644 --- a/src/mesa/drivers/common/meta_blit.c +++ b/src/mesa/drivers/common/meta_blit.c @@ -82,7 +82,7 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx, y_scale = samples * 0.5; /* We expect only power of 2 samples in source multisample buffer. */ - assert(samples > 0 && is_power_of_two(samples)); + assert(samples > 0 && _mesa_is_pow_two(samples)); while (samples >> (shader_offset + 1)) { shader_offset++; } @@ -263,7 +263,7 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx, } /* We expect only power of 2 samples in source multisample buffer. 
*/ - assert(samples > 0 && is_power_of_two(samples)); + assert(samples > 0 && _mesa_is_pow_two(samples)); while (samples >> (shader_offset + 1)) { shader_offset++; } @@ -312,7 +312,7 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx, break; default: _mesa_problem(ctx, "Unkown texture target %s\n", - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); shader_index = BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_RESOLVE; } @@ -434,7 +434,7 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx, * (so the floating point exponent just gets increased), rather than * doing a naive sum and dividing. */ - assert(is_power_of_two(samples)); + assert(_mesa_is_pow_two(samples)); /* Fetch each individual sample. */ sample_resolve = rzalloc_size(mem_ctx, 1); for (i = 0; i < samples; i++) { diff --git a/src/mesa/drivers/common/meta_copy_image.c b/src/mesa/drivers/common/meta_copy_image.c index 1729766f78d..149ed18503c 100644 --- a/src/mesa/drivers/common/meta_copy_image.c +++ b/src/mesa/drivers/common/meta_copy_image.c @@ -138,8 +138,8 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx, goto cleanup; } - /* We really only need to stash the bound framebuffers. */ - _mesa_meta_begin(ctx, 0); + /* We really only need to stash the bound framebuffers and scissor. */ + _mesa_meta_begin(ctx, MESA_META_SCISSOR); _mesa_GenFramebuffers(2, fbos); _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, fbos[0]); diff --git a/src/mesa/drivers/common/meta_generate_mipmap.c b/src/mesa/drivers/common/meta_generate_mipmap.c index c1b6d3c1f86..0655f052219 100644 --- a/src/mesa/drivers/common/meta_generate_mipmap.c +++ b/src/mesa/drivers/common/meta_generate_mipmap.c @@ -66,7 +66,7 @@ fallback_required(struct gl_context *ctx, GLenum target, if (target == GL_TEXTURE_3D) { _mesa_perf_debug(ctx, MESA_DEBUG_SEVERITY_HIGH, "glGenerateMipmap() to %s target\n", - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return true; } @@ -317,7 +317,9 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, /* Setup texture coordinates */ _mesa_meta_setup_texture_coords(faceTarget, layer, - 0, 0, 1, /* width, height never used here */ + 0, 0, /* xoffset, yoffset */ + srcWidth, srcHeight, /* img size */ + srcWidth, srcHeight, srcDepth, verts[0].tex, verts[1].tex, verts[2].tex, diff --git a/src/mesa/drivers/common/meta_tex_subimage.c b/src/mesa/drivers/common/meta_tex_subimage.c index d2474f52718..16d8f5d4747 100644 --- a/src/mesa/drivers/common/meta_tex_subimage.c +++ b/src/mesa/drivers/common/meta_tex_subimage.c @@ -25,8 +25,10 @@ * Jason Ekstrand <[email protected]> */ +#include "blend.h" #include "bufferobj.h" #include "buffers.h" +#include "clear.h" #include "fbobject.h" #include "glformats.h" #include "glheader.h" @@ -248,6 +250,24 @@ fail: return success; } +static bool +need_signed_unsigned_int_conversion(mesa_format rbFormat, + GLenum format, GLenum type) +{ + const GLenum srcType = _mesa_get_format_datatype(rbFormat); + const bool is_dst_format_integer = _mesa_is_enum_format_integer(format); + return (srcType == GL_INT && + is_dst_format_integer && + (type == GL_UNSIGNED_INT || + type == GL_UNSIGNED_SHORT || + type == GL_UNSIGNED_BYTE)) || + (srcType == GL_UNSIGNED_INT && + is_dst_format_integer && + (type == GL_INT || + type == GL_SHORT || + type == GL_BYTE)); +} + bool _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims, struct gl_texture_image *tex_image, @@ -260,8 +280,10 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims, int full_height, image_height; 
struct gl_texture_image *pbo_tex_image; struct gl_renderbuffer *rb = NULL; - GLenum status; - bool success = false; + GLenum dstBaseFormat = _mesa_unpack_format_to_base_format(format); + GLenum status, src_base_format; + bool success = false, clear_channels_to_zero = false; + float save_clear_color[4]; int z; if (!_mesa_is_bufferobj(packing->BufferObj)) @@ -273,13 +295,27 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims, format == GL_COLOR_INDEX) return false; - if (ctx->_ImageTransferState) - return false; - - + /* Don't use meta path for readpixels in below conditions. */ if (!tex_image) { rb = ctx->ReadBuffer->_ColorReadBuffer; - if (_mesa_need_rgb_to_luminance_conversion(rb->Format, format)) + + /* _mesa_get_readpixels_transfer_ops() includes the cases of read + * color clamping along with the ctx->_ImageTransferState. + */ + if (_mesa_get_readpixels_transfer_ops(ctx, rb->Format, format, + type, GL_FALSE)) + return false; + + if (_mesa_need_rgb_to_luminance_conversion(rb->_BaseFormat, + dstBaseFormat)) + return false; + + /* This function rely on BlitFramebuffer to fill in the pixel data for + * ReadPixels. But, BlitFrameBuffer doesn't support signed to unsigned + * or unsigned to signed integer conversions. OpenGL spec expects an + * invalid operation in that case. + */ + if (need_signed_unsigned_int_conversion(rb->Format, format, type)) return false; } @@ -300,6 +336,10 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims, _mesa_meta_begin(ctx, ~(MESA_META_PIXEL_TRANSFER | MESA_META_PIXEL_STORE)); + /* GL_CLAMP_FRAGMENT_COLOR doesn't affect ReadPixels and GettexImage */ + if (ctx->Extensions.ARB_color_buffer_float) + _mesa_ClampColor(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE); + _mesa_GenFramebuffers(2, fbos); if (tex_image && tex_image->TexObject->Target == GL_TEXTURE_1D_ARRAY) { @@ -345,6 +385,27 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims, GL_COLOR_BUFFER_BIT, GL_NEAREST)) goto fail; + src_base_format = tex_image ? + tex_image->_BaseFormat : + ctx->ReadBuffer->_ColorReadBuffer->_BaseFormat; + + /* Depending on the base formats involved we might need to rebase some + * values. For example if we download from a Luminance format to RGBA + * format, we want G=0 and B=0. + */ + clear_channels_to_zero = + _mesa_need_luminance_to_rgb_conversion(src_base_format, + pbo_tex_image->_BaseFormat); + + if (clear_channels_to_zero) { + memcpy(save_clear_color, ctx->Color.ClearColor.f, 4 * sizeof(float)); + /* Clear the Green, Blue channels. */ + _mesa_ColorMask(GL_FALSE, GL_TRUE, GL_TRUE, + src_base_format != GL_LUMINANCE_ALPHA); + _mesa_ClearColor(0.0, 0.0, 0.0, 1.0); + _mesa_Clear(GL_COLOR_BUFFER_BIT); + } + for (z = 1; z < depth; z++) { _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, tex_image, zoffset + z); @@ -357,6 +418,15 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims, 0, z * image_height, width, z * image_height + height, GL_COLOR_BUFFER_BIT, GL_NEAREST); + if (clear_channels_to_zero) + _mesa_Clear(GL_COLOR_BUFFER_BIT); + } + + /* Unmask the color channels and restore the saved clear color values. 
*/ + if (clear_channels_to_zero) { + _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + _mesa_ClearColor(save_clear_color[0], save_clear_color[1], + save_clear_color[2], save_clear_color[3]); } success = true; diff --git a/src/mesa/drivers/dri/common/Android.mk b/src/mesa/drivers/dri/common/Android.mk index 6986f5e8cb4..f1a733011b9 100644 --- a/src/mesa/drivers/dri/common/Android.mk +++ b/src/mesa/drivers/dri/common/Android.mk @@ -43,13 +43,6 @@ LOCAL_EXPORT_C_INCLUDE_DIRS := \ $(LOCAL_PATH) \ $(intermediates) -# swrast only -ifeq ($(MESA_GPU_DRIVERS),swrast) -LOCAL_CFLAGS := -D__NOT_HAVE_DRM_H -else -LOCAL_SHARED_LIBRARIES := libdrm -endif - LOCAL_SRC_FILES := \ $(DRI_COMMON_FILES) \ $(XMLCONFIG_FILES) @@ -110,13 +103,6 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES LOCAL_C_INCLUDES := \ $(MESA_DRI_C_INCLUDES) -# swrast only -ifeq ($(MESA_GPU_DRIVERS),swrast) -LOCAL_CFLAGS := -D__NOT_HAVE_DRM_H -else -LOCAL_SHARED_LIBRARIES := libdrm -endif - LOCAL_SRC_FILES := $(megadriver_stub_FILES) include $(MESA_COMMON_MK) diff --git a/src/mesa/drivers/dri/common/Makefile.am b/src/mesa/drivers/dri/common/Makefile.am index ae19fcb3565..b307f10f56b 100644 --- a/src/mesa/drivers/dri/common/Makefile.am +++ b/src/mesa/drivers/dri/common/Makefile.am @@ -32,6 +32,7 @@ AM_CFLAGS = \ -I$(top_srcdir)/src/mesa/ \ -I$(top_srcdir)/src/gallium/include \ -I$(top_srcdir)/src/gallium/auxiliary \ + $(LIBDRM_CFLAGS) \ $(DEFINES) \ $(VISIBILITY_CFLAGS) @@ -53,10 +54,3 @@ libdri_test_stubs_la_CFLAGS = $(AM_CFLAGS) -DNO_MAIN libmegadriver_stub_la_SOURCES = $(megadriver_stub_FILES) sysconf_DATA = drirc - -if DRICOMMON_NEED_LIBDRM -AM_CFLAGS += $(LIBDRM_CFLAGS) -libdricommon_la_LIBADD = $(LIBDRM_LIBS) -else -AM_CFLAGS += -D__NOT_HAVE_DRM_H -endif diff --git a/src/mesa/drivers/dri/common/SConscript b/src/mesa/drivers/dri/common/SConscript index b402736db69..52d201f8913 100644 --- a/src/mesa/drivers/dri/common/SConscript +++ b/src/mesa/drivers/dri/common/SConscript @@ -32,11 +32,6 @@ drienv.AppendUnique(LIBS = [ 'expat', ]) -# if HAVE_DRI2 -drienv.PkgUseModules('DRM') -# else -#env.Append(CPPDEFINES = ['__NOT_HAVE_DRM_H']) - sources = drienv.ParseSourceList('Makefile.sources', ['DRI_COMMON_FILES', 'XMLCONFIG_FILES' ]) dri_common = drienv.ConvenienceLibrary( @@ -57,7 +52,6 @@ env.Append(CPPPATH = [ ]) env.Append(CPPDEFINES = [ - '__NOT_HAVE_DRM_H', 'HAVE_DLADDR', ]) diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index e7ababe0b67..d35ac263a45 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -40,13 +40,9 @@ #include <stdbool.h> -#ifndef __NOT_HAVE_DRM_H -#include <xf86drm.h> -#endif #include "dri_util.h" #include "utils.h" #include "xmlpool.h" -#include "../glsl/glsl_parser_extras.h" #include "main/mtypes.h" #include "main/version.h" #include "main/errors.h" @@ -138,18 +134,6 @@ driCreateNewScreen2(int scrn, int fd, setupLoaderExtensions(psp, extensions); -#ifndef __NOT_HAVE_DRM_H - if (fd != -1) { - drmVersionPtr version = drmGetVersion(fd); - if (version) { - psp->drm_version.major = version->version_major; - psp->drm_version.minor = version->version_minor; - psp->drm_version.patch = version->version_patchlevel; - drmFreeVersion(version); - } - } -#endif - psp->loaderPrivate = data; psp->extensions = emptyExtensionList; @@ -179,7 +163,9 @@ driCreateNewScreen2(int scrn, int fd, } } - psp->api_mask = (1 << __DRI_API_OPENGL); + psp->api_mask = 0; + if (psp->max_gl_compat_version > 0) + psp->api_mask |= (1 << __DRI_API_OPENGL); if 
(psp->max_gl_core_version > 0) psp->api_mask |= (1 << __DRI_API_OPENGL_CORE); if (psp->max_gl_es1_version > 0) @@ -238,8 +224,6 @@ static void driDestroyScreen(__DRIscreen *psp) * stream open to the X-server anymore. */ - _mesa_destroy_shader_compiler(); - psp->driver->DestroyScreen(psp); driDestroyOptionCache(&psp->optionCache); diff --git a/src/mesa/drivers/dri/common/dri_util.h b/src/mesa/drivers/dri/common/dri_util.h index 1138bf106de..6987f555e66 100644 --- a/src/mesa/drivers/dri/common/dri_util.h +++ b/src/mesa/drivers/dri/common/dri_util.h @@ -149,11 +149,6 @@ struct __DRIscreenRec { int fd; /** - * DRM (kernel module) version information. - */ - __DRIversion drm_version; - - /** * Device-dependent private information (not stored in the SAREA). * * This pointer is never touched by the DRI layer. diff --git a/src/mesa/drivers/dri/common/drirc b/src/mesa/drivers/dri/common/drirc index 145e707a64c..97d961b6597 100644 --- a/src/mesa/drivers/dri/common/drirc +++ b/src/mesa/drivers/dri/common/drirc @@ -4,24 +4,15 @@ Application bugs worked around in this file: ============================================ +* Unigine Heaven 3.0 and older contain too many bugs and can't be supported + by drivers that want to be compliant. + * Various Unigine products don't use the #version and #extension GLSL directives, meaning they only get GLSL 1.10 and no extensions for their shaders. Enabling all extensions for Unigine fixes most issues, but the GLSL version is still 1.10. -* Unigine Heaven 3.0 with ARB_texture_multisample uses a "ivec4 * vec4" - expression, which is illegal in GLSL 1.10. - Adding "#version 130" fixes this. - -* Unigine Heaven 3.0 with ARB_shader_bit_encoding uses the uint keyword, which - is illegal in GLSL 1.10. - Adding "#version 130" fixes this. - -* Unigine Heaven 3.0 with ARB_shader_bit_encoding uses a "uint & int" - expression, which is illegal in any GLSL version. - Disabling ARB_shader_bit_encoding fixes this. - * If ARB_sample_shading is supported, Unigine Heaven 4.0 and Valley 1.0 uses an #extension directive in the middle of its shaders, which is illegal in GLSL. @@ -45,18 +36,10 @@ TODO: document the other workarounds. 
</application> <application name="Unigine Heaven (32-bit)" executable="heaven_x86"> - <option name="force_glsl_extensions_warn" value="true" /> - <option name="disable_blend_func_extended" value="true" /> - <option name="force_glsl_version" value="130" /> - <option name="disable_shader_bit_encoding" value="true" /> <option name="allow_glsl_extension_directive_midshader" value="true" /> </application> <application name="Unigine Heaven (64-bit)" executable="heaven_x64"> - <option name="force_glsl_extensions_warn" value="true" /> - <option name="disable_blend_func_extended" value="true" /> - <option name="force_glsl_version" value="130" /> - <option name="disable_shader_bit_encoding" value="true" /> <option name="allow_glsl_extension_directive_midshader" value="true" /> </application> diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c index 70d34e8ce55..b51b263fe46 100644 --- a/src/mesa/drivers/dri/common/utils.c +++ b/src/mesa/drivers/dri/common/utils.c @@ -213,6 +213,7 @@ driCreateConfigs(mesa_format format, masks = masks_table[0]; break; case MESA_FORMAT_B8G8R8X8_UNORM: + case MESA_FORMAT_B8G8R8X8_SRGB: masks = masks_table[1]; break; case MESA_FORMAT_B8G8R8A8_UNORM: diff --git a/src/mesa/drivers/dri/i915/i830_state.c b/src/mesa/drivers/dri/i915/i830_state.c index ea54e2b25b1..906e942b020 100644 --- a/src/mesa/drivers/dri/i915/i830_state.c +++ b/src/mesa/drivers/dri/i915/i830_state.c @@ -57,7 +57,7 @@ i830StencilFuncSeparate(struct gl_context * ctx, GLenum face, GLenum func, GLint mask = mask & 0xff; DBG("%s : func: %s, ref : 0x%x, mask: 0x%x\n", __func__, - _mesa_lookup_enum_by_nr(func), ref, mask); + _mesa_enum_to_string(func), ref, mask); I830_STATECHANGE(i830, I830_UPLOAD_CTX); @@ -95,9 +95,9 @@ i830StencilOpSeparate(struct gl_context * ctx, GLenum face, GLenum fail, GLenum int fop, dfop, dpop; DBG("%s: fail : %s, zfail: %s, zpass : %s\n", __func__, - _mesa_lookup_enum_by_nr(fail), - _mesa_lookup_enum_by_nr(zfail), - _mesa_lookup_enum_by_nr(zpass)); + _mesa_enum_to_string(fail), + _mesa_enum_to_string(zfail), + _mesa_enum_to_string(zpass)); fop = 0; dfop = 0; @@ -389,8 +389,8 @@ static void i830BlendEquationSeparate(struct gl_context * ctx, GLenum modeRGB, GLenum modeA) { DBG("%s -> %s, %s\n", __func__, - _mesa_lookup_enum_by_nr(modeRGB), - _mesa_lookup_enum_by_nr(modeA)); + _mesa_enum_to_string(modeRGB), + _mesa_enum_to_string(modeA)); (void) modeRGB; (void) modeA; @@ -403,10 +403,10 @@ i830BlendFuncSeparate(struct gl_context * ctx, GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorA, GLenum dfactorA) { DBG("%s -> RGB(%s, %s) A(%s, %s)\n", __func__, - _mesa_lookup_enum_by_nr(sfactorRGB), - _mesa_lookup_enum_by_nr(dfactorRGB), - _mesa_lookup_enum_by_nr(sfactorA), - _mesa_lookup_enum_by_nr(dfactorA)); + _mesa_enum_to_string(sfactorRGB), + _mesa_enum_to_string(dfactorRGB), + _mesa_enum_to_string(sfactorA), + _mesa_enum_to_string(dfactorA)); (void) sfactorRGB; (void) dfactorRGB; diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index 42ea54e087d..57b033c07ea 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -255,6 +255,8 @@ i915CreateContext(int api, * FINISHME: vertex shaders? 
*/ ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitCondCodes = true; + ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectSampler = + true; struct gl_shader_compiler_options *const fs_options = & ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT]; @@ -266,6 +268,7 @@ i915CreateContext(int api, fs_options->EmitNoIndirectOutput = true; fs_options->EmitNoIndirectUniform = true; fs_options->EmitNoIndirectTemp = true; + fs_options->EmitNoIndirectSampler = true; ctx->Const.MaxDrawBuffers = 1; ctx->Const.QueryCounterBits.SamplesPassed = 0; diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c index 5f10b840b1a..4c83073e692 100644 --- a/src/mesa/drivers/dri/i915/i915_state.c +++ b/src/mesa/drivers/dri/i915/i915_state.c @@ -402,7 +402,7 @@ void intelCalcViewport(struct gl_context * ctx) { struct intel_context *intel = intel_context(ctx); - double scale[3], translate[3]; + float scale[3], translate[3]; _mesa_get_viewport_xform(ctx, 0, scale, translate); diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index aef5ff99eb2..f653f441ad8 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -342,7 +342,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) * Thus, I guess we need do this for other platforms as well. */ if (tObj->Target == GL_TEXTURE_CUBE_MAP_ARB && - !is_power_of_two(firstImage->Height)) + !_mesa_is_pow_two(firstImage->Height)) return false; state[I915_TEXREG_SS3] = ss3; /* SS3_NORMALIZED_COORDS */ diff --git a/src/mesa/drivers/dri/i915/intel_context.c b/src/mesa/drivers/dri/i915/intel_context.c index 5618dcd8358..c780103228f 100644 --- a/src/mesa/drivers/dri/i915/intel_context.c +++ b/src/mesa/drivers/dri/i915/intel_context.c @@ -428,7 +428,6 @@ intelInitContext(struct intel_context *intel, driContextPriv->driverPrivate = intel; intel->driContext = driContextPriv; - intel->driFd = sPriv->fd; intel->gen = intelScreen->gen; diff --git a/src/mesa/drivers/dri/i915/intel_context.h b/src/mesa/drivers/dri/i915/intel_context.h index 350d35d9033..4ec4015d453 100644 --- a/src/mesa/drivers/dri/i915/intel_context.h +++ b/src/mesa/drivers/dri/i915/intel_context.h @@ -273,8 +273,6 @@ struct intel_context bool use_early_z; - int driFd; - __DRIcontext *driContext; struct intel_screen *intelScreen; diff --git a/src/mesa/drivers/dri/i915/intel_fbo.c b/src/mesa/drivers/dri/i915/intel_fbo.c index a5d5c5832fb..67013666377 100644 --- a/src/mesa/drivers/dri/i915/intel_fbo.c +++ b/src/mesa/drivers/dri/i915/intel_fbo.c @@ -216,7 +216,7 @@ intel_alloc_private_renderbuffer_storage(struct gl_context * ctx, struct gl_rend intel_miptree_release(&irb->mt); DBG("%s: %s: %s (%dx%d)\n", __func__, - _mesa_lookup_enum_by_nr(internalFormat), + _mesa_enum_to_string(internalFormat), _mesa_get_format_name(rb->Format), width, height); if (width == 0 || height == 0) diff --git a/src/mesa/drivers/dri/i915/intel_mipmap_tree.c b/src/mesa/drivers/dri/i915/intel_mipmap_tree.c index e56b9859377..1aa06c18f15 100644 --- a/src/mesa/drivers/dri/i915/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i915/intel_mipmap_tree.c @@ -81,7 +81,7 @@ intel_miptree_create_layout(struct intel_context *intel, return NULL; DBG("%s target %s format %s level %d..%d <-- %p\n", __func__, - _mesa_lookup_enum_by_nr(target), + _mesa_enum_to_string(target), _mesa_get_format_name(format), first_level, last_level, mt); diff --git a/src/mesa/drivers/dri/i915/intel_render.c 
b/src/mesa/drivers/dri/i915/intel_render.c index 0b0d48e1663..5962dad7d11 100644 --- a/src/mesa/drivers/dri/i915/intel_render.c +++ b/src/mesa/drivers/dri/i915/intel_render.c @@ -113,7 +113,7 @@ static void intelDmaPrimitive(struct intel_context *intel, GLenum prim) { if (0) - fprintf(stderr, "%s %s\n", __func__, _mesa_lookup_enum_by_nr(prim)); + fprintf(stderr, "%s %s\n", __func__, _mesa_enum_to_string(prim)); INTEL_FIREVERTICES(intel); intel->vtbl.reduced_primitive_state(intel, reduced_prim[prim]); intel_set_prim(intel, hw_prim[prim]); diff --git a/src/mesa/drivers/dri/i915/intel_tex_image.c b/src/mesa/drivers/dri/i915/intel_tex_image.c index 01de966a134..0a213e9f614 100644 --- a/src/mesa/drivers/dri/i915/intel_tex_image.c +++ b/src/mesa/drivers/dri/i915/intel_tex_image.c @@ -189,7 +189,7 @@ intelTexImage(struct gl_context * ctx, const struct gl_pixelstore_attrib *unpack) { DBG("%s target %s level %d %dx%dx%d\n", __func__, - _mesa_lookup_enum_by_nr(texImage->TexObject->Target), + _mesa_enum_to_string(texImage->TexObject->Target), texImage->Level, texImage->Width, texImage->Height, texImage->Depth); /* Attempt to use the blitter for PBO image uploads. diff --git a/src/mesa/drivers/dri/i915/intel_tex_subimage.c b/src/mesa/drivers/dri/i915/intel_tex_subimage.c index 2e02d50f13f..f11ef2ea329 100644 --- a/src/mesa/drivers/dri/i915/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/i915/intel_tex_subimage.c @@ -72,7 +72,7 @@ intel_blit_texsubimage(struct gl_context * ctx, DBG("BLT subimage %s target %s level %d offset %d,%d %dx%d\n", __func__, - _mesa_lookup_enum_by_nr(texImage->TexObject->Target), + _mesa_enum_to_string(texImage->TexObject->Target), texImage->Level, xoffset, yoffset, width, height); pixels = _mesa_validate_pbo_teximage(ctx, 2, width, height, 1, diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index 144f0fc911a..ae62a800fb7 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -1134,7 +1134,7 @@ intelRasterPrimitive(struct gl_context * ctx, GLenum rprim, GLuint hwprim) if (0) fprintf(stderr, "%s %s %x\n", __func__, - _mesa_lookup_enum_by_nr(rprim), hwprim); + _mesa_enum_to_string(rprim), hwprim); intel->vtbl.reduced_primitive_state(intel, rprim); @@ -1158,7 +1158,7 @@ intelRenderPrimitive(struct gl_context * ctx, GLenum prim) ctx->Polygon.BackMode != GL_FILL); if (0) - fprintf(stderr, "%s %s\n", __func__, _mesa_lookup_enum_by_nr(prim)); + fprintf(stderr, "%s %s\n", __func__, _mesa_enum_to_string(prim)); /* Let some clipping routines know which primitive they're dealing * with. 
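Editor's note: the i965 changes that follow add hardware-generated binding tables via the resource streamer. The pool allocator they introduce (reserve_hw_bt_space in brw_binding_tables.c) is a bump allocator with two quirks taken from the quoted PRM text: the last two 64-byte cache lines of the pool are never handed out (WaStateBindingTableOverfetch) and returned offsets are 64-byte aligned, relative to the pool base. A simplified standalone model, with a hypothetical struct bt_pool standing in for brw->hw_bt_pool:

#include <stdint.h>

struct bt_pool {
   uint32_t next_offset;   /* bump pointer into the pool buffer */
   uint32_t size;          /* size of the pool buffer in bytes */
};

#define BT_ALIGN(x, a) (((x) + (a) - 1) & ~((uint32_t)(a) - 1))

static uint32_t
reserve_bt_space(struct bt_pool *pool, unsigned bytes)
{
   /* Keep two 64-byte cache lines free at the end of the pool: the
    * hardware over-fetches binding table indices. */
   if (pool->next_offset + bytes >= pool->size - 128)
      pool->next_offset = 0;   /* wrap and reuse the pool */

   uint32_t offset = pool->next_offset;

   /* Offsets programmed via 3DSTATE_BINDING_TABLE_POINTERS_xS are
    * relative to the pool base and must be 64-byte aligned. */
   pool->next_offset += BT_ALIGN(bytes, 64);
   return offset;
}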
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 981fe79b132..dfdad75329d 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -60,6 +60,8 @@ i965_FILES = \ brw_fs_register_coalesce.cpp \ brw_fs_saturate_propagation.cpp \ brw_fs_sel_peephole.cpp \ + brw_fs_surface_builder.cpp \ + brw_fs_surface_builder.h \ brw_fs_vector_splitting.cpp \ brw_fs_visitor.cpp \ brw_gs.c \ @@ -86,6 +88,7 @@ i965_FILES = \ brw_object_purgeable.c \ brw_packed_float.c \ brw_performance_monitor.c \ + brw_pipe_control.c \ brw_primitive_restart.c \ brw_program.c \ brw_program.h \ @@ -122,6 +125,8 @@ i965_FILES = \ brw_vec4.h \ brw_vec4_live_variables.cpp \ brw_vec4_live_variables.h \ + brw_vec4_nir.cpp \ + brw_vec4_gs_nir.cpp \ brw_vec4_reg_allocate.cpp \ brw_vec4_visitor.cpp \ brw_vec4_vp.cpp \ diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c index 98ff0ddcd58..b188fc7de57 100644 --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c +++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c @@ -44,6 +44,41 @@ #include "brw_state.h" #include "intel_batchbuffer.h" +static const GLuint stage_to_bt_edit[] = { + [MESA_SHADER_VERTEX] = _3DSTATE_BINDING_TABLE_EDIT_VS, + [MESA_SHADER_GEOMETRY] = _3DSTATE_BINDING_TABLE_EDIT_GS, + [MESA_SHADER_FRAGMENT] = _3DSTATE_BINDING_TABLE_EDIT_PS, +}; + +static uint32_t +reserve_hw_bt_space(struct brw_context *brw, unsigned bytes) +{ + /* From the Broadwell PRM, Volume 16, "Workarounds", + * WaStateBindingTableOverfetch: + * "HW over-fetches two cache lines of binding table indices. When + * using the resource streamer, SW needs to pad binding table pointer + * updates with an additional two cache lines." + * + * Cache lines are 64 bytes, so we subtract 128 bytes from the size of + * the binding table pool buffer. + */ + if (brw->hw_bt_pool.next_offset + bytes >= brw->hw_bt_pool.bo->size - 128) { + gen7_reset_hw_bt_pool_offsets(brw); + } + + uint32_t offset = brw->hw_bt_pool.next_offset; + + /* From the Haswell PRM, Volume 2b: Command Reference: Instructions, + * 3DSTATE_BINDING_TABLE_POINTERS_xS: + * + * "If HW Binding Table is enabled, the offset is relative to the + * Binding Table Pool Base Address and the alignment is 64 bytes." + */ + brw->hw_bt_pool.next_offset += ALIGN(bytes, 64); + + return offset; +} + /** * Upload a shader stage's binding table as indirect state. * @@ -72,22 +107,41 @@ brw_upload_binding_table(struct brw_context *brw, brw->shader_time.bo, 0, BRW_SURFACEFORMAT_RAW, brw->shader_time.bo->size, 1, true); } + /* When RS is enabled use hw-binding table uploads, otherwise fallback to + * software-uploads. 
+ */ + if (brw->use_resource_streamer) { + gen7_update_binding_table_from_array(brw, stage_state->stage, + stage_state->surf_offset, + prog_data->binding_table + .size_bytes / 4); + } else { + uint32_t *bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, + prog_data->binding_table.size_bytes, + 32, + &stage_state->bind_bo_offset); - uint32_t *bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, - prog_data->binding_table.size_bytes, 32, - &stage_state->bind_bo_offset); - - /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */ - memcpy(bind, stage_state->surf_offset, - prog_data->binding_table.size_bytes); + /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */ + memcpy(bind, stage_state->surf_offset, + prog_data->binding_table.size_bytes); + } } brw->ctx.NewDriverState |= brw_new_binding_table; if (brw->gen >= 7) { + if (brw->use_resource_streamer) { + stage_state->bind_bo_offset = + reserve_hw_bt_space(brw, prog_data->binding_table.size_bytes); + } BEGIN_BATCH(2); OUT_BATCH(packet_name << 16 | (2 - 2)); - OUT_BATCH(stage_state->bind_bo_offset); + /* Align SurfaceStateOffset[16:6] format to [15:5] PS Binding Table field + * when hw-generated binding table is enabled. + */ + OUT_BATCH(brw->use_resource_streamer ? + (stage_state->bind_bo_offset >> 1) : + stage_state->bind_bo_offset); ADVANCE_BATCH(); } } @@ -170,6 +224,158 @@ const struct brw_tracked_state brw_gs_binding_table = { .emit = brw_gs_upload_binding_table, }; +/** + * Edit a single entry in a hardware-generated binding table + */ +void +gen7_edit_hw_binding_table_entry(struct brw_context *brw, + gl_shader_stage stage, + uint32_t index, + uint32_t surf_offset) +{ + assert(stage < ARRAY_SIZE(stage_to_bt_edit)); + assert(stage_to_bt_edit[stage]); + + uint32_t dw2 = SET_FIELD(index, BRW_BINDING_TABLE_INDEX) | + (brw->gen >= 8 ? GEN8_SURFACE_STATE_EDIT(surf_offset) : + HSW_SURFACE_STATE_EDIT(surf_offset)); + + BEGIN_BATCH(3); + OUT_BATCH(stage_to_bt_edit[stage] << 16 | (3 - 2)); + OUT_BATCH(BRW_BINDING_TABLE_EDIT_TARGET_ALL); + OUT_BATCH(dw2); + ADVANCE_BATCH(); +} + +/** + * Upload a whole hardware binding table for the given stage. + * + * Takes an array of surface offsets and the number of binding table + * entries. + */ +void +gen7_update_binding_table_from_array(struct brw_context *brw, + gl_shader_stage stage, + const uint32_t* binding_table, + int num_surfaces) +{ + uint32_t dw2 = 0; + + assert(stage < ARRAY_SIZE(stage_to_bt_edit)); + assert(stage_to_bt_edit[stage]); + + BEGIN_BATCH(num_surfaces + 2); + OUT_BATCH(stage_to_bt_edit[stage] << 16 | num_surfaces); + OUT_BATCH(BRW_BINDING_TABLE_EDIT_TARGET_ALL); + for (int i = 0; i < num_surfaces; i++) { + dw2 = SET_FIELD(i, BRW_BINDING_TABLE_INDEX) | + (brw->gen >= 8 ? GEN8_SURFACE_STATE_EDIT(binding_table[i]) : + HSW_SURFACE_STATE_EDIT(binding_table[i])); + OUT_BATCH(dw2); + } + ADVANCE_BATCH(); +} + +/** + * Disable hardware binding table support, falling back to the + * older software-generated binding table mechanism. + */ +void +gen7_disable_hw_binding_tables(struct brw_context *brw) +{ + if (!brw->use_resource_streamer) + return; + /* From the Haswell PRM, Volume 7: 3D Media GPGPU, + * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note: + * + * "When switching between HW and SW binding table generation, SW must + * issue a state cache invalidate." + */ + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE); + + int pkt_len = brw->gen >= 8 ? 
4 : 3; + + BEGIN_BATCH(pkt_len); + OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2)); + if (brw->gen >= 8) { + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + } else { + OUT_BATCH(HSW_BT_POOL_ALLOC_MUST_BE_ONE); + OUT_BATCH(0); + } + ADVANCE_BATCH(); +} + +/** + * Enable hardware binding tables and set up the binding table pool. + */ +void +gen7_enable_hw_binding_tables(struct brw_context *brw) +{ + if (!brw->use_resource_streamer) + return; + + if (!brw->hw_bt_pool.bo) { + /* We use a single re-usable buffer object for the lifetime of the + * context and size it to maximum allowed binding tables that can be + * programmed per batch: + * + * From the Haswell PRM, Volume 7: 3D Media GPGPU, + * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note: + * "A maximum of 16,383 Binding tables are allowed in any batch buffer" + */ + static const int max_size = 16383 * 4; + brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt", + max_size, 64); + brw->hw_bt_pool.next_offset = 0; + } + + /* From the Haswell PRM, Volume 7: 3D Media GPGPU, + * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note: + * + * "When switching between HW and SW binding table generation, SW must + * issue a state cache invalidate." + */ + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE); + + int pkt_len = brw->gen >= 8 ? 4 : 3; + uint32_t dw1 = BRW_HW_BINDING_TABLE_ENABLE; + if (brw->is_haswell) { + dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_POOL_MOCS) | + HSW_BT_POOL_ALLOC_MUST_BE_ONE; + } else if (brw->gen >= 8) { + dw1 |= BDW_MOCS_WB; + } + + BEGIN_BATCH(pkt_len); + OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2)); + if (brw->gen >= 8) { + OUT_RELOC64(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1); + OUT_BATCH(brw->hw_bt_pool.bo->size); + } else { + OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1); + OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, + brw->hw_bt_pool.bo->size); + } + ADVANCE_BATCH(); +} + +void +gen7_reset_hw_bt_pool_offsets(struct brw_context *brw) +{ + brw->hw_bt_pool.next_offset = 0; +} + +const struct brw_tracked_state gen7_hw_binding_tables = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + }, + .emit = gen7_enable_hw_binding_tables +}; + /** @} */ /** diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp index b404869f0c7..eac1f005496 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp @@ -220,13 +220,13 @@ brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params) * data with different formats, which blorp does for stencil and depth * data. */ - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); retry: intel_batchbuffer_require_space(brw, estimated_max_batch_usage, RENDER_RING); intel_batchbuffer_save_state(brw); drm_intel_bo *saved_bo = brw->batch.bo; - uint32_t saved_used = brw->batch.used; + uint32_t saved_used = USED_BATCH(brw->batch); uint32_t saved_state_batch_offset = brw->batch.state_batch_offset; switch (brw->gen) { @@ -245,7 +245,7 @@ retry: * reserved enough space that a wrap will never happen. */ assert(brw->batch.bo == saved_bo); - assert((brw->batch.used - saved_used) * 4 + + assert((USED_BATCH(brw->batch) - saved_used) * 4 + (saved_state_batch_offset - brw->batch.state_batch_offset) < estimated_max_batch_usage); /* Shut up compiler warnings on release build */ @@ -283,7 +283,7 @@ retry: /* Flush the sampler cache so any texturing from the destination is * coherent. 
*/ - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); } brw_hiz_op_params::brw_hiz_op_params(struct intel_mipmap_tree *mt, diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 1561b593969..205c905b447 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -1285,8 +1285,8 @@ brw_blorp_blit_program::translate_dst_to_src() /* Round the float coordinates down to nearest integer */ emit_rndd(Xp_f, X_f); emit_rndd(Yp_f, Y_f); - emit_mul(X_f, Xp_f, brw_imm_f(1 / key->x_scale)); - emit_mul(Y_f, Yp_f, brw_imm_f(1 / key->y_scale)); + emit_mul(X_f, Xp_f, brw_imm_f(1.0f / key->x_scale)); + emit_mul(Y_f, Yp_f, brw_imm_f(1.0f / key->y_scale)); SWAP_XY_AND_XPYP(); } else if (!key->bilinear_filter) { /* Round the float coordinates down to nearest integer by moving to @@ -1442,7 +1442,7 @@ brw_blorp_blit_program::manual_blend_average(unsigned num_samples) for (int j = 0; j < 4; ++j) { emit_mul(offset(texture_data[0], 2*j), offset(vec8(texture_data[0]), 2*j), - brw_imm_f(1.0/num_samples)); + brw_imm_f(1.0f / num_samples)); } } @@ -1475,9 +1475,9 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples) /* Compute pixel coordinates */ emit_add(vec16(x_sample_coords), Xp_f, - brw_imm_f((float)(i & 0x1) * (1.0 / key->x_scale))); + brw_imm_f((float)(i & 0x1) * (1.0f / key->x_scale))); emit_add(vec16(y_sample_coords), Yp_f, - brw_imm_f((float)((i >> 1) & 0x1) * (1.0 / key->y_scale))); + brw_imm_f((float)((i >> 1) & 0x1) * (1.0f / key->y_scale))); emit_mov(vec16(X), x_sample_coords); emit_mov(vec16(Y), y_sample_coords); @@ -1789,7 +1789,7 @@ brw_blorp_coord_transform_params::setup(GLfloat src0, GLfloat src1, * so 0.5 provides the necessary correction. */ multiplier = scale; - offset = src0 + (-dst0 + 0.5) * scale; + offset = src0 + (-dst0 + 0.5f) * scale; } else { /* When mirroring X we need: * src_x - src_x0 = dst_x1 - dst_x - 0.5 @@ -1797,7 +1797,7 @@ brw_blorp_coord_transform_params::setup(GLfloat src0, GLfloat src1, * src_x = src_x0 + (dst_x1 -dst_x - 0.5) * scale */ multiplier = -scale; - offset = src0 + (dst1 - 0.5) * scale; + offset = src0 + (dst1 - 0.5f) * scale; } } @@ -1952,8 +1952,8 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw, /* Scaling factors used for bilinear filtering in multisample scaled * blits. 
*/ - wm_prog_key.x_scale = 2.0; - wm_prog_key.y_scale = src_mt->num_samples / 2.0; + wm_prog_key.x_scale = 2.0f; + wm_prog_key.y_scale = src_mt->num_samples / 2.0f; if (filter == GL_LINEAR && src.num_samples <= 1 && dst.num_samples <= 1) wm_prog_key.bilinear_filter = true; @@ -2000,9 +2000,9 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw, x1 = wm_push_consts.dst_x1 = roundf(dst_x1); y1 = wm_push_consts.dst_y1 = roundf(dst_y1); wm_push_consts.rect_grid_x1 = (minify(src_mt->logical_width0, src_level) * - wm_prog_key.x_scale - 1.0); + wm_prog_key.x_scale - 1.0f); wm_push_consts.rect_grid_y1 = (minify(src_mt->logical_height0, src_level) * - wm_prog_key.y_scale - 1.0); + wm_prog_key.y_scale - 1.0f); wm_push_consts.x_transform.setup(src_x0, src_x1, dst_x0, dst_x1, mirror_x); wm_push_consts.y_transform.setup(src_y0, src_y1, dst_y0, dst_y1, mirror_y); diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp index 789520c7353..d458ad846bf 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp @@ -73,7 +73,7 @@ brw_blorp_eu_emitter::emit_kill_if_outside_rect(const struct brw_reg &x, emit_cmp(BRW_CONDITIONAL_L, x, dst_x1)->predicate = BRW_PREDICATE_NORMAL; emit_cmp(BRW_CONDITIONAL_L, y, dst_y1)->predicate = BRW_PREDICATE_NORMAL; - fs_inst *inst = new (mem_ctx) fs_inst(BRW_OPCODE_AND, g1, f0, g1); + fs_inst *inst = new (mem_ctx) fs_inst(BRW_OPCODE_AND, 16, g1, f0, g1); inst->force_writemask_all = true; insts.push_tail(inst); } @@ -84,7 +84,7 @@ brw_blorp_eu_emitter::emit_texture_lookup(const struct brw_reg &dst, unsigned base_mrf, unsigned msg_length) { - fs_inst *inst = new (mem_ctx) fs_inst(op, dst, brw_message_reg(base_mrf), + fs_inst *inst = new (mem_ctx) fs_inst(op, 16, dst, brw_message_reg(base_mrf), fs_reg(0u)); inst->base_mrf = base_mrf; @@ -119,7 +119,8 @@ brw_blorp_eu_emitter::emit_combine(enum opcode combine_opcode, { assert(combine_opcode == BRW_OPCODE_ADD || combine_opcode == BRW_OPCODE_AVG); - insts.push_tail(new (mem_ctx) fs_inst(combine_opcode, dst, src_1, src_2)); + insts.push_tail(new (mem_ctx) fs_inst(combine_opcode, 16, dst, + src_1, src_2)); } fs_inst * @@ -127,7 +128,7 @@ brw_blorp_eu_emitter::emit_cmp(enum brw_conditional_mod op, const struct brw_reg &x, const struct brw_reg &y) { - fs_inst *cmp = new (mem_ctx) fs_inst(BRW_OPCODE_CMP, + fs_inst *cmp = new (mem_ctx) fs_inst(BRW_OPCODE_CMP, 16, vec16(brw_null_reg()), x, y); cmp->conditional_mod = op; insts.push_tail(cmp); diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp b/src/mesa/drivers/dri/i965/brw_cfg.cpp index f1f230e3751..91d53eff5a7 100644 --- a/src/mesa/drivers/dri/i965/brw_cfg.cpp +++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp @@ -208,6 +208,7 @@ cfg_t::cfg_t(exec_list *instructions) cur_else = cur; next = new_block(); + assert(cur_if != NULL); cur_if->add_successor(mem_ctx, next); set_next_block(&cur, next, ip); @@ -274,6 +275,7 @@ cfg_t::cfg_t(exec_list *instructions) inst->exec_node::remove(); cur->instructions.push_tail(inst); + assert(cur_do != NULL); cur->add_successor(mem_ctx, cur_do); next = new_block(); @@ -287,6 +289,7 @@ cfg_t::cfg_t(exec_list *instructions) inst->exec_node::remove(); cur->instructions.push_tail(inst); + assert(cur_while != NULL); cur->add_successor(mem_ctx, cur_while); next = new_block(); diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c index 1d4ba3cac7e..f981388ef1a 100644 --- a/src/mesa/drivers/dri/i965/brw_clear.c 
+++ b/src/mesa/drivers/dri/i965/brw_clear.c @@ -184,7 +184,7 @@ brw_fast_clear_depth(struct gl_context *ctx) * must be issued before the rectangle primitive used for the depth * buffer clear operation. */ - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); if (fb->MaxNumLayers > 0) { for (unsigned layer = 0; layer < depth_irb->layer_count; layer++) { @@ -204,7 +204,7 @@ brw_fast_clear_depth(struct gl_context *ctx) * by a PIPE_CONTROL command with DEPTH_STALL bit set and Then * followed by Depth FLUSH' */ - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); } /* Now, the HiZ buffer contains data that needs to be resolved to the depth diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index ebf12fab69e..328662da82e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -506,6 +506,18 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO; ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO; ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO; + + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms = + BRW_MAX_IMAGES; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms = + (brw->intelScreen->compiler->scalar_vs ? BRW_MAX_IMAGES : 0); + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms = + BRW_MAX_IMAGES; + ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS; + ctx->Const.MaxCombinedImageUnitsAndFragmentOutputs = + MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS; + ctx->Const.MaxImageSamples = 0; + ctx->Const.MaxCombinedImageUniforms = 3 * BRW_MAX_IMAGES; } /* Gen6 converts quads to polygon in beginning of 3D pipeline, @@ -716,6 +728,7 @@ brwCreateContext(gl_api api, brw->is_baytrail = devinfo->is_baytrail; brw->is_haswell = devinfo->is_haswell; brw->is_cherryview = devinfo->is_cherryview; + brw->is_broxton = devinfo->is_broxton; brw->has_llc = devinfo->has_llc; brw->has_hiz = devinfo->has_hiz_and_separate_stencil; brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil; @@ -820,6 +833,12 @@ brwCreateContext(gl_api api, } } + if (brw_init_pipe_control(brw, devinfo)) { + *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY; + intelDestroyContext(driContextPriv); + return false; + } + brw_init_state(brw); #endif @@ -867,6 +886,10 @@ brwCreateContext(gl_api api, brw->predicate.state = BRW_PREDICATE_STATE_RENDER; + brw->use_resource_streamer = screen->has_resource_streamer && + (brw_env_var_as_boolean("INTEL_USE_HW_BT", false) || + brw_env_var_as_boolean("INTEL_USE_GATHER", false)); + ctx->VertexProgram._MaintainTnlProgram = true; ctx->FragmentProgram._MaintainTexEnvProgram = true; @@ -935,6 +958,10 @@ intelDestroyContext(__DRIcontext * driContextPriv) if (brw->wm.base.scratch_bo) drm_intel_bo_unreference(brw->wm.base.scratch_bo); + gen7_reset_hw_bt_pool_offsets(brw); + drm_intel_bo_unreference(brw->hw_bt_pool.bo); + brw->hw_bt_pool.bo = NULL; + drm_intel_gem_context_destroy(brw->hw_ctx); if (ctx->swrast_context) { @@ -946,6 +973,7 @@ intelDestroyContext(__DRIcontext * driContextPriv) if (ctx->swrast_context) _swrast_DestroyContext(&brw->ctx); + brw_fini_pipe_control(brw); intel_batchbuffer_free(brw); drm_intel_bo_unreference(brw->throttle_batch[1]); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 9e1f722df9e..1267a6f5a97 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -201,6 
+201,7 @@ enum brw_state_id { BRW_STATE_STATS_WM, BRW_STATE_UNIFORM_BUFFER, BRW_STATE_ATOMIC_BUFFER, + BRW_STATE_IMAGE_UNITS, BRW_STATE_META_IN_PROGRESS, BRW_STATE_INTERPOLATION_MAP, BRW_STATE_PUSH_CONSTANT_ALLOCATION, @@ -282,6 +283,7 @@ enum brw_state_id { #define BRW_NEW_STATS_WM (1ull << BRW_STATE_STATS_WM) #define BRW_NEW_UNIFORM_BUFFER (1ull << BRW_STATE_UNIFORM_BUFFER) #define BRW_NEW_ATOMIC_BUFFER (1ull << BRW_STATE_ATOMIC_BUFFER) +#define BRW_NEW_IMAGE_UNITS (1ull << BRW_STATE_IMAGE_UNITS) #define BRW_NEW_META_IN_PROGRESS (1ull << BRW_STATE_META_IN_PROGRESS) #define BRW_NEW_INTERPOLATION_MAP (1ull << BRW_STATE_INTERPOLATION_MAP) #define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1ull << BRW_STATE_PUSH_CONSTANT_ALLOCATION) @@ -367,6 +369,7 @@ struct brw_stage_prog_data { GLuint nr_params; /**< number of float params/constants */ GLuint nr_pull_params; + unsigned nr_image_params; unsigned curb_read_length; unsigned total_scratch; @@ -387,6 +390,59 @@ struct brw_stage_prog_data { */ const gl_constant_value **param; const gl_constant_value **pull_param; + + /** + * Image metadata passed to the shader as uniforms. This is deliberately + * ignored by brw_stage_prog_data_compare() because its contents don't have + * any influence on program compilation. + */ + struct brw_image_param *image_param; +}; + +/* + * Image metadata structure as laid out in the shader parameter + * buffer. Entries have to be 16B-aligned for the vec4 back-end to be + * able to use them. That's okay because the padding and any unused + * entries [most of them except when we're doing untyped surface + * access] will be removed by the uniform packing pass. + */ +#define BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET 0 +#define BRW_IMAGE_PARAM_OFFSET_OFFSET 4 +#define BRW_IMAGE_PARAM_SIZE_OFFSET 8 +#define BRW_IMAGE_PARAM_STRIDE_OFFSET 12 +#define BRW_IMAGE_PARAM_TILING_OFFSET 16 +#define BRW_IMAGE_PARAM_SWIZZLING_OFFSET 20 +#define BRW_IMAGE_PARAM_SIZE 24 + +struct brw_image_param { + /** Surface binding table index. */ + uint32_t surface_idx; + + /** Offset applied to the X and Y surface coordinates. */ + uint32_t offset[2]; + + /** Surface X, Y and Z dimensions. */ + uint32_t size[3]; + + /** X-stride in bytes, Y-stride in pixels, horizontal slice stride in + * pixels, vertical slice stride in pixels. + */ + uint32_t stride[4]; + + /** Log2 of the tiling modulus in the X, Y and Z dimension. */ + uint32_t tiling[3]; + + /** + * Right shift to apply for bit 6 address swizzling. Two different + * swizzles can be specified and will be applied one after the other. The + * resulting address will be: + * + * addr' = addr ^ ((1 << 6) & ((addr >> swizzling[0]) ^ + * (addr >> swizzling[1]))) + * + * Use \c 0xff if any of the swizzles is not required. + */ + uint32_t swizzling[2]; }; /* Data about a particular attempt to compile a program. Note that @@ -416,11 +472,13 @@ struct brw_wm_prog_data { uint8_t computed_depth_mode; + bool early_fragment_tests; bool no_8; bool dual_src_blend; bool uses_pos_offset; bool uses_omask; bool uses_kill; + bool pulls_bary; uint32_t prog_offset_16; /** @@ -874,11 +932,12 @@ struct intel_batchbuffer { drm_intel_bo *bo; /** Last BO submitted to the hardware. Used for glFinish(). */ drm_intel_bo *last_bo; - /** BO for post-sync nonzero writes for gen6 workaround. 
*/ - drm_intel_bo *workaround_bo; +#ifdef DEBUG uint16_t emit, total; - uint16_t used, reserved_space; +#endif + uint16_t reserved_space; + uint32_t *map_next; uint32_t *map; uint32_t *cpu_map; #define BATCH_SZ (8192*sizeof(uint32_t)) @@ -887,10 +946,8 @@ struct intel_batchbuffer { enum brw_gpu_ring ring; bool needs_sol_reset; - uint8_t pipe_controls_since_last_cs_stall; - struct { - uint16_t used; + uint32_t *map_next; int reloc_count; } saved; }; @@ -1040,6 +1097,10 @@ struct brw_context drm_intel_context *hw_ctx; + /** BO for post-sync nonzero writes for gen6 workaround. */ + drm_intel_bo *workaround_bo; + uint8_t pipe_controls_since_last_cs_stall; + /** * Set of drm_intel_bo * that have been rendered to within this batchbuffer * and would need flushing before being used from another cache domain that @@ -1123,6 +1184,7 @@ struct brw_context bool is_baytrail; bool is_haswell; bool is_cherryview; + bool is_broxton; bool has_hiz; bool has_separate_stencil; @@ -1135,6 +1197,7 @@ struct brw_context bool has_pln; bool no_simd8; bool use_rep_send; + bool use_resource_streamer; /** * Some versions of Gen hardware don't do centroid interpolation correctly @@ -1241,12 +1304,12 @@ struct brw_context * Platform specific constants containing the maximum number of threads * for each pipeline stage. */ - int max_vs_threads; - int max_hs_threads; - int max_ds_threads; - int max_gs_threads; - int max_wm_threads; - int max_cs_threads; + unsigned max_vs_threads; + unsigned max_hs_threads; + unsigned max_ds_threads; + unsigned max_gs_threads; + unsigned max_wm_threads; + unsigned max_cs_threads; /* BRW_NEW_URB_ALLOCATIONS: */ @@ -1398,6 +1461,12 @@ struct brw_context struct brw_cs_prog_data *prog_data; } cs; + /* RS hardware binding table */ + struct { + drm_intel_bo *bo; + uint32_t next_offset; + } hw_bt_pool; + struct { uint32_t state_offset; uint32_t blend_state_offset; @@ -1453,8 +1522,8 @@ struct brw_context } perfmon; int num_atoms[BRW_NUM_PIPELINES]; - const struct brw_tracked_state render_atoms[57]; - const struct brw_tracked_state compute_atoms[3]; + const struct brw_tracked_state render_atoms[60]; + const struct brw_tracked_state compute_atoms[4]; /* If (INTEL_DEBUG & DEBUG_BATCH) */ struct { @@ -1732,11 +1801,17 @@ void brw_upload_abo_surfaces(struct brw_context *brw, struct gl_shader_program *prog, struct brw_stage_state *stage_state, struct brw_stage_prog_data *prog_data); +void brw_upload_image_surfaces(struct brw_context *brw, + struct gl_shader *shader, + struct brw_stage_state *stage_state, + struct brw_stage_prog_data *prog_data); /* brw_surface_formats.c */ bool brw_render_target_supported(struct brw_context *brw, struct gl_renderbuffer *rb); uint32_t brw_depth_format(struct brw_context *brw, mesa_format format); +mesa_format brw_lower_mesa_image_format(const struct brw_device_info *devinfo, + mesa_format format); /* brw_performance_monitor.c */ void brw_init_performance_monitors(struct brw_context *brw); @@ -2013,6 +2088,21 @@ bool gen9_use_linear_1d_layout(const struct brw_context *brw, const struct intel_mipmap_tree *mt); +/* brw_pipe_control.c */ +int brw_init_pipe_control(struct brw_context *brw, + const struct brw_device_info *info); +void brw_fini_pipe_control(struct brw_context *brw); + +void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags); +void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags, + drm_intel_bo *bo, uint32_t offset, + uint32_t imm_lower, uint32_t imm_upper); +void brw_emit_mi_flush(struct brw_context *brw); +void 
brw_emit_post_sync_nonzero_flush(struct brw_context *brw); +void brw_emit_depth_stall_flushes(struct brw_context *brw); +void gen7_emit_vs_workaround_flush(struct brw_context *brw); +void gen7_emit_cs_stall_flush(struct brw_context *brw); + #ifdef __cplusplus } #endif diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp index 42a082b57b6..6ce5779137e 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.cpp +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp @@ -82,7 +82,7 @@ brw_cs_emit(struct brw_context *brw, prog_data->local_size[0] = cp->LocalSize[0]; prog_data->local_size[1] = cp->LocalSize[1]; prog_data->local_size[2] = cp->LocalSize[2]; - int local_workgroup_size = + unsigned local_workgroup_size = cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2]; cfg_t *cfg = NULL; @@ -182,7 +182,8 @@ brw_codegen_cs_prog(struct brw_context *brw, * prog_data associated with the compiled program, and which will be freed * by the state cache. */ - int param_count = cs->num_uniform_components; + int param_count = cs->num_uniform_components + + cs->NumImages * BRW_IMAGE_PARAM_SIZE; /* The backend also sometimes adds params for texture size. */ param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits; @@ -190,7 +191,10 @@ brw_codegen_cs_prog(struct brw_context *brw, rzalloc_array(NULL, const gl_constant_value *, param_count); prog_data.base.pull_param = rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.image_param = + rzalloc_array(NULL, struct brw_image_param, cs->NumImages); prog_data.base.nr_params = param_count; + prog_data.base.nr_image_params = cs->NumImages; program = brw_cs_emit(brw, mem_ctx, key, &prog_data, &cp->program, prog, &program_size); @@ -291,6 +295,17 @@ brw_cs_precompile(struct gl_context *ctx, } +static unsigned +get_cs_thread_count(const struct brw_cs_prog_data *cs_prog_data) +{ + const unsigned simd_size = cs_prog_data->simd_size; + unsigned group_size = cs_prog_data->local_size[0] * + cs_prog_data->local_size[1] * cs_prog_data->local_size[2]; + + return (group_size + simd_size - 1) / simd_size; +} + + static void brw_upload_cs_state(struct brw_context *brw) { @@ -316,6 +331,8 @@ brw_upload_cs_state(struct brw_context *brw) prog_data->binding_table.size_bytes, 32, &stage_state->bind_bo_offset); + unsigned threads = get_cs_thread_count(cs_prog_data); + uint32_t dwords = brw->gen < 8 ? 8 : 9; BEGIN_BATCH(dwords); OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2)); @@ -365,6 +382,13 @@ brw_upload_cs_state(struct brw_context *brw) desc[dw++] = 0; desc[dw++] = 0; desc[dw++] = stage_state->bind_bo_offset; + desc[dw++] = 0; + const uint32_t media_threads = + brw->gen >= 8 ? 
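The get_cs_thread_count() helper added above is just a ceiling division of the work-group size by the compiled SIMD width, and the result is what gets packed into the MEDIA thread-count field a few dwords below (and asserted against brw->max_cs_threads). A standalone worked example of that arithmetic, with a local DIV_ROUND_UP so it compiles on its own:

#include <assert.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static unsigned
cs_thread_count(unsigned local_x, unsigned local_y, unsigned local_z,
                unsigned simd_size)
{
   return DIV_ROUND_UP(local_x * local_y * local_z, simd_size);
}

int main(void)
{
   /* A 16x16x1 work group compiled SIMD16 needs 256/16 = 16 HW threads. */
   assert(cs_thread_count(16, 16, 1, 16) == 16);
   /* A 7x7x1 work group compiled SIMD8 needs ceil(49/8) = 7 threads. */
   assert(cs_thread_count(7, 7, 1, 8) == 7);
   printf("ok\n");
   return 0;
}
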
+ SET_FIELD(threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) : + SET_FIELD(threads, MEDIA_GPGPU_THREAD_COUNT); + assert(threads <= brw->max_cs_threads); + desc[dw++] = media_threads; BEGIN_BATCH(4); OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2)); diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index befd7a9538c..a149ce3ba12 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -176,7 +176,7 @@ void brw_upload_cs_urb_state(struct brw_context *brw) ADVANCE_BATCH(); } -static GLfloat fixed_plane[6][4] = { +static const GLfloat fixed_plane[6][4] = { { 0, 0, -1, 1 }, { 0, 0, 1, 1 }, { 0, -1, 0, 1 }, diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index c113d52a3d3..3bbaf977bc5 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -877,6 +877,21 @@ enum opcode { * instructions. */ FS_OPCODE_FB_WRITE = 128, + + /** + * Same as FS_OPCODE_FB_WRITE but expects its arguments separately as + * individual sources instead of as a single payload blob: + * + * Source 0: [required] Color 0. + * Source 1: [optional] Color 1 (for dual source blend messages). + * Source 2: [optional] Src0 Alpha. + * Source 3: [optional] Source Depth (passthrough from the thread payload). + * Source 4: [optional] Destination Depth (gl_FragDepth). + * Source 5: [optional] Sample Mask (gl_SampleMask). + * Source 6: [required] Number of color components (as a UD immediate). + */ + FS_OPCODE_FB_WRITE_LOGICAL, + FS_OPCODE_BLORP_FB_WRITE, FS_OPCODE_REP_FB_WRITE, SHADER_OPCODE_RCP, @@ -890,18 +905,49 @@ enum opcode { SHADER_OPCODE_SIN, SHADER_OPCODE_COS, + /** + * Texture sampling opcodes. + * + * LOGICAL opcodes are eventually translated to the matching non-LOGICAL + * opcode but instead of taking a single payload blob they expect their + * arguments separately as individual sources: + * + * Source 0: [optional] Texture coordinates. + * Source 1: [optional] Shadow comparitor. + * Source 2: [optional] dPdx if the operation takes explicit derivatives, + * otherwise LOD value. + * Source 3: [optional] dPdy if the operation takes explicit derivatives. + * Source 4: [optional] Sample index. + * Source 5: [optional] MCS data. + * Source 6: [required] Texture sampler. + * Source 7: [optional] Texel offset. + * Source 8: [required] Number of coordinate components (as UD immediate). + * Source 9: [required] Number derivative components (as UD immediate). + */ SHADER_OPCODE_TEX, + SHADER_OPCODE_TEX_LOGICAL, SHADER_OPCODE_TXD, + SHADER_OPCODE_TXD_LOGICAL, SHADER_OPCODE_TXF, + SHADER_OPCODE_TXF_LOGICAL, SHADER_OPCODE_TXL, + SHADER_OPCODE_TXL_LOGICAL, SHADER_OPCODE_TXS, + SHADER_OPCODE_TXS_LOGICAL, FS_OPCODE_TXB, + FS_OPCODE_TXB_LOGICAL, SHADER_OPCODE_TXF_CMS, + SHADER_OPCODE_TXF_CMS_LOGICAL, SHADER_OPCODE_TXF_UMS, + SHADER_OPCODE_TXF_UMS_LOGICAL, SHADER_OPCODE_TXF_MCS, + SHADER_OPCODE_TXF_MCS_LOGICAL, SHADER_OPCODE_LOD, + SHADER_OPCODE_LOD_LOGICAL, SHADER_OPCODE_TG4, + SHADER_OPCODE_TG4_LOGICAL, SHADER_OPCODE_TG4_OFFSET, + SHADER_OPCODE_TG4_OFFSET_LOGICAL, /** * Combines multiple sources of size 1 into a larger virtual GRF. @@ -919,13 +965,33 @@ enum opcode { SHADER_OPCODE_SHADER_TIME_ADD, + /** + * Typed and untyped surface access opcodes. 
+ * + * LOGICAL opcodes are eventually translated to the matching non-LOGICAL + * opcode but instead of taking a single payload blob they expect their + * arguments separately as individual sources: + * + * Source 0: [required] Surface coordinates. + * Source 1: [optional] Operation source. + * Source 2: [required] Surface index. + * Source 3: [required] Number of coordinate components (as UD immediate). + * Source 4: [required] Opcode-specific control immediate, same as source 2 + * of the matching non-LOGICAL opcode. + */ SHADER_OPCODE_UNTYPED_ATOMIC, + SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL, SHADER_OPCODE_UNTYPED_SURFACE_READ, + SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL, SHADER_OPCODE_UNTYPED_SURFACE_WRITE, + SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL, SHADER_OPCODE_TYPED_ATOMIC, + SHADER_OPCODE_TYPED_ATOMIC_LOGICAL, SHADER_OPCODE_TYPED_SURFACE_READ, + SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL, SHADER_OPCODE_TYPED_SURFACE_WRITE, + SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL, SHADER_OPCODE_MEMORY_FENCE, @@ -971,7 +1037,6 @@ enum opcode { FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7, FS_OPCODE_MOV_DISPATCH_TO_FLAGS, FS_OPCODE_DISCARD_JUMP, - FS_OPCODE_SET_OMASK, FS_OPCODE_SET_SAMPLE_ID, FS_OPCODE_SET_SIMD4X2_OFFSET, FS_OPCODE_PACK_HALF_2x16_SPLIT, @@ -1151,6 +1216,11 @@ enum opcode { * GLSL barrier() */ SHADER_OPCODE_BARRIER, + + /** + * Calculate the high 32-bits of a 32x32 multiply. + */ + SHADER_OPCODE_MULH, }; enum brw_urb_write_flags { @@ -1642,6 +1712,36 @@ enum brw_message_target { #define _3DSTATE_BINDING_TABLE_POINTERS_GS 0x7829 /* GEN7+ */ #define _3DSTATE_BINDING_TABLE_POINTERS_PS 0x782A /* GEN7+ */ +#define _3DSTATE_BINDING_TABLE_POOL_ALLOC 0x7919 /* GEN7.5+ */ +#define BRW_HW_BINDING_TABLE_ENABLE (1 << 11) +#define GEN7_HW_BT_POOL_MOCS_SHIFT 7 +#define GEN7_HW_BT_POOL_MOCS_MASK INTEL_MASK(10, 7) +#define GEN8_HW_BT_POOL_MOCS_SHIFT 0 +#define GEN8_HW_BT_POOL_MOCS_MASK INTEL_MASK(6, 0) +/* Only required in HSW */ +#define HSW_BT_POOL_ALLOC_MUST_BE_ONE (3 << 5) + +#define _3DSTATE_BINDING_TABLE_EDIT_VS 0x7843 /* GEN7.5 */ +#define _3DSTATE_BINDING_TABLE_EDIT_GS 0x7844 /* GEN7.5 */ +#define _3DSTATE_BINDING_TABLE_EDIT_HS 0x7845 /* GEN7.5 */ +#define _3DSTATE_BINDING_TABLE_EDIT_DS 0x7846 /* GEN7.5 */ +#define _3DSTATE_BINDING_TABLE_EDIT_PS 0x7847 /* GEN7.5 */ +#define BRW_BINDING_TABLE_INDEX_SHIFT 16 +#define BRW_BINDING_TABLE_INDEX_MASK INTEL_MASK(23, 16) + +#define BRW_BINDING_TABLE_EDIT_TARGET_ALL 3 +#define BRW_BINDING_TABLE_EDIT_TARGET_CORE1 2 +#define BRW_BINDING_TABLE_EDIT_TARGET_CORE0 1 +/* In HSW, when editing binding table entries to surface state offsets, + * the surface state offset is a 16-bit value aligned to 32 bytes. But + * Surface State Pointer in dword 2 is [15:0]. Right shift surf_offset + * by 5 bits so it won't disturb bit 16 (which is used as the binding + * table index entry), otherwise it would hang the GPU. 
+ */ +#define HSW_SURFACE_STATE_EDIT(value) (value >> 5) +/* Same as Haswell, but surface state offsets now aligned to 64 bytes.*/ +#define GEN8_SURFACE_STATE_EDIT(value) (value >> 6) + #define _3DSTATE_SAMPLER_STATE_POINTERS 0x7802 /* GEN6+ */ # define PS_SAMPLER_STATE_CHANGE (1 << 12) # define GS_SAMPLER_STATE_CHANGE (1 << 9) @@ -1757,6 +1857,7 @@ enum brw_message_target { # define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 # define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) # define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16) +# define HSW_VS_UAV_ACCESS_ENABLE (1 << 12) /* DW4 */ # define GEN6_VS_DISPATCH_START_GRF_SHIFT 20 # define GEN6_VS_URB_READ_LENGTH_SHIFT 11 @@ -1782,6 +1883,7 @@ enum brw_message_target { # define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 # define GEN6_GS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) # define GEN6_GS_FLOATING_POINT_MODE_ALT (1 << 16) +# define HSW_GS_UAV_ACCESS_ENABLE (1 << 12) /* DW4 */ # define GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT 23 # define GEN7_GS_OUTPUT_TOPOLOGY_SHIFT 17 @@ -2147,6 +2249,7 @@ enum brw_pixel_shader_computed_depth_mode { # define GEN8_PSX_SHADER_DISABLES_ALPHA_TO_COVERAGE (1 << 7) # define GEN8_PSX_SHADER_IS_PER_SAMPLE (1 << 6) # define GEN8_PSX_SHADER_COMPUTES_STENCIL (1 << 5) +# define GEN9_PSX_SHADER_PULLS_BARY (1 << 3) # define GEN8_PSX_SHADER_HAS_UAV (1 << 2) # define GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK (1 << 1) @@ -2283,6 +2386,9 @@ enum brw_wm_barycentric_interp_mode { # define GEN7_WM_KILL_ENABLE (1 << 25) # define GEN7_WM_COMPUTED_DEPTH_MODE_SHIFT 23 # define GEN7_WM_USES_SOURCE_DEPTH (1 << 20) +# define GEN7_WM_EARLY_DS_CONTROL_NORMAL (0 << 21) +# define GEN7_WM_EARLY_DS_CONTROL_PSEXEC (1 << 21) +# define GEN7_WM_EARLY_DS_CONTROL_PREPS (2 << 21) # define GEN7_WM_USES_SOURCE_W (1 << 19) # define GEN7_WM_POSITION_ZW_PIXEL (0 << 17) # define GEN7_WM_POSITION_ZW_CENTROID (2 << 17) @@ -2307,6 +2413,7 @@ enum brw_wm_barycentric_interp_mode { /* DW2 */ # define GEN7_WM_MSDISPMODE_PERSAMPLE (0 << 31) # define GEN7_WM_MSDISPMODE_PERPIXEL (1 << 31) +# define HSW_WM_UAV_ONLY (1 << 30) #define _3DSTATE_PS 0x7820 /* GEN7+ */ /* DW1: kernel pointer */ @@ -2330,6 +2437,7 @@ enum brw_wm_barycentric_interp_mode { # define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE (1 << 8) # define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7) # define GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE (1 << 6) +# define HSW_PS_UAV_ACCESS_ENABLE (1 << 5) # define GEN7_PS_POSOFFSET_NONE (0 << 3) # define GEN7_PS_POSOFFSET_CENTROID (2 << 3) # define GEN7_PS_POSOFFSET_SAMPLE (3 << 3) @@ -2493,12 +2601,13 @@ enum brw_wm_barycentric_interp_mode { #define BDW_MOCS_WT 0x58 #define BDW_MOCS_PTE 0x18 -/* Skylake: MOCS is now an index into an array of 64 different configurable - * cache settings. We still use only either write-back or write-through; and - * rely on the documented default values. +/* Skylake: MOCS is now an index into an array of 62 different caching + * configurations programmed by the kernel. 
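Based on the comment above, here is a hypothetical helper showing how the HSW/GEN8 surface-state edit macros and the binding-table index field could combine into a single _3DSTATE_BINDING_TABLE_EDIT_* entry. The exact packing used by the driver is not shown in this hunk, so treat this strictly as an illustration of the shifts the comment describes.

#include <stdint.h>
#include <stdio.h>

#define BRW_BINDING_TABLE_INDEX_SHIFT   16
#define HSW_SURFACE_STATE_EDIT(value)   ((value) >> 5)
#define GEN8_SURFACE_STATE_EDIT(value)  ((value) >> 6)

/* Illustrative only: combine a binding-table slot index with a
 * 32-byte-aligned surface-state offset, shifting the offset down so it
 * cannot spill into bit 16 as the comment warns. */
static uint32_t
hsw_bt_edit_entry(unsigned index, uint32_t surf_offset)
{
   return ((uint32_t)index << BRW_BINDING_TABLE_INDEX_SHIFT) |
          HSW_SURFACE_STATE_EDIT(surf_offset);
}

int main(void)
{
   /* Slot 3, surface state at offset 0x8000 (32-byte aligned). */
   printf("0x%08x\n", (unsigned)hsw_bt_edit_entry(3, 0x8000)); /* 0x00030400 */
   return 0;
}
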
*/ -#define SKL_MOCS_WB (0b001001 << 1) -#define SKL_MOCS_WT (0b000101 << 1) +/* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ +#define SKL_MOCS_WB (2 << 1) +/* TC=LLC/eLLC, LeCC=PTE, LRUM=3, L3CC=WB */ +#define SKL_MOCS_PTE (1 << 1) #define MEDIA_VFE_STATE 0x7000 /* GEN7 DW2, GEN8+ DW3 */ @@ -2519,6 +2628,11 @@ enum brw_wm_barycentric_interp_mode { # define MEDIA_VFE_STATE_CURBE_ALLOC_MASK INTEL_MASK(15, 0) #define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x7002 +/* GEN7 DW5, GEN8+ DW6 */ +# define MEDIA_GPGPU_THREAD_COUNT_SHIFT 0 +# define MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(7, 0) +# define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT 0 +# define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(9, 0) #define MEDIA_STATE_FLUSH 0x7004 #define GPGPU_WALKER 0x7105 /* GEN8+ DW2 */ diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c index a07b86e60e2..16c125d07ee 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.c +++ b/src/mesa/drivers/dri/i965/brw_device_info.c @@ -170,7 +170,8 @@ static const struct brw_device_info brw_device_info_byt = { #define HSW_FEATURES \ GEN7_FEATURES, \ .is_haswell = true, \ - .supports_simd16_3src = true + .supports_simd16_3src = true, \ + .has_resource_streamer = true static const struct brw_device_info brw_device_info_hsw_gt1 = { HSW_FEATURES, .gt = 1, @@ -229,6 +230,7 @@ static const struct brw_device_info brw_device_info_hsw_gt3 = { #define GEN8_FEATURES \ .gen = 8, \ .has_hiz_and_separate_stencil = true, \ + .has_resource_streamer = true, \ .must_use_separate_stencil = true, \ .has_llc = true, \ .has_pln = true, \ @@ -297,41 +299,62 @@ static const struct brw_device_info brw_device_info_chv = { } }; -/* Thread counts and URB limits are placeholders, and may not be accurate. */ #define GEN9_FEATURES \ .gen = 9, \ .has_hiz_and_separate_stencil = true, \ + .has_resource_streamer = true, \ .must_use_separate_stencil = true, \ .has_llc = true, \ .has_pln = true, \ - .max_vs_threads = 280, \ - .max_gs_threads = 256, \ - .max_wm_threads = 408, \ + .supports_simd16_3src = true, \ + .max_vs_threads = 336, \ + .max_gs_threads = 336, \ + .max_hs_threads = 336, \ + .max_ds_threads = 336, \ + .max_wm_threads = 64 * 6, \ + .max_cs_threads = 56, \ .urb = { \ - .size = 128, \ + .size = 192, \ .min_vs_entries = 64, \ - .max_vs_entries = 1664, \ + .max_vs_entries = 1856, \ + .max_hs_entries = 672, \ + .max_ds_entries = 1120, \ .max_gs_entries = 640, \ } -static const struct brw_device_info brw_device_info_skl_early = { - GEN9_FEATURES, .gt = 1, - .supports_simd16_3src = false, -}; - static const struct brw_device_info brw_device_info_skl_gt1 = { GEN9_FEATURES, .gt = 1, - .supports_simd16_3src = true, }; static const struct brw_device_info brw_device_info_skl_gt2 = { GEN9_FEATURES, .gt = 2, - .supports_simd16_3src = true, }; static const struct brw_device_info brw_device_info_skl_gt3 = { GEN9_FEATURES, .gt = 3, - .supports_simd16_3src = true, +}; + +static const struct brw_device_info brw_device_info_bxt = { + GEN9_FEATURES, + .is_broxton = 1, + .gt = 1, + .has_llc = false, + + /* XXX: These are preliminary thread counts and URB sizes. 
*/ + .max_vs_threads = 56, + .max_hs_threads = 56, + .max_ds_threads = 56, + .max_gs_threads = 56, + .max_wm_threads = 32, + .max_cs_threads = 28, + .urb = { + .size = 64, + .min_vs_entries = 34, + .max_vs_entries = 640, + .max_hs_entries = 80, + .max_ds_entries = 80, + .max_gs_entries = 256, + } }; const struct brw_device_info * @@ -348,9 +371,6 @@ brw_get_device_info(int devid, int revision) return NULL; } - if (devinfo->gen == 9 && (revision == 2 || revision == 3 || revision == -1)) - return &brw_device_info_skl_early; - return devinfo; } diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h b/src/mesa/drivers/dri/i965/brw_device_info.h index 9192235fb0e..7bab5716b43 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.h +++ b/src/mesa/drivers/dri/i965/brw_device_info.h @@ -35,6 +35,7 @@ struct brw_device_info bool is_baytrail; bool is_haswell; bool is_cherryview; + bool is_broxton; bool has_hiz_and_separate_stencil; bool must_use_separate_stencil; @@ -45,6 +46,7 @@ struct brw_device_info bool has_compr4; bool has_surface_tile_offset; bool supports_simd16_3src; + bool has_resource_streamer; /** * Quirks: diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index b91597a9f5d..e092ef4a7c6 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -104,13 +104,13 @@ get_hw_prim_for_gl_prim(int mode) * programs be immune to the active primitive (ie. cope with all * possibilities). That may not be realistic however. */ -static void brw_set_prim(struct brw_context *brw, - const struct _mesa_prim *prim) +static void +brw_set_prim(struct brw_context *brw, const struct _mesa_prim *prim) { struct gl_context *ctx = &brw->ctx; uint32_t hw_prim = get_hw_prim_for_gl_prim(prim->mode); - DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode)); + DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode)); /* Slight optimization to avoid the GS program when not needed: */ @@ -138,15 +138,12 @@ static void brw_set_prim(struct brw_context *brw, } } -static void gen6_set_prim(struct brw_context *brw, - const struct _mesa_prim *prim) +static void +gen6_set_prim(struct brw_context *brw, const struct _mesa_prim *prim) { - uint32_t hw_prim; - - DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode)); - - hw_prim = get_hw_prim_for_gl_prim(prim->mode); + DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode)); + const uint32_t hw_prim = get_hw_prim_for_gl_prim(prim->mode); if (hw_prim != brw->primitive) { brw->primitive = hw_prim; brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE; @@ -162,7 +159,8 @@ static void gen6_set_prim(struct brw_context *brw, * quads so that those dangling vertices won't get drawn when we convert to * trifans/tristrips. */ -static GLuint trim(GLenum prim, GLuint length) +static GLuint +trim(GLenum prim, GLuint length) { if (prim == GL_QUAD_STRIP) return length > 3 ? 
(length - length % 2) : 0; @@ -173,16 +171,16 @@ static GLuint trim(GLenum prim, GLuint length) } -static void brw_emit_prim(struct brw_context *brw, - const struct _mesa_prim *prim, - uint32_t hw_prim) +static void +brw_emit_prim(struct brw_context *brw, + const struct _mesa_prim *prim, + uint32_t hw_prim) { int verts_per_instance; int vertex_access_type; int indirect_flag; - int predicate_enable; - DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), + DBG("PRIM: %s %d %d\n", _mesa_enum_to_string(prim->mode), prim->start, prim->count); int start_vertex_location = prim->start; @@ -216,9 +214,8 @@ static void brw_emit_prim(struct brw_context *brw, * and missed flushes of the render cache as it heads to other parts of * the besides the draw code. */ - if (brw->always_flush_cache) { - intel_batchbuffer_emit_mi_flush(brw); - } + if (brw->always_flush_cache) + brw_emit_mi_flush(brw); /* If indirect, emit a bunch of loads from the indirect BO. */ if (prim->is_indirect) { @@ -256,22 +253,20 @@ static void brw_emit_prim(struct brw_context *brw, OUT_BATCH(0); ADVANCE_BATCH(); } - } - else { + } else { indirect_flag = 0; } + BEGIN_BATCH(brw->gen >= 7 ? 7 : 6); + if (brw->gen >= 7) { - if (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT) - predicate_enable = GEN7_3DPRIM_PREDICATE_ENABLE; - else - predicate_enable = 0; + const int predicate_enable = + (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT) + ? GEN7_3DPRIM_PREDICATE_ENABLE : 0; - BEGIN_BATCH(7); OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | predicate_enable); OUT_BATCH(hw_prim | vertex_access_type); } else { - BEGIN_BATCH(6); OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) | hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT | vertex_access_type); @@ -283,14 +278,14 @@ static void brw_emit_prim(struct brw_context *brw, OUT_BATCH(base_vertex_location); ADVANCE_BATCH(); - if (brw->always_flush_cache) { - intel_batchbuffer_emit_mi_flush(brw); - } + if (brw->always_flush_cache) + brw_emit_mi_flush(brw); } -static void brw_merge_inputs( struct brw_context *brw, - const struct gl_client_array *arrays[]) +static void +brw_merge_inputs(struct brw_context *brw, + const struct gl_client_array *arrays[]) { const struct gl_context *ctx = &brw->ctx; GLuint i; @@ -359,7 +354,8 @@ static void brw_merge_inputs( struct brw_context *brw, * Also mark any render targets which will be textured as needing a render * cache flush. */ -static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) +static void +brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; struct gl_framebuffer *fb = ctx->DrawBuffer; @@ -399,21 +395,22 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) /* May fail if out of video memory for texture or vbo upload, or on * fallback conditions. 
*/ -static void brw_try_draw_prims( struct gl_context *ctx, - const struct gl_client_array *arrays[], - const struct _mesa_prim *prims, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLuint min_index, - GLuint max_index, - struct gl_buffer_object *indirect) +static void +brw_try_draw_prims(struct gl_context *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prims, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index, + struct gl_buffer_object *indirect) { struct brw_context *brw = brw_context(ctx); GLuint i; bool fail_next = false; if (ctx->NewState) - _mesa_update_state( ctx ); + _mesa_update_state(ctx); /* Find the highest sampler unit used by each shader program. A bit-count * won't work since ARB programs use the texture unit number as the sampler @@ -433,7 +430,7 @@ static void brw_try_draw_prims( struct gl_context *ctx, * software fallback will segfault if it attempts to access any * texture level other than level 0. */ - brw_validate_textures( brw ); + brw_validate_textures(brw); intel_prepare_render(brw); @@ -445,7 +442,7 @@ static void brw_try_draw_prims( struct gl_context *ctx, /* Bind all inputs, derive varying and size information: */ - brw_merge_inputs( brw, arrays ); + brw_merge_inputs(brw, arrays); brw->ib.ib = ib; brw->ctx.NewDriverState |= BRW_NEW_INDICES; @@ -553,15 +550,17 @@ retry: return; } -void brw_draw_prims( struct gl_context *ctx, - const struct _mesa_prim *prims, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLboolean index_bounds_valid, - GLuint min_index, - GLuint max_index, - struct gl_transform_feedback_object *unused_tfb_object, - struct gl_buffer_object *indirect ) +void +brw_draw_prims(struct gl_context *ctx, + const struct _mesa_prim *prims, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index, + struct gl_transform_feedback_object *unused_tfb_object, + unsigned stream, + struct gl_buffer_object *indirect) { struct brw_context *brw = brw_context(ctx); const struct gl_client_array **arrays = ctx->Array._DrawArrays; @@ -582,11 +581,11 @@ void brw_draw_prims( struct gl_context *ctx, */ if (ctx->RenderMode != GL_RENDER) { perf_debug("%s render mode not supported in hardware\n", - _mesa_lookup_enum_by_nr(ctx->RenderMode)); + _mesa_enum_to_string(ctx->RenderMode)); _swsetup_Wakeup(ctx); _tnl_wakeup(ctx); _tnl_draw_prims(ctx, prims, nr_prims, ib, - index_bounds_valid, min_index, max_index, NULL, NULL); + index_bounds_valid, min_index, max_index, NULL, 0, NULL); return; } @@ -604,26 +603,28 @@ void brw_draw_prims( struct gl_context *ctx, * manage it. swrast doesn't support our featureset, so we can't fall back * to it. 
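The new `stream` argument threaded through brw_draw_prims() above corresponds to the vertex-stream index a transform-feedback draw can name on the GL side; ordinary draws pass 0, as the updated _tnl_draw_prims() call shows. A minimal GL-side illustration of where a non-zero stream would come from (assumes a current GL 4.0+ context, glext prototypes enabled, and a transform feedback object `xfb` that previously captured stream 1):

#define GL_GLEXT_PROTOTYPES 1
#include <GL/gl.h>
#include <GL/glext.h>

/* Replays the primitives recorded on vertex stream 1 of a transform
 * feedback object; this is the sort of call that ends up handing a
 * non-zero stream index to the driver's draw_prims hook. */
static void
replay_stream1(GLuint xfb)
{
   glDrawTransformFeedbackStream(GL_POINTS, xfb, 1);
}
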
*/ - brw_try_draw_prims(ctx, arrays, prims, nr_prims, ib, min_index, max_index, indirect); + brw_try_draw_prims(ctx, arrays, prims, nr_prims, ib, min_index, max_index, + indirect); } -void brw_draw_init( struct brw_context *brw ) +void +brw_draw_init(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; struct vbo_context *vbo = vbo_context(ctx); - int i; /* Register our drawing function: */ vbo->draw_prims = brw_draw_prims; - for (i = 0; i < VERT_ATTRIB_MAX; i++) + for (int i = 0; i < VERT_ATTRIB_MAX; i++) brw->vb.inputs[i].buffer = -1; brw->vb.nr_buffers = 0; brw->vb.nr_enabled = 0; } -void brw_draw_destroy( struct brw_context *brw ) +void +brw_draw_destroy(struct brw_context *brw) { int i; diff --git a/src/mesa/drivers/dri/i965/brw_draw.h b/src/mesa/drivers/dri/i965/brw_draw.h index fc83dcdd0bb..f994726f5b6 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.h +++ b/src/mesa/drivers/dri/i965/brw_draw.h @@ -34,7 +34,7 @@ struct brw_context; -void brw_draw_prims( struct gl_context *ctx, +void brw_draw_prims(struct gl_context *ctx, const struct _mesa_prim *prims, GLuint nr_prims, const struct _mesa_index_buffer *ib, @@ -42,6 +42,7 @@ void brw_draw_prims( struct gl_context *ctx, GLuint min_index, GLuint max_index, struct gl_transform_feedback_object *unused_tfb_object, + unsigned stream, struct gl_buffer_object *indirect ); void brw_draw_init( struct brw_context *brw ); diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 320e40e1007..cbfd5855410 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -40,7 +40,7 @@ #include "intel_batchbuffer.h" #include "intel_buffer_objects.h" -static GLuint double_types[5] = { +static const GLuint double_types[5] = { 0, BRW_SURFACEFORMAT_R64_FLOAT, BRW_SURFACEFORMAT_R64G64_FLOAT, @@ -48,7 +48,7 @@ static GLuint double_types[5] = { BRW_SURFACEFORMAT_R64G64B64A64_FLOAT }; -static GLuint float_types[5] = { +static const GLuint float_types[5] = { 0, BRW_SURFACEFORMAT_R32_FLOAT, BRW_SURFACEFORMAT_R32G32_FLOAT, @@ -56,7 +56,7 @@ static GLuint float_types[5] = { BRW_SURFACEFORMAT_R32G32B32A32_FLOAT }; -static GLuint half_float_types[5] = { +static const GLuint half_float_types[5] = { 0, BRW_SURFACEFORMAT_R16_FLOAT, BRW_SURFACEFORMAT_R16G16_FLOAT, @@ -64,7 +64,7 @@ static GLuint half_float_types[5] = { BRW_SURFACEFORMAT_R16G16B16A16_FLOAT }; -static GLuint fixed_point_types[5] = { +static const GLuint fixed_point_types[5] = { 0, BRW_SURFACEFORMAT_R32_SFIXED, BRW_SURFACEFORMAT_R32G32_SFIXED, @@ -72,7 +72,7 @@ static GLuint fixed_point_types[5] = { BRW_SURFACEFORMAT_R32G32B32A32_SFIXED, }; -static GLuint uint_types_direct[5] = { +static const GLuint uint_types_direct[5] = { 0, BRW_SURFACEFORMAT_R32_UINT, BRW_SURFACEFORMAT_R32G32_UINT, @@ -80,7 +80,7 @@ static GLuint uint_types_direct[5] = { BRW_SURFACEFORMAT_R32G32B32A32_UINT }; -static GLuint uint_types_norm[5] = { +static const GLuint uint_types_norm[5] = { 0, BRW_SURFACEFORMAT_R32_UNORM, BRW_SURFACEFORMAT_R32G32_UNORM, @@ -88,7 +88,7 @@ static GLuint uint_types_norm[5] = { BRW_SURFACEFORMAT_R32G32B32A32_UNORM }; -static GLuint uint_types_scale[5] = { +static const GLuint uint_types_scale[5] = { 0, BRW_SURFACEFORMAT_R32_USCALED, BRW_SURFACEFORMAT_R32G32_USCALED, @@ -96,7 +96,7 @@ static GLuint uint_types_scale[5] = { BRW_SURFACEFORMAT_R32G32B32A32_USCALED }; -static GLuint int_types_direct[5] = { +static const GLuint int_types_direct[5] = { 0, BRW_SURFACEFORMAT_R32_SINT, 
BRW_SURFACEFORMAT_R32G32_SINT, @@ -104,7 +104,7 @@ static GLuint int_types_direct[5] = { BRW_SURFACEFORMAT_R32G32B32A32_SINT }; -static GLuint int_types_norm[5] = { +static const GLuint int_types_norm[5] = { 0, BRW_SURFACEFORMAT_R32_SNORM, BRW_SURFACEFORMAT_R32G32_SNORM, @@ -112,7 +112,7 @@ static GLuint int_types_norm[5] = { BRW_SURFACEFORMAT_R32G32B32A32_SNORM }; -static GLuint int_types_scale[5] = { +static const GLuint int_types_scale[5] = { 0, BRW_SURFACEFORMAT_R32_SSCALED, BRW_SURFACEFORMAT_R32G32_SSCALED, @@ -120,7 +120,7 @@ static GLuint int_types_scale[5] = { BRW_SURFACEFORMAT_R32G32B32A32_SSCALED }; -static GLuint ushort_types_direct[5] = { +static const GLuint ushort_types_direct[5] = { 0, BRW_SURFACEFORMAT_R16_UINT, BRW_SURFACEFORMAT_R16G16_UINT, @@ -128,7 +128,7 @@ static GLuint ushort_types_direct[5] = { BRW_SURFACEFORMAT_R16G16B16A16_UINT }; -static GLuint ushort_types_norm[5] = { +static const GLuint ushort_types_norm[5] = { 0, BRW_SURFACEFORMAT_R16_UNORM, BRW_SURFACEFORMAT_R16G16_UNORM, @@ -136,7 +136,7 @@ static GLuint ushort_types_norm[5] = { BRW_SURFACEFORMAT_R16G16B16A16_UNORM }; -static GLuint ushort_types_scale[5] = { +static const GLuint ushort_types_scale[5] = { 0, BRW_SURFACEFORMAT_R16_USCALED, BRW_SURFACEFORMAT_R16G16_USCALED, @@ -144,7 +144,7 @@ static GLuint ushort_types_scale[5] = { BRW_SURFACEFORMAT_R16G16B16A16_USCALED }; -static GLuint short_types_direct[5] = { +static const GLuint short_types_direct[5] = { 0, BRW_SURFACEFORMAT_R16_SINT, BRW_SURFACEFORMAT_R16G16_SINT, @@ -152,7 +152,7 @@ static GLuint short_types_direct[5] = { BRW_SURFACEFORMAT_R16G16B16A16_SINT }; -static GLuint short_types_norm[5] = { +static const GLuint short_types_norm[5] = { 0, BRW_SURFACEFORMAT_R16_SNORM, BRW_SURFACEFORMAT_R16G16_SNORM, @@ -160,7 +160,7 @@ static GLuint short_types_norm[5] = { BRW_SURFACEFORMAT_R16G16B16A16_SNORM }; -static GLuint short_types_scale[5] = { +static const GLuint short_types_scale[5] = { 0, BRW_SURFACEFORMAT_R16_SSCALED, BRW_SURFACEFORMAT_R16G16_SSCALED, @@ -168,7 +168,7 @@ static GLuint short_types_scale[5] = { BRW_SURFACEFORMAT_R16G16B16A16_SSCALED }; -static GLuint ubyte_types_direct[5] = { +static const GLuint ubyte_types_direct[5] = { 0, BRW_SURFACEFORMAT_R8_UINT, BRW_SURFACEFORMAT_R8G8_UINT, @@ -176,7 +176,7 @@ static GLuint ubyte_types_direct[5] = { BRW_SURFACEFORMAT_R8G8B8A8_UINT }; -static GLuint ubyte_types_norm[5] = { +static const GLuint ubyte_types_norm[5] = { 0, BRW_SURFACEFORMAT_R8_UNORM, BRW_SURFACEFORMAT_R8G8_UNORM, @@ -184,7 +184,7 @@ static GLuint ubyte_types_norm[5] = { BRW_SURFACEFORMAT_R8G8B8A8_UNORM }; -static GLuint ubyte_types_scale[5] = { +static const GLuint ubyte_types_scale[5] = { 0, BRW_SURFACEFORMAT_R8_USCALED, BRW_SURFACEFORMAT_R8G8_USCALED, @@ -192,7 +192,7 @@ static GLuint ubyte_types_scale[5] = { BRW_SURFACEFORMAT_R8G8B8A8_USCALED }; -static GLuint byte_types_direct[5] = { +static const GLuint byte_types_direct[5] = { 0, BRW_SURFACEFORMAT_R8_SINT, BRW_SURFACEFORMAT_R8G8_SINT, @@ -200,7 +200,7 @@ static GLuint byte_types_direct[5] = { BRW_SURFACEFORMAT_R8G8B8A8_SINT }; -static GLuint byte_types_norm[5] = { +static const GLuint byte_types_norm[5] = { 0, BRW_SURFACEFORMAT_R8_SNORM, BRW_SURFACEFORMAT_R8G8_SNORM, @@ -208,7 +208,7 @@ static GLuint byte_types_norm[5] = { BRW_SURFACEFORMAT_R8G8B8A8_SNORM }; -static GLuint byte_types_scale[5] = { +static const GLuint byte_types_scale[5] = { 0, BRW_SURFACEFORMAT_R8_SSCALED, BRW_SURFACEFORMAT_R8G8_SSCALED, @@ -230,7 +230,7 @@ brw_get_vertex_surface_type(struct brw_context *brw, if 
(unlikely(INTEL_DEBUG & DEBUG_VERTS)) fprintf(stderr, "type %s size %d normalized %d\n", - _mesa_lookup_enum_by_nr(glarray->Type), + _mesa_enum_to_string(glarray->Type), glarray->Size, glarray->Normalized); if (glarray->Integer) { @@ -604,14 +604,15 @@ brw_prepare_shader_draw_parameters(struct brw_context *brw) /** * Emit a VERTEX_BUFFER_STATE entry (part of 3DSTATE_VERTEX_BUFFERS). */ -static void +static uint32_t * emit_vertex_buffer_state(struct brw_context *brw, unsigned buffer_nr, drm_intel_bo *bo, unsigned bo_ending_address, unsigned bo_offset, unsigned stride, - unsigned step_rate) + unsigned step_rate, + uint32_t *__map) { struct gl_context *ctx = &brw->ctx; uint32_t dw0; @@ -643,9 +644,13 @@ emit_vertex_buffer_state(struct brw_context *brw, OUT_BATCH(0); } OUT_BATCH(step_rate); + + return __map; } +#define EMIT_VERTEX_BUFFER_STATE(...) __map = emit_vertex_buffer_state(__VA_ARGS__, __map) -static void brw_emit_vertices(struct brw_context *brw) +static void +brw_emit_vertices(struct brw_context *brw) { GLuint i; @@ -704,14 +709,14 @@ static void brw_emit_vertices(struct brw_context *brw) OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1)); for (i = 0; i < brw->vb.nr_buffers; i++) { struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; - emit_vertex_buffer_state(brw, i, buffer->bo, buffer->bo->size - 1, + EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, buffer->bo->size - 1, buffer->offset, buffer->stride, buffer->step_rate); } if (brw->vs.prog_data->uses_vertexid) { - emit_vertex_buffer_state(brw, brw->vb.nr_buffers, + EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers, brw->draw.draw_params_bo, brw->draw.draw_params_bo->size - 1, brw->draw.draw_params_offset, @@ -855,7 +860,8 @@ const struct brw_tracked_state brw_vertices = { .emit = brw_emit_vertices, }; -static void brw_upload_indices(struct brw_context *brw) +static void +brw_upload_indices(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; const struct _mesa_index_buffer *index_buffer = brw->ib.ib; @@ -935,7 +941,8 @@ const struct brw_tracked_state brw_indices = { .emit = brw_upload_indices, }; -static void brw_emit_index_buffer(struct brw_context *brw) +static void +brw_emit_index_buffer(struct brw_context *brw) { const struct _mesa_index_buffer *index_buffer = brw->ib.ib; GLuint cut_index_setting; diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 0f536046f6f..4d397622fc1 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1584,8 +1584,8 @@ brw_ENDIF(struct brw_codegen *p) } if (devinfo->gen < 6) { - brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(p, insn, brw_imm_d(0x0)); } else if (devinfo->gen == 6) { brw_set_dest(p, insn, brw_imm_w(0)); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 8984b4cb3ca..0e091ddc227 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -68,28 +68,6 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, assert(dst.file != IMM && dst.file != UNIFORM); - /* If exec_size == 0, try to guess it from the registers. Since all - * manner of things may use hardware registers, we first try to guess - * based on GRF registers. 
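The EMIT_VERTEX_BUFFER_STATE wrapper in the brw_draw_upload.c hunk above illustrates a pattern used throughout this series: a helper that emits into an open batch takes the current write pointer as an extra argument and returns the advanced pointer, and a macro stores it back into the local __map that the OUT_BATCH machinery uses. A generic, self-contained sketch of the same idea (names are illustrative, not the driver's):

#include <stdint.h>
#include <stdio.h>

/* Emit a fixed-size packet into the buffer and hand the advanced write
 * pointer back to the caller, mirroring how emit_vertex_buffer_state()
 * now returns __map instead of relying on hidden state. */
static uint32_t *
emit_packet(uint32_t *map, uint32_t header, uint32_t payload)
{
   *map++ = header;
   *map++ = payload;
   return map;
}

/* The wrapper keeps call sites short while still threading the pointer. */
#define EMIT_PACKET(...) map = emit_packet(map, __VA_ARGS__)

int main(void)
{
   uint32_t batch[8];
   uint32_t *map = batch;

   EMIT_PACKET(0x7001u, 42);
   EMIT_PACKET(0x7002u, 43);

   printf("emitted %ld dwords\n", (long)(map - batch));
   return 0;
}
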
If this fails, we will go ahead and take the - * width from the destination register. - */ - if (this->exec_size == 0) { - if (dst.file == GRF) { - this->exec_size = dst.width; - } else { - for (unsigned i = 0; i < sources; ++i) { - if (src[i].file != GRF && src[i].file != ATTR) - continue; - - if (this->exec_size <= 1) - this->exec_size = src[i].width; - assert(src[i].width == 1 || src[i].width == this->exec_size); - } - } - - if (this->exec_size == 0 && dst.file != BAD_FILE) - this->exec_size = dst.width; - } assert(this->exec_size != 0); this->conditional_mod = BRW_CONDITIONAL_NONE; @@ -100,8 +78,8 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, case HW_REG: case MRF: case ATTR: - this->regs_written = - DIV_ROUND_UP(MAX2(dst.width * dst.stride, 1) * type_sz(dst.type), 32); + this->regs_written = DIV_ROUND_UP(dst.component_size(exec_size), + REG_SIZE); break; case BAD_FILE: this->regs_written = 0; @@ -126,9 +104,9 @@ fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size) init(opcode, exec_size, reg_undef, NULL, 0); } -fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst) +fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst) { - init(opcode, 0, dst, NULL, 0); + init(opcode, exec_size, dst, NULL, 0); } fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, @@ -138,12 +116,6 @@ fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, init(opcode, exec_size, dst, src, 1); } -fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0) -{ - const fs_reg src[1] = { src0 }; - init(opcode, 0, dst, src, 1); -} - fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, const fs_reg &src0, const fs_reg &src1) { @@ -151,13 +123,6 @@ fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, init(opcode, exec_size, dst, src, 2); } -fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0, - const fs_reg &src1) -{ - const fs_reg src[2] = { src0, src1 }; - init(opcode, 0, dst, src, 2); -} - fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, const fs_reg &src0, const fs_reg &src1, const fs_reg &src2) { @@ -165,19 +130,6 @@ fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, init(opcode, exec_size, dst, src, 3); } -fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0, - const fs_reg &src1, const fs_reg &src2) -{ - const fs_reg src[3] = { src0, src1, src2 }; - init(opcode, 0, dst, src, 3); -} - -fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, - const fs_reg src[], unsigned sources) -{ - init(opcode, 0, dst, src, sources); -} - fs_inst::fs_inst(enum opcode opcode, uint8_t exec_width, const fs_reg &dst, const fs_reg src[], unsigned sources) { @@ -236,7 +188,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, bld.ADD(vec4_offset, varying_offset, fs_reg(const_offset & ~3)); int scale = 1; - if (devinfo->gen == 4 && dst.width == 8) { + if (devinfo->gen == 4 && bld.dispatch_width() == 8) { /* Pre-gen5, we can either use a SIMD8 message that requires (header, * u, v, r) as parameters, or we can just use the SIMD16 message * consisting of (header, u). 
We choose the second, at the cost of a @@ -251,10 +203,8 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, else op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD; - assert(dst.width % 8 == 0); - int regs_written = 4 * (dst.width / 8) * scale; - fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written), - dst.type, dst.width); + int regs_written = 4 * (bld.dispatch_width() / 8) * scale; + fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written), dst.type); fs_inst *inst = bld.emit(op, vec4_result, surf_index, vec4_offset); inst->regs_written = regs_written; @@ -264,10 +214,10 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, if (devinfo->gen == 4) inst->mlen = 3; else - inst->mlen = 1 + dispatch_width / 8; + inst->mlen = 1 + bld.dispatch_width() / 8; } - bld.MOV(dst, offset(vec4_result, (const_offset & 3) * scale)); + bld.MOV(dst, offset(vec4_result, bld, (const_offset & 3) * scale)); } /** @@ -358,10 +308,14 @@ fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const for (int i = 0; i < this->sources; i++) { reg.type = this->src[i].type; - reg.width = this->src[i].width; if (!this->src[i].equals(reg)) return false; - reg = ::offset(reg, 1); + + if (i < this->header_size) { + reg.reg_offset += 1; + } else { + reg.reg_offset += this->exec_size / 8; + } } return true; @@ -408,8 +362,8 @@ fs_reg::fs_reg(float f) init(); this->file = IMM; this->type = BRW_REGISTER_TYPE_F; + this->stride = 0; this->fixed_hw_reg.dw1.f = f; - this->width = 1; } /** Immediate value constructor. */ @@ -418,8 +372,8 @@ fs_reg::fs_reg(int32_t i) init(); this->file = IMM; this->type = BRW_REGISTER_TYPE_D; + this->stride = 0; this->fixed_hw_reg.dw1.d = i; - this->width = 1; } /** Immediate value constructor. */ @@ -428,8 +382,8 @@ fs_reg::fs_reg(uint32_t u) init(); this->file = IMM; this->type = BRW_REGISTER_TYPE_UD; + this->stride = 0; this->fixed_hw_reg.dw1.ud = u; - this->width = 1; } /** Vector float immediate value constructor. */ @@ -460,7 +414,6 @@ fs_reg::fs_reg(struct brw_reg fixed_hw_reg) this->file = HW_REG; this->fixed_hw_reg = fixed_hw_reg; this->type = fixed_hw_reg.type; - this->width = 1 << fixed_hw_reg.width; } bool @@ -475,7 +428,6 @@ fs_reg::equals(const fs_reg &r) const abs == r.abs && !reladdr && !r.reladdr && memcmp(&fixed_hw_reg, &r.fixed_hw_reg, sizeof(fixed_hw_reg)) == 0 && - width == r.width && stride == r.stride); } @@ -494,6 +446,15 @@ fs_reg::is_contiguous() const return stride == 1; } +unsigned +fs_reg::component_size(unsigned width) const +{ + const unsigned stride = (file != HW_REG ? this->stride : + fixed_hw_reg.hstride == 0 ? 0 : + 1 << (fixed_hw_reg.hstride - 1)); + return MAX2(width * stride, 1) * type_sz(type); +} + int fs_visitor::type_size(const struct glsl_type *type) { @@ -520,7 +481,10 @@ fs_visitor::type_size(const struct glsl_type *type) return 0; case GLSL_TYPE_ATOMIC_UINT: return 0; + case GLSL_TYPE_SUBROUTINE: + return 1; case GLSL_TYPE_IMAGE: + return BRW_IMAGE_PARAM_SIZE; case GLSL_TYPE_VOID: case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: @@ -548,12 +512,12 @@ fs_visitor::get_timestamp(const fs_builder &bld) 0), BRW_REGISTER_TYPE_UD)); - fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 4); + fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); /* We want to read the 3 fields we care about even if it's not enabled in * the dispatch. */ - bld.exec_all().MOV(dst, ts); + bld.group(4, 0).exec_all().MOV(dst, ts); /* The caller wants the low 32 bits of the timestamp. 
Since it's running * at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds, @@ -598,19 +562,21 @@ fs_visitor::emit_shader_time_end() fs_reg start = shader_start_time; start.negate = true; - fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 1); + fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); diff.set_smear(0); - ibld.ADD(diff, start, shader_end_time); + + const fs_builder cbld = ibld.group(1, 0); + cbld.group(1, 0).ADD(diff, start, shader_end_time); /* If there were no instructions between the two timestamp gets, the diff * is 2 cycles. Remove that overhead, so I can forget about that when * trying to determine the time taken for single instructions. */ - ibld.ADD(diff, diff, fs_reg(-2u)); - SHADER_TIME_ADD(ibld, 0, diff); - SHADER_TIME_ADD(ibld, 1, fs_reg(1u)); + cbld.ADD(diff, diff, fs_reg(-2u)); + SHADER_TIME_ADD(cbld, 0, diff); + SHADER_TIME_ADD(cbld, 1, fs_reg(1u)); ibld.emit(BRW_OPCODE_ELSE); - SHADER_TIME_ADD(ibld, 2, fs_reg(1u)); + SHADER_TIME_ADD(cbld, 2, fs_reg(1u)); ibld.emit(BRW_OPCODE_ENDIF); } @@ -695,50 +661,160 @@ bool fs_inst::is_partial_write() const { return ((this->predicate && this->opcode != BRW_OPCODE_SEL) || - (this->dst.width * type_sz(this->dst.type)) < 32 || + (this->exec_size * type_sz(this->dst.type)) < 32 || !this->dst.is_contiguous()); } +unsigned +fs_inst::components_read(unsigned i) const +{ + switch (opcode) { + case FS_OPCODE_LINTERP: + if (i == 0) + return 2; + else + return 1; + + case FS_OPCODE_PIXEL_X: + case FS_OPCODE_PIXEL_Y: + assert(i == 0); + return 2; + + case FS_OPCODE_FB_WRITE_LOGICAL: + assert(src[6].file == IMM); + /* First/second FB write color. */ + if (i < 2) + return src[6].fixed_hw_reg.dw1.ud; + else + return 1; + + case SHADER_OPCODE_TEX_LOGICAL: + case SHADER_OPCODE_TXD_LOGICAL: + case SHADER_OPCODE_TXF_LOGICAL: + case SHADER_OPCODE_TXL_LOGICAL: + case SHADER_OPCODE_TXS_LOGICAL: + case FS_OPCODE_TXB_LOGICAL: + case SHADER_OPCODE_TXF_CMS_LOGICAL: + case SHADER_OPCODE_TXF_UMS_LOGICAL: + case SHADER_OPCODE_TXF_MCS_LOGICAL: + case SHADER_OPCODE_LOD_LOGICAL: + case SHADER_OPCODE_TG4_LOGICAL: + case SHADER_OPCODE_TG4_OFFSET_LOGICAL: + assert(src[8].file == IMM && src[9].file == IMM); + /* Texture coordinates. */ + if (i == 0) + return src[8].fixed_hw_reg.dw1.ud; + /* Texture derivatives. */ + else if ((i == 2 || i == 3) && opcode == SHADER_OPCODE_TXD_LOGICAL) + return src[9].fixed_hw_reg.dw1.ud; + /* Texture offset. */ + else if (i == 7) + return 2; + else + return 1; + + case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: + case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: + assert(src[3].file == IMM); + /* Surface coordinates. */ + if (i == 0) + return src[3].fixed_hw_reg.dw1.ud; + /* Surface operation source (ignored for reads). */ + else if (i == 1) + return 0; + else + return 1; + + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: + case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: + assert(src[3].file == IMM && + src[4].file == IMM); + /* Surface coordinates. */ + if (i == 0) + return src[3].fixed_hw_reg.dw1.ud; + /* Surface operation source. */ + else if (i == 1) + return src[4].fixed_hw_reg.dw1.ud; + else + return 1; + + case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: { + assert(src[3].file == IMM && + src[4].file == IMM); + const unsigned op = src[4].fixed_hw_reg.dw1.ud; + /* Surface coordinates. */ + if (i == 0) + return src[3].fixed_hw_reg.dw1.ud; + /* Surface operation source. 
*/ + else if (i == 1 && op == BRW_AOP_CMPWR) + return 2; + else if (i == 1 && (op == BRW_AOP_INC || op == BRW_AOP_DEC || + op == BRW_AOP_PREDEC)) + return 0; + else + return 1; + } + + default: + return 1; + } +} + int fs_inst::regs_read(int arg) const { - if (is_tex() && arg == 0 && src[0].file == GRF) { - return mlen; - } else if (opcode == FS_OPCODE_FB_WRITE && arg == 0) { - return mlen; - } else if (opcode == SHADER_OPCODE_URB_WRITE_SIMD8 && arg == 0) { - return mlen; - } else if (opcode == SHADER_OPCODE_UNTYPED_ATOMIC && arg == 0) { - return mlen; - } else if (opcode == SHADER_OPCODE_UNTYPED_SURFACE_READ && arg == 0) { - return mlen; - } else if (opcode == SHADER_OPCODE_UNTYPED_SURFACE_WRITE && arg == 0) { - return mlen; - } else if (opcode == SHADER_OPCODE_TYPED_ATOMIC && arg == 0) { - return mlen; - } else if (opcode == SHADER_OPCODE_TYPED_SURFACE_READ && arg == 0) { - return mlen; - } else if (opcode == SHADER_OPCODE_TYPED_SURFACE_WRITE && arg == 0) { - return mlen; - } else if (opcode == FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET && arg == 0) { - return mlen; - } else if (opcode == FS_OPCODE_LINTERP && arg == 0) { - return exec_size / 4; + switch (opcode) { + case FS_OPCODE_FB_WRITE: + case SHADER_OPCODE_URB_WRITE_SIMD8: + case SHADER_OPCODE_UNTYPED_ATOMIC: + case SHADER_OPCODE_UNTYPED_SURFACE_READ: + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: + case SHADER_OPCODE_TYPED_ATOMIC: + case SHADER_OPCODE_TYPED_SURFACE_READ: + case SHADER_OPCODE_TYPED_SURFACE_WRITE: + case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: + if (arg == 0) + return mlen; + break; + + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: + /* The payload is actually stored in src1 */ + if (arg == 1) + return mlen; + break; + + case FS_OPCODE_LINTERP: + if (arg == 1) + return 1; + break; + + case SHADER_OPCODE_LOAD_PAYLOAD: + if (arg < this->header_size) + return 1; + break; + + case CS_OPCODE_CS_TERMINATE: + return 1; + + default: + if (is_tex() && arg == 0 && src[0].file == GRF) + return mlen; + break; } switch (src[arg].file) { case BAD_FILE: + return 0; case UNIFORM: case IMM: return 1; case GRF: + case ATTR: case HW_REG: - if (src[arg].stride == 0) { - return 1; - } else { - int size = src[arg].width * src[arg].stride * type_sz(src[arg].type); - return (size + 31) / 32; - } + return DIV_ROUND_UP(components_read(arg) * + src[arg].component_size(exec_size), + REG_SIZE); case MRF: unreachable("MRF registers are not allowed as sources"); default: @@ -832,7 +908,7 @@ fs_visitor::vgrf(const glsl_type *const type) { int reg_width = dispatch_width / 8; return fs_reg(GRF, alloc.allocate(type_size(type) * reg_width), - brw_type_for_base_type(type), dispatch_width); + brw_type_for_base_type(type)); } /** Fixed HW reg constructor. */ @@ -842,14 +918,7 @@ fs_reg::fs_reg(enum register_file file, int reg) this->file = file; this->reg = reg; this->type = BRW_REGISTER_TYPE_F; - - switch (file) { - case UNIFORM: - this->width = 1; - break; - default: - this->width = 8; - } + this->stride = (file == UNIFORM ? 0 : 1); } /** Fixed HW reg constructor. */ @@ -859,25 +928,7 @@ fs_reg::fs_reg(enum register_file file, int reg, enum brw_reg_type type) this->file = file; this->reg = reg; this->type = type; - - switch (file) { - case UNIFORM: - this->width = 1; - break; - default: - this->width = 8; - } -} - -/** Fixed HW reg constructor. 
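The rewritten regs_read() above boils down to DIV_ROUND_UP(components_read(arg) * component_size(exec_size), REG_SIZE), where a GRF is 32 bytes on these parts. A standalone check of that arithmetic for a few representative cases; this is only the math, not driver code:

#include <assert.h>
#include <stdio.h>

#define REG_SIZE 32   /* bytes per GRF register */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static unsigned
regs_read(unsigned components, unsigned exec_size,
          unsigned stride, unsigned type_sz)
{
   /* component_size(exec_size) from the hunk above:
    * MAX2(exec_size * stride, 1) * type_sz. */
   const unsigned width = exec_size * stride;
   const unsigned component_size = (width > 1 ? width : 1) * type_sz;
   return DIV_ROUND_UP(components * component_size, REG_SIZE);
}

int main(void)
{
   /* One SIMD16 float component with stride 1: 16 * 4 = 64 bytes = 2 GRFs. */
   assert(regs_read(1, 16, 1, 4) == 2);
   /* Untyped atomic CMPWR reads two data components (compare + swap):
    * SIMD8, stride 1, 32-bit gives 2 * 8 * 4 = 64 bytes = 2 GRFs. */
   assert(regs_read(2, 8, 1, 4) == 2);
   /* A stride-0 (uniform-like) source collapses to 4 bytes = 1 GRF. */
   assert(regs_read(1, 8, 0, 4) == 1);
   printf("ok\n");
   return 0;
}
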
*/ -fs_reg::fs_reg(enum register_file file, int reg, enum brw_reg_type type, - uint8_t width) -{ - init(); - this->file = file; - this->reg = reg; - this->type = type; - this->width = width; + this->stride = (file == UNIFORM ? 0 : 1); } /* For SIMD16, we need to follow from the uniform setup of SIMD8 dispatch. @@ -892,6 +943,18 @@ fs_visitor::import_uniforms(fs_visitor *v) this->param_size = v->param_size; } +void +fs_visitor::setup_vector_uniform_values(const gl_constant_value *values, unsigned n) +{ + static const gl_constant_value zero = { 0 }; + + for (unsigned i = 0; i < n; ++i) + stage_prog_data->param[uniforms++] = &values[i]; + + for (unsigned i = n; i < 4; ++i) + stage_prog_data->param[uniforms++] = &zero; +} + fs_reg * fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer, bool origin_upper_left) @@ -908,23 +971,23 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer, } else { bld.ADD(wpos, this->pixel_x, fs_reg(0.5f)); } - wpos = offset(wpos, 1); + wpos = offset(wpos, bld, 1); /* gl_FragCoord.y */ if (!flip && pixel_center_integer) { bld.MOV(wpos, this->pixel_y); } else { fs_reg pixel_y = this->pixel_y; - float offset = (pixel_center_integer ? 0.0 : 0.5); + float offset = (pixel_center_integer ? 0.0f : 0.5f); if (flip) { pixel_y.negate = true; - offset += key->drawable_height - 1.0; + offset += key->drawable_height - 1.0f; } bld.ADD(wpos, pixel_y, fs_reg(offset)); } - wpos = offset(wpos, 1); + wpos = offset(wpos, bld, 1); /* gl_FragCoord.z */ if (devinfo->gen >= 6) { @@ -934,7 +997,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer, this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], interp_reg(VARYING_SLOT_POS, 2)); } - wpos = offset(wpos, 1); + wpos = offset(wpos, bld, 1); /* gl_FragCoord.w: Already set up in emit_interpolation */ bld.MOV(wpos, this->wpos_w); @@ -1017,7 +1080,7 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name, /* If there's no incoming setup data for this slot, don't * emit interpolation for it. */ - attr = offset(attr, type->vector_elements); + attr = offset(attr, bld, type->vector_elements); location++; continue; } @@ -1032,7 +1095,7 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name, interp = suboffset(interp, 3); interp.type = attr.type; bld.emit(FS_OPCODE_CINTERP, attr, fs_reg(interp)); - attr = offset(attr, 1); + attr = offset(attr, bld, 1); } } else { /* Smooth/noperspective interpolation case. 
*/ @@ -1070,7 +1133,7 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name, if (devinfo->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) { bld.MUL(attr, attr, this->pixel_w); } - attr = offset(attr, 1); + attr = offset(attr, bld, 1); } } @@ -1178,7 +1241,7 @@ fs_visitor::emit_samplepos_setup() } /* Compute gl_SamplePosition.x */ compute_sample_position(pos, int_sample_x); - pos = offset(pos, 1); + pos = offset(pos, abld, 1); if (dispatch_width == 8) { abld.MOV(int_sample_y, fs_reg(suboffset(sample_pos_reg, 1))); } else { @@ -1250,15 +1313,16 @@ fs_visitor::emit_sampleid_setup() return reg; } -void -fs_visitor::resolve_source_modifiers(fs_reg *src) +fs_reg +fs_visitor::resolve_source_modifiers(const fs_reg &src) { - if (!src->abs && !src->negate) - return; + if (!src.abs && !src.negate) + return src; - fs_reg temp = bld.vgrf(src->type); - bld.MOV(temp, *src); - *src = temp; + fs_reg temp = bld.vgrf(src.type); + bld.MOV(temp, src); + + return temp; } void @@ -1318,6 +1382,7 @@ fs_visitor::assign_curb_setup() constant_nr / 8, constant_nr % 8); + assert(inst->src[i].stride == 0); inst->src[i].file = HW_REG; inst->src[i].fixed_hw_reg = byte_offset( retype(brw_reg, inst->src[i].type), @@ -1867,11 +1932,12 @@ fs_visitor::demote_pull_constants() continue; /* Set up the annotation tracking for new generated instructions. */ - const fs_builder ibld = bld.annotate(inst->annotation, inst->ir) - .at(block, inst); + const fs_builder ibld(this, block, inst); fs_reg surf_index(stage_prog_data->binding_table.pull_constants_start); fs_reg dst = vgrf(glsl_type::float_type); + assert(inst->src[i].stride == 0); + /* Generate a pull load into dst. */ if (inst->src[i].reladdr) { VARYING_PULL_CONSTANT_LOAD(ibld, dst, @@ -1879,9 +1945,11 @@ fs_visitor::demote_pull_constants() *inst->src[i].reladdr, pull_index); inst->src[i].reladdr = NULL; + inst->src[i].stride = 1; } else { + const fs_builder ubld = ibld.exec_all().group(8, 0); fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15); - ibld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, + ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, dst, surf_index, offset); inst->src[i].set_smear(pull_index & 3); } @@ -1890,7 +1958,6 @@ fs_visitor::demote_pull_constants() inst->src[i].file = GRF; inst->src[i].reg = dst.reg; inst->src[i].reg_offset = 0; - inst->src[i].width = dispatch_width; } } invalidate_live_intervals(); @@ -2158,11 +2225,11 @@ fs_visitor::opt_zero_samples() * "Parameter 0 is required except for the sampleinfo message, which * has no parameter 0" */ - while (inst->mlen > inst->header_size + dispatch_width / 8 && + while (inst->mlen > inst->header_size + inst->exec_size / 8 && load_payload->src[(inst->mlen - inst->header_size) / - (dispatch_width / 8) + + (inst->exec_size / 8) + inst->header_size - 1].is_zero()) { - inst->mlen -= dispatch_width / 8; + inst->mlen -= inst->exec_size / 8; progress = true; } } @@ -2199,7 +2266,8 @@ fs_visitor::opt_sampler_eot() return false; /* Look for a texturing instruction immediately before the final FB_WRITE. 
*/ - fs_inst *fb_write = (fs_inst *) cfg->blocks[cfg->num_blocks - 1]->end(); + bblock_t *block = cfg->blocks[cfg->num_blocks - 1]; + fs_inst *fb_write = (fs_inst *)block->end(); assert(fb_write->eot); assert(fb_write->opcode == FS_OPCODE_FB_WRITE); @@ -2230,9 +2298,11 @@ fs_visitor::opt_sampler_eot() assert(!tex_inst->eot); /* We can't get here twice */ assert((tex_inst->offset & (0xff << 24)) == 0); + const fs_builder ibld(this, block, tex_inst); + tex_inst->offset |= fb_write->target << 24; tex_inst->eot = true; - tex_inst->dst = bld.null_reg_ud(); + tex_inst->dst = ibld.null_reg_ud(); fb_write->remove(cfg->blocks[cfg->num_blocks - 1]); /* If a header is present, marking the eot is sufficient. Otherwise, we need @@ -2244,8 +2314,8 @@ fs_visitor::opt_sampler_eot() if (tex_inst->header_size != 0) return true; - fs_reg send_header = bld.vgrf(BRW_REGISTER_TYPE_F, - load_payload->sources + 1); + fs_reg send_header = ibld.vgrf(BRW_REGISTER_TYPE_F, + load_payload->sources + 1); fs_reg *new_sources = ralloc_array(mem_ctx, fs_reg, load_payload->sources + 1); @@ -2307,12 +2377,12 @@ fs_visitor::opt_register_renaming() if (depth == 0 && inst->dst.file == GRF && - alloc.sizes[inst->dst.reg] == inst->dst.width / 8 && + alloc.sizes[inst->dst.reg] == inst->exec_size / 8 && !inst->is_partial_write()) { if (remap[dst] == -1) { remap[dst] = dst; } else { - remap[dst] = alloc.allocate(inst->dst.width / 8); + remap[dst] = alloc.allocate(inst->exec_size / 8); inst->dst.reg = remap[dst]; progress = true; } @@ -2443,7 +2513,7 @@ fs_visitor::compute_to_mrf() /* Things returning more than one register would need us to * understand coalescing out more than one MOV at a time. */ - if (scan_inst->regs_written > scan_inst->dst.width / 8) + if (scan_inst->regs_written > scan_inst->exec_size / 8) break; /* SEND instructions can't have MRF as a destination. 
*/ @@ -2780,7 +2850,8 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block, if (block->start() == scan_inst) { for (int i = 0; i < write_len; i++) { if (needs_dep[i]) - DEP_RESOLVE_MOV(bld.at(block, inst), first_write_grf + i); + DEP_RESOLVE_MOV(fs_builder(this, block, inst), + first_write_grf + i); } return; } @@ -2796,7 +2867,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block, if (reg >= first_write_grf && reg < first_write_grf + write_len && needs_dep[reg - first_write_grf]) { - DEP_RESOLVE_MOV(bld.at(block, inst), reg); + DEP_RESOLVE_MOV(fs_builder(this, block, inst), reg); needs_dep[reg - first_write_grf] = false; if (scan_inst->exec_size == 16) needs_dep[reg - first_write_grf + 1] = false; @@ -2843,7 +2914,8 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins if (block->end() == scan_inst) { for (int i = 0; i < write_len; i++) { if (needs_dep[i]) - DEP_RESOLVE_MOV(bld.at(block, scan_inst), first_write_grf + i); + DEP_RESOLVE_MOV(fs_builder(this, block, scan_inst), + first_write_grf + i); } return; } @@ -2858,7 +2930,8 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins scan_inst->dst.reg >= first_write_grf && scan_inst->dst.reg < first_write_grf + write_len && needs_dep[scan_inst->dst.reg - first_write_grf]) { - DEP_RESOLVE_MOV(bld.at(block, scan_inst), scan_inst->dst.reg); + DEP_RESOLVE_MOV(fs_builder(this, block, scan_inst), + scan_inst->dst.reg); needs_dep[scan_inst->dst.reg - first_write_grf] = false; } @@ -2928,14 +3001,18 @@ fs_visitor::lower_uniform_pull_constant_loads() assert(const_offset_reg.file == IMM && const_offset_reg.type == BRW_REGISTER_TYPE_UD); const_offset_reg.fixed_hw_reg.dw1.ud /= 4; - fs_reg payload = fs_reg(GRF, alloc.allocate(1)); - /* We have to use a message header on Skylake to get SIMD4x2 mode. - * Reserve space for the register. - */ + fs_reg payload, offset; if (devinfo->gen >= 9) { - payload.reg_offset++; - alloc.sizes[payload.reg] = 2; + /* We have to use a message header on Skylake to get SIMD4x2 + * mode. Reserve space for the register. + */ + offset = payload = fs_reg(GRF, alloc.allocate(2)); + offset.reg_offset++; + inst->mlen = 2; + } else { + offset = payload = fs_reg(GRF, alloc.allocate(1)); + inst->mlen = 1; } /* This is actually going to be a MOV, but since only the first dword @@ -2944,7 +3021,7 @@ fs_visitor::lower_uniform_pull_constant_loads() * by live variable analysis, or register allocation will explode. */ fs_inst *setup = new(mem_ctx) fs_inst(FS_OPCODE_SET_SIMD4X2_OFFSET, - 8, payload, const_offset_reg); + 8, offset, const_offset_reg); setup->force_writemask_all = true; setup->ir = inst->ir; @@ -2957,6 +3034,7 @@ fs_visitor::lower_uniform_pull_constant_loads() */ inst->opcode = FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7; inst->src[1] = payload; + inst->base_mrf = -1; invalidate_live_intervals(); } else { @@ -2982,28 +3060,24 @@ fs_visitor::lower_load_payload() assert(inst->dst.file == MRF || inst->dst.file == GRF); assert(inst->saturate == false); - - const fs_builder ibld = bld.group(inst->exec_size, inst->force_sechalf) - .exec_all(inst->force_writemask_all) - .at(block, inst); fs_reg dst = inst->dst; /* Get rid of COMPR4. 
We'll add it back in if we need it */ if (dst.file == MRF) dst.reg = dst.reg & ~BRW_MRF_COMPR4; - dst.width = 8; + const fs_builder ibld(this, block, inst); + const fs_builder hbld = ibld.exec_all().group(8, 0); + for (uint8_t i = 0; i < inst->header_size; i++) { if (inst->src[i].file != BAD_FILE) { fs_reg mov_dst = retype(dst, BRW_REGISTER_TYPE_UD); fs_reg mov_src = retype(inst->src[i], BRW_REGISTER_TYPE_UD); - mov_src.width = 8; - ibld.exec_all().MOV(mov_dst, mov_src); + hbld.MOV(mov_dst, mov_src); } - dst = offset(dst, 1); + dst = offset(dst, hbld, 1); } - dst.width = inst->exec_size; if (inst->dst.file == MRF && (inst->dst.reg & BRW_MRF_COMPR4) && inst->exec_size > 8) { /* In this case, the payload portion of the LOAD_PAYLOAD isn't @@ -3033,9 +3107,9 @@ fs_visitor::lower_load_payload() } else { /* Platform doesn't have COMPR4. We have to fake it */ fs_reg mov_dst = retype(dst, inst->src[i].type); - mov_dst.width = 8; ibld.half(0).MOV(mov_dst, half(inst->src[i], 0)); - ibld.half(1).MOV(offset(mov_dst, 4), half(inst->src[i], 1)); + mov_dst.reg += 4; + ibld.half(1).MOV(mov_dst, half(inst->src[i], 1)); } } @@ -3060,7 +3134,7 @@ fs_visitor::lower_load_payload() for (uint8_t i = inst->header_size; i < inst->sources; i++) { if (inst->src[i].file != BAD_FILE) ibld.MOV(retype(dst, inst->src[i].type), inst->src[i]); - dst = offset(dst, 1); + dst = offset(dst, ibld, 1); } inst->remove(block); @@ -3078,158 +3152,989 @@ fs_visitor::lower_integer_multiplication() { bool progress = false; - /* Gen8's MUL instruction can do a 32-bit x 32-bit -> 32-bit operation - * directly, but Cherryview cannot. - */ - if (devinfo->gen >= 8 && !devinfo->is_cherryview) - return false; - foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { - if (inst->opcode != BRW_OPCODE_MUL || - inst->dst.is_accumulator() || - (inst->dst.type != BRW_REGISTER_TYPE_D && - inst->dst.type != BRW_REGISTER_TYPE_UD)) - continue; + const fs_builder ibld(this, block, inst); - const fs_builder ibld = bld.at(block, inst); + if (inst->opcode == BRW_OPCODE_MUL) { + if (inst->dst.is_accumulator() || + (inst->dst.type != BRW_REGISTER_TYPE_D && + inst->dst.type != BRW_REGISTER_TYPE_UD)) + continue; - /* The MUL instruction isn't commutative. On Gen <= 6, only the low - * 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of - * src1 are used. - * - * If multiplying by an immediate value that fits in 16-bits, do a - * single MUL instruction with that value in the proper location. - */ - if (inst->src[1].file == IMM && - inst->src[1].fixed_hw_reg.dw1.ud < (1 << 16)) { - if (devinfo->gen < 7) { - fs_reg imm(GRF, alloc.allocate(dispatch_width / 8), - inst->dst.type, dispatch_width); - ibld.MOV(imm, inst->src[1]); - ibld.MUL(inst->dst, imm, inst->src[0]); - } else { - ibld.MUL(inst->dst, inst->src[0], inst->src[1]); - } - } else { - /* Gen < 8 (and some Gen8+ low-power parts like Cherryview) cannot - * do 32-bit integer multiplication in one instruction, but instead - * must do a sequence (which actually calculates a 64-bit result): - * - * mul(8) acc0<1>D g3<8,8,1>D g4<8,8,1>D - * mach(8) null g3<8,8,1>D g4<8,8,1>D - * mov(8) g2<1>D acc0<8,8,1>D - * - * But on Gen > 6, the ability to use second accumulator register - * (acc1) for non-float data types was removed, preventing a simple - * implementation in SIMD16. A 16-channel result can be calculated by - * executing the three instructions twice in SIMD8, once with quarter - * control of 1Q for the first eight channels and again with 2Q for - * the second eight channels. 
- * - * Which accumulator register is implicitly accessed (by AccWrEnable - * for instance) is determined by the quarter control. Unfortunately - * Ivybridge (and presumably Baytrail) has a hardware bug in which an - * implicit accumulator access by an instruction with 2Q will access - * acc1 regardless of whether the data type is usable in acc1. - * - * Specifically, the 2Q mach(8) writes acc1 which does not exist for - * integer data types. - * - * Since we only want the low 32-bits of the result, we can do two - * 32-bit x 16-bit multiplies (like the mul and mach are doing), and - * adjust the high result and add them (like the mach is doing): - * - * mul(8) g7<1>D g3<8,8,1>D g4.0<8,8,1>UW - * mul(8) g8<1>D g3<8,8,1>D g4.1<8,8,1>UW - * shl(8) g9<1>D g8<8,8,1>D 16D - * add(8) g2<1>D g7<8,8,1>D g8<8,8,1>D - * - * We avoid the shl instruction by realizing that we only want to add - * the low 16-bits of the "high" result to the high 16-bits of the - * "low" result and using proper regioning on the add: - * - * mul(8) g7<1>D g3<8,8,1>D g4.0<16,8,2>UW - * mul(8) g8<1>D g3<8,8,1>D g4.1<16,8,2>UW - * add(8) g7.1<2>UW g7.1<16,8,2>UW g8<16,8,2>UW - * - * Since it does not use the (single) accumulator register, we can - * schedule multi-component multiplications much better. + /* Gen8's MUL instruction can do a 32-bit x 32-bit -> 32-bit + * operation directly, but CHV/BXT cannot. */ + if (devinfo->gen >= 8 && + !devinfo->is_cherryview && !devinfo->is_broxton) + continue; - if (inst->conditional_mod && inst->dst.is_null()) { - inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8), - inst->dst.type, dispatch_width); - } - fs_reg low = inst->dst; - fs_reg high(GRF, alloc.allocate(dispatch_width / 8), - inst->dst.type, dispatch_width); + if (inst->src[1].file == IMM && + inst->src[1].fixed_hw_reg.dw1.ud < (1 << 16)) { + /* The MUL instruction isn't commutative. On Gen <= 6, only the low + * 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of + * src1 are used. + * + * If multiplying by an immediate value that fits in 16-bits, do a + * single MUL instruction with that value in the proper location. + */ + if (devinfo->gen < 7) { + fs_reg imm(GRF, alloc.allocate(dispatch_width / 8), + inst->dst.type); + ibld.MOV(imm, inst->src[1]); + ibld.MUL(inst->dst, imm, inst->src[0]); + } else { + ibld.MUL(inst->dst, inst->src[0], inst->src[1]); + } + } else { + /* Gen < 8 (and some Gen8+ low-power parts like Cherryview) cannot + * do 32-bit integer multiplication in one instruction, but instead + * must do a sequence (which actually calculates a 64-bit result): + * + * mul(8) acc0<1>D g3<8,8,1>D g4<8,8,1>D + * mach(8) null g3<8,8,1>D g4<8,8,1>D + * mov(8) g2<1>D acc0<8,8,1>D + * + * But on Gen > 6, the ability to use second accumulator register + * (acc1) for non-float data types was removed, preventing a simple + * implementation in SIMD16. A 16-channel result can be calculated by + * executing the three instructions twice in SIMD8, once with quarter + * control of 1Q for the first eight channels and again with 2Q for + * the second eight channels. + * + * Which accumulator register is implicitly accessed (by AccWrEnable + * for instance) is determined by the quarter control. Unfortunately + * Ivybridge (and presumably Baytrail) has a hardware bug in which an + * implicit accumulator access by an instruction with 2Q will access + * acc1 regardless of whether the data type is usable in acc1. + * + * Specifically, the 2Q mach(8) writes acc1 which does not exist for + * integer data types. 
+ * + * Since we only want the low 32-bits of the result, we can do two + * 32-bit x 16-bit multiplies (like the mul and mach are doing), and + * adjust the high result and add them (like the mach is doing): + * + * mul(8) g7<1>D g3<8,8,1>D g4.0<8,8,1>UW + * mul(8) g8<1>D g3<8,8,1>D g4.1<8,8,1>UW + * shl(8) g9<1>D g8<8,8,1>D 16D + * add(8) g2<1>D g7<8,8,1>D g8<8,8,1>D + * + * We avoid the shl instruction by realizing that we only want to add + * the low 16-bits of the "high" result to the high 16-bits of the + * "low" result and using proper regioning on the add: + * + * mul(8) g7<1>D g3<8,8,1>D g4.0<16,8,2>UW + * mul(8) g8<1>D g3<8,8,1>D g4.1<16,8,2>UW + * add(8) g7.1<2>UW g7.1<16,8,2>UW g8<16,8,2>UW + * + * Since it does not use the (single) accumulator register, we can + * schedule multi-component multiplications much better. + */ - if (devinfo->gen >= 7) { - fs_reg src1_0_w = inst->src[1]; - fs_reg src1_1_w = inst->src[1]; + if (inst->conditional_mod && inst->dst.is_null()) { + inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8), + inst->dst.type); + } + fs_reg low = inst->dst; + fs_reg high(GRF, alloc.allocate(dispatch_width / 8), + inst->dst.type); + + if (devinfo->gen >= 7) { + fs_reg src1_0_w = inst->src[1]; + fs_reg src1_1_w = inst->src[1]; - if (inst->src[1].file == IMM) { - src1_0_w.fixed_hw_reg.dw1.ud &= 0xffff; - src1_1_w.fixed_hw_reg.dw1.ud >>= 16; + if (inst->src[1].file == IMM) { + src1_0_w.fixed_hw_reg.dw1.ud &= 0xffff; + src1_1_w.fixed_hw_reg.dw1.ud >>= 16; + } else { + src1_0_w.type = BRW_REGISTER_TYPE_UW; + if (src1_0_w.stride != 0) { + assert(src1_0_w.stride == 1); + src1_0_w.stride = 2; + } + + src1_1_w.type = BRW_REGISTER_TYPE_UW; + if (src1_1_w.stride != 0) { + assert(src1_1_w.stride == 1); + src1_1_w.stride = 2; + } + src1_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW); + } + ibld.MUL(low, inst->src[0], src1_0_w); + ibld.MUL(high, inst->src[0], src1_1_w); } else { - src1_0_w.type = BRW_REGISTER_TYPE_UW; - if (src1_0_w.stride != 0) { - assert(src1_0_w.stride == 1); - src1_0_w.stride = 2; + fs_reg src0_0_w = inst->src[0]; + fs_reg src0_1_w = inst->src[0]; + + src0_0_w.type = BRW_REGISTER_TYPE_UW; + if (src0_0_w.stride != 0) { + assert(src0_0_w.stride == 1); + src0_0_w.stride = 2; } - src1_1_w.type = BRW_REGISTER_TYPE_UW; - if (src1_1_w.stride != 0) { - assert(src1_1_w.stride == 1); - src1_1_w.stride = 2; + src0_1_w.type = BRW_REGISTER_TYPE_UW; + if (src0_1_w.stride != 0) { + assert(src0_1_w.stride == 1); + src0_1_w.stride = 2; } - src1_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW); - } - ibld.MUL(low, inst->src[0], src1_0_w); - ibld.MUL(high, inst->src[0], src1_1_w); - } else { - fs_reg src0_0_w = inst->src[0]; - fs_reg src0_1_w = inst->src[0]; + src0_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW); - src0_0_w.type = BRW_REGISTER_TYPE_UW; - if (src0_0_w.stride != 0) { - assert(src0_0_w.stride == 1); - src0_0_w.stride = 2; + ibld.MUL(low, src0_0_w, inst->src[1]); + ibld.MUL(high, src0_1_w, inst->src[1]); } - src0_1_w.type = BRW_REGISTER_TYPE_UW; - if (src0_1_w.stride != 0) { - assert(src0_1_w.stride == 1); - src0_1_w.stride = 2; + fs_reg dst = inst->dst; + dst.type = BRW_REGISTER_TYPE_UW; + dst.subreg_offset = 2; + dst.stride = 2; + + high.type = BRW_REGISTER_TYPE_UW; + high.stride = 2; + + low.type = BRW_REGISTER_TYPE_UW; + low.subreg_offset = 2; + low.stride = 2; + + ibld.ADD(dst, low, high); + + if (inst->conditional_mod) { + fs_reg null(retype(ibld.null_reg_f(), inst->dst.type)); + set_condmod(inst->conditional_mod, + ibld.MOV(null, inst->dst)); 
} - src0_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW); + } - ibld.MUL(low, src0_0_w, inst->src[1]); - ibld.MUL(high, src0_1_w, inst->src[1]); + } else if (inst->opcode == SHADER_OPCODE_MULH) { + /* Should have been lowered to 8-wide. */ + assert(inst->exec_size <= 8); + const fs_reg acc = retype(brw_acc_reg(inst->exec_size), + inst->dst.type); + fs_inst *mul = ibld.MUL(acc, inst->src[0], inst->src[1]); + fs_inst *mach = ibld.MACH(inst->dst, inst->src[0], inst->src[1]); + + if (devinfo->gen >= 8) { + /* Until Gen8, integer multiplies read 32-bits from one source, + * and 16-bits from the other, and relying on the MACH instruction + * to generate the high bits of the result. + * + * On Gen8, the multiply instruction does a full 32x32-bit + * multiply, but in order to do a 64-bit multiply we can simulate + * the previous behavior and then use a MACH instruction. + * + * FINISHME: Don't use source modifiers on src1. + */ + assert(mul->src[1].type == BRW_REGISTER_TYPE_D || + mul->src[1].type == BRW_REGISTER_TYPE_UD); + mul->src[1].type = (type_is_signed(mul->src[1].type) ? + BRW_REGISTER_TYPE_W : BRW_REGISTER_TYPE_UW); + mul->src[1].stride *= 2; + + } else if (devinfo->gen == 7 && !devinfo->is_haswell && + inst->force_sechalf) { + /* Among other things the quarter control bits influence which + * accumulator register is used by the hardware for instructions + * that access the accumulator implicitly (e.g. MACH). A + * second-half instruction would normally map to acc1, which + * doesn't exist on Gen7 and up (the hardware does emulate it for + * floating-point instructions *only* by taking advantage of the + * extra precision of acc0 not normally used for floating point + * arithmetic). + * + * HSW and up are careful enough not to try to access an + * accumulator register that doesn't exist, but on earlier Gen7 + * hardware we need to make sure that the quarter control bits are + * zero to avoid non-deterministic behaviour and emit an extra MOV + * to get the result masked correctly according to the current + * channel enables. 
+ */ + mach->force_sechalf = false; + mach->force_writemask_all = true; + mach->dst = ibld.vgrf(inst->dst.type); + ibld.MOV(inst->dst, mach->dst); } + } else { + continue; + } + + inst->remove(block); + progress = true; + } + + if (progress) + invalidate_live_intervals(); + + return progress; +} + +static void +setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key, + fs_reg *dst, fs_reg color, unsigned components) +{ + if (key->clamp_fragment_color) { + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 4); + assert(color.type == BRW_REGISTER_TYPE_F); + + for (unsigned i = 0; i < components; i++) + set_saturate(true, + bld.MOV(offset(tmp, bld, i), offset(color, bld, i))); + + color = tmp; + } + + for (unsigned i = 0; i < components; i++) + dst[i] = offset(color, bld, i); +} + +static void +lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, + const brw_wm_prog_data *prog_data, + const brw_wm_prog_key *key, + const fs_visitor::thread_payload &payload) +{ + assert(inst->src[6].file == IMM); + const brw_device_info *devinfo = bld.shader->devinfo; + const fs_reg &color0 = inst->src[0]; + const fs_reg &color1 = inst->src[1]; + const fs_reg &src0_alpha = inst->src[2]; + const fs_reg &src_depth = inst->src[3]; + const fs_reg &dst_depth = inst->src[4]; + fs_reg sample_mask = inst->src[5]; + const unsigned components = inst->src[6].fixed_hw_reg.dw1.ud; + + /* We can potentially have a message length of up to 15, so we have to set + * base_mrf to either 0 or 1 in order to fit in m0..m15. + */ + fs_reg sources[15]; + int header_size = 2, payload_header_size; + unsigned length = 0; + + /* From the Sandy Bridge PRM, volume 4, page 198: + * + * "Dispatched Pixel Enables. One bit per pixel indicating + * which pixels were originally enabled when the thread was + * dispatched. This field is only required for the end-of- + * thread message and on all dual-source messages." + */ + if (devinfo->gen >= 6 && + (devinfo->is_haswell || devinfo->gen >= 8 || !prog_data->uses_kill) && + color1.file == BAD_FILE && + key->nr_color_regions == 1) { + header_size = 0; + } + + if (header_size != 0) { + assert(header_size == 2); + /* Allocate 2 registers for a header */ + length += 2; + } + + if (payload.aa_dest_stencil_reg) { + sources[length] = fs_reg(GRF, bld.shader->alloc.allocate(1)); + bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha") + .MOV(sources[length], + fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0))); + length++; + } + + if (prog_data->uses_omask) { + sources[length] = fs_reg(GRF, bld.shader->alloc.allocate(1), + BRW_REGISTER_TYPE_UD); + + /* Hand over gl_SampleMask. Only the lower 16 bits of each channel are + * relevant. Since it's unsigned single words one vgrf is always + * 16-wide, but only the lower or higher 8 channels will be used by the + * hardware when doing a SIMD8 write depending on whether we have + * selected the subspans for the first or second half respectively. + */ + assert(sample_mask.file != BAD_FILE && type_sz(sample_mask.type) == 4); + sample_mask.type = BRW_REGISTER_TYPE_UW; + sample_mask.stride *= 2; + + bld.exec_all().annotate("FB write oMask") + .MOV(half(retype(sources[length], BRW_REGISTER_TYPE_UW), + inst->force_sechalf), + sample_mask); + length++; + } + + payload_header_size = length; + + if (src0_alpha.file != BAD_FILE) { + /* FIXME: This is being passed at the wrong location in the payload and + * doesn't work when gl_SampleMask and MRTs are used simultaneously. 
+ * It's supposed to be immediately before oMask but there seems to be no + * reasonable way to pass them in the correct order because LOAD_PAYLOAD + * requires header sources to form a contiguous segment at the beginning + * of the message and src0_alpha has per-channel semantics. + */ + setup_color_payload(bld, key, &sources[length], src0_alpha, 1); + length++; + } + + setup_color_payload(bld, key, &sources[length], color0, components); + length += 4; + + if (color1.file != BAD_FILE) { + setup_color_payload(bld, key, &sources[length], color1, components); + length += 4; + } + + if (src_depth.file != BAD_FILE) { + sources[length] = src_depth; + length++; + } + + if (dst_depth.file != BAD_FILE) { + sources[length] = dst_depth; + length++; + } + + fs_inst *load; + if (devinfo->gen >= 7) { + /* Send from the GRF */ + fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F); + load = bld.LOAD_PAYLOAD(payload, sources, length, payload_header_size); + payload.reg = bld.shader->alloc.allocate(load->regs_written); + load->dst = payload; + + inst->src[0] = payload; + inst->resize_sources(1); + inst->base_mrf = -1; + } else { + /* Send from the MRF */ + load = bld.LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F), + sources, length, payload_header_size); + + /* On pre-SNB, we have to interlace the color values. LOAD_PAYLOAD + * will do this for us if we just give it a COMPR4 destination. + */ + if (devinfo->gen < 6 && bld.dispatch_width() == 16) + load->dst.reg |= BRW_MRF_COMPR4; + + inst->resize_sources(0); + inst->base_mrf = 1; + } + + inst->opcode = FS_OPCODE_FB_WRITE; + inst->mlen = load->regs_written; + inst->header_size = header_size; +} + +static void +lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op, + const fs_reg &coordinate, + const fs_reg &shadow_c, + const fs_reg &lod, const fs_reg &lod2, + const fs_reg &sampler, + unsigned coord_components, + unsigned grad_components) +{ + const bool has_lod = (op == SHADER_OPCODE_TXL || op == FS_OPCODE_TXB || + op == SHADER_OPCODE_TXF || op == SHADER_OPCODE_TXS); + fs_reg msg_begin(MRF, 1, BRW_REGISTER_TYPE_F); + fs_reg msg_end = msg_begin; + + /* g0 header. */ + msg_end = offset(msg_end, bld.group(8, 0), 1); + + for (unsigned i = 0; i < coord_components; i++) + bld.MOV(retype(offset(msg_end, bld, i), coordinate.type), + offset(coordinate, bld, i)); + + msg_end = offset(msg_end, bld, coord_components); + + /* Messages other than SAMPLE and RESINFO in SIMD16 and TXD in SIMD8 + * require all three components to be present and zero if they are unused. + */ + if (coord_components > 0 && + (has_lod || shadow_c.file != BAD_FILE || + (op == SHADER_OPCODE_TEX && bld.dispatch_width() == 8))) { + for (unsigned i = coord_components; i < 3; i++) + bld.MOV(offset(msg_end, bld, i), fs_reg(0.0f)); + + msg_end = offset(msg_end, bld, 3 - coord_components); + } + + if (op == SHADER_OPCODE_TXD) { + /* TXD unsupported in SIMD16 mode. */ + assert(bld.dispatch_width() == 8); + + /* the slots for u and v are always present, but r is optional */ + if (coord_components < 2) + msg_end = offset(msg_end, bld, 2 - coord_components); + + /* P = u, v, r + * dPdx = dudx, dvdx, drdx + * dPdy = dudy, dvdy, drdy + * + * 1-arg: Does not exist. 
+ * + * 2-arg: dudx dvdx dudy dvdy + * dPdx.x dPdx.y dPdy.x dPdy.y + * m4 m5 m6 m7 + * + * 3-arg: dudx dvdx drdx dudy dvdy drdy + * dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z + * m5 m6 m7 m8 m9 m10 + */ + for (unsigned i = 0; i < grad_components; i++) + bld.MOV(offset(msg_end, bld, i), offset(lod, bld, i)); + + msg_end = offset(msg_end, bld, MAX2(grad_components, 2)); + + for (unsigned i = 0; i < grad_components; i++) + bld.MOV(offset(msg_end, bld, i), offset(lod2, bld, i)); + + msg_end = offset(msg_end, bld, MAX2(grad_components, 2)); + } + + if (has_lod) { + /* Bias/LOD with shadow comparitor is unsupported in SIMD16 -- *Without* + * shadow comparitor (including RESINFO) it's unsupported in SIMD8 mode. + */ + assert(shadow_c.file != BAD_FILE ? bld.dispatch_width() == 8 : + bld.dispatch_width() == 16); + + const brw_reg_type type = + (op == SHADER_OPCODE_TXF || op == SHADER_OPCODE_TXS ? + BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F); + bld.MOV(retype(msg_end, type), lod); + msg_end = offset(msg_end, bld, 1); + } + + if (shadow_c.file != BAD_FILE) { + if (op == SHADER_OPCODE_TEX && bld.dispatch_width() == 8) { + /* There's no plain shadow compare message, so we use shadow + * compare with a bias of 0.0. + */ + bld.MOV(msg_end, fs_reg(0.0f)); + msg_end = offset(msg_end, bld, 1); + } + + bld.MOV(msg_end, shadow_c); + msg_end = offset(msg_end, bld, 1); + } + + inst->opcode = op; + inst->src[0] = reg_undef; + inst->src[1] = sampler; + inst->resize_sources(2); + inst->base_mrf = msg_begin.reg; + inst->mlen = msg_end.reg - msg_begin.reg; + inst->header_size = 1; +} + +static void +lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op, + fs_reg coordinate, + const fs_reg &shadow_c, + fs_reg lod, fs_reg lod2, + const fs_reg &sample_index, + const fs_reg &sampler, + const fs_reg &offset_value, + unsigned coord_components, + unsigned grad_components) +{ + fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F); + fs_reg msg_coords = message; + unsigned header_size = 0; + + if (offset_value.file != BAD_FILE) { + /* The offsets set up by the visitor are in the m1 header, so we can't + * go headerless. 
+ */ + header_size = 1; + message.reg--; + } + + for (unsigned i = 0; i < coord_components; i++) { + bld.MOV(retype(offset(msg_coords, bld, i), coordinate.type), coordinate); + coordinate = offset(coordinate, bld, 1); + } + fs_reg msg_end = offset(msg_coords, bld, coord_components); + fs_reg msg_lod = offset(msg_coords, bld, 4); + + if (shadow_c.file != BAD_FILE) { + fs_reg msg_shadow = msg_lod; + bld.MOV(msg_shadow, shadow_c); + msg_lod = offset(msg_shadow, bld, 1); + msg_end = msg_lod; + } + + switch (op) { + case SHADER_OPCODE_TXL: + case FS_OPCODE_TXB: + bld.MOV(msg_lod, lod); + msg_end = offset(msg_lod, bld, 1); + break; + case SHADER_OPCODE_TXD: + /** + * P = u, v, r + * dPdx = dudx, dvdx, drdx + * dPdy = dudy, dvdy, drdy + * + * Load up these values: + * - dudx dudy dvdx dvdy drdx drdy + * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z + */ + msg_end = msg_lod; + for (unsigned i = 0; i < grad_components; i++) { + bld.MOV(msg_end, lod); + lod = offset(lod, bld, 1); + msg_end = offset(msg_end, bld, 1); + + bld.MOV(msg_end, lod2); + lod2 = offset(lod2, bld, 1); + msg_end = offset(msg_end, bld, 1); + } + break; + case SHADER_OPCODE_TXS: + msg_lod = retype(msg_end, BRW_REGISTER_TYPE_UD); + bld.MOV(msg_lod, lod); + msg_end = offset(msg_lod, bld, 1); + break; + case SHADER_OPCODE_TXF: + msg_lod = offset(msg_coords, bld, 3); + bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), lod); + msg_end = offset(msg_lod, bld, 1); + break; + case SHADER_OPCODE_TXF_CMS: + msg_lod = offset(msg_coords, bld, 3); + /* lod */ + bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u)); + /* sample index */ + bld.MOV(retype(offset(msg_lod, bld, 1), BRW_REGISTER_TYPE_UD), sample_index); + msg_end = offset(msg_lod, bld, 2); + break; + default: + break; + } + + inst->opcode = op; + inst->src[0] = reg_undef; + inst->src[1] = sampler; + inst->resize_sources(2); + inst->base_mrf = message.reg; + inst->mlen = msg_end.reg - message.reg; + inst->header_size = header_size; + + /* Message length > MAX_SAMPLER_MESSAGE_SIZE disallowed by hardware. */ + assert(inst->mlen <= MAX_SAMPLER_MESSAGE_SIZE); +} - fs_reg dst = inst->dst; - dst.type = BRW_REGISTER_TYPE_UW; - dst.subreg_offset = 2; - dst.stride = 2; +static bool +is_high_sampler(const struct brw_device_info *devinfo, const fs_reg &sampler) +{ + if (devinfo->gen < 8 && !devinfo->is_haswell) + return false; + + return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16; +} - high.type = BRW_REGISTER_TYPE_UW; - high.stride = 2; +static void +lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, + fs_reg coordinate, + const fs_reg &shadow_c, + fs_reg lod, fs_reg lod2, + const fs_reg &sample_index, + const fs_reg &mcs, const fs_reg &sampler, + fs_reg offset_value, + unsigned coord_components, + unsigned grad_components) +{ + const brw_device_info *devinfo = bld.shader->devinfo; + int reg_width = bld.dispatch_width() / 8; + unsigned header_size = 0, length = 0; + fs_reg sources[MAX_SAMPLER_MESSAGE_SIZE]; + for (unsigned i = 0; i < ARRAY_SIZE(sources); i++) + sources[i] = bld.vgrf(BRW_REGISTER_TYPE_F); + + if (op == SHADER_OPCODE_TG4 || op == SHADER_OPCODE_TG4_OFFSET || + offset_value.file != BAD_FILE || + is_high_sampler(devinfo, sampler)) { + /* For general texture offsets (no txf workaround), we need a header to + * put them in. Note that we're only reserving space for it in the + * message payload as it will be initialized implicitly by the + * generator. 
+ * + * TG4 needs to place its channel select in the header, for interaction + * with ARB_texture_swizzle. The sampler index is only 4-bits, so for + * larger sampler numbers we need to offset the Sampler State Pointer in + * the header. + */ + header_size = 1; + sources[0] = fs_reg(); + length++; + } - low.type = BRW_REGISTER_TYPE_UW; - low.subreg_offset = 2; - low.stride = 2; + if (shadow_c.file != BAD_FILE) { + bld.MOV(sources[length], shadow_c); + length++; + } - ibld.ADD(dst, low, high); + bool coordinate_done = false; - if (inst->conditional_mod) { - fs_reg null(retype(ibld.null_reg_f(), inst->dst.type)); - set_condmod(inst->conditional_mod, - ibld.MOV(null, inst->dst)); + /* The sampler can only meaningfully compute LOD for fragment shader + * messages. For all other stages, we change the opcode to TXL and + * hardcode the LOD to 0. + */ + if (bld.shader->stage != MESA_SHADER_FRAGMENT && + op == SHADER_OPCODE_TEX) { + op = SHADER_OPCODE_TXL; + lod = fs_reg(0.0f); + } + + /* Set up the LOD info */ + switch (op) { + case FS_OPCODE_TXB: + case SHADER_OPCODE_TXL: + bld.MOV(sources[length], lod); + length++; + break; + case SHADER_OPCODE_TXD: + /* TXD should have been lowered in SIMD16 mode. */ + assert(bld.dispatch_width() == 8); + + /* Load dPdx and the coordinate together: + * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z + */ + for (unsigned i = 0; i < coord_components; i++) { + bld.MOV(sources[length], coordinate); + coordinate = offset(coordinate, bld, 1); + length++; + + /* For cube map array, the coordinate is (u,v,r,ai) but there are + * only derivatives for (u, v, r). + */ + if (i < grad_components) { + bld.MOV(sources[length], lod); + lod = offset(lod, bld, 1); + length++; + + bld.MOV(sources[length], lod2); + lod2 = offset(lod2, bld, 1); + length++; } } - inst->remove(block); + coordinate_done = true; + break; + case SHADER_OPCODE_TXS: + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod); + length++; + break; + case SHADER_OPCODE_TXF: + /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. + * On Gen9 they are u, v, lod, r + */ + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); + coordinate = offset(coordinate, bld, 1); + length++; + + if (devinfo->gen >= 9) { + if (coord_components >= 2) { + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); + coordinate = offset(coordinate, bld, 1); + } + length++; + } + + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod); + length++; + + for (unsigned i = devinfo->gen >= 9 ? 2 : 1; i < coord_components; i++) { + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); + coordinate = offset(coordinate, bld, 1); + length++; + } + + coordinate_done = true; + break; + case SHADER_OPCODE_TXF_CMS: + case SHADER_OPCODE_TXF_UMS: + case SHADER_OPCODE_TXF_MCS: + if (op == SHADER_OPCODE_TXF_UMS || op == SHADER_OPCODE_TXF_CMS) { + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index); + length++; + } + + if (op == SHADER_OPCODE_TXF_CMS) { + /* Data from the multisample control surface. */ + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs); + length++; + } + + /* There is no offsetting for this message; just copy in the integer + * texture coordinates. 
+ */ + for (unsigned i = 0; i < coord_components; i++) { + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); + coordinate = offset(coordinate, bld, 1); + length++; + } + + coordinate_done = true; + break; + case SHADER_OPCODE_TG4_OFFSET: + /* gather4_po_c should have been lowered in SIMD16 mode. */ + assert(bld.dispatch_width() == 8 || shadow_c.file == BAD_FILE); + + /* More crazy intermixing */ + for (unsigned i = 0; i < 2; i++) { /* u, v */ + bld.MOV(sources[length], coordinate); + coordinate = offset(coordinate, bld, 1); + length++; + } + + for (unsigned i = 0; i < 2; i++) { /* offu, offv */ + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_value); + offset_value = offset(offset_value, bld, 1); + length++; + } + + if (coord_components == 3) { /* r if present */ + bld.MOV(sources[length], coordinate); + coordinate = offset(coordinate, bld, 1); + length++; + } + + coordinate_done = true; + break; + default: + break; + } + + /* Set up the coordinate (except for cases where it was done above) */ + if (!coordinate_done) { + for (unsigned i = 0; i < coord_components; i++) { + bld.MOV(sources[length], coordinate); + coordinate = offset(coordinate, bld, 1); + length++; + } + } + + int mlen; + if (reg_width == 2) + mlen = length * reg_width - header_size; + else + mlen = length * reg_width; + + const fs_reg src_payload = fs_reg(GRF, bld.shader->alloc.allocate(mlen), + BRW_REGISTER_TYPE_F); + bld.LOAD_PAYLOAD(src_payload, sources, length, header_size); + + /* Generate the SEND. */ + inst->opcode = op; + inst->src[0] = src_payload; + inst->src[1] = sampler; + inst->resize_sources(2); + inst->base_mrf = -1; + inst->mlen = mlen; + inst->header_size = header_size; + + /* Message length > MAX_SAMPLER_MESSAGE_SIZE disallowed by hardware. */ + assert(inst->mlen <= MAX_SAMPLER_MESSAGE_SIZE); +} + +static void +lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op) +{ + const brw_device_info *devinfo = bld.shader->devinfo; + const fs_reg &coordinate = inst->src[0]; + const fs_reg &shadow_c = inst->src[1]; + const fs_reg &lod = inst->src[2]; + const fs_reg &lod2 = inst->src[3]; + const fs_reg &sample_index = inst->src[4]; + const fs_reg &mcs = inst->src[5]; + const fs_reg &sampler = inst->src[6]; + const fs_reg &offset_value = inst->src[7]; + assert(inst->src[8].file == IMM && inst->src[9].file == IMM); + const unsigned coord_components = inst->src[8].fixed_hw_reg.dw1.ud; + const unsigned grad_components = inst->src[9].fixed_hw_reg.dw1.ud; + + if (devinfo->gen >= 7) { + lower_sampler_logical_send_gen7(bld, inst, op, coordinate, + shadow_c, lod, lod2, sample_index, + mcs, sampler, offset_value, + coord_components, grad_components); + } else if (devinfo->gen >= 5) { + lower_sampler_logical_send_gen5(bld, inst, op, coordinate, + shadow_c, lod, lod2, sample_index, + sampler, offset_value, + coord_components, grad_components); + } else { + lower_sampler_logical_send_gen4(bld, inst, op, coordinate, + shadow_c, lod, lod2, sampler, + coord_components, grad_components); + } +} + +/** + * Initialize the header present in some typed and untyped surface + * messages. 
+ */ +static fs_reg +emit_surface_header(const fs_builder &bld, const fs_reg &sample_mask) +{ + fs_builder ubld = bld.exec_all().group(8, 0); + const fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD); + ubld.MOV(dst, fs_reg(0)); + ubld.MOV(component(dst, 7), sample_mask); + return dst; +} + +static void +lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op, + const fs_reg &sample_mask) +{ + /* Get the logical send arguments. */ + const fs_reg &addr = inst->src[0]; + const fs_reg &src = inst->src[1]; + const fs_reg &surface = inst->src[2]; + const UNUSED fs_reg &dims = inst->src[3]; + const fs_reg &arg = inst->src[4]; + + /* Calculate the total number of components of the payload. */ + const unsigned addr_sz = inst->components_read(0); + const unsigned src_sz = inst->components_read(1); + const unsigned header_sz = (sample_mask.file == BAD_FILE ? 0 : 1); + const unsigned sz = header_sz + addr_sz + src_sz; + + /* Allocate space for the payload. */ + fs_reg *const components = new fs_reg[sz]; + const fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz); + unsigned n = 0; + + /* Construct the payload. */ + if (header_sz) + components[n++] = emit_surface_header(bld, sample_mask); + + for (unsigned i = 0; i < addr_sz; i++) + components[n++] = offset(addr, bld, i); + + for (unsigned i = 0; i < src_sz; i++) + components[n++] = offset(src, bld, i); + + bld.LOAD_PAYLOAD(payload, components, sz, header_sz); + + /* Update the original instruction. */ + inst->opcode = op; + inst->mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8; + inst->header_size = header_sz; + + inst->src[0] = payload; + inst->src[1] = surface; + inst->src[2] = arg; + inst->resize_sources(3); + + delete[] components; +} + +bool +fs_visitor::lower_logical_sends() +{ + bool progress = false; + + foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { + const fs_builder ibld(this, block, inst); + + switch (inst->opcode) { + case FS_OPCODE_FB_WRITE_LOGICAL: + assert(stage == MESA_SHADER_FRAGMENT); + lower_fb_write_logical_send(ibld, inst, + (const brw_wm_prog_data *)prog_data, + (const brw_wm_prog_key *)key, + payload); + break; + + case SHADER_OPCODE_TEX_LOGICAL: + lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TEX); + break; + + case SHADER_OPCODE_TXD_LOGICAL: + lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXD); + break; + + case SHADER_OPCODE_TXF_LOGICAL: + lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF); + break; + + case SHADER_OPCODE_TXL_LOGICAL: + lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXL); + break; + + case SHADER_OPCODE_TXS_LOGICAL: + lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXS); + break; + + case FS_OPCODE_TXB_LOGICAL: + lower_sampler_logical_send(ibld, inst, FS_OPCODE_TXB); + break; + + case SHADER_OPCODE_TXF_CMS_LOGICAL: + lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_CMS); + break; + + case SHADER_OPCODE_TXF_UMS_LOGICAL: + lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_UMS); + break; + + case SHADER_OPCODE_TXF_MCS_LOGICAL: + lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_MCS); + break; + + case SHADER_OPCODE_LOD_LOGICAL: + lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_LOD); + break; + + case SHADER_OPCODE_TG4_LOGICAL: + lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TG4); + break; + + case SHADER_OPCODE_TG4_OFFSET_LOGICAL: + lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TG4_OFFSET); + break; + + case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: + lower_surface_logical_send(ibld, inst, + 
SHADER_OPCODE_UNTYPED_SURFACE_READ, + fs_reg(0xffff)); + break; + + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: + lower_surface_logical_send(ibld, inst, + SHADER_OPCODE_UNTYPED_SURFACE_WRITE, + ibld.sample_mask_reg()); + break; + + case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: + lower_surface_logical_send(ibld, inst, + SHADER_OPCODE_UNTYPED_ATOMIC, + ibld.sample_mask_reg()); + break; + + case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: + lower_surface_logical_send(ibld, inst, + SHADER_OPCODE_TYPED_SURFACE_READ, + fs_reg(0xffff)); + break; + + case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: + lower_surface_logical_send(ibld, inst, + SHADER_OPCODE_TYPED_SURFACE_WRITE, + ibld.sample_mask_reg()); + break; + + case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: + lower_surface_logical_send(ibld, inst, + SHADER_OPCODE_TYPED_ATOMIC, + ibld.sample_mask_reg()); + break; + + default: + continue; + } + progress = true; } @@ -3239,6 +4144,265 @@ fs_visitor::lower_integer_multiplication() return progress; } +/** + * Get the closest native SIMD width supported by the hardware for instruction + * \p inst. The instruction will be left untouched by + * fs_visitor::lower_simd_width() if the returned value is equal to the + * original execution size. + */ +static unsigned +get_lowered_simd_width(const struct brw_device_info *devinfo, + const fs_inst *inst) +{ + switch (inst->opcode) { + case BRW_OPCODE_MOV: + case BRW_OPCODE_SEL: + case BRW_OPCODE_NOT: + case BRW_OPCODE_AND: + case BRW_OPCODE_OR: + case BRW_OPCODE_XOR: + case BRW_OPCODE_SHR: + case BRW_OPCODE_SHL: + case BRW_OPCODE_ASR: + case BRW_OPCODE_CMP: + case BRW_OPCODE_CMPN: + case BRW_OPCODE_CSEL: + case BRW_OPCODE_F32TO16: + case BRW_OPCODE_F16TO32: + case BRW_OPCODE_BFREV: + case BRW_OPCODE_BFE: + case BRW_OPCODE_BFI1: + case BRW_OPCODE_BFI2: + case BRW_OPCODE_ADD: + case BRW_OPCODE_MUL: + case BRW_OPCODE_AVG: + case BRW_OPCODE_FRC: + case BRW_OPCODE_RNDU: + case BRW_OPCODE_RNDD: + case BRW_OPCODE_RNDE: + case BRW_OPCODE_RNDZ: + case BRW_OPCODE_LZD: + case BRW_OPCODE_FBH: + case BRW_OPCODE_FBL: + case BRW_OPCODE_CBIT: + case BRW_OPCODE_SAD2: + case BRW_OPCODE_MAD: + case BRW_OPCODE_LRP: + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_POW: + case SHADER_OPCODE_INT_QUOTIENT: + case SHADER_OPCODE_INT_REMAINDER: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: { + /* According to the PRMs: + * "A. In Direct Addressing mode, a source cannot span more than 2 + * adjacent GRF registers. + * B. A destination cannot span more than 2 adjacent GRF registers." + * + * Look for the source or destination with the largest register region + * which is the one that is going to limit the overal execution size of + * the instruction due to this rule. + */ + unsigned reg_count = inst->regs_written; + + for (unsigned i = 0; i < inst->sources; i++) + reg_count = MAX2(reg_count, (unsigned)inst->regs_read(i)); + + /* Calculate the maximum execution size of the instruction based on the + * factor by which it goes over the hardware limit of 2 GRFs. + */ + return inst->exec_size / DIV_ROUND_UP(reg_count, 2); + } + case SHADER_OPCODE_MULH: + /* MULH is lowered to the MUL/MACH sequence using the accumulator, which + * is 8-wide on Gen7+. + */ + return (devinfo->gen >= 7 ? 8 : inst->exec_size); + + case FS_OPCODE_FB_WRITE_LOGICAL: + /* Gen6 doesn't support SIMD16 depth writes but we cannot handle them + * here. 
+ */ + assert(devinfo->gen != 6 || inst->src[3].file == BAD_FILE || + inst->exec_size == 8); + /* Dual-source FB writes are unsupported in SIMD16 mode. */ + return (inst->src[1].file != BAD_FILE ? 8 : inst->exec_size); + + case SHADER_OPCODE_TXD_LOGICAL: + /* TXD is unsupported in SIMD16 mode. */ + return 8; + + case SHADER_OPCODE_TG4_OFFSET_LOGICAL: { + /* gather4_po_c is unsupported in SIMD16 mode. */ + const fs_reg &shadow_c = inst->src[1]; + return (shadow_c.file != BAD_FILE ? 8 : inst->exec_size); + } + case SHADER_OPCODE_TXL_LOGICAL: + case FS_OPCODE_TXB_LOGICAL: { + /* Gen4 doesn't have SIMD8 non-shadow-compare bias/LOD instructions, and + * Gen4-6 can't support TXL and TXB with shadow comparison in SIMD16 + * mode because the message exceeds the maximum length of 11. + */ + const fs_reg &shadow_c = inst->src[1]; + if (devinfo->gen == 4 && shadow_c.file == BAD_FILE) + return 16; + else if (devinfo->gen < 7 && shadow_c.file != BAD_FILE) + return 8; + else + return inst->exec_size; + } + case SHADER_OPCODE_TXF_LOGICAL: + case SHADER_OPCODE_TXS_LOGICAL: + /* Gen4 doesn't have SIMD8 variants for the RESINFO and LD-with-LOD + * messages. Use SIMD16 instead. + */ + if (devinfo->gen == 4) + return 16; + else + return inst->exec_size; + + case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: + case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: + return 8; + + default: + return inst->exec_size; + } +} + +/** + * The \p rows array of registers represents a \p num_rows by \p num_columns + * matrix in row-major order, write it in column-major order into the register + * passed as destination. \p stride gives the separation between matrix + * elements in the input in fs_builder::dispatch_width() units. + */ +static void +emit_transpose(const fs_builder &bld, + const fs_reg &dst, const fs_reg *rows, + unsigned num_rows, unsigned num_columns, unsigned stride) +{ + fs_reg *const components = new fs_reg[num_rows * num_columns]; + + for (unsigned i = 0; i < num_columns; ++i) { + for (unsigned j = 0; j < num_rows; ++j) + components[num_rows * i + j] = offset(rows[j], bld, stride * i); + } + + bld.LOAD_PAYLOAD(dst, components, num_rows * num_columns, 0); + + delete[] components; +} + +bool +fs_visitor::lower_simd_width() +{ + bool progress = false; + + foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { + const unsigned lower_width = get_lowered_simd_width(devinfo, inst); + + if (lower_width != inst->exec_size) { + /* Builder matching the original instruction. We may also need to + * emit an instruction of width larger than the original, set the + * execution size of the builder to the highest of both for now so + * we're sure that both cases can be handled. + */ + const fs_builder ibld = bld.at(block, inst) + .exec_all(inst->force_writemask_all) + .group(MAX2(inst->exec_size, lower_width), + inst->force_sechalf); + + /* Split the copies in chunks of the execution width of either the + * original or the lowered instruction, whichever is lower. + */ + const unsigned copy_width = MIN2(lower_width, inst->exec_size); + const unsigned n = inst->exec_size / copy_width; + const unsigned dst_size = inst->regs_written * REG_SIZE / + inst->dst.component_size(inst->exec_size); + fs_reg dsts[4]; + + assert(n > 0 && n <= ARRAY_SIZE(dsts) && + !inst->writes_accumulator && !inst->mlen); + + for (unsigned i = 0; i < n; i++) { + /* Emit a copy of the original instruction with the lowered width. 
+ * If the EOT flag was set throw it away except for the last + * instruction to avoid killing the thread prematurely. + */ + fs_inst split_inst = *inst; + split_inst.exec_size = lower_width; + split_inst.eot = inst->eot && i == n - 1; + + /* Select the correct channel enables for the i-th group, then + * transform the sources and destination and emit the lowered + * instruction. + */ + const fs_builder lbld = ibld.group(lower_width, i); + + for (unsigned j = 0; j < inst->sources; j++) { + if (inst->src[j].file != BAD_FILE && + !is_uniform(inst->src[j])) { + /* Get the i-th copy_width-wide chunk of the source. */ + const fs_reg src = horiz_offset(inst->src[j], copy_width * i); + const unsigned src_size = inst->components_read(j); + + /* Use a trivial transposition to copy one every n + * copy_width-wide components of the register into a + * temporary passed as source to the lowered instruction. + */ + split_inst.src[j] = lbld.vgrf(inst->src[j].type, src_size); + emit_transpose(lbld.group(copy_width, 0), + split_inst.src[j], &src, 1, src_size, n); + } + } + + if (inst->regs_written) { + /* Allocate enough space to hold the result of the lowered + * instruction and fix up the number of registers written. + */ + split_inst.dst = dsts[i] = + lbld.vgrf(inst->dst.type, dst_size); + split_inst.regs_written = + DIV_ROUND_UP(inst->regs_written * lower_width, + inst->exec_size); + } + + lbld.emit(split_inst); + } + + if (inst->regs_written) { + /* Distance between useful channels in the temporaries, skipping + * garbage if the lowered instruction is wider than the original. + */ + const unsigned m = lower_width / copy_width; + + /* Interleave the components of the result from the lowered + * instructions. We need to set exec_all() when copying more than + * one half per component, because LOAD_PAYLOAD (in terms of which + * emit_transpose is implemented) can only use the same channel + * enable signals for all of its non-header sources. + */ + emit_transpose(ibld.exec_all(inst->exec_size > copy_width) + .group(copy_width, 0), + inst->dst, dsts, n, dst_size, m); + } + + inst->remove(block); + progress = true; + } + } + + if (progress) + invalidate_live_intervals(); + + return progress; +} + void fs_visitor::dump_instructions() { @@ -3316,9 +4480,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) switch (inst->dst.file) { case GRF: fprintf(file, "vgrf%d", inst->dst.reg); - if (inst->dst.width != dispatch_width) - fprintf(file, "@%d", inst->dst.width); - if (alloc.sizes[inst->dst.reg] != inst->dst.width / 8 || + if (alloc.sizes[inst->dst.reg] != inst->regs_written || inst->dst.subreg_offset) fprintf(file, "+%d.%d", inst->dst.reg_offset, inst->dst.subreg_offset); @@ -3376,9 +4538,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) switch (inst->src[i].file) { case GRF: fprintf(file, "vgrf%d", inst->src[i].reg); - if (inst->src[i].width != dispatch_width) - fprintf(file, "@%d", inst->src[i].width); - if (alloc.sizes[inst->src[i].reg] != inst->src[i].width / 8 || + if (alloc.sizes[inst->src[i].reg] != (unsigned)inst->regs_read(i) || inst->src[i].subreg_offset) fprintf(file, "+%d.%d", inst->src[i].reg_offset, inst->src[i].subreg_offset); @@ -3655,9 +4815,11 @@ fs_visitor::optimize() * Ideally optimization passes wouldn't be part of the visitor so they * wouldn't have access to bld at all, but they do, so just in case some * pass forgets to ask for a location explicitly set it to NULL here to - * make it trip. + * make it trip. 
The dispatch width is initialized to a bogus value to + * make sure that optimizations set the execution controls explicitly to + * match the code they are manipulating instead of relying on the defaults. */ - bld = bld.at(NULL, NULL); + bld = fs_builder(this, 64); split_virtual_grfs(); @@ -3690,9 +4852,13 @@ fs_visitor::optimize() backend_shader::dump_instructions(filename); } - bool progress; + bool progress = false; int iteration = 0; int pass_num = 0; + + OPT(lower_simd_width); + OPT(lower_logical_sends); + do { progress = false; pass_num = 0; @@ -3837,7 +5003,9 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes) if (failed) return false; - emit_urb_writes(clip_planes); + compute_clip_distance(clip_planes); + + emit_urb_writes(); if (shader_time_index >= 0) emit_shader_time_end(); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 243baf688de..975183e990d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -62,6 +62,27 @@ namespace brw { class fs_live_variables; } +static inline fs_reg +offset(fs_reg reg, const brw::fs_builder& bld, unsigned delta) +{ + switch (reg.file) { + case BAD_FILE: + break; + case GRF: + case MRF: + case HW_REG: + case ATTR: + return byte_offset(reg, + delta * reg.component_size(bld.dispatch_width())); + case UNIFORM: + reg.reg_offset += delta; + break; + case IMM: + assert(delta == 0); + } + return reg; +} + /** * The fragment shader front-end. * @@ -161,7 +182,9 @@ public: void no16(const char *msg); void lower_uniform_pull_constant_loads(); bool lower_load_payload(); + bool lower_logical_sends(); bool lower_integer_multiplication(); + bool lower_simd_width(); bool opt_combine_constants(); void emit_dummy_fs(); @@ -185,27 +208,6 @@ public: void compute_sample_position(fs_reg dst, fs_reg int_sample_pos); fs_reg rescale_texcoord(fs_reg coordinate, int coord_components, bool is_rect, uint32_t sampler, int texunit); - fs_inst *emit_texture_gen4(ir_texture_opcode op, fs_reg dst, - fs_reg coordinate, int coord_components, - fs_reg shadow_comp, - fs_reg lod, fs_reg lod2, int grad_components, - uint32_t sampler); - fs_inst *emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst, - fs_reg coordinate, int vector_elements, - fs_reg shadow_c, fs_reg lod, - uint32_t sampler); - fs_inst *emit_texture_gen5(ir_texture_opcode op, fs_reg dst, - fs_reg coordinate, int coord_components, - fs_reg shadow_comp, - fs_reg lod, fs_reg lod2, int grad_components, - fs_reg sample_index, uint32_t sampler, - bool has_offset); - fs_inst *emit_texture_gen7(ir_texture_opcode op, fs_reg dst, - fs_reg coordinate, int coord_components, - fs_reg shadow_comp, - fs_reg lod, fs_reg lod2, int grad_components, - fs_reg sample_index, fs_reg mcs, fs_reg sampler, - fs_reg offset_value); void emit_texture(ir_texture_opcode op, const glsl_type *dest_type, fs_reg coordinate, int components, @@ -220,9 +222,10 @@ public: uint32_t sampler, fs_reg sampler_reg, int texunit); - fs_reg emit_mcs_fetch(fs_reg coordinate, int components, fs_reg sampler); + fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components, + const fs_reg &sampler); void emit_gen6_gather_wa(uint8_t wa, fs_reg dst); - void resolve_source_modifiers(fs_reg *src); + fs_reg resolve_source_modifiers(const fs_reg &src); void emit_discard_jump(); bool try_replace_with_sel(); bool opt_peephole_sel(); @@ -249,6 +252,10 @@ public: void nir_emit_block(nir_block *block); void nir_emit_instr(nir_instr *instr); void nir_emit_alu(const brw::fs_builder &bld, 
nir_alu_instr *instr); + void nir_emit_load_const(const brw::fs_builder &bld, + nir_load_const_instr *instr); + void nir_emit_undef(const brw::fs_builder &bld, + nir_ssa_undef_instr *instr); void nir_emit_intrinsic(const brw::fs_builder &bld, nir_intrinsic_instr *instr); void nir_emit_texture(const brw::fs_builder &bld, @@ -257,21 +264,19 @@ public: nir_jump_instr *instr); fs_reg get_nir_src(nir_src src); fs_reg get_nir_dest(nir_dest dest); + fs_reg get_nir_image_deref(const nir_deref_var *deref); void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst, unsigned wr_mask); bool optimize_frontfacing_ternary(nir_alu_instr *instr, const fs_reg &result); - void setup_color_payload(fs_reg *dst, fs_reg color, unsigned components, - unsigned exec_size, bool use_2nd_half); void emit_alpha_test(); fs_inst *emit_single_fb_write(const brw::fs_builder &bld, fs_reg color1, fs_reg color2, - fs_reg src0_alpha, unsigned components, - unsigned exec_size, bool use_2nd_half = false); + fs_reg src0_alpha, unsigned components); void emit_fb_writes(); - void emit_urb_writes(gl_clip_plane *clip_planes); + void emit_urb_writes(); void emit_cs_terminate(); void emit_barrier(); @@ -282,16 +287,13 @@ public: int shader_time_subindex, fs_reg value); - void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, - fs_reg dst, fs_reg offset, fs_reg src0, - fs_reg src1); - - void emit_untyped_surface_read(unsigned surf_index, fs_reg dst, - fs_reg offset); - fs_reg get_timestamp(const brw::fs_builder &bld); struct brw_reg interp_reg(int location, int channel); + + virtual void setup_vector_uniform_values(const gl_constant_value *values, + unsigned n); + int implied_mrf_writes(fs_inst *inst); virtual void dump_instructions(); @@ -345,7 +347,7 @@ public: unsigned max_grf; fs_reg *nir_locals; - fs_reg *nir_globals; + fs_reg *nir_ssa_values; fs_reg nir_inputs; fs_reg nir_outputs; fs_reg *nir_system_values; @@ -359,7 +361,7 @@ public: fs_reg result; /** Register numbers for thread payload fields. */ - struct { + struct thread_payload { uint8_t source_depth_reg; uint8_t source_w_reg; uint8_t aa_dest_stencil_reg; @@ -468,10 +470,6 @@ private: struct brw_reg msg_data, unsigned msg_type); - void generate_set_omask(fs_inst *inst, - struct brw_reg dst, - struct brw_reg sample_mask); - void generate_set_sample_id(fs_inst *inst, struct brw_reg dst, struct brw_reg src0, diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h index 58ac5980da5..34545eaa0fb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_builder.h +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h @@ -64,6 +64,22 @@ namespace brw { } /** + * Construct an fs_builder that inserts instructions into \p shader + * before instruction \p inst in basic block \p block. The default + * execution controls and debug annotation are initialized from the + * instruction passed as argument. + */ + fs_builder(backend_shader *shader, bblock_t *block, fs_inst *inst) : + shader(shader), block(block), cursor(inst), + _dispatch_width(inst->exec_size), + _group(inst->force_sechalf ? 8 : 0), + force_writemask_all(inst->force_writemask_all) + { + annotation.str = inst->annotation; + annotation.ir = inst->ir; + } + + /** * Construct an fs_builder that inserts instructions before \p cursor in * basic block \p block, inheriting other code generation parameters * from this. 
@@ -99,8 +115,8 @@ namespace brw { fs_builder group(unsigned n, unsigned i) const { - assert(n <= dispatch_width() && - i < dispatch_width() / n); + assert(force_writemask_all || + (n <= dispatch_width() && i < dispatch_width() / n)); fs_builder bld = *this; bld._dispatch_width = n; bld._group += i * n; @@ -160,10 +176,15 @@ namespace brw { dst_reg vgrf(enum brw_reg_type type, unsigned n = 1) const { - return dst_reg(GRF, shader->alloc.allocate( - DIV_ROUND_UP(n * type_sz(type) * dispatch_width(), - REG_SIZE)), - type, dispatch_width()); + assert(dispatch_width() <= 32); + + if (n > 0) + return dst_reg(GRF, shader->alloc.allocate( + DIV_ROUND_UP(n * type_sz(type) * dispatch_width(), + REG_SIZE)), + type); + else + return retype(null_reg_ud(), type); } /** @@ -235,7 +256,7 @@ namespace brw { instruction * emit(enum opcode opcode, const dst_reg &dst) const { - return emit(instruction(opcode, dst)); + return emit(instruction(opcode, dispatch_width(), dst)); } /** @@ -253,11 +274,11 @@ namespace brw { case SHADER_OPCODE_SIN: case SHADER_OPCODE_COS: return fix_math_instruction( - emit(instruction(opcode, dst.width, dst, + emit(instruction(opcode, dispatch_width(), dst, fix_math_operand(src0)))); default: - return emit(instruction(opcode, dst.width, dst, src0)); + return emit(instruction(opcode, dispatch_width(), dst, src0)); } } @@ -273,12 +294,12 @@ namespace brw { case SHADER_OPCODE_INT_QUOTIENT: case SHADER_OPCODE_INT_REMAINDER: return fix_math_instruction( - emit(instruction(opcode, dst.width, dst, + emit(instruction(opcode, dispatch_width(), dst, fix_math_operand(src0), fix_math_operand(src1)))); default: - return emit(instruction(opcode, dst.width, dst, src0, src1)); + return emit(instruction(opcode, dispatch_width(), dst, src0, src1)); } } @@ -295,22 +316,35 @@ namespace brw { case BRW_OPCODE_BFI2: case BRW_OPCODE_MAD: case BRW_OPCODE_LRP: - return emit(instruction(opcode, dst.width, dst, + return emit(instruction(opcode, dispatch_width(), dst, fix_3src_operand(src0), fix_3src_operand(src1), fix_3src_operand(src2))); default: - return emit(instruction(opcode, dst.width, dst, src0, src1, src2)); + return emit(instruction(opcode, dispatch_width(), dst, + src0, src1, src2)); } } /** + * Create and insert an instruction with a variable number of sources + * into the program. + */ + instruction * + emit(enum opcode opcode, const dst_reg &dst, const src_reg srcs[], + unsigned n) const + { + return emit(instruction(opcode, dispatch_width(), dst, srcs, n)); + } + + /** * Insert a preallocated instruction into the program. */ instruction * emit(instruction *inst) const { + assert(inst->exec_size <= 32); assert(inst->exec_size == dispatch_width() || force_writemask_all); assert(_group == 0 || _group == 8); @@ -349,17 +383,19 @@ namespace brw { } /** - * Copy any live channel from \p src to the first channel of \p dst. + * Copy any live channel from \p src to the first channel of the result. 
*/ - void - emit_uniformize(const dst_reg &dst, const src_reg &src) const + src_reg + emit_uniformize(const src_reg &src) const { const fs_builder ubld = exec_all(); - const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD); + const dst_reg chan_index = component(vgrf(BRW_REGISTER_TYPE_UD), 0); + const dst_reg dst = component(vgrf(src.type), 0); + + ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index); + ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index); - ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, component(chan_index, 0)); - ubld.emit(SHADER_OPCODE_BROADCAST, component(dst, 0), - src, component(chan_index, 0)); + return src_reg(dst); } /** @@ -515,20 +551,10 @@ namespace brw { LOAD_PAYLOAD(const dst_reg &dst, const src_reg *src, unsigned sources, unsigned header_size) const { - assert(dst.width % 8 == 0); - instruction *inst = emit(instruction(SHADER_OPCODE_LOAD_PAYLOAD, - dst.width, dst, src, sources)); + instruction *inst = emit(SHADER_OPCODE_LOAD_PAYLOAD, dst, src, sources); inst->header_size = header_size; - - for (unsigned i = 0; i < header_size; i++) - assert(src[i].file != GRF || - src[i].width * type_sz(src[i].type) == 32); - inst->regs_written = header_size; - - for (unsigned i = header_size; i < sources; ++i) - assert(src[i].file != GRF || - src[i].width == dst.width); - inst->regs_written += (sources - header_size) * (dst.width / 8); + inst->regs_written = header_size + + (sources - header_size) * (dispatch_width() / 8); return inst; } @@ -626,8 +652,8 @@ namespace brw { inst->resize_sources(1); inst->src[0] = src0; - at(block, inst).MOV(fs_reg(MRF, inst->base_mrf + 1, src1.type, - dispatch_width()), src1); + at(block, inst).MOV(fs_reg(MRF, inst->base_mrf + 1, src1.type), + src1); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index d0f61222e5a..a8883a35ef2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -243,6 +243,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_unop_find_msb: case ir_unop_find_lsb: case ir_unop_saturate: + case ir_unop_subroutine_to_int: for (i = 0; i < vector_elements; i++) { ir_rvalue *op0 = get_element(op_var[0], i); diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp index 0af5a915c9f..c182232285e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp @@ -277,7 +277,7 @@ fs_visitor::opt_combine_constants() */ exec_node *n = (imm->inst ? 
imm->inst : imm->block->last_non_control_flow_inst()->next); - const fs_builder ibld = bld.at(imm->block, n).exec_all(); + const fs_builder ibld = bld.at(imm->block, n).exec_all().group(1, 0); ibld.MOV(reg, fs_reg(imm->val)); imm->reg = reg.reg; diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index c92aae4b1d6..5445ad55670 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -339,6 +339,14 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) if (entry->src.stride * inst->src[arg].stride > 4) return false; + /* Bail if the instruction type is larger than the execution type of the + * copy, what implies that each channel is reading multiple channels of the + * destination of the copy, and simply replacing the sources would give a + * program with different semantics. + */ + if (type_sz(entry->dst.type) < type_sz(inst->src[arg].type)) + return false; + /* Bail if the result of composing both strides cannot be expressed * as another stride. This avoids, for example, trying to transform * this: @@ -388,17 +396,14 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) switch (entry->src.file) { case UNIFORM: - assert(entry->src.width == 1); case BAD_FILE: case HW_REG: - inst->src[arg].width = entry->src.width; inst->src[arg].reg_offset = entry->src.reg_offset; inst->src[arg].subreg_offset = entry->src.subreg_offset; break; case ATTR: case GRF: { - assert(entry->src.width % inst->src[arg].width == 0); /* In this case, we'll just leave the width alone. The source * register could have different widths depending on how it is * being used. For instance, if only half of the register was @@ -529,6 +534,7 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) case BRW_OPCODE_MACH: case BRW_OPCODE_MUL: + case SHADER_OPCODE_MULH: case BRW_OPCODE_ADD: case BRW_OPCODE_OR: case BRW_OPCODE_AND: @@ -715,7 +721,6 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block, acp_entry *entry = ralloc(copy_prop_ctx, acp_entry); entry->dst = inst->dst; entry->dst.reg_offset = offset; - entry->dst.width = effective_width; entry->src = inst->src[i]; entry->regs_written = regs_written; entry->opcode = inst->opcode; diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 70f0217b93d..c7628dcc2f4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -61,6 +61,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst) case BRW_OPCODE_CMPN: case BRW_OPCODE_ADD: case BRW_OPCODE_MUL: + case SHADER_OPCODE_MULH: case BRW_OPCODE_FRC: case BRW_OPCODE_RNDU: case BRW_OPCODE_RNDD: @@ -179,9 +180,7 @@ static void create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) { int written = inst->regs_written; - int dst_width = inst->dst.width / 8; - const fs_builder ubld = bld.group(inst->exec_size, inst->force_sechalf) - .exec_all(inst->force_writemask_all); + int dst_width = inst->exec_size / 8; fs_inst *copy; if (written > dst_width) { @@ -200,16 +199,15 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) payload = ralloc_array(bld.shader->mem_ctx, fs_reg, sources); for (int i = 0; i < header_size; i++) { payload[i] = src; - payload[i].width = 8; src.reg_offset++; } for (int i = header_size; i < sources; i++) { payload[i] = src; - src = offset(src, 
1); + src = offset(src, bld, 1); } - copy = ubld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size); + copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size); } else { - copy = ubld.MOV(inst->dst, src); + copy = bld.MOV(inst->dst, src); copy->src[0].negate = negate; } assert(copy->regs_written == written); @@ -259,15 +257,14 @@ fs_visitor::opt_cse_local(bblock_t *block) */ bool no_existing_temp = entry->tmp.file == BAD_FILE; if (no_existing_temp && !entry->generator->dst.is_null()) { + const fs_builder ibld = fs_builder(this, block, entry->generator) + .at(block, entry->generator->next); int written = entry->generator->regs_written; - assert((written * 8) % entry->generator->dst.width == 0); entry->tmp = fs_reg(GRF, alloc.allocate(written), - entry->generator->dst.type, - entry->generator->dst.width); + entry->generator->dst.type); - create_copy_instr(bld.at(block, entry->generator->next), - entry->generator, entry->tmp, false); + create_copy_instr(ibld, entry->generator, entry->tmp, false); entry->generator->dst = entry->tmp; } @@ -275,10 +272,10 @@ fs_visitor::opt_cse_local(bblock_t *block) /* dest <- temp */ if (!inst->dst.is_null()) { assert(inst->regs_written == entry->generator->regs_written); - assert(inst->dst.width == entry->generator->dst.width); assert(inst->dst.type == entry->tmp.type); + const fs_builder ibld(this, block, inst); - create_copy_instr(bld.at(block, inst), inst, entry->tmp, negate); + create_copy_instr(ibld, inst, entry->tmp, negate); } /* Set our iterator so that next time through the loop inst->next diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 2ed0bac6fd9..c86ca043b63 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -48,7 +48,7 @@ static uint32_t brw_file_from_reg(fs_reg *reg) } static struct brw_reg -brw_reg_from_fs_reg(fs_reg *reg) +brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg) { struct brw_reg brw_reg; @@ -57,10 +57,10 @@ brw_reg_from_fs_reg(fs_reg *reg) case MRF: if (reg->stride == 0) { brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, 0); - } else if (reg->width < 8) { + } else if (inst->exec_size < 8) { brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0); - brw_reg = stride(brw_reg, reg->width * reg->stride, - reg->width, reg->stride); + brw_reg = stride(brw_reg, inst->exec_size * reg->stride, + inst->exec_size, reg->stride); } else { /* From the Haswell PRM: * @@ -79,6 +79,10 @@ brw_reg_from_fs_reg(fs_reg *reg) brw_reg = byte_offset(brw_reg, reg->subreg_offset); break; case IMM: + assert(reg->stride == ((reg->type == BRW_REGISTER_TYPE_V || + reg->type == BRW_REGISTER_TYPE_UV || + reg->type == BRW_REGISTER_TYPE_VF) ? 
1 : 0)); + switch (reg->type) { case BRW_REGISTER_TYPE_F: brw_reg = brw_imm_f(reg->fixed_hw_reg.dw1.f); @@ -217,11 +221,11 @@ fs_generator::fire_fb_write(fs_inst *inst, if (inst->opcode == FS_OPCODE_REP_FB_WRITE) msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED; else if (prog_data->dual_src_blend) { - if (dispatch_width == 8 || !inst->eot) + if (!inst->force_sechalf) msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01; else msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23; - } else if (dispatch_width == 16) + } else if (inst->exec_size == 16) msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; else msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; @@ -414,7 +418,7 @@ fs_generator::generate_blorp_fb_write(fs_inst *inst) brw_fb_WRITE(p, 16 /* dispatch_width */, brw_message_reg(inst->base_mrf), - brw_reg_from_fs_reg(&inst->src[0]), + brw_reg_from_fs_reg(inst, &inst->src[0]), BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, inst->target, inst->mlen, @@ -651,7 +655,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src /* Note that G45 and older determines shadow compare and dispatch width * from message length for most messages. */ - if (dispatch_width == 8) { + if (inst->exec_size == 8) { msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; if (inst->shadow_compare) { assert(inst->mlen == 6); @@ -670,7 +674,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src break; case FS_OPCODE_TXB: if (inst->shadow_compare) { - assert(dispatch_width == 8); + assert(inst->exec_size == 8); assert(inst->mlen == 6); msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE; } else { @@ -681,7 +685,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src break; case SHADER_OPCODE_TXL: if (inst->shadow_compare) { - assert(dispatch_width == 8); + assert(inst->exec_size == 8); assert(inst->mlen == 6); msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE; } else { @@ -692,7 +696,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src break; case SHADER_OPCODE_TXD: /* There is no sample_d_c message; comparisons are done manually */ - assert(dispatch_width == 8); + assert(inst->exec_size == 8); assert(inst->mlen == 7 || inst->mlen == 10); msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS; break; @@ -1054,7 +1058,6 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, struct brw_reg index, struct brw_reg offset) { - assert(inst->mlen == 0); assert(index.type == BRW_REGISTER_TYPE_UD); assert(offset.file == BRW_GENERAL_REGISTER_FILE); @@ -1069,12 +1072,10 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, struct brw_reg src = offset; bool header_present = false; - int mlen = 1; if (devinfo->gen >= 9) { /* Skylake requires a message header in order to use SIMD4x2 mode. 
*/ - src = retype(brw_vec4_grf(offset.nr - 1, 0), BRW_REGISTER_TYPE_UD); - mlen = 2; + src = retype(brw_vec4_grf(offset.nr, 0), BRW_REGISTER_TYPE_UD); header_present = true; brw_push_insn_state(p); @@ -1105,7 +1106,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, 0, /* LD message ignores sampler unit */ GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1, /* rlen */ - mlen, + inst->mlen, header_present, BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); @@ -1135,7 +1136,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, 0, /* LD message ignores sampler unit */ GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1, /* rlen */ - mlen, + inst->mlen, header_present, BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); @@ -1363,37 +1364,6 @@ fs_generator::generate_set_simd4x2_offset(fs_inst *inst, brw_pop_insn_state(p); } -/* Sets vstride=16, width=8, hstride=2 or vstride=0, width=1, hstride=0 - * (when mask is passed as a uniform) of register mask before moving it - * to register dst. - */ -void -fs_generator::generate_set_omask(fs_inst *inst, - struct brw_reg dst, - struct brw_reg mask) -{ - bool stride_8_8_1 = - (mask.vstride == BRW_VERTICAL_STRIDE_8 && - mask.width == BRW_WIDTH_8 && - mask.hstride == BRW_HORIZONTAL_STRIDE_1); - - bool stride_0_1_0 = has_scalar_region(mask); - - assert(stride_8_8_1 || stride_0_1_0); - assert(dst.type == BRW_REGISTER_TYPE_UW); - - brw_push_insn_state(p); - brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - - if (stride_8_8_1) { - brw_MOV(p, dst, retype(stride(mask, 16, 8, 2), dst.type)); - } else if (stride_0_1_0) { - brw_MOV(p, dst, retype(mask, dst.type)); - } - brw_pop_insn_state(p); -} - /* Sets vstride=1, width=4, hstride=0 of register src1 during * the ADD instruction. */ @@ -1563,7 +1533,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset); for (unsigned int i = 0; i < inst->sources; i++) { - src[i] = brw_reg_from_fs_reg(&inst->src[i]); + src[i] = brw_reg_from_fs_reg(inst, &inst->src[i]); /* The accumulator result appears to get used for the * conditional modifier generation. When negating a UD @@ -1575,7 +1545,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) inst->src[i].type != BRW_REGISTER_TYPE_UD || !inst->src[i].negate); } - dst = brw_reg_from_fs_reg(&inst->dst); + dst = brw_reg_from_fs_reg(inst, &inst->dst); brw_set_default_predicate_control(p, inst->predicate); brw_set_default_predicate_inverse(p, inst->predicate_inverse); @@ -1604,7 +1574,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) /* If the instruction writes to more than one register, it needs to * be a "compressed" instruction on Gen <= 5. */ - if (inst->exec_size * inst->dst.stride * type_sz(inst->dst.type) > 32) + if (inst->dst.component_size(inst->exec_size) > REG_SIZE) brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED); else brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); @@ -1872,7 +1842,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; case BRW_OPCODE_DO: - brw_DO(p, BRW_EXECUTE_8); + brw_DO(p, dispatch_width == 16 ? 
BRW_EXECUTE_16 : BRW_EXECUTE_8); break; case BRW_OPCODE_BREAK: @@ -2019,19 +1989,15 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; case SHADER_OPCODE_UNTYPED_ATOMIC: - assert(src[1].file == BRW_IMMEDIATE_VALUE && - src[2].file == BRW_IMMEDIATE_VALUE); + assert(src[2].file == BRW_IMMEDIATE_VALUE); brw_untyped_atomic(p, dst, src[0], src[1], src[2].dw1.ud, inst->mlen, !inst->dst.is_null()); - brw_mark_surface_used(prog_data, src[1].dw1.ud); break; case SHADER_OPCODE_UNTYPED_SURFACE_READ: - assert(src[1].file == BRW_IMMEDIATE_VALUE && - src[2].file == BRW_IMMEDIATE_VALUE); + assert(src[2].file == BRW_IMMEDIATE_VALUE); brw_untyped_surface_read(p, dst, src[0], src[1], inst->mlen, src[2].dw1.ud); - brw_mark_surface_used(prog_data, src[1].dw1.ud); break; case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: @@ -2073,10 +2039,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) brw_broadcast(p, dst, src[0], src[1]); break; - case FS_OPCODE_SET_OMASK: - generate_set_omask(inst, dst, src[0]); - break; - case FS_OPCODE_SET_SAMPLE_ID: generate_set_sample_id(inst, dst, src[0], src[1]); break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp index 502161d5128..19aec92fad1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp @@ -204,27 +204,9 @@ fs_live_variables::compute_live_variables() while (cont) { cont = false; - foreach_block (block, cfg) { + foreach_block_reverse (block, cfg) { struct block_data *bd = &block_data[block->num]; - /* Update livein */ - for (int i = 0; i < bitset_words; i++) { - BITSET_WORD new_livein = (bd->use[i] | - (bd->liveout[i] & - ~bd->def[i])); - if (new_livein & ~bd->livein[i]) { - bd->livein[i] |= new_livein; - cont = true; - } - } - BITSET_WORD new_livein = (bd->flag_use[0] | - (bd->flag_liveout[0] & - ~bd->flag_def[0])); - if (new_livein & ~bd->flag_livein[0]) { - bd->flag_livein[0] |= new_livein; - cont = true; - } - /* Update liveout */ foreach_list_typed(bblock_link, child_link, link, &block->children) { struct block_data *child_bd = &block_data[child_link->block->num]; @@ -244,6 +226,24 @@ fs_live_variables::compute_live_variables() cont = true; } } + + /* Update livein */ + for (int i = 0; i < bitset_words; i++) { + BITSET_WORD new_livein = (bd->use[i] | + (bd->liveout[i] & + ~bd->def[i])); + if (new_livein & ~bd->livein[i]) { + bd->livein[i] |= new_livein; + cont = true; + } + } + BITSET_WORD new_livein = (bd->flag_use[0] | + (bd->flag_liveout[0] & + ~bd->flag_def[0])); + if (new_livein & ~bd->flag_livein[0]) { + bd->flag_livein[0] |= new_livein; + cont = true; + } } } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 4d98b048433..93a36cc03bf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -24,8 +24,10 @@ #include "glsl/ir.h" #include "glsl/ir_optimization.h" #include "glsl/nir/glsl_to_nir.h" +#include "main/shaderimage.h" #include "program/prog_to_nir.h" #include "brw_fs.h" +#include "brw_fs_surface_builder.h" #include "brw_nir.h" using namespace brw; @@ -38,31 +40,11 @@ fs_visitor::emit_nir_code() /* emit the arrays used for inputs and outputs - load/store intrinsics will * be converted to reads/writes of these arrays */ - - if (nir->num_inputs > 0) { - nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_inputs); - nir_setup_inputs(nir); - } - - if (nir->num_outputs > 0) { - 
nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs); - nir_setup_outputs(nir); - } - - if (nir->num_uniforms > 0) { - nir_setup_uniforms(nir); - } - + nir_setup_inputs(nir); + nir_setup_outputs(nir); + nir_setup_uniforms(nir); nir_emit_system_values(nir); - nir_globals = ralloc_array(mem_ctx, fs_reg, nir->reg_alloc); - foreach_list_typed(nir_register, reg, node, &nir->registers) { - unsigned array_elems = - reg->num_array_elems == 0 ? 1 : reg->num_array_elems; - unsigned size = array_elems * reg->num_components; - nir_globals[reg->index] = bld.vgrf(BRW_REGISTER_TYPE_F, size); - } - /* get the main function and emit it */ nir_foreach_overload(nir, overload) { assert(strcmp(overload->function->name, "main") == 0); @@ -74,9 +56,11 @@ fs_visitor::emit_nir_code() void fs_visitor::nir_setup_inputs(nir_shader *shader) { + nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, shader->num_inputs); + foreach_list_typed(nir_variable, var, node, &shader->inputs) { enum brw_reg_type type = brw_type_for_base_type(var->type); - fs_reg input = offset(nir_inputs, var->data.driver_location); + fs_reg input = offset(nir_inputs, bld, var->data.driver_location); fs_reg reg; switch (stage) { @@ -91,25 +75,35 @@ fs_visitor::nir_setup_inputs(nir_shader *shader) * So, we need to copy from fs_reg(ATTR, var->location) to * offset(nir_inputs, var->data.driver_location). */ - unsigned components = var->type->without_array()->components(); + const glsl_type *const t = var->type->without_array(); + const unsigned components = t->components(); + const unsigned cols = t->matrix_columns; + const unsigned elts = t->vector_elements; unsigned array_length = var->type->is_array() ? var->type->length : 1; for (unsigned i = 0; i < array_length; i++) { - for (unsigned j = 0; j < components; j++) { - bld.MOV(retype(offset(input, components * i + j), type), - offset(fs_reg(ATTR, var->data.location + i, type), j)); + for (unsigned j = 0; j < cols; j++) { + for (unsigned k = 0; k < elts; k++) { + bld.MOV(offset(retype(input, type), bld, + components * i + elts * j + k), + offset(fs_reg(ATTR, var->data.location + i, type), + bld, 4 * j + k)); + } } } break; } case MESA_SHADER_GEOMETRY: case MESA_SHADER_COMPUTE: + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: unreachable("fs_visitor not used for these stages yet."); break; case MESA_SHADER_FRAGMENT: if (var->data.location == VARYING_SLOT_POS) { reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer, var->data.origin_upper_left); - emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, input, reg), 0xF); + emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(), + input, reg), 0xF); } else { emit_general_interpolation(input, var->name, var->type, (glsl_interp_qualifier) var->data.interpolation, @@ -126,45 +120,54 @@ fs_visitor::nir_setup_outputs(nir_shader *shader) { brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; + nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, shader->num_outputs); + foreach_list_typed(nir_variable, var, node, &shader->outputs) { - fs_reg reg = offset(nir_outputs, var->data.driver_location); + fs_reg reg = offset(nir_outputs, bld, var->data.driver_location); int vector_elements = var->type->is_array() ? 
var->type->fields.array->vector_elements : var->type->vector_elements; - if (stage == MESA_SHADER_VERTEX) { + switch (stage) { + case MESA_SHADER_VERTEX: for (int i = 0; i < ALIGN(type_size(var->type), 4) / 4; i++) { int output = var->data.location + i; - this->outputs[output] = offset(reg, 4 * i); + this->outputs[output] = offset(reg, bld, 4 * i); this->output_components[output] = vector_elements; } - } else if (var->data.index > 0) { - assert(var->data.location == FRAG_RESULT_DATA0); - assert(var->data.index == 1); - this->dual_src_output = reg; - this->do_dual_src = true; - } else if (var->data.location == FRAG_RESULT_COLOR) { - /* Writing gl_FragColor outputs to all color regions. */ - for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) { - this->outputs[i] = reg; - this->output_components[i] = 4; - } - } else if (var->data.location == FRAG_RESULT_DEPTH) { - this->frag_depth = reg; - } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) { - this->sample_mask = reg; - } else { - /* gl_FragData or a user-defined FS output */ - assert(var->data.location >= FRAG_RESULT_DATA0 && - var->data.location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS); - - /* General color output. */ - for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) { - int output = var->data.location - FRAG_RESULT_DATA0 + i; - this->outputs[output] = offset(reg, vector_elements * i); - this->output_components[output] = vector_elements; + break; + case MESA_SHADER_FRAGMENT: + if (var->data.index > 0) { + assert(var->data.location == FRAG_RESULT_DATA0); + assert(var->data.index == 1); + this->dual_src_output = reg; + this->do_dual_src = true; + } else if (var->data.location == FRAG_RESULT_COLOR) { + /* Writing gl_FragColor outputs to all color regions. */ + for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) { + this->outputs[i] = reg; + this->output_components[i] = 4; + } + } else if (var->data.location == FRAG_RESULT_DEPTH) { + this->frag_depth = reg; + } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) { + this->sample_mask = reg; + } else { + /* gl_FragData or a user-defined FS output */ + assert(var->data.location >= FRAG_RESULT_DATA0 && + var->data.location < FRAG_RESULT_DATA0+BRW_MAX_DRAW_BUFFERS); + + /* General color output. */ + for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) { + int output = var->data.location - FRAG_RESULT_DATA0 + i; + this->outputs[output] = offset(reg, bld, vector_elements * i); + this->output_components[output] = vector_elements; + } } + break; + default: + unreachable("unhandled shader stage"); } } } @@ -172,18 +175,20 @@ fs_visitor::nir_setup_outputs(nir_shader *shader) void fs_visitor::nir_setup_uniforms(nir_shader *shader) { - uniforms = shader->num_uniforms; num_direct_uniforms = shader->num_direct_uniforms; + if (dispatch_width != 8) + return; + /* We split the uniform register file in half. The first half is * entirely direct uniforms. The second half is indirect. 
*/ - param_size[0] = num_direct_uniforms; + if (num_direct_uniforms > 0) + param_size[0] = num_direct_uniforms; if (shader->num_uniforms > num_direct_uniforms) param_size[num_direct_uniforms] = shader->num_uniforms - num_direct_uniforms; - if (dispatch_width != 8) - return; + uniforms = shader->num_uniforms; if (shader_prog) { foreach_list_typed(nir_variable, var, node, &shader->uniforms) { @@ -233,17 +238,26 @@ fs_visitor::nir_setup_uniform(nir_variable *var) continue; } - unsigned slots = storage->type->component_slots(); - if (storage->array_elements) - slots *= storage->array_elements; + if (storage->type->is_image()) { + /* Images don't get a valid location assigned by nir_lower_io() + * because their size is driver-specific, so we need to allocate + * space for them here at the end of the parameter array. + */ + var->data.driver_location = uniforms; + param_size[uniforms] = + BRW_IMAGE_PARAM_SIZE * MAX2(storage->array_elements, 1); - for (unsigned i = 0; i < slots; i++) { - stage_prog_data->param[index++] = &storage->storage[i]; + setup_image_uniform_values(storage); + } else { + unsigned slots = storage->type->component_slots(); + if (storage->array_elements) + slots *= storage->array_elements; + + for (unsigned i = 0; i < slots; i++) { + stage_prog_data->param[index++] = &storage->storage[i]; + } } } - - /* Make sure we actually initialized the right amount of stuff here. */ - assert(var->data.driver_location + var->type->component_slots() == index); } void @@ -366,6 +380,9 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl) nir_locals[reg->index] = bld.vgrf(BRW_REGISTER_TYPE_F, size); } + nir_ssa_values = reralloc(mem_ctx, nir_ssa_values, fs_reg, + impl->ssa_alloc); + nir_emit_cf_list(&impl->body); } @@ -413,18 +430,12 @@ fs_visitor::nir_emit_if(nir_if *if_stmt) bld.emit(BRW_OPCODE_ENDIF); - if (!try_replace_with_sel() && devinfo->gen < 6) { - no16("Can't support (non-uniform) control flow on SIMD16\n"); - } + try_replace_with_sel(); } void fs_visitor::nir_emit_loop(nir_loop *loop) { - if (devinfo->gen < 6) { - no16("Can't support (non-uniform) control flow on SIMD16\n"); - } - bld.emit(BRW_OPCODE_DO); nir_emit_cf_list(&loop->body); @@ -459,9 +470,11 @@ fs_visitor::nir_emit_instr(nir_instr *instr) break; case nir_instr_type_load_const: - /* We can hit these, but we do nothing now and use them as - * immediates later. 
- */ + nir_emit_load_const(abld, nir_instr_as_load_const(instr)); + break; + + case nir_instr_type_ssa_undef: + nir_emit_undef(abld, nir_instr_as_ssa_undef(instr)); break; case nir_instr_type_jump: @@ -473,39 +486,16 @@ fs_visitor::nir_emit_instr(nir_instr *instr) } } -static brw_reg_type -brw_type_for_nir_type(nir_alu_type type) -{ - switch (type) { - case nir_type_unsigned: - return BRW_REGISTER_TYPE_UD; - case nir_type_bool: - case nir_type_int: - return BRW_REGISTER_TYPE_D; - case nir_type_float: - return BRW_REGISTER_TYPE_F; - default: - unreachable("unknown type"); - } - - return BRW_REGISTER_TYPE_F; -} - bool fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr, const fs_reg &result) { - if (instr->src[0].src.is_ssa || - !instr->src[0].src.reg.reg || - !instr->src[0].src.reg.reg->parent_instr) - return false; - - if (instr->src[0].src.reg.reg->parent_instr->type != - nir_instr_type_intrinsic) + if (!instr->src[0].src.is_ssa || + instr->src[0].src.ssa->parent_instr->type != nir_instr_type_intrinsic) return false; nir_intrinsic_instr *src0 = - nir_instr_as_intrinsic(instr->src[0].src.reg.reg->parent_instr); + nir_instr_as_intrinsic(instr->src[0].src.ssa->parent_instr); if (src0->intrinsic != nir_intrinsic_load_front_face) return false; @@ -618,11 +608,11 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) continue; if (instr->op == nir_op_imov || instr->op == nir_op_fmov) { - inst = bld.MOV(offset(temp, i), - offset(op[0], instr->src[0].swizzle[i])); + inst = bld.MOV(offset(temp, bld, i), + offset(op[0], bld, instr->src[0].swizzle[i])); } else { - inst = bld.MOV(offset(temp, i), - offset(op[i], instr->src[i].swizzle[0])); + inst = bld.MOV(offset(temp, bld, i), + offset(op[i], bld, instr->src[i].swizzle[0])); } inst->saturate = instr->dest.saturate; } @@ -636,7 +626,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) if (!(instr->dest.write_mask & (1 << i))) continue; - bld.MOV(offset(result, i), offset(temp, i)); + bld.MOV(offset(result, bld, i), offset(temp, bld, i)); } } return; @@ -657,12 +647,12 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) assert(_mesa_bitcount(instr->dest.write_mask) == 1); channel = ffs(instr->dest.write_mask) - 1; - result = offset(result, channel); + result = offset(result, bld, channel); } for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { assert(nir_op_infos[instr->op].input_sizes[i] < 2); - op[i] = offset(op[i], instr->src[i].swizzle[channel]); + op[i] = offset(op[i], bld, instr->src[i].swizzle[channel]); } switch (instr->op) { @@ -788,67 +778,20 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) break; case nir_op_imul_high: - case nir_op_umul_high: { - if (devinfo->gen >= 7) - no16("SIMD16 explicit accumulator operands unsupported\n"); - - struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type); - - fs_inst *mul = bld.MUL(acc, op[0], op[1]); - bld.MACH(result, op[0], op[1]); - - /* Until Gen8, integer multiplies read 32-bits from one source, and - * 16-bits from the other, and relying on the MACH instruction to - * generate the high bits of the result. - * - * On Gen8, the multiply instruction does a full 32x32-bit multiply, - * but in order to do a 64x64-bit multiply we have to simulate the - * previous behavior and then use a MACH instruction. - * - * FINISHME: Don't use source modifiers on src1. 
- */ - if (devinfo->gen >= 8) { - assert(mul->src[1].type == BRW_REGISTER_TYPE_D || - mul->src[1].type == BRW_REGISTER_TYPE_UD); - if (mul->src[1].type == BRW_REGISTER_TYPE_D) { - mul->src[1].type = BRW_REGISTER_TYPE_W; - mul->src[1].stride = 2; - } else { - mul->src[1].type = BRW_REGISTER_TYPE_UW; - mul->src[1].stride = 2; - } - } + case nir_op_umul_high: + bld.emit(SHADER_OPCODE_MULH, result, op[0], op[1]); break; - } case nir_op_idiv: case nir_op_udiv: bld.emit(SHADER_OPCODE_INT_QUOTIENT, result, op[0], op[1]); break; - case nir_op_uadd_carry: { - if (devinfo->gen >= 7) - no16("SIMD16 explicit accumulator operands unsupported\n"); + case nir_op_uadd_carry: + unreachable("Should have been lowered by carry_to_arith()."); - struct brw_reg acc = retype(brw_acc_reg(dispatch_width), - BRW_REGISTER_TYPE_UD); - - bld.ADDC(bld.null_reg_ud(), op[0], op[1]); - bld.MOV(result, fs_reg(acc)); - break; - } - - case nir_op_usub_borrow: { - if (devinfo->gen >= 7) - no16("SIMD16 explicit accumulator operands unsupported\n"); - - struct brw_reg acc = retype(brw_acc_reg(dispatch_width), - BRW_REGISTER_TYPE_UD); - - bld.SUBB(bld.null_reg_ud(), op[0], op[1]); - bld.MOV(result, fs_reg(acc)); - break; - } + case nir_op_usub_borrow: + unreachable("Should have been lowered by borrow_to_arith()."); case nir_op_umod: bld.emit(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]); @@ -878,28 +821,28 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) case nir_op_inot: if (devinfo->gen >= 8) { - resolve_source_modifiers(&op[0]); + op[0] = resolve_source_modifiers(op[0]); } bld.NOT(result, op[0]); break; case nir_op_ixor: if (devinfo->gen >= 8) { - resolve_source_modifiers(&op[0]); - resolve_source_modifiers(&op[1]); + op[0] = resolve_source_modifiers(op[0]); + op[1] = resolve_source_modifiers(op[1]); } bld.XOR(result, op[0], op[1]); break; case nir_op_ior: if (devinfo->gen >= 8) { - resolve_source_modifiers(&op[0]); - resolve_source_modifiers(&op[1]); + op[0] = resolve_source_modifiers(op[0]); + op[1] = resolve_source_modifiers(op[1]); } bld.OR(result, op[0], op[1]); break; case nir_op_iand: if (devinfo->gen >= 8) { - resolve_source_modifiers(&op[0]); - resolve_source_modifiers(&op[1]); + op[0] = resolve_source_modifiers(op[0]); + op[1] = resolve_source_modifiers(op[1]); } bld.AND(result, op[0], op[1]); break; @@ -959,10 +902,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) break; case nir_op_b2i: - bld.AND(result, op[0], fs_reg(1)); - break; case nir_op_b2f: - bld.AND(retype(result, BRW_REGISTER_TYPE_UD), op[0], fs_reg(0x3f800000u)); + bld.MOV(result, negate(op[0])); break; case nir_op_f2b: @@ -1146,17 +1087,36 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) } } +void +fs_visitor::nir_emit_load_const(const fs_builder &bld, + nir_load_const_instr *instr) +{ + fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, instr->def.num_components); + + for (unsigned i = 0; i < instr->def.num_components; i++) + bld.MOV(offset(reg, bld, i), fs_reg(instr->value.i[i])); + + nir_ssa_values[instr->def.index] = reg; +} + +void +fs_visitor::nir_emit_undef(const fs_builder &bld, nir_ssa_undef_instr *instr) +{ + nir_ssa_values[instr->def.index] = bld.vgrf(BRW_REGISTER_TYPE_D, + instr->def.num_components); +} + static fs_reg fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg, unsigned base_offset, nir_src *indirect) { fs_reg reg; - if (nir_reg->is_global) - reg = v->nir_globals[nir_reg->index]; - else - reg = v->nir_locals[nir_reg->index]; - reg = offset(reg, base_offset * 
nir_reg->num_components); + assert(!nir_reg->is_global); + + reg = v->nir_locals[nir_reg->index]; + + reg = offset(reg, v->bld, base_offset * nir_reg->num_components); if (indirect) { int multiplier = nir_reg->num_components * (v->dispatch_width / 8); @@ -1171,34 +1131,77 @@ fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg, fs_reg fs_visitor::get_nir_src(nir_src src) { + fs_reg reg; if (src.is_ssa) { - assert(src.ssa->parent_instr->type == nir_instr_type_load_const); - nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr); - fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, src.ssa->num_components); - - for (unsigned i = 0; i < src.ssa->num_components; ++i) - bld.MOV(offset(reg, i), fs_reg(load->value.i[i])); - - return reg; + reg = nir_ssa_values[src.ssa->index]; } else { - fs_reg reg = fs_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset, - src.reg.indirect); - - /* to avoid floating-point denorm flushing problems, set the type by - * default to D - instructions that need floating point semantics will set - * this to F if they need to - */ - return retype(reg, BRW_REGISTER_TYPE_D); + reg = fs_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset, + src.reg.indirect); } + + /* to avoid floating-point denorm flushing problems, set the type by + * default to D - instructions that need floating point semantics will set + * this to F if they need to + */ + return retype(reg, BRW_REGISTER_TYPE_D); } fs_reg fs_visitor::get_nir_dest(nir_dest dest) { + if (dest.is_ssa) { + nir_ssa_values[dest.ssa.index] = bld.vgrf(BRW_REGISTER_TYPE_F, + dest.ssa.num_components); + return nir_ssa_values[dest.ssa.index]; + } + return fs_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset, dest.reg.indirect); } +fs_reg +fs_visitor::get_nir_image_deref(const nir_deref_var *deref) +{ + fs_reg image(UNIFORM, deref->var->data.driver_location, + BRW_REGISTER_TYPE_UD); + + if (deref->deref.child) { + const nir_deref_array *deref_array = + nir_deref_as_array(deref->deref.child); + assert(deref->deref.child->deref_type == nir_deref_type_array && + deref_array->deref.child == NULL); + const unsigned size = glsl_get_length(deref->var->type); + const unsigned base = MIN2(deref_array->base_offset, size - 1); + + image = offset(image, bld, base * BRW_IMAGE_PARAM_SIZE); + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + fs_reg *tmp = new(mem_ctx) fs_reg(vgrf(glsl_type::int_type)); + + if (devinfo->gen == 7 && !devinfo->is_haswell) { + /* IVB hangs when trying to access an invalid surface index with + * the dataport. According to the spec "if the index used to + * select an individual element is negative or greater than or + * equal to the size of the array, the results of the operation + * are undefined but may not lead to termination" -- which is one + * of the possible outcomes of the hang. Clamp the index to + * prevent access outside of the array bounds. 
+ */ + bld.emit_minmax(*tmp, retype(get_nir_src(deref_array->indirect), + BRW_REGISTER_TYPE_UD), + fs_reg(size - base - 1), BRW_CONDITIONAL_L); + } else { + bld.MOV(*tmp, get_nir_src(deref_array->indirect)); + } + + bld.MUL(*tmp, *tmp, fs_reg(BRW_IMAGE_PARAM_SIZE)); + image.reladdr = tmp; + } + } + + return image; +} + void fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst, unsigned wr_mask) @@ -1208,15 +1211,64 @@ fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst, continue; fs_inst *new_inst = new(mem_ctx) fs_inst(inst); - new_inst->dst = offset(new_inst->dst, i); + new_inst->dst = offset(new_inst->dst, bld, i); for (unsigned j = 0; j < new_inst->sources; j++) if (new_inst->src[j].file == GRF) - new_inst->src[j] = offset(new_inst->src[j], i); + new_inst->src[j] = offset(new_inst->src[j], bld, i); bld.emit(new_inst); } } +/** + * Get the matching channel register datatype for an image intrinsic of the + * specified GLSL image type. + */ +static brw_reg_type +get_image_base_type(const glsl_type *type) +{ + switch ((glsl_base_type)type->sampler_type) { + case GLSL_TYPE_UINT: + return BRW_REGISTER_TYPE_UD; + case GLSL_TYPE_INT: + return BRW_REGISTER_TYPE_D; + case GLSL_TYPE_FLOAT: + return BRW_REGISTER_TYPE_F; + default: + unreachable("Not reached."); + } +} + +/** + * Get the appropriate atomic op for an image atomic intrinsic. + */ +static unsigned +get_image_atomic_op(nir_intrinsic_op op, const glsl_type *type) +{ + switch (op) { + case nir_intrinsic_image_atomic_add: + return BRW_AOP_ADD; + case nir_intrinsic_image_atomic_min: + return (get_image_base_type(type) == BRW_REGISTER_TYPE_D ? + BRW_AOP_IMIN : BRW_AOP_UMIN); + case nir_intrinsic_image_atomic_max: + return (get_image_base_type(type) == BRW_REGISTER_TYPE_D ? + BRW_AOP_IMAX : BRW_AOP_UMAX); + case nir_intrinsic_image_atomic_and: + return BRW_AOP_AND; + case nir_intrinsic_image_atomic_or: + return BRW_AOP_OR; + case nir_intrinsic_image_atomic_xor: + return BRW_AOP_XOR; + case nir_intrinsic_image_atomic_exchange: + return BRW_AOP_MOV; + case nir_intrinsic_image_atomic_comp_swap: + return BRW_AOP_CMPWR; + default: + unreachable("Not reachable."); + } +} + void fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) { @@ -1255,25 +1307,102 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_atomic_counter_inc: case nir_intrinsic_atomic_counter_dec: case nir_intrinsic_atomic_counter_read: { - unsigned surf_index = prog_data->binding_table.abo_start + - (unsigned) instr->const_index[0]; - fs_reg offset = fs_reg(get_nir_src(instr->src[0])); + using namespace surface_access; + /* Get the arguments of the atomic intrinsic. */ + const fs_reg offset = get_nir_src(instr->src[0]); + const unsigned surface = (stage_prog_data->binding_table.abo_start + + instr->const_index[0]); + fs_reg tmp; + + /* Emit a surface read or atomic op. 
*/ switch (instr->intrinsic) { - case nir_intrinsic_atomic_counter_inc: - emit_untyped_atomic(BRW_AOP_INC, surf_index, dest, offset, - fs_reg(), fs_reg()); - break; - case nir_intrinsic_atomic_counter_dec: - emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dest, offset, - fs_reg(), fs_reg()); - break; - case nir_intrinsic_atomic_counter_read: - emit_untyped_surface_read(surf_index, dest, offset); - break; - default: - unreachable("Unreachable"); + case nir_intrinsic_atomic_counter_read: + tmp = emit_untyped_read(bld, fs_reg(surface), offset, 1, 1); + break; + + case nir_intrinsic_atomic_counter_inc: + tmp = emit_untyped_atomic(bld, fs_reg(surface), offset, fs_reg(), + fs_reg(), 1, 1, BRW_AOP_INC); + break; + + case nir_intrinsic_atomic_counter_dec: + tmp = emit_untyped_atomic(bld, fs_reg(surface), offset, fs_reg(), + fs_reg(), 1, 1, BRW_AOP_PREDEC); + break; + + default: + unreachable("Unreachable"); } + + /* Assign the result. */ + bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD), tmp); + + /* Mark the surface as used. */ + brw_mark_surface_used(stage_prog_data, surface); + break; + } + + case nir_intrinsic_image_load: + case nir_intrinsic_image_store: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_min: + case nir_intrinsic_image_atomic_max: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: { + using namespace image_access; + + /* Get the referenced image variable and type. */ + const nir_variable *var = instr->variables[0]->var; + const glsl_type *type = var->type->without_array(); + const brw_reg_type base_type = get_image_base_type(type); + + /* Get some metadata from the image intrinsic. */ + const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; + const unsigned arr_dims = type->sampler_array ? 1 : 0; + const unsigned surf_dims = type->coordinate_components() - arr_dims; + const mesa_format format = + (var->data.image.write_only ? MESA_FORMAT_NONE : + _mesa_get_shader_image_format(var->data.image.format)); + + /* Get the arguments of the image intrinsic. */ + const fs_reg image = get_nir_image_deref(instr->variables[0]); + const fs_reg addr = retype(get_nir_src(instr->src[0]), + BRW_REGISTER_TYPE_UD); + const fs_reg src0 = (info->num_srcs >= 3 ? + retype(get_nir_src(instr->src[2]), base_type) : + fs_reg()); + const fs_reg src1 = (info->num_srcs >= 4 ? + retype(get_nir_src(instr->src[3]), base_type) : + fs_reg()); + fs_reg tmp; + + /* Emit an image load, store or atomic op. */ + if (instr->intrinsic == nir_intrinsic_image_load) + tmp = emit_image_load(bld, image, addr, surf_dims, arr_dims, format); + + else if (instr->intrinsic == nir_intrinsic_image_store) + emit_image_store(bld, image, addr, src0, surf_dims, arr_dims, format); + + else + tmp = emit_image_atomic(bld, image, addr, src0, src1, + surf_dims, arr_dims, info->dest_components, + get_image_atomic_op(instr->intrinsic, type)); + + /* Assign the result. 
*/ + for (unsigned c = 0; c < info->dest_components; ++c) + bld.MOV(offset(retype(dest, base_type), bld, c), + offset(tmp, bld, c)); + break; + } + + case nir_intrinsic_memory_barrier: { + const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 16 / dispatch_width); + bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp) + ->regs_written = 2; break; } @@ -1322,7 +1451,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr assert(sample_pos.file != BAD_FILE); dest.type = sample_pos.type; bld.MOV(dest, sample_pos); - bld.MOV(offset(dest, 1), offset(sample_pos, 1)); + bld.MOV(offset(dest, bld, 1), offset(sample_pos, bld, 1)); break; } @@ -1349,13 +1478,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr } for (unsigned j = 0; j < instr->num_components; j++) { - fs_reg src = offset(retype(uniform_reg, dest.type), index); + fs_reg src = offset(retype(uniform_reg, dest.type), bld, index); if (has_indirect) src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); index++; bld.MOV(dest, src); - dest = offset(dest, 1); + dest = offset(dest, bld, 1); } break; } @@ -1387,7 +1516,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr surf_index = vgrf(glsl_type::uint_type); bld.ADD(surf_index, get_nir_src(instr->src[0]), fs_reg(stage_prog_data->binding_table.ubo_start)); - bld.emit_uniformize(surf_index, surf_index); + surf_index = bld.emit_uniformize(surf_index); /* Assume this may touch any UBO. It would be nice to provide * a tighter bound, but the array information is already lowered away. @@ -1406,7 +1535,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr unsigned vec4_offset = instr->const_index[1] / 4; for (int i = 0; i < instr->num_components; i++) - VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, i), surf_index, + VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index, base_offset, vec4_offset + i); } else { fs_reg packed_consts = vgrf(glsl_type::float_type); @@ -1425,7 +1554,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr assert(packed_consts.subreg_offset < 32); bld.MOV(dest, packed_consts); - dest = offset(dest, 1); + dest = offset(dest, bld, 1); } } break; @@ -1437,14 +1566,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_load_input: { unsigned index = 0; for (unsigned j = 0; j < instr->num_components; j++) { - fs_reg src = offset(retype(nir_inputs, dest.type), + fs_reg src = offset(retype(nir_inputs, dest.type), bld, instr->const_index[0] + index); if (has_indirect) src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); index++; bld.MOV(dest, src); - dest = offset(dest, 1); + dest = offset(dest, bld, 1); } break; } @@ -1470,11 +1599,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_interp_var_at_centroid: case nir_intrinsic_interp_var_at_sample: case nir_intrinsic_interp_var_at_offset: { - /* in SIMD16 mode, the pixel interpolator returns coords interleaved - * 8 channels at a time, same as the barycentric coords presented in - * the FS payload. this requires a bit of extra work to support. 
- */ - no16("interpolate_at_* not yet supported in SIMD16 mode."); + assert(stage == MESA_SHADER_FRAGMENT); + + ((struct brw_wm_prog_data *) prog_data)->pulls_bary = true; fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2); @@ -1517,7 +1644,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr BRW_REGISTER_TYPE_F); for (int i = 0; i < 2; i++) { fs_reg temp = vgrf(glsl_type::float_type); - bld.MUL(temp, offset(offset_src, i), fs_reg(16.0f)); + bld.MUL(temp, offset(offset_src, bld, i), fs_reg(16.0f)); fs_reg itemp = vgrf(glsl_type::int_type); bld.MOV(itemp, temp); /* float to int */ @@ -1537,10 +1664,10 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr * FRAGMENT_INTERPOLATION_OFFSET_BITS" */ set_condmod(BRW_CONDITIONAL_L, - bld.SEL(offset(src, i), itemp, fs_reg(7))); + bld.SEL(offset(src, bld, i), itemp, fs_reg(7))); } - mlen = 2; + mlen = 2 * dispatch_width / 8; inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src, fs_reg(0u)); } @@ -1552,7 +1679,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr } inst->mlen = mlen; - inst->regs_written = 2; /* 2 floats per slot returned */ + /* 2 floats per slot returned */ + inst->regs_written = 2 * dispatch_width / 8; inst->pi_noperspective = instr->variables[0]->var->data.interpolation == INTERP_QUALIFIER_NOPERSPECTIVE; @@ -1561,7 +1689,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr src.type = dest.type; bld.emit(FS_OPCODE_LINTERP, dest, dst_xy, src); - dest = offset(dest, 1); + dest = offset(dest, bld, 1); } break; } @@ -1573,13 +1701,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr fs_reg src = get_nir_src(instr->src[0]); unsigned index = 0; for (unsigned j = 0; j < instr->num_components; j++) { - fs_reg new_dest = offset(retype(nir_outputs, src.type), + fs_reg new_dest = offset(retype(nir_outputs, src.type), bld, instr->const_index[0] + index); if (has_indirect) src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1])); index++; bld.MOV(new_dest, src); - src = offset(src, 1); + src = offset(src, bld, 1); } break; } @@ -1689,7 +1817,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) /* Emit code to evaluate the actual indexing expression */ sampler_reg = vgrf(glsl_type::uint_type); bld.ADD(sampler_reg, src, fs_reg(sampler)); - bld.emit_uniformize(sampler_reg, sampler_reg); + sampler_reg = bld.emit_uniformize(sampler_reg); break; } @@ -1715,20 +1843,8 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) } } - enum glsl_base_type dest_base_type; - switch (instr->dest_type) { - case nir_type_float: - dest_base_type = GLSL_TYPE_FLOAT; - break; - case nir_type_int: - dest_base_type = GLSL_TYPE_INT; - break; - case nir_type_unsigned: - dest_base_type = GLSL_TYPE_UINT; - break; - default: - unreachable("bad type"); - } + enum glsl_base_type dest_base_type = + brw_glsl_base_type_for_nir_type (instr->dest_type); const glsl_type *dest_type = glsl_type::get_instance(dest_base_type, nir_tex_instr_dest_size(instr), @@ -1758,7 +1874,8 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) fs_reg dest = get_nir_dest(instr->dest); dest.type = this->result.type; unsigned num_components = nir_tex_instr_dest_size(instr); - emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, dest, this->result), + emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(), + dest, this->result), (1 << num_components) - 1); } 
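A minimal worked example of the image intrinsic path added in the brw_fs_nir.cpp hunks above, showing what the generic code reduces to for an atomic add on a non-array 2D uint image. The free function is hypothetical, and it assumes brw_fs_surface_builder.h and the brw::image_access helpers introduced by this series:

static fs_reg
example_uimage2D_atomic_add(const fs_builder &bld, const fs_reg &image,
                            const fs_reg &addr, const fs_reg &data)
{
   using namespace brw::image_access;

   /* For a non-array 2D image the address has two surface coordinates
    * and no array index; 'data' is assumed to already be retyped to the
    * image base type (UD for a uint image), as nir_emit_intrinsic()
    * does above.
    */
   const unsigned surf_dims = 2;
   const unsigned arr_dims = 0;
   return emit_image_atomic(bld, image, addr, data, fs_reg(),
                            surf_dims, arr_dims, 1 /* dest components */,
                            BRW_AOP_ADD);
}

The second data operand is only needed by compare-and-swap, so an empty fs_reg() is passed here, matching the call site in nir_emit_intrinsic().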
diff --git a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp index d92d4bbd81d..b75f40ba5a1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp @@ -24,6 +24,8 @@ #include "brw_fs.h" #include "brw_cfg.h" +using namespace brw; + /** @file brw_fs_peephole_predicated_break.cpp * * Loops are often structured as @@ -85,9 +87,9 @@ fs_visitor::opt_peephole_predicated_break() * instruction to set the flag register. */ if (devinfo->gen == 6 && if_inst->conditional_mod) { - bld.at(if_block, if_inst) - .CMP(bld.null_reg_d(), if_inst->src[0], if_inst->src[1], - if_inst->conditional_mod); + const fs_builder ibld(this, if_block, if_inst); + ibld.CMP(ibld.null_reg_d(), if_inst->src[0], if_inst->src[1], + if_inst->conditional_mod); jump_inst->predicate = BRW_PREDICATE_NORMAL; } else { jump_inst->predicate = if_inst->predicate; diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 364fc4a5ad2..b70895ec2ff 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -73,11 +73,20 @@ fs_visitor::assign_regs_trivial() } static void -brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width) +brw_alloc_reg_set(struct brw_compiler *compiler, int dispatch_width) { const struct brw_device_info *devinfo = compiler->devinfo; int base_reg_count = BRW_MAX_GRF; - int index = reg_width - 1; + int index = (dispatch_width / 8) - 1; + + if (dispatch_width > 8 && devinfo->gen >= 7) { + /* For IVB+, we don't need the PLN hacks or the even-reg alignment in + * SIMD16. Therefore, we can use the exact same register sets for + * SIMD16 as we do for SIMD8 and we don't need to recalculate them. + */ + compiler->fs_reg_sets[index] = compiler->fs_reg_sets[0]; + return; + } /* The registers used to make up almost all values handled in the compiler * are a scalar value occupying a single register (or 2 registers in the @@ -121,7 +130,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width) /* Compute the total number of registers across all classes. */ int ra_reg_count = 0; for (int i = 0; i < class_count; i++) { - if (devinfo->gen <= 5 && reg_width == 2) { + if (devinfo->gen <= 5 && dispatch_width == 16) { /* From the G45 PRM: * * In order to reduce the hardware complexity, the following @@ -168,7 +177,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width) int pairs_reg_count = 0; for (int i = 0; i < class_count; i++) { int class_reg_count; - if (devinfo->gen <= 5 && reg_width == 2) { + if (devinfo->gen <= 5 && dispatch_width == 16) { class_reg_count = (base_reg_count - (class_sizes[i] - 1)) / 2; /* See comment below. The only difference here is that we are @@ -214,7 +223,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width) pairs_reg_count = class_reg_count; } - if (devinfo->gen <= 5 && reg_width == 2) { + if (devinfo->gen <= 5 && dispatch_width == 16) { for (int j = 0; j < class_reg_count; j++) { ra_class_add_reg(regs, classes[i], reg); @@ -249,7 +258,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width) /* Add a special class for aligned pairs, which we'll put delta_xy * in on Gen <= 6 so that we can do PLN. 
*/ - if (devinfo->has_pln && reg_width == 1 && devinfo->gen <= 6) { + if (devinfo->has_pln && dispatch_width == 8 && devinfo->gen <= 6) { aligned_pairs_class = ra_alloc_reg_class(regs); for (int i = 0; i < pairs_reg_count; i++) { @@ -287,8 +296,8 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width) void brw_fs_alloc_reg_sets(struct brw_compiler *compiler) { - brw_alloc_reg_set(compiler, 1); - brw_alloc_reg_set(compiler, 2); + brw_alloc_reg_set(compiler, 8); + brw_alloc_reg_set(compiler, 16); } static int @@ -341,7 +350,9 @@ fs_visitor::setup_payload_interference(struct ra_graph *g, int loop_end_ip = 0; int payload_last_use_ip[payload_node_count]; - memset(payload_last_use_ip, 0, sizeof(payload_last_use_ip)); + for (int i = 0; i < payload_node_count; i++) + payload_last_use_ip[i] = -1; + int ip = 0; foreach_block_and_inst(block, fs_inst, inst, cfg) { switch (inst->opcode) { @@ -380,32 +391,15 @@ fs_visitor::setup_payload_interference(struct ra_graph *g, if (node_nr >= payload_node_count) continue; - payload_last_use_ip[node_nr] = use_ip; + for (int j = 0; j < inst->regs_read(i); j++) { + payload_last_use_ip[node_nr + j] = use_ip; + assert(node_nr + j < payload_node_count); + } } } /* Special case instructions which have extra implied registers used. */ switch (inst->opcode) { - case FS_OPCODE_LINTERP: - /* On gen6+ in SIMD16, there are 4 adjacent registers used by - * PLN's sourcing of the deltas, while we list only the first one - * in the arguments. Pre-gen6, the deltas are computed in normal - * VGRFs. - */ - if (devinfo->gen >= 6) { - int delta_x_arg = 0; - if (inst->src[delta_x_arg].file == HW_REG && - inst->src[delta_x_arg].fixed_hw_reg.file == - BRW_GENERAL_REGISTER_FILE) { - for (int i = 1; i < 4; ++i) { - int node = inst->src[delta_x_arg].fixed_hw_reg.nr + i; - assert(node < payload_node_count); - payload_last_use_ip[node] = use_ip; - } - } - } - break; - case CS_OPCODE_CS_TERMINATE: payload_last_use_ip[0] = use_ip; break; @@ -428,6 +422,9 @@ fs_visitor::setup_payload_interference(struct ra_graph *g, } for (int i = 0; i < payload_node_count; i++) { + if (payload_last_use_ip[i] == -1) + continue; + /* Mark the payload node as interfering with any virtual grf that is * live between the start of the program and our last use of the payload * node. 
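A small sketch (hypothetical helper, not in the patch) of the bookkeeping the setup_payload_interference() hunk above now performs when an instruction source reads one or more payload registers:

#include <assert.h>

static void
example_mark_payload_use(int *payload_last_use_ip, int payload_node_count,
                         int node_nr, int regs_read, int use_ip)
{
   /* A source spanning several registers keeps every one of them live
    * up to this IP; entries left at the -1 sentinel were never read and
    * are skipped entirely when interference edges are added later.
    */
   for (int j = 0; j < regs_read; j++) {
      assert(node_nr + j < payload_node_count);
      payload_last_use_ip[node_nr + j] = use_ip;
   }
}

Seeding the array with -1 instead of 0 is what lets completely unused payload registers drop out of the interference graph.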
@@ -706,10 +703,8 @@ fs_visitor::emit_unspill(bblock_t *block, fs_inst *inst, fs_reg dst, uint32_t spill_offset, int count) { int reg_size = 1; - if (dispatch_width == 16 && count % 2 == 0) { + if (dispatch_width == 16 && count % 2 == 0) reg_size = 2; - dst.width = 16; - } const fs_builder ibld = bld.annotate(inst->annotation, inst->ir) .group(reg_size * 8, 0) @@ -752,7 +747,7 @@ fs_visitor::emit_spill(bblock_t *block, fs_inst *inst, fs_reg src, for (int i = 0; i < count / reg_size; i++) { fs_inst *spill_inst = - ibld.emit(SHADER_OPCODE_GEN4_SCRATCH_WRITE, bld.null_reg_f(), src); + ibld.emit(SHADER_OPCODE_GEN4_SCRATCH_WRITE, ibld.null_reg_f(), src); src.reg_offset += reg_size; spill_inst->offset = spill_offset + i * reg_size * REG_SIZE; spill_inst->mlen = 1 + reg_size; /* header, value */ diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp index 2ad7079bdf8..72e873857ce 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp @@ -167,7 +167,6 @@ fs_visitor::register_coalesce() src_size = alloc.sizes[inst->src[0].reg]; assert(src_size <= MAX_VGRF_SIZE); - assert(inst->src[0].width % 8 == 0); channels_remaining = src_size; memset(mov, 0, sizeof(mov)); @@ -196,7 +195,7 @@ fs_visitor::register_coalesce() continue; } reg_to_offset[offset] = inst->dst.reg_offset; - if (inst->src[0].width == 16) + if (inst->regs_written > 1) reg_to_offset[offset + 1] = inst->dst.reg_offset + 1; mov[offset] = inst; channels_remaining -= inst->regs_written; @@ -229,7 +228,6 @@ fs_visitor::register_coalesce() continue; progress = true; - bool was_load_payload = inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD; for (int i = 0; i < src_size; i++) { if (mov[i]) { @@ -243,22 +241,19 @@ fs_visitor::register_coalesce() } foreach_block_and_inst(block, fs_inst, scan_inst, cfg) { - for (int i = 0; i < src_size; i++) { - if (mov[i] || was_load_payload) { - if (scan_inst->dst.file == GRF && - scan_inst->dst.reg == reg_from && - scan_inst->dst.reg_offset == i) { - scan_inst->dst.reg = reg_to; - scan_inst->dst.reg_offset = reg_to_offset[i]; - } - for (int j = 0; j < scan_inst->sources; j++) { - if (scan_inst->src[j].file == GRF && - scan_inst->src[j].reg == reg_from && - scan_inst->src[j].reg_offset == i) { - scan_inst->src[j].reg = reg_to; - scan_inst->src[j].reg_offset = reg_to_offset[i]; - } - } + if (scan_inst->dst.file == GRF && + scan_inst->dst.reg == reg_from) { + scan_inst->dst.reg = reg_to; + scan_inst->dst.reg_offset = + reg_to_offset[scan_inst->dst.reg_offset]; + } + + for (int j = 0; j < scan_inst->sources; j++) { + if (scan_inst->src[j].file == GRF && + scan_inst->src[j].reg == reg_from) { + scan_inst->src[j].reg = reg_to; + scan_inst->src[j].reg_offset = + reg_to_offset[scan_inst->src[j].reg_offset]; } } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp index 8660ec08b8f..d190d8eb6b4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp @@ -174,6 +174,9 @@ fs_visitor::opt_peephole_sel() /* Check that the MOVs are the right form. 
*/ if (!then_mov[i]->dst.equals(else_mov[i]->dst) || + then_mov[i]->exec_size != else_mov[i]->exec_size || + then_mov[i]->force_sechalf != else_mov[i]->force_sechalf || + then_mov[i]->force_writemask_all != else_mov[i]->force_writemask_all || then_mov[i]->is_partial_write() || else_mov[i]->is_partial_write() || then_mov[i]->conditional_mod != BRW_CONDITIONAL_NONE || @@ -192,14 +195,17 @@ fs_visitor::opt_peephole_sel() if (movs == 0) continue; - const fs_builder ibld = bld.at(block, if_inst); - /* Emit a CMP if our IF used the embedded comparison */ - if (devinfo->gen == 6 && if_inst->conditional_mod) + if (devinfo->gen == 6 && if_inst->conditional_mod) { + const fs_builder ibld(this, block, if_inst); ibld.CMP(ibld.null_reg_d(), if_inst->src[0], if_inst->src[1], if_inst->conditional_mod); + } for (int i = 0; i < movs; i++) { + const fs_builder ibld = fs_builder(this, then_block, then_mov[i]) + .at(block, if_inst); + if (then_mov[i]->src[0].equals(else_mov[i]->src[0])) { ibld.MOV(then_mov[i]->dst, then_mov[i]->src[0]); } else { diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp new file mode 100644 index 00000000000..50e0acd05f5 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp @@ -0,0 +1,1096 @@ +/* + * Copyright © 2013-2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_fs_surface_builder.h" +#include "brw_fs.h" + +using namespace brw; + +namespace brw { + namespace surface_access { + namespace { + /** + * Generate a logical send opcode for a surface message and return + * the result. + */ + fs_reg + emit_send(const fs_builder &bld, enum opcode opcode, + const fs_reg &addr, const fs_reg &src, const fs_reg &surface, + unsigned dims, unsigned arg, unsigned rsize, + brw_predicate pred = BRW_PREDICATE_NONE) + { + /* Reduce the dynamically uniform surface index to a single + * scalar. + */ + const fs_reg usurface = bld.emit_uniformize(surface); + const fs_reg srcs[] = { + addr, src, usurface, fs_reg(dims), fs_reg(arg) + }; + const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, rsize); + fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs)); + + inst->regs_written = rsize * bld.dispatch_width() / 8; + inst->predicate = pred; + return dst; + } + } + + /** + * Emit an untyped surface read opcode. 
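[Annotation] emit_send() above sizes the destination in whole GRFs: each returned 32-bit component occupies one channel per SIMD lane, so rsize components at dispatch width 8 or 16 cover rsize or 2 * rsize registers. A one-liner capturing that sizing (assuming 32-byte GRFs holding eight dwords):

/* GRFs written by a surface message that returns 'rsize' 32-bit components
 * per channel at the given SIMD width, as in emit_send() above. */
constexpr unsigned surface_regs_written(unsigned rsize, unsigned dispatch_width)
{
   return rsize * dispatch_width / 8;   /* one 32-byte GRF holds 8 dwords */
}

static_assert(surface_regs_written(4, 16) == 8,
              "a SIMD16 RGBA read spans eight GRFs");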
\p dims determines the number + * of components of the address and \p size the number of components of + * the returned value. + */ + fs_reg + emit_untyped_read(const fs_builder &bld, + const fs_reg &surface, const fs_reg &addr, + unsigned dims, unsigned size, + brw_predicate pred) + { + return emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL, + addr, fs_reg(), surface, dims, size, size, pred); + } + + /** + * Emit an untyped surface write opcode. \p dims determines the number + * of components of the address and \p size the number of components of + * the argument. + */ + void + emit_untyped_write(const fs_builder &bld, const fs_reg &surface, + const fs_reg &addr, const fs_reg &src, + unsigned dims, unsigned size, + brw_predicate pred) + { + emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL, + addr, src, surface, dims, size, 0, pred); + } + + /** + * Emit an untyped surface atomic opcode. \p dims determines the number + * of components of the address and \p rsize the number of components of + * the returned value (either zero or one). + */ + fs_reg + emit_untyped_atomic(const fs_builder &bld, + const fs_reg &surface, const fs_reg &addr, + const fs_reg &src0, const fs_reg &src1, + unsigned dims, unsigned rsize, unsigned op, + brw_predicate pred) + { + /* FINISHME: Factor out this frequently recurring pattern into a + * helper function. + */ + const unsigned n = (src0.file != BAD_FILE) + (src1.file != BAD_FILE); + const fs_reg srcs[] = { src0, src1 }; + const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, n); + bld.LOAD_PAYLOAD(tmp, srcs, n, 0); + + return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL, + addr, tmp, surface, dims, op, rsize, pred); + } + + /** + * Emit a typed surface read opcode. \p dims determines the number of + * components of the address and \p size the number of components of the + * returned value. + */ + fs_reg + emit_typed_read(const fs_builder &bld, const fs_reg &surface, + const fs_reg &addr, unsigned dims, unsigned size) + { + return emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL, + addr, fs_reg(), surface, dims, size, size); + } + + /** + * Emit a typed surface write opcode. \p dims determines the number of + * components of the address and \p size the number of components of the + * argument. + */ + void + emit_typed_write(const fs_builder &bld, const fs_reg &surface, + const fs_reg &addr, const fs_reg &src, + unsigned dims, unsigned size) + { + emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL, + addr, src, surface, dims, size, 0); + } + + /** + * Emit a typed surface atomic opcode. \p dims determines the number of + * components of the address and \p rsize the number of components of + * the returned value (either zero or one). + */ + fs_reg + emit_typed_atomic(const fs_builder &bld, const fs_reg &surface, + const fs_reg &addr, + const fs_reg &src0, const fs_reg &src1, + unsigned dims, unsigned rsize, unsigned op, + brw_predicate pred) + { + /* FINISHME: Factor out this frequently recurring pattern into a + * helper function. + */ + const unsigned n = (src0.file != BAD_FILE) + (src1.file != BAD_FILE); + const fs_reg srcs[] = { src0, src1 }; + const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, n); + bld.LOAD_PAYLOAD(tmp, srcs, n, 0); + + return emit_send(bld, SHADER_OPCODE_TYPED_ATOMIC_LOGICAL, + addr, tmp, surface, dims, op, rsize); + } + } +} + +namespace { + namespace image_format_info { + /** + * Simple 4-tuple of scalars used to pass around per-color component + * values. 
+ */ + struct color_u { + color_u(unsigned x = 0) : r(x), g(x), b(x), a(x) + { + } + + color_u(unsigned r, unsigned g, unsigned b, unsigned a) : + r(r), g(g), b(b), a(a) + { + } + + unsigned + operator[](unsigned i) const + { + const unsigned xs[] = { r, g, b, a }; + return xs[i]; + } + + unsigned r, g, b, a; + }; + + /** + * Return the per-channel bitfield widths for a given image format. + */ + inline color_u + get_bit_widths(mesa_format format) + { + return color_u(_mesa_get_format_bits(format, GL_RED_BITS), + _mesa_get_format_bits(format, GL_GREEN_BITS), + _mesa_get_format_bits(format, GL_BLUE_BITS), + _mesa_get_format_bits(format, GL_ALPHA_BITS)); + } + + /** + * Return the per-channel bitfield shifts for a given image format. + */ + inline color_u + get_bit_shifts(mesa_format format) + { + const color_u widths = get_bit_widths(format); + return color_u(0, widths.r, widths.r + widths.g, + widths.r + widths.g + widths.b); + } + + /** + * Return true if all present components have the same bit width. + */ + inline bool + is_homogeneous(mesa_format format) + { + const color_u widths = get_bit_widths(format); + return ((widths.g == 0 || widths.g == widths.r) && + (widths.b == 0 || widths.b == widths.r) && + (widths.a == 0 || widths.a == widths.r)); + } + + /** + * Return true if the format conversion boils down to a trivial copy. + */ + inline bool + is_conversion_trivial(const brw_device_info *devinfo, mesa_format format) + { + return (get_bit_widths(format).r == 32 && is_homogeneous(format)) || + format == brw_lower_mesa_image_format(devinfo, format); + } + + /** + * Return true if the hardware natively supports some format with + * compatible bitfield layout, but possibly different data types. + */ + inline bool + has_supported_bit_layout(const brw_device_info *devinfo, + mesa_format format) + { + const color_u widths = get_bit_widths(format); + const color_u lower_widths = get_bit_widths( + brw_lower_mesa_image_format(devinfo, format)); + + return (widths.r == lower_widths.r && + widths.g == lower_widths.g && + widths.b == lower_widths.b && + widths.a == lower_widths.a); + } + + /** + * Return true if we are required to spread individual components over + * several components of the format used by the hardware (RG32 and + * friends implemented as RGBA16UI). + */ + inline bool + has_split_bit_layout(const brw_device_info *devinfo, mesa_format format) + { + const mesa_format lower_format = + brw_lower_mesa_image_format(devinfo, format); + + return (_mesa_format_num_components(format) < + _mesa_format_num_components(lower_format)); + } + + /** + * Return true unless we have to fall back to untyped surface access. + * Fail! + */ + inline bool + has_matching_typed_format(const brw_device_info *devinfo, + mesa_format format) + { + return (_mesa_get_format_bytes(format) <= 4 || + (_mesa_get_format_bytes(format) <= 8 && + (devinfo->gen >= 8 || devinfo->is_haswell)) || + devinfo->gen >= 9); + } + + /** + * Return true if the hardware returns garbage in the unused high bits + * of each component. This may happen on IVB because we rely on the + * undocumented behavior that typed reads from surfaces of the + * unsupported R8 and R16 formats return useful data in their least + * significant bits. 
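[Annotation] get_bit_shifts() above derives each channel's starting bit by accumulating the widths of the channels before it (R at bit 0, G after R, and so on). The same accumulation on a hypothetical 10/10/10/2 layout, using a local copy of the color_u idea rather than the driver type:

/* Four per-channel values, mirroring the color_u helper above. */
struct color4 { unsigned r, g, b, a; };

/* Channel start positions follow from the widths of the preceding channels. */
constexpr color4 bit_shifts(color4 w)
{
   return { 0, w.r, w.r + w.g, w.r + w.g + w.b };
}

static_assert(bit_shifts({10, 10, 10, 2}).a == 30,
              "alpha of a 10/10/10/2 layout starts at bit 30");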
+ */ + inline bool + has_undefined_high_bits(const brw_device_info *devinfo, + mesa_format format) + { + const mesa_format lower_format = + brw_lower_mesa_image_format(devinfo, format); + + return (devinfo->gen == 7 && !devinfo->is_haswell && + (lower_format == MESA_FORMAT_R_UINT16 || + lower_format == MESA_FORMAT_R_UINT8)); + } + + /** + * Return true if the format represents values as signed integers + * requiring sign extension when unpacking. + */ + inline bool + needs_sign_extension(mesa_format format) + { + return (_mesa_get_format_datatype(format) == GL_SIGNED_NORMALIZED || + _mesa_get_format_datatype(format) == GL_INT); + } + } + + namespace image_validity { + /** + * Check whether there is an image bound at the given index and write + * the comparison result to f0.0. Returns an appropriate predication + * mode to use on subsequent image operations. + */ + brw_predicate + emit_surface_check(const fs_builder &bld, const fs_reg &image) + { + const brw_device_info *devinfo = bld.shader->devinfo; + const fs_reg size = offset(image, bld, BRW_IMAGE_PARAM_SIZE_OFFSET); + + if (devinfo->gen == 7 && !devinfo->is_haswell) { + /* Check the first component of the size field to find out if the + * image is bound. Necessary on IVB for typed atomics because + * they don't seem to respect null surfaces and will happily + * corrupt or read random memory when no image is bound. + */ + bld.CMP(bld.null_reg_ud(), + retype(size, BRW_REGISTER_TYPE_UD), + fs_reg(0), BRW_CONDITIONAL_NZ); + + return BRW_PREDICATE_NORMAL; + } else { + /* More recent platforms implement compliant behavior when a null + * surface is bound. + */ + return BRW_PREDICATE_NONE; + } + } + + /** + * Check whether the provided coordinates are within the image bounds + * and write the comparison result to f0.0. Returns an appropriate + * predication mode to use on subsequent image operations. + */ + brw_predicate + emit_bounds_check(const fs_builder &bld, const fs_reg &image, + const fs_reg &addr, unsigned dims) + { + const fs_reg size = offset(image, bld, BRW_IMAGE_PARAM_SIZE_OFFSET); + + for (unsigned c = 0; c < dims; ++c) + set_predicate(c == 0 ? BRW_PREDICATE_NONE : BRW_PREDICATE_NORMAL, + bld.CMP(bld.null_reg_ud(), + offset(retype(addr, BRW_REGISTER_TYPE_UD), bld, c), + offset(size, bld, c), + BRW_CONDITIONAL_L)); + + return BRW_PREDICATE_NORMAL; + } + } + + namespace image_coordinates { + /** + * Return the total number of coordinates needed to address a texel of + * the surface, which may be more than the sum of \p surf_dims and \p + * arr_dims if padding is required. + */ + unsigned + num_image_coordinates(const fs_builder &bld, + unsigned surf_dims, unsigned arr_dims, + mesa_format format) + { + /* HSW in vec4 mode and our software coordinate handling for untyped + * reads want the array index to be at the Z component. + */ + const bool array_index_at_z = + !image_format_info::has_matching_typed_format( + bld.shader->devinfo, format); + const unsigned zero_dims = + ((surf_dims == 1 && arr_dims == 1 && array_index_at_z) ? 1 : 0); + + return surf_dims + zero_dims + arr_dims; + } + + /** + * Transform image coordinates into the form expected by the + * implementation. 
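[Annotation] emit_bounds_check() above builds one flag result out of an unsigned "coordinate < size" comparison per dimension, predicating every CMP after the first on the previous one so the conditions are effectively ANDed. The scalar meaning of the predicate it leaves in f0.0 (illustrative only):

/* True only if every coordinate lies inside the image; the driver then
 * predicates the untyped surface access on this so out-of-bounds texels are
 * never actually read or written. */
bool in_bounds(const unsigned *coord, const unsigned *size, unsigned dims)
{
   bool ok = true;                      /* first CMP runs unpredicated */
   for (unsigned c = 0; c < dims; ++c)
      ok = ok && (coord[c] < size[c]);  /* later CMPs predicated on 'ok' */
   return ok;
}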
+ */ + fs_reg + emit_image_coordinates(const fs_builder &bld, const fs_reg &addr, + unsigned surf_dims, unsigned arr_dims, + mesa_format format) + { + const unsigned dims = + num_image_coordinates(bld, surf_dims, arr_dims, format); + + if (dims > surf_dims + arr_dims) { + assert(surf_dims == 1 && arr_dims == 1 && dims == 3); + /* The array index is required to be passed in as the Z component, + * insert a zero at the Y component to shift it to the right + * position. + * + * FINISHME: Factor out this frequently recurring pattern into a + * helper function. + */ + const fs_reg srcs[] = { addr, fs_reg(0), offset(addr, bld, 1) }; + const fs_reg dst = bld.vgrf(addr.type, dims); + bld.LOAD_PAYLOAD(dst, srcs, dims, 0); + return dst; + } else { + return addr; + } + } + + /** + * Calculate the offset in memory of the texel given by \p coord. + * + * This is meant to be used with untyped surface messages to access a + * tiled surface, what involves taking into account the tiling and + * swizzling modes of the surface manually so it will hopefully not + * happen very often. + * + * The tiling algorithm implemented here matches either the X or Y + * tiling layouts supported by the hardware depending on the tiling + * coefficients passed to the program as uniforms. See Volume 1 Part 2 + * Section 4.5 "Address Tiling Function" of the IVB PRM for an in-depth + * explanation of the hardware tiling format. + */ + fs_reg + emit_address_calculation(const fs_builder &bld, const fs_reg &image, + const fs_reg &coord, unsigned dims) + { + const brw_device_info *devinfo = bld.shader->devinfo; + const fs_reg off = offset(image, bld, BRW_IMAGE_PARAM_OFFSET_OFFSET); + const fs_reg stride = offset(image, bld, BRW_IMAGE_PARAM_STRIDE_OFFSET); + const fs_reg tile = offset(image, bld, BRW_IMAGE_PARAM_TILING_OFFSET); + const fs_reg swz = offset(image, bld, BRW_IMAGE_PARAM_SWIZZLING_OFFSET); + const fs_reg addr = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); + const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); + const fs_reg minor = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); + const fs_reg major = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); + const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD); + + /* Shift the coordinates by the fixed surface offset. It may be + * non-zero if the image is a single slice of a higher-dimensional + * surface, or if a non-zero mipmap level of the surface is bound to + * the pipeline. The offset needs to be applied here rather than at + * surface state set-up time because the desired slice-level may + * start mid-tile, so simply shifting the surface base address + * wouldn't give a well-formed tiled surface in the general case. + */ + for (unsigned c = 0; c < 2; ++c) + bld.ADD(offset(addr, bld, c), offset(off, bld, c), + (c < dims ? + offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, c) : + fs_reg(0))); + + /* The layout of 3-D textures in memory is sort-of like a tiling + * format. At each miplevel, the slices are arranged in rows of + * 2^level slices per row. The slice row is stored in tmp.y and + * the slice within the row is stored in tmp.x. + * + * The layout of 2-D array textures and cubemaps is much simpler: + * Depending on whether the ARYSPC_LOD0 layout is in use it will be + * stored in memory as an array of slices, each one being a 2-D + * arrangement of miplevels, or as a 2D arrangement of miplevels, + * each one being an array of slices. In either case the separation + * between slices of the same LOD is equal to the qpitch value + * provided as stride.w. 
+ * + * This code can be made to handle either 2D arrays and 3D textures + * by passing in the miplevel as tile.z for 3-D textures and 0 in + * tile.z for 2-D array textures. + * + * See Volume 1 Part 1 of the Gen7 PRM, sections 6.18.4.7 "Surface + * Arrays" and 6.18.6 "3D Surfaces" for a more extensive discussion + * of the hardware 3D texture and 2D array layouts. + */ + if (dims > 2) { + /* Decompose z into a major (tmp.y) and a minor (tmp.x) + * index. + */ + bld.BFE(offset(tmp, bld, 0), offset(tile, bld, 2), fs_reg(0), + offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, 2)); + bld.SHR(offset(tmp, bld, 1), + offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, 2), + offset(tile, bld, 2)); + + /* Take into account the horizontal (tmp.x) and vertical (tmp.y) + * slice offset. + */ + for (unsigned c = 0; c < 2; ++c) { + bld.MUL(offset(tmp, bld, c), + offset(stride, bld, 2 + c), offset(tmp, bld, c)); + bld.ADD(offset(addr, bld, c), + offset(addr, bld, c), offset(tmp, bld, c)); + } + } + + if (dims > 1) { + /* Calculate the major/minor x and y indices. In order to + * accommodate both X and Y tiling, the Y-major tiling format is + * treated as being a bunch of narrow X-tiles placed next to each + * other. This means that the tile width for Y-tiling is actually + * the width of one sub-column of the Y-major tile where each 4K + * tile has 8 512B sub-columns. + * + * The major Y value is the row of tiles in which the pixel lives. + * The major X value is the tile sub-column in which the pixel + * lives; for X tiling, this is the same as the tile column, for Y + * tiling, each tile has 8 sub-columns. The minor X and Y indices + * are the position within the sub-column. + */ + for (unsigned c = 0; c < 2; ++c) { + /* Calculate the minor x and y indices. */ + bld.BFE(offset(minor, bld, c), offset(tile, bld, c), + fs_reg(0), offset(addr, bld, c)); + + /* Calculate the major x and y indices. */ + bld.SHR(offset(major, bld, c), + offset(addr, bld, c), offset(tile, bld, c)); + } + + /* Calculate the texel index from the start of the tile row and + * the vertical coordinate of the row. + * Equivalent to: + * tmp.x = (major.x << tile.y << tile.x) + + * (minor.y << tile.x) + minor.x + * tmp.y = major.y << tile.y + */ + bld.SHL(tmp, major, offset(tile, bld, 1)); + bld.ADD(tmp, tmp, offset(minor, bld, 1)); + bld.SHL(tmp, tmp, offset(tile, bld, 0)); + bld.ADD(tmp, tmp, minor); + bld.SHL(offset(tmp, bld, 1), + offset(major, bld, 1), offset(tile, bld, 1)); + + /* Add it to the start of the tile row. */ + bld.MUL(offset(tmp, bld, 1), + offset(tmp, bld, 1), offset(stride, bld, 1)); + bld.ADD(tmp, tmp, offset(tmp, bld, 1)); + + /* Multiply by the Bpp value. */ + bld.MUL(dst, tmp, stride); + + if (devinfo->gen < 8 && !devinfo->is_baytrail) { + /* Take into account the two dynamically specified shifts. + * Both need are used to implement swizzling of X-tiled + * surfaces. For Y-tiled surfaces only one bit needs to be + * XOR-ed with bit 6 of the memory address, so a swz value of + * 0xff (actually interpreted as 31 by the hardware) will be + * provided to cause the relevant bit of tmp.y to be zero and + * turn the first XOR into the identity. For linear surfaces + * or platforms lacking address swizzling both shifts will be + * 0xff causing the relevant bits of both tmp.x and .y to be + * zero, what effectively disables swizzling. + */ + for (unsigned c = 0; c < 2; ++c) + bld.SHR(offset(tmp, bld, c), dst, offset(swz, bld, c)); + + /* XOR tmp.x and tmp.y with bit 6 of the memory address. 
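[Annotation] The tiling arithmetic above splits each coordinate into a minor part (position inside a tile sub-column, via BFE) and a major part (the sub-column or tile-row index, via SHR), then linearizes them exactly as the in-code comment states: tmp.x = (major.x << tile.y << tile.x) + (minor.y << tile.x) + minor.x and tmp.y = major.y << tile.y. A scalar model of that computation, assuming tile_x/tile_y are the log2 sub-column width and tile height and pitch/cpp stand in for the stride uniforms:

#include <cstdint>

/* Hypothetical standalone helper, not the driver code: byte offset of texel
 * (x, y) in an X-tiled-style layout.  tile_x/tile_y are log2 of the tile
 * sub-column width and tile height; pitch and cpp model stride.y/stride.x. */
uint32_t tiled_texel_offset(uint32_t x, uint32_t y,
                            unsigned tile_x, unsigned tile_y,
                            uint32_t pitch, uint32_t cpp)
{
   const uint32_t minor_x = x & ((1u << tile_x) - 1);   /* BFE(tile.x, 0, x) */
   const uint32_t minor_y = y & ((1u << tile_y) - 1);   /* BFE(tile.y, 0, y) */
   const uint32_t major_x = x >> tile_x;                /* SHR(x, tile.x)    */
   const uint32_t major_y = y >> tile_y;                /* SHR(y, tile.y)    */

   /* tmp.x = (major.x << tile.y << tile.x) + (minor.y << tile.x) + minor.x */
   const uint32_t in_row = (((major_x << tile_y) + minor_y) << tile_x) + minor_x;
   /* tmp.y = major.y << tile.y, then scaled by the row pitch */
   const uint32_t row = (major_y << tile_y) * pitch;

   return (in_row + row) * cpp;        /* final multiplies by the Bpp value */
}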
*/ + bld.XOR(tmp, tmp, offset(tmp, bld, 1)); + bld.AND(tmp, tmp, fs_reg(1 << 6)); + bld.XOR(dst, dst, tmp); + } + + } else { + /* Multiply by the Bpp/stride value. Note that the addr.y may be + * non-zero even if the image is one-dimensional because a + * vertical offset may have been applied above to select a + * non-zero slice or level of a higher-dimensional texture. + */ + bld.MUL(offset(addr, bld, 1), + offset(addr, bld, 1), offset(stride, bld, 1)); + bld.ADD(addr, addr, offset(addr, bld, 1)); + bld.MUL(dst, addr, stride); + } + + return dst; + } + } + + namespace image_format_conversion { + using image_format_info::color_u; + + namespace { + /** + * Maximum representable value in an unsigned integer with the given + * number of bits. + */ + inline unsigned + scale(unsigned n) + { + return (1 << n) - 1; + } + } + + /** + * Pack the vector \p src in a bitfield given the per-component bit + * shifts and widths. Note that bitfield components are not allowed to + * cross 32-bit boundaries. + */ + fs_reg + emit_pack(const fs_builder &bld, const fs_reg &src, + const color_u &shifts, const color_u &widths) + { + const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4); + bool seen[4] = {}; + + for (unsigned c = 0; c < 4; ++c) { + if (widths[c]) { + const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD); + + /* Shift each component left to the correct bitfield position. */ + bld.SHL(tmp, offset(src, bld, c), fs_reg(shifts[c] % 32)); + + /* Add everything up. */ + if (seen[shifts[c] / 32]) { + bld.OR(offset(dst, bld, shifts[c] / 32), + offset(dst, bld, shifts[c] / 32), tmp); + } else { + bld.MOV(offset(dst, bld, shifts[c] / 32), tmp); + seen[shifts[c] / 32] = true; + } + } + } + + return dst; + } + + /** + * Unpack a vector from the bitfield \p src given the per-component bit + * shifts and widths. Note that bitfield components are not allowed to + * cross 32-bit boundaries. + */ + fs_reg + emit_unpack(const fs_builder &bld, const fs_reg &src, + const color_u &shifts, const color_u &widths) + { + const fs_reg dst = bld.vgrf(src.type, 4); + + for (unsigned c = 0; c < 4; ++c) { + if (widths[c]) { + /* Shift left to discard the most significant bits. */ + bld.SHL(offset(dst, bld, c), + offset(src, bld, shifts[c] / 32), + fs_reg(32 - shifts[c] % 32 - widths[c])); + + /* Shift back to the least significant bits using an arithmetic + * shift to get sign extension on signed types. + */ + bld.ASR(offset(dst, bld, c), + offset(dst, bld, c), fs_reg(32 - widths[c])); + } + } + + return dst; + } + + /** + * Convert an integer vector into another integer vector of the + * specified bit widths, properly handling overflow. + */ + fs_reg + emit_convert_to_integer(const fs_builder &bld, const fs_reg &src, + const color_u &widths, bool is_signed) + { + const unsigned s = (is_signed ? 1 : 0); + const fs_reg dst = bld.vgrf( + is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4); + assert(src.type == dst.type); + + for (unsigned c = 0; c < 4; ++c) { + if (widths[c]) { + /* Clamp to the maximum value. */ + bld.emit_minmax(offset(dst, bld, c), offset(src, bld, c), + fs_reg((int)scale(widths[c] - s)), + BRW_CONDITIONAL_L); + + /* Clamp to the minimum value. */ + if (is_signed) + bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c), + fs_reg(-(int)scale(widths[c] - s) - 1), + BRW_CONDITIONAL_G); + } + } + + return dst; + } + + /** + * Convert a normalized fixed-point vector of the specified signedness + * and bit widths into a floating point vector. 
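[Annotation] emit_unpack() above recovers each component by shifting it up to the top of a 32-bit word and arithmetic-shifting it back down, which gives sign extension for free on signed formats; emit_pack() relies on the values already being clamped to their widths and simply shifts and ORs. The same per-component operations on scalars (illustrative; assumes a component never crosses a 32-bit boundary, as the comments above require):

#include <cstdint>

/* SHL up to the top of the word, then ASR back down: extracts 'width' bits
 * starting at 'shift' with sign extension, as emit_unpack() does per channel. */
int32_t unpack_component(uint32_t word, unsigned shift, unsigned width)
{
   const int32_t up = (int32_t)(word << (32u - shift - width));
   return up >> (32u - width);          /* arithmetic shift sign-extends */
}

/* Shift into position and OR, as emit_pack() does; 'value' is assumed to be
 * clamped to 'width' bits already. */
uint32_t pack_component(uint32_t packed, uint32_t value, unsigned shift)
{
   return packed | (value << shift);
}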
+ */ + fs_reg + emit_convert_from_scaled(const fs_builder &bld, const fs_reg &src, + const color_u &widths, bool is_signed) + { + const unsigned s = (is_signed ? 1 : 0); + const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_F, 4); + + for (unsigned c = 0; c < 4; ++c) { + if (widths[c]) { + /* Convert to float. */ + bld.MOV(offset(dst, bld, c), offset(src, bld, c)); + + /* Divide by the normalization constants. */ + bld.MUL(offset(dst, bld, c), offset(dst, bld, c), + fs_reg(1.0f / scale(widths[c] - s))); + + /* Clamp to the minimum value. */ + if (is_signed) + bld.emit_minmax(offset(dst, bld, c), + offset(dst, bld, c), fs_reg(-1.0f), + BRW_CONDITIONAL_G); + } + } + return dst; + } + + /** + * Convert a floating-point vector into a normalized fixed-point vector + * of the specified signedness and bit widths. + */ + fs_reg + emit_convert_to_scaled(const fs_builder &bld, const fs_reg &src, + const color_u &widths, bool is_signed) + { + const unsigned s = (is_signed ? 1 : 0); + const fs_reg dst = bld.vgrf( + is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4); + const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F); + + for (unsigned c = 0; c < 4; ++c) { + if (widths[c]) { + /* Clamp the normalized floating-point argument. */ + if (is_signed) { + bld.emit_minmax(offset(fdst, bld, c), offset(src, bld, c), + fs_reg(-1.0f), BRW_CONDITIONAL_G); + + bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c), + fs_reg(1.0f), BRW_CONDITIONAL_L); + } else { + set_saturate(true, bld.MOV(offset(fdst, bld, c), + offset(src, bld, c))); + } + + /* Multiply by the normalization constants. */ + bld.MUL(offset(fdst, bld, c), offset(fdst, bld, c), + fs_reg((float)scale(widths[c] - s))); + + /* Convert to integer. */ + bld.RNDE(offset(fdst, bld, c), offset(fdst, bld, c)); + bld.MOV(offset(dst, bld, c), offset(fdst, bld, c)); + } + } + + return dst; + } + + /** + * Convert a floating point vector of the specified bit widths into a + * 32-bit floating point vector. + */ + fs_reg + emit_convert_from_float(const fs_builder &bld, const fs_reg &src, + const color_u &widths) + { + const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4); + const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F); + + for (unsigned c = 0; c < 4; ++c) { + if (widths[c]) { + bld.MOV(offset(dst, bld, c), offset(src, bld, c)); + + /* Extend 10-bit and 11-bit floating point numbers to 15 bits. + * This works because they have a 5-bit exponent just like the + * 16-bit floating point format, and they have no sign bit. + */ + if (widths[c] < 16) + bld.SHL(offset(dst, bld, c), + offset(dst, bld, c), fs_reg(15 - widths[c])); + + /* Convert to 32-bit floating point. */ + bld.F16TO32(offset(fdst, bld, c), offset(dst, bld, c)); + } + } + + return fdst; + } + + /** + * Convert a vector into a floating point vector of the specified bit + * widths. + */ + fs_reg + emit_convert_to_float(const fs_builder &bld, const fs_reg &src, + const color_u &widths) + { + const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4); + const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F); + + for (unsigned c = 0; c < 4; ++c) { + if (widths[c]) { + bld.MOV(offset(fdst, bld, c), offset(src, bld, c)); + + /* Clamp to the minimum value. */ + if (widths[c] < 16) + bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c), + fs_reg(0.0f), BRW_CONDITIONAL_G); + + /* Convert to 16-bit floating-point. */ + bld.F32TO16(offset(dst, bld, c), offset(fdst, bld, c)); + + /* Discard the least significant bits to get floating point + * numbers of the requested width. 
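[Annotation] The *_scaled conversions above follow the usual UNORM rules: divide by 2^n - 1 when unpacking, and clamp, scale and round-to-nearest-even (RNDE) when packing. A scalar version for one unsigned normalized component (illustrative; the signed case additionally clamps at -1.0 as in the code above):

#include <algorithm>
#include <cmath>
#include <cstdint>

/* Largest value of an n-bit unsigned field, matching scale() above. */
static inline float unorm_scale(unsigned n) { return (float)((1u << n) - 1u); }

/* emit_convert_from_scaled(): integer -> float in [0, 1]. */
float unorm_to_float(uint32_t v, unsigned bits)
{
   return (float)v * (1.0f / unorm_scale(bits));
}

/* emit_convert_to_scaled(): saturate (the saturating MOV), scale, RNDE. */
uint32_t float_to_unorm(float f, unsigned bits)
{
   const float clamped = std::min(std::max(f, 0.0f), 1.0f);
   return (uint32_t)std::nearbyint(clamped * unorm_scale(bits));
}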
This works because the + * 10-bit and 11-bit floating point formats have a 5-bit + * exponent just like the 16-bit format, and they have no sign + * bit. + */ + if (widths[c] < 16) + bld.SHR(offset(dst, bld, c), offset(dst, bld, c), + fs_reg(15 - widths[c])); + } + } + + return dst; + } + + /** + * Fill missing components of a vector with 0, 0, 0, 1. + */ + fs_reg + emit_pad(const fs_builder &bld, const fs_reg &src, + const color_u &widths) + { + const fs_reg dst = bld.vgrf(src.type, 4); + const unsigned pad[] = { 0, 0, 0, 1 }; + + for (unsigned c = 0; c < 4; ++c) + bld.MOV(offset(dst, bld, c), + widths[c] ? offset(src, bld, c) : fs_reg(pad[c])); + + return dst; + } + } +} + +namespace brw { + namespace image_access { + /** + * Load a vector from a surface of the given format and dimensionality + * at the given coordinates. \p surf_dims and \p arr_dims give the + * number of non-array and array coordinates of the image respectively. + */ + fs_reg + emit_image_load(const fs_builder &bld, + const fs_reg &image, const fs_reg &addr, + unsigned surf_dims, unsigned arr_dims, + mesa_format format) + { + using namespace image_format_info; + using namespace image_format_conversion; + using namespace image_validity; + using namespace image_coordinates; + using namespace surface_access; + const brw_device_info *devinfo = bld.shader->devinfo; + const mesa_format lower_format = + brw_lower_mesa_image_format(devinfo, format); + fs_reg tmp; + + /* Transform the image coordinates into actual surface coordinates. */ + const fs_reg saddr = + emit_image_coordinates(bld, addr, surf_dims, arr_dims, format); + const unsigned dims = + num_image_coordinates(bld, surf_dims, arr_dims, format); + + if (has_matching_typed_format(devinfo, format)) { + /* Hopefully we get here most of the time... */ + tmp = emit_typed_read(bld, image, saddr, dims, + _mesa_format_num_components(lower_format)); + } else { + /* Untyped surface reads return 32 bits of the surface per + * component, without any sort of unpacking or type conversion, + */ + const unsigned size = _mesa_get_format_bytes(format) / 4; + + /* they don't properly handle out of bounds access, so we have to + * check manually if the coordinates are valid and predicate the + * surface read on the result, + */ + const brw_predicate pred = + emit_bounds_check(bld, image, saddr, dims); + + /* and they don't know about surface coordinates, we need to + * convert them to a raw memory offset. + */ + const fs_reg laddr = emit_address_calculation(bld, image, saddr, dims); + + tmp = emit_untyped_read(bld, image, laddr, 1, size, pred); + + /* An out of bounds surface access should give zero as result. */ + for (unsigned c = 0; c < 4; ++c) + set_predicate(pred, bld.SEL(offset(tmp, bld, c), + offset(tmp, bld, c), fs_reg(0))); + } + + /* Set the register type to D instead of UD if the data type is + * represented as a signed integer in memory so that sign extension + * is handled correctly by unpack. + */ + if (needs_sign_extension(format)) + tmp = retype(tmp, BRW_REGISTER_TYPE_D); + + if (!has_supported_bit_layout(devinfo, format)) { + /* Unpack individual vector components from the bitfield if the + * hardware is unable to do it for us. 
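[Annotation] Two small rules round off the untyped fallback in emit_image_load() above: an out-of-bounds access must return zero (the predicated SEL), and channels the format does not store are filled with the usual (0, 0, 0, 1) defaults by emit_pad(). Per channel, that amounts to (illustrative only):

/* Scalar summary of the post-read fixups in emit_image_load() above. */
float load_channel(bool in_bounds, bool channel_present,
                   float fetched, unsigned channel)
{
   static const float pad[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
   if (!channel_present)
      return pad[channel];              /* emit_pad(): missing channel default */
   return in_bounds ? fetched : 0.0f;   /* predicated SEL: OOB reads give 0   */
}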
+ */ + if (has_split_bit_layout(devinfo, format)) + tmp = emit_pack(bld, tmp, get_bit_shifts(lower_format), + get_bit_widths(lower_format)); + else + tmp = emit_unpack(bld, tmp, get_bit_shifts(format), + get_bit_widths(format)); + + } else if ((needs_sign_extension(format) && + !is_conversion_trivial(devinfo, format)) || + has_undefined_high_bits(devinfo, format)) { + /* Perform a trivial unpack even though the bit layout matches in + * order to get the most significant bits of each component + * initialized properly. + */ + tmp = emit_unpack(bld, tmp, color_u(0, 32, 64, 96), + get_bit_widths(format)); + } + + if (!_mesa_is_format_integer(format)) { + if (is_conversion_trivial(devinfo, format)) { + /* Just need to cast the vector to the target type. */ + tmp = retype(tmp, BRW_REGISTER_TYPE_F); + } else { + /* Do the right sort of type conversion to float. */ + if (_mesa_get_format_datatype(format) == GL_FLOAT) + tmp = emit_convert_from_float( + bld, tmp, get_bit_widths(format)); + else + tmp = emit_convert_from_scaled( + bld, tmp, get_bit_widths(format), + _mesa_is_format_signed(format)); + } + } + + /* Initialize missing components of the result. */ + return emit_pad(bld, tmp, get_bit_widths(format)); + } + + /** + * Store a vector in a surface of the given format and dimensionality at + * the given coordinates. \p surf_dims and \p arr_dims give the number + * of non-array and array coordinates of the image respectively. + */ + void + emit_image_store(const fs_builder &bld, const fs_reg &image, + const fs_reg &addr, const fs_reg &src, + unsigned surf_dims, unsigned arr_dims, + mesa_format format) + { + using namespace image_format_info; + using namespace image_format_conversion; + using namespace image_validity; + using namespace image_coordinates; + using namespace surface_access; + const brw_device_info *devinfo = bld.shader->devinfo; + + /* Transform the image coordinates into actual surface coordinates. */ + const fs_reg saddr = + emit_image_coordinates(bld, addr, surf_dims, arr_dims, format); + const unsigned dims = + num_image_coordinates(bld, surf_dims, arr_dims, format); + + if (format == MESA_FORMAT_NONE) { + /* We don't know what the format is, but that's fine because it + * implies write-only access, and typed surface writes are always + * able to take care of type conversion and packing for us. + */ + emit_typed_write(bld, image, saddr, src, dims, 4); + + } else { + const mesa_format lower_format = + brw_lower_mesa_image_format(devinfo, format); + fs_reg tmp = src; + + if (!is_conversion_trivial(devinfo, format)) { + /* Do the right sort of type conversion. */ + if (_mesa_get_format_datatype(format) == GL_FLOAT) + tmp = emit_convert_to_float(bld, tmp, get_bit_widths(format)); + + else if (_mesa_is_format_integer(format)) + tmp = emit_convert_to_integer(bld, tmp, get_bit_widths(format), + _mesa_is_format_signed(format)); + + else + tmp = emit_convert_to_scaled(bld, tmp, get_bit_widths(format), + _mesa_is_format_signed(format)); + } + + /* We're down to bit manipulation at this point. */ + tmp = retype(tmp, BRW_REGISTER_TYPE_UD); + + if (!has_supported_bit_layout(devinfo, format)) { + /* Pack the vector components into a bitfield if the hardware + * is unable to do it for us. 
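[Annotation] Before packing, emit_image_store() above picks its conversion by the format's datatype: float data goes through the small-float path, integer data is clamped to the destination width, and everything else is treated as normalized fixed-point; trivial formats skip conversion entirely. A sketch of that dispatch with hypothetical stand-in enums (the real code queries _mesa_get_format_datatype and friends):

/* Hypothetical stand-ins for the Mesa format queries used above. */
enum class datatype { floating, integer, normalized };
enum class conversion { none, to_float, to_integer, to_scaled };

/* Mirrors the conversion choice in emit_image_store() above. */
conversion store_conversion(bool conversion_trivial, datatype t)
{
   if (conversion_trivial)
      return conversion::none;
   switch (t) {
   case datatype::floating: return conversion::to_float;
   case datatype::integer:  return conversion::to_integer;
   default:                 return conversion::to_scaled;
   }
}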
+ */ + if (has_split_bit_layout(devinfo, format)) + tmp = emit_unpack(bld, tmp, get_bit_shifts(lower_format), + get_bit_widths(lower_format)); + + else + tmp = emit_pack(bld, tmp, get_bit_shifts(format), + get_bit_widths(format)); + } + + if (has_matching_typed_format(devinfo, format)) { + /* Hopefully we get here most of the time... */ + emit_typed_write(bld, image, saddr, tmp, dims, + _mesa_format_num_components(lower_format)); + + } else { + /* Untyped surface writes store 32 bits of the surface per + * component, without any sort of packing or type conversion, + */ + const unsigned size = _mesa_get_format_bytes(format) / 4; + + /* they don't properly handle out of bounds access, so we have + * to check manually if the coordinates are valid and predicate + * the surface write on the result, + */ + const brw_predicate pred = + emit_bounds_check(bld, image, saddr, dims); + + /* and, phew, they don't know about surface coordinates, we + * need to convert them to a raw memory offset. + */ + const fs_reg laddr = emit_address_calculation( + bld, image, saddr, dims); + + emit_untyped_write(bld, image, laddr, tmp, 1, size, pred); + } + } + } + + /** + * Perform an atomic read-modify-write operation in a surface of the + * given dimensionality at the given coordinates. \p surf_dims and \p + * arr_dims give the number of non-array and array coordinates of the + * image respectively. Main building block of the imageAtomic GLSL + * built-ins. + */ + fs_reg + emit_image_atomic(const fs_builder &bld, + const fs_reg &image, const fs_reg &addr, + const fs_reg &src0, const fs_reg &src1, + unsigned surf_dims, unsigned arr_dims, + unsigned rsize, unsigned op) + { + using namespace image_validity; + using namespace image_coordinates; + using namespace surface_access; + /* Avoid performing an atomic operation on an unbound surface. */ + const brw_predicate pred = emit_surface_check(bld, image); + + /* Transform the image coordinates into actual surface coordinates. */ + const fs_reg saddr = + emit_image_coordinates(bld, addr, surf_dims, arr_dims, + MESA_FORMAT_R_UINT32); + const unsigned dims = + num_image_coordinates(bld, surf_dims, arr_dims, + MESA_FORMAT_R_UINT32); + + /* Thankfully we can do without untyped atomics here. */ + const fs_reg tmp = emit_typed_atomic(bld, image, saddr, src0, src1, + dims, rsize, op, pred); + + /* An unbound surface access should give zero as result. */ + if (rsize) + set_predicate(pred, bld.SEL(tmp, tmp, fs_reg(0))); + + return tmp; + } + } +} diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h new file mode 100644 index 00000000000..a3dd839955b --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h @@ -0,0 +1,89 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2013-2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef BRW_FS_SURFACE_BUILDER_H +#define BRW_FS_SURFACE_BUILDER_H + +#include "brw_fs_builder.h" +#include "brw_context.h" + +namespace brw { + namespace surface_access { + fs_reg + emit_untyped_read(const fs_builder &bld, + const fs_reg &surface, const fs_reg &addr, + unsigned dims, unsigned size, + brw_predicate pred = BRW_PREDICATE_NONE); + + void + emit_untyped_write(const fs_builder &bld, const fs_reg &surface, + const fs_reg &addr, const fs_reg &src, + unsigned dims, unsigned size, + brw_predicate pred = BRW_PREDICATE_NONE); + + fs_reg + emit_untyped_atomic(const fs_builder &bld, + const fs_reg &surface, const fs_reg &addr, + const fs_reg &src0, const fs_reg &src1, + unsigned dims, unsigned rsize, unsigned op, + brw_predicate pred = BRW_PREDICATE_NONE); + + fs_reg + emit_typed_read(const fs_builder &bld, const fs_reg &surface, + const fs_reg &addr, unsigned dims, unsigned size); + + void + emit_typed_write(const fs_builder &bld, const fs_reg &surface, + const fs_reg &addr, const fs_reg &src, + unsigned dims, unsigned size); + + fs_reg + emit_typed_atomic(const fs_builder &bld, const fs_reg &surface, + const fs_reg &addr, + const fs_reg &src0, const fs_reg &src1, + unsigned dims, unsigned rsize, unsigned op, + brw_predicate pred = BRW_PREDICATE_NONE); + } + + namespace image_access { + fs_reg + emit_image_load(const fs_builder &bld, + const fs_reg &image, const fs_reg &addr, + unsigned surf_dims, unsigned arr_dims, + mesa_format format); + + void + emit_image_store(const fs_builder &bld, const fs_reg &image, + const fs_reg &addr, const fs_reg &src, + unsigned surf_dims, unsigned arr_dims, + mesa_format format); + fs_reg + emit_image_atomic(const fs_builder &bld, + const fs_reg &image, const fs_reg &addr, + const fs_reg &src0, const fs_reg &src1, + unsigned surf_dims, unsigned arr_dims, + unsigned rsize, unsigned op); + } +} +#endif diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp index 01d3a569858..96d4f375da2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp @@ -173,7 +173,7 @@ ir_vector_reference_visitor::visit_enter(ir_assignment *ir) return visit_continue_with_parent; } if (ir->lhs->as_dereference_variable() && - is_power_of_two(ir->write_mask) && + _mesa_is_pow_two(ir->write_mask) && !ir->condition) { /* If we're writing just a channel, then channel-splitting the LHS is OK. 
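[Annotation] The vector-splitting hunk above only swaps the local is_power_of_two() helper for the shared _mesa_is_pow_two(); the condition still means "this assignment writes exactly one channel", i.e. the write mask has a single bit set. The classic bit trick behind such a check:

#include <cstdint>

/* A write mask names exactly one channel when it is a power of two,
 * i.e. it has a single bit set. */
bool writes_single_channel(uint32_t write_mask)
{
   return write_mask != 0 && (write_mask & (write_mask - 1)) == 0;
}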
*/ diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 9a4bad6bcf5..111db8c4323 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -77,612 +77,6 @@ fs_visitor::emit_vs_system_value(int location) return reg; } -fs_inst * -fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, - fs_reg coordinate, int coord_components, - fs_reg shadow_c, - fs_reg lod, fs_reg dPdy, int grad_components, - uint32_t sampler) -{ - int mlen; - int base_mrf = 1; - bool simd16 = false; - fs_reg orig_dst; - - /* g0 header. */ - mlen = 1; - - if (shadow_c.file != BAD_FILE) { - for (int i = 0; i < coord_components; i++) { - bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate); - coordinate = offset(coordinate, 1); - } - - /* gen4's SIMD8 sampler always has the slots for u,v,r present. - * the unused slots must be zeroed. - */ - for (int i = coord_components; i < 3; i++) { - bld.MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)); - } - mlen += 3; - - if (op == ir_tex) { - /* There's no plain shadow compare message, so we use shadow - * compare with a bias of 0.0. - */ - bld.MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)); - mlen++; - } else if (op == ir_txb || op == ir_txl) { - bld.MOV(fs_reg(MRF, base_mrf + mlen), lod); - mlen++; - } else { - unreachable("Should not get here."); - } - - bld.MOV(fs_reg(MRF, base_mrf + mlen), shadow_c); - mlen++; - } else if (op == ir_tex) { - for (int i = 0; i < coord_components; i++) { - bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate); - coordinate = offset(coordinate, 1); - } - /* zero the others. */ - for (int i = coord_components; i<3; i++) { - bld.MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)); - } - /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ - mlen += 3; - } else if (op == ir_txd) { - fs_reg &dPdx = lod; - - for (int i = 0; i < coord_components; i++) { - bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate); - coordinate = offset(coordinate, 1); - } - /* the slots for u and v are always present, but r is optional */ - mlen += MAX2(coord_components, 2); - - /* P = u, v, r - * dPdx = dudx, dvdx, drdx - * dPdy = dudy, dvdy, drdy - * - * 1-arg: Does not exist. - * - * 2-arg: dudx dvdx dudy dvdy - * dPdx.x dPdx.y dPdy.x dPdy.y - * m4 m5 m6 m7 - * - * 3-arg: dudx dvdx drdx dudy dvdy drdy - * dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z - * m5 m6 m7 m8 m9 m10 - */ - for (int i = 0; i < grad_components; i++) { - bld.MOV(fs_reg(MRF, base_mrf + mlen), dPdx); - dPdx = offset(dPdx, 1); - } - mlen += MAX2(grad_components, 2); - - for (int i = 0; i < grad_components; i++) { - bld.MOV(fs_reg(MRF, base_mrf + mlen), dPdy); - dPdy = offset(dPdy, 1); - } - mlen += MAX2(grad_components, 2); - } else if (op == ir_txs) { - /* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. */ - simd16 = true; - bld.MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod); - mlen += 2; - } else { - /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod - * instructions. We'll need to do SIMD16 here. - */ - simd16 = true; - assert(op == ir_txb || op == ir_txl || op == ir_txf); - - for (int i = 0; i < coord_components; i++) { - bld.MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type), - coordinate); - coordinate = offset(coordinate, 1); - } - - /* Initialize the rest of u/v/r with 0.0. Empirically, this seems to - * be necessary for TXF (ld), but seems wise to do for all messages. 
- */ - for (int i = coord_components; i < 3; i++) { - bld.MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f)); - } - - /* lod/bias appears after u/v/r. */ - mlen += 6; - - bld.MOV(fs_reg(MRF, base_mrf + mlen, lod.type), lod); - mlen++; - - /* The unused upper half. */ - mlen++; - } - - if (simd16) { - /* Now, since we're doing simd16, the return is 2 interleaved - * vec4s where the odd-indexed ones are junk. We'll need to move - * this weirdness around to the expected layout. - */ - orig_dst = dst; - dst = fs_reg(GRF, alloc.allocate(8), orig_dst.type); - } - - enum opcode opcode; - switch (op) { - case ir_tex: opcode = SHADER_OPCODE_TEX; break; - case ir_txb: opcode = FS_OPCODE_TXB; break; - case ir_txl: opcode = SHADER_OPCODE_TXL; break; - case ir_txd: opcode = SHADER_OPCODE_TXD; break; - case ir_txs: opcode = SHADER_OPCODE_TXS; break; - case ir_txf: opcode = SHADER_OPCODE_TXF; break; - default: - unreachable("not reached"); - } - - fs_inst *inst = bld.emit(opcode, dst, reg_undef, fs_reg(sampler)); - inst->base_mrf = base_mrf; - inst->mlen = mlen; - inst->header_size = 1; - inst->regs_written = simd16 ? 8 : 4; - - if (simd16) { - for (int i = 0; i < 4; i++) { - bld.MOV(orig_dst, dst); - orig_dst = offset(orig_dst, 1); - dst = offset(dst, 2); - } - } - - return inst; -} - -fs_inst * -fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst, - fs_reg coordinate, int vector_elements, - fs_reg shadow_c, fs_reg lod, - uint32_t sampler) -{ - fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F, dispatch_width); - bool has_lod = op == ir_txl || op == ir_txb || op == ir_txf; - - if (has_lod && shadow_c.file != BAD_FILE) - no16("TXB and TXL with shadow comparison unsupported in SIMD16."); - - if (op == ir_txd) - no16("textureGrad unsupported in SIMD16."); - - /* Copy the coordinates. */ - for (int i = 0; i < vector_elements; i++) { - bld.MOV(retype(offset(message, i), coordinate.type), coordinate); - coordinate = offset(coordinate, 1); - } - - fs_reg msg_end = offset(message, vector_elements); - - /* Messages other than sample and ld require all three components */ - if (has_lod || shadow_c.file != BAD_FILE) { - for (int i = vector_elements; i < 3; i++) { - bld.MOV(offset(message, i), fs_reg(0.0f)); - } - } - - if (has_lod) { - fs_reg msg_lod = retype(offset(message, 3), op == ir_txf ? - BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F); - bld.MOV(msg_lod, lod); - msg_end = offset(msg_lod, 1); - } - - if (shadow_c.file != BAD_FILE) { - fs_reg msg_ref = offset(message, 3 + has_lod); - bld.MOV(msg_ref, shadow_c); - msg_end = offset(msg_ref, 1); - } - - enum opcode opcode; - switch (op) { - case ir_tex: opcode = SHADER_OPCODE_TEX; break; - case ir_txb: opcode = FS_OPCODE_TXB; break; - case ir_txd: opcode = SHADER_OPCODE_TXD; break; - case ir_txl: opcode = SHADER_OPCODE_TXL; break; - case ir_txs: opcode = SHADER_OPCODE_TXS; break; - case ir_txf: opcode = SHADER_OPCODE_TXF; break; - default: unreachable("not reached"); - } - - fs_inst *inst = bld.emit(opcode, dst, reg_undef, fs_reg(sampler)); - inst->base_mrf = message.reg - 1; - inst->mlen = msg_end.reg - inst->base_mrf; - inst->header_size = 1; - inst->regs_written = 8; - - return inst; -} - -/* gen5's sampler has slots for u, v, r, array index, then optional - * parameters like shadow comparitor or LOD bias. If optional - * parameters aren't present, those base slots are optional and don't - * need to be included in the message. - * - * We don't fill in the unnecessary slots regardless, which may look - * surprising in the disassembly. 
- */ -fs_inst * -fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst, - fs_reg coordinate, int vector_elements, - fs_reg shadow_c, - fs_reg lod, fs_reg lod2, int grad_components, - fs_reg sample_index, uint32_t sampler, - bool has_offset) -{ - int reg_width = dispatch_width / 8; - unsigned header_size = 0; - - fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F, dispatch_width); - fs_reg msg_coords = message; - - if (has_offset) { - /* The offsets set up by the ir_texture visitor are in the - * m1 header, so we can't go headerless. - */ - header_size = 1; - message.reg--; - } - - for (int i = 0; i < vector_elements; i++) { - bld.MOV(retype(offset(msg_coords, i), coordinate.type), coordinate); - coordinate = offset(coordinate, 1); - } - fs_reg msg_end = offset(msg_coords, vector_elements); - fs_reg msg_lod = offset(msg_coords, 4); - - if (shadow_c.file != BAD_FILE) { - fs_reg msg_shadow = msg_lod; - bld.MOV(msg_shadow, shadow_c); - msg_lod = offset(msg_shadow, 1); - msg_end = msg_lod; - } - - enum opcode opcode; - switch (op) { - case ir_tex: - opcode = SHADER_OPCODE_TEX; - break; - case ir_txb: - bld.MOV(msg_lod, lod); - msg_end = offset(msg_lod, 1); - - opcode = FS_OPCODE_TXB; - break; - case ir_txl: - bld.MOV(msg_lod, lod); - msg_end = offset(msg_lod, 1); - - opcode = SHADER_OPCODE_TXL; - break; - case ir_txd: { - /** - * P = u, v, r - * dPdx = dudx, dvdx, drdx - * dPdy = dudy, dvdy, drdy - * - * Load up these values: - * - dudx dudy dvdx dvdy drdx drdy - * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z - */ - msg_end = msg_lod; - for (int i = 0; i < grad_components; i++) { - bld.MOV(msg_end, lod); - lod = offset(lod, 1); - msg_end = offset(msg_end, 1); - - bld.MOV(msg_end, lod2); - lod2 = offset(lod2, 1); - msg_end = offset(msg_end, 1); - } - - opcode = SHADER_OPCODE_TXD; - break; - } - case ir_txs: - msg_lod = retype(msg_end, BRW_REGISTER_TYPE_UD); - bld.MOV(msg_lod, lod); - msg_end = offset(msg_lod, 1); - - opcode = SHADER_OPCODE_TXS; - break; - case ir_query_levels: - msg_lod = msg_end; - bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u)); - msg_end = offset(msg_lod, 1); - - opcode = SHADER_OPCODE_TXS; - break; - case ir_txf: - msg_lod = offset(msg_coords, 3); - bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), lod); - msg_end = offset(msg_lod, 1); - - opcode = SHADER_OPCODE_TXF; - break; - case ir_txf_ms: - msg_lod = offset(msg_coords, 3); - /* lod */ - bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u)); - /* sample index */ - bld.MOV(retype(offset(msg_lod, 1), BRW_REGISTER_TYPE_UD), sample_index); - msg_end = offset(msg_lod, 2); - - opcode = SHADER_OPCODE_TXF_CMS; - break; - case ir_lod: - opcode = SHADER_OPCODE_LOD; - break; - case ir_tg4: - opcode = SHADER_OPCODE_TG4; - break; - default: - unreachable("not reached"); - } - - fs_inst *inst = bld.emit(opcode, dst, reg_undef, fs_reg(sampler)); - inst->base_mrf = message.reg; - inst->mlen = msg_end.reg - message.reg; - inst->header_size = header_size; - inst->regs_written = 4 * reg_width; - - if (inst->mlen > MAX_SAMPLER_MESSAGE_SIZE) { - fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE) - " disallowed by hardware\n"); - } - - return inst; -} - -static bool -is_high_sampler(const struct brw_device_info *devinfo, fs_reg sampler) -{ - if (devinfo->gen < 8 && !devinfo->is_haswell) - return false; - - return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16; -} - -fs_inst * -fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, - fs_reg coordinate, int coord_components, - fs_reg shadow_c, - fs_reg 
lod, fs_reg lod2, int grad_components, - fs_reg sample_index, fs_reg mcs, fs_reg sampler, - fs_reg offset_value) -{ - int reg_width = dispatch_width / 8; - unsigned header_size = 0; - - fs_reg *sources = ralloc_array(mem_ctx, fs_reg, MAX_SAMPLER_MESSAGE_SIZE); - for (int i = 0; i < MAX_SAMPLER_MESSAGE_SIZE; i++) { - sources[i] = vgrf(glsl_type::float_type); - } - int length = 0; - - if (op == ir_tg4 || offset_value.file != BAD_FILE || - is_high_sampler(devinfo, sampler)) { - /* For general texture offsets (no txf workaround), we need a header to - * put them in. Note that for SIMD16 we're making space for two actual - * hardware registers here, so the emit will have to fix up for this. - * - * * ir4_tg4 needs to place its channel select in the header, - * for interaction with ARB_texture_swizzle - * - * The sampler index is only 4-bits, so for larger sampler numbers we - * need to offset the Sampler State Pointer in the header. - */ - header_size = 1; - sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); - length++; - } - - if (shadow_c.file != BAD_FILE) { - bld.MOV(sources[length], shadow_c); - length++; - } - - bool has_nonconstant_offset = - offset_value.file != BAD_FILE && offset_value.file != IMM; - bool coordinate_done = false; - - /* The sampler can only meaningfully compute LOD for fragment shader - * messages. For all other stages, we change the opcode to ir_txl and - * hardcode the LOD to 0. - */ - if (stage != MESA_SHADER_FRAGMENT && op == ir_tex) { - op = ir_txl; - lod = fs_reg(0.0f); - } - - /* Set up the LOD info */ - switch (op) { - case ir_tex: - case ir_lod: - break; - case ir_txb: - bld.MOV(sources[length], lod); - length++; - break; - case ir_txl: - bld.MOV(sources[length], lod); - length++; - break; - case ir_txd: { - no16("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); - - /* Load dPdx and the coordinate together: - * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z - */ - for (int i = 0; i < coord_components; i++) { - bld.MOV(sources[length], coordinate); - coordinate = offset(coordinate, 1); - length++; - - /* For cube map array, the coordinate is (u,v,r,ai) but there are - * only derivatives for (u, v, r). - */ - if (i < grad_components) { - bld.MOV(sources[length], lod); - lod = offset(lod, 1); - length++; - - bld.MOV(sources[length], lod2); - lod2 = offset(lod2, 1); - length++; - } - } - - coordinate_done = true; - break; - } - case ir_txs: - bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod); - length++; - break; - case ir_query_levels: - bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), fs_reg(0u)); - length++; - break; - case ir_txf: - /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. - * On Gen9 they are u, v, lod, r - */ - - bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); - coordinate = offset(coordinate, 1); - length++; - - if (devinfo->gen >= 9) { - if (coord_components >= 2) { - bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); - coordinate = offset(coordinate, 1); - } - length++; - } - - bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod); - length++; - - for (int i = devinfo->gen >= 9 ? 
2 : 1; i < coord_components; i++) { - bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); - coordinate = offset(coordinate, 1); - length++; - } - - coordinate_done = true; - break; - case ir_txf_ms: - bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index); - length++; - - /* data from the multisample control surface */ - bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs); - length++; - - /* there is no offsetting for this message; just copy in the integer - * texture coordinates - */ - for (int i = 0; i < coord_components; i++) { - bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate); - coordinate = offset(coordinate, 1); - length++; - } - - coordinate_done = true; - break; - case ir_tg4: - if (has_nonconstant_offset) { - if (shadow_c.file != BAD_FILE) - no16("Gen7 does not support gather4_po_c in SIMD16 mode."); - - /* More crazy intermixing */ - for (int i = 0; i < 2; i++) { /* u, v */ - bld.MOV(sources[length], coordinate); - coordinate = offset(coordinate, 1); - length++; - } - - for (int i = 0; i < 2; i++) { /* offu, offv */ - bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_value); - offset_value = offset(offset_value, 1); - length++; - } - - if (coord_components == 3) { /* r if present */ - bld.MOV(sources[length], coordinate); - coordinate = offset(coordinate, 1); - length++; - } - - coordinate_done = true; - } - break; - } - - /* Set up the coordinate (except for cases where it was done above) */ - if (!coordinate_done) { - for (int i = 0; i < coord_components; i++) { - bld.MOV(sources[length], coordinate); - coordinate = offset(coordinate, 1); - length++; - } - } - - int mlen; - if (reg_width == 2) - mlen = length * reg_width - header_size; - else - mlen = length * reg_width; - - fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), - BRW_REGISTER_TYPE_F, dispatch_width); - bld.LOAD_PAYLOAD(src_payload, sources, length, header_size); - - /* Generate the SEND */ - enum opcode opcode; - switch (op) { - case ir_tex: opcode = SHADER_OPCODE_TEX; break; - case ir_txb: opcode = FS_OPCODE_TXB; break; - case ir_txl: opcode = SHADER_OPCODE_TXL; break; - case ir_txd: opcode = SHADER_OPCODE_TXD; break; - case ir_txf: opcode = SHADER_OPCODE_TXF; break; - case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break; - case ir_txs: opcode = SHADER_OPCODE_TXS; break; - case ir_query_levels: opcode = SHADER_OPCODE_TXS; break; - case ir_lod: opcode = SHADER_OPCODE_LOD; break; - case ir_tg4: - if (has_nonconstant_offset) - opcode = SHADER_OPCODE_TG4_OFFSET; - else - opcode = SHADER_OPCODE_TG4; - break; - default: - unreachable("not reached"); - } - fs_inst *inst = bld.emit(opcode, dst, src_payload, sampler); - inst->base_mrf = -1; - inst->mlen = mlen; - inst->header_size = header_size; - inst->regs_written = 4 * reg_width; - - if (inst->mlen > MAX_SAMPLER_MESSAGE_SIZE) { - fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE) - " disallowed by hardware\n"); - } - - return inst; -} - fs_reg fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, bool is_rect, uint32_t sampler, int texunit) @@ -746,8 +140,8 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, coordinate = dst; bld.MUL(dst, src, scale_x); - dst = offset(dst, 1); - src = offset(src, 1); + dst = offset(dst, bld, 1); + src = offset(src, bld, 1); bld.MUL(dst, src, scale_y); } else if (is_rect) { /* On gen6+, the sampler handles the rectangle coordinates @@ -760,7 +154,7 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, 
for (int i = 0; i < 2; i++) { if (key_tex->gl_clamp_mask[i] & (1 << sampler)) { fs_reg chan = coordinate; - chan = offset(chan, i); + chan = offset(chan, bld, i); set_condmod(BRW_CONDITIONAL_GE, bld.emit(BRW_OPCODE_SEL, chan, chan, fs_reg(0.0f))); @@ -785,7 +179,7 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, for (int i = 0; i < MIN2(coord_components, 3); i++) { if (key_tex->gl_clamp_mask[i] & (1 << sampler)) { fs_reg chan = coordinate; - chan = offset(chan, i); + chan = offset(chan, bld, i); set_saturate(true, bld.MOV(chan, chan)); } } @@ -795,31 +189,21 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, /* Sample from the MCS surface attached to this multisample texture. */ fs_reg -fs_visitor::emit_mcs_fetch(fs_reg coordinate, int components, fs_reg sampler) +fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components, + const fs_reg &sampler) { - int reg_width = dispatch_width / 8; - fs_reg payload = fs_reg(GRF, alloc.allocate(components * reg_width), - BRW_REGISTER_TYPE_F, dispatch_width); - fs_reg dest = vgrf(glsl_type::uvec4_type); - fs_reg *sources = ralloc_array(mem_ctx, fs_reg, components); - - /* parameters are: u, v, r; missing parameters are treated as zero */ - for (int i = 0; i < components; i++) { - sources[i] = vgrf(glsl_type::float_type); - bld.MOV(retype(sources[i], BRW_REGISTER_TYPE_D), coordinate); - coordinate = offset(coordinate, 1); - } - - bld.LOAD_PAYLOAD(payload, sources, components, 0); + const fs_reg dest = vgrf(glsl_type::uvec4_type); + const fs_reg srcs[] = { + coordinate, fs_reg(), fs_reg(), fs_reg(), fs_reg(), fs_reg(), + sampler, fs_reg(), fs_reg(components), fs_reg(0) + }; + fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs, + ARRAY_SIZE(srcs)); - fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS, dest, payload, sampler); - inst->base_mrf = -1; - inst->mlen = components * reg_width; - inst->header_size = 0; - inst->regs_written = 4 * reg_width; /* we only care about one reg of - * response, but the sampler always - * writes 4/8 - */ + /* We only care about one reg of response, but the sampler always writes + * 4/8. + */ + inst->regs_written = 4 * dispatch_width / 8; return dest; } @@ -853,12 +237,20 @@ fs_visitor::emit_texture(ir_texture_opcode op, for (int i=0; i<4; i++) { bld.MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f)); - res = offset(res, 1); + res = offset(res, bld, 1); } return; } } + if (op == ir_query_levels) { + /* textureQueryLevels() is implemented in terms of TXS so we need to + * pass a valid LOD argument. + */ + assert(lod.file == BAD_FILE); + lod = fs_reg(0u); + } + if (coordinate.file != BAD_FILE) { /* FINISHME: Texture coordinate rescaling doesn't work with non-constant * samplers. This should only be a problem with GL_CLAMP on Gen7. @@ -871,26 +263,50 @@ fs_visitor::emit_texture(ir_texture_opcode op, * samples, so don't worry about them. 
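A scalar-view sketch of what the GL_CLAMP lowering in rescale_texcoord() above does per coordinate channel, assuming the usual semantics of a saturating MOV and of SEL with the GE conditional; illustrative only, not part of the patch.

#include <math.h>

/* Non-rectangle textures: the saturating MOV clamps to [0, 1]. */
static float
clamp_normalized_coord(float coord)
{
   return fminf(fmaxf(coord, 0.0f), 1.0f);
}

/* Rectangle textures: SEL.ge against 0.0 clamps only the negative
 * range to zero; the positive range is left untouched by this step. */
static float
clamp_rect_coord(float coord)
{
   return fmaxf(coord, 0.0f);
}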
*/ fs_reg dst = vgrf(glsl_type::get_instance(dest_type->base_type, 4, 1)); + const fs_reg srcs[] = { + coordinate, shadow_c, lod, lod2, + sample_index, mcs, sampler_reg, offset_value, + fs_reg(coord_components), fs_reg(grad_components) + }; + enum opcode opcode; - if (devinfo->gen >= 7) { - inst = emit_texture_gen7(op, dst, coordinate, coord_components, - shadow_c, lod, lod2, grad_components, - sample_index, mcs, sampler_reg, - offset_value); - } else if (devinfo->gen >= 5) { - inst = emit_texture_gen5(op, dst, coordinate, coord_components, - shadow_c, lod, lod2, grad_components, - sample_index, sampler, - offset_value.file != BAD_FILE); - } else if (dispatch_width == 16) { - inst = emit_texture_gen4_simd16(op, dst, coordinate, coord_components, - shadow_c, lod, sampler); - } else { - inst = emit_texture_gen4(op, dst, coordinate, coord_components, - shadow_c, lod, lod2, grad_components, - sampler); + switch (op) { + case ir_tex: + opcode = SHADER_OPCODE_TEX_LOGICAL; + break; + case ir_txb: + opcode = FS_OPCODE_TXB_LOGICAL; + break; + case ir_txl: + opcode = SHADER_OPCODE_TXL_LOGICAL; + break; + case ir_txd: + opcode = SHADER_OPCODE_TXD_LOGICAL; + break; + case ir_txf: + opcode = SHADER_OPCODE_TXF_LOGICAL; + break; + case ir_txf_ms: + opcode = SHADER_OPCODE_TXF_CMS_LOGICAL; + break; + case ir_txs: + case ir_query_levels: + opcode = SHADER_OPCODE_TXS_LOGICAL; + break; + case ir_lod: + opcode = SHADER_OPCODE_LOD_LOGICAL; + break; + case ir_tg4: + opcode = (offset_value.file != BAD_FILE && offset_value.file != IMM ? + SHADER_OPCODE_TG4_OFFSET_LOGICAL : SHADER_OPCODE_TG4_LOGICAL); + break; + default: + unreachable("Invalid texture opcode."); } + inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs)); + inst->regs_written = 4 * dispatch_width / 8; + if (shadow_c.file != BAD_FILE) inst->shadow_compare = true; @@ -907,17 +323,17 @@ fs_visitor::emit_texture(ir_texture_opcode op, /* fixup #layers for cube map arrays */ if (op == ir_txs && is_cube_array) { - fs_reg depth = offset(dst, 2); + fs_reg depth = offset(dst, bld, 2); fs_reg fixed_depth = vgrf(glsl_type::int_type); bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6)); fs_reg *fixed_payload = ralloc_array(mem_ctx, fs_reg, inst->regs_written); - int components = inst->regs_written / (dst.width / 8); + int components = inst->regs_written / (inst->exec_size / 8); for (int i = 0; i < components; i++) { if (i == 2) { fixed_payload[i] = fixed_depth; } else { - fixed_payload[i] = offset(dst, i); + fixed_payload[i] = offset(dst, bld, i); } } bld.LOAD_PAYLOAD(dst, fixed_payload, components, 0); @@ -952,7 +368,7 @@ fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst) bld.ASR(dst, dst, fs_reg(32 - width)); } - dst = offset(dst, 1); + dst = offset(dst, bld, 1); } } @@ -989,7 +405,7 @@ fs_visitor::swizzle_result(ir_texture_opcode op, int dest_components, { if (op == ir_query_levels) { /* # levels is in .w */ - this->result = offset(orig_val, 3); + this->result = offset(orig_val, bld, 3); return; } @@ -1010,15 +426,15 @@ fs_visitor::swizzle_result(ir_texture_opcode op, int dest_components, for (int i = 0; i < 4; i++) { int swiz = GET_SWZ(key_tex->swizzles[sampler], i); fs_reg l = swizzled_result; - l = offset(l, i); + l = offset(l, bld, i); if (swiz == SWIZZLE_ZERO) { bld.MOV(l, fs_reg(0.0f)); } else if (swiz == SWIZZLE_ONE) { bld.MOV(l, fs_reg(1.0f)); } else { - bld.MOV(l, offset(orig_val, - GET_SWZ(key_tex->swizzles[sampler], i))); + bld.MOV(l, offset(orig_val, bld, + GET_SWZ(key_tex->swizzles[sampler], i))); } } this->result = 
swizzled_result; @@ -1114,118 +530,6 @@ fs_visitor::try_replace_with_sel() return false; } -void -fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, - fs_reg dst, fs_reg offset, fs_reg src0, - fs_reg src1) -{ - int reg_width = dispatch_width / 8; - int length = 0; - - fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 4); - - sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); - /* Initialize the sample mask in the message header. */ - bld.exec_all().MOV(sources[0], fs_reg(0u)); - - if (stage == MESA_SHADER_FRAGMENT) { - if (((brw_wm_prog_data*)this->prog_data)->uses_kill) { - bld.exec_all() - .MOV(component(sources[0], 7), brw_flag_reg(0, 1)); - } else { - bld.exec_all() - .MOV(component(sources[0], 7), - retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)); - } - } else { - /* The execution mask is part of the side-band information sent together with - * the message payload to the data port. It's implicitly ANDed with the sample - * mask sent in the header to compute the actual set of channels that execute - * the atomic operation. - */ - assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_COMPUTE); - bld.exec_all() - .MOV(component(sources[0], 7), fs_reg(0xffffu)); - } - length++; - - /* Set the atomic operation offset. */ - sources[1] = vgrf(glsl_type::uint_type); - bld.MOV(sources[1], offset); - length++; - - /* Set the atomic operation arguments. */ - if (src0.file != BAD_FILE) { - sources[length] = vgrf(glsl_type::uint_type); - bld.MOV(sources[length], src0); - length++; - } - - if (src1.file != BAD_FILE) { - sources[length] = vgrf(glsl_type::uint_type); - bld.MOV(sources[length], src1); - length++; - } - - int mlen = 1 + (length - 1) * reg_width; - fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), - BRW_REGISTER_TYPE_UD, dispatch_width); - bld.LOAD_PAYLOAD(src_payload, sources, length, 1); - - /* Emit the instruction. */ - fs_inst *inst = bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst, src_payload, - fs_reg(surf_index), fs_reg(atomic_op)); - inst->mlen = mlen; -} - -void -fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst, - fs_reg offset) -{ - int reg_width = dispatch_width / 8; - - fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 2); - - sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); - /* Initialize the sample mask in the message header. */ - bld.exec_all() - .MOV(sources[0], fs_reg(0u)); - - if (stage == MESA_SHADER_FRAGMENT) { - if (((brw_wm_prog_data*)this->prog_data)->uses_kill) { - bld.exec_all() - .MOV(component(sources[0], 7), brw_flag_reg(0, 1)); - } else { - bld.exec_all() - .MOV(component(sources[0], 7), - retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)); - } - } else { - /* The execution mask is part of the side-band information sent together with - * the message payload to the data port. It's implicitly ANDed with the sample - * mask sent in the header to compute the actual set of channels that execute - * the atomic operation. - */ - assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_COMPUTE); - bld.exec_all() - .MOV(component(sources[0], 7), fs_reg(0xffffu)); - } - - /* Set the surface read offset. */ - sources[1] = vgrf(glsl_type::uint_type); - bld.MOV(sources[1], offset); - - int mlen = 1 + reg_width; - fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), - BRW_REGISTER_TYPE_UD, dispatch_width); - fs_inst *inst = bld.LOAD_PAYLOAD(src_payload, sources, 2, 1); - - /* Emit the instruction. 
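The removed untyped-atomic/surface-read helpers above both seed DWord 7 of the message header with a sample mask; a condensed sketch of that selection follows. Parameter names are illustrative, and the two value arguments stand in for the flag register f0.1 and payload register g1.7.

#include <stdbool.h>
#include <stdint.h>

static uint32_t
untyped_message_sample_mask(bool is_fragment_shader, bool uses_kill,
                            uint32_t flag_f0_1, uint32_t payload_g1_7)
{
   if (is_fragment_shader) {
      /* With discard in play, f0.1 tracks the channels still alive;
       * otherwise use the mask delivered in g1.7 of the FS payload. */
      return uses_kill ? flag_f0_1 : payload_g1_7;
   }

   /* VS/CS: the real channel mask arrives as execution-mask side-band
    * information, so the header simply carries all ones. */
   return 0xffff;
}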
*/ - inst = bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, src_payload, - fs_reg(surf_index), fs_reg(1)); - inst->mlen = mlen; -} - /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ void fs_visitor::emit_dummy_fs() @@ -1235,8 +539,8 @@ fs_visitor::emit_dummy_fs() /* Everyone's favorite color. */ const float color[4] = { 1.0, 0.0, 1.0, 0.0 }; for (int i = 0; i < 4; i++) { - bld.MOV(fs_reg(MRF, 2 + i * reg_width, BRW_REGISTER_TYPE_F, - dispatch_width), fs_reg(color[i])); + bld.MOV(fs_reg(MRF, 2 + i * reg_width, BRW_REGISTER_TYPE_F), + fs_reg(color[i])); } fs_inst *write; @@ -1315,14 +619,14 @@ fs_visitor::emit_interpolation_setup_gen4() if (devinfo->has_pln && dispatch_width == 16) { for (unsigned i = 0; i < 2; i++) { - abld.half(i).ADD(half(offset(delta_xy, i), 0), + abld.half(i).ADD(half(offset(delta_xy, abld, i), 0), half(this->pixel_x, i), xstart); - abld.half(i).ADD(half(offset(delta_xy, i), 1), + abld.half(i).ADD(half(offset(delta_xy, abld, i), 1), half(this->pixel_y, i), ystart); } } else { - abld.ADD(offset(delta_xy, 0), this->pixel_x, xstart); - abld.ADD(offset(delta_xy, 1), this->pixel_y, ystart); + abld.ADD(offset(delta_xy, abld, 0), this->pixel_x, xstart); + abld.ADD(offset(delta_xy, abld, 1), this->pixel_y, ystart); } abld = bld.annotate("compute pos.w and 1/pos.w"); @@ -1356,9 +660,10 @@ fs_visitor::emit_interpolation_setup_gen6() * compute our pixel centers. */ fs_reg int_pixel_xy(GRF, alloc.allocate(dispatch_width / 8), - BRW_REGISTER_TYPE_UW, dispatch_width * 2); - abld.exec_all() - .ADD(int_pixel_xy, + BRW_REGISTER_TYPE_UW); + + const fs_builder dbld = abld.exec_all().group(dispatch_width * 2, 0); + dbld.ADD(int_pixel_xy, fs_reg(stride(suboffset(g1_uw, 4), 1, 4, 0)), fs_reg(brw_imm_v(0x11001010))); @@ -1407,33 +712,6 @@ fs_visitor::emit_interpolation_setup_gen6() } } -void -fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components, - unsigned exec_size, bool use_2nd_half) -{ - brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - fs_inst *inst; - - if (key->clamp_fragment_color) { - fs_reg tmp = vgrf(glsl_type::vec4_type); - assert(color.type == BRW_REGISTER_TYPE_F); - for (unsigned i = 0; i < components; i++) { - inst = bld.MOV(offset(tmp, i), offset(color, i)); - inst->saturate = true; - } - color = tmp; - } - - if (exec_size < dispatch_width) { - unsigned half_idx = use_2nd_half ? 1 : 0; - for (unsigned i = 0; i < components; i++) - dst[i] = half(offset(color, i), half_idx); - } else { - for (unsigned i = 0; i < components; i++) - dst[i] = offset(color, i); - } -} - static enum brw_conditional_mod cond_for_alpha_func(GLenum func) { @@ -1478,7 +756,7 @@ fs_visitor::emit_alpha_test() BRW_CONDITIONAL_NEQ); } else { /* RT0 alpha */ - fs_reg color = offset(outputs[0], 3); + fs_reg color = offset(outputs[0], bld, 3); /* f0.1 &= func(color, ref) */ cmp = abld.CMP(bld.null_reg_f(), color, fs_reg(key->alpha_test_ref), @@ -1491,152 +769,36 @@ fs_visitor::emit_alpha_test() fs_inst * fs_visitor::emit_single_fb_write(const fs_builder &bld, fs_reg color0, fs_reg color1, - fs_reg src0_alpha, unsigned components, - unsigned exec_size, bool use_2nd_half) + fs_reg src0_alpha, unsigned components) { assert(stage == MESA_SHADER_FRAGMENT); brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; - brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - int header_size = 2, payload_header_size; - - /* We can potentially have a message length of up to 15, so we have to set - * base_mrf to either 0 or 1 in order to fit in m0..m15. 
- */ - fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 15); - int length = 0; - - /* From the Sandy Bridge PRM, volume 4, page 198: - * - * "Dispatched Pixel Enables. One bit per pixel indicating - * which pixels were originally enabled when the thread was - * dispatched. This field is only required for the end-of- - * thread message and on all dual-source messages." - */ - if (devinfo->gen >= 6 && - (devinfo->is_haswell || devinfo->gen >= 8 || !prog_data->uses_kill) && - color1.file == BAD_FILE && - key->nr_color_regions == 1) { - header_size = 0; - } - - if (header_size != 0) { - assert(header_size == 2); - /* Allocate 2 registers for a header */ - length += 2; - } - if (payload.aa_dest_stencil_reg) { - sources[length] = fs_reg(GRF, alloc.allocate(1)); - bld.exec_all().annotate("FB write stencil/AA alpha") - .MOV(sources[length], - fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0))); - length++; - } - - prog_data->uses_omask = - prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); - if (prog_data->uses_omask) { - assert(this->sample_mask.file != BAD_FILE); - /* Hand over gl_SampleMask. Only lower 16 bits are relevant. Since - * it's unsinged single words, one vgrf is always 16-wide. - */ - sources[length] = fs_reg(GRF, alloc.allocate(1), - BRW_REGISTER_TYPE_UW, 16); - bld.exec_all().annotate("FB write oMask") - .emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask); - length++; - } - - payload_header_size = length; - - if (color0.file == BAD_FILE) { - /* Even if there's no color buffers enabled, we still need to send - * alpha out the pipeline to our null renderbuffer to support - * alpha-testing, alpha-to-coverage, and so on. - */ - if (this->outputs[0].file != BAD_FILE) - setup_color_payload(&sources[length + 3], offset(this->outputs[0], 3), - 1, exec_size, false); - length += 4; - } else if (color1.file == BAD_FILE) { - if (src0_alpha.file != BAD_FILE) { - setup_color_payload(&sources[length], src0_alpha, 1, exec_size, false); - length++; - } - - setup_color_payload(&sources[length], color0, components, - exec_size, use_2nd_half); - length += 4; - } else { - setup_color_payload(&sources[length], color0, components, - exec_size, use_2nd_half); - length += 4; - setup_color_payload(&sources[length], color1, components, - exec_size, use_2nd_half); - length += 4; - } + /* Hand over gl_FragDepth or the payload depth. */ + const fs_reg dst_depth = (payload.dest_depth_reg ? + fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)) : + fs_reg()); + fs_reg src_depth; if (source_depth_to_render_target) { - if (devinfo->gen == 6) { - /* For outputting oDepth on gen6, SIMD8 writes have to be - * used. This would require SIMD8 moves of each half to - * message regs, kind of like pre-gen5 SIMD16 FB writes. - * Just bail on doing so for now. - */ - no16("Missing support for simd16 depth writes on gen6\n"); - } - - if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { - /* Hand over gl_FragDepth. */ - assert(this->frag_depth.file != BAD_FILE); - if (exec_size < dispatch_width) { - sources[length] = half(this->frag_depth, use_2nd_half); - } else { - sources[length] = this->frag_depth; - } - } else { - /* Pass through the payload depth. 
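A compact restatement of the header-size rule from the Sandy Bridge PRM quote above, as a sketch with an illustrative helper name: the two-register FB write header is only needed when the dispatched-pixel-enables matter, i.e. for dual-source messages, multiple render targets, or a pre-Haswell/pre-gen8 shader that uses discard.

#include <stdbool.h>

static unsigned
fb_write_header_size(int gen, bool is_haswell, bool uses_kill,
                     bool dual_source_blend, int nr_color_regions)
{
   if (gen >= 6 &&
       (is_haswell || gen >= 8 || !uses_kill) &&
       !dual_source_blend &&
       nr_color_regions == 1)
      return 0;

   return 2;
}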
*/ - sources[length] = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)); - } - length++; + if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) + src_depth = frag_depth; + else + src_depth = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)); } - if (payload.dest_depth_reg) - sources[length++] = fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)); - - const fs_builder ubld = bld.group(exec_size, use_2nd_half); - fs_inst *load; - fs_inst *write; - if (devinfo->gen >= 7) { - /* Send from the GRF */ - fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F, exec_size); - load = ubld.LOAD_PAYLOAD(payload, sources, length, payload_header_size); - payload.reg = alloc.allocate(load->regs_written); - load->dst = payload; - write = ubld.emit(FS_OPCODE_FB_WRITE, reg_undef, payload); - write->base_mrf = -1; - } else { - /* Send from the MRF */ - load = ubld.LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F, exec_size), - sources, length, payload_header_size); - - /* On pre-SNB, we have to interlace the color values. LOAD_PAYLOAD - * will do this for us if we just give it a COMPR4 destination. - */ - if (devinfo->gen < 6 && exec_size == 16) - load->dst.reg |= BRW_MRF_COMPR4; - - write = ubld.emit(FS_OPCODE_FB_WRITE); - write->exec_size = exec_size; - write->base_mrf = 1; - } + const fs_reg sources[] = { + color0, color1, src0_alpha, src_depth, dst_depth, sample_mask, + fs_reg(components) + }; + fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(), + sources, ARRAY_SIZE(sources)); - write->mlen = load->regs_written; - write->header_size = header_size; if (prog_data->uses_kill) { write->predicate = BRW_PREDICATE_NORMAL; write->flag_subreg = 1; } + return write; } @@ -1648,37 +810,24 @@ fs_visitor::emit_fb_writes() brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; fs_inst *inst = NULL; + + if (source_depth_to_render_target && devinfo->gen == 6) { + /* For outputting oDepth on gen6, SIMD8 writes have to be used. This + * would require SIMD8 moves of each half to message regs, e.g. by using + * the SIMD lowering pass. Unfortunately this is more difficult than it + * sounds because the SIMD8 single-source message lacks channel selects + * for the second and third subspans. + */ + no16("Missing support for simd16 depth writes on gen6\n"); + } + if (do_dual_src) { const fs_builder abld = bld.annotate("FB dual-source write"); inst = emit_single_fb_write(abld, this->outputs[0], - this->dual_src_output, reg_undef, 4, 8); + this->dual_src_output, reg_undef, 4); inst->target = 0; - /* SIMD16 dual source blending requires to send two SIMD8 dual source - * messages, where each message contains color data for 8 pixels. Color - * data for the first group of pixels is stored in the "lower" half of - * the color registers, so in SIMD16, the previous message did: - * m + 0: r0 - * m + 1: g0 - * m + 2: b0 - * m + 3: a0 - * - * Here goes the second message, which packs color data for the - * remaining 8 pixels. 
Color data for these pixels is stored in the - * "upper" half of the color registers, so we need to do: - * m + 0: r1 - * m + 1: g1 - * m + 2: b1 - * m + 3: a1 - */ - if (dispatch_width == 16) { - inst = emit_single_fb_write(abld, this->outputs[0], - this->dual_src_output, reg_undef, 4, 8, - true); - inst->target = 0; - } - prog_data->dual_src_blend = true; } else { for (int target = 0; target < key->nr_color_regions; target++) { @@ -1691,12 +840,11 @@ fs_visitor::emit_fb_writes() fs_reg src0_alpha; if (devinfo->gen >= 6 && key->replicate_alpha && target != 0) - src0_alpha = offset(outputs[0], 3); + src0_alpha = offset(outputs[0], bld, 3); inst = emit_single_fb_write(abld, this->outputs[target], reg_undef, src0_alpha, - this->output_components[target], - dispatch_width); + this->output_components[target]); inst->target = target; } } @@ -1706,8 +854,15 @@ fs_visitor::emit_fb_writes() * alpha out the pipeline to our null renderbuffer to support * alpha-testing, alpha-to-coverage, and so on. */ - inst = emit_single_fb_write(bld, reg_undef, reg_undef, reg_undef, 0, - dispatch_width); + /* FINISHME: Factor out this frequently recurring pattern into a + * helper function. + */ + const fs_reg srcs[] = { reg_undef, reg_undef, + reg_undef, offset(this->outputs[0], bld, 3) }; + const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 4); + bld.LOAD_PAYLOAD(tmp, srcs, 4, 0); + + inst = emit_single_fb_write(bld, tmp, reg_undef, reg_undef, 4); inst->target = 0; } @@ -1730,6 +885,12 @@ fs_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes) } } +/** + * Lower legacy fixed-function and gl_ClipVertex clipping to clip distances. + * + * This does nothing if the shader uses gl_ClipDistance or user clipping is + * disabled altogether. + */ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes) { struct brw_vue_prog_data *vue_prog_data = @@ -1737,6 +898,10 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes) const struct brw_vue_prog_key *key = (const struct brw_vue_prog_key *) this->key; + /* Bail unless some sort of legacy clipping is enabled */ + if (!key->userclip_active || prog->UsesClipDistanceOut) + return; + /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables): * * "If a linked set of shaders forming the vertex stage contains no @@ -1774,13 +939,13 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes) abld.MUL(output, outputs[clip_vertex], u); for (int j = 1; j < 4; j++) { u.reg = userplane[i].reg + j; - abld.MAD(output, output, offset(outputs[clip_vertex], j), u); + abld.MAD(output, output, offset(outputs[clip_vertex], bld, j), u); } } } void -fs_visitor::emit_urb_writes(gl_clip_plane *clip_planes) +fs_visitor::emit_urb_writes() { int slot, urb_offset, length; struct brw_vs_prog_data *vs_prog_data = @@ -1793,21 +958,24 @@ fs_visitor::emit_urb_writes(gl_clip_plane *clip_planes) bool flush; fs_reg sources[8]; - /* Lower legacy ff and ClipVertex clipping to clip distances */ - if (key->base.userclip_active && !prog->UsesClipDistanceOut) - compute_clip_distance(clip_planes); - /* If we don't have any valid slots to write, just do a minimal urb write - * send to terminate the shader. */ + * send to terminate the shader. 
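For each enabled user clip plane, the MUL plus three MADs in compute_clip_distance() above amount to a plain dot product of the clip vertex with the plane equation; a scalar sketch, illustrative only:

static float
clip_distance(const float clip_vertex[4], const float plane[4])
{
   float d = clip_vertex[0] * plane[0];      /* MUL           */

   for (int j = 1; j < 4; j++)
      d += clip_vertex[j] * plane[j];        /* MAD, j = 1..3 */

   return d;
}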
This includes 1 slot of undefined data, + * because it's invalid to write 0 data: + * + * From the Broadwell PRM, Volume 7: 3D Media GPGPU, Shared Functions - + * Unified Return Buffer (URB) > URB_SIMD8_Write and URB_SIMD8_Read > + * Write Data Payload: + * + * "The write data payload can be between 1 and 8 message phases long." + */ if (vue_map->slots_valid == 0) { - - fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); + fs_reg payload = fs_reg(GRF, alloc.allocate(2), BRW_REGISTER_TYPE_UD); bld.exec_all().MOV(payload, fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD))); fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload); inst->eot = true; - inst->mlen = 1; + inst->mlen = 2; inst->offset = 1; return; } @@ -1888,13 +1056,13 @@ fs_visitor::emit_urb_writes(gl_clip_plane *clip_planes) */ for (int i = 0; i < 4; i++) { reg = fs_reg(GRF, alloc.allocate(1), outputs[varying].type); - src = offset(this->outputs[varying], i); + src = offset(this->outputs[varying], bld, i); set_saturate(true, bld.MOV(reg, src)); sources[length++] = reg; } } else { for (int i = 0; i < 4; i++) - sources[length++] = offset(this->outputs[varying], i); + sources[length++] = offset(this->outputs[varying], bld, i); } break; } @@ -1911,7 +1079,7 @@ fs_visitor::emit_urb_writes(gl_clip_plane *clip_planes) if (flush) { fs_reg *payload_sources = ralloc_array(mem_ctx, fs_reg, length + 1); fs_reg payload = fs_reg(GRF, alloc.allocate(length + 1), - BRW_REGISTER_TYPE_F, dispatch_width); + BRW_REGISTER_TYPE_F); payload_sources[0] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); @@ -1944,7 +1112,7 @@ fs_visitor::emit_cs_terminate() */ struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD); fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); - bld.exec_all().MOV(payload, g0); + bld.group(8, 0).exec_all().MOV(payload, g0); /* Send a message to the thread spawner to terminate the thread. */ fs_inst *inst = bld.exec_all() @@ -2012,7 +1180,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, this->no16_msg = NULL; this->nir_locals = NULL; - this->nir_globals = NULL; + this->nir_ssa_values = NULL; memset(&this->payload, 0, sizeof(this->payload)); memset(this->outputs, 0, sizeof(this->outputs)); diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 45c132b4a9e..4ad65215756 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -68,12 +68,16 @@ brw_compile_gs_prog(struct brw_context *brw, /* We also upload clip plane data as uniforms */ param_count += MAX_CLIP_PLANES * 4; + param_count += gs->NumImages * BRW_IMAGE_PARAM_SIZE; c.prog_data.base.base.param = rzalloc_array(NULL, const gl_constant_value *, param_count); c.prog_data.base.base.pull_param = rzalloc_array(NULL, const gl_constant_value *, param_count); + c.prog_data.base.base.image_param = + rzalloc_array(NULL, struct brw_image_param, gs->NumImages); c.prog_data.base.base.nr_params = param_count; + c.prog_data.base.base.nr_image_params = gs->NumImages; if (brw->gen >= 7) { if (gp->program.OutputType == GL_POINTS) { @@ -270,16 +274,6 @@ brw_compile_gs_prog(struct brw_context *brw, return false; } - /* Scratch space is used for register spilling */ - if (c.base.last_scratch) { - perf_debug("Geometry shader triggered register spilling. 
" - "Try reducing the number of live vec4 values to " - "improve performance.\n"); - - c.prog_data.base.base.total_scratch - = brw_get_scratch_size(c.base.last_scratch*REG_SIZE); - } - output->mem_ctx = mem_ctx; output->program = program; output->program_size = program_size; diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c index 0b8bfc3d9bd..0bb307432d0 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c @@ -119,3 +119,28 @@ const struct brw_tracked_state brw_gs_abo_surfaces = { }, .emit = brw_upload_gs_abo_surfaces, }; + +static void +brw_upload_gs_image_surfaces(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->ctx; + /* BRW_NEW_GEOMETRY_PROGRAM */ + struct gl_shader_program *prog = + ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]; + + if (prog) { + /* BRW_NEW_GS_PROG_DATA, BRW_NEW_IMAGE_UNITS */ + brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_GEOMETRY], + &brw->gs.base, &brw->gs.prog_data->base.base); + } +} + +const struct brw_tracked_state brw_gs_image_surfaces = { + .dirty = { + .brw = BRW_NEW_BATCH | + BRW_NEW_GEOMETRY_PROGRAM | + BRW_NEW_GS_PROG_DATA | + BRW_NEW_IMAGE_UNITS, + }, + .emit = brw_upload_gs_image_surfaces, +}; diff --git a/src/mesa/drivers/dri/i965/brw_inst.h b/src/mesa/drivers/dri/i965/brw_inst.h index 7a8c210118c..46eff1dd381 100644 --- a/src/mesa/drivers/dri/i965/brw_inst.h +++ b/src/mesa/drivers/dri/i965/brw_inst.h @@ -683,9 +683,9 @@ brw_inst_bits(const brw_inst *inst, unsigned high, unsigned low) high %= 64; low %= 64; - const uint64_t mask = (((1ull << (high - low + 1)) - 1) << low); + const uint64_t mask = (1ull << (high - low + 1)) - 1; - return (inst->data[word] & mask) >> low; + return (inst->data[word] >> low) & mask; } /** @@ -702,12 +702,12 @@ brw_inst_set_bits(brw_inst *inst, unsigned high, unsigned low, uint64_t value) high %= 64; low %= 64; - const uint64_t mask = (((1ull << (high - low + 1)) - 1) << low); + const uint64_t mask = ((1ull << (high - low + 1)) - 1) << low; /* Make sure the supplied value actually fits in the given bitfield. */ assert((value & (mask >> low)) == value); - inst->data[word] = (inst->data[word] & ~mask) | ((value << low) & mask); + inst->data[word] = (inst->data[word] & ~mask) | (value << low); } #undef BRW_IA16_ADDR_IMM @@ -731,9 +731,9 @@ typedef struct { static inline unsigned brw_compact_inst_bits(brw_compact_inst *inst, unsigned high, unsigned low) { - const uint64_t mask = (((1ull << (high - low + 1)) - 1) << low); + const uint64_t mask = (1ull << (high - low + 1)) - 1; - return (inst->data & mask) >> low; + return (inst->data >> low) & mask; } /** @@ -745,12 +745,12 @@ static inline void brw_compact_inst_set_bits(brw_compact_inst *inst, unsigned high, unsigned low, uint64_t value) { - const uint64_t mask = (((1ull << (high - low + 1)) - 1) << low); + const uint64_t mask = ((1ull << (high - low + 1)) - 1) << low; /* Make sure the supplied value actually fits in the given bitfield. 
*/ assert((value & (mask >> low)) == value); - inst->data = (inst->data & ~mask) | ((value << low) & mask); + inst->data = (inst->data & ~mask) | (value << low); } #define F(name, high, low) \ diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index 96dc20da3cf..97c6f8b2500 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -44,11 +44,16 @@ public: fs_reg(struct brw_reg fixed_hw_reg); fs_reg(enum register_file file, int reg); fs_reg(enum register_file file, int reg, enum brw_reg_type type); - fs_reg(enum register_file file, int reg, enum brw_reg_type type, uint8_t width); bool equals(const fs_reg &r) const; bool is_contiguous() const; + /** + * Return the size in bytes of a single logical component of the + * register assuming the given execution width. + */ + unsigned component_size(unsigned width) const; + /** Smear a channel of the reg to all channels. */ fs_reg &set_smear(unsigned subreg); @@ -60,14 +65,6 @@ public: fs_reg *reladdr; - /** - * The register width. This indicates how many hardware values are - * represented by each virtual value. Valid values are 1, 8, or 16. - * For immediate values, this is 1. Most of the rest of the time, it - * will be equal to the dispatch width. - */ - uint8_t width; - /** Register region horizontal stride */ uint8_t stride; }; @@ -129,33 +126,10 @@ horiz_offset(fs_reg reg, unsigned delta) } static inline fs_reg -offset(fs_reg reg, unsigned delta) -{ - switch (reg.file) { - case BAD_FILE: - break; - case GRF: - case MRF: - case ATTR: - return byte_offset(reg, - delta * MAX2(reg.width * reg.stride, 1) * - type_sz(reg.type)); - case UNIFORM: - reg.reg_offset += delta; - break; - default: - assert(delta == 0); - } - return reg; -} - -static inline fs_reg component(fs_reg reg, unsigned idx) { assert(reg.subreg_offset == 0); - assert(idx < reg.width); reg.subreg_offset = idx * type_sz(reg.type); - reg.width = 1; reg.stride = 0; return reg; } @@ -163,7 +137,7 @@ component(fs_reg reg, unsigned idx) static inline bool is_uniform(const fs_reg ®) { - return (reg.width == 1 || reg.stride == 0 || reg.is_null()) && + return (reg.stride == 0 || reg.is_null()) && (!reg.reladdr || is_uniform(*reg.reladdr)); } @@ -185,8 +159,6 @@ half(fs_reg reg, unsigned idx) case GRF: case MRF: - assert(reg.width == 16); - reg.width = 8; return horiz_offset(reg, 8 * idx); case ATTR: @@ -210,20 +182,13 @@ public: fs_inst(); fs_inst(enum opcode opcode, uint8_t exec_size); - fs_inst(enum opcode opcode, const fs_reg &dst); + fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst); fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, const fs_reg &src0); - fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0); fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); - fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0, - const fs_reg &src1); fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, const fs_reg &src0, const fs_reg &src1, const fs_reg &src2); - fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0, - const fs_reg &src1, const fs_reg &src2); - fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg src[], - unsigned sources); fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, const fs_reg src[], unsigned sources); fs_inst(const fs_inst &that); @@ -236,6 +201,7 @@ public: bool is_send_from_grf() const; bool is_partial_write() const; bool 
is_copy_payload(const brw::simple_allocator &grf_alloc) const; + unsigned components_read(unsigned i) const; int regs_read(int arg) const; bool can_do_source_mods(const struct brw_device_info *devinfo); bool has_side_effects() const; diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h index fceacae0e51..966a410a15d 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h @@ -113,6 +113,8 @@ public: dst_reg(register_file file, int reg); dst_reg(register_file file, int reg, const glsl_type *type, unsigned writemask); + dst_reg(register_file file, int reg, brw_reg_type type, + unsigned writemask); dst_reg(struct brw_reg reg); dst_reg(class vec4_visitor *v, const struct glsl_type *type); diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index 49f2e3e498c..f5ecbb54989 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -128,7 +128,7 @@ brw_bind_rep_write_shader(struct brw_context *brw, float *color) _mesa_AttachShader(clear->shader_prog, vs); _mesa_DeleteShader(vs); _mesa_BindAttribLocation(clear->shader_prog, 0, "position"); - _mesa_ObjectLabel(GL_PROGRAM, clear->shader_prog, -1, "meta clear"); + _mesa_ObjectLabel(GL_PROGRAM, clear->shader_prog, -1, "meta repclear"); _mesa_LinkProgram(clear->shader_prog); clear->color_location = @@ -200,7 +200,7 @@ brw_draw_rectlist(struct gl_context *ctx, struct rect *rect, int num_instances) brw_draw_prims(ctx, &prim, 1, NULL, GL_TRUE, start, start + count - 1, - NULL, NULL); + NULL, 0, NULL); } static void @@ -348,7 +348,7 @@ is_color_fast_clear_compatible(struct brw_context *brw, } for (int i = 0; i < 4; i++) { - if (color->f[i] != 0.0 && color->f[i] != 1.0 && + if (color->f[i] != 0.0f && color->f[i] != 1.0f && _mesa_format_has_color_component(format, i)) { return false; } @@ -366,7 +366,7 @@ compute_fast_clear_color_bits(const union gl_color_union *color) uint32_t bits = 0; for (int i = 0; i < 4; i++) { /* Testing for non-0 works for integer and float colors */ - if (color->f[i] != 0.0) + if (color->f[i] != 0.0f) bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i)); } return bits; @@ -623,7 +623,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb, * write-flush must be issued before sending any DRAW commands on that * render target. */ - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); /* If we had to fall back to plain clear for any buffers, clear those now * by calling into meta. 
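compute_fast_clear_color_bits() above packs one bit per RGBA channel into the surface state, set when the channel clears to a non-zero value (which, after the is_color_fast_clear_compatible() check, is normally 1.0). A standalone sketch, with the shift passed in rather than using the GEN7_SURFACE_CLEAR_COLOR_SHIFT define:

#include <stdint.h>

static uint32_t
fast_clear_color_bits(const float color[4], unsigned clear_color_shift)
{
   uint32_t bits = 0;

   for (int i = 0; i < 4; i++) {
      /* Testing for non-zero works for both integer and float colors. */
      if (color[i] != 0.0f)
         bits |= 1u << (clear_color_shift + (3 - i));
   }

   return bits;
}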
@@ -677,7 +677,7 @@ brw_meta_resolve_color(struct brw_context *brw, GLuint fbo, rbo; struct rect rect; - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); _mesa_meta_begin(ctx, MESA_META_ALL); diff --git a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c index d079197a2a9..aa6df16eb04 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c +++ b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c @@ -239,10 +239,10 @@ setup_coord_coeff(GLuint prog, GLuint multiplier, GLuint offset, if (mirror) { _mesa_Uniform1f(multiplier, -scale); - _mesa_Uniform1f(offset, src_0 + (dst_1 - 0.5) * scale); + _mesa_Uniform1f(offset, src_0 + (dst_1 - 0.5f) * scale); } else { _mesa_Uniform1f(multiplier, scale); - _mesa_Uniform1f(offset, src_0 + (-dst_0 + 0.5) * scale); + _mesa_Uniform1f(offset, src_0 + (-dst_0 + 0.5f) * scale); } } @@ -500,11 +500,11 @@ brw_meta_fbo_stencil_blit(struct brw_context *brw, .mirror_x = mirror_x, .mirror_y = mirror_y }; adjust_mip_level(dst_mt, dst_irb->mt_level, dst_irb->mt_layer, &dims); - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); _mesa_meta_begin(ctx, MESA_META_ALL); brw_meta_stencil_blit(brw, dst_mt, dst_irb->mt_level, dst_irb->mt_layer, &dims); - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); } void @@ -524,7 +524,7 @@ brw_meta_stencil_updownsample(struct brw_context *brw, if (dst->stencil_mt) dst = dst->stencil_mt; - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); _mesa_meta_begin(ctx, MESA_META_ALL); _mesa_GenFramebuffers(1, &fbo); @@ -535,7 +535,7 @@ brw_meta_stencil_updownsample(struct brw_context *brw, GL_RENDERBUFFER, rbo); brw_meta_stencil_blit(brw, dst, 0, 0, &dims); - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); _mesa_DeleteRenderbuffers(1, &rbo); _mesa_DeleteFramebuffers(1, &fbo); diff --git a/src/mesa/drivers/dri/i965/brw_meta_updownsample.c b/src/mesa/drivers/dri/i965/brw_meta_updownsample.c index 21507b1ad2a..f39d50a69e6 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_updownsample.c +++ b/src/mesa/drivers/dri/i965/brw_meta_updownsample.c @@ -116,7 +116,7 @@ brw_meta_updownsample(struct brw_context *brw, blit_bit = GL_COLOR_BUFFER_BIT; } - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); _mesa_meta_begin(ctx, MESA_META_ALL); _mesa_GenFramebuffers(2, fbos); @@ -147,5 +147,5 @@ brw_meta_updownsample(struct brw_context *brw, _mesa_meta_end(ctx); - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); } diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 5a4515b582d..e9d9467d330 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -44,7 +44,8 @@ #include "main/glformats.h" /* Constant single cliprect for framebuffer object or DRI2 drawing */ -static void upload_drawing_rect(struct brw_context *brw) +static void +upload_drawing_rect(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; const struct gl_framebuffer *fb = ctx->DrawBuffer; @@ -73,7 +74,8 @@ const struct brw_tracked_state brw_drawing_rect = { * The state pointers in this packet are all relative to the general state * base address set by CMD_STATE_BASE_ADDRESS, which is 0. */ -static void upload_pipelined_state_pointers(struct brw_context *brw ) +static void +upload_pipelined_state_pointers(struct brw_context *brw) { if (brw->gen == 5) { /* Need to flush before changing clip max threads for errata. 
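The multiplier/offset uniforms programmed by setup_coord_coeff() above implement src = multiplier * dst + offset; expanding the two cases gives the mapping below. This is a sketch of the algebra only, not driver code.

#include <stdbool.h>

static float
blit_src_coord(float dst, float scale, float src_0,
               float dst_0, float dst_1, bool mirror)
{
   if (mirror)
      /* -scale * dst + (src_0 + (dst_1 - 0.5f) * scale) */
      return src_0 + (dst_1 - 0.5f - dst) * scale;
   else
      /*  scale * dst + (src_0 + (-dst_0 + 0.5f) * scale) */
      return src_0 + (dst - dst_0 + 0.5f) * scale;
}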
*/ @@ -104,7 +106,8 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) brw->ctx.NewDriverState |= BRW_NEW_PSP; } -static void upload_psp_urb_cbs(struct brw_context *brw ) +static void +upload_psp_urb_cbs(struct brw_context *brw) { upload_pipelined_state_pointers(brw); brw_upload_urb_fence(brw); @@ -580,7 +583,7 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw, * non-pipelined state that will need the PIPE_CONTROL workaround. */ if (brw->gen == 6) { - intel_emit_depth_stall_flushes(brw); + brw_emit_depth_stall_flushes(brw); } unsigned int len; @@ -700,13 +703,11 @@ const struct brw_tracked_state brw_depthbuffer = { .emit = brw_emit_depthbuffer, }; - - -/*********************************************************************** +/** * Polygon stipple packet */ - -static void upload_polygon_stipple(struct brw_context *brw) +static void +upload_polygon_stipple(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; GLuint i; @@ -728,8 +729,7 @@ static void upload_polygon_stipple(struct brw_context *brw) if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) { for (i = 0; i < 32; i++) OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */ - } - else { + } else { for (i = 0; i < 32; i++) OUT_BATCH(ctx->PolygonStipple[i]); } @@ -745,12 +745,11 @@ const struct brw_tracked_state brw_polygon_stipple = { .emit = upload_polygon_stipple }; - -/*********************************************************************** +/** * Polygon stipple offset packet */ - -static void upload_polygon_stipple_offset(struct brw_context *brw) +static void +upload_polygon_stipple_offset(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; @@ -785,10 +784,11 @@ const struct brw_tracked_state brw_polygon_stipple_offset = { .emit = upload_polygon_stipple_offset }; -/********************************************************************** +/** * AA Line parameters */ -static void upload_aa_line_parameters(struct brw_context *brw) +static void +upload_aa_line_parameters(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; @@ -815,11 +815,11 @@ const struct brw_tracked_state brw_aa_line_parameters = { .emit = upload_aa_line_parameters }; -/*********************************************************************** +/** * Line stipple packet */ - -static void upload_line_stipple(struct brw_context *brw) +static void +upload_line_stipple(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; GLfloat tmp; @@ -834,13 +834,12 @@ static void upload_line_stipple(struct brw_context *brw) if (brw->gen >= 7) { /* in U1.16 */ - tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor; + tmp = 1.0f / ctx->Line.StippleFactor; tmpi = tmp * (1<<16); OUT_BATCH(tmpi << 15 | ctx->Line.StippleFactor); - } - else { + } else { /* in U1.13 */ - tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor; + tmp = 1.0f / ctx->Line.StippleFactor; tmpi = tmp * (1<<13); OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor); } @@ -856,7 +855,6 @@ const struct brw_tracked_state brw_line_stipple = { .emit = upload_line_stipple }; - void brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline) { @@ -872,11 +870,9 @@ brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline) ADVANCE_BATCH(); } - -/*********************************************************************** +/** * Misc invariant state packets */ - void brw_upload_invariant_state(struct brw_context *brw) { @@ -930,7 +926,8 @@ const struct brw_tracked_state brw_invariant_state = { * surface state objects, but not the surfaces that the surface state * 
objects point to. */ -static void upload_state_base_address( struct brw_context *brw ) +static void +upload_state_base_address(struct brw_context *brw) { /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index e7e16b6686a..79e31d86759 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -27,19 +27,27 @@ #include "program/prog_to_nir.h" static void -nir_optimize(nir_shader *nir) +nir_optimize(nir_shader *nir, bool is_scalar) { bool progress; do { progress = false; nir_lower_vars_to_ssa(nir); nir_validate_shader(nir); - nir_lower_alu_to_scalar(nir); - nir_validate_shader(nir); + + if (is_scalar) { + nir_lower_alu_to_scalar(nir); + nir_validate_shader(nir); + } + progress |= nir_copy_prop(nir); nir_validate_shader(nir); - nir_lower_phis_to_scalar(nir); - nir_validate_shader(nir); + + if (is_scalar) { + nir_lower_phis_to_scalar(nir); + nir_validate_shader(nir); + } + progress |= nir_copy_prop(nir); nir_validate_shader(nir); progress |= nir_opt_dce(nir); @@ -57,33 +65,12 @@ nir_optimize(nir_shader *nir) } while (progress); } -static bool -count_nir_instrs_in_block(nir_block *block, void *state) -{ - int *count = (int *) state; - nir_foreach_instr(block, instr) { - *count = *count + 1; - } - return true; -} - -static int -count_nir_instrs(nir_shader *nir) -{ - int count = 0; - nir_foreach_overload(nir, overload) { - if (!overload->impl) - continue; - nir_foreach_block(overload->impl, count_nir_instrs_in_block, &count); - } - return count; -} - nir_shader * brw_create_nir(struct brw_context *brw, const struct gl_shader_program *shader_prog, const struct gl_program *prog, - gl_shader_stage stage) + gl_shader_stage stage, + bool is_scalar) { struct gl_context *ctx = &brw->ctx; const nir_shader_compiler_options *options = @@ -100,16 +87,15 @@ brw_create_nir(struct brw_context *brw, } nir_validate_shader(nir); - brw_process_nir(nir, brw->intelScreen->devinfo, shader_prog, stage); + brw_process_nir(nir, brw->intelScreen->devinfo, shader_prog, stage, is_scalar); static GLuint msg_id = 0; _mesa_gl_debug(&brw->ctx, &msg_id, MESA_DEBUG_SOURCE_SHADER_COMPILER, MESA_DEBUG_TYPE_OTHER, MESA_DEBUG_SEVERITY_NOTIFICATION, - "%s NIR shader: %d inst\n", - _mesa_shader_stage_to_abbrev(stage), - count_nir_instrs(nir)); + "%s NIR shader:\n", + _mesa_shader_stage_to_abbrev(stage)); return nir; } @@ -118,7 +104,7 @@ void brw_process_nir(nir_shader *nir, const struct brw_device_info *devinfo, const struct gl_shader_program *shader_prog, - gl_shader_stage stage) + gl_shader_stage stage, bool is_scalar) { bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage); @@ -134,22 +120,33 @@ brw_process_nir(nir_shader *nir, nir_split_var_copies(nir); nir_validate_shader(nir); - nir_optimize(nir); + nir_optimize(nir, is_scalar); /* Lower a bunch of stuff */ nir_lower_var_copies(nir); nir_validate_shader(nir); /* Get rid of split copies */ - nir_optimize(nir); + nir_optimize(nir, is_scalar); + + if (is_scalar) { + nir_assign_var_locations_direct_first(nir, &nir->uniforms, + &nir->num_direct_uniforms, + &nir->num_uniforms, + is_scalar); + nir_assign_var_locations(&nir->outputs, &nir->num_outputs, is_scalar); + } else { + nir_assign_var_locations(&nir->uniforms, + &nir->num_uniforms, + is_scalar); - nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms, - &nir->num_direct_uniforms, - 
&nir->num_uniforms); - nir_assign_var_locations_scalar(&nir->inputs, &nir->num_inputs); - nir_assign_var_locations_scalar(&nir->outputs, &nir->num_outputs); + foreach_list_typed(nir_variable, var, node, &nir->outputs) + var->data.driver_location = var->data.location; + } + nir_assign_var_locations(&nir->inputs, &nir->num_inputs, is_scalar); + + nir_lower_io(nir, is_scalar); - nir_lower_io(nir); nir_validate_shader(nir); nir_remove_dead_variables(nir); @@ -168,7 +165,7 @@ brw_process_nir(nir_shader *nir, nir_lower_atomics(nir); nir_validate_shader(nir); - nir_optimize(nir); + nir_optimize(nir, is_scalar); if (devinfo->gen >= 6) { /* Try and fuse multiply-adds */ @@ -201,9 +198,14 @@ brw_process_nir(nir_shader *nir, nir_print_shader(nir, stderr); } - nir_convert_from_ssa(nir); + nir_convert_from_ssa(nir, is_scalar); nir_validate_shader(nir); + if (!is_scalar) { + nir_lower_vec_to_movs(nir); + nir_validate_shader(nir); + } + /* This is the last pass we run before we start emitting stuff. It * determines when we need to insert boolean resolves on Gen <= 5. We * run it last because it stashes data in instr->pass_flags and we don't @@ -220,3 +222,42 @@ brw_process_nir(nir_shader *nir, nir_print_shader(nir, stderr); } } + +enum brw_reg_type +brw_type_for_nir_type(nir_alu_type type) +{ + switch (type) { + case nir_type_unsigned: + return BRW_REGISTER_TYPE_UD; + case nir_type_bool: + case nir_type_int: + return BRW_REGISTER_TYPE_D; + case nir_type_float: + return BRW_REGISTER_TYPE_F; + default: + unreachable("unknown type"); + } + + return BRW_REGISTER_TYPE_F; +} + +/* Returns the glsl_base_type corresponding to a nir_alu_type. + * This is used by both brw_vec4_nir and brw_fs_nir. + */ +enum glsl_base_type +brw_glsl_base_type_for_nir_type(nir_alu_type type) +{ + switch (type) { + case nir_type_float: + return GLSL_TYPE_FLOAT; + + case nir_type_int: + return GLSL_TYPE_INT; + + case nir_type_unsigned: + return GLSL_TYPE_UINT; + + default: + unreachable("bad type"); + } +} diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h index 8487cef0901..5a1358890cc 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.h +++ b/src/mesa/drivers/dri/i965/brw_nir.h @@ -24,6 +24,7 @@ #pragma once #include "brw_context.h" +#include "brw_reg.h" #include "glsl/nir/nir.h" #ifdef __cplusplus @@ -77,13 +78,18 @@ void brw_nir_analyze_boolean_resolves(nir_shader *nir); nir_shader *brw_create_nir(struct brw_context *brw, const struct gl_shader_program *shader_prog, const struct gl_program *prog, - gl_shader_stage stage); + gl_shader_stage stage, + bool is_scalar); + +enum brw_reg_type brw_type_for_nir_type(nir_alu_type type); + +enum glsl_base_type brw_glsl_base_type_for_nir_type(nir_alu_type type); void brw_process_nir(nir_shader *nir, const struct brw_device_info *devinfo, const struct gl_shader_program *shader_prog, - gl_shader_stage stage); + gl_shader_stage stage, bool is_scalar); #ifdef __cplusplus } diff --git a/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c b/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c index f0b018cf84a..c995d2b7e2d 100644 --- a/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c +++ b/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c @@ -43,8 +43,8 @@ static uint8_t get_resolve_status_for_src(nir_src *src) { - nir_instr *src_instr = nir_src_get_parent_instr(src); - if (src_instr) { + if (src->is_ssa) { + nir_instr *src_instr = src->ssa->parent_instr; uint8_t resolve_status = src_instr->pass_flags & BRW_NIR_BOOLEAN_MASK; /* 
If the source instruction needs resolve, then from the perspective @@ -66,8 +66,8 @@ get_resolve_status_for_src(nir_src *src) static bool src_mark_needs_resolve(nir_src *src, void *void_state) { - nir_instr *src_instr = nir_src_get_parent_instr(src); - if (src_instr) { + if (src->is_ssa) { + nir_instr *src_instr = src->ssa->parent_instr; uint8_t resolve_status = src_instr->pass_flags & BRW_NIR_BOOLEAN_MASK; /* If the source instruction is unresolved, then mark it as needing @@ -109,28 +109,27 @@ analyze_boolean_resolves_block(nir_block *block, void *void_state) uint8_t resolve_status; nir_alu_instr *alu = nir_instr_as_alu(instr); switch (alu->op) { - case nir_op_flt: - case nir_op_ilt: - case nir_op_ult: - case nir_op_fge: - case nir_op_ige: - case nir_op_uge: - case nir_op_feq: - case nir_op_ieq: - case nir_op_fne: - case nir_op_ine: - case nir_op_f2b: - case nir_op_i2b: - /* This instruction will turn into a CMP when we actually emit - * so the result will have to be resolved before it can be used. + case nir_op_bany2: + case nir_op_bany3: + case nir_op_bany4: + case nir_op_ball_fequal2: + case nir_op_ball_iequal2: + case nir_op_ball_fequal3: + case nir_op_ball_iequal3: + case nir_op_ball_fequal4: + case nir_op_ball_iequal4: + case nir_op_bany_fnequal2: + case nir_op_bany_inequal2: + case nir_op_bany_fnequal3: + case nir_op_bany_inequal3: + case nir_op_bany_fnequal4: + case nir_op_bany_inequal4: + /* These are only implemented by the vec4 backend and its + * implementation emits resolved booleans. At some point in the + * future, this may change and we'll have to remove some of the + * above cases. */ - resolve_status = BRW_NIR_BOOLEAN_UNRESOLVED; - - /* Even though the destination is allowed to be left unresolved, - * the sources are treated as regular integers or floats so - * they need to be resolved. - */ - nir_foreach_src(instr, src_mark_needs_resolve, NULL); + resolve_status = BRW_NIR_BOOLEAN_NO_RESOLVE; break; case nir_op_imov: @@ -169,14 +168,28 @@ analyze_boolean_resolves_block(nir_block *block, void *void_state) } default: - resolve_status = BRW_NIR_NON_BOOLEAN; + if (nir_op_infos[alu->op].output_type == nir_type_bool) { + /* This instructions will turn into a CMP when we actually emit + * them so the result will have to be resolved before it can be + * used. + */ + resolve_status = BRW_NIR_BOOLEAN_UNRESOLVED; + + /* Even though the destination is allowed to be left + * unresolved, the sources are treated as regular integers or + * floats so they need to be resolved. + */ + nir_foreach_src(instr, src_mark_needs_resolve, NULL); + } else { + resolve_status = BRW_NIR_NON_BOOLEAN; + } } - /* If the destination is SSA-like, go ahead allow unresolved booleans. + /* If the destination is SSA, go ahead allow unresolved booleans. * If the destination register doesn't have a well-defined parent_instr * we need to resolve immediately. 
*/ - if (alu->dest.dest.reg.reg->parent_instr == NULL && + if (!alu->dest.dest.is_ssa && resolve_status == BRW_NIR_BOOLEAN_UNRESOLVED) { resolve_status = BRW_NIR_BOOLEAN_NEEDS_RESOLVE; } diff --git a/src/mesa/drivers/dri/i965/brw_performance_monitor.c b/src/mesa/drivers/dri/i965/brw_performance_monitor.c index 2c8cd491a8e..7e90e8a8fa1 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_monitor.c +++ b/src/mesa/drivers/dri/i965/brw_performance_monitor.c @@ -581,7 +581,7 @@ snapshot_statistics_registers(struct brw_context *brw, const int group = PIPELINE_STATS_COUNTERS; const int num_counters = ctx->PerfMonitor.Groups[group].NumCounters; - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); for (int i = 0; i < num_counters; i++) { if (BITSET_TEST(monitor->base.ActiveCounters[group], i)) { @@ -687,7 +687,7 @@ stop_oa_counters(struct brw_context *brw) * The amount of batch space it takes to emit an MI_REPORT_PERF_COUNT snapshot, * including the required PIPE_CONTROL flushes. * - * Sandybridge is the worst case scenario: intel_batchbuffer_emit_mi_flush + * Sandybridge is the worst case scenario: brw_emit_mi_flush * expands to three PIPE_CONTROLs which are 4 DWords each. We have to flush * before and after MI_REPORT_PERF_COUNT, so multiply by two. Finally, add * the 3 DWords for MI_REPORT_PERF_COUNT itself. @@ -710,10 +710,10 @@ emit_mi_report_perf_count(struct brw_context *brw, /* Make sure the commands to take a snapshot fits in a single batch. */ intel_batchbuffer_require_space(brw, MI_REPORT_PERF_COUNT_BATCH_DWORDS * 4, RENDER_RING); - int batch_used = brw->batch.used; + int batch_used = USED_BATCH(brw->batch); /* Reports apparently don't always get written unless we flush first. */ - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); if (brw->gen == 5) { /* Ironlake requires two MI_REPORT_PERF_COUNT commands to write all @@ -751,10 +751,10 @@ emit_mi_report_perf_count(struct brw_context *brw, } /* Reports apparently don't always get written unless we flush after. */ - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); (void) batch_used; - assert(brw->batch.used - batch_used <= MI_REPORT_PERF_COUNT_BATCH_DWORDS * 4); + assert(USED_BATCH(brw->batch) - batch_used <= MI_REPORT_PERF_COUNT_BATCH_DWORDS * 4); } /** @@ -1386,7 +1386,7 @@ void brw_perf_monitor_new_batch(struct brw_context *brw) { assert(brw->batch.ring == RENDER_RING); - assert(brw->gen < 6 || brw->batch.used == 0); + assert(brw->gen < 6 || USED_BATCH(brw->batch) == 0); if (brw->perfmon.oa_users == 0) return; diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c new file mode 100644 index 00000000000..7ee3cb680f7 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c @@ -0,0 +1,359 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_context.h" +#include "intel_batchbuffer.h" +#include "intel_fbo.h" +#include "intel_reg.h" + +/** + * According to the latest documentation, any PIPE_CONTROL with the + * "Command Streamer Stall" bit set must also have another bit set, + * with five different options: + * + * - Render Target Cache Flush + * - Depth Cache Flush + * - Stall at Pixel Scoreboard + * - Post-Sync Operation + * - Depth Stall + * + * I chose "Stall at Pixel Scoreboard" since we've used it effectively + * in the past, but the choice is fairly arbitrary. + */ +static void +gen8_add_cs_stall_workaround_bits(uint32_t *flags) +{ + uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_WRITE_IMMEDIATE | + PIPE_CONTROL_WRITE_DEPTH_COUNT | + PIPE_CONTROL_WRITE_TIMESTAMP | + PIPE_CONTROL_STALL_AT_SCOREBOARD | + PIPE_CONTROL_DEPTH_STALL; + + /* If we're doing a CS stall, and don't already have one of the + * workaround bits set, add "Stall at Pixel Scoreboard." + */ + if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0) + *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; +} + +/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT: + * + * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with + * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set." + * + * Note that the kernel does CS stalls between batches, so we only need + * to count them within a batch. + */ +static uint32_t +gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags) +{ + if (brw->gen == 7 && !brw->is_haswell) { + if (flags & PIPE_CONTROL_CS_STALL) { + /* If we're doing a CS stall, reset the counter and carry on. */ + brw->pipe_controls_since_last_cs_stall = 0; + return 0; + } + + /* If this is the fourth pipe control without a CS stall, do one now. */ + if (++brw->pipe_controls_since_last_cs_stall == 4) { + brw->pipe_controls_since_last_cs_stall = 0; + return PIPE_CONTROL_CS_STALL; + } + } + return 0; +} + +/** + * Emit a PIPE_CONTROL with various flushing flags. + * + * The caller is responsible for deciding what flags are appropriate for the + * given generation. + */ +void +brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags) +{ + if (brw->gen >= 8) { + gen8_add_cs_stall_workaround_bits(&flags); + + BEGIN_BATCH(6); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2)); + OUT_BATCH(flags); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } else if (brw->gen >= 6) { + flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags); + + BEGIN_BATCH(5); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); + OUT_BATCH(flags); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } +} + +/** + * Emit a PIPE_CONTROL that writes to a buffer object. 
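The WaCsStallAtEveryFourthPipecontrol counter above is easiest to see in isolation; a self-contained sketch of its behaviour, with the CS-stall bit passed in rather than using the PIPE_CONTROL_CS_STALL define and the counter made a file-scope static purely for illustration:

#include <stdint.h>

static unsigned pipe_controls_since_last_cs_stall;

static uint32_t
maybe_add_cs_stall(uint32_t flags, uint32_t cs_stall_bit)
{
   if (flags & cs_stall_bit) {
      /* An explicit CS stall resets the counter. */
      pipe_controls_since_last_cs_stall = 0;
      return 0;
   }

   /* Every fourth PIPE_CONTROL without one gets a CS stall added. */
   if (++pipe_controls_since_last_cs_stall == 4) {
      pipe_controls_since_last_cs_stall = 0;
      return cs_stall_bit;
   }

   return 0;
}

/* Three consecutive calls return 0; the fourth returns the CS stall bit. */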
+ * + * \p flags should contain one of the following items: + * - PIPE_CONTROL_WRITE_IMMEDIATE + * - PIPE_CONTROL_WRITE_TIMESTAMP + * - PIPE_CONTROL_WRITE_DEPTH_COUNT + */ +void +brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags, + drm_intel_bo *bo, uint32_t offset, + uint32_t imm_lower, uint32_t imm_upper) +{ + if (brw->gen >= 8) { + gen8_add_cs_stall_workaround_bits(&flags); + + BEGIN_BATCH(6); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2)); + OUT_BATCH(flags); + OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + offset); + OUT_BATCH(imm_lower); + OUT_BATCH(imm_upper); + ADVANCE_BATCH(); + } else if (brw->gen >= 6) { + flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags); + + /* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24 + * on later platforms. We always use PPGTT on Gen7+. + */ + unsigned gen6_gtt = brw->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE : 0; + + BEGIN_BATCH(5); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); + OUT_BATCH(flags); + OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + gen6_gtt | offset); + OUT_BATCH(imm_lower); + OUT_BATCH(imm_upper); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2)); + OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + PIPE_CONTROL_GLOBAL_GTT_WRITE | offset); + OUT_BATCH(imm_lower); + OUT_BATCH(imm_upper); + ADVANCE_BATCH(); + } +} + +/** + * Restriction [DevSNB, DevIVB]: + * + * Prior to changing Depth/Stencil Buffer state (i.e. any combination of + * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER, + * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall + * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth + * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by + * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set), + * unless SW can otherwise guarantee that the pipeline from WM onwards is + * already flushed (e.g., via a preceding MI_FLUSH). + */ +void +brw_emit_depth_stall_flushes(struct brw_context *brw) +{ + assert(brw->gen >= 6 && brw->gen <= 9); + + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL); + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_CACHE_FLUSH); + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL); +} + +/** + * From the Ivybridge PRM, Volume 2 Part 1, Section 3.2 (VS Stage Input): + * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth + * stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS, + * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS, + * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL needs + * to be sent before any combination of VS associated 3DSTATE." + */ +void +gen7_emit_vs_workaround_flush(struct brw_context *brw) +{ + assert(brw->gen == 7); + brw_emit_pipe_control_write(brw, + PIPE_CONTROL_WRITE_IMMEDIATE + | PIPE_CONTROL_DEPTH_STALL, + brw->workaround_bo, 0, + 0, 0); +} + + +/** + * Emit a PIPE_CONTROL command for gen7 with the CS Stall bit set. + */ +void +gen7_emit_cs_stall_flush(struct brw_context *brw) +{ + brw_emit_pipe_control_write(brw, + PIPE_CONTROL_CS_STALL + | PIPE_CONTROL_WRITE_IMMEDIATE, + brw->workaround_bo, 0, + 0, 0); +} + + +/** + * Emits a PIPE_CONTROL with a non-zero post-sync operation, for + * implementing two workarounds on gen6. 
From section 1.4.7.1 + * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: + * + * [DevSNB-C+{W/A}] Before any depth stall flush (including those + * produced by non-pipelined state commands), software needs to first + * send a PIPE_CONTROL with no bits set except Post-Sync Operation != + * 0. + * + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable + * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. + * + * And the workaround for these two requires this workaround first: + * + * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent + * BEFORE the pipe-control with a post-sync op and no write-cache + * flushes. + * + * And this last workaround is tricky because of the requirements on + * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM + * volume 2 part 1: + * + * "1 of the following must also be set: + * - Render Target Cache Flush Enable ([12] of DW1) + * - Depth Cache Flush Enable ([0] of DW1) + * - Stall at Pixel Scoreboard ([1] of DW1) + * - Depth Stall ([13] of DW1) + * - Post-Sync Operation ([13] of DW1) + * - Notify Enable ([8] of DW1)" + * + * The cache flushes require the workaround flush that triggered this + * one, so we can't use it. Depth stall would trigger the same. + * Post-sync nonzero is what triggered this second workaround, so we + * can't use that one either. Notify enable is IRQs, which aren't + * really our business. That leaves only stall at scoreboard. + */ +void +brw_emit_post_sync_nonzero_flush(struct brw_context *brw) +{ + brw_emit_pipe_control_flush(brw, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD); + + brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE, + brw->workaround_bo, 0, 0, 0); +} + +/* Emit a pipelined flush to either flush render and texture cache for + * reading from a FBO-drawn texture, or flush so that frontbuffer + * render appears on the screen in DRI1. + * + * This is also used for the always_flush_cache driconf debug option. + */ +void +brw_emit_mi_flush(struct brw_context *brw) +{ + if (brw->batch.ring == BLT_RING && brw->gen >= 6) { + BEGIN_BATCH_BLT(4); + OUT_BATCH(MI_FLUSH_DW); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } else { + int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH; + if (brw->gen >= 6) { + if (brw->gen == 9) { + /* Hardware workaround: SKL + * + * Emit Pipe Control with all bits set to zero before emitting + * a Pipe Control with VF Cache Invalidate set. + */ + brw_emit_pipe_control_flush(brw, 0); + } + + flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_VF_CACHE_INVALIDATE | + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | + PIPE_CONTROL_CS_STALL; + + if (brw->gen == 6) { + /* Hardware workaround: SNB B-Spec says: + * + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache + * Flush Enable =1, a PIPE_CONTROL with any non-zero + * post-sync-op is required. + */ + brw_emit_post_sync_nonzero_flush(brw); + } + } + brw_emit_pipe_control_flush(brw, flags); + } + + brw_render_cache_set_clear(brw); +} + +int +brw_init_pipe_control(struct brw_context *brw, + const struct brw_device_info *devinfo) +{ + if (devinfo->gen < 6) + return 0; + + /* We can't just use brw_state_batch to get a chunk of space for + * the gen6 workaround because it involves actually writing to + * the buffer, and the kernel doesn't let us write to the batch. 
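For reference, the gen9 render-ring path of brw_emit_mi_flush() above boils down to a two-step sequence; this is a sketch of what the function emits, using only the flags listed in that hunk:

    /* SKL workaround: a fully zeroed PIPE_CONTROL first, then the real
     * flush with VF Cache Invalidate (and the other flush/invalidate
     * bits) set.
     */
    brw_emit_pipe_control_flush(brw, 0);
    brw_emit_pipe_control_flush(brw, PIPE_CONTROL_NO_WRITE |
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                     PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                     PIPE_CONTROL_VF_CACHE_INVALIDATE |
                                     PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                                     PIPE_CONTROL_CS_STALL);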
+ */ + brw->workaround_bo = drm_intel_bo_alloc(brw->bufmgr, + "pipe_control workaround", + 4096, 4096); + if (brw->workaround_bo == NULL) + return -ENOMEM; + + brw->pipe_controls_since_last_cs_stall = 0; + + return 0; +} + +void +brw_fini_pipe_control(struct brw_context *brw) +{ + drm_intel_bo_unreference(brw->workaround_bo); +} diff --git a/src/mesa/drivers/dri/i965/brw_primitive_restart.c b/src/mesa/drivers/dri/i965/brw_primitive_restart.c index 2c7a7e8b8dd..6ed79d7cb75 100644 --- a/src/mesa/drivers/dri/i965/brw_primitive_restart.c +++ b/src/mesa/drivers/dri/i965/brw_primitive_restart.c @@ -161,7 +161,8 @@ brw_handle_primitive_restart(struct gl_context *ctx, /* Cut index should work for primitive restart, so use it */ brw->prim_restart.enable_cut_index = true; - brw_draw_prims(ctx, prims, nr_prims, ib, GL_FALSE, -1, -1, NULL, indirect); + brw_draw_prims(ctx, prims, nr_prims, ib, GL_FALSE, -1, -1, NULL, 0, + indirect); brw->prim_restart.enable_cut_index = false; } else { /* Not all the primitive draw modes are supported by the cut index, diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index ea128ccb670..5a54cd39076 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -143,7 +143,7 @@ brwProgramStringNotify(struct gl_context *ctx, brw_add_texrect_params(prog); if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions) { - prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT); + prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true); } brw_fs_precompile(ctx, NULL, prog); @@ -169,7 +169,8 @@ brwProgramStringNotify(struct gl_context *ctx, brw_add_texrect_params(prog); if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions) { - prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX); + prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX, + brw->intelScreen->compiler->scalar_vs); } brw_vs_precompile(ctx, NULL, prog); @@ -196,7 +197,7 @@ brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers) unsigned bits = (PIPE_CONTROL_DATA_CACHE_INVALIDATE | PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_CS_STALL); - assert(brw->gen >= 7 && brw->gen <= 8); + assert(brw->gen >= 7 && brw->gen <= 9); if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT | @@ -574,10 +575,13 @@ brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog, struct gl_shader *shader, struct gl_program *prog) { if (shader_prog) { - fprintf(stderr, - "GLSL IR for native %s shader %d:\n", stage, shader_prog->Name); - _mesa_print_ir(stderr, shader->ir, NULL); - fprintf(stderr, "\n\n"); + if (shader->ir) { + fprintf(stderr, + "GLSL IR for native %s shader %d:\n", + stage, shader_prog->Name); + _mesa_print_ir(stderr, shader->ir, NULL); + fprintf(stderr, "\n\n"); + } } else { fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n", stage, prog->Id, stage); diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index aea4d9b77d3..d6b012c392e 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -497,13 +497,22 @@ brw_get_timestamp(struct gl_context *ctx) struct brw_context *brw = brw_context(ctx); uint64_t result = 0; - drm_intel_reg_read(brw->bufmgr, TIMESTAMP, &result); + switch (brw->intelScreen->hw_has_timestamp) { + case 3: /* New kernel, always full 36bit accuracy */ + drm_intel_reg_read(brw->bufmgr, TIMESTAMP | 1, &result); + break; + 
case 2: /* 64bit kernel, result is left-shifted by 32bits, losing 4bits */ + drm_intel_reg_read(brw->bufmgr, TIMESTAMP, &result); + result = result >> 32; + break; + case 1: /* 32bit kernel, result is 36bit wide but may be inaccurate! */ + drm_intel_reg_read(brw->bufmgr, TIMESTAMP, &result); + break; + } /* See logic in brw_queryobj_get_results() */ - result = result >> 32; result *= 80; result &= (1ull << 36) - 1; - return result; } diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h index c8b134103bb..31806f769bd 100644 --- a/src/mesa/drivers/dri/i965/brw_reg.h +++ b/src/mesa/drivers/dri/i965/brw_reg.h @@ -853,7 +853,7 @@ static inline struct brw_reg spread(struct brw_reg reg, unsigned s) { if (s) { - assert(is_power_of_two(s)); + assert(_mesa_is_pow_two(s)); if (reg.hstride) reg.hstride += cvt(s) - 1; @@ -950,6 +950,12 @@ brw_set_writemask(struct brw_reg reg, unsigned mask) return reg; } +static inline unsigned +brw_writemask_for_size(unsigned n) +{ + return (1 << n) - 1; +} + static inline struct brw_reg negate(struct brw_reg reg) { diff --git a/src/mesa/drivers/dri/i965/brw_sampler_state.c b/src/mesa/drivers/dri/i965/brw_sampler_state.c index 22ccbfe8461..2021bb3b460 100644 --- a/src/mesa/drivers/dri/i965/brw_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_sampler_state.c @@ -425,11 +425,11 @@ brw_update_sampler_state(struct brw_context *brw, /* Enable anisotropic filtering if desired. */ unsigned max_anisotropy = BRW_ANISORATIO_2; - if (sampler->MaxAnisotropy > 1.0) { + if (sampler->MaxAnisotropy > 1.0f) { min_filter = BRW_MAPFILTER_ANISOTROPIC; mag_filter = BRW_MAPFILTER_ANISOTROPIC; - if (sampler->MaxAnisotropy > 2.0) { + if (sampler->MaxAnisotropy > 2.0f) { max_anisotropy = MIN2((sampler->MaxAnisotropy - 2) / 2, BRW_ANISORATIO_16); } diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index ee0add5d765..b49961fff68 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -1314,8 +1314,8 @@ fs_instruction_scheduler::choose_instruction_to_schedule() * single-result send is probably actually reducing register * pressure. 
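A quick arithmetic check on the brw_get_timestamp() change above: the raw register counts 80 ns ticks, and after the multiply the value handed back to GL is masked to 36 bits of nanoseconds, so it wraps fairly quickly. A standalone sketch of that conversion (not part of the patch):

    #include <stdint.h>

    static uint64_t timestamp_ns(uint64_t ticks)
    {
       uint64_t ns = ticks * 80;        /* 80 ns per tick */
       /* (1ull << 36) ns = 68,719,476,736 ns, so the value returned to
        * GL_TIMESTAMP queries wraps roughly every 68.7 seconds.
        */
       return ns & ((1ull << 36) - 1);
    }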
*/ - if (inst->regs_written <= inst->dst.width / 8 && - chosen_inst->regs_written > chosen_inst->dst.width / 8) { + if (inst->regs_written <= inst->exec_size / 8 && + chosen_inst->regs_written > chosen_inst->exec_size / 8) { chosen = n; continue; } else if (inst->regs_written > chosen_inst->regs_written) { diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 5d9892214a9..b126f82ebbf 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -45,7 +45,7 @@ static void upload_sf_vp(struct brw_context *brw) struct gl_context *ctx = &brw->ctx; struct brw_sf_viewport *sfv; GLfloat y_scale, y_bias; - double scale[3], translate[3]; + float scale[3], translate[3]; const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, @@ -220,7 +220,7 @@ static void upload_sf_unit( struct brw_context *brw ) /* _NEW_LINE */ sf->sf6.line_width = - CLAMP(ctx->Line.Width, 1.0, ctx->Const.MaxLineWidth) * (1<<1); + CLAMP(ctx->Line.Width, 1.0f, ctx->Const.MaxLineWidth) * (1<<1); sf->sf6.line_endcap_aa_region_width = 1; if (ctx->Line.SmoothFlag) @@ -259,9 +259,10 @@ static void upload_sf_unit( struct brw_context *brw ) /* _NEW_POINT */ sf->sf7.sprite_point = ctx->Point.PointSprite; - sf->sf7.point_size = CLAMP(rint(CLAMP(ctx->Point.Size, - ctx->Point.MinSize, - ctx->Point.MaxSize)), 1, 255) * (1<<3); + sf->sf7.point_size = CLAMP(rintf(CLAMP(ctx->Point.Size, + ctx->Point.MinSize, + ctx->Point.MaxSize)), 1.0f, 255.0f) * + (1<<3); /* _NEW_PROGRAM | _NEW_POINT */ sf->sf7.use_point_size_state = !(ctx->VertexProgram.PointSizeEnabled || ctx->Point._Attenuated); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 06393c8ff2b..67b8dde7cc8 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -113,22 +113,32 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) (i == MESA_SHADER_FRAGMENT); compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false; compiler->glsl_compiler_options[i].LowerClipDistance = true; + + /* !ARB_gpu_shader5 */ + if (devinfo->gen < 7) + compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true; } compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = true; compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].OptimizeForAOS = true; - if (compiler->scalar_vs) { - /* If we're using the scalar backend for vertex shaders, we need to - * configure these accordingly. - */ - compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true; - compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true; - compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = false; + if (compiler->scalar_vs || brw_env_var_as_boolean("INTEL_USE_NIR", false)) { + if (compiler->scalar_vs) { + /* If we're using the scalar backend for vertex shaders, we need to + * configure these accordingly. 
+ */ + compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true; + compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true; + compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = false; + } compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions = nir_options; } + if (brw_env_var_as_boolean("INTEL_USE_NIR", false)) { + compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].NirOptions = nir_options; + } + compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions = nir_options; compiler->glsl_compiler_options[MESA_SHADER_COMPUTE].NirOptions = nir_options; @@ -229,7 +239,8 @@ brw_lower_packing_builtins(struct brw_context *brw, } static void -process_glsl_ir(struct brw_context *brw, +process_glsl_ir(gl_shader_stage stage, + struct brw_context *brw, struct gl_shader_program *shader_prog, struct gl_shader *shader) { @@ -255,7 +266,9 @@ process_glsl_ir(struct brw_context *brw, EXP_TO_EXP2 | LOG_TO_LOG2 | bitfield_insert | - LDEXP_TO_ARITH); + LDEXP_TO_ARITH | + CARRY_TO_ARITH | + BORROW_TO_ARITH); /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, * if-statements need to be flattened. @@ -275,15 +288,17 @@ process_glsl_ir(struct brw_context *brw, lower_quadop_vector(shader->ir, false); bool lowered_variable_indexing = - lower_variable_index_to_cond_assign(shader->ir, + lower_variable_index_to_cond_assign((gl_shader_stage)stage, + shader->ir, options->EmitNoIndirectInput, options->EmitNoIndirectOutput, options->EmitNoIndirectTemp, options->EmitNoIndirectUniform); if (unlikely(brw->perf_debug && lowered_variable_indexing)) { - perf_debug("Unsupported form of variable indexing in FS; falling " - "back to very inefficient code generation\n"); + perf_debug("Unsupported form of variable indexing in %s; falling " + "back to very inefficient code generation\n", + _mesa_shader_stage_to_abbrev(shader->Stage)); } lower_ubo_reference(shader, shader->ir); @@ -308,7 +323,7 @@ process_glsl_ir(struct brw_context *brw, } while (progress); if (options->NirOptions != NULL) - lower_output_reads(shader->ir); + lower_output_reads(stage, shader->ir); validate_ir_tree(shader->ir); @@ -352,7 +367,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) _mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog); - process_glsl_ir(brw, shProg, shader); + process_glsl_ir((gl_shader_stage) stage, brw, shProg, shader); /* Make a pass over the IR to add state references for any built-in * uniforms that are used. This has to be done now (during linking). 
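The new CARRY_TO_ARITH | BORROW_TO_ARITH lowering flags in process_glsl_ir() above presumably expand uaddCarry()/usubBorrow() into plain unsigned arithmetic, roughly the identities below (an assumption based on the flag names; the actual lowering lives in the GLSL compiler, not in this patch):

    #include <stdint.h>

    /* uaddCarry(a, b): the 32-bit sum wraps, so the carry is set exactly
     * when the result is smaller than one of the operands.
     */
    static void uadd_carry(uint32_t a, uint32_t b,
                           uint32_t *sum, uint32_t *carry)
    {
       *sum   = a + b;
       *carry = (*sum < a) ? 1u : 0u;
    }

    /* usubBorrow(a, b): a borrow occurs exactly when a < b. */
    static void usub_borrow(uint32_t a, uint32_t b,
                            uint32_t *diff, uint32_t *borrow)
    {
       *diff   = a - b;
       *borrow = (a < b) ? 1u : 0u;
    }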
@@ -387,8 +402,10 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) brw_add_texrect_params(prog); - if (options->NirOptions) - prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage); + if (options->NirOptions) { + prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage, + is_scalar_shader_stage(brw, stage)); + } _mesa_reference_program(ctx, &prog, NULL); } @@ -422,6 +439,7 @@ brw_type_for_base_type(const struct glsl_type *type) return BRW_REGISTER_TYPE_F; case GLSL_TYPE_INT: case GLSL_TYPE_BOOL: + case GLSL_TYPE_SUBROUTINE: return BRW_REGISTER_TYPE_D; case GLSL_TYPE_UINT: return BRW_REGISTER_TYPE_UD; @@ -528,6 +546,8 @@ brw_instruction_name(enum opcode op) return opcode_descs[op].name; case FS_OPCODE_FB_WRITE: return "fb_write"; + case FS_OPCODE_FB_WRITE_LOGICAL: + return "fb_write_logical"; case FS_OPCODE_BLORP_FB_WRITE: return "blorp_fb_write"; case FS_OPCODE_REP_FB_WRITE: @@ -556,43 +576,80 @@ brw_instruction_name(enum opcode op) case SHADER_OPCODE_TEX: return "tex"; + case SHADER_OPCODE_TEX_LOGICAL: + return "tex_logical"; case SHADER_OPCODE_TXD: return "txd"; + case SHADER_OPCODE_TXD_LOGICAL: + return "txd_logical"; case SHADER_OPCODE_TXF: return "txf"; + case SHADER_OPCODE_TXF_LOGICAL: + return "txf_logical"; case SHADER_OPCODE_TXL: return "txl"; + case SHADER_OPCODE_TXL_LOGICAL: + return "txl_logical"; case SHADER_OPCODE_TXS: return "txs"; + case SHADER_OPCODE_TXS_LOGICAL: + return "txs_logical"; case FS_OPCODE_TXB: return "txb"; + case FS_OPCODE_TXB_LOGICAL: + return "txb_logical"; case SHADER_OPCODE_TXF_CMS: return "txf_cms"; + case SHADER_OPCODE_TXF_CMS_LOGICAL: + return "txf_cms_logical"; case SHADER_OPCODE_TXF_UMS: return "txf_ums"; + case SHADER_OPCODE_TXF_UMS_LOGICAL: + return "txf_ums_logical"; case SHADER_OPCODE_TXF_MCS: return "txf_mcs"; + case SHADER_OPCODE_TXF_MCS_LOGICAL: + return "txf_mcs_logical"; case SHADER_OPCODE_LOD: return "lod"; + case SHADER_OPCODE_LOD_LOGICAL: + return "lod_logical"; case SHADER_OPCODE_TG4: return "tg4"; + case SHADER_OPCODE_TG4_LOGICAL: + return "tg4_logical"; case SHADER_OPCODE_TG4_OFFSET: return "tg4_offset"; + case SHADER_OPCODE_TG4_OFFSET_LOGICAL: + return "tg4_offset_logical"; + case SHADER_OPCODE_SHADER_TIME_ADD: return "shader_time_add"; case SHADER_OPCODE_UNTYPED_ATOMIC: return "untyped_atomic"; + case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: + return "untyped_atomic_logical"; case SHADER_OPCODE_UNTYPED_SURFACE_READ: return "untyped_surface_read"; + case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: + return "untyped_surface_read_logical"; case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: return "untyped_surface_write"; + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: + return "untyped_surface_write_logical"; case SHADER_OPCODE_TYPED_ATOMIC: return "typed_atomic"; + case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: + return "typed_atomic_logical"; case SHADER_OPCODE_TYPED_SURFACE_READ: return "typed_surface_read"; + case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: + return "typed_surface_read_logical"; case SHADER_OPCODE_TYPED_SURFACE_WRITE: return "typed_surface_write"; + case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: + return "typed_surface_write_logical"; case SHADER_OPCODE_MEMORY_FENCE: return "memory_fence"; @@ -653,8 +710,6 @@ brw_instruction_name(enum opcode op) case FS_OPCODE_DISCARD_JUMP: return "discard_jump"; - case FS_OPCODE_SET_OMASK: - return "set_omask"; case FS_OPCODE_SET_SAMPLE_ID: return "set_sample_id"; case FS_OPCODE_SET_SIMD4X2_OFFSET: @@ -724,6 +779,8 @@ 
brw_instruction_name(enum opcode op) return "cs_terminate"; case SHADER_OPCODE_BARRIER: return "barrier"; + case SHADER_OPCODE_MULH: + return "mulh"; } unreachable("not reached"); @@ -942,6 +999,7 @@ backend_instruction::is_commutative() const case BRW_OPCODE_XOR: case BRW_OPCODE_ADD: case BRW_OPCODE_MUL: + case SHADER_OPCODE_MULH: return true; case BRW_OPCODE_SEL: /* MIN and MAX are commutative. */ @@ -1049,6 +1107,7 @@ backend_instruction::can_do_saturate() const case BRW_OPCODE_MATH: case BRW_OPCODE_MOV: case BRW_OPCODE_MUL: + case SHADER_OPCODE_MULH: case BRW_OPCODE_PLN: case BRW_OPCODE_RNDD: case BRW_OPCODE_RNDE: @@ -1147,10 +1206,14 @@ backend_instruction::has_side_effects() const { switch (opcode) { case SHADER_OPCODE_UNTYPED_ATOMIC: + case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: case SHADER_OPCODE_GEN4_SCRATCH_WRITE: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: case SHADER_OPCODE_TYPED_ATOMIC: + case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: case SHADER_OPCODE_TYPED_SURFACE_WRITE: + case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: case SHADER_OPCODE_MEMORY_FENCE: case SHADER_OPCODE_URB_WRITE_SIMD8: case FS_OPCODE_FB_WRITE: @@ -1356,3 +1419,34 @@ backend_shader::assign_common_binding_table_offsets(uint32_t next_binding_table_ /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */ } + +void +backend_shader::setup_image_uniform_values(const gl_uniform_storage *storage) +{ + const unsigned stage = _mesa_program_enum_to_shader_stage(prog->Target); + + for (unsigned i = 0; i < MAX2(storage->array_elements, 1); i++) { + const unsigned image_idx = storage->image[stage].index + i; + const brw_image_param *param = &stage_prog_data->image_param[image_idx]; + + /* Upload the brw_image_param structure. The order is expected to match + * the BRW_IMAGE_PARAM_*_OFFSET defines. 
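The setup_vector_uniform_values() calls that follow push one small vector per field; read off the component counts, the per-image parameter block looks roughly like this (a hypothetical mirror of the upload order only, not the real brw_image_param definition):

    struct image_param_sketch {
       uint32_t surface_idx;   /* 1 component */
       uint32_t offset[2];     /* 2 components */
       uint32_t size[3];       /* 3 components */
       uint32_t stride[4];     /* 4 components */
       uint32_t tiling[3];     /* 3 components */
       uint32_t swizzling[2];  /* 2 components */
    };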
+ */ + setup_vector_uniform_values( + (const gl_constant_value *)¶m->surface_idx, 1); + setup_vector_uniform_values( + (const gl_constant_value *)param->offset, 2); + setup_vector_uniform_values( + (const gl_constant_value *)param->size, 3); + setup_vector_uniform_values( + (const gl_constant_value *)param->stride, 4); + setup_vector_uniform_values( + (const gl_constant_value *)param->tiling, 3); + setup_vector_uniform_values( + (const gl_constant_value *)param->swizzling, 2); + + brw_mark_surface_used( + stage_prog_data, + stage_prog_data->binding_table.image_start + image_idx); + } +} diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index b2c1a0b8d69..2cc97f24972 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -26,6 +26,7 @@ #include "brw_defines.h" #include "main/compiler.h" #include "glsl/ir.h" +#include "program/prog_parameter.h" #ifdef __cplusplus #include "brw_ir_allocator.h" @@ -268,6 +269,10 @@ public: void assign_common_binding_table_offsets(uint32_t next_binding_table_offset); virtual void invalidate_live_intervals() = 0; + + virtual void setup_vector_uniform_values(const gl_constant_value *values, + unsigned n) = 0; + void setup_image_uniform_values(const gl_uniform_storage *storage); }; uint32_t brw_texture_offset(int *offsets, unsigned num_components); diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 987672f8815..78a1f874b4e 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -72,8 +72,10 @@ extern const struct brw_tracked_state brw_vs_samplers; extern const struct brw_tracked_state brw_gs_samplers; extern const struct brw_tracked_state brw_vs_ubo_surfaces; extern const struct brw_tracked_state brw_vs_abo_surfaces; +extern const struct brw_tracked_state brw_vs_image_surfaces; extern const struct brw_tracked_state brw_gs_ubo_surfaces; extern const struct brw_tracked_state brw_gs_abo_surfaces; +extern const struct brw_tracked_state brw_gs_image_surfaces; extern const struct brw_tracked_state brw_vs_unit; extern const struct brw_tracked_state brw_gs_prog; extern const struct brw_tracked_state brw_wm_prog; @@ -84,7 +86,9 @@ extern const struct brw_tracked_state brw_gs_binding_table; extern const struct brw_tracked_state brw_vs_binding_table; extern const struct brw_tracked_state brw_wm_ubo_surfaces; extern const struct brw_tracked_state brw_wm_abo_surfaces; +extern const struct brw_tracked_state brw_wm_image_surfaces; extern const struct brw_tracked_state brw_cs_abo_surfaces; +extern const struct brw_tracked_state brw_cs_image_surfaces; extern const struct brw_tracked_state brw_wm_unit; extern const struct brw_tracked_state brw_interpolation_map; @@ -121,7 +125,6 @@ extern const struct brw_tracked_state gen6_wm_state; extern const struct brw_tracked_state gen7_depthbuffer; extern const struct brw_tracked_state gen7_clip_state; extern const struct brw_tracked_state gen7_disable_stages; -extern const struct brw_tracked_state gen7_gs_push_constants; extern const struct brw_tracked_state gen7_gs_state; extern const struct brw_tracked_state gen7_ps_state; extern const struct brw_tracked_state gen7_push_constant_space; @@ -132,6 +135,7 @@ extern const struct brw_tracked_state gen7_sol_state; extern const struct brw_tracked_state gen7_urb; extern const struct brw_tracked_state gen7_vs_state; extern const struct brw_tracked_state gen7_wm_state; +extern const struct brw_tracked_state 
gen7_hw_binding_tables; extern const struct brw_tracked_state haswell_cut_index; extern const struct brw_tracked_state gen8_blend_state; extern const struct brw_tracked_state gen8_disable_stages; @@ -266,15 +270,6 @@ void brw_update_renderbuffer_surfaces(struct brw_context *brw, uint32_t render_target_start, uint32_t *surf_offset); -/* gen7_wm_state.c */ -void -gen7_upload_ps_state(struct brw_context *brw, - const struct gl_fragment_program *fp, - const struct brw_stage_state *stage_state, - const struct brw_wm_prog_data *prog_data, - bool enable_dual_src_blend, unsigned sample_mask, - unsigned fast_clear_op); - /* gen7_wm_surface_state.c */ uint32_t gen7_surface_tiling_mode(uint32_t tiling); uint32_t gen7_surface_msaa_bits(unsigned num_samples, enum intel_msaa_layout l); @@ -372,6 +367,20 @@ gen7_upload_constant_state(struct brw_context *brw, const struct brw_stage_state *stage_state, bool active, unsigned opcode); +void gen7_rs_control(struct brw_context *brw, int enable); + +void gen7_edit_hw_binding_table_entry(struct brw_context *brw, + gl_shader_stage stage, + uint32_t index, + uint32_t surf_offset); +void gen7_update_binding_table_from_array(struct brw_context *brw, + gl_shader_stage stage, + const uint32_t* binding_table, + int num_surfaces); +void gen7_enable_hw_binding_tables(struct brw_context *brw); +void gen7_disable_hw_binding_tables(struct brw_context *brw); +void gen7_reset_hw_bt_pool_offsets(struct brw_context *brw); + #ifdef __cplusplus } #endif diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index a405a80ef6e..d79e0ea00c7 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -87,7 +87,7 @@ brw_annotate_aub(struct brw_context *brw) drm_intel_aub_annotation annotations[annotation_count]; int a = 0; make_annotation(&annotations[a++], AUB_TRACE_TYPE_BATCH, 0, - 4*brw->batch.used); + 4 * USED_BATCH(brw->batch)); for (int i = brw->state_batch_count; i-- > 0; ) { uint32_t type = brw->state_batch_list[i].type; uint32_t start_offset = brw->state_batch_list[i].offset; @@ -136,7 +136,7 @@ __brw_state_batch(struct brw_context *brw, * space, then flush and try again. */ if (batch->state_batch_offset < size || - offset < 4*batch->used + batch->reserved_space) { + offset < 4 * USED_BATCH(*batch) + batch->reserved_space) { intel_batchbuffer_flush(brw); offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment); } diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 24778d25379..5effb4c8829 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -200,36 +200,23 @@ brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size) } /** - * Attempts to find an item in the cache with identical data and aux - * data to use + * Attempts to find an item in the cache with identical data. 
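The brw->batch.used to USED_BATCH(brw->batch) conversions in this and the earlier hunks suggest the DWord count is now derived from a write pointer rather than stored separately; the macro is presumably something along these lines (assumed shape only; the real definition lives in intel_batchbuffer.h and is not shown in this diff):

    /* Assumed: number of DWords written so far, measured as how far the
     * write pointer has advanced past the start of the batch map.
     */
    #define USED_BATCH(batch) ((uintptr_t)((batch).map_next - (batch).map))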
*/ -static bool -brw_try_upload_using_copy(struct brw_cache *cache, - struct brw_cache_item *result_item, - const void *data, - const void *aux) +static const struct brw_cache_item * +brw_lookup_prog(const struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, unsigned data_size) { - struct brw_context *brw = cache->brw; + const struct brw_context *brw = cache->brw; int i; - struct brw_cache_item *item; + const struct brw_cache_item *item; for (i = 0; i < cache->size; i++) { for (item = cache->items[i]; item; item = item->next) { - const void *item_aux = item->key + item->key_size; int ret; - if (item->cache_id != result_item->cache_id || - item->size != result_item->size || - item->aux_size != result_item->aux_size) { - continue; - } - - if (cache->aux_compare[result_item->cache_id]) { - if (!cache->aux_compare[result_item->cache_id](item_aux, aux)) - continue; - } else if (memcmp(item_aux, aux, item->aux_size) != 0) { + if (item->cache_id != cache_id || item->size != data_size) continue; - } if (!brw->has_llc) drm_intel_bo_map(cache->bo, false); @@ -239,27 +226,24 @@ brw_try_upload_using_copy(struct brw_cache *cache, if (ret) continue; - result_item->offset = item->offset; - - return true; + return item; } } - return false; + return NULL; } -static void -brw_upload_item_data(struct brw_cache *cache, - struct brw_cache_item *item, - const void *data) +static uint32_t +brw_alloc_item_data(struct brw_cache *cache, uint32_t size) { + uint32_t offset; struct brw_context *brw = cache->brw; /* Allocate space in the cache BO for our new program. */ - if (cache->next_offset + item->size > cache->bo->size) { + if (cache->next_offset + size > cache->bo->size) { uint32_t new_size = cache->bo->size * 2; - while (cache->next_offset + item->size > new_size) + while (cache->next_offset + size > new_size) new_size *= 2; brw_cache_new_bo(cache, new_size); @@ -273,10 +257,12 @@ brw_upload_item_data(struct brw_cache *cache, brw_cache_new_bo(cache, cache->bo->size); } - item->offset = cache->next_offset; + offset = cache->next_offset; /* Programs are always 64-byte aligned, so set up the next one now */ - cache->next_offset = ALIGN(item->offset + item->size, 64); + cache->next_offset = ALIGN(offset + size, 64); + + return offset; } void @@ -293,6 +279,8 @@ brw_upload_cache(struct brw_cache *cache, { struct brw_context *brw = cache->brw; struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); + const struct brw_cache_item *matching_data = + brw_lookup_prog(cache, cache_id, data, data_size); GLuint hash; void *tmp; @@ -304,15 +292,23 @@ brw_upload_cache(struct brw_cache *cache, hash = hash_key(item); item->hash = hash; - /* If we can find a matching prog/prog_data combo in the cache - * already, then reuse the existing stuff. This will mean not - * flagging CACHE_NEW_* when transitioning between the two - * equivalent hash keys. This is notably useful for programs - * generating shaders at runtime, where multiple shaders may - * compile to the thing in our backend. + /* If we can find a matching prog in the cache already, then reuse the + * existing stuff without creating new copy into the underlying buffer + * object. This is notably useful for programs generating shaders at + * runtime, where multiple shaders may compile to the same thing in our + * backend. 
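Taken together with the next hunk, the rewritten upload path now looks for byte-identical program code first and only allocates and copies on a miss; a sketch of the resulting flow, using the names introduced above:

    const struct brw_cache_item *match =
       brw_lookup_prog(cache, cache_id, data, data_size);

    if (match) {
       item->offset = match->offset;     /* reuse the existing copy */
    } else {
       item->offset = brw_alloc_item_data(cache, data_size);
       /* copy `data` into cache->bo at item->offset: memcpy() on LLC
        * systems, drm_intel_bo_subdata() otherwise */
    }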
*/ - if (!brw_try_upload_using_copy(cache, item, data, aux)) { - brw_upload_item_data(cache, item, data); + if (matching_data) { + item->offset = matching_data->offset; + } else { + item->offset = brw_alloc_item_data(cache, data_size); + + /* Copy data to the buffer */ + if (brw->has_llc) { + memcpy((char *)cache->bo->virtual + item->offset, data, data_size); + } else { + drm_intel_bo_subdata(cache->bo, item->offset, data_size, data); + } } /* Set up the memory containing the key and aux_data */ @@ -323,7 +319,7 @@ brw_upload_cache(struct brw_cache *cache, item->key = tmp; - if (cache->n_items > cache->size * 1.5) + if (cache->n_items > cache->size * 1.5f) rehash(cache); hash %= cache->size; @@ -331,13 +327,6 @@ brw_upload_cache(struct brw_cache *cache, cache->items[hash] = item; cache->n_items++; - /* Copy data to the buffer */ - if (brw->has_llc) { - memcpy((char *) cache->bo->virtual + item->offset, data, data_size); - } else { - drm_intel_bo_subdata(cache->bo, item->offset, data_size, data); - } - *out_offset = item->offset; *(void **)out_aux = (void *)((char *)item->key + item->key_size); cache->brw->ctx.NewDriverState |= 1 << cache_id; diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 08d1ac28885..9de42ce8503 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -192,6 +192,12 @@ static const struct brw_tracked_state *gen7_render_atoms[] = &gen6_color_calc_state, /* must do before cc unit */ &gen6_depth_stencil_state, /* must do before cc unit */ + &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */ + + &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */ + &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */ + &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */ + &gen6_vs_push_constants, /* Before vs_state */ &gen6_gs_push_constants, /* Before gs_state */ &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */ @@ -251,6 +257,7 @@ static const struct brw_tracked_state *gen7_render_atoms[] = static const struct brw_tracked_state *gen7_compute_atoms[] = { &brw_state_base_address, + &brw_cs_image_surfaces, &brw_cs_abo_surfaces, &brw_cs_state, }; @@ -268,6 +275,12 @@ static const struct brw_tracked_state *gen8_render_atoms[] = &gen8_blend_state, &gen6_color_calc_state, + &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */ + + &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */ + &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */ + &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */ + &gen6_vs_push_constants, /* Before vs_state */ &gen6_gs_push_constants, /* Before gs_state */ &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */ @@ -334,6 +347,7 @@ static const struct brw_tracked_state *gen8_render_atoms[] = static const struct brw_tracked_state *gen8_compute_atoms[] = { &gen8_state_base_address, + &brw_cs_image_surfaces, &brw_cs_abo_surfaces, &brw_cs_state, }; @@ -349,7 +363,7 @@ brw_upload_initial_gpu_state(struct brw_context *brw) return; if (brw->gen == 6) - intel_emit_post_sync_nonzero_flush(brw); + brw_emit_post_sync_nonzero_flush(brw); brw_upload_invariant_state(brw); @@ -468,6 +482,7 @@ void brw_init_state( struct brw_context *brw ) ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER; ctx->DriverFlags.NewTextureBuffer = 
BRW_NEW_TEXTURE_BUFFER; ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER; + ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS; } @@ -581,6 +596,7 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_STATS_WM), DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER), DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER), + DEFINE_BIT(BRW_NEW_IMAGE_UNITS), DEFINE_BIT(BRW_NEW_META_IN_PROGRESS), DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP), DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION), @@ -710,7 +726,7 @@ brw_upload_pipeline_state(struct brw_context *brw, /* Emit Sandybridge workaround flushes on every primitive, for safety. */ if (brw->gen == 6) - intel_emit_post_sync_nonzero_flush(brw); + brw_emit_post_sync_nonzero_flush(brw); brw_upload_programs(brw, pipeline); merge_ctx_state(brw, &state); diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c index 05016067bba..a33fd88a026 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@ -813,3 +813,112 @@ brw_depth_format(struct brw_context *brw, mesa_format format) unreachable("Unexpected depth format."); } } + +mesa_format +brw_lower_mesa_image_format(const struct brw_device_info *devinfo, + mesa_format format) +{ + switch (format) { + /* These are never lowered. Up to BDW we'll have to fall back to untyped + * surface access for 128bpp formats. + */ + case MESA_FORMAT_RGBA_UINT32: + case MESA_FORMAT_RGBA_SINT32: + case MESA_FORMAT_RGBA_FLOAT32: + case MESA_FORMAT_R_UINT32: + case MESA_FORMAT_R_SINT32: + case MESA_FORMAT_R_FLOAT32: + return format; + + /* From HSW to BDW the only 64bpp format supported for typed access is + * RGBA_UINT16. IVB falls back to untyped. + */ + case MESA_FORMAT_RGBA_UINT16: + case MESA_FORMAT_RGBA_SINT16: + case MESA_FORMAT_RGBA_FLOAT16: + case MESA_FORMAT_RG_UINT32: + case MESA_FORMAT_RG_SINT32: + case MESA_FORMAT_RG_FLOAT32: + return (devinfo->gen >= 9 ? format : + devinfo->gen >= 8 || devinfo->is_haswell ? + MESA_FORMAT_RGBA_UINT16 : MESA_FORMAT_RG_UINT32); + + /* Up to BDW no SINT or FLOAT formats of less than 32 bits per component + * are supported. IVB doesn't support formats with more than one component + * for typed access. For 8 and 16 bpp formats IVB relies on the + * undocumented behavior that typed reads from R_UINT8 and R_UINT16 + * surfaces actually do a 32-bit misaligned read. The alternative would be + * to use two surface state entries with different formats for each image, + * one for reading (using R_UINT32) and another one for writing (using + * R_UINT8 or R_UINT16), but that would complicate the shaders we generate + * even more. + */ + case MESA_FORMAT_RGBA_UINT8: + case MESA_FORMAT_RGBA_SINT8: + return (devinfo->gen >= 9 ? format : + devinfo->gen >= 8 || devinfo->is_haswell ? + MESA_FORMAT_RGBA_UINT8 : MESA_FORMAT_R_UINT32); + + case MESA_FORMAT_RG_UINT16: + case MESA_FORMAT_RG_SINT16: + case MESA_FORMAT_RG_FLOAT16: + return (devinfo->gen >= 9 ? format : + devinfo->gen >= 8 || devinfo->is_haswell ? + MESA_FORMAT_RG_UINT16 : MESA_FORMAT_R_UINT32); + + case MESA_FORMAT_RG_UINT8: + case MESA_FORMAT_RG_SINT8: + return (devinfo->gen >= 9 ? format : + devinfo->gen >= 8 || devinfo->is_haswell ? + MESA_FORMAT_RG_UINT8 : MESA_FORMAT_R_UINT16); + + case MESA_FORMAT_R_UINT16: + case MESA_FORMAT_R_FLOAT16: + case MESA_FORMAT_R_SINT16: + return (devinfo->gen >= 9 ? format : MESA_FORMAT_R_UINT16); + + case MESA_FORMAT_R_UINT8: + case MESA_FORMAT_R_SINT8: + return (devinfo->gen >= 9 ? 
format : MESA_FORMAT_R_UINT8); + + /* Neither the 2/10/10/10 nor the 11/11/10 packed formats are supported + * by the hardware. + */ + case MESA_FORMAT_R10G10B10A2_UINT: + case MESA_FORMAT_R10G10B10A2_UNORM: + case MESA_FORMAT_R11G11B10_FLOAT: + return MESA_FORMAT_R_UINT32; + + /* No normalized fixed-point formats are supported by the hardware. */ + case MESA_FORMAT_RGBA_UNORM16: + case MESA_FORMAT_RGBA_SNORM16: + return (devinfo->gen >= 8 || devinfo->is_haswell ? + MESA_FORMAT_RGBA_UINT16 : MESA_FORMAT_RG_UINT32); + + case MESA_FORMAT_R8G8B8A8_UNORM: + case MESA_FORMAT_R8G8B8A8_SNORM: + return (devinfo->gen >= 8 || devinfo->is_haswell ? + MESA_FORMAT_RGBA_UINT8 : MESA_FORMAT_R_UINT32); + + case MESA_FORMAT_R16G16_UNORM: + case MESA_FORMAT_R16G16_SNORM: + return (devinfo->gen >= 8 || devinfo->is_haswell ? + MESA_FORMAT_RG_UINT16 : MESA_FORMAT_R_UINT32); + + case MESA_FORMAT_R8G8_UNORM: + case MESA_FORMAT_R8G8_SNORM: + return (devinfo->gen >= 8 || devinfo->is_haswell ? + MESA_FORMAT_RG_UINT8 : MESA_FORMAT_R_UINT16); + + case MESA_FORMAT_R_UNORM16: + case MESA_FORMAT_R_SNORM16: + return MESA_FORMAT_R_UINT16; + + case MESA_FORMAT_R_UNORM8: + case MESA_FORMAT_R_SNORM8: + return MESA_FORMAT_R_UINT8; + + default: + unreachable("Unknown image format"); + } +} diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 998d8c42770..b8b03932065 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -63,7 +63,7 @@ tr_mode_horizontal_texture_alignment(const struct brw_context *brw, int i = 0; /* Alignment computations below assume bpp >= 8 and a power of 2. */ - assert (bpp >= 8 && bpp <= 128 && is_power_of_two(bpp)); + assert (bpp >= 8 && bpp <= 128 && _mesa_is_pow_two(bpp)); switch(mt->target) { case GL_TEXTURE_1D: @@ -95,7 +95,7 @@ tr_mode_horizontal_texture_alignment(const struct brw_context *brw, ret_align = mt->tr_mode == INTEL_MIPTREE_TRMODE_YF ? align_yf[i] : align_ys[i]; - assert(is_power_of_two(mt->num_samples)); + assert(_mesa_is_pow_two(mt->num_samples)); switch (mt->num_samples) { case 2: @@ -199,7 +199,7 @@ tr_mode_vertical_texture_alignment(const struct brw_context *brw, mt->target != GL_TEXTURE_1D_ARRAY); /* Alignment computations below assume bpp >= 8 and a power of 2. */ - assert (bpp >= 8 && bpp <= 128 && is_power_of_two(bpp)) ; + assert (bpp >= 8 && bpp <= 128 && _mesa_is_pow_two(bpp)) ; switch(mt->target) { case GL_TEXTURE_2D: @@ -226,7 +226,7 @@ tr_mode_vertical_texture_alignment(const struct brw_context *brw, ret_align = mt->tr_mode == INTEL_MIPTREE_TRMODE_YF ? align_yf[i] : align_ys[i]; - assert(is_power_of_two(mt->num_samples)); + assert(_mesa_is_pow_two(mt->num_samples)); switch (mt->num_samples) { case 4: @@ -366,9 +366,8 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt) mt->total_width = mt->physical_width0; - if (mt->compressed) { - mt->total_width = ALIGN(mt->physical_width0, mt->align_w); - } + if (mt->compressed) + mt->total_width = ALIGN(mt->total_width, bw); /* May need to adjust width to accommodate the placement of * the 2nd mipmap. 
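A few concrete data points read off the brw_lower_mesa_image_format() switch added above (illustrative call; `devinfo` stands for whatever device is current):

    mesa_format f =
       brw_lower_mesa_image_format(devinfo, MESA_FORMAT_RGBA_FLOAT16);
    /* gen9+ (SKL):     f == MESA_FORMAT_RGBA_FLOAT16 (unchanged)
     * gen8 / Haswell:  f == MESA_FORMAT_RGBA_UINT16
     * Ivybridge:       f == MESA_FORMAT_RG_UINT32 (untyped fallback)
     *
     * Packed formats are always lowered, e.g.
     * MESA_FORMAT_R11G11B10_FLOAT -> MESA_FORMAT_R_UINT32 on every gen.
     */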
This occurs when the alignment @@ -433,9 +432,7 @@ brw_miptree_get_horizontal_slice_pitch(const struct brw_context *brw, const struct intel_mipmap_tree *mt, unsigned level) { - assert(brw->gen < 9); - - if (mt->target == GL_TEXTURE_3D || + if ((brw->gen < 9 && mt->target == GL_TEXTURE_3D) || (brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP)) { return ALIGN(minify(mt->physical_width0, level), mt->align_w); } else { @@ -615,8 +612,8 @@ brw_miptree_layout_texture_3d(struct brw_context *brw, */ static uint32_t brw_miptree_choose_tiling(struct brw_context *brw, - enum intel_miptree_tiling_mode requested, - const struct intel_mipmap_tree *mt) + const struct intel_mipmap_tree *mt, + uint32_t layout_flags) { if (mt->format == MESA_FORMAT_S_UINT8) { /* The stencil buffer is W tiled. However, we request from the kernel a @@ -625,15 +622,18 @@ brw_miptree_choose_tiling(struct brw_context *brw, return I915_TILING_NONE; } + /* Do not support changing the tiling for miptrees with pre-allocated BOs. */ + assert((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0); + /* Some usages may want only one type of tiling, like depth miptrees (Y * tiled), or temporary BOs for uploading data once (linear). */ - switch (requested) { - case INTEL_MIPTREE_TILING_ANY: + switch (layout_flags & MIPTREE_LAYOUT_TILING_ANY) { + case MIPTREE_LAYOUT_TILING_ANY: break; - case INTEL_MIPTREE_TILING_Y: + case MIPTREE_LAYOUT_TILING_Y: return I915_TILING_Y; - case INTEL_MIPTREE_TILING_NONE: + case MIPTREE_LAYOUT_TILING_NONE: return I915_TILING_NONE; } @@ -762,16 +762,13 @@ intel_miptree_set_total_width_height(struct brw_context *brw, mt->total_width, mt->total_height, mt->cpp); } -void -brw_miptree_layout(struct brw_context *brw, - struct intel_mipmap_tree *mt, - enum intel_miptree_tiling_mode requested, - uint32_t layout_flags) +static void +intel_miptree_set_alignment(struct brw_context *brw, + struct intel_mipmap_tree *mt, + uint32_t layout_flags) { bool gen6_hiz_or_stencil = false; - mt->tr_mode = INTEL_MIPTREE_TRMODE_NONE; - if (brw->gen == 6 && mt->array_layout == ALL_SLICES_AT_EACH_LOD) { const GLenum base_format = _mesa_get_format_base_format(mt->format); gen6_hiz_or_stencil = _mesa_is_depth_or_stencil_format(base_format); @@ -806,7 +803,16 @@ brw_miptree_layout(struct brw_context *brw, intel_horizontal_texture_alignment_unit(brw, mt, layout_flags); mt->align_h = intel_vertical_texture_alignment_unit(brw, mt); } +} + +void +brw_miptree_layout(struct brw_context *brw, + struct intel_mipmap_tree *mt, + uint32_t layout_flags) +{ + mt->tr_mode = INTEL_MIPTREE_TRMODE_NONE; + intel_miptree_set_alignment(brw, mt, layout_flags); intel_miptree_set_total_width_height(brw, mt); if (!mt->total_width || !mt->total_height) { @@ -825,6 +831,6 @@ brw_miptree_layout(struct brw_context *brw, } if ((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0) - mt->tiling = brw_miptree_choose_tiling(brw, requested, mt); + mt->tiling = brw_miptree_choose_tiling(brw, mt, layout_flags); } diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c index 6fcf1b0cb1d..6078c3810d4 100644 --- a/src/mesa/drivers/dri/i965/brw_urb.c +++ b/src/mesa/drivers/dri/i965/brw_urb.c @@ -249,10 +249,10 @@ void brw_upload_urb_fence(struct brw_context *brw) uf.bits1.cs_fence = brw->urb.size; /* erratum: URB_FENCE must not cross a 64byte cacheline */ - if ((brw->batch.used & 15) > 12) { - int pad = 16 - (brw->batch.used & 15); + if ((USED_BATCH(brw->batch) & 15) > 12) { + int pad = 16 - (USED_BATCH(brw->batch) & 15); do - brw->batch.map[brw->batch.used++] = 
MI_NOOP; + *brw->batch.map_next++ = MI_NOOP; while (--pad); } diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h index 04e4e944118..68f4318d371 100644 --- a/src/mesa/drivers/dri/i965/brw_util.h +++ b/src/mesa/drivers/dri/i965/brw_util.h @@ -53,14 +53,14 @@ brw_get_line_width(struct brw_context *brw) float line_width = CLAMP(!brw->ctx.Multisample._Enabled && !brw->ctx.Line.SmoothFlag ? roundf(brw->ctx.Line.Width) : brw->ctx.Line.Width, - 0.0, brw->ctx.Const.MaxLineWidth); + 0.0f, brw->ctx.Const.MaxLineWidth); uint32_t line_width_u3_7 = U_FIXED(line_width, 7); /* Line width of 0 is not allowed when MSAA enabled */ if (brw->ctx.Multisample._Enabled) { if (line_width_u3_7 == 0) line_width_u3_7 = 1; - } else if (brw->ctx.Line.SmoothFlag && line_width < 1.5) { + } else if (brw->ctx.Line.SmoothFlag && line_width < 1.5f) { /* For 1 pixel line thickness or less, the general * anti-aliasing algorithm gives up, and a garbage line is * generated. Setting a Line Width of 0.0 specifies the diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 05f188fe116..63f75da7e99 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -171,6 +171,17 @@ dst_reg::dst_reg(register_file file, int reg, const glsl_type *type, this->writemask = writemask; } +dst_reg::dst_reg(register_file file, int reg, brw_reg_type type, + unsigned writemask) +{ + init(); + + this->file = file; + this->reg = reg; + this->type = type; + this->writemask = writemask; +} + dst_reg::dst_reg(struct brw_reg reg) { init(); @@ -1709,6 +1720,9 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value) bool vec4_visitor::run(gl_clip_plane *clip_planes) { + bool use_vec4_nir = + compiler->glsl_compiler_options[stage].NirOptions != NULL; + sanity_param_count = prog->Parameters->NumParameters; if (shader_time_index >= 0) @@ -1718,10 +1732,15 @@ vec4_visitor::run(gl_clip_plane *clip_planes) emit_prolog(); - /* Generate VS IR for main(). (the visitor only descends into - * functions called "main"). - */ - if (shader) { + if (use_vec4_nir) { + assert(prog->nir != NULL); + emit_nir_code(); + if (failed) + return false; + } else if (shader) { + /* Generate VS IR for main(). (the visitor only descends into + * functions called "main"). + */ visit_instructions(shader->base.ir); } else { emit_program_code(); @@ -1741,7 +1760,7 @@ vec4_visitor::run(gl_clip_plane *clip_planes) * that we have reladdr computations available for CSE, since we'll * often do repeated subexpressions for those. */ - if (shader) { + if (shader || use_vec4_nir) { move_grf_array_access_to_scratch(); move_uniform_array_access_to_pull_constants(); } else { @@ -1827,15 +1846,30 @@ vec4_visitor::run(gl_clip_plane *clip_planes) } } - while (!reg_allocate()) { - if (failed) - return false; + bool allocated_without_spills = reg_allocate(); + + if (!allocated_without_spills) { + compiler->shader_perf_log(log_data, + "%s shader triggered register spilling. 
" + "Try reducing the number of live vec4 values " + "to improve performance.\n", + stage_name); + + while (!reg_allocate()) { + if (failed) + return false; + } } opt_schedule_instructions(); opt_set_dependency_control(); + if (last_scratch > 0) { + prog_data->base.total_scratch = + brw_get_scratch_size(last_scratch * REG_SIZE); + } + /* If any state parameters were appended, then ParameterValues could have * been realloced, in which case the driver uniform storage set up by * _mesa_associate_uniform_storage() would point to freed memory. Make @@ -1857,10 +1891,11 @@ extern "C" { */ const unsigned * brw_vs_emit(struct brw_context *brw, - struct gl_shader_program *prog, - struct brw_vs_compile *c, - struct brw_vs_prog_data *prog_data, void *mem_ctx, + const struct brw_vs_prog_key *key, + struct brw_vs_prog_data *prog_data, + struct gl_vertex_program *vp, + struct gl_shader_program *prog, unsigned *final_assembly_size) { bool start_busy = false; @@ -1879,29 +1914,31 @@ brw_vs_emit(struct brw_context *brw, int st_index = -1; if (INTEL_DEBUG & DEBUG_SHADER_TIME) - st_index = brw_get_shader_time_index(brw, prog, &c->vp->program.Base, - ST_VS); + st_index = brw_get_shader_time_index(brw, prog, &vp->Base, ST_VS); if (unlikely(INTEL_DEBUG & DEBUG_VS) && shader->base.ir) - brw_dump_ir("vertex", prog, &shader->base, &c->vp->program.Base); + brw_dump_ir("vertex", prog, &shader->base, &vp->Base); + + if (!vp->Base.nir && + (brw->intelScreen->compiler->scalar_vs || + brw->intelScreen->compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions != NULL)) { + /* Normally we generate NIR in LinkShader() or + * ProgramStringNotify(), but Mesa's fixed-function vertex program + * handling doesn't notify the driver at all. Just do it here, at + * the last minute, even though it's lame. + */ + assert(vp->Base.Id == 0 && prog == NULL); + vp->Base.nir = + brw_create_nir(brw, NULL, &vp->Base, MESA_SHADER_VERTEX, + brw->intelScreen->compiler->scalar_vs); + } if (brw->intelScreen->compiler->scalar_vs) { - if (!c->vp->program.Base.nir) { - /* Normally we generate NIR in LinkShader() or - * ProgramStringNotify(), but Mesa's fixed-function vertex program - * handling doesn't notify the driver at all. Just do it here, at - * the last minute, even though it's lame. 
- */ - assert(c->vp->program.Base.Id == 0 && prog == NULL); - c->vp->program.Base.nir = - brw_create_nir(brw, NULL, &c->vp->program.Base, MESA_SHADER_VERTEX); - } - prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; fs_visitor v(brw->intelScreen->compiler, brw, - mem_ctx, MESA_SHADER_VERTEX, &c->key, - &prog_data->base.base, prog, &c->vp->program.Base, + mem_ctx, MESA_SHADER_VERTEX, key, + &prog_data->base.base, prog, &vp->Base, 8, st_index); if (!v.run_vs(brw_select_clip_planes(&brw->ctx))) { if (prog) { @@ -1916,8 +1953,8 @@ brw_vs_emit(struct brw_context *brw, } fs_generator g(brw->intelScreen->compiler, brw, - mem_ctx, (void *) &c->key, &prog_data->base.base, - &c->vp->program.Base, v.promoted_constants, + mem_ctx, (void *) key, &prog_data->base.base, + &vp->Base, v.promoted_constants, v.runtime_check_aads_emit, "VS"); if (INTEL_DEBUG & DEBUG_VS) { char *name; @@ -1927,21 +1964,19 @@ brw_vs_emit(struct brw_context *brw, prog->Name); } else { name = ralloc_asprintf(mem_ctx, "vertex program %d", - c->vp->program.Base.Id); + vp->Base.Id); } g.enable_debug(name); } g.generate_code(v.cfg, 8); assembly = g.get_assembly(final_assembly_size); - - c->base.last_scratch = v.last_scratch; } if (!assembly) { prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; - vec4_vs_visitor v(brw->intelScreen->compiler, - c, prog_data, prog, mem_ctx, st_index, + vec4_vs_visitor v(brw->intelScreen->compiler, brw, key, prog_data, + vp, prog, mem_ctx, st_index, !_mesa_is_gles3(&brw->ctx)); if (!v.run(brw_select_clip_planes(&brw->ctx))) { if (prog) { @@ -1956,14 +1991,14 @@ brw_vs_emit(struct brw_context *brw, } vec4_generator g(brw->intelScreen->compiler, brw, - prog, &c->vp->program.Base, &prog_data->base, + prog, &vp->Base, &prog_data->base, mem_ctx, INTEL_DEBUG & DEBUG_VS, "vertex", "VS"); assembly = g.generate_assembly(v.cfg, final_assembly_size); } if (unlikely(brw->perf_debug) && shader) { if (shader->compiled_once) { - brw_vs_debug_recompile(brw, prog, &c->key); + brw_vs_debug_recompile(brw, prog, key); } if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) { perf_debug("VS compile took %.03f ms and stalled the GPU\n", diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 2ac16932189..341c516b39a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -45,12 +45,9 @@ extern "C" { #endif #include "glsl/ir.h" +#include "glsl/nir/nir.h" -struct brw_vec4_compile { - GLuint last_scratch; /**< measured in 32-byte (register size) units */ -}; - #ifdef __cplusplus extern "C" { #endif @@ -77,7 +74,7 @@ class vec4_visitor : public backend_shader, public ir_visitor { public: vec4_visitor(const struct brw_compiler *compiler, - struct brw_vec4_compile *c, + void *log_data, struct gl_program *prog, const struct brw_vue_prog_key *key, struct brw_vue_prog_data *prog_data, @@ -103,7 +100,6 @@ public: return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); } - struct brw_vec4_compile * const c; const struct brw_vue_prog_key * const key; struct brw_vue_prog_data * const prog_data; unsigned int sanity_param_count; @@ -181,9 +177,12 @@ public: void fail(const char *msg, ...); void setup_uniform_clipplane_values(gl_clip_plane *clip_planes); + virtual void setup_vector_uniform_values(const gl_constant_value *values, + unsigned n); void setup_uniform_values(ir_variable *ir); void setup_builtin_uniform_values(ir_variable *ir); int setup_uniforms(int payload_reg); + bool reg_allocate_trivial(); bool reg_allocate(); void 
evaluate_spill_costs(float *spill_costs, bool *no_spill); @@ -292,14 +291,17 @@ public: void emit_bool_to_cond_code(ir_rvalue *ir, enum brw_predicate *predicate); void emit_if_gen6(ir_if *ir); - void emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst, - src_reg src0, src_reg src1); + vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst, + src_reg src0, src_reg src1); - void emit_lrp(const dst_reg &dst, - const src_reg &x, const src_reg &y, const src_reg &a); + vec4_instruction *emit_lrp(const dst_reg &dst, const src_reg &x, + const src_reg &y, const src_reg &a); - /** Copy any live channel from \p src to the first channel of \p dst. */ - void emit_uniformize(const dst_reg &dst, const src_reg &src); + /** + * Copy any live channel from \p src to the first channel of the + * result. + */ + src_reg emit_uniformize(const src_reg &src); void emit_block_move(dst_reg *dst, src_reg *src, const struct glsl_type *type, brw_predicate predicate); @@ -317,11 +319,13 @@ public: void emit_scalar(ir_instruction *ir, enum prog_opcode op, dst_reg dst, src_reg src0, src_reg src1); - src_reg fix_3src_operand(src_reg src); + src_reg fix_3src_operand(const src_reg &src); + src_reg resolve_source_modifiers(const src_reg &src); + + vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0, + const src_reg &src1 = src_reg()); - void emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0, - const src_reg &src1 = src_reg()); - src_reg fix_math_operand(src_reg src); + src_reg fix_math_operand(const src_reg &src); void emit_pack_half_2x16(dst_reg dst, src_reg src0); void emit_unpack_half_2x16(dst_reg dst, src_reg src0); @@ -330,10 +334,27 @@ public: void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0); void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0); - uint32_t gather_channel(ir_texture *ir, uint32_t sampler); - src_reg emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler); + void emit_texture(ir_texture_opcode op, + dst_reg dest, + const glsl_type *dest_type, + src_reg coordinate, + int coord_components, + src_reg shadow_comparitor, + src_reg lod, src_reg lod2, + src_reg sample_index, + uint32_t constant_offset, + src_reg offset_value, + src_reg mcs, + bool is_cube_array, + uint32_t sampler, src_reg sampler_reg); + + uint32_t gather_channel(unsigned gather_component, uint32_t sampler); + src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate, + src_reg sampler); void emit_gen6_gather_wa(uint8_t wa, dst_reg dst); - void swizzle_result(ir_texture *ir, src_reg orig_val, uint32_t sampler); + void swizzle_result(ir_texture_opcode op, dst_reg dest, + src_reg orig_val, uint32_t sampler, + const glsl_type *dest_type); void emit_ndc_computation(); void emit_psiz_and_flags(dst_reg reg); @@ -388,13 +409,53 @@ public: void visit_atomic_counter_intrinsic(ir_call *ir); + int type_size(const struct glsl_type *type); + bool is_high_sampler(src_reg sampler); + + virtual void emit_nir_code(); + virtual void nir_setup_inputs(nir_shader *shader); + virtual void nir_setup_uniforms(nir_shader *shader); + virtual void nir_setup_uniform(nir_variable *var); + virtual void nir_setup_builtin_uniform(nir_variable *var); + virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr); + virtual void nir_setup_system_values(nir_shader *shader); + virtual void nir_emit_impl(nir_function_impl *impl); + virtual void nir_emit_cf_list(exec_list *list); + virtual void nir_emit_if(nir_if 
*if_stmt); + virtual void nir_emit_loop(nir_loop *loop); + virtual void nir_emit_block(nir_block *block); + virtual void nir_emit_instr(nir_instr *instr); + virtual void nir_emit_load_const(nir_load_const_instr *instr); + virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr); + virtual void nir_emit_alu(nir_alu_instr *instr); + virtual void nir_emit_jump(nir_jump_instr *instr); + virtual void nir_emit_texture(nir_tex_instr *instr); + + dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type); + dst_reg get_nir_dest(nir_dest dest, nir_alu_type type); + dst_reg get_nir_dest(nir_dest dest); + src_reg get_nir_src(nir_src src, enum brw_reg_type type, + unsigned num_components = 4); + src_reg get_nir_src(nir_src src, nir_alu_type type, + unsigned num_components = 4); + src_reg get_nir_src(nir_src src, + unsigned num_components = 4); + + virtual dst_reg *make_reg_for_system_value(int location, + const glsl_type *type) = 0; + + dst_reg *nir_locals; + dst_reg *nir_ssa_values; + src_reg *nir_inputs; + unsigned *nir_uniform_driver_location; + dst_reg *nir_system_values; + protected: void emit_vertex(); void lower_attributes_to_hw_regs(const int *attribute_map, bool interleaved); void setup_payload_interference(struct ra_graph *g, int first_payload_node, int reg_node_count); - virtual dst_reg *make_reg_for_system_value(ir_variable *ir) = 0; virtual void assign_binding_table_offsets(); virtual void setup_payload() = 0; virtual void emit_prolog() = 0; @@ -403,6 +464,8 @@ protected: virtual void emit_urb_write_header(int mrf) = 0; virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0; virtual int compute_array_stride(ir_dereference_array *ir); + virtual void gs_emit_vertex(int stream_id); + virtual void gs_end_primitive(); private: /** @@ -411,6 +474,8 @@ private: const bool no_spills; int shader_time_index; + + unsigned last_scratch; /**< measured in 32-byte (register size) units */ }; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp index 2d9afa8145f..5a15eb89766 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp @@ -179,6 +179,7 @@ try_constant_propagate(const struct brw_device_info *devinfo, case BRW_OPCODE_MACH: case BRW_OPCODE_MUL: + case SHADER_OPCODE_MULH: case BRW_OPCODE_ADD: case BRW_OPCODE_OR: case BRW_OPCODE_AND: diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp index c9fe0cebf27..5a277f74c44 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp @@ -62,6 +62,7 @@ is_expression(const vec4_instruction *const inst) case BRW_OPCODE_CMPN: case BRW_OPCODE_ADD: case BRW_OPCODE_MUL: + case SHADER_OPCODE_MULH: case BRW_OPCODE_FRC: case BRW_OPCODE_RNDU: case BRW_OPCODE_RNDD: diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index d2de2f0be25..92050b94d33 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1465,19 +1465,15 @@ vec4_generator::generate_code(const cfg_t *cfg) break; case SHADER_OPCODE_UNTYPED_ATOMIC: - assert(src[1].file == BRW_IMMEDIATE_VALUE && - src[2].file == BRW_IMMEDIATE_VALUE); + assert(src[2].file == BRW_IMMEDIATE_VALUE); brw_untyped_atomic(p, dst, src[0], src[1], src[2].dw1.ud, inst->mlen, !inst->dst.is_null()); - brw_mark_surface_used(&prog_data->base, src[1].dw1.ud); 
break; case SHADER_OPCODE_UNTYPED_SURFACE_READ: - assert(src[1].file == BRW_IMMEDIATE_VALUE && - src[2].file == BRW_IMMEDIATE_VALUE); + assert(src[2].file == BRW_IMMEDIATE_VALUE); brw_untyped_surface_read(p, dst, src[0], src[1], inst->mlen, src[2].dw1.ud); - brw_mark_surface_used(&prog_data->base, src[1].dw1.ud); break; case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: @@ -1549,7 +1545,7 @@ vec4_generator::generate_code(const cfg_t *cfg) * * where they pack the four bytes from the low and high four DW. */ - assert(is_power_of_two(dst.dw1.bits.writemask) && + assert(_mesa_is_pow_two(dst.dw1.bits.writemask) && dst.dw1.bits.writemask != 0); unsigned offset = __builtin_ctz(dst.dw1.bits.writemask); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp new file mode 100644 index 00000000000..d85fb6f31ec --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp @@ -0,0 +1,118 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_nir.h" +#include "brw_vec4_gs_visitor.h" + +namespace brw { + +void +vec4_gs_visitor::nir_setup_inputs(nir_shader *shader) +{ + nir_inputs = ralloc_array(mem_ctx, src_reg, shader->num_inputs); + + foreach_list_typed(nir_variable, var, node, &shader->inputs) { + int offset = var->data.driver_location; + if (var->type->base_type == GLSL_TYPE_ARRAY) { + /* Geometry shader inputs are arrays, but they use an unusual array + * layout: instead of all array elements for a given geometry shader + * input being stored consecutively, all geometry shader inputs are + * interleaved into one giant array. At this stage of compilation, we + * assume that the stride of the array is BRW_VARYING_SLOT_COUNT. + * Later, setup_attributes() will remap our accesses to the actual + * input array. 
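A minimal stand-alone C++ sketch of the interleaved layout described in the comment above, under the assumption that the stride between consecutive vertices of the same input is BRW_VARYING_SLOT_COUNT; the stride value and the gs_input_attr() helper below are illustrative stand-ins, not driver code.

#include <cstdio>

/* Placeholder stride; the driver uses BRW_VARYING_SLOT_COUNT.  A small
 * value keeps the printed table readable.
 */
static const int VARYING_SLOT_STRIDE = 8;

/* ATTR register index of array element `vertex` of a geometry shader input
 * whose first element sits at `base_location` (var->data.location), for
 * register `reg` within that element.
 */
static int gs_input_attr(int base_location, int vertex, int reg)
{
   return base_location + vertex * VARYING_SLOT_STRIDE + reg;
}

int main()
{
   /* Two hypothetical inputs at locations 0 and 1, read for a triangle.
    * Their per-vertex copies interleave: A and B for vertex 0 come first,
    * then A and B for vertex 1, and so on, instead of A[0..2] followed by
    * B[0..2].
    */
   for (int vertex = 0; vertex < 3; vertex++)
      printf("vertex %d: input A -> ATTR %2d, input B -> ATTR %2d\n",
             vertex,
             gs_input_attr(0, vertex, 0),
             gs_input_attr(1, vertex, 0));
   return 0;
}

setup_attributes() later remaps these virtual indices onto the registers that actually hold the incoming vertex data, which is why the loop above only needs a consistent stride, not the final layout.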
+ */ + assert(var->type->length > 0); + int length = var->type->length; + int size = type_size(var->type) / length; + for (int i = 0; i < length; i++) { + int location = var->data.location + i * BRW_VARYING_SLOT_COUNT; + for (int j = 0; j < size; j++) { + src_reg src = src_reg(ATTR, location + j, var->type); + src = retype(src, brw_type_for_base_type(var->type)); + nir_inputs[offset] = src; + offset++; + } + } + } else { + int size = type_size(var->type); + for (int i = 0; i < size; i++) { + src_reg src = src_reg(ATTR, var->data.location + i, var->type); + src = retype(src, brw_type_for_base_type(var->type)); + nir_inputs[offset] = src; + offset++; + } + } + } +} + +void +vec4_gs_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr) +{ + dst_reg *reg; + + switch (instr->intrinsic) { + case nir_intrinsic_load_invocation_id: + reg = &this->nir_system_values[SYSTEM_VALUE_INVOCATION_ID]; + if (reg->file == BAD_FILE) + *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_INVOCATION_ID, + glsl_type::int_type); + break; + + default: + vec4_visitor::nir_setup_system_value_intrinsic(instr); + } + +} + +void +vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) +{ + dst_reg dest; + src_reg src; + + switch (instr->intrinsic) { + case nir_intrinsic_emit_vertex: { + int stream_id = instr->const_index[0]; + gs_emit_vertex(stream_id); + break; + } + + case nir_intrinsic_end_primitive: + gs_end_primitive(); + break; + + case nir_intrinsic_load_invocation_id: { + src_reg invocation_id = + src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]); + assert(invocation_id.file != BAD_FILE); + dest = get_nir_dest(instr->dest, invocation_id.type); + emit(MOV(dest, invocation_id)); + break; + } + + default: + vec4_visitor::nir_emit_intrinsic(instr); + } +} +} diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 69bcf5afc51..019efecac66 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -35,12 +35,14 @@ const unsigned MAX_GS_INPUT_VERTICES = 6; namespace brw { vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler, + void *log_data, struct brw_gs_compile *c, struct gl_shader_program *prog, void *mem_ctx, bool no_spills, int shader_time_index) - : vec4_visitor(compiler, &c->base, &c->gp->program.Base, &c->key.base, + : vec4_visitor(compiler, log_data, + &c->gp->program.Base, &c->key.base, &c->prog_data.base, prog, MESA_SHADER_GEOMETRY, mem_ctx, no_spills, shader_time_index), c(c) @@ -49,11 +51,12 @@ vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler, dst_reg * -vec4_gs_visitor::make_reg_for_system_value(ir_variable *ir) +vec4_gs_visitor::make_reg_for_system_value(int location, + const glsl_type *type) { - dst_reg *reg = new(mem_ctx) dst_reg(this, ir->type); + dst_reg *reg = new(mem_ctx) dst_reg(this, type); - switch (ir->data.location) { + switch (location) { case SYSTEM_VALUE_INVOCATION_ID: this->current_annotation = "initialize gl_InvocationID"; emit(GS_OPCODE_GET_INSTANCE_ID, *reg); @@ -346,90 +349,82 @@ vec4_gs_visitor::emit_control_data_bits() if (c->control_data_header_size_bits > 128) urb_write_flags = urb_write_flags | BRW_URB_WRITE_PER_SLOT_OFFSET; - /* If vertex_count is 0, then no control data bits have been accumulated - * yet, so we should do nothing. 
+ /* If we are using either channel masks or a per-slot offset, then we + * need to figure out which DWORD we are trying to write to, using the + * formula: + * + * dword_index = (vertex_count - 1) * bits_per_vertex / 32 + * + * Since bits_per_vertex is a power of two, and is known at compile + * time, this can be optimized to: + * + * dword_index = (vertex_count - 1) >> (6 - log2(bits_per_vertex)) */ - emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_NEQ)); - emit(IF(BRW_PREDICATE_NORMAL)); - { - /* If we are using either channel masks or a per-slot offset, then we - * need to figure out which DWORD we are trying to write to, using the - * formula: - * - * dword_index = (vertex_count - 1) * bits_per_vertex / 32 - * - * Since bits_per_vertex is a power of two, and is known at compile - * time, this can be optimized to: - * - * dword_index = (vertex_count - 1) >> (6 - log2(bits_per_vertex)) + src_reg dword_index(this, glsl_type::uint_type); + if (urb_write_flags) { + src_reg prev_count(this, glsl_type::uint_type); + emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu)); + unsigned log2_bits_per_vertex = + _mesa_fls(c->control_data_bits_per_vertex); + emit(SHR(dst_reg(dword_index), prev_count, + (uint32_t) (6 - log2_bits_per_vertex))); + } + + /* Start building the URB write message. The first MRF gets a copy of + * R0. + */ + int base_mrf = 1; + dst_reg mrf_reg(MRF, base_mrf); + src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); + vec4_instruction *inst = emit(MOV(mrf_reg, r0)); + inst->force_writemask_all = true; + + if (urb_write_flags & BRW_URB_WRITE_PER_SLOT_OFFSET) { + /* Set the per-slot offset to dword_index / 4, to that we'll write to + * the appropriate OWORD within the control data header. */ - src_reg dword_index(this, glsl_type::uint_type); - if (urb_write_flags) { - src_reg prev_count(this, glsl_type::uint_type); - emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu)); - unsigned log2_bits_per_vertex = - _mesa_fls(c->control_data_bits_per_vertex); - emit(SHR(dst_reg(dword_index), prev_count, - (uint32_t) (6 - log2_bits_per_vertex))); - } + src_reg per_slot_offset(this, glsl_type::uint_type); + emit(SHR(dst_reg(per_slot_offset), dword_index, 2u)); + emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, 1u); + } - /* Start building the URB write message. The first MRF gets a copy of - * R0. + if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) { + /* Set the channel masks to 1 << (dword_index % 4), so that we'll + * write to the appropriate DWORD within the OWORD. We need to do + * this computation with force_writemask_all, otherwise garbage data + * from invocation 0 might clobber the mask for invocation 1 when + * GS_OPCODE_PREPARE_CHANNEL_MASKS tries to OR the two masks + * together. */ - int base_mrf = 1; - dst_reg mrf_reg(MRF, base_mrf); - src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); - vec4_instruction *inst = emit(MOV(mrf_reg, r0)); + src_reg channel(this, glsl_type::uint_type); + inst = emit(AND(dst_reg(channel), dword_index, 3u)); inst->force_writemask_all = true; - - if (urb_write_flags & BRW_URB_WRITE_PER_SLOT_OFFSET) { - /* Set the per-slot offset to dword_index / 4, to that we'll write to - * the appropriate OWORD within the control data header. 
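The strength reduction in the comment above is easy to verify outside the driver. A minimal sketch, assuming only that control_data_bits_per_vertex is a power of two (one cut bit, or two stream-ID bits): the local fls_u32() models _mesa_fls(), and because fls is one-based, the emitted shift count 6 - fls(bpv) is the same as 5 - log2(bpv).

#include <cassert>
#include <cstdint>

/* One-based index of the most significant set bit, mirroring _mesa_fls():
 * fls_u32(1) == 1, fls_u32(2) == 2, fls_u32(4) == 3.
 */
static unsigned fls_u32(uint32_t x)
{
   return x == 0 ? 0 : 32 - __builtin_clz(x);
}

int main()
{
   for (uint32_t bpv = 1; bpv <= 4; bpv *= 2) {
      const uint32_t shift = 6 - fls_u32(bpv);   /* == 5 - log2(bpv) */
      for (uint32_t vertex_count = 1; vertex_count <= 4096; vertex_count++) {
         const uint32_t dword_index = (vertex_count - 1) * bpv / 32;
         /* The shift the visitor emits produces the same DWORD index as
          * the multiply-and-divide form of the formula.
          */
         assert(dword_index == (vertex_count - 1) >> shift);
      }
   }
   return 0;
}

The per-slot offset and channel masks set up just below then address OWORD dword_index / 4 and DWORD dword_index % 4 within the control data header.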
- */ - src_reg per_slot_offset(this, glsl_type::uint_type); - emit(SHR(dst_reg(per_slot_offset), dword_index, 2u)); - emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, 1u); - } - - if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) { - /* Set the channel masks to 1 << (dword_index % 4), so that we'll - * write to the appropriate DWORD within the OWORD. We need to do - * this computation with force_writemask_all, otherwise garbage data - * from invocation 0 might clobber the mask for invocation 1 when - * GS_OPCODE_PREPARE_CHANNEL_MASKS tries to OR the two masks - * together. - */ - src_reg channel(this, glsl_type::uint_type); - inst = emit(AND(dst_reg(channel), dword_index, 3u)); - inst->force_writemask_all = true; - src_reg one(this, glsl_type::uint_type); - inst = emit(MOV(dst_reg(one), 1u)); - inst->force_writemask_all = true; - src_reg channel_mask(this, glsl_type::uint_type); - inst = emit(SHL(dst_reg(channel_mask), one, channel)); - inst->force_writemask_all = true; - emit(GS_OPCODE_PREPARE_CHANNEL_MASKS, dst_reg(channel_mask), - channel_mask); - emit(GS_OPCODE_SET_CHANNEL_MASKS, mrf_reg, channel_mask); - } - - /* Store the control data bits in the message payload and send it. */ - dst_reg mrf_reg2(MRF, base_mrf + 1); - inst = emit(MOV(mrf_reg2, this->control_data_bits)); + src_reg one(this, glsl_type::uint_type); + inst = emit(MOV(dst_reg(one), 1u)); inst->force_writemask_all = true; - inst = emit(GS_OPCODE_URB_WRITE); - inst->urb_write_flags = urb_write_flags; - /* We need to increment Global Offset by 256-bits to make room for - * Broadwell's extra "Vertex Count" payload at the beginning of the - * URB entry. Since this is an OWord message, Global Offset is counted - * in 128-bit units, so we must set it to 2. - */ - if (devinfo->gen >= 8) - inst->offset = 2; - inst->base_mrf = base_mrf; - inst->mlen = 2; + src_reg channel_mask(this, glsl_type::uint_type); + inst = emit(SHL(dst_reg(channel_mask), one, channel)); + inst->force_writemask_all = true; + emit(GS_OPCODE_PREPARE_CHANNEL_MASKS, dst_reg(channel_mask), + channel_mask); + emit(GS_OPCODE_SET_CHANNEL_MASKS, mrf_reg, channel_mask); } - emit(BRW_OPCODE_ENDIF); + + /* Store the control data bits in the message payload and send it. */ + dst_reg mrf_reg2(MRF, base_mrf + 1); + inst = emit(MOV(mrf_reg2, this->control_data_bits)); + inst->force_writemask_all = true; + inst = emit(GS_OPCODE_URB_WRITE); + inst->urb_write_flags = urb_write_flags; + /* We need to increment Global Offset by 256-bits to make room for + * Broadwell's extra "Vertex Count" payload at the beginning of the + * URB entry. Since this is an OWord message, Global Offset is counted + * in 128-bit units, so we must set it to 2. + */ + if (devinfo->gen >= 8) + inst->offset = 2; + inst->base_mrf = base_mrf; + inst->mlen = 2; } void @@ -472,7 +467,7 @@ vec4_gs_visitor::set_stream_control_data_bits(unsigned stream_id) } void -vec4_gs_visitor::visit(ir_emit_vertex *ir) +vec4_gs_visitor::gs_emit_vertex(int stream_id) { this->current_annotation = "emit vertex: safety check"; @@ -486,7 +481,7 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir) * be recorded by transform feedback, we can simply discard all geometry * bound to these streams when transform feedback is disabled. 
*/ - if (ir->stream_id() > 0 && shader_prog->TransformFeedback.NumVarying == 0) + if (stream_id > 0 && shader_prog->TransformFeedback.NumVarying == 0) return; /* To ensure that we don't output more vertices than the shader specified @@ -529,9 +524,17 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir) emit(AND(dst_null_d(), this->vertex_count, (uint32_t) (32 / c->control_data_bits_per_vertex - 1))); inst->conditional_mod = BRW_CONDITIONAL_Z; + emit(IF(BRW_PREDICATE_NORMAL)); { + /* If vertex_count is 0, then no control data bits have been + * accumulated yet, so we skip emitting them. + */ + emit(CMP(dst_null_d(), this->vertex_count, 0u, + BRW_CONDITIONAL_NEQ)); + emit(IF(BRW_PREDICATE_NORMAL)); emit_control_data_bits(); + emit(BRW_OPCODE_ENDIF); /* Reset control_data_bits to 0 so we can start accumulating a new * batch. @@ -557,7 +560,7 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir) c->prog_data.control_data_format == GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) { this->current_annotation = "emit vertex: Stream control data bits"; - set_stream_control_data_bits(ir->stream_id()); + set_stream_control_data_bits(stream_id); } this->current_annotation = "emit vertex: increment vertex count"; @@ -570,7 +573,13 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir) } void -vec4_gs_visitor::visit(ir_end_primitive *) +vec4_gs_visitor::visit(ir_emit_vertex *ir) +{ + gs_emit_vertex(ir->stream_id()); +} + +void +vec4_gs_visitor::gs_end_primitive() { /* We can only do EndPrimitive() functionality when the control data * consists of cut bits. Fortunately, the only time it isn't is when the @@ -620,6 +629,12 @@ vec4_gs_visitor::visit(ir_end_primitive *) emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask)); } +void +vec4_gs_visitor::visit(ir_end_primitive *) +{ + gs_end_primitive(); +} + static const unsigned * generate_assembly(struct brw_context *brw, struct gl_shader_program *shader_prog, @@ -662,7 +677,7 @@ brw_gs_emit(struct brw_context *brw, likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) { c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; - vec4_gs_visitor v(brw->intelScreen->compiler, + vec4_gs_visitor v(brw->intelScreen->compiler, brw, c, prog, mem_ctx, true /* no_spills */, st_index); if (v.run(NULL /* clip planes */)) { return generate_assembly(brw, prog, &c->gp->program.Base, @@ -704,11 +719,11 @@ brw_gs_emit(struct brw_context *brw, const unsigned *ret = NULL; if (brw->gen >= 7) - gs = new vec4_gs_visitor(brw->intelScreen->compiler, + gs = new vec4_gs_visitor(brw->intelScreen->compiler, brw, c, prog, mem_ctx, false /* no_spills */, st_index); else - gs = new gen6_gs_visitor(brw->intelScreen->compiler, + gs = new gen6_gs_visitor(brw->intelScreen->compiler, brw, c, prog, mem_ctx, false /* no_spills */, st_index); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h index e693c56b58f..0e8fefabecc 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h @@ -37,7 +37,6 @@ */ struct brw_gs_compile { - struct brw_vec4_compile base; struct brw_gs_prog_key key; struct brw_gs_prog_data prog_data; struct brw_vue_map input_vue_map; @@ -69,14 +68,19 @@ class vec4_gs_visitor : public vec4_visitor { public: vec4_gs_visitor(const struct brw_compiler *compiler, + void *log_data, struct brw_gs_compile *c, struct gl_shader_program *prog, void *mem_ctx, bool no_spills, int shader_time_index); + virtual void nir_setup_inputs(nir_shader *shader); + virtual void 
nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr); + protected: - virtual dst_reg *make_reg_for_system_value(ir_variable *ir); + virtual dst_reg *make_reg_for_system_value(int location, + const glsl_type *type); virtual void setup_payload(); virtual void emit_prolog(); virtual void emit_program_code(); @@ -86,6 +90,9 @@ protected: virtual int compute_array_stride(ir_dereference_array *ir); virtual void visit(ir_emit_vertex *); virtual void visit(ir_end_primitive *); + virtual void gs_emit_vertex(int stream_id); + virtual void gs_end_primitive(); + virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr); protected: int setup_varying_inputs(int payload_reg, int *attribute_map, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp index 95b9d9017e2..cc688ef8083 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp @@ -96,7 +96,8 @@ vec4_live_variables::setup_def_use() * are the things that screen off preceding definitions of a * variable, and thus qualify for being in def[]. */ - if (inst->dst.file == GRF && !inst->predicate) { + if (inst->dst.file == GRF && + (!inst->predicate || inst->opcode == BRW_OPCODE_SEL)) { for (unsigned i = 0; i < inst->regs_written; i++) { for (int c = 0; c < 4; c++) { if (inst->dst.writemask & (1 << c)) { @@ -133,27 +134,9 @@ vec4_live_variables::compute_live_variables() while (cont) { cont = false; - foreach_block (block, cfg) { + foreach_block_reverse (block, cfg) { struct block_data *bd = &block_data[block->num]; - /* Update livein */ - for (int i = 0; i < bitset_words; i++) { - BITSET_WORD new_livein = (bd->use[i] | - (bd->liveout[i] & - ~bd->def[i])); - if (new_livein & ~bd->livein[i]) { - bd->livein[i] |= new_livein; - cont = true; - } - } - BITSET_WORD new_livein = (bd->flag_use[0] | - (bd->flag_liveout[0] & - ~bd->flag_def[0])); - if (new_livein & ~bd->flag_livein[0]) { - bd->flag_livein[0] |= new_livein; - cont = true; - } - /* Update liveout */ foreach_list_typed(bblock_link, child_link, link, &block->children) { struct block_data *child_bd = &block_data[child_link->block->num]; @@ -173,6 +156,24 @@ vec4_live_variables::compute_live_variables() cont = true; } } + + /* Update livein */ + for (int i = 0; i < bitset_words; i++) { + BITSET_WORD new_livein = (bd->use[i] | + (bd->liveout[i] & + ~bd->def[i])); + if (new_livein & ~bd->livein[i]) { + bd->livein[i] |= new_livein; + cont = true; + } + } + BITSET_WORD new_livein = (bd->flag_use[0] | + (bd->flag_liveout[0] & + ~bd->flag_def[0])); + if (new_livein & ~bd->flag_livein[0]) { + bd->flag_livein[0] |= new_livein; + cont = true; + } } } } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp new file mode 100644 index 00000000000..923e2d30a4c --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -0,0 +1,1548 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * 
paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_nir.h" +#include "brw_vec4.h" +#include "glsl/ir_uniform.h" + +namespace brw { + +void +vec4_visitor::emit_nir_code() +{ + nir_shader *nir = prog->nir; + + if (nir->num_inputs > 0) + nir_setup_inputs(nir); + + if (nir->num_uniforms > 0) + nir_setup_uniforms(nir); + + nir_setup_system_values(nir); + + /* get the main function and emit it */ + nir_foreach_overload(nir, overload) { + assert(strcmp(overload->function->name, "main") == 0); + assert(overload->impl); + nir_emit_impl(overload->impl); + } +} + +void +vec4_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr) +{ + dst_reg *reg; + + switch (instr->intrinsic) { + case nir_intrinsic_load_vertex_id: + unreachable("should be lowered by lower_vertex_id()."); + + case nir_intrinsic_load_vertex_id_zero_base: + reg = &this->nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]; + if (reg->file == BAD_FILE) + *reg = + *this->make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, + glsl_type::int_type); + break; + + case nir_intrinsic_load_base_vertex: + reg = &this->nir_system_values[SYSTEM_VALUE_BASE_VERTEX]; + if (reg->file == BAD_FILE) + *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX, + glsl_type::int_type); + break; + + case nir_intrinsic_load_instance_id: + reg = &this->nir_system_values[SYSTEM_VALUE_INSTANCE_ID]; + if (reg->file == BAD_FILE) + *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID, + glsl_type::int_type); + break; + + default: + break; + } +} + +static bool +setup_system_values_block(nir_block *block, void *void_visitor) +{ + vec4_visitor *v = (vec4_visitor *)void_visitor; + + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + v->nir_setup_system_value_intrinsic(intrin); + } + + return true; +} + +void +vec4_visitor::nir_setup_system_values(nir_shader *shader) +{ + nir_system_values = ralloc_array(mem_ctx, dst_reg, SYSTEM_VALUE_MAX); + + nir_foreach_overload(shader, overload) { + assert(strcmp(overload->function->name, "main") == 0); + assert(overload->impl); + nir_foreach_block(overload->impl, setup_system_values_block, this); + } +} + +void +vec4_visitor::nir_setup_inputs(nir_shader *shader) +{ + nir_inputs = ralloc_array(mem_ctx, src_reg, shader->num_inputs); + + foreach_list_typed(nir_variable, var, node, &shader->inputs) { + int offset = var->data.driver_location; + unsigned size = type_size(var->type); + for (unsigned i = 0; i < size; i++) { + src_reg src = src_reg(ATTR, var->data.location + i, var->type); + nir_inputs[offset + i] = src; + } + } +} + +void +vec4_visitor::nir_setup_uniforms(nir_shader *shader) +{ + uniforms = 0; + + nir_uniform_driver_location = + rzalloc_array(mem_ctx, unsigned, this->uniform_array_size); + + if (shader_prog) { + foreach_list_typed(nir_variable, var, node, &shader->uniforms) { + /* UBO's, atomics and samplers don't take up space in the 
+ uniform file */ + if (var->interface_type != NULL || var->type->contains_atomic() || + type_size(var->type) == 0) { + continue; + } + + assert(uniforms < uniform_array_size); + this->uniform_size[uniforms] = type_size(var->type); + + if (strncmp(var->name, "gl_", 3) == 0) + nir_setup_builtin_uniform(var); + else + nir_setup_uniform(var); + } + } else { + /* For ARB_vertex_program, only a single "parameters" variable is + * generated to support uniform data. + */ + nir_variable *var = (nir_variable *) shader->uniforms.get_head(); + assert(shader->uniforms.length() == 1 && + strcmp(var->name, "parameters") == 0); + + assert(uniforms < uniform_array_size); + this->uniform_size[uniforms] = type_size(var->type); + + struct gl_program_parameter_list *plist = prog->Parameters; + for (unsigned p = 0; p < plist->NumParameters; p++) { + uniform_vector_size[uniforms] = plist->Parameters[p].Size; + + /* Parameters should be either vec4 uniforms or single component + * constants; matrices and other larger types should have been broken + * down earlier. + */ + assert(uniform_vector_size[uniforms] <= 4); + + int i; + for (i = 0; i < uniform_vector_size[uniforms]; i++) { + stage_prog_data->param[uniforms * 4 + i] = &plist->ParameterValues[p][i]; + } + for (; i < 4; i++) { + static const gl_constant_value zero = { 0.0 }; + stage_prog_data->param[uniforms * 4 + i] = &zero; + } + + nir_uniform_driver_location[uniforms] = var->data.driver_location; + uniforms++; + } + } +} + +void +vec4_visitor::nir_setup_uniform(nir_variable *var) +{ + int namelen = strlen(var->name); + + /* The data for our (non-builtin) uniforms is stored in a series of + * gl_uniform_driver_storage structs for each subcomponent that + * glGetUniformLocation() could name. We know it's been set up in the same + * order we'd walk the type, so walk the list of storage and find anything + * with our name, or the prefix of a component that starts with our name. + */ + for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) { + struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u]; + + if (storage->builtin) + continue; + + if (strncmp(var->name, storage->name, namelen) != 0 || + (storage->name[namelen] != 0 && + storage->name[namelen] != '.' && + storage->name[namelen] != '[')) { + continue; + } + + gl_constant_value *components = storage->storage; + unsigned vector_count = (MAX2(storage->array_elements, 1) * + storage->type->matrix_columns); + + for (unsigned s = 0; s < vector_count; s++) { + assert(uniforms < uniform_array_size); + uniform_vector_size[uniforms] = storage->type->vector_elements; + + int i; + for (i = 0; i < uniform_vector_size[uniforms]; i++) { + stage_prog_data->param[uniforms * 4 + i] = components; + components++; + } + for (; i < 4; i++) { + static const gl_constant_value zero = { 0.0 }; + stage_prog_data->param[uniforms * 4 + i] = &zero; + } + + nir_uniform_driver_location[uniforms] = var->data.driver_location; + uniforms++; + } + } +} + +void +vec4_visitor::nir_setup_builtin_uniform(nir_variable *var) +{ + const nir_state_slot *const slots = var->state_slots; + assert(var->state_slots != NULL); + + for (unsigned int i = 0; i < var->num_state_slots; i++) { + /* This state reference has already been setup by ir_to_mesa, + * but we'll get the same index back here. We can reference + * ParameterValues directly, since unlike brw_fs.cpp, we never + * add new state references during compile. 
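A minimal sketch of the vec4 slot packing performed by the uniform setup above, with simplified stand-in types; param_slot and pack_uniform() are illustrative names, not driver API. Each uniform vector claims a full four-component slot in the param array, and the unused trailing components all point at one shared zero constant, mirroring the padding loops in nir_setup_uniforms()/nir_setup_uniform().

#include <cstdio>
#include <vector>

struct param_slot {
   const float *comp[4];
};

static const float zero = 0.0f;

/* Pack one uniform vector of `vector_elements` components into a vec4
 * slot, padding the remainder with the shared zero constant.
 */
static void pack_uniform(std::vector<param_slot> &params,
                         const float *values, unsigned vector_elements)
{
   param_slot slot;
   unsigned i;
   for (i = 0; i < vector_elements; i++)
      slot.comp[i] = &values[i];
   for (; i < 4; i++)
      slot.comp[i] = &zero;
   params.push_back(slot);
}

int main()
{
   std::vector<param_slot> params;
   const float color[3] = { 1.0f, 0.5f, 0.25f };

   pack_uniform(params, color, 3);   /* a vec3 still occupies one full slot */

   printf("slot 0 = (%g, %g, %g, %g)\n",
          *params[0].comp[0], *params[0].comp[1],
          *params[0].comp[2], *params[0].comp[3]);
   return 0;
}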
+ */ + int index = _mesa_add_state_reference(this->prog->Parameters, + (gl_state_index *)slots[i].tokens); + gl_constant_value *values = + &this->prog->Parameters->ParameterValues[index][0]; + + assert(uniforms < uniform_array_size); + + for (unsigned j = 0; j < 4; j++) + stage_prog_data->param[uniforms * 4 + j] = + &values[GET_SWZ(slots[i].swizzle, j)]; + + this->uniform_vector_size[uniforms] = + (var->type->is_scalar() || var->type->is_vector() || + var->type->is_matrix() ? var->type->vector_elements : 4); + + nir_uniform_driver_location[uniforms] = var->data.driver_location; + uniforms++; + } +} + +void +vec4_visitor::nir_emit_impl(nir_function_impl *impl) +{ + nir_locals = ralloc_array(mem_ctx, dst_reg, impl->reg_alloc); + + foreach_list_typed(nir_register, reg, node, &impl->registers) { + unsigned array_elems = + reg->num_array_elems == 0 ? 1 : reg->num_array_elems; + + nir_locals[reg->index] = dst_reg(GRF, alloc.allocate(array_elems)); + } + + nir_ssa_values = ralloc_array(mem_ctx, dst_reg, impl->ssa_alloc); + + nir_emit_cf_list(&impl->body); +} + +void +vec4_visitor::nir_emit_cf_list(exec_list *list) +{ + exec_list_validate(list); + foreach_list_typed(nir_cf_node, node, node, list) { + switch (node->type) { + case nir_cf_node_if: + nir_emit_if(nir_cf_node_as_if(node)); + break; + + case nir_cf_node_loop: + nir_emit_loop(nir_cf_node_as_loop(node)); + break; + + case nir_cf_node_block: + nir_emit_block(nir_cf_node_as_block(node)); + break; + + default: + unreachable("Invalid CFG node block"); + } + } +} + +void +vec4_visitor::nir_emit_if(nir_if *if_stmt) +{ + /* First, put the condition in f0 */ + src_reg condition = get_nir_src(if_stmt->condition, BRW_REGISTER_TYPE_D, 1); + vec4_instruction *inst = emit(MOV(dst_null_d(), condition)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + emit(IF(BRW_PREDICATE_NORMAL)); + + nir_emit_cf_list(&if_stmt->then_list); + + /* note: if the else is empty, dead CF elimination will remove it */ + emit(BRW_OPCODE_ELSE); + + nir_emit_cf_list(&if_stmt->else_list); + + emit(BRW_OPCODE_ENDIF); +} + +void +vec4_visitor::nir_emit_loop(nir_loop *loop) +{ + emit(BRW_OPCODE_DO); + + nir_emit_cf_list(&loop->body); + + emit(BRW_OPCODE_WHILE); +} + +void +vec4_visitor::nir_emit_block(nir_block *block) +{ + nir_foreach_instr(block, instr) { + nir_emit_instr(instr); + } +} + +void +vec4_visitor::nir_emit_instr(nir_instr *instr) +{ + this->base_ir = instr; + + switch (instr->type) { + case nir_instr_type_load_const: + nir_emit_load_const(nir_instr_as_load_const(instr)); + break; + + case nir_instr_type_intrinsic: + nir_emit_intrinsic(nir_instr_as_intrinsic(instr)); + break; + + case nir_instr_type_alu: + nir_emit_alu(nir_instr_as_alu(instr)); + break; + + case nir_instr_type_jump: + nir_emit_jump(nir_instr_as_jump(instr)); + break; + + case nir_instr_type_tex: + nir_emit_texture(nir_instr_as_tex(instr)); + break; + + default: + fprintf(stderr, "VS instruction not yet implemented by NIR->vec4\n"); + break; + } +} + +static dst_reg +dst_reg_for_nir_reg(vec4_visitor *v, nir_register *nir_reg, + unsigned base_offset, nir_src *indirect) +{ + dst_reg reg; + + reg = v->nir_locals[nir_reg->index]; + reg = offset(reg, base_offset); + if (indirect) { + reg.reladdr = + new(v->mem_ctx) src_reg(v->get_nir_src(*indirect, + BRW_REGISTER_TYPE_D, + 1)); + } + return reg; +} + +dst_reg +vec4_visitor::get_nir_dest(nir_dest dest) +{ + assert(!dest.is_ssa); + return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset, + dest.reg.indirect); +} + +dst_reg 
+vec4_visitor::get_nir_dest(nir_dest dest, enum brw_reg_type type) +{ + return retype(get_nir_dest(dest), type); +} + +dst_reg +vec4_visitor::get_nir_dest(nir_dest dest, nir_alu_type type) +{ + return get_nir_dest(dest, brw_type_for_nir_type(type)); +} + +src_reg +vec4_visitor::get_nir_src(nir_src src, enum brw_reg_type type, + unsigned num_components) +{ + dst_reg reg; + + if (src.is_ssa) { + assert(src.ssa != NULL); + reg = nir_ssa_values[src.ssa->index]; + } + else { + reg = dst_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset, + src.reg.indirect); + } + + reg = retype(reg, type); + + src_reg reg_as_src = src_reg(reg); + reg_as_src.swizzle = brw_swizzle_for_size(num_components); + return reg_as_src; +} + +src_reg +vec4_visitor::get_nir_src(nir_src src, nir_alu_type type, + unsigned num_components) +{ + return get_nir_src(src, brw_type_for_nir_type(type), num_components); +} + +src_reg +vec4_visitor::get_nir_src(nir_src src, unsigned num_components) +{ + /* if type is not specified, default to signed int */ + return get_nir_src(src, nir_type_int, num_components); +} + +void +vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr) +{ + dst_reg reg = dst_reg(GRF, alloc.allocate(1)); + reg.type = BRW_REGISTER_TYPE_F; + + /* @FIXME: consider emitting vector operations to save some MOVs in + * cases where the components are representable in 8 bits. + * By now, we emit a MOV for each component. + */ + for (unsigned i = 0; i < instr->def.num_components; ++i) { + reg.writemask = 1 << i; + emit(MOV(reg, src_reg(instr->value.f[i]))); + } + + /* Set final writemask */ + reg.writemask = brw_writemask_for_size(instr->def.num_components); + + nir_ssa_values[instr->def.index] = reg; +} + +void +vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) +{ + dst_reg dest; + src_reg src; + + bool has_indirect = false; + + switch (instr->intrinsic) { + + case nir_intrinsic_load_input_indirect: + has_indirect = true; + /* fallthrough */ + case nir_intrinsic_load_input: { + int offset = instr->const_index[0]; + src = nir_inputs[offset]; + + if (has_indirect) { + dest.reladdr = new(mem_ctx) src_reg(get_nir_src(instr->src[0], + BRW_REGISTER_TYPE_D, + 1)); + } + dest = get_nir_dest(instr->dest, src.type); + dest.writemask = brw_writemask_for_size(instr->num_components); + + emit(MOV(dest, src)); + break; + } + + case nir_intrinsic_store_output_indirect: + has_indirect = true; + /* fallthrough */ + case nir_intrinsic_store_output: { + int varying = instr->const_index[0]; + + src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F, + instr->num_components); + dest = dst_reg(src); + + if (has_indirect) { + dest.reladdr = new(mem_ctx) src_reg(get_nir_src(instr->src[1], + BRW_REGISTER_TYPE_D, + 1)); + } + output_reg[varying] = dest; + break; + } + + case nir_intrinsic_load_vertex_id: + unreachable("should be lowered by lower_vertex_id()"); + + case nir_intrinsic_load_vertex_id_zero_base: { + src_reg vertex_id = + src_reg(nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]); + assert(vertex_id.file != BAD_FILE); + dest = get_nir_dest(instr->dest, vertex_id.type); + emit(MOV(dest, vertex_id)); + break; + } + + case nir_intrinsic_load_base_vertex: { + src_reg base_vertex = + src_reg(nir_system_values[SYSTEM_VALUE_BASE_VERTEX]); + assert(base_vertex.file != BAD_FILE); + dest = get_nir_dest(instr->dest, base_vertex.type); + emit(MOV(dest, base_vertex)); + break; + } + + case nir_intrinsic_load_instance_id: { + src_reg instance_id = + src_reg(nir_system_values[SYSTEM_VALUE_INSTANCE_ID]); + 
assert(instance_id.file != BAD_FILE); + dest = get_nir_dest(instr->dest, instance_id.type); + emit(MOV(dest, instance_id)); + break; + } + + case nir_intrinsic_load_uniform_indirect: + has_indirect = true; + /* fallthrough */ + case nir_intrinsic_load_uniform: { + int uniform = instr->const_index[0]; + + dest = get_nir_dest(instr->dest); + + if (has_indirect) { + /* Split addressing into uniform and offset */ + int offset = uniform - nir_uniform_driver_location[uniform]; + assert(offset >= 0); + + uniform -= offset; + assert(uniform >= 0); + + src = src_reg(dst_reg(UNIFORM, uniform)); + src.reg_offset = offset; + src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1); + src.reladdr = new(mem_ctx) src_reg(tmp); + } else { + src = src_reg(dst_reg(UNIFORM, uniform)); + } + + emit(MOV(dest, src)); + break; + } + + case nir_intrinsic_atomic_counter_read: + case nir_intrinsic_atomic_counter_inc: + case nir_intrinsic_atomic_counter_dec: { + unsigned surf_index = prog_data->base.binding_table.abo_start + + (unsigned) instr->const_index[0]; + src_reg offset = get_nir_src(instr->src[0], nir_type_int, + instr->num_components); + dest = get_nir_dest(instr->dest); + + switch (instr->intrinsic) { + case nir_intrinsic_atomic_counter_inc: + emit_untyped_atomic(BRW_AOP_INC, surf_index, dest, offset, + src_reg(), src_reg()); + break; + case nir_intrinsic_atomic_counter_dec: + emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dest, offset, + src_reg(), src_reg()); + break; + case nir_intrinsic_atomic_counter_read: + emit_untyped_surface_read(surf_index, dest, offset); + break; + default: + unreachable("Unreachable"); + } + + brw_mark_surface_used(stage_prog_data, surf_index); + break; + } + + case nir_intrinsic_load_ubo_indirect: + has_indirect = true; + /* fallthrough */ + case nir_intrinsic_load_ubo: { + nir_const_value *const_block_index = nir_src_as_const_value(instr->src[0]); + src_reg surf_index; + + dest = get_nir_dest(instr->dest); + + if (const_block_index) { + /* The block index is a constant, so just emit the binding table entry + * as an immediate. + */ + surf_index = src_reg(prog_data->base.binding_table.ubo_start + + const_block_index->u[0]); + } else { + /* The block index is not a constant. Evaluate the index expression + * per-channel and add the base UBO index; we have to select a value + * from any live channel. + */ + surf_index = src_reg(this, glsl_type::uint_type); + emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], nir_type_int, + instr->num_components), + src_reg(prog_data->base.binding_table.ubo_start))); + surf_index = emit_uniformize(surf_index); + + /* Assume this may touch any UBO. It would be nice to provide + * a tighter bound, but the array information is already lowered away. 
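The constant-offset load_ubo path that follows converts the byte offset into a vec4-granular pull-constant offset plus the component used for the replicated swizzle; the indirect path computes the same division with an SHR by 4. A minimal sketch of that arithmetic (split_ubo_offset() is an illustrative name, not a driver helper):

#include <cassert>
#include <cstdio>

/* A vec4 pull-constant load fetches 16 bytes at a time, so a UBO byte
 * offset decomposes into which vec4 to fetch and which dword of that
 * vec4 holds the first component of interest.
 */
static void split_ubo_offset(unsigned byte_offset,
                             unsigned *vec4_offset, unsigned *component)
{
   *vec4_offset = byte_offset / 16;       /* what the SHR by 4 computes */
   *component   = byte_offset % 16 / 4;   /* dword within the fetched vec4 */
}

int main()
{
   unsigned vec4_offset, component;

   split_ubo_offset(28, &vec4_offset, &component);
   assert(vec4_offset == 1 && component == 3);   /* byte 28 -> vec4 1, .w */

   split_ubo_offset(4, &vec4_offset, &component);
   assert(vec4_offset == 0 && component == 1);   /* byte 4 -> vec4 0, .y */

   printf("ok\n");
   return 0;
}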
+ */ + brw_mark_surface_used(&prog_data->base, + prog_data->base.binding_table.ubo_start + + shader_prog->NumUniformBlocks - 1); + } + + unsigned const_offset = instr->const_index[0]; + src_reg offset; + + if (!has_indirect) { + offset = src_reg(const_offset / 16); + } else { + offset = src_reg(this, glsl_type::uint_type); + emit(SHR(dst_reg(offset), get_nir_src(instr->src[1], nir_type_int, 1), + src_reg(4u))); + } + + src_reg packed_consts = src_reg(this, glsl_type::vec4_type); + packed_consts.type = dest.type; + + emit_pull_constant_load_reg(dst_reg(packed_consts), + surf_index, + offset, + NULL, NULL /* before_block/inst */); + + packed_consts.swizzle = brw_swizzle_for_size(instr->num_components); + packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4, + const_offset % 16 / 4, + const_offset % 16 / 4, + const_offset % 16 / 4); + + emit(MOV(dest, packed_consts)); + break; + } + + default: + unreachable("Unknown intrinsic"); + } +} + +static unsigned +brw_swizzle_for_nir_swizzle(uint8_t swizzle[4]) +{ + return BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); +} + +static enum brw_conditional_mod +brw_conditional_for_nir_comparison(nir_op op) +{ + switch (op) { + case nir_op_flt: + case nir_op_ilt: + case nir_op_ult: + return BRW_CONDITIONAL_L; + + case nir_op_fge: + case nir_op_ige: + case nir_op_uge: + return BRW_CONDITIONAL_GE; + + case nir_op_feq: + case nir_op_ieq: + case nir_op_ball_fequal2: + case nir_op_ball_iequal2: + case nir_op_ball_fequal3: + case nir_op_ball_iequal3: + case nir_op_ball_fequal4: + case nir_op_ball_iequal4: + return BRW_CONDITIONAL_Z; + + case nir_op_fne: + case nir_op_ine: + case nir_op_bany_fnequal2: + case nir_op_bany_inequal2: + case nir_op_bany_fnequal3: + case nir_op_bany_inequal3: + case nir_op_bany_fnequal4: + case nir_op_bany_inequal4: + return BRW_CONDITIONAL_NZ; + + default: + unreachable("not reached: bad operation for comparison"); + } +} + +void +vec4_visitor::nir_emit_alu(nir_alu_instr *instr) +{ + vec4_instruction *inst; + + dst_reg dst = get_nir_dest(instr->dest.dest, + nir_op_infos[instr->op].output_type); + dst.writemask = instr->dest.write_mask; + + src_reg op[4]; + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { + op[i] = get_nir_src(instr->src[i].src, + nir_op_infos[instr->op].input_types[i], 4); + op[i].swizzle = brw_swizzle_for_nir_swizzle(instr->src[i].swizzle); + op[i].abs = instr->src[i].abs; + op[i].negate = instr->src[i].negate; + } + + switch (instr->op) { + case nir_op_imov: + case nir_op_fmov: + inst = emit(MOV(dst, op[0])); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + unreachable("not reached: should be handled by lower_vec_to_movs()"); + + case nir_op_i2f: + case nir_op_u2f: + inst = emit(MOV(dst, op[0])); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_f2i: + case nir_op_f2u: + inst = emit(MOV(dst, op[0])); + break; + + case nir_op_fadd: + /* fall through */ + case nir_op_iadd: + inst = emit(ADD(dst, op[0], op[1])); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_fmul: + inst = emit(MUL(dst, op[0], op[1])); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_imul: { + if (devinfo->gen < 8) { + nir_const_value *value0 = nir_src_as_const_value(instr->src[0].src); + nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src); + + /* For integer multiplication, the MUL uses the low 16 bits of one of + * the operands (src0 through SNB, src1 on IVB and later). 
The MACH + * accumulates in the contribution of the upper 16 bits of that + * operand. If we can determine that one of the args is in the low + * 16 bits, though, we can just emit a single MUL. + */ + if (value0 && value0->u[0] < (1 << 16)) { + if (devinfo->gen < 7) + emit(MUL(dst, op[0], op[1])); + else + emit(MUL(dst, op[1], op[0])); + } else if (value1 && value1->u[0] < (1 << 16)) { + if (devinfo->gen < 7) + emit(MUL(dst, op[1], op[0])); + else + emit(MUL(dst, op[0], op[1])); + } else { + struct brw_reg acc = retype(brw_acc_reg(8), dst.type); + + emit(MUL(acc, op[0], op[1])); + emit(MACH(dst_null_d(), op[0], op[1])); + emit(MOV(dst, src_reg(acc))); + } + } else { + emit(MUL(dst, op[0], op[1])); + } + break; + } + + case nir_op_imul_high: + case nir_op_umul_high: { + struct brw_reg acc = retype(brw_acc_reg(8), dst.type); + + emit(MUL(acc, op[0], op[1])); + emit(MACH(dst, op[0], op[1])); + break; + } + + case nir_op_frcp: + inst = emit_math(SHADER_OPCODE_RCP, dst, op[0]); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_fexp2: + inst = emit_math(SHADER_OPCODE_EXP2, dst, op[0]); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_flog2: + inst = emit_math(SHADER_OPCODE_LOG2, dst, op[0]); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_fsin: + inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_fcos: + inst = emit_math(SHADER_OPCODE_COS, dst, op[0]); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_idiv: + case nir_op_udiv: + emit_math(SHADER_OPCODE_INT_QUOTIENT, dst, op[0], op[1]); + break; + + case nir_op_umod: + emit_math(SHADER_OPCODE_INT_REMAINDER, dst, op[0], op[1]); + break; + + case nir_op_ldexp: + unreachable("not reached: should be handled by ldexp_to_arith()"); + + case nir_op_fsqrt: + inst = emit_math(SHADER_OPCODE_SQRT, dst, op[0]); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_frsq: + inst = emit_math(SHADER_OPCODE_RSQ, dst, op[0]); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_fpow: + inst = emit_math(SHADER_OPCODE_POW, dst, op[0], op[1]); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_uadd_carry: { + struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD); + + emit(ADDC(dst_null_ud(), op[0], op[1])); + emit(MOV(dst, src_reg(acc))); + break; + } + + case nir_op_usub_borrow: { + struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD); + + emit(SUBB(dst_null_ud(), op[0], op[1])); + emit(MOV(dst, src_reg(acc))); + break; + } + + case nir_op_ftrunc: + inst = emit(RNDZ(dst, op[0])); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_fceil: { + src_reg tmp = src_reg(this, glsl_type::float_type); + tmp.swizzle = + brw_swizzle_for_size(instr->src[0].src.is_ssa ? 
+ instr->src[0].src.ssa->num_components : + instr->src[0].src.reg.reg->num_components); + + op[0].negate = !op[0].negate; + emit(RNDD(dst_reg(tmp), op[0])); + tmp.negate = true; + inst = emit(MOV(dst, tmp)); + inst->saturate = instr->dest.saturate; + break; + } + + case nir_op_ffloor: + inst = emit(RNDD(dst, op[0])); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_ffract: + inst = emit(FRC(dst, op[0])); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_fround_even: + inst = emit(RNDE(dst, op[0])); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_fmin: + case nir_op_imin: + case nir_op_umin: + inst = emit_minmax(BRW_CONDITIONAL_L, dst, op[0], op[1]); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_fmax: + case nir_op_imax: + case nir_op_umax: + inst = emit_minmax(BRW_CONDITIONAL_GE, dst, op[0], op[1]); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_fddx: + case nir_op_fddx_coarse: + case nir_op_fddx_fine: + case nir_op_fddy: + case nir_op_fddy_coarse: + case nir_op_fddy_fine: + unreachable("derivatives are not valid in vertex shaders"); + + case nir_op_flt: + case nir_op_ilt: + case nir_op_ult: + case nir_op_fge: + case nir_op_ige: + case nir_op_uge: + case nir_op_feq: + case nir_op_ieq: + case nir_op_fne: + case nir_op_ine: + emit(CMP(dst, op[0], op[1], + brw_conditional_for_nir_comparison(instr->op))); + break; + + case nir_op_ball_fequal2: + case nir_op_ball_iequal2: + case nir_op_ball_fequal3: + case nir_op_ball_iequal3: + case nir_op_ball_fequal4: + case nir_op_ball_iequal4: { + dst_reg tmp = dst_reg(this, glsl_type::bool_type); + + switch (instr->op) { + case nir_op_ball_fequal2: + case nir_op_ball_iequal2: + tmp.writemask = WRITEMASK_XY; + break; + case nir_op_ball_fequal3: + case nir_op_ball_iequal3: + tmp.writemask = WRITEMASK_XYZ; + break; + case nir_op_ball_fequal4: + case nir_op_ball_iequal4: + tmp.writemask = WRITEMASK_XYZW; + break; + default: + unreachable("not reached"); + } + + emit(CMP(tmp, op[0], op[1], + brw_conditional_for_nir_comparison(instr->op))); + emit(MOV(dst, src_reg(0))); + inst = emit(MOV(dst, src_reg(~0))); + inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; + break; + } + + case nir_op_bany_fnequal2: + case nir_op_bany_inequal2: + case nir_op_bany_fnequal3: + case nir_op_bany_inequal3: + case nir_op_bany_fnequal4: + case nir_op_bany_inequal4: { + dst_reg tmp = dst_reg(this, glsl_type::bool_type); + + switch (instr->op) { + case nir_op_bany_fnequal2: + case nir_op_bany_inequal2: + tmp.writemask = WRITEMASK_XY; + break; + case nir_op_bany_fnequal3: + case nir_op_bany_inequal3: + tmp.writemask = WRITEMASK_XYZ; + break; + case nir_op_bany_fnequal4: + case nir_op_bany_inequal4: + tmp.writemask = WRITEMASK_XYZW; + break; + default: + unreachable("not reached"); + } + + emit(CMP(tmp, op[0], op[1], + brw_conditional_for_nir_comparison(instr->op))); + + emit(MOV(dst, src_reg(0))); + inst = emit(MOV(dst, src_reg(~0))); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + break; + } + + case nir_op_inot: + if (devinfo->gen >= 8) { + op[0] = resolve_source_modifiers(op[0]); + } + emit(NOT(dst, op[0])); + break; + + case nir_op_ixor: + if (devinfo->gen >= 8) { + op[0] = resolve_source_modifiers(op[0]); + op[1] = resolve_source_modifiers(op[1]); + } + emit(XOR(dst, op[0], op[1])); + break; + + case nir_op_ior: + if (devinfo->gen >= 8) { + op[0] = resolve_source_modifiers(op[0]); + op[1] = resolve_source_modifiers(op[1]); + } + emit(OR(dst, op[0], op[1])); + break; + + case nir_op_iand: + if 
(devinfo->gen >= 8) { + op[0] = resolve_source_modifiers(op[0]); + op[1] = resolve_source_modifiers(op[1]); + } + emit(AND(dst, op[0], op[1])); + break; + + case nir_op_b2i: + emit(AND(dst, op[0], src_reg(1))); + break; + + case nir_op_b2f: + op[0].type = BRW_REGISTER_TYPE_D; + dst.type = BRW_REGISTER_TYPE_D; + emit(AND(dst, op[0], src_reg(0x3f800000u))); + dst.type = BRW_REGISTER_TYPE_F; + break; + + case nir_op_f2b: + emit(CMP(dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ)); + break; + + case nir_op_i2b: + emit(CMP(dst, op[0], src_reg(0), BRW_CONDITIONAL_NZ)); + break; + + case nir_op_fnoise1_1: + case nir_op_fnoise1_2: + case nir_op_fnoise1_3: + case nir_op_fnoise1_4: + case nir_op_fnoise2_1: + case nir_op_fnoise2_2: + case nir_op_fnoise2_3: + case nir_op_fnoise2_4: + case nir_op_fnoise3_1: + case nir_op_fnoise3_2: + case nir_op_fnoise3_3: + case nir_op_fnoise3_4: + case nir_op_fnoise4_1: + case nir_op_fnoise4_2: + case nir_op_fnoise4_3: + case nir_op_fnoise4_4: + unreachable("not reached: should be handled by lower_noise"); + + case nir_op_unpack_half_2x16_split_x: + case nir_op_unpack_half_2x16_split_y: + case nir_op_pack_half_2x16_split: + unreachable("not reached: should not occur in vertex shader"); + + case nir_op_unpack_snorm_2x16: + case nir_op_unpack_unorm_2x16: + case nir_op_pack_snorm_2x16: + case nir_op_pack_unorm_2x16: + unreachable("not reached: should be handled by lower_packing_builtins"); + + case nir_op_unpack_half_2x16: + /* As NIR does not guarantee that we have a correct swizzle outside the + * boundaries of a vector, and the implementation of emit_unpack_half_2x16 + * uses the source operand in an operation with WRITEMASK_Y while our + * source operand has only size 1, it accessed incorrect data producing + * regressions in Piglit. We repeat the swizzle of the first component on the + * rest of components to avoid regressions. In the vec4_visitor IR code path + * this is not needed because the operand has already the correct swizzle. + */ + op[0].swizzle = brw_compose_swizzle(BRW_SWIZZLE_XXXX, op[0].swizzle); + emit_unpack_half_2x16(dst, op[0]); + break; + + case nir_op_pack_half_2x16: + emit_pack_half_2x16(dst, op[0]); + break; + + case nir_op_unpack_unorm_4x8: + emit_unpack_unorm_4x8(dst, op[0]); + break; + + case nir_op_pack_unorm_4x8: + emit_pack_unorm_4x8(dst, op[0]); + break; + + case nir_op_unpack_snorm_4x8: + emit_unpack_snorm_4x8(dst, op[0]); + break; + + case nir_op_pack_snorm_4x8: + emit_pack_snorm_4x8(dst, op[0]); + break; + + case nir_op_bitfield_reverse: + emit(BFREV(dst, op[0])); + break; + + case nir_op_bit_count: + emit(CBIT(dst, op[0])); + break; + + case nir_op_ufind_msb: + case nir_op_ifind_msb: { + src_reg temp = src_reg(this, glsl_type::uint_type); + + inst = emit(FBH(dst_reg(temp), op[0])); + inst->dst.writemask = WRITEMASK_XYZW; + + /* FBH counts from the MSB side, while GLSL's findMSB() wants the count + * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then + * subtract the result from 31 to convert the MSB count into an LSB count. + */ + + /* FBH only supports UD type for dst, so use a MOV to convert UD to D. 
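The 31 - result conversion described in the comment above can be sanity-checked with a small stand-alone program for unsigned inputs; __builtin_clz stands in for the FBH opcode (both count from the MSB side) and find_msb_ref() is a hypothetical reference, not a driver function. The x == 0 case is not exercised: there FBH returns 0xFFFFFFFF and the predicated ADD below leaves the -1 in place.

#include <cassert>
#include <cstdint>

/* Reference findMSB() for unsigned values: LSB-relative index of the most
 * significant set bit, or -1 for zero, as GLSL specifies.
 */
static int find_msb_ref(uint32_t x)
{
   for (int i = 31; i >= 0; i--)
      if (x & (1u << i))
         return i;
   return -1;
}

int main()
{
   for (uint64_t v = 1; v <= 0xffffffffu; v = v * 3 + 1) {
      const uint32_t x = (uint32_t) v;
      const int from_msb = __builtin_clz(x);   /* stands in for FBH */

      /* Subtracting the MSB-side count from 31 yields the LSB-relative
       * index that findMSB() must return.
       */
      assert(31 - from_msb == find_msb_ref(x));
   }
   return 0;
}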
*/ + temp.swizzle = BRW_SWIZZLE_NOOP; + emit(MOV(dst, temp)); + + src_reg src_tmp = src_reg(dst); + emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ)); + + src_tmp.negate = true; + inst = emit(ADD(dst, src_tmp, src_reg(31))); + inst->predicate = BRW_PREDICATE_NORMAL; + break; + } + + case nir_op_find_lsb: + emit(FBL(dst, op[0])); + break; + + case nir_op_ubitfield_extract: + case nir_op_ibitfield_extract: + op[0] = fix_3src_operand(op[0]); + op[1] = fix_3src_operand(op[1]); + op[2] = fix_3src_operand(op[2]); + + emit(BFE(dst, op[2], op[1], op[0])); + break; + + case nir_op_bfm: + emit(BFI1(dst, op[0], op[1])); + break; + + case nir_op_bfi: + op[0] = fix_3src_operand(op[0]); + op[1] = fix_3src_operand(op[1]); + op[2] = fix_3src_operand(op[2]); + + emit(BFI2(dst, op[0], op[1], op[2])); + break; + + case nir_op_bitfield_insert: + unreachable("not reached: should be handled by " + "lower_instructions::bitfield_insert_to_bfm_bfi"); + + case nir_op_fsign: + /* AND(val, 0x80000000) gives the sign bit. + * + * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not + * zero. + */ + emit(CMP(dst_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ)); + + op[0].type = BRW_REGISTER_TYPE_UD; + dst.type = BRW_REGISTER_TYPE_UD; + emit(AND(dst, op[0], src_reg(0x80000000u))); + + inst = emit(OR(dst, src_reg(dst), src_reg(0x3f800000u))); + inst->predicate = BRW_PREDICATE_NORMAL; + dst.type = BRW_REGISTER_TYPE_F; + + if (instr->dest.saturate) { + inst = emit(MOV(dst, src_reg(dst))); + inst->saturate = true; + } + break; + + case nir_op_isign: + /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1). + * -> non-negative val generates 0x00000000. + * Predicated OR sets 1 if val is positive. + */ + emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G)); + emit(ASR(dst, op[0], src_reg(31))); + inst = emit(OR(dst, src_reg(dst), src_reg(1))); + inst->predicate = BRW_PREDICATE_NORMAL; + break; + + case nir_op_ishl: + emit(SHL(dst, op[0], op[1])); + break; + + case nir_op_ishr: + emit(ASR(dst, op[0], op[1])); + break; + + case nir_op_ushr: + emit(SHR(dst, op[0], op[1])); + break; + + case nir_op_ffma: + op[0] = fix_3src_operand(op[0]); + op[1] = fix_3src_operand(op[1]); + op[2] = fix_3src_operand(op[2]); + + inst = emit(MAD(dst, op[2], op[1], op[0])); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_flrp: + inst = emit_lrp(dst, op[0], op[1], op[2]); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_bcsel: + emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); + inst = emit(BRW_OPCODE_SEL, dst, op[1], op[2]); + inst->predicate = BRW_PREDICATE_NORMAL; + break; + + case nir_op_fdot2: + inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_fdot3: + inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_fdot4: + inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]); + inst->saturate = instr->dest.saturate; + break; + + case nir_op_bany2: + case nir_op_bany3: + case nir_op_bany4: { + dst_reg tmp = dst_reg(this, glsl_type::bool_type); + tmp.writemask = brw_writemask_for_size(nir_op_infos[instr->op].input_sizes[0]); + + emit(CMP(tmp, op[0], src_reg(0), BRW_CONDITIONAL_NZ)); + + emit(MOV(dst, src_reg(0))); + inst = emit(MOV(dst, src_reg(~0))); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + break; + } + + case nir_op_fabs: + case nir_op_iabs: + case nir_op_fneg: + case nir_op_ineg: + case nir_op_fsat: + unreachable("not 
reached: should be lowered by lower_source mods"); + + case nir_op_fdiv: + unreachable("not reached: should be lowered by DIV_TO_MUL_RCP in the compiler"); + + case nir_op_fmod: + unreachable("not reached: should be lowered by MOD_TO_FLOOR in the compiler"); + + case nir_op_fsub: + case nir_op_isub: + unreachable("not reached: should be handled by ir_sub_to_add_neg"); + + default: + unreachable("Unimplemented ALU operation"); + } + + /* If we need to do a boolean resolve, replace the result with -(x & 1) + * to sign extend the low bit to 0/~0 + */ + if (devinfo->gen <= 5 && + (instr->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) == + BRW_NIR_BOOLEAN_NEEDS_RESOLVE) { + dst_reg masked = dst_reg(this, glsl_type::int_type); + masked.writemask = dst.writemask; + emit(AND(masked, src_reg(dst), src_reg(1))); + src_reg masked_neg = src_reg(masked); + masked_neg.negate = true; + emit(MOV(retype(dst, BRW_REGISTER_TYPE_D), masked_neg)); + } +} + +void +vec4_visitor::nir_emit_jump(nir_jump_instr *instr) +{ + switch (instr->type) { + case nir_jump_break: + emit(BRW_OPCODE_BREAK); + break; + + case nir_jump_continue: + emit(BRW_OPCODE_CONTINUE); + break; + + case nir_jump_return: + /* fall through */ + default: + unreachable("unknown jump"); + } +} + +enum ir_texture_opcode +ir_texture_opcode_for_nir_texop(nir_texop texop) +{ + enum ir_texture_opcode op; + + switch (texop) { + case nir_texop_lod: op = ir_lod; break; + case nir_texop_query_levels: op = ir_query_levels; break; + case nir_texop_tex: op = ir_tex; break; + case nir_texop_tg4: op = ir_tg4; break; + case nir_texop_txb: op = ir_txb; break; + case nir_texop_txd: op = ir_txd; break; + case nir_texop_txf: op = ir_txf; break; + case nir_texop_txf_ms: op = ir_txf_ms; break; + case nir_texop_txl: op = ir_txl; break; + case nir_texop_txs: op = ir_txs; break; + default: + unreachable("unknown texture opcode"); + } + + return op; +} +const glsl_type * +glsl_type_for_nir_alu_type(nir_alu_type alu_type, + unsigned components) +{ + switch (alu_type) { + case nir_type_float: + return glsl_type::vec(components); + case nir_type_int: + return glsl_type::ivec(components); + case nir_type_unsigned: + return glsl_type::uvec(components); + case nir_type_bool: + return glsl_type::bvec(components); + default: + return glsl_type::error_type; + } + + return glsl_type::error_type; +} + +void +vec4_visitor::nir_emit_texture(nir_tex_instr *instr) +{ + unsigned sampler = instr->sampler_index; + src_reg sampler_reg = src_reg(sampler); + src_reg coordinate; + const glsl_type *coord_type = NULL; + src_reg shadow_comparitor; + src_reg offset_value; + src_reg lod, lod2; + src_reg sample_index; + src_reg mcs; + + const glsl_type *dest_type = + glsl_type_for_nir_alu_type(instr->dest_type, + nir_tex_instr_dest_size(instr)); + dst_reg dest = get_nir_dest(instr->dest, instr->dest_type); + + /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother + * emitting anything other than setting up the constant result. + */ + if (instr->op == nir_texop_tg4) { + int swiz = GET_SWZ(key->tex.swizzles[sampler], instr->component); + if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) { + emit(MOV(dest, src_reg(swiz == SWIZZLE_ONE ? 
1.0f : 0.0f))); + return; + } + } + + /* Load the texture operation sources */ + for (unsigned i = 0; i < instr->num_srcs; i++) { + switch (instr->src[i].src_type) { + case nir_tex_src_comparitor: + shadow_comparitor = get_nir_src(instr->src[i].src, + BRW_REGISTER_TYPE_F, 1); + break; + + case nir_tex_src_coord: { + unsigned src_size = nir_tex_instr_src_size(instr, i); + + switch (instr->op) { + case nir_texop_txf: + case nir_texop_txf_ms: + coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, + src_size); + coord_type = glsl_type::ivec(src_size); + break; + + default: + coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F, + src_size); + coord_type = glsl_type::vec(src_size); + break; + } + break; + } + + case nir_tex_src_ddx: + lod = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F, + nir_tex_instr_src_size(instr, i)); + break; + + case nir_tex_src_ddy: + lod2 = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F, + nir_tex_instr_src_size(instr, i)); + break; + + case nir_tex_src_lod: + switch (instr->op) { + case nir_texop_txs: + case nir_texop_txf: + lod = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1); + break; + + default: + lod = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F, 1); + break; + } + break; + + case nir_tex_src_ms_index: { + sample_index = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1); + assert(coord_type != NULL); + if (devinfo->gen >= 7 && + key->tex.compressed_multisample_layout_mask & (1<<sampler)) { + mcs = emit_mcs_fetch(coord_type, coordinate, sampler_reg); + } else { + mcs = src_reg(0u); + } + mcs = retype(mcs, BRW_REGISTER_TYPE_UD); + break; + } + + case nir_tex_src_offset: + offset_value = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 2); + break; + + case nir_tex_src_sampler_offset: { + /* The highest sampler which may be used by this operation is + * the last element of the array. Mark it here, because the generator + * doesn't have enough information to determine the bound. 
+ */ + uint32_t array_size = instr->sampler_array_size; + uint32_t max_used = sampler + array_size - 1; + if (instr->op == nir_texop_tg4) { + max_used += prog_data->base.binding_table.gather_texture_start; + } else { + max_used += prog_data->base.binding_table.texture_start; + } + + brw_mark_surface_used(&prog_data->base, max_used); + + /* Emit code to evaluate the actual indexing expression */ + src_reg src = get_nir_src(instr->src[i].src, 1); + src_reg temp(this, glsl_type::uint_type); + emit(ADD(dst_reg(temp), src, src_reg(sampler))); + sampler_reg = emit_uniformize(temp); + break; + } + + case nir_tex_src_projector: + unreachable("Should be lowered by do_lower_texture_projection"); + + case nir_tex_src_bias: + unreachable("LOD bias is not valid for vertex shaders.\n"); + + default: + unreachable("unknown texture source"); + } + } + + uint32_t constant_offset = 0; + for (unsigned i = 0; i < 3; i++) { + if (instr->const_offset[i] != 0) { + constant_offset = brw_texture_offset(instr->const_offset, 3); + break; + } + } + + /* Stuff the channel select bits in the top of the texture offset */ + if (instr->op == nir_texop_tg4) + constant_offset |= gather_channel(instr->component, sampler) << 16; + + ir_texture_opcode op = ir_texture_opcode_for_nir_texop(instr->op); + + bool is_cube_array = + instr->op == nir_texop_txs && + instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && + instr->is_array; + + emit_texture(op, dest, dest_type, coordinate, instr->coord_components, + shadow_comparitor, + lod, lod2, sample_index, + constant_offset, offset_value, + mcs, is_cube_array, sampler, sampler_reg); +} + +} diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index 555c42e2f24..617c9889cad 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -280,15 +280,15 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) */ foreach_block_and_inst(block, vec4_instruction, inst, cfg) { for (unsigned int i = 0; i < 3; i++) { - if (inst->src[i].file == GRF) { - spill_costs[inst->src[i].reg] += loop_scale; + if (inst->src[i].file == GRF) { + spill_costs[inst->src[i].reg] += loop_scale; if (inst->src[i].reladdr) no_spill[inst->src[i].reg] = true; - } + } } if (inst->dst.file == GRF) { - spill_costs[inst->dst.reg] += loop_scale; + spill_costs[inst->dst.reg] += loop_scale; if (inst->dst.reladdr) no_spill[inst->dst.reg] = true; } @@ -296,12 +296,12 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) switch (inst->opcode) { case BRW_OPCODE_DO: - loop_scale *= 10; - break; + loop_scale *= 10; + break; case BRW_OPCODE_WHILE: - loop_scale /= 10; - break; + loop_scale /= 10; + break; case SHADER_OPCODE_GEN4_SCRATCH_READ: case SHADER_OPCODE_GEN4_SCRATCH_WRITE: @@ -309,12 +309,12 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) if (inst->src[i].file == GRF) no_spill[inst->src[i].reg] = true; } - if (inst->dst.file == GRF) - no_spill[inst->dst.reg] = true; - break; + if (inst->dst.file == GRF) + no_spill[inst->dst.reg] = true; + break; default: - break; + break; } } } @@ -339,7 +339,7 @@ void vec4_visitor::spill_reg(int spill_reg_nr) { assert(alloc.sizes[spill_reg_nr] == 1); - unsigned int spill_offset = c->last_scratch++; + unsigned int spill_offset = last_scratch++; /* Generate spill/unspill instructions for the objects being spilled. 
*/ foreach_block_and_inst(block, vec4_instruction, inst, cfg) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 236fa51f92c..20b628e9192 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -287,7 +287,7 @@ vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements } src_reg -vec4_visitor::fix_3src_operand(src_reg src) +vec4_visitor::fix_3src_operand(const src_reg &src) { /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be * able to use vertical stride of zero to replicate the vec4 uniform, like @@ -313,7 +313,20 @@ vec4_visitor::fix_3src_operand(src_reg src) } src_reg -vec4_visitor::fix_math_operand(src_reg src) +vec4_visitor::resolve_source_modifiers(const src_reg &src) +{ + if (!src.abs && !src.negate) + return src; + + dst_reg resolved = dst_reg(this, glsl_type::ivec4_type); + resolved.type = src.type; + emit(MOV(resolved, src)); + + return src_reg(resolved); +} + +src_reg +vec4_visitor::fix_math_operand(const src_reg &src) { if (devinfo->gen < 6 || devinfo->gen >= 8 || src.file == BAD_FILE) return src; @@ -338,7 +351,7 @@ vec4_visitor::fix_math_operand(src_reg src) return src_reg(expanded); } -void +vec4_instruction * vec4_visitor::emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0, const src_reg &src1) @@ -350,11 +363,13 @@ vec4_visitor::emit_math(enum opcode opcode, /* MATH on Gen6 must be align1, so we can't do writemasks. */ math->dst = dst_reg(this, glsl_type::vec4_type); math->dst.type = dst.type; - emit(MOV(dst, src_reg(math->dst))); + math = emit(MOV(dst, src_reg(math->dst))); } else if (devinfo->gen < 6) { math->base_mrf = 1; math->mlen = src1.file == BAD_FILE ? 1 : 2; } + + return math; } void @@ -572,9 +587,18 @@ vec4_visitor::visit_instructions(const exec_list *list) } } - -static int -type_size(const struct glsl_type *type) +/** + * Returns the minimum number of vec4 elements needed to pack a type. + * + * For simple types, it will return 1 (a single vec4); for matrices, the + * number of columns; for array and struct, the sum of the vec4_size of + * each of its elements; and for sampler and atomic, zero. + * + * This method is useful to calculate how much register space is needed to + * store a particular type. + */ +int +vec4_visitor::type_size(const struct glsl_type *type) { unsigned int i; int size; @@ -603,6 +627,9 @@ type_size(const struct glsl_type *type) size += type_size(type->fields.structure[i].type); } return size; + case GLSL_TYPE_SUBROUTINE: + return 1; + case GLSL_TYPE_SAMPLER: /* Samplers take up no register space, since they're baked in at * link time. 
@@ -611,6 +638,7 @@ type_size(const struct glsl_type *type) case GLSL_TYPE_ATOMIC_UINT: return 0; case GLSL_TYPE_IMAGE: + return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4); case GLSL_TYPE_VOID: case GLSL_TYPE_DOUBLE: case GLSL_TYPE_ERROR: @@ -627,7 +655,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) init(); this->file = GRF; - this->reg = v->alloc.allocate(type_size(type)); + this->reg = v->alloc.allocate(v->type_size(type)); if (type->is_array() || type->is_record()) { this->swizzle = BRW_SWIZZLE_NOOP; @@ -645,7 +673,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size) init(); this->file = GRF; - this->reg = v->alloc.allocate(type_size(type) * size); + this->reg = v->alloc.allocate(v->type_size(type) * size); this->swizzle = BRW_SWIZZLE_NOOP; @@ -657,7 +685,7 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) init(); this->file = GRF; - this->reg = v->alloc.allocate(type_size(type)); + this->reg = v->alloc.allocate(v->type_size(type)); if (type->is_array() || type->is_record()) { this->writemask = WRITEMASK_XYZW; @@ -668,6 +696,21 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) this->type = brw_type_for_base_type(type); } +void +vec4_visitor::setup_vector_uniform_values(const gl_constant_value *values, + unsigned n) +{ + static const gl_constant_value zero = { 0 }; + + for (unsigned i = 0; i < n; ++i) + stage_prog_data->param[4 * uniforms + i] = &values[i]; + + for (unsigned i = n; i < 4; ++i) + stage_prog_data->param[4 * uniforms + i] = &zero; + + uniform_vector_size[uniforms++] = n; +} + /* Our support for uniforms is piggy-backed on the struct * gl_fragment_program, because that's where the values actually * get stored, rather than in some global gl_shader_program uniform @@ -697,26 +740,13 @@ vec4_visitor::setup_uniform_values(ir_variable *ir) continue; } - gl_constant_value *components = storage->storage; - unsigned vector_count = (MAX2(storage->array_elements, 1) * - storage->type->matrix_columns); - - for (unsigned s = 0; s < vector_count; s++) { - assert(uniforms < uniform_array_size); - uniform_vector_size[uniforms] = storage->type->vector_elements; - - int i; - for (i = 0; i < uniform_vector_size[uniforms]; i++) { - stage_prog_data->param[uniforms * 4 + i] = components; - components++; - } - for (; i < 4; i++) { - static gl_constant_value zero = { 0.0 }; - stage_prog_data->param[uniforms * 4 + i] = &zero; - } + const unsigned vector_count = (MAX2(storage->array_elements, 1) * + storage->type->matrix_columns); + const unsigned vector_size = storage->type->vector_elements; - uniforms++; - } + for (unsigned s = 0; s < vector_count; s++) + setup_vector_uniform_values(&storage->storage[s * vector_size], + vector_size); } } @@ -1043,8 +1073,6 @@ vec4_visitor::visit(ir_variable *ir) for (int i = 0; i < type_size(ir->type); i++) { output_reg[ir->data.location + i] = *reg; output_reg[ir->data.location + i].reg_offset = i; - output_reg[ir->data.location + i].type = - brw_type_for_base_type(ir->type->get_scalar_type()); output_reg_annotation[ir->data.location + i] = ir->name; } break; @@ -1064,7 +1092,7 @@ vec4_visitor::visit(ir_variable *ir) * Some uniforms, such as samplers and atomic counters, have no actual * storage, so we should ignore them. 
*/ - if (ir->is_in_uniform_block() || type_size(ir->type) == 0) + if (ir->is_in_buffer_block() || type_size(ir->type) == 0) return; /* Track how big the whole uniform variable is, in case we need to put a @@ -1081,7 +1109,7 @@ vec4_visitor::visit(ir_variable *ir) break; case ir_var_system_value: - reg = make_reg_for_system_value(ir); + reg = make_reg_for_system_value(ir->data.location, ir->type); break; default: @@ -1253,7 +1281,7 @@ vec4_visitor::try_emit_b2f_of_compare(ir_expression *ir) return true; } -void +vec4_instruction * vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst, src_reg src0, src_reg src1) { @@ -1268,9 +1296,11 @@ vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst, inst = emit(BRW_OPCODE_SEL, dst, src0, src1); inst->predicate = BRW_PREDICATE_NORMAL; } + + return inst; } -void +vec4_instruction * vec4_visitor::emit_lrp(const dst_reg &dst, const src_reg &x, const src_reg &y, const src_reg &a) { @@ -1278,8 +1308,8 @@ vec4_visitor::emit_lrp(const dst_reg &dst, /* Note that the instruction's argument order is reversed from GLSL * and the IR. */ - emit(LRP(dst, - fix_3src_operand(a), fix_3src_operand(y), fix_3src_operand(x))); + return emit(LRP(dst, fix_3src_operand(a), fix_3src_operand(y), + fix_3src_operand(x))); } else { /* Earlier generations don't support three source operations, so we * need to emit x*(1-a) + y*a. @@ -1294,7 +1324,7 @@ vec4_visitor::emit_lrp(const dst_reg &dst, emit(MUL(y_times_a, y, a)); emit(ADD(one_minus_a, negate(a), src_reg(1.0f))); emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a))); - emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a))); + return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a))); } } @@ -1375,15 +1405,19 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst, emit(pull); } -void -vec4_visitor::emit_uniformize(const dst_reg &dst, const src_reg &src) +src_reg +vec4_visitor::emit_uniformize(const src_reg &src) { const src_reg chan_index(this, glsl_type::uint_type); + const dst_reg dst = retype(dst_reg(this, glsl_type::uint_type), + src.type); emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dst_reg(chan_index)) ->force_writemask_all = true; emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index) ->force_writemask_all = true; + + return src_reg(dst); } void @@ -1555,6 +1589,10 @@ vec4_visitor::visit(ir_expression *ir) case ir_unop_noise: unreachable("not reached: should be handled by lower_noise"); + case ir_unop_subroutine_to_int: + emit(MOV(result_dst, op[0])); + break; + case ir_binop_add: emit(ADD(result_dst, op[0], op[1])); break; @@ -1602,20 +1640,13 @@ vec4_visitor::visit(ir_expression *ir) assert(ir->type->is_integer()); emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]); break; - case ir_binop_carry: { - struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD); - emit(ADDC(dst_null_ud(), op[0], op[1])); - emit(MOV(result_dst, src_reg(acc))); - break; - } - case ir_binop_borrow: { - struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD); + case ir_binop_carry: + unreachable("Should have been lowered by carry_to_arith()."); + + case ir_binop_borrow: + unreachable("Should have been lowered by borrow_to_arith()."); - emit(SUBB(dst_null_ud(), op[0], op[1])); - emit(MOV(result_dst, src_reg(acc))); - break; - } case ir_binop_mod: /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. 
*/ assert(ir->type->is_integer()); @@ -1734,16 +1765,11 @@ vec4_visitor::visit(ir_expression *ir) emit(MOV(result_dst, op[0])); break; case ir_unop_b2i: - emit(AND(result_dst, op[0], src_reg(1))); - break; case ir_unop_b2f: if (devinfo->gen <= 5) { resolve_bool_comparison(ir->operands[0], &op[0]); } - op[0].type = BRW_REGISTER_TYPE_D; - result_dst.type = BRW_REGISTER_TYPE_D; - emit(AND(result_dst, op[0], src_reg(0x3f800000u))); - result_dst.type = BRW_REGISTER_TYPE_F; + emit(MOV(result_dst, negate(op[0]))); break; case ir_unop_f2b: emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ)); @@ -1839,7 +1865,7 @@ vec4_visitor::visit(ir_expression *ir) surf_index = src_reg(this, glsl_type::uint_type); emit(ADD(dst_reg(surf_index), op[0], src_reg(prog_data->base.binding_table.ubo_start))); - emit_uniformize(dst_reg(surf_index), surf_index); + surf_index = emit_uniformize(surf_index); /* Assume this may touch any UBO. It would be nice to provide * a tighter bound, but the array information is already lowered away. @@ -2439,6 +2465,8 @@ vec4_visitor::visit_atomic_counter_intrinsic(ir_call *ir) emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset, src_reg(), src_reg()); } + + brw_mark_surface_used(stage_prog_data, surf_index); } void @@ -2456,7 +2484,8 @@ vec4_visitor::visit(ir_call *ir) } src_reg -vec4_visitor::emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler) +vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type, + src_reg coordinate, src_reg sampler) { vec4_instruction *inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS, @@ -2483,21 +2512,21 @@ vec4_visitor::emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler } /* parameters are: u, v, r, lod; lod will always be zero due to api restrictions */ - int coord_mask = (1 << ir->coordinate->type->vector_elements) - 1; + int coord_mask = (1 << coordinate_type->vector_elements) - 1; int zero_mask = 0xf & ~coord_mask; - emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask), + emit(MOV(dst_reg(MRF, param_base, coordinate_type, coord_mask), coordinate)); - emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask), + emit(MOV(dst_reg(MRF, param_base, coordinate_type, zero_mask), src_reg(0))); emit(inst); return src_reg(inst->dst); } -static bool -is_high_sampler(const struct brw_device_info *devinfo, src_reg sampler) +bool +vec4_visitor::is_high_sampler(src_reg sampler) { if (devinfo->gen < 8 && !devinfo->is_haswell) return false; @@ -2506,6 +2535,183 @@ is_high_sampler(const struct brw_device_info *devinfo, src_reg sampler) } void +vec4_visitor::emit_texture(ir_texture_opcode op, + dst_reg dest, + const glsl_type *dest_type, + src_reg coordinate, + int coord_components, + src_reg shadow_comparitor, + src_reg lod, src_reg lod2, + src_reg sample_index, + uint32_t constant_offset, + src_reg offset_value, + src_reg mcs, + bool is_cube_array, + uint32_t sampler, + src_reg sampler_reg) +{ + enum opcode opcode; + switch (op) { + case ir_tex: opcode = SHADER_OPCODE_TXL; break; + case ir_txl: opcode = SHADER_OPCODE_TXL; break; + case ir_txd: opcode = SHADER_OPCODE_TXD; break; + case ir_txf: opcode = SHADER_OPCODE_TXF; break; + case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break; + case ir_txs: opcode = SHADER_OPCODE_TXS; break; + case ir_tg4: opcode = offset_value.file != BAD_FILE + ? 
SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break; + case ir_query_levels: opcode = SHADER_OPCODE_TXS; break; + case ir_txb: + unreachable("TXB is not valid for vertex shaders."); + case ir_lod: + unreachable("LOD is not valid for vertex shaders."); + default: + unreachable("Unrecognized tex op"); + } + + vec4_instruction *inst = new(mem_ctx) vec4_instruction( + opcode, dst_reg(this, dest_type)); + + inst->offset = constant_offset; + + /* The message header is necessary for: + * - Gen4 (always) + * - Gen9+ for selecting SIMD4x2 + * - Texel offsets + * - Gather channel selection + * - Sampler indices too large to fit in a 4-bit value. + */ + inst->header_size = + (devinfo->gen < 5 || devinfo->gen >= 9 || + inst->offset != 0 || op == ir_tg4 || + is_high_sampler(sampler_reg)) ? 1 : 0; + inst->base_mrf = 2; + inst->mlen = inst->header_size + 1; /* always at least one */ + inst->dst.writemask = WRITEMASK_XYZW; + inst->shadow_compare = shadow_comparitor.file != BAD_FILE; + + inst->src[1] = sampler_reg; + + /* MRF for the first parameter */ + int param_base = inst->base_mrf + inst->header_size; + + if (op == ir_txs || op == ir_query_levels) { + int writemask = devinfo->gen == 4 ? WRITEMASK_W : WRITEMASK_X; + emit(MOV(dst_reg(MRF, param_base, lod.type, writemask), lod)); + } else { + /* Load the coordinate */ + /* FINISHME: gl_clamp_mask and saturate */ + int coord_mask = (1 << coord_components) - 1; + int zero_mask = 0xf & ~coord_mask; + + emit(MOV(dst_reg(MRF, param_base, coordinate.type, coord_mask), + coordinate)); + + if (zero_mask != 0) { + emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask), + src_reg(0))); + } + /* Load the shadow comparitor */ + if (shadow_comparitor.file != BAD_FILE && op != ir_txd && (op != ir_tg4 || offset_value.file == BAD_FILE)) { + emit(MOV(dst_reg(MRF, param_base + 1, shadow_comparitor.type, + WRITEMASK_X), + shadow_comparitor)); + inst->mlen++; + } + + /* Load the LOD info */ + if (op == ir_tex || op == ir_txl) { + int mrf, writemask; + if (devinfo->gen >= 5) { + mrf = param_base + 1; + if (shadow_comparitor.file != BAD_FILE) { + writemask = WRITEMASK_Y; + /* mlen already incremented */ + } else { + writemask = WRITEMASK_X; + inst->mlen++; + } + } else /* devinfo->gen == 4 */ { + mrf = param_base; + writemask = WRITEMASK_W; + } + lod.swizzle = BRW_SWIZZLE_XXXX; + emit(MOV(dst_reg(MRF, mrf, lod.type, writemask), lod)); + } else if (op == ir_txf) { + emit(MOV(dst_reg(MRF, param_base, lod.type, WRITEMASK_W), lod)); + } else if (op == ir_txf_ms) { + emit(MOV(dst_reg(MRF, param_base + 1, sample_index.type, WRITEMASK_X), + sample_index)); + if (devinfo->gen >= 7) { + /* MCS data is in the first channel of `mcs`, but we need to get it into + * the .y channel of the second vec4 of params, so replicate .x across + * the whole vec4 and then mask off everything except .y + */ + mcs.swizzle = BRW_SWIZZLE_XXXX; + emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::uint_type, WRITEMASK_Y), + mcs)); + } + inst->mlen++; + } else if (op == ir_txd) { + const brw_reg_type type = lod.type; + + if (devinfo->gen >= 5) { + lod.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y); + lod2.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y); + emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), lod)); + emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), lod2)); + inst->mlen++; + + if (dest_type->vector_elements == 3 || shadow_comparitor.file != BAD_FILE) { + lod.swizzle = BRW_SWIZZLE_ZZZZ; + lod2.swizzle = BRW_SWIZZLE_ZZZZ; + 
emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), lod)); + emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), lod2)); + inst->mlen++; + + if (shadow_comparitor.file != BAD_FILE) { + emit(MOV(dst_reg(MRF, param_base + 2, + shadow_comparitor.type, WRITEMASK_Z), + shadow_comparitor)); + } + } + } else /* devinfo->gen == 4 */ { + emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), lod)); + emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), lod2)); + inst->mlen += 2; + } + } else if (op == ir_tg4 && offset_value.file != BAD_FILE) { + if (shadow_comparitor.file != BAD_FILE) { + emit(MOV(dst_reg(MRF, param_base, shadow_comparitor.type, WRITEMASK_W), + shadow_comparitor)); + } + + emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::ivec2_type, WRITEMASK_XY), + offset_value)); + inst->mlen++; + } + } + + emit(inst); + + /* fixup num layers (z) for cube arrays: hardware returns faces * layers; + * spec requires layers. + */ + if (op == ir_txs && is_cube_array) { + emit_math(SHADER_OPCODE_INT_QUOTIENT, + writemask(inst->dst, WRITEMASK_Z), + src_reg(inst->dst), src_reg(6)); + } + + if (devinfo->gen == 6 && op == ir_tg4) { + emit_gen6_gather_wa(key->tex.gen6_gather_wa[sampler], inst->dst); + } + + swizzle_result(op, dest, + src_reg(inst->dst), sampler, dest_type); +} + +void vec4_visitor::visit(ir_texture *ir) { uint32_t sampler = @@ -2535,11 +2741,9 @@ vec4_visitor::visit(ir_texture *ir) /* Emit code to evaluate the actual indexing expression */ nonconst_sampler_index->accept(this); - dst_reg temp(this, glsl_type::uint_type); - emit(ADD(temp, this->result, src_reg(sampler))); - emit_uniformize(temp, src_reg(temp)); - - sampler_reg = src_reg(temp); + src_reg temp(this, glsl_type::uint_type); + emit(ADD(dst_reg(temp), this->result, src_reg(sampler))); + sampler_reg = emit_uniformize(temp); } else { /* Single sampler, or constant array index; the indexing expression * is just an immediate. @@ -2572,7 +2776,9 @@ vec4_visitor::visit(ir_texture *ir) * generating these values may involve SEND messages that need the MRFs. 
*/ src_reg coordinate; + int coord_components = 0; if (ir->coordinate) { + coord_components = ir->coordinate->type->vector_elements; ir->coordinate->accept(this); coordinate = this->result; } @@ -2590,42 +2796,35 @@ vec4_visitor::visit(ir_texture *ir) offset_value = src_reg(this->result); } - const glsl_type *lod_type = NULL, *sample_index_type = NULL; - src_reg lod, dPdx, dPdy, sample_index, mcs; + src_reg lod, lod2, sample_index, mcs; switch (ir->op) { case ir_tex: lod = src_reg(0.0f); - lod_type = glsl_type::float_type; break; case ir_txf: case ir_txl: case ir_txs: ir->lod_info.lod->accept(this); lod = this->result; - lod_type = ir->lod_info.lod->type; break; case ir_query_levels: lod = src_reg(0); - lod_type = glsl_type::int_type; break; case ir_txf_ms: ir->lod_info.sample_index->accept(this); sample_index = this->result; - sample_index_type = ir->lod_info.sample_index->type; if (devinfo->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler)) - mcs = emit_mcs_fetch(ir, coordinate, sampler_reg); + mcs = emit_mcs_fetch(ir->coordinate->type, coordinate, sampler_reg); else mcs = src_reg(0u); break; case ir_txd: ir->lod_info.grad.dPdx->accept(this); - dPdx = this->result; + lod = this->result; ir->lod_info.grad.dPdy->accept(this); - dPdy = this->result; - - lod_type = ir->lod_info.grad.dPdx->type; + lod2 = this->result; break; case ir_txb: case ir_lod: @@ -2633,175 +2832,31 @@ vec4_visitor::visit(ir_texture *ir) break; } - enum opcode opcode; - switch (ir->op) { - case ir_tex: opcode = SHADER_OPCODE_TXL; break; - case ir_txl: opcode = SHADER_OPCODE_TXL; break; - case ir_txd: opcode = SHADER_OPCODE_TXD; break; - case ir_txf: opcode = SHADER_OPCODE_TXF; break; - case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break; - case ir_txs: opcode = SHADER_OPCODE_TXS; break; - case ir_tg4: opcode = has_nonconstant_offset - ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break; - case ir_query_levels: opcode = SHADER_OPCODE_TXS; break; - case ir_txb: - unreachable("TXB is not valid for vertex shaders."); - case ir_lod: - unreachable("LOD is not valid for vertex shaders."); - default: - unreachable("Unrecognized tex op"); - } - - vec4_instruction *inst = new(mem_ctx) vec4_instruction( - opcode, dst_reg(this, ir->type)); - + uint32_t constant_offset = 0; if (ir->offset != NULL && !has_nonconstant_offset) { - inst->offset = + constant_offset = brw_texture_offset(ir->offset->as_constant()->value.i, ir->offset->type->vector_elements); } /* Stuff the channel select bits in the top of the texture offset */ if (ir->op == ir_tg4) - inst->offset |= gather_channel(ir, sampler) << 16; - - /* The message header is necessary for: - * - Gen4 (always) - * - Gen9+ for selecting SIMD4x2 - * - Texel offsets - * - Gather channel selection - * - Sampler indices too large to fit in a 4-bit value. - */ - inst->header_size = - (devinfo->gen < 5 || devinfo->gen >= 9 || - inst->offset != 0 || ir->op == ir_tg4 || - is_high_sampler(devinfo, sampler_reg)) ? 1 : 0; - inst->base_mrf = 2; - inst->mlen = inst->header_size + 1; /* always at least one */ - inst->dst.writemask = WRITEMASK_XYZW; - inst->shadow_compare = ir->shadow_comparitor != NULL; - - inst->src[1] = sampler_reg; - - /* MRF for the first parameter */ - int param_base = inst->base_mrf + inst->header_size; - - if (ir->op == ir_txs || ir->op == ir_query_levels) { - int writemask = devinfo->gen == 4 ? 
WRITEMASK_W : WRITEMASK_X; - emit(MOV(dst_reg(MRF, param_base, lod_type, writemask), lod)); - } else { - /* Load the coordinate */ - /* FINISHME: gl_clamp_mask and saturate */ - int coord_mask = (1 << ir->coordinate->type->vector_elements) - 1; - int zero_mask = 0xf & ~coord_mask; - - emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask), - coordinate)); - - if (zero_mask != 0) { - emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask), - src_reg(0))); - } - /* Load the shadow comparitor */ - if (ir->shadow_comparitor && ir->op != ir_txd && (ir->op != ir_tg4 || !has_nonconstant_offset)) { - emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type, - WRITEMASK_X), - shadow_comparitor)); - inst->mlen++; - } + constant_offset |= + gather_channel( ir->lod_info.component->as_constant()->value.i[0], + sampler) << 16; - /* Load the LOD info */ - if (ir->op == ir_tex || ir->op == ir_txl) { - int mrf, writemask; - if (devinfo->gen >= 5) { - mrf = param_base + 1; - if (ir->shadow_comparitor) { - writemask = WRITEMASK_Y; - /* mlen already incremented */ - } else { - writemask = WRITEMASK_X; - inst->mlen++; - } - } else /* devinfo->gen == 4 */ { - mrf = param_base; - writemask = WRITEMASK_W; - } - emit(MOV(dst_reg(MRF, mrf, lod_type, writemask), lod)); - } else if (ir->op == ir_txf) { - emit(MOV(dst_reg(MRF, param_base, lod_type, WRITEMASK_W), lod)); - } else if (ir->op == ir_txf_ms) { - emit(MOV(dst_reg(MRF, param_base + 1, sample_index_type, WRITEMASK_X), - sample_index)); - if (devinfo->gen >= 7) { - /* MCS data is in the first channel of `mcs`, but we need to get it into - * the .y channel of the second vec4 of params, so replicate .x across - * the whole vec4 and then mask off everything except .y - */ - mcs.swizzle = BRW_SWIZZLE_XXXX; - emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::uint_type, WRITEMASK_Y), - mcs)); - } - inst->mlen++; - } else if (ir->op == ir_txd) { - const glsl_type *type = lod_type; + glsl_type const *type = ir->sampler->type; + bool is_cube_array = type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && + type->sampler_array; - if (devinfo->gen >= 5) { - dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y); - dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y); - emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx)); - emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy)); - inst->mlen++; - - if (ir->type->vector_elements == 3 || ir->shadow_comparitor) { - dPdx.swizzle = BRW_SWIZZLE_ZZZZ; - dPdy.swizzle = BRW_SWIZZLE_ZZZZ; - emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx)); - emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy)); - inst->mlen++; - - if (ir->shadow_comparitor) { - emit(MOV(dst_reg(MRF, param_base + 2, - ir->shadow_comparitor->type, WRITEMASK_Z), - shadow_comparitor)); - } - } - } else /* devinfo->gen == 4 */ { - emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx)); - emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy)); - inst->mlen += 2; - } - } else if (ir->op == ir_tg4 && has_nonconstant_offset) { - if (ir->shadow_comparitor) { - emit(MOV(dst_reg(MRF, param_base, ir->shadow_comparitor->type, WRITEMASK_W), - shadow_comparitor)); - } - - emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::ivec2_type, WRITEMASK_XY), - offset_value)); - inst->mlen++; - } - } - - emit(inst); - - /* fixup num layers (z) for cube arrays: hardware returns faces * layers; - * spec requires layers. 
- */ - if (ir->op == ir_txs) { - glsl_type const *type = ir->sampler->type; - if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && - type->sampler_array) { - emit_math(SHADER_OPCODE_INT_QUOTIENT, - writemask(inst->dst, WRITEMASK_Z), - src_reg(inst->dst), src_reg(6)); - } - } - - if (devinfo->gen == 6 && ir->op == ir_tg4) { - emit_gen6_gather_wa(key->tex.gen6_gather_wa[sampler], inst->dst); - } + this->result = src_reg(this, ir->type); + dst_reg dest = dst_reg(this->result); - swizzle_result(ir, src_reg(inst->dst), sampler); + emit_texture(ir->op, dest, ir->type, coordinate, coord_components, + shadow_comparitor, + lod, lod2, sample_index, + constant_offset, offset_value, + mcs, is_cube_array, sampler, sampler_reg); } /** @@ -2835,10 +2890,9 @@ vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst) * Set up the gather channel based on the swizzle, for gather4. */ uint32_t -vec4_visitor::gather_channel(ir_texture *ir, uint32_t sampler) +vec4_visitor::gather_channel(unsigned gather_component, uint32_t sampler) { - ir_constant *chan = ir->lod_info.component->as_constant(); - int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]); + int swiz = GET_SWZ(key->tex.swizzles[sampler], gather_component); switch (swiz) { case SWIZZLE_X: return 0; case SWIZZLE_Y: @@ -2856,22 +2910,23 @@ vec4_visitor::gather_channel(ir_texture *ir, uint32_t sampler) } void -vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, uint32_t sampler) +vec4_visitor::swizzle_result(ir_texture_opcode op, dst_reg dest, + src_reg orig_val, uint32_t sampler, + const glsl_type *dest_type) { int s = key->tex.swizzles[sampler]; - this->result = src_reg(this, ir->type); - dst_reg swizzled_result(this->result); + dst_reg swizzled_result = dest; - if (ir->op == ir_query_levels) { + if (op == ir_query_levels) { /* # levels is in .w */ orig_val.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); emit(MOV(swizzled_result, orig_val)); return; } - if (ir->op == ir_txs || ir->type == glsl_type::float_type - || s == SWIZZLE_NOOP || ir->op == ir_tg4) { + if (op == ir_txs || dest_type == glsl_type::float_type + || s == SWIZZLE_NOOP || op == ir_tg4) { emit(MOV(swizzled_result, orig_val)); return; } @@ -2954,12 +3009,25 @@ vec4_visitor::visit(ir_if *ir) } void +vec4_visitor::gs_emit_vertex(int stream_id) +{ + unreachable("not reached"); +} + +void vec4_visitor::visit(ir_emit_vertex *) { unreachable("not reached"); } void +vec4_visitor::gs_end_primitive() +{ + unreachable("not reached"); +} + + +void vec4_visitor::visit(ir_end_primitive *) { unreachable("not reached"); @@ -3094,6 +3162,7 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg) vec4_instruction *inst; inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6))); inst->predicate = BRW_PREDICATE_NORMAL; + output_reg[BRW_VARYING_SLOT_NDC].type = BRW_REGISTER_TYPE_F; inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f))); inst->predicate = BRW_PREDICATE_NORMAL; } @@ -3106,18 +3175,23 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg) if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) { dst_reg reg_w = reg; reg_w.writemask = WRITEMASK_W; - emit(MOV(reg_w, src_reg(output_reg[VARYING_SLOT_PSIZ]))); + src_reg reg_as_src = src_reg(output_reg[VARYING_SLOT_PSIZ]); + reg_as_src.type = reg_w.type; + reg_as_src.swizzle = brw_swizzle_for_size(1); + emit(MOV(reg_w, reg_as_src)); } if (prog_data->vue_map.slots_valid & VARYING_BIT_LAYER) { dst_reg reg_y = reg; reg_y.writemask = WRITEMASK_Y; reg_y.type = BRW_REGISTER_TYPE_D; + 
output_reg[VARYING_SLOT_LAYER].type = reg_y.type; emit(MOV(reg_y, src_reg(output_reg[VARYING_SLOT_LAYER]))); } if (prog_data->vue_map.slots_valid & VARYING_BIT_VIEWPORT) { dst_reg reg_z = reg; reg_z.writemask = WRITEMASK_Z; reg_z.type = BRW_REGISTER_TYPE_D; + output_reg[VARYING_SLOT_VIEWPORT].type = reg_z.type; emit(MOV(reg_z, src_reg(output_reg[VARYING_SLOT_VIEWPORT]))); } } @@ -3155,8 +3229,8 @@ vec4_visitor::emit_clip_distances(dst_reg reg, int offset) vec4_instruction * vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying) { - assert (varying < VARYING_SLOT_MAX); - reg.type = output_reg[varying].type; + assert(varying < VARYING_SLOT_MAX); + assert(output_reg[varying].type == reg.type); current_annotation = output_reg_annotation[varying]; /* Copy the register, saturating if necessary */ return emit(MOV(reg, src_reg(output_reg[varying]))); @@ -3166,6 +3240,7 @@ void vec4_visitor::emit_urb_slot(dst_reg reg, int varying) { reg.type = BRW_REGISTER_TYPE_F; + output_reg[varying].type = reg.type; switch (varying) { case VARYING_SLOT_PSIZ: @@ -3422,7 +3497,8 @@ vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst, dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0), inst->dst.writemask)); vec4_instruction *write = SCRATCH_WRITE(dst, temp, index); - write->predicate = inst->predicate; + if (inst->opcode != BRW_OPCODE_SEL) + write->predicate = inst->predicate; write->ir = inst->ir; write->annotation = inst->annotation; inst->insert_after(block, write); @@ -3485,16 +3561,16 @@ vec4_visitor::move_grf_array_access_to_scratch() foreach_block_and_inst(block, vec4_instruction, inst, cfg) { if (inst->dst.file == GRF && inst->dst.reladdr) { if (scratch_loc[inst->dst.reg] == -1) { - scratch_loc[inst->dst.reg] = c->last_scratch; - c->last_scratch += this->alloc.sizes[inst->dst.reg]; + scratch_loc[inst->dst.reg] = last_scratch; + last_scratch += this->alloc.sizes[inst->dst.reg]; } for (src_reg *iter = inst->dst.reladdr; iter->reladdr; iter = iter->reladdr) { if (iter->file == GRF && scratch_loc[iter->reg] == -1) { - scratch_loc[iter->reg] = c->last_scratch; - c->last_scratch += this->alloc.sizes[iter->reg]; + scratch_loc[iter->reg] = last_scratch; + last_scratch += this->alloc.sizes[iter->reg]; } } } @@ -3504,8 +3580,8 @@ vec4_visitor::move_grf_array_access_to_scratch() iter->reladdr; iter = iter->reladdr) { if (iter->file == GRF && scratch_loc[iter->reg] == -1) { - scratch_loc[iter->reg] = c->last_scratch; - c->last_scratch += this->alloc.sizes[iter->reg]; + scratch_loc[iter->reg] = last_scratch; + last_scratch += this->alloc.sizes[iter->reg]; } } } @@ -3679,7 +3755,7 @@ vec4_visitor::resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg) } vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, - struct brw_vec4_compile *c, + void *log_data, struct gl_program *prog, const struct brw_vue_prog_key *key, struct brw_vue_prog_data *prog_data, @@ -3688,9 +3764,8 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, void *mem_ctx, bool no_spills, int shader_time_index) - : backend_shader(compiler, NULL, mem_ctx, + : backend_shader(compiler, log_data, mem_ctx, shader_prog, prog, &prog_data->base, stage), - c(c), key(key), prog_data(prog_data), sanity_param_count(0), @@ -3698,7 +3773,8 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, first_non_payload_grf(0), need_all_constants_in_pull_buffer(false), no_spills(no_spills), - shader_time_index(shader_time_index) + shader_time_index(shader_time_index), + last_scratch(0) { this->failed = false; 
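The lazy scratch-slot assignment in move_grf_array_access_to_scratch() above (scratch_loc[] entries start at -1 and last_scratch is bumped by the register's allocation size the first time a register needs scratch space) is a plain bump allocator. A minimal sketch of that pattern, with assign_scratch(), NUM_REGS and the sizes[] table as invented names and example values rather than actual driver state:

#include <stdio.h>

#define NUM_REGS 8

/* Mirror of the scratch_loc[] / last_scratch bookkeeping above: a register
 * receives a scratch offset the first time it is seen, sized by its
 * allocation size in vec4 slots; later lookups reuse the same offset. */
static int scratch_loc[NUM_REGS];
static int last_scratch;

static int assign_scratch(int reg, const int *sizes)
{
   if (scratch_loc[reg] == -1) {
      scratch_loc[reg] = last_scratch;
      last_scratch += sizes[reg];
   }
   return scratch_loc[reg];
}

int main(void)
{
   const int sizes[NUM_REGS] = { 1, 4, 2, 1, 1, 1, 1, 1 };

   for (int i = 0; i < NUM_REGS; i++)
      scratch_loc[i] = -1;

   /* Registers 1 and 2 are accessed indirectly and get spilled to scratch. */
   printf("reg 1 -> offset %d\n", assign_scratch(1, sizes)); /* 0 */
   printf("reg 2 -> offset %d\n", assign_scratch(2, sizes)); /* 4 */
   printf("reg 1 -> offset %d\n", assign_scratch(1, sizes)); /* still 0 */
   printf("total scratch: %d vec4 slots\n", last_scratch);   /* 6 */
   return 0;
}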
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp index dcbd2405078..d1a72d787e7 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp @@ -394,8 +394,7 @@ vec4_vs_visitor::emit_program_code() * pull constants. Do that now. */ if (this->need_all_constants_in_pull_buffer) { - const struct gl_program_parameter_list *params = - vs_compile->vp->program.Base.Parameters; + const struct gl_program_parameter_list *params = vp->Base.Parameters; unsigned i; for (i = 0; i < params->NumParameters * 4; i++) { stage_prog_data->pull_param[i] = @@ -415,8 +414,7 @@ vec4_vs_visitor::setup_vp_regs() vp_temp_regs[i] = src_reg(this, glsl_type::vec4_type); /* PROGRAM_STATE_VAR etc. */ - struct gl_program_parameter_list *plist = - vs_compile->vp->program.Base.Parameters; + struct gl_program_parameter_list *plist = vp->Base.Parameters; for (unsigned p = 0; p < plist->NumParameters; p++) { unsigned components = plist->Parameters[p].Size; @@ -486,8 +484,7 @@ vec4_vs_visitor::get_vp_dst_reg(const prog_dst_register &dst) src_reg vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src) { - struct gl_program_parameter_list *plist = - vs_compile->vp->program.Base.Parameters; + struct gl_program_parameter_list *plist = vp->Base.Parameters; src_reg result; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp index f93062b46d0..620f652d6dc 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp @@ -36,7 +36,7 @@ vec4_vs_visitor::emit_prolog() for (int i = 0; i < VERT_ATTRIB_MAX; i++) { if (vs_prog_data->inputs_read & BITFIELD64_BIT(i)) { - uint8_t wa_flags = vs_compile->key.gl_attrib_wa_flags[i]; + uint8_t wa_flags = key->gl_attrib_wa_flags[i]; dst_reg reg(ATTR, i); dst_reg reg_d = reg; reg_d.type = BRW_REGISTER_TYPE_D; @@ -143,7 +143,8 @@ vec4_vs_visitor::emit_prolog() dst_reg * -vec4_vs_visitor::make_reg_for_system_value(ir_variable *ir) +vec4_vs_visitor::make_reg_for_system_value(int location, + const glsl_type *type) { /* VertexID is stored by the VF as the last vertex element, but * we don't represent it with a flag in inputs_read, so we call @@ -151,7 +152,7 @@ vec4_vs_visitor::make_reg_for_system_value(ir_variable *ir) */ dst_reg *reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX); - switch (ir->data.location) { + switch (location) { case SYSTEM_VALUE_BASE_VERTEX: reg->writemask = WRITEMASK_X; vs_prog_data->uses_vertexid = true; @@ -212,19 +213,22 @@ vec4_vs_visitor::emit_thread_end() vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler, - struct brw_vs_compile *vs_compile, + void *log_data, + const struct brw_vs_prog_key *key, struct brw_vs_prog_data *vs_prog_data, + struct gl_vertex_program *vp, struct gl_shader_program *prog, void *mem_ctx, int shader_time_index, bool use_legacy_snorm_formula) - : vec4_visitor(compiler, &vs_compile->base, &vs_compile->vp->program.Base, - &vs_compile->key.base, &vs_prog_data->base, prog, + : vec4_visitor(compiler, log_data, + &vp->Base, &key->base, &vs_prog_data->base, prog, MESA_SHADER_VERTEX, mem_ctx, false /* no_spills */, shader_time_index), - vs_compile(vs_compile), + key(key), vs_prog_data(vs_prog_data), + vp(vp), use_legacy_snorm_formula(use_legacy_snorm_formula) { } diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 6e9848fb1e9..c53cb49b612 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ 
b/src/mesa/drivers/dri/i965/brw_vs.c @@ -94,7 +94,6 @@ brw_codegen_vs_prog(struct brw_context *brw, { GLuint program_size; const GLuint *program; - struct brw_vs_compile c; struct brw_vs_prog_data prog_data; struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base; void *mem_ctx; @@ -104,8 +103,6 @@ brw_codegen_vs_prog(struct brw_context *brw, if (prog) vs = prog->_LinkedShaders[MESA_SHADER_VERTEX]; - memset(&c, 0, sizeof(c)); - memcpy(&c.key, key, sizeof(*key)); memset(&prog_data, 0, sizeof(prog_data)); /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */ @@ -114,8 +111,6 @@ brw_codegen_vs_prog(struct brw_context *brw, mem_ctx = ralloc_context(NULL); - c.vp = vp; - /* Allocate the references to the uniforms that will end up in the * prog_data associated with the compiled program, and which will be freed * by the state cache. @@ -126,26 +121,30 @@ brw_codegen_vs_prog(struct brw_context *brw, * case being a float value that gets blown up to a vec4, so be * conservative here. */ - param_count = vs->num_uniform_components * 4; - + param_count = vs->num_uniform_components * 4 + + vs->NumImages * BRW_IMAGE_PARAM_SIZE; + stage_prog_data->nr_image_params = vs->NumImages; } else { param_count = vp->program.Base.Parameters->NumParameters * 4; } /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip * planes as uniforms. */ - param_count += c.key.base.nr_userclip_plane_consts * 4; + param_count += key->base.nr_userclip_plane_consts * 4; stage_prog_data->param = rzalloc_array(NULL, const gl_constant_value *, param_count); stage_prog_data->pull_param = rzalloc_array(NULL, const gl_constant_value *, param_count); + stage_prog_data->image_param = + rzalloc_array(NULL, struct brw_image_param, + stage_prog_data->nr_image_params); stage_prog_data->nr_params = param_count; GLbitfield64 outputs_written = vp->program.Base.OutputsWritten; prog_data.inputs_read = vp->program.Base.InputsRead; - if (c.key.copy_edgeflag) { + if (key->copy_edgeflag) { outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE); prog_data.inputs_read |= VERT_BIT_EDGEFLAG; } @@ -158,7 +157,7 @@ brw_codegen_vs_prog(struct brw_context *brw, * coords, which would be a pain to handle. */ for (i = 0; i < 8; i++) { - if (c.key.point_coord_replace & (1 << i)) + if (key->point_coord_replace & (1 << i)) outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i); } @@ -173,7 +172,7 @@ brw_codegen_vs_prog(struct brw_context *brw, * distance varying slots whenever clipping is enabled, even if the vertex * shader doesn't write to gl_ClipDistance. */ - if (c.key.base.userclip_active) { + if (key->base.userclip_active) { outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0); outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1); } @@ -182,34 +181,28 @@ brw_codegen_vs_prog(struct brw_context *brw, &prog_data.base.vue_map, outputs_written); if (0) { - _mesa_fprint_program_opt(stderr, &c.vp->program.Base, PROG_PRINT_DEBUG, + _mesa_fprint_program_opt(stderr, &vp->program.Base, PROG_PRINT_DEBUG, true); } /* Emit GEN4 code. */ - program = brw_vs_emit(brw, prog, &c, &prog_data, mem_ctx, &program_size); + program = brw_vs_emit(brw, mem_ctx, key, &prog_data, + &vp->program, prog, &program_size); if (program == NULL) { ralloc_free(mem_ctx); return false; } /* Scratch space is used for register spilling */ - if (c.base.last_scratch) { - perf_debug("Vertex shader triggered register spilling. 
" - "Try reducing the number of live vec4 values to " - "improve performance.\n"); - - prog_data.base.base.total_scratch - = brw_get_scratch_size(c.base.last_scratch*REG_SIZE); - + if (prog_data.base.base.total_scratch) { brw_get_scratch_bo(brw, &brw->vs.base.scratch_bo, prog_data.base.base.total_scratch * brw->max_vs_threads); } brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG, - &c.key, sizeof(c.key), + key, sizeof(struct brw_vs_prog_key), program, program_size, &prog_data, sizeof(prog_data), &brw->vs.base.prog_offset, &brw->vs.prog_data); diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 61f9b006a58..1d9bee11c56 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -50,22 +50,16 @@ #define BRW_ATTRIB_WA_SIGN 32 /* interpret as signed in shader */ #define BRW_ATTRIB_WA_SCALE 64 /* interpret as scaled in shader */ -struct brw_vs_compile { - struct brw_vec4_compile base; - struct brw_vs_prog_key key; - - struct brw_vertex_program *vp; -}; - #ifdef __cplusplus extern "C" { #endif const unsigned *brw_vs_emit(struct brw_context *brw, - struct gl_shader_program *prog, - struct brw_vs_compile *c, - struct brw_vs_prog_data *prog_data, void *mem_ctx, + const struct brw_vs_prog_key *key, + struct brw_vs_prog_data *prog_data, + struct gl_vertex_program *vp, + struct gl_shader_program *shader_prog, unsigned *program_size); void brw_vs_debug_recompile(struct brw_context *brw, struct gl_shader_program *prog, @@ -91,15 +85,18 @@ class vec4_vs_visitor : public vec4_visitor { public: vec4_vs_visitor(const struct brw_compiler *compiler, - struct brw_vs_compile *vs_compile, + void *log_data, + const struct brw_vs_prog_key *key, struct brw_vs_prog_data *vs_prog_data, + struct gl_vertex_program *vp, struct gl_shader_program *prog, void *mem_ctx, int shader_time_index, bool use_legacy_snorm_formula); protected: - virtual dst_reg *make_reg_for_system_value(ir_variable *ir); + virtual dst_reg *make_reg_for_system_value(int location, + const glsl_type *type); virtual void setup_payload(); virtual void emit_prolog(); virtual void emit_program_code(); @@ -113,8 +110,9 @@ private: dst_reg get_vp_dst_reg(const prog_dst_register &dst); src_reg get_vp_src_reg(const prog_src_register &src); - struct brw_vs_compile * const vs_compile; + const struct brw_vs_prog_key *const key; struct brw_vs_prog_data * const vs_prog_data; + struct gl_vertex_program *const vp; src_reg *vp_temp_regs; src_reg vp_addr_reg; diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index b2f91bd412b..72e37d4b467 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -191,3 +191,28 @@ const struct brw_tracked_state brw_vs_abo_surfaces = { }, .emit = brw_upload_vs_abo_surfaces, }; + +static void +brw_upload_vs_image_surfaces(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->ctx; + /* BRW_NEW_VERTEX_PROGRAM */ + struct gl_shader_program *prog = + ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; + + if (prog) { + /* BRW_NEW_VS_PROG_DATA, BRW_NEW_IMAGE_UNITS */ + brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX], + &brw->vs.base, &brw->vs.prog_data->base.base); + } +} + +const struct brw_tracked_state brw_vs_image_surfaces = { + .dirty = { + .brw = BRW_NEW_BATCH | + BRW_NEW_IMAGE_UNITS | + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_VS_PROG_DATA, + }, + .emit = brw_upload_vs_image_surfaces, +}; diff --git 
a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 4619ce1080d..41266f57560 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -1,34 +1,28 @@ /* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <[email protected]> - */ - + * Copyright (C) Intel Corp. 2006. All Rights Reserved. + * Intel funded Tungsten Graphics to + * develop this 3D driver. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ #include "brw_context.h" #include "brw_wm.h" #include "brw_state.h" @@ -181,9 +175,12 @@ brw_codegen_wm_prog(struct brw_context *brw, * so the shader definitely kills pixels. */ prog_data.uses_kill = fp->program.UsesKill || key->alpha_test_func; - + prog_data.uses_omask = + fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); prog_data.computed_depth_mode = computed_depth_mode(&fp->program); + prog_data.early_fragment_tests = fs && fs->EarlyFragmentTests; + /* Use ALT floating point mode for ARB programs so that 0^0 == 1. 
*/ if (!prog) prog_data.base.use_alt_mode = true; @@ -194,7 +191,9 @@ brw_codegen_wm_prog(struct brw_context *brw, */ int param_count; if (fs) { - param_count = fs->num_uniform_components; + param_count = fs->num_uniform_components + + fs->NumImages * BRW_IMAGE_PARAM_SIZE; + prog_data.base.nr_image_params = fs->NumImages; } else { param_count = fp->program.Base.Parameters->NumParameters * 4; } @@ -204,6 +203,9 @@ brw_codegen_wm_prog(struct brw_context *brw, rzalloc_array(NULL, const gl_constant_value *, param_count); prog_data.base.pull_param = rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.image_param = + rzalloc_array(NULL, struct brw_image_param, + prog_data.base.nr_image_params); prog_data.base.nr_params = param_count; prog_data.barycentric_interp_modes = @@ -349,13 +351,15 @@ static uint8_t gen6_gather_workaround(GLenum internalformat) { switch (internalformat) { - case GL_R8I: return WA_SIGN | WA_8BIT; - case GL_R8UI: return WA_8BIT; - case GL_R16I: return WA_SIGN | WA_16BIT; - case GL_R16UI: return WA_16BIT; - /* note that even though GL_R32I and GL_R32UI have format overrides - * in the surface state, there is no shader w/a required */ - default: return 0; + case GL_R8I: return WA_SIGN | WA_8BIT; + case GL_R8UI: return WA_8BIT; + case GL_R16I: return WA_SIGN | WA_16BIT; + case GL_R16UI: return WA_16BIT; + default: + /* Note that even though GL_R32I and GL_R32UI have format overrides in + * the surface state, there is no shader w/a required. + */ + return 0; } } @@ -402,8 +406,9 @@ brw_populate_sampler_prog_key_data(struct gl_context *ctx, key->gl_clamp_mask[2] |= 1 << s; } - /* gather4's channel select for green from RG32F is broken; - * requires a shader w/a on IVB; fixable with just SCS on HSW. */ + /* gather4's channel select for green from RG32F is broken; requires + * a shader w/a on IVB; fixable with just SCS on HSW. + */ if (brw->gen == 7 && !brw->is_haswell && prog->UsesGather) { if (img->InternalFormat == GL_RG32F) key->gather_channel_quirk_mask |= 1 << s; @@ -452,13 +457,13 @@ brw_wm_state_dirty (struct brw_context *brw) BRW_NEW_VUE_MAP_GEOM_OUT); } -static void brw_wm_populate_key( struct brw_context *brw, - struct brw_wm_prog_key *key ) +static void +brw_wm_populate_key(struct brw_context *brw, struct brw_wm_prog_key *key) { struct gl_context *ctx = &brw->ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ const struct brw_fragment_program *fp = - (struct brw_fragment_program *)brw->fragment_program; + (struct brw_fragment_program *) brw->fragment_program; const struct gl_program *prog = (struct gl_program *) brw->fragment_program; GLuint lookup = 0; GLuint line_aa; @@ -604,7 +609,8 @@ static void brw_wm_populate_key( struct brw_context *brw, * like GL requires. Fix that by building the alpha test into the * shader, and we'll skip enabling the fixed function alpha test. 
*/ - if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 && ctx->Color.AlphaEnabled) { + if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 && + ctx->Color.AlphaEnabled) { key->alpha_test_func = ctx->Color.AlphaFunc; key->alpha_test_ref = ctx->Color.AlphaRef; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 72aad96bb6a..f13a97ce2b0 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1024,6 +1024,257 @@ const struct brw_tracked_state brw_cs_abo_surfaces = { .emit = brw_upload_cs_abo_surfaces, }; +static void +brw_upload_cs_image_surfaces(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->ctx; + /* _NEW_PROGRAM */ + struct gl_shader_program *prog = + ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; + + if (prog) { + /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS */ + brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE], + &brw->cs.base, &brw->cs.prog_data->base); + } +} + +const struct brw_tracked_state brw_cs_image_surfaces = { + .dirty = { + .mesa = _NEW_PROGRAM, + .brw = BRW_NEW_BATCH | + BRW_NEW_CS_PROG_DATA | + BRW_NEW_IMAGE_UNITS + }, + .emit = brw_upload_cs_image_surfaces, +}; + +static uint32_t +get_image_format(struct brw_context *brw, mesa_format format, GLenum access) +{ + if (access == GL_WRITE_ONLY) { + return brw_format_for_mesa_format(format); + } else { + /* Typed surface reads support a very limited subset of the shader + * image formats. Translate it into the closest format the + * hardware supports. + */ + if ((_mesa_get_format_bytes(format) >= 16 && brw->gen <= 8) || + (_mesa_get_format_bytes(format) >= 8 && + (brw->gen == 7 && !brw->is_haswell))) + return BRW_SURFACEFORMAT_RAW; + else + return brw_format_for_mesa_format( + brw_lower_mesa_image_format(brw->intelScreen->devinfo, format)); + } +} + +static void +update_default_image_param(struct brw_context *brw, + struct gl_image_unit *u, + unsigned surface_idx, + struct brw_image_param *param) +{ + memset(param, 0, sizeof(*param)); + param->surface_idx = surface_idx; + /* Set the swizzling shifts to all-ones to effectively disable swizzling -- + * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more + * detailed explanation of these parameters. + */ + param->swizzling[0] = 0xff; + param->swizzling[1] = 0xff; +} + +static void +update_buffer_image_param(struct brw_context *brw, + struct gl_image_unit *u, + unsigned surface_idx, + struct brw_image_param *param) +{ + struct gl_buffer_object *obj = u->TexObj->BufferObject; + + update_default_image_param(brw, u, surface_idx, param); + + param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat); + param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat); +} + +static void +update_texture_image_param(struct brw_context *brw, + struct gl_image_unit *u, + unsigned surface_idx, + struct brw_image_param *param) +{ + struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt; + + update_default_image_param(brw, u, surface_idx, param); + + param->size[0] = minify(mt->logical_width0, u->Level); + param->size[1] = minify(mt->logical_height0, u->Level); + param->size[2] = (!u->Layered ? 1 : + u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 : + u->TexObj->Target == GL_TEXTURE_3D ? 
+ minify(mt->logical_depth0, u->Level) : + mt->logical_depth0); + + intel_miptree_get_image_offset(mt, u->Level, u->Layer, + ¶m->offset[0], + ¶m->offset[1]); + + param->stride[0] = mt->cpp; + param->stride[1] = mt->pitch / mt->cpp; + param->stride[2] = + brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level); + param->stride[3] = + brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level); + + if (mt->tiling == I915_TILING_X) { + /* An X tile is a rectangular block of 512x8 bytes. */ + param->tiling[0] = _mesa_logbase2(512 / mt->cpp); + param->tiling[1] = _mesa_logbase2(8); + + if (brw->has_swizzling) { + /* Right shifts required to swizzle bits 9 and 10 of the memory + * address with bit 6. + */ + param->swizzling[0] = 3; + param->swizzling[1] = 4; + } + } else if (mt->tiling == I915_TILING_Y) { + /* The layout of a Y-tiled surface in memory isn't really fundamentally + * different to the layout of an X-tiled surface, we simply pretend that + * the surface is broken up in a number of smaller 16Bx32 tiles, each + * one arranged in X-major order just like is the case for X-tiling. + */ + param->tiling[0] = _mesa_logbase2(16 / mt->cpp); + param->tiling[1] = _mesa_logbase2(32); + + if (brw->has_swizzling) { + /* Right shift required to swizzle bit 9 of the memory address with + * bit 6. + */ + param->swizzling[0] = 3; + } + } + + /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The + * address calculation algorithm (emit_address_calculation() in + * brw_fs_surface_builder.cpp) handles this as a sort of tiling with + * modulus equal to the LOD. + */ + param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level : + 0); +} + +static void +update_image_surface(struct brw_context *brw, + struct gl_image_unit *u, + GLenum access, + unsigned surface_idx, + uint32_t *surf_offset, + struct brw_image_param *param) +{ + if (u->_Valid) { + struct gl_texture_object *obj = u->TexObj; + const unsigned format = get_image_format(brw, u->_ActualFormat, access); + + if (obj->Target == GL_TEXTURE_BUFFER) { + struct intel_buffer_object *intel_obj = + intel_buffer_object(obj->BufferObject); + const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 : + _mesa_get_format_bytes(u->_ActualFormat)); + + brw->vtbl.emit_buffer_surface_state( + brw, surf_offset, intel_obj->buffer, obj->BufferOffset, + format, intel_obj->Base.Size / texel_size, texel_size, + access != GL_READ_ONLY); + + update_buffer_image_param(brw, u, surface_idx, param); + + } else { + struct intel_texture_object *intel_obj = intel_texture_object(obj); + struct intel_mipmap_tree *mt = intel_obj->mt; + + if (format == BRW_SURFACEFORMAT_RAW) { + brw->vtbl.emit_buffer_surface_state( + brw, surf_offset, mt->bo, mt->offset, + format, mt->bo->size - mt->offset, 1 /* pitch */, + access != GL_READ_ONLY); + + } else { + const unsigned min_layer = obj->MinLayer + u->Layer; + const unsigned min_level = obj->MinLevel + u->Level; + const unsigned num_layers = (!u->Layered ? 1 : + obj->Target == GL_TEXTURE_CUBE_MAP ? 6 : + mt->logical_depth0); + const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP || + obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ? 
+ GL_TEXTURE_2D_ARRAY : obj->Target); + + brw->vtbl.emit_texture_surface_state( + brw, mt, target, + min_layer, min_layer + num_layers, + min_level, min_level + 1, + format, SWIZZLE_XYZW, + surf_offset, access != GL_READ_ONLY, false); + } + + update_texture_image_param(brw, u, surface_idx, param); + } + + } else { + brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset); + update_default_image_param(brw, u, surface_idx, param); + } +} + +void +brw_upload_image_surfaces(struct brw_context *brw, + struct gl_shader *shader, + struct brw_stage_state *stage_state, + struct brw_stage_prog_data *prog_data) +{ + struct gl_context *ctx = &brw->ctx; + + if (shader && shader->NumImages) { + for (unsigned i = 0; i < shader->NumImages; i++) { + struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]]; + const unsigned surf_idx = prog_data->binding_table.image_start + i; + + update_image_surface(brw, u, shader->ImageAccess[i], + surf_idx, + &stage_state->surf_offset[surf_idx], + &prog_data->image_param[i]); + } + + brw->ctx.NewDriverState |= BRW_NEW_SURFACES; + } +} + +static void +brw_upload_wm_image_surfaces(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->ctx; + /* BRW_NEW_FRAGMENT_PROGRAM */ + struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram; + + if (prog) { + /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS */ + brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT], + &brw->wm.base, &brw->wm.prog_data->base); + } +} + +const struct brw_tracked_state brw_wm_image_surfaces = { + .dirty = { + .brw = BRW_NEW_BATCH | + BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_FS_PROG_DATA | + BRW_NEW_IMAGE_UNITS + }, + .emit = brw_upload_wm_image_surfaces, +}; + void gen4_init_vtable_surface_functions(struct brw_context *brw) { diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp index b6a3d78d849..54c4a6dfdd8 100644 --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp @@ -821,7 +821,7 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context *brw, /* 3DSTATE_DEPTH_BUFFER */ { - intel_emit_depth_stall_flushes(brw); + brw_emit_depth_stall_flushes(brw); BEGIN_BATCH(7); /* 3DSTATE_DEPTH_BUFFER dw0 */ @@ -896,7 +896,7 @@ static void gen6_blorp_emit_depth_disable(struct brw_context *brw, const brw_blorp_params *params) { - intel_emit_depth_stall_flushes(brw); + brw_emit_depth_stall_flushes(brw); BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); @@ -1021,7 +1021,7 @@ gen6_blorp_exec(struct brw_context *brw, uint32_t prog_offset = params->get_wm_prog(brw, &prog_data); /* Emit workaround flushes when we switch from drawing to blorping. */ - intel_emit_post_sync_nonzero_flush(brw); + brw_emit_post_sync_nonzero_flush(brw); gen6_emit_3dstate_multisample(brw, params->dst.num_samples); gen6_emit_3dstate_sample_mask(brw, diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c index 2bfa271b527..3bab8f46ae8 100644 --- a/src/mesa/drivers/dri/i965/gen6_cc.c +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -51,7 +51,7 @@ gen6_upload_blend_state(struct brw_context *brw) * with render target 0, which will reference BLEND_STATE[0] for * alpha test enable. 
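update_texture_image_param() in the hunk above encodes tile geometry as log2 exponents (an X tile is a 512-byte by 8-row block; a Y tile is treated as 16-byte by 32-row sub-tiles in X-major order) plus the right-shift amounts that align address bits 9 and 10 with bit 6 when swizzling is enabled. The following is a small standalone sketch of just that arithmetic; the struct, enum, and logbase2() helper are local stand-ins for brw_image_param, the mipmap-tree tiling field, and _mesa_logbase2().

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Trimmed-down stand-in for the tiling/swizzling fields of brw_image_param. */
struct image_param {
   uint32_t tiling[3];
   uint32_t swizzling[2];
};

static unsigned
logbase2(unsigned n)              /* stand-in for _mesa_logbase2() */
{
   unsigned log = 0;
   while (n >>= 1)
      log++;
   return log;
}

enum tiling { TILING_NONE, TILING_X, TILING_Y };

static void
set_tile_info(struct image_param *p, enum tiling t, unsigned cpp,
              bool has_swizzling)
{
   p->swizzling[0] = p->swizzling[1] = 0xff;   /* all-ones disables swizzling */

   if (t == TILING_X) {
      /* X tile: 512 bytes wide, 8 rows tall, expressed in texels/rows. */
      p->tiling[0] = logbase2(512 / cpp);
      p->tiling[1] = logbase2(8);
      if (has_swizzling) {
         p->swizzling[0] = 3;   /* right shift aligning address bit 9 with bit 6 */
         p->swizzling[1] = 4;   /* right shift aligning address bit 10 with bit 6 */
      }
   } else if (t == TILING_Y) {
      /* Y tiling handled as 16-byte x 32-row sub-tiles in X-major order. */
      p->tiling[0] = logbase2(16 / cpp);
      p->tiling[1] = logbase2(32);
      if (has_swizzling)
         p->swizzling[0] = 3;
   }
}

int main(void)
{
   struct image_param p = {{0}};
   set_tile_info(&p, TILING_X, 4, true);          /* e.g. a 4-byte RGBA8 surface */
   assert(p.tiling[0] == 7 && p.tiling[1] == 3);  /* 128 texels x 8 rows */
   printf("X-tile exponents: %u x %u\n",
          (unsigned)p.tiling[0], (unsigned)p.tiling[1]);
   return 0;
}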
*/ - if (nr_draw_buffers == 0 && ctx->Color.AlphaEnabled) + if (nr_draw_buffers == 0) nr_draw_buffers = 1; size = sizeof(*blend) * nr_draw_buffers; @@ -97,8 +97,8 @@ gen6_upload_blend_state(struct brw_context *brw) rb_type != GL_UNSIGNED_NORMALIZED && rb_type != GL_FLOAT, "Ignoring %s logic op on %s " "renderbuffer\n", - _mesa_lookup_enum_by_nr(ctx->Color.LogicOp), - _mesa_lookup_enum_by_nr(rb_type)); + _mesa_enum_to_string(ctx->Color.LogicOp), + _mesa_enum_to_string(rb_type)); if (rb_type == GL_UNSIGNED_NORMALIZED) { blend[b].blend1.logic_op_enable = 1; blend[b].blend1.logic_op_func = diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c b/src/mesa/drivers/dri/i965/gen6_depth_state.c index 1df0bd47571..febd4781100 100644 --- a/src/mesa/drivers/dri/i965/gen6_depth_state.c +++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c @@ -65,7 +65,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw, */ bool enable_hiz_ss = hiz || separate_stencil; - intel_emit_depth_stall_flushes(brw); + brw_emit_depth_stall_flushes(brw); irb = intel_get_renderbuffer(fb, BUFFER_DEPTH); if (!irb) @@ -73,7 +73,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw, rb = (struct gl_renderbuffer*) irb; if (rb) { - depth = MAX2(rb->Depth, 1); + depth = MAX2(irb->layer_count, 1); if (rb->TexImage) gl_target = rb->TexImage->TexObject->Target; } @@ -89,6 +89,10 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw, surftype = BRW_SURFACE_2D; depth *= 6; break; + case GL_TEXTURE_3D: + assert(mt); + depth = MAX2(mt->logical_depth0, 1); + /* fallthrough */ default: surftype = translate_tex_target(gl_target); break; diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp index 782687aac57..68e443d38a5 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp @@ -147,7 +147,12 @@ gen6_gs_visitor::emit_prolog() } void -gen6_gs_visitor::visit(ir_emit_vertex *) +gen6_gs_visitor::visit(ir_emit_vertex *ir) +{ + gs_emit_vertex(ir->stream_id()); +} +void +gen6_gs_visitor::gs_emit_vertex(int stream_id) { this->current_annotation = "gen6 emit vertex"; /* Honor max_vertex layout indication in geometry shader by ignoring any @@ -224,6 +229,12 @@ gen6_gs_visitor::visit(ir_emit_vertex *) void gen6_gs_visitor::visit(ir_end_primitive *) { + gs_end_primitive(); +} + +void +gen6_gs_visitor::gs_end_primitive() +{ this->current_annotation = "gen6 end primitive"; /* Calling EndPrimitive() is optional for point output. In this case we set * the PrimEnd flag when we process EmitVertex(). 
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h index 27254ebb727..4cf94893261 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h @@ -36,12 +36,14 @@ class gen6_gs_visitor : public vec4_gs_visitor { public: gen6_gs_visitor(const struct brw_compiler *comp, + void *log_data, struct brw_gs_compile *c, struct gl_shader_program *prog, void *mem_ctx, bool no_spills, int shader_time_index) : - vec4_gs_visitor(comp, c, prog, mem_ctx, no_spills, shader_time_index) {} + vec4_gs_visitor(comp, log_data, c, prog, mem_ctx, no_spills, + shader_time_index) {} protected: virtual void assign_binding_table_offsets(); @@ -49,6 +51,8 @@ protected: virtual void emit_thread_end(); virtual void visit(ir_emit_vertex *); virtual void visit(ir_end_primitive *); + virtual void gs_emit_vertex(int stream_id); + virtual void gs_end_primitive(); virtual void emit_urb_write_header(int mrf); virtual void emit_urb_write_opcode(bool complete, int base_mrf, diff --git a/src/mesa/drivers/dri/i965/gen6_multisample_state.c b/src/mesa/drivers/dri/i965/gen6_multisample_state.c index 36734f598fe..8444c0c9bae 100644 --- a/src/mesa/drivers/dri/i965/gen6_multisample_state.c +++ b/src/mesa/drivers/dri/i965/gen6_multisample_state.c @@ -143,12 +143,11 @@ gen6_emit_3dstate_multisample(struct brw_context *brw, ADVANCE_BATCH(); } - unsigned gen6_determine_sample_mask(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; - float coverage = 1.0; + float coverage = 1.0f; float coverage_invert = false; unsigned sample_mask = ~0u; @@ -166,7 +165,7 @@ gen6_determine_sample_mask(struct brw_context *brw) } if (num_samples > 1) { - int coverage_int = (int) (num_samples * coverage + 0.5); + int coverage_int = (int) (num_samples * coverage + 0.5f); uint32_t coverage_bits = (1 << coverage_int) - 1; if (coverage_invert) coverage_bits ^= (1 << num_samples) - 1; @@ -176,7 +175,6 @@ gen6_determine_sample_mask(struct brw_context *brw) } } - /** * 3DSTATE_SAMPLE_MASK */ @@ -189,15 +187,14 @@ gen6_emit_3dstate_sample_mask(struct brw_context *brw, unsigned mask) ADVANCE_BATCH(); } - -static void upload_multisample_state(struct brw_context *brw) +static void +upload_multisample_state(struct brw_context *brw) { /* BRW_NEW_NUM_SAMPLES */ gen6_emit_3dstate_multisample(brw, brw->num_samples); gen6_emit_3dstate_sample_mask(brw, gen6_determine_sample_mask(brw)); } - const struct brw_tracked_state gen6_multisample_state = { .dirty = { .mesa = _NEW_MULTISAMPLE, diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c index ba5c944fb3d..9f4a5db3592 100644 --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c @@ -86,7 +86,7 @@ static void write_primitives_generated(struct brw_context *brw, drm_intel_bo *query_bo, int stream, int idx) { - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); if (brw->gen >= 7 && stream > 0) { brw_store_register_mem64(brw, query_bo, @@ -100,7 +100,7 @@ static void write_xfb_primitives_written(struct brw_context *brw, drm_intel_bo *bo, int stream, int idx) { - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); if (brw->gen >= 7) { brw_store_register_mem64(brw, bo, GEN7_SO_NUM_PRIMS_WRITTEN(stream), idx); @@ -157,7 +157,7 @@ emit_pipeline_stat(struct brw_context *brw, drm_intel_bo *bo, /* Emit a flush to make sure various parts of the pipeline are complete and * we get an accurate value */ - 
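gen6_determine_sample_mask() in the gen6_multisample_state.c hunk above turns GL_SAMPLE_COVERAGE into a hardware sample mask by converting the coverage fraction into a count of enabled samples and optionally inverting the bits. A standalone sketch of that conversion follows; the GL context state is flattened into plain parameters and the application mask stands in for glSampleMaski()/SampleCoverage state.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirror of the coverage -> sample-mask math in gen6_determine_sample_mask(),
 * with the ctx fields passed in as plain values.
 */
static uint32_t
coverage_to_sample_mask(unsigned num_samples, float coverage,
                        bool coverage_invert, uint32_t app_sample_mask)
{
   uint32_t sample_mask = app_sample_mask;

   if (num_samples > 1) {
      int coverage_int = (int)(num_samples * coverage + 0.5f);
      uint32_t coverage_bits = (1u << coverage_int) - 1;
      if (coverage_invert)
         coverage_bits ^= (1u << num_samples) - 1;
      sample_mask &= coverage_bits;
   }

   return sample_mask;
}

int main(void)
{
   /* 4x MSAA at 50% coverage enables the low two samples. */
   assert(coverage_to_sample_mask(4, 0.5f, false, ~0u) == 0x3);
   /* Inverting the coverage flips to the high two samples. */
   assert(coverage_to_sample_mask(4, 0.5f, true, ~0u) == 0xc);
   printf("sample masks computed\n");
   return 0;
}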
intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); brw_store_register_mem64(brw, bo, reg, idx); } diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index b00517ed81e..4068f2844a2 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -383,7 +383,7 @@ upload_sf_state(struct brw_context *brw) point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); /* Clamp to the hardware limits and convert to fixed point */ - dw4 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3); + dw4 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3); /* * Window coordinates in an FBO are inverted, which means point diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c index be80d7bdfc5..3899ce9451f 100644 --- a/src/mesa/drivers/dri/i965/gen6_sol.c +++ b/src/mesa/drivers/dri/i965/gen6_sol.c @@ -292,5 +292,5 @@ brw_end_transform_feedback(struct gl_context *ctx, * simplicity, just do a full flush. */ struct brw_context *brw = brw_context(ctx); - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); } diff --git a/src/mesa/drivers/dri/i965/gen6_surface_state.c b/src/mesa/drivers/dri/i965/gen6_surface_state.c index 03e913a0a76..39de62f2304 100644 --- a/src/mesa/drivers/dri/i965/gen6_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen6_surface_state.c @@ -88,7 +88,8 @@ gen6_update_renderbuffer_surface(struct brw_context *brw, break; } - const int min_array_element = layered ? 0 : irb->mt_layer; + const int min_array_element = irb->mt_layer; + assert(!layered || irb->mt_layer == 0); surf[0] = SET_FIELD(surftype, BRW_SURFACE_TYPE) | SET_FIELD(format, BRW_SURFACE_FORMAT); diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c index 107a4f24fa6..c7311fd0b03 100644 --- a/src/mesa/drivers/dri/i965/gen6_urb.c +++ b/src/mesa/drivers/dri/i965/gen6_urb.c @@ -120,7 +120,7 @@ gen6_upload_urb( struct brw_context *brw ) * a workaround. 
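The gen6_sf_state.c change above clamps the point size to the hardware range and converts it to unsigned fixed point with three fractional bits: U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3). U_FIXED itself is not shown in this diff, so the sketch below uses a plausible definition (value * 2^n, truncated) purely to illustrate the 8.3 encoding; both macros here are local stand-ins.

#include <assert.h>
#include <stdio.h>

#define CLAMP(x, lo, hi)   ((x) < (lo) ? (lo) : (x) > (hi) ? (hi) : (x))

/* Assumed shape of U_FIXED: unsigned fixed point with n fractional bits. */
#define U_FIXED(value, n)  ((unsigned)((value) * (1 << (n))))

int main(void)
{
   /* Hardware range is 0.125 .. 255.875 in steps of 1/8 (3 fractional bits). */
   assert(U_FIXED(CLAMP(0.05f,  0.125f, 255.875f), 3) == 1);    /* clamped up   */
   assert(U_FIXED(CLAMP(64.5f,  0.125f, 255.875f), 3) == 516);  /* 64.5 * 8     */
   assert(U_FIXED(CLAMP(1000.f, 0.125f, 255.875f), 3) == 2047); /* clamped down */
   printf("point-size fixed-point encoding checked\n");
   return 0;
}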
*/ if (brw->urb.gs_present && !gs_present) - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); brw->urb.gs_present = gs_present; } diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c index 7c8d8849f4e..11b9a360ced 100644 --- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c @@ -101,7 +101,7 @@ gen6_upload_sf_vp(struct brw_context *brw) } for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) { - double scale[3], translate[3]; + float scale[3], translate[3]; /* _NEW_VIEWPORT */ _mesa_get_viewport_xform(ctx, i, scale, translate); diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index 2bdc82bc895..9822dc1fe79 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -645,7 +645,7 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, /* 3DSTATE_DEPTH_BUFFER */ { - intel_emit_depth_stall_flushes(brw); + brw_emit_depth_stall_flushes(brw); BEGIN_BATCH(7); OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); @@ -696,7 +696,7 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, static void gen7_blorp_emit_depth_disable(struct brw_context *brw) { - intel_emit_depth_stall_flushes(brw); + brw_emit_depth_stall_flushes(brw); BEGIN_BATCH(7); OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); @@ -794,6 +794,8 @@ gen7_blorp_exec(struct brw_context *brw, } depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params); gen7_blorp_emit_depth_stencil_state_pointers(brw, depthstencil_offset); + if (brw->use_resource_streamer) + gen7_disable_hw_binding_tables(brw); if (params->use_wm_prog) { uint32_t wm_surf_offset_renderbuffer; uint32_t wm_surf_offset_texture = 0; diff --git a/src/mesa/drivers/dri/i965/gen7_disable.c b/src/mesa/drivers/dri/i965/gen7_disable.c index 2c43cd77f07..bb509696d72 100644 --- a/src/mesa/drivers/dri/i965/gen7_disable.c +++ b/src/mesa/drivers/dri/i965/gen7_disable.c @@ -52,7 +52,7 @@ disable_stages(struct brw_context *brw) BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2)); - OUT_BATCH(0); + OUT_BATCH(brw->hw_bt_pool.next_offset); ADVANCE_BATCH(); /* Disable the TE */ @@ -85,7 +85,7 @@ disable_stages(struct brw_context *brw) BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2)); - OUT_BATCH(0); + OUT_BATCH(brw->hw_bt_pool.next_offset); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c index 8d6d3fe1d34..497ecec8e45 100644 --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c @@ -59,7 +59,9 @@ upload_gs_state(struct brw_context *brw) OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_GS_SAMPLER_COUNT_SHIFT) | ((brw->gs.prog_data->base.base.binding_table.size_bytes / 4) << - GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) | + (brw->is_haswell && prog_data->base.nr_image_params ? 
+ HSW_GS_UAV_ACCESS_ENABLE : 0)); if (brw->gs.prog_data->base.base.total_scratch) { OUT_RELOC(stage_state->scratch_bo, diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c index f4f665219d6..a14d4a0c50d 100644 --- a/src/mesa/drivers/dri/i965/gen7_misc_state.c +++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c @@ -57,7 +57,7 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw, return; } - intel_emit_depth_stall_flushes(brw); + brw_emit_depth_stall_flushes(brw); irb = intel_get_renderbuffer(fb, BUFFER_DEPTH); if (!irb) diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index 4fa46a8eb97..698b3d491bc 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -220,7 +220,7 @@ upload_sf_state(struct brw_context *brw) point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); /* Clamp to the hardware limits and convert to fixed point */ - dw3 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3); + dw3 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3); /* _NEW_LIGHT */ if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) { diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c index aec4f44bb73..41573a80a52 100644 --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c @@ -365,7 +365,7 @@ gen7_save_primitives_written_counters(struct brw_context *brw, } /* Flush any drawing so that the counters have the right values. */ - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); /* Emit MI_STORE_REGISTER_MEM commands to write the values. */ for (int i = 0; i < streams; i++) { @@ -502,7 +502,7 @@ gen7_pause_transform_feedback(struct gl_context *ctx, (struct brw_transform_feedback_object *) obj; /* Flush any drawing so that the counters have the right values. */ - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); /* Save the SOL buffer offset register values. 
*/ if (brw->gen < 8) { diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c index d371c193577..69162171c4e 100644 --- a/src/mesa/drivers/dri/i965/gen7_urb.c +++ b/src/mesa/drivers/dri/i965/gen7_urb.c @@ -228,7 +228,7 @@ gen7_upload_urb(struct brw_context *brw) remaining_space = total_wants; if (remaining_space > 0) { unsigned vs_additional = (unsigned) - round(vs_wants * (((double) remaining_space) / total_wants)); + roundf(vs_wants * (((float) remaining_space) / total_wants)); vs_chunks += vs_additional; remaining_space -= vs_additional; gs_chunks += remaining_space; diff --git a/src/mesa/drivers/dri/i965/gen7_viewport_state.c b/src/mesa/drivers/dri/i965/gen7_viewport_state.c index b655205ec35..c75dc9964bf 100644 --- a/src/mesa/drivers/dri/i965/gen7_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen7_viewport_state.c @@ -53,7 +53,7 @@ gen7_upload_sf_clip_viewport(struct brw_context *brw) } for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) { - double scale[3], translate[3]; + float scale[3], translate[3]; _mesa_get_viewport_xform(ctx, i, scale, translate); /* According to the "Vertex X,Y Clamping and Quantization" section of diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index 4b17d06fa83..b7e48585482 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -62,6 +62,7 @@ gen7_upload_constant_state(struct brw_context *brw, OUT_BATCH(active ? stage_state->push_const_size : 0); OUT_BATCH(0); } + /* Pointer to the constant buffer. Covered by the set of state flags * from gen6_prepare_wm_contants */ @@ -95,15 +96,14 @@ gen7_upload_constant_state(struct brw_context *brw, ADVANCE_BATCH(); - /* On SKL+ the new constants don't take effect until the next corresponding - * 3DSTATE_BINDING_TABLE_POINTER_* command is parsed so we need to ensure - * that is sent - */ + /* On SKL+ the new constants don't take effect until the next corresponding + * 3DSTATE_BINDING_TABLE_POINTER_* command is parsed so we need to ensure + * that is sent + */ if (brw->gen >= 9) brw->ctx.NewDriverState |= BRW_NEW_SURFACES; } - static void upload_vs_state(struct brw_context *brw) { @@ -111,6 +111,7 @@ upload_vs_state(struct brw_context *brw) uint32_t floating_point_mode = 0; const int max_threads_shift = brw->is_haswell ? HSW_VS_MAX_THREADS_SHIFT : GEN6_VS_MAX_THREADS_SHIFT; + const struct brw_vue_prog_data *prog_data = &brw->vs.prog_data->base; if (!brw->is_haswell && !brw->is_baytrail) gen7_emit_vs_workaround_flush(brw); @@ -125,19 +126,21 @@ upload_vs_state(struct brw_context *brw) ((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT) | ((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) << - GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) | + (brw->is_haswell && prog_data->base.nr_image_params ? 
+ HSW_VS_UAV_ACCESS_ENABLE : 0)); - if (brw->vs.prog_data->base.base.total_scratch) { + if (prog_data->base.total_scratch) { OUT_RELOC(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(brw->vs.prog_data->base.base.total_scratch) - 11); + ffs(prog_data->base.total_scratch) - 11); } else { OUT_BATCH(0); } - OUT_BATCH((brw->vs.prog_data->base.base.dispatch_grf_start_reg << + OUT_BATCH((prog_data->base.dispatch_grf_start_reg << GEN6_VS_DISPATCH_START_GRF_SHIFT) | - (brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | + (prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); OUT_BATCH(((brw->max_vs_threads - 1) << max_threads_shift) | diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index ea11ae845e3..fd6dab5be8b 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -83,6 +83,7 @@ upload_wm_state(struct brw_context *brw) /* _NEW_BUFFERS | _NEW_COLOR */ if (brw_color_buffer_write_enabled(brw) || writes_depth || + prog_data->base.nr_image_params || dw1 & GEN7_WM_KILL_ENABLE) { dw1 |= GEN7_WM_DISPATCH_ENABLE; } @@ -106,6 +107,18 @@ upload_wm_state(struct brw_context *brw) dw1 |= GEN7_WM_USES_INPUT_COVERAGE_MASK; } + /* BRW_NEW_FS_PROG_DATA */ + if (prog_data->early_fragment_tests) + dw1 |= GEN7_WM_EARLY_DS_CONTROL_PREPS; + else if (prog_data->base.nr_image_params) + dw1 |= GEN7_WM_EARLY_DS_CONTROL_PSEXEC; + + /* _NEW_BUFFERS | _NEW_COLOR */ + if (brw->is_haswell && + !(brw_color_buffer_write_enabled(brw) || writes_depth) && + prog_data->base.nr_image_params) + dw2 |= HSW_WM_UAV_ONLY; + BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2)); OUT_BATCH(dw1); @@ -127,7 +140,7 @@ const struct brw_tracked_state gen7_wm_state = { .emit = upload_wm_state, }; -void +static void gen7_upload_ps_state(struct brw_context *brw, const struct gl_fragment_program *fp, const struct brw_stage_state *stage_state, @@ -208,6 +221,9 @@ gen7_upload_ps_state(struct brw_context *brw, _mesa_get_min_invocations_per_fragment(ctx, fp, false); assert(min_inv_per_frag >= 1); + if (brw->is_haswell && prog_data->base.nr_image_params) + dw4 |= HSW_PS_UAV_ACCESS_ENABLE; + if (prog_data->prog_offset_16 || prog_data->no_8) { dw4 |= GEN7_PS_16_DISPATCH_ENABLE; if (!prog_data->no_8 && min_inv_per_frag == 1) { diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c index 12ac97a5d14..93100a0708f 100644 --- a/src/mesa/drivers/dri/i965/gen8_depth_state.c +++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c @@ -41,7 +41,6 @@ emit_depth_packets(struct brw_context *brw, bool depth_writable, struct intel_mipmap_tree *stencil_mt, bool stencil_writable, - uint32_t stencil_offset, bool hiz, uint32_t width, uint32_t height, @@ -57,7 +56,7 @@ emit_depth_packets(struct brw_context *brw, return; } - intel_emit_depth_stall_flushes(brw); + brw_emit_depth_stall_flushes(brw); /* _NEW_BUFFERS, _NEW_DEPTH, _NEW_STENCIL */ BEGIN_BATCH(8); @@ -100,7 +99,7 @@ emit_depth_packets(struct brw_context *brw, } if (stencil_mt == NULL) { - BEGIN_BATCH(5); + BEGIN_BATCH(5); OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (5 - 2)); OUT_BATCH(0); OUT_BATCH(0); @@ -127,8 +126,7 @@ emit_depth_packets(struct brw_context *brw, OUT_BATCH(HSW_STENCIL_ENABLED | mocs_wb << 22 | (2 * stencil_mt->pitch - 1)); OUT_RELOC64(stencil_mt->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - stencil_offset); + I915_GEM_DOMAIN_RENDER, 
I915_GEM_DOMAIN_RENDER, 0); OUT_BATCH(stencil_mt ? stencil_mt->qpitch >> 2 : 0); ADVANCE_BATCH(); } @@ -220,7 +218,6 @@ gen8_emit_depth_stencil_hiz(struct brw_context *brw, emit_depth_packets(brw, depth_mt, brw_depthbuffer_format(brw), surftype, ctx->Depth.Mask != 0, stencil_mt, ctx->Stencil._WriteEnabled, - brw->depthstencil.stencil_offset, hiz, width, height, depth, lod, min_array_element); } @@ -253,10 +250,10 @@ pma_fix_enable(const struct brw_context *brw) */ const bool hiz_enabled = depth_irb && intel_renderbuffer_has_hiz(depth_irb); - /* 3DSTATE_WM::Early Depth/Stencil Control != EDSC_PREPS (2). - * We always leave this set to EDSC_NORMAL (0). + /* BRW_NEW_FS_PROG_DATA: + * 3DSTATE_WM::Early Depth/Stencil Control != EDSC_PREPS (2). */ - const bool edsc_not_preps = true; + const bool edsc_not_preps = !brw->wm.prog_data->early_fragment_tests; /* 3DSTATE_PS_EXTRA::PixelShaderValid is always true. */ const bool pixel_shader_valid = true; @@ -439,7 +436,7 @@ gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt, brw_depth_format(brw, mt->format), BRW_SURFACE_2D, true, /* depth writes */ - NULL, false, 0, /* no stencil for now */ + NULL, false, /* no stencil for now */ true, /* hiz */ surface_width, surface_height, @@ -499,7 +496,7 @@ gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt, */ brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE, - brw->batch.workaround_bo, 0, 0, 0); + brw->workaround_bo, 0, 0, 0); /* Emit 3DSTATE_WM_HZ_OP again to disable the state overrides. */ BEGIN_BATCH(5); diff --git a/src/mesa/drivers/dri/i965/gen8_disable.c b/src/mesa/drivers/dri/i965/gen8_disable.c index da0d4a5fe7a..32508e377c9 100644 --- a/src/mesa/drivers/dri/i965/gen8_disable.c +++ b/src/mesa/drivers/dri/i965/gen8_disable.c @@ -66,7 +66,7 @@ disable_stages(struct brw_context *brw) BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2)); - OUT_BATCH(0); + OUT_BATCH(brw->hw_bt_pool.next_offset); ADVANCE_BATCH(); /* Disable the TE */ @@ -101,7 +101,7 @@ disable_stages(struct brw_context *brw) BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2)); - OUT_BATCH(0); + OUT_BATCH(brw->hw_bt_pool.next_offset); ADVANCE_BATCH(); BEGIN_BATCH(2); diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c index 26a02d3b045..81bd3b21778 100644 --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c @@ -52,7 +52,9 @@ gen8_upload_gs_state(struct brw_context *brw) ((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_GS_SAMPLER_COUNT_SHIFT) | ((prog_data->base.binding_table.size_bytes / 4) << - GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) | + (prog_data->base.nr_image_params ? 
+ HSW_GS_UAV_ACCESS_ENABLE : 0)); if (brw->gs.prog_data->base.base.total_scratch) { OUT_RELOC64(stage_state->scratch_bo, diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index a88f109c691..ae18f0f162c 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -58,7 +58,11 @@ gen8_upload_ps_extra(struct brw_context *brw, if (prog_data->uses_omask) dw1 |= GEN8_PSX_OMASK_TO_RENDER_TARGET; - if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx)) + if (brw->gen >= 9 && prog_data->pulls_bary) + dw1 |= GEN9_PSX_SHADER_PULLS_BARY; + + if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx) || + prog_data->base.nr_image_params) dw1 |= GEN8_PSX_SHADER_HAS_UAV; BEGIN_BATCH(2); @@ -115,6 +119,12 @@ upload_wm_state(struct brw_context *brw) dw1 |= brw->wm.prog_data->barycentric_interp_modes << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; + /* BRW_NEW_FS_PROG_DATA */ + if (brw->wm.prog_data->early_fragment_tests) + dw1 |= GEN7_WM_EARLY_DS_CONTROL_PREPS; + else if (brw->wm.prog_data->base.nr_image_params) + dw1 |= GEN7_WM_EARLY_DS_CONTROL_PSEXEC; + BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_WM << 16 | (2 - 2)); OUT_BATCH(dw1); diff --git a/src/mesa/drivers/dri/i965/gen8_sf_state.c b/src/mesa/drivers/dri/i965/gen8_sf_state.c index c2b585d0001..6b655ee493e 100644 --- a/src/mesa/drivers/dri/i965/gen8_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c @@ -169,7 +169,7 @@ upload_sf(struct brw_context *brw) point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); /* Clamp to the hardware limits and convert to fixed point */ - dw3 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3); + dw3 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3); /* _NEW_PROGRAM | _NEW_POINT */ if (!(ctx->VertexProgram.PointSizeEnabled || ctx->Point._Attenuated)) diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index b2d1a579815..6c4d3e197a5 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -88,12 +88,12 @@ vertical_alignment(const struct brw_context *brw, uint32_t surf_type) { /* On Gen9+ vertical alignment is ignored for 1D surfaces and when - * tr_mode is not TRMODE_NONE. + * tr_mode is not TRMODE_NONE. Set to an arbitrary non-reserved value. */ if (brw->gen > 8 && (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE || surf_type == BRW_SURFACE_1D)) - return 0; + return GEN8_SURFACE_VALIGN_4; switch (mt->align_h) { case 4: @@ -113,12 +113,12 @@ horizontal_alignment(const struct brw_context *brw, uint32_t surf_type) { /* On Gen9+ horizontal alignment is ignored when tr_mode is not - * TRMODE_NONE. + * TRMODE_NONE. Set to an arbitrary non-reserved value. */ if (brw->gen > 8 && (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE || gen9_use_linear_1d_layout(brw, mt))) - return 0; + return GEN8_SURFACE_HALIGN_4; switch (mt->align_w) { case 4: @@ -401,8 +401,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, irb->mt_layer : (irb->mt_layer / MAX2(mt->num_samples, 1)); GLenum gl_target = rb->TexImage ? rb->TexImage->TexObject->Target : GL_TEXTURE_2D; - /* FINISHME: Use PTE MOCS on Skylake. */ - uint32_t mocs = brw->gen >= 9 ? SKL_MOCS_WT : BDW_MOCS_PTE; + const uint32_t mocs = brw->gen >= 9 ? 
SKL_MOCS_PTE : BDW_MOCS_PTE; intel_miptree_used_for_rendering(mt); diff --git a/src/mesa/drivers/dri/i965/gen8_viewport_state.c b/src/mesa/drivers/dri/i965/gen8_viewport_state.c index 2d8eeb1f10f..2692ad55999 100644 --- a/src/mesa/drivers/dri/i965/gen8_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen8_viewport_state.c @@ -53,7 +53,7 @@ gen8_upload_sf_clip_viewport(struct brw_context *brw) } for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) { - double scale[3], translate[3]; + float scale[3], translate[3]; _mesa_get_viewport_xform(ctx, i, scale, translate); /* _NEW_VIEWPORT: Viewport Matrix Elements */ diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c index 28f5adddf14..8b5048bee7e 100644 --- a/src/mesa/drivers/dri/i965/gen8_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c @@ -53,7 +53,9 @@ upload_vs_state(struct brw_context *brw) ((ALIGN(stage_state->sampler_count, 4) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT) | ((prog_data->base.binding_table.size_bytes / 4) << - GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) | + (prog_data->base.nr_image_params ? + HSW_VS_UAV_ACCESS_ENABLE : 0)); if (prog_data->base.total_scratch) { OUT_RELOC64(stage_state->scratch_bo, diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index ed659ed625e..85f20a05729 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -32,6 +32,8 @@ #include "intel_buffers.h" #include "intel_fbo.h" #include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" #include <xf86drm.h> #include <i915_drm.h> @@ -44,19 +46,10 @@ intel_batchbuffer_init(struct brw_context *brw) { intel_batchbuffer_reset(brw); - if (brw->gen >= 6) { - /* We can't just use brw_state_batch to get a chunk of space for - * the gen6 workaround because it involves actually writing to - * the buffer, and the kernel doesn't let us write to the batch. - */ - brw->batch.workaround_bo = drm_intel_bo_alloc(brw->bufmgr, - "pipe_control workaround", - 4096, 4096); - } - if (!brw->has_llc) { brw->batch.cpu_map = malloc(BATCH_SZ); brw->batch.map = brw->batch.cpu_map; + brw->batch.map_next = brw->batch.cpu_map; } } @@ -77,12 +70,11 @@ intel_batchbuffer_reset(struct brw_context *brw) drm_intel_bo_map(brw->batch.bo, true); brw->batch.map = brw->batch.bo->virtual; } + brw->batch.map_next = brw->batch.map; brw->batch.reserved_space = BATCH_RESERVED; brw->batch.state_batch_offset = brw->batch.bo->size; - brw->batch.used = 0; brw->batch.needs_sol_reset = false; - brw->batch.pipe_controls_since_last_cs_stall = 0; /* We don't know what ring the new batch will be sent to until we see the * first BEGIN_BATCH or BEGIN_BATCH_BLT. Mark it as unknown. 
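The intel_batchbuffer.c rework that begins here replaces the batch.used dword counter with a map_next write pointer; the dword count is then recovered on demand as USED_BATCH(batch), i.e. map_next - map (the macro is added to intel_batchbuffer.h later in this diff). A stripped-down standalone model of that bookkeeping, with local stand-ins for the batch struct and sizes:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define BATCH_DWORDS 8192     /* stand-in for BATCH_SZ / 4 */

struct batch {
   uint32_t *map;             /* start of the CPU-visible batch */
   uint32_t *map_next;        /* next dword to be written */
};

/* Same idea as USED_BATCH(): the used-dword count falls out of pointer math. */
#define USED_BATCH(b) ((uintptr_t)((b).map_next - (b).map))

static void
emit_dword(struct batch *b, uint32_t dword)
{
   assert(USED_BATCH(*b) < BATCH_DWORDS);
   *b->map_next++ = dword;
}

int main(void)
{
   struct batch b;
   b.map = calloc(BATCH_DWORDS, sizeof(uint32_t));
   b.map_next = b.map;          /* the reset path points map_next at map */

   emit_dword(&b, 0x7a000002);  /* arbitrary payload */
   emit_dword(&b, 0);
   emit_dword(&b, 0);

   /* A flush would hand 4 * USED_BATCH(b) bytes to the kernel. */
   printf("batch uses %zu dwords (%zu bytes)\n",
          (size_t)USED_BATCH(b), (size_t)(4 * USED_BATCH(b)));
   assert(USED_BATCH(b) == 3);

   free(b.map);
   return 0;
}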
@@ -93,7 +85,7 @@ intel_batchbuffer_reset(struct brw_context *brw) void intel_batchbuffer_save_state(struct brw_context *brw) { - brw->batch.saved.used = brw->batch.used; + brw->batch.saved.map_next = brw->batch.map_next; brw->batch.saved.reloc_count = drm_intel_gem_bo_get_reloc_count(brw->batch.bo); } @@ -103,8 +95,8 @@ intel_batchbuffer_reset_to_saved(struct brw_context *brw) { drm_intel_gem_bo_clear_relocs(brw->batch.bo, brw->batch.saved.reloc_count); - brw->batch.used = brw->batch.saved.used; - if (brw->batch.used == 0) + brw->batch.map_next = brw->batch.saved.map_next; + if (USED_BATCH(brw->batch) == 0) brw->batch.ring = UNKNOWN_RING; } @@ -114,7 +106,6 @@ intel_batchbuffer_free(struct brw_context *brw) free(brw->batch.cpu_map); drm_intel_bo_unreference(brw->batch.last_bo); drm_intel_bo_unreference(brw->batch.bo); - drm_intel_bo_unreference(brw->batch.workaround_bo); } static void @@ -133,7 +124,7 @@ do_batch_dump(struct brw_context *brw) drm_intel_decode_set_batch_pointer(decode, batch->bo->virtual, batch->bo->offset64, - batch->used); + USED_BATCH(*batch)); } else { fprintf(stderr, "WARNING: failed to map batchbuffer (%s), " @@ -142,7 +133,7 @@ do_batch_dump(struct brw_context *brw) drm_intel_decode_set_batch_pointer(decode, batch->map, batch->bo->offset64, - batch->used); + USED_BATCH(*batch)); } drm_intel_decode_set_output_file(decode, stderr); @@ -218,10 +209,32 @@ brw_finish_batch(struct brw_context *brw) */ brw_emit_query_end(brw); - /* We may also need to snapshot and disable OA counters. */ - if (brw->batch.ring == RENDER_RING) + if (brw->batch.ring == RENDER_RING) { + /* We may also need to snapshot and disable OA counters. */ brw_perf_monitor_finish_batch(brw); + if (brw->is_haswell) { + /* From the Haswell PRM, Volume 2b, Command Reference: Instructions, + * 3DSTATE_CC_STATE_POINTERS > "Note": + * + * "SW must program 3DSTATE_CC_STATE_POINTERS command at the end of every + * 3D batch buffer followed by a PIPE_CONTROL with RC flush and CS stall." + * + * From the example in the docs, it seems to expect a regular pipe control + * flush here as well. We may have done it already, but meh. + * + * See also WaAvoidRCZCounterRollover. + */ + brw_emit_mi_flush(brw); + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2)); + OUT_BATCH(brw->cc.state_offset | 1); + ADVANCE_BATCH(); + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_CS_STALL); + } + } + /* Mark that the current program cache BO has been used by the GPU. * It will be reallocated if we need to put new programs in for the * next batch. @@ -267,6 +280,11 @@ throttle(struct brw_context *brw) } } +/* Drop when RS headers get pulled to libdrm */ +#ifndef I915_EXEC_RESOURCE_STREAMER +#define I915_EXEC_RESOURCE_STREAMER (1<<15) +#endif + /* TODO: Push this whole function into bufmgr. */ static int @@ -278,7 +296,7 @@ do_flush_locked(struct brw_context *brw) if (brw->has_llc) { drm_intel_bo_unmap(batch->bo); } else { - ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map); + ret = drm_intel_bo_subdata(batch->bo, 0, 4 * USED_BATCH(*batch), batch->map); if (ret == 0 && batch->state_batch_offset != batch->bo->size) { ret = drm_intel_bo_subdata(batch->bo, batch->state_batch_offset, @@ -293,7 +311,8 @@ do_flush_locked(struct brw_context *brw) if (brw->gen >= 6 && batch->ring == BLT_RING) { flags = I915_EXEC_BLT; } else { - flags = I915_EXEC_RENDER; + flags = I915_EXEC_RENDER | + (brw->use_resource_streamer ? 
I915_EXEC_RESOURCE_STREAMER : 0); } if (batch->needs_sol_reset) flags |= I915_EXEC_GEN7_SOL_RESET; @@ -303,11 +322,11 @@ do_flush_locked(struct brw_context *brw) brw_annotate_aub(brw); if (brw->hw_ctx == NULL || batch->ring != RENDER_RING) { - ret = drm_intel_bo_mrb_exec(batch->bo, 4 * batch->used, NULL, 0, 0, - flags); + ret = drm_intel_bo_mrb_exec(batch->bo, 4 * USED_BATCH(*batch), + NULL, 0, 0, flags); } else { ret = drm_intel_gem_bo_context_exec(batch->bo, brw->hw_ctx, - 4 * batch->used, flags); + 4 * USED_BATCH(*batch), flags); } } @@ -331,7 +350,7 @@ _intel_batchbuffer_flush(struct brw_context *brw, { int ret; - if (brw->batch.used == 0) + if (USED_BATCH(brw->batch) == 0) return 0; if (brw->throttle_batch[0] == NULL) { @@ -340,7 +359,7 @@ _intel_batchbuffer_flush(struct brw_context *brw, } if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) { - int bytes_for_commands = 4 * brw->batch.used; + int bytes_for_commands = 4 * USED_BATCH(brw->batch); int bytes_for_state = brw->batch.bo->size - brw->batch.state_batch_offset; int total_bytes = bytes_for_commands + bytes_for_state; fprintf(stderr, "%s:%d: Batchbuffer flush with %4db (pkt) + " @@ -356,7 +375,7 @@ _intel_batchbuffer_flush(struct brw_context *brw, /* Mark the end of the buffer. */ intel_batchbuffer_emit_dword(brw, MI_BATCH_BUFFER_END); - if (brw->batch.used & 1) { + if (USED_BATCH(brw->batch) & 1) { /* Round batchbuffer usage to 2 DWORDs. */ intel_batchbuffer_emit_dword(brw, MI_NOOP); } @@ -373,6 +392,9 @@ _intel_batchbuffer_flush(struct brw_context *brw, drm_intel_bo_wait_rendering(brw->batch.bo); } + if (brw->use_resource_streamer) + gen7_reset_hw_bt_pool_offsets(brw); + /* Start a new batch buffer. */ brw_new_batch(brw); @@ -382,15 +404,15 @@ _intel_batchbuffer_flush(struct brw_context *brw, /* This is the only way buffers get added to the validate list. 
*/ -bool -intel_batchbuffer_emit_reloc(struct brw_context *brw, - drm_intel_bo *buffer, - uint32_t read_domains, uint32_t write_domain, - uint32_t delta) +uint32_t +intel_batchbuffer_reloc(struct brw_context *brw, + drm_intel_bo *buffer, uint32_t offset, + uint32_t read_domains, uint32_t write_domain, + uint32_t delta) { int ret; - ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used, + ret = drm_intel_bo_emit_reloc(brw->batch.bo, offset, buffer, delta, read_domains, write_domain); assert(ret == 0); @@ -400,18 +422,16 @@ intel_batchbuffer_emit_reloc(struct brw_context *brw, * case the buffer doesn't move and we can short-circuit the relocation * processing in the kernel */ - intel_batchbuffer_emit_dword(brw, buffer->offset64 + delta); - - return true; + return buffer->offset64 + delta; } -bool -intel_batchbuffer_emit_reloc64(struct brw_context *brw, - drm_intel_bo *buffer, - uint32_t read_domains, uint32_t write_domain, - uint32_t delta) +uint64_t +intel_batchbuffer_reloc64(struct brw_context *brw, + drm_intel_bo *buffer, uint32_t offset, + uint32_t read_domains, uint32_t write_domain, + uint32_t delta) { - int ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used, + int ret = drm_intel_bo_emit_reloc(brw->batch.bo, offset, buffer, delta, read_domains, write_domain); assert(ret == 0); @@ -421,11 +441,7 @@ intel_batchbuffer_emit_reloc64(struct brw_context *brw, * case the buffer doesn't move and we can short-circuit the relocation * processing in the kernel */ - uint64_t offset = buffer->offset64 + delta; - intel_batchbuffer_emit_dword(brw, offset); - intel_batchbuffer_emit_dword(brw, offset >> 32); - - return true; + return buffer->offset64 + delta; } @@ -435,312 +451,8 @@ intel_batchbuffer_data(struct brw_context *brw, { assert((bytes & 3) == 0); intel_batchbuffer_require_space(brw, bytes, ring); - memcpy(brw->batch.map + brw->batch.used, data, bytes); - brw->batch.used += bytes >> 2; -} - -/** - * According to the latest documentation, any PIPE_CONTROL with the - * "Command Streamer Stall" bit set must also have another bit set, - * with five different options: - * - * - Render Target Cache Flush - * - Depth Cache Flush - * - Stall at Pixel Scoreboard - * - Post-Sync Operation - * - Depth Stall - * - * I chose "Stall at Pixel Scoreboard" since we've used it effectively - * in the past, but the choice is fairly arbitrary. - */ -static void -gen8_add_cs_stall_workaround_bits(uint32_t *flags) -{ - uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_WRITE_IMMEDIATE | - PIPE_CONTROL_WRITE_DEPTH_COUNT | - PIPE_CONTROL_WRITE_TIMESTAMP | - PIPE_CONTROL_STALL_AT_SCOREBOARD | - PIPE_CONTROL_DEPTH_STALL; - - /* If we're doing a CS stall, and don't already have one of the - * workaround bits set, add "Stall at Pixel Scoreboard." - */ - if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0) - *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; -} - -/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT: - * - * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with - * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set." - * - * Note that the kernel does CS stalls between batches, so we only need - * to count them within a batch. 
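intel_batchbuffer_reloc()/reloc64() above no longer write the presumed address into the batch themselves; they only record the relocation at a caller-supplied byte offset and return the presumed value, and the reworked OUT_RELOC/OUT_RELOC64 macros (later in this diff) store it through the local __map cursor. The schematic sketch below shows that split with the libdrm call reduced to a recording stub; all names here are local stand-ins, not the driver's structs.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Minimal stand-ins: a "bo" with a presumed GPU address and a batch cursor. */
struct fake_bo    { uint64_t offset64; };
struct fake_batch { uint32_t map[64]; uint32_t *map_next; };

/* Stub for drm_intel_bo_emit_reloc(): only notes where the reloc lives;
 * the real call also records read/write domains for the kernel.
 */
static void
record_reloc(struct fake_batch *batch, uint32_t batch_offset,
             struct fake_bo *target, uint32_t delta)
{
   (void)batch; (void)batch_offset; (void)target; (void)delta;
}

/* Counterpart of intel_batchbuffer_reloc(): record the reloc, then *return*
 * the presumed address so the caller can emit it wherever its cursor is.
 */
static uint32_t
batch_reloc(struct fake_batch *batch, uint32_t batch_offset,
            struct fake_bo *target, uint32_t delta)
{
   record_reloc(batch, batch_offset, target, delta);
   return (uint32_t)(target->offset64 + delta);
}

int main(void)
{
   struct fake_batch batch = { .map = {0} };
   struct fake_bo bo = { .offset64 = 0x12340000 };
   batch.map_next = batch.map;

   /* What OUT_RELOC expands to, in spirit: compute the byte offset of the
    * current cursor, record the reloc there, then emit the presumed value.
    */
   uint32_t byte_offset = (uint32_t)(batch.map_next - batch.map) * 4;
   *batch.map_next++ = batch_reloc(&batch, byte_offset, &bo, 0x100);

   assert(batch.map[0] == 0x12340100);
   printf("emitted presumed address 0x%08x\n", batch.map[0]);
   return 0;
}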
- */ -static uint32_t -gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags) -{ - if (brw->gen == 7 && !brw->is_haswell) { - if (flags & PIPE_CONTROL_CS_STALL) { - /* If we're doing a CS stall, reset the counter and carry on. */ - brw->batch.pipe_controls_since_last_cs_stall = 0; - return 0; - } - - /* If this is the fourth pipe control without a CS stall, do one now. */ - if (++brw->batch.pipe_controls_since_last_cs_stall == 4) { - brw->batch.pipe_controls_since_last_cs_stall = 0; - return PIPE_CONTROL_CS_STALL; - } - } - return 0; -} - -/** - * Emit a PIPE_CONTROL with various flushing flags. - * - * The caller is responsible for deciding what flags are appropriate for the - * given generation. - */ -void -brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags) -{ - if (brw->gen >= 8) { - gen8_add_cs_stall_workaround_bits(&flags); - - BEGIN_BATCH(6); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2)); - OUT_BATCH(flags); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } else if (brw->gen >= 6) { - flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags); - - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); - OUT_BATCH(flags); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } -} - -/** - * Emit a PIPE_CONTROL that writes to a buffer object. - * - * \p flags should contain one of the following items: - * - PIPE_CONTROL_WRITE_IMMEDIATE - * - PIPE_CONTROL_WRITE_TIMESTAMP - * - PIPE_CONTROL_WRITE_DEPTH_COUNT - */ -void -brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags, - drm_intel_bo *bo, uint32_t offset, - uint32_t imm_lower, uint32_t imm_upper) -{ - if (brw->gen >= 8) { - gen8_add_cs_stall_workaround_bits(&flags); - - BEGIN_BATCH(6); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2)); - OUT_BATCH(flags); - OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - offset); - OUT_BATCH(imm_lower); - OUT_BATCH(imm_upper); - ADVANCE_BATCH(); - } else if (brw->gen >= 6) { - flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags); - - /* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24 - * on later platforms. We always use PPGTT on Gen7+. - */ - unsigned gen6_gtt = brw->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE : 0; - - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); - OUT_BATCH(flags); - OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - gen6_gtt | offset); - OUT_BATCH(imm_lower); - OUT_BATCH(imm_upper); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2)); - OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - PIPE_CONTROL_GLOBAL_GTT_WRITE | offset); - OUT_BATCH(imm_lower); - OUT_BATCH(imm_upper); - ADVANCE_BATCH(); - } -} - -/** - * Restriction [DevSNB, DevIVB]: - * - * Prior to changing Depth/Stencil Buffer state (i.e. 
any combination of - * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER, - * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall - * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth - * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by - * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set), - * unless SW can otherwise guarantee that the pipeline from WM onwards is - * already flushed (e.g., via a preceding MI_FLUSH). - */ -void -intel_emit_depth_stall_flushes(struct brw_context *brw) -{ - assert(brw->gen >= 6 && brw->gen <= 9); - - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL); - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_CACHE_FLUSH); - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL); -} - -/** - * From the Ivybridge PRM, Volume 2 Part 1, Section 3.2 (VS Stage Input): - * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth - * stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS, - * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS, - * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL needs - * to be sent before any combination of VS associated 3DSTATE." - */ -void -gen7_emit_vs_workaround_flush(struct brw_context *brw) -{ - assert(brw->gen == 7); - brw_emit_pipe_control_write(brw, - PIPE_CONTROL_WRITE_IMMEDIATE - | PIPE_CONTROL_DEPTH_STALL, - brw->batch.workaround_bo, 0, - 0, 0); -} - - -/** - * Emit a PIPE_CONTROL command for gen7 with the CS Stall bit set. - */ -void -gen7_emit_cs_stall_flush(struct brw_context *brw) -{ - brw_emit_pipe_control_write(brw, - PIPE_CONTROL_CS_STALL - | PIPE_CONTROL_WRITE_IMMEDIATE, - brw->batch.workaround_bo, 0, - 0, 0); -} - - -/** - * Emits a PIPE_CONTROL with a non-zero post-sync operation, for - * implementing two workarounds on gen6. From section 1.4.7.1 - * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: - * - * [DevSNB-C+{W/A}] Before any depth stall flush (including those - * produced by non-pipelined state commands), software needs to first - * send a PIPE_CONTROL with no bits set except Post-Sync Operation != - * 0. - * - * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable - * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. - * - * And the workaround for these two requires this workaround first: - * - * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent - * BEFORE the pipe-control with a post-sync op and no write-cache - * flushes. - * - * And this last workaround is tricky because of the requirements on - * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM - * volume 2 part 1: - * - * "1 of the following must also be set: - * - Render Target Cache Flush Enable ([12] of DW1) - * - Depth Cache Flush Enable ([0] of DW1) - * - Stall at Pixel Scoreboard ([1] of DW1) - * - Depth Stall ([13] of DW1) - * - Post-Sync Operation ([13] of DW1) - * - Notify Enable ([8] of DW1)" - * - * The cache flushes require the workaround flush that triggered this - * one, so we can't use it. Depth stall would trigger the same. - * Post-sync nonzero is what triggered this second workaround, so we - * can't use that one either. Notify enable is IRQs, which aren't - * really our business. That leaves only stall at scoreboard. 
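Among the PIPE_CONTROL helpers removed from intel_batchbuffer.c here (presumably relocated elsewhere in this series, since brw_emit_pipe_control_flush() is still called throughout the diff) is gen8_add_cs_stall_workaround_bits(): a PIPE_CONTROL with the CS-stall bit must also set one of a small set of companion bits, and "Stall at Pixel Scoreboard" is the harmless default. The check is plain bit arithmetic, sketched standalone below with local flag values; the driver's real PIPE_CONTROL_* defines differ.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Local stand-in flag values, not the real brw_defines.h bits. */
#define PC_CS_STALL             (1u << 0)
#define PC_RT_FLUSH             (1u << 1)
#define PC_DEPTH_FLUSH          (1u << 2)
#define PC_WRITE_IMMEDIATE      (1u << 3)
#define PC_WRITE_DEPTH_COUNT    (1u << 4)
#define PC_WRITE_TIMESTAMP      (1u << 5)
#define PC_STALL_AT_SCOREBOARD  (1u << 6)
#define PC_DEPTH_STALL          (1u << 7)

/* Same shape as gen8_add_cs_stall_workaround_bits(): a CS stall needs at
 * least one companion bit; add Stall-at-Scoreboard if none is present.
 */
static void
add_cs_stall_workaround_bits(uint32_t *flags)
{
   const uint32_t wa_bits = PC_RT_FLUSH | PC_DEPTH_FLUSH |
                            PC_WRITE_IMMEDIATE | PC_WRITE_DEPTH_COUNT |
                            PC_WRITE_TIMESTAMP | PC_STALL_AT_SCOREBOARD |
                            PC_DEPTH_STALL;

   if ((*flags & PC_CS_STALL) && !(*flags & wa_bits))
      *flags |= PC_STALL_AT_SCOREBOARD;
}

int main(void)
{
   uint32_t bare = PC_CS_STALL;
   add_cs_stall_workaround_bits(&bare);
   assert(bare & PC_STALL_AT_SCOREBOARD);    /* companion bit added */

   uint32_t ok = PC_CS_STALL | PC_RT_FLUSH;
   add_cs_stall_workaround_bits(&ok);
   assert(!(ok & PC_STALL_AT_SCOREBOARD));   /* requirement already satisfied */

   printf("CS-stall workaround bits behave as documented\n");
   return 0;
}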
- */ -void -intel_emit_post_sync_nonzero_flush(struct brw_context *brw) -{ - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - - brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE, - brw->batch.workaround_bo, 0, 0, 0); -} - -/* Emit a pipelined flush to either flush render and texture cache for - * reading from a FBO-drawn texture, or flush so that frontbuffer - * render appears on the screen in DRI1. - * - * This is also used for the always_flush_cache driconf debug option. - */ -void -intel_batchbuffer_emit_mi_flush(struct brw_context *brw) -{ - if (brw->batch.ring == BLT_RING && brw->gen >= 6) { - BEGIN_BATCH_BLT(4); - OUT_BATCH(MI_FLUSH_DW); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } else { - int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH; - if (brw->gen >= 6) { - if (brw->gen == 9) { - /* Hardware workaround: SKL - * - * Emit Pipe Control with all bits set to zero before emitting - * a Pipe Control with VF Cache Invalidate set. - */ - brw_emit_pipe_control_flush(brw, 0); - } - - flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_VF_CACHE_INVALIDATE | - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | - PIPE_CONTROL_CS_STALL; - - if (brw->gen == 6) { - /* Hardware workaround: SNB B-Spec says: - * - * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache - * Flush Enable =1, a PIPE_CONTROL with any non-zero - * post-sync-op is required. - */ - intel_emit_post_sync_nonzero_flush(brw); - } - } - brw_emit_pipe_control_flush(brw, flags); - } - - brw_render_cache_set_clear(brw); + memcpy(brw->batch.map_next, data, bytes); + brw->batch.map_next += bytes >> 2; } static void diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h index 7bdd8364346..84add927c9a 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h @@ -22,12 +22,16 @@ extern "C" { * - Disabling OA counters on Gen6+ (3 DWords = 12 bytes) * - Ending MI_REPORT_PERF_COUNT on Gen5+, plus associated PIPE_CONTROLs: * - Two sets of PIPE_CONTROLs, which become 3 PIPE_CONTROLs each on SNB, - * which are 4 DWords each ==> 2 * 3 * 4 * 4 = 96 bytes + * which are 5 DWords each ==> 2 * 3 * 5 * 4 = 120 bytes * - 3 DWords for MI_REPORT_PERF_COUNT itself on Gen6+. ==> 12 bytes. * On Ironlake, it's 6 DWords, but we have some slack due to the lack of * Sandybridge PIPE_CONTROL madness. 
+ * - CC_STATE workaround on HSW (12 * 4 = 48 bytes) + * - 5 dwords for initial mi_flush + * - 2 dwords for CC state setup + * - 5 dwords for the required pipe control at the end */ -#define BATCH_RESERVED 146 +#define BATCH_RESERVED 152 struct intel_batchbuffer; @@ -53,25 +57,20 @@ void intel_batchbuffer_data(struct brw_context *brw, const void *data, GLuint bytes, enum brw_gpu_ring ring); -bool intel_batchbuffer_emit_reloc(struct brw_context *brw, - drm_intel_bo *buffer, - uint32_t read_domains, - uint32_t write_domain, - uint32_t offset); -bool intel_batchbuffer_emit_reloc64(struct brw_context *brw, - drm_intel_bo *buffer, - uint32_t read_domains, - uint32_t write_domain, - uint32_t offset); -void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags); -void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags, - drm_intel_bo *bo, uint32_t offset, - uint32_t imm_lower, uint32_t imm_upper); -void intel_batchbuffer_emit_mi_flush(struct brw_context *brw); -void intel_emit_post_sync_nonzero_flush(struct brw_context *brw); -void intel_emit_depth_stall_flushes(struct brw_context *brw); -void gen7_emit_vs_workaround_flush(struct brw_context *brw); -void gen7_emit_cs_stall_flush(struct brw_context *brw); +uint32_t intel_batchbuffer_reloc(struct brw_context *brw, + drm_intel_bo *buffer, + uint32_t offset, + uint32_t read_domains, + uint32_t write_domain, + uint32_t delta); +uint64_t intel_batchbuffer_reloc64(struct brw_context *brw, + drm_intel_bo *buffer, + uint32_t offset, + uint32_t read_domains, + uint32_t write_domain, + uint32_t delta); + +#define USED_BATCH(batch) ((uintptr_t)((batch).map_next - (batch).map)) static inline uint32_t float_as_int(float f) { @@ -93,7 +92,7 @@ static inline unsigned intel_batchbuffer_space(struct brw_context *brw) { return (brw->batch.state_batch_offset - brw->batch.reserved_space) - - brw->batch.used*4; + - USED_BATCH(brw->batch) * 4; } @@ -103,7 +102,7 @@ intel_batchbuffer_emit_dword(struct brw_context *brw, GLuint dword) #ifdef DEBUG assert(intel_batchbuffer_space(brw) >= 4); #endif - brw->batch.map[brw->batch.used++] = dword; + *brw->batch.map_next++ = dword; assert(brw->batch.ring != UNKNOWN_RING); } @@ -144,8 +143,8 @@ intel_batchbuffer_begin(struct brw_context *brw, int n, enum brw_gpu_ring ring) { intel_batchbuffer_require_space(brw, n * 4, ring); - brw->batch.emit = brw->batch.used; #ifdef DEBUG + brw->batch.emit = USED_BATCH(brw->batch); brw->batch.total = n; #endif } @@ -155,7 +154,7 @@ intel_batchbuffer_advance(struct brw_context *brw) { #ifdef DEBUG struct intel_batchbuffer *batch = &brw->batch; - unsigned int _n = batch->used - batch->emit; + unsigned int _n = USED_BATCH(*batch) - batch->emit; assert(batch->total != 0); if (_n != batch->total) { fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", @@ -166,21 +165,42 @@ intel_batchbuffer_advance(struct brw_context *brw) #endif } -#define BEGIN_BATCH(n) intel_batchbuffer_begin(brw, n, RENDER_RING) -#define BEGIN_BATCH_BLT(n) intel_batchbuffer_begin(brw, n, BLT_RING) -#define OUT_BATCH(d) intel_batchbuffer_emit_dword(brw, d) -#define OUT_BATCH_F(f) intel_batchbuffer_emit_float(brw, f) -#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ - intel_batchbuffer_emit_reloc(brw, buf, \ - read_domains, write_domain, delta); \ +#define BEGIN_BATCH(n) do { \ + intel_batchbuffer_begin(brw, (n), RENDER_RING); \ + uint32_t *__map = brw->batch.map_next; \ + brw->batch.map_next += (n) + +#define BEGIN_BATCH_BLT(n) do { \ + intel_batchbuffer_begin(brw, (n), 
BLT_RING); \ + uint32_t *__map = brw->batch.map_next; \ + brw->batch.map_next += (n) + +#define OUT_BATCH(d) *__map++ = (d) +#define OUT_BATCH_F(f) OUT_BATCH(float_as_int((f))) + +#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ + uint32_t __offset = (__map - brw->batch.map) * 4; \ + OUT_BATCH(intel_batchbuffer_reloc(brw, (buf), __offset, \ + (read_domains), \ + (write_domain), \ + (delta))); \ } while (0) /* Handle 48-bit address relocations for Gen8+ */ -#define OUT_RELOC64(buf, read_domains, write_domain, delta) do { \ - intel_batchbuffer_emit_reloc64(brw, buf, read_domains, write_domain, delta); \ +#define OUT_RELOC64(buf, read_domains, write_domain, delta) do { \ + uint32_t __offset = (__map - brw->batch.map) * 4; \ + uint64_t reloc64 = intel_batchbuffer_reloc64(brw, (buf), __offset, \ + (read_domains), \ + (write_domain), \ + (delta)); \ + OUT_BATCH(reloc64); \ + OUT_BATCH(reloc64 >> 32); \ } while (0) -#define ADVANCE_BATCH() intel_batchbuffer_advance(brw); +#define ADVANCE_BATCH() \ + assert(__map == brw->batch.map_next); \ + intel_batchbuffer_advance(brw); \ +} while (0) #ifdef __cplusplus } diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index d3ab769356c..6d92580e725 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -27,6 +27,7 @@ #include "main/mtypes.h" +#include "main/blit.h" #include "main/context.h" #include "main/enums.h" #include "main/colormac.h" @@ -43,6 +44,23 @@ #define FILE_DEBUG_FLAG DEBUG_BLIT +#define SET_TILING_XY_FAST_COPY_BLT(tiling, tr_mode, type) \ +({ \ + switch (tiling) { \ + case I915_TILING_X: \ + CMD |= type ## _TILED_X; \ + break; \ + case I915_TILING_Y: \ + if (tr_mode == INTEL_MIPTREE_TRMODE_YS) \ + CMD |= type ## _TILED_64K; \ + else \ + CMD |= type ## _TILED_Y; \ + break; \ + default: \ + unreachable("not reached"); \ + } \ +}) + static void intel_miptree_set_alpha_to_one(struct brw_context *brw, struct intel_mipmap_tree *mt, @@ -75,6 +93,10 @@ static uint32_t br13_for_cpp(int cpp) { switch (cpp) { + case 16: + return BR13_32323232; + case 8: + return BR13_16161616; case 4: return BR13_8888; case 2: @@ -86,6 +108,64 @@ br13_for_cpp(int cpp) } } +static uint32_t +get_tr_horizontal_align(uint32_t tr_mode, uint32_t cpp, bool is_src) { + /* Alignment tables for YF/YS tiled surfaces. */ + const uint32_t align_2d_yf[] = {64, 64, 32, 32, 16}; + const uint32_t bpp = cpp * 8; + const uint32_t shift = is_src ? 17 : 10; + uint32_t align; + int i = 0; + + if (tr_mode == INTEL_MIPTREE_TRMODE_NONE) + return 0; + + /* Compute array index. */ + assert (bpp >= 8 && bpp <= 128 && _mesa_is_pow_two(bpp)); + i = ffs(bpp / 8) - 1; + + align = tr_mode == INTEL_MIPTREE_TRMODE_YF ? + align_2d_yf[i] : + 4 * align_2d_yf[i]; + + assert(_mesa_is_pow_two(align)); + + /* XY_FAST_COPY_BLT doesn't support horizontal alignment of 16. */ + if (align == 16) + align = 32; + + return (ffs(align) - 6) << shift; +} + +static uint32_t +get_tr_vertical_align(uint32_t tr_mode, uint32_t cpp, bool is_src) { + /* Vertical alignment tables for YF/YS tiled surfaces. */ + const unsigned align_2d_yf[] = {64, 32, 32, 16, 16}; + const uint32_t bpp = cpp * 8; + const uint32_t shift = is_src ? 15 : 8; + uint32_t align; + int i = 0; + + if (tr_mode == INTEL_MIPTREE_TRMODE_NONE) + return 0; + + /* Compute array index. */ + assert (bpp >= 8 && bpp <= 128 && _mesa_is_pow_two(bpp)); + i = ffs(bpp / 8) - 1; + + align = tr_mode == INTEL_MIPTREE_TRMODE_YF ? 
+ align_2d_yf[i] : + 4 * align_2d_yf[i]; + + assert(_mesa_is_pow_two(align)); + + /* XY_FAST_COPY_BLT doesn't support vertical alignments of 16 and 32. */ + if (align == 16 || align == 32) + align = 64; + + return (ffs(align) - 7) << shift; +} + /** * Emits the packet for switching the blitter from X to Y tiled or back. * @@ -96,9 +176,10 @@ br13_for_cpp(int cpp) * tiling state would leak into other unsuspecting applications (like the X * server). */ -static void +static uint32_t * set_blitter_tiling(struct brw_context *brw, - bool dst_y_tiled, bool src_y_tiled) + bool dst_y_tiled, bool src_y_tiled, + uint32_t *__map) { assert(brw->gen >= 6); @@ -113,19 +194,19 @@ set_blitter_tiling(struct brw_context *brw, OUT_BATCH((BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16 | (dst_y_tiled ? BCS_SWCTRL_DST_Y : 0) | (src_y_tiled ? BCS_SWCTRL_SRC_Y : 0)); + return __map; } +#define SET_BLITTER_TILING(...) __map = set_blitter_tiling(__VA_ARGS__, __map) -#define BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled) do { \ +#define BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled) \ BEGIN_BATCH_BLT(n + ((dst_y_tiled || src_y_tiled) ? 14 : 0)); \ if (dst_y_tiled || src_y_tiled) \ - set_blitter_tiling(brw, dst_y_tiled, src_y_tiled); \ - } while (0) + SET_BLITTER_TILING(brw, dst_y_tiled, src_y_tiled) -#define ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled) do { \ +#define ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled) \ if (dst_y_tiled || src_y_tiled) \ - set_blitter_tiling(brw, false, false); \ - ADVANCE_BATCH(); \ - } while (0) + SET_BLITTER_TILING(brw, false, false); \ + ADVANCE_BATCH() static int blt_pitch(struct intel_mipmap_tree *mt) @@ -278,9 +359,11 @@ intel_miptree_blit(struct brw_context *brw, src_pitch, src_mt->bo, src_mt->offset, src_mt->tiling, + src_mt->tr_mode, dst_mt->pitch, dst_mt->bo, dst_mt->offset, dst_mt->tiling, + dst_mt->tr_mode, src_x, src_y, dst_x, dst_y, width, height, @@ -313,6 +396,112 @@ alignment_valid(struct brw_context *brw, unsigned offset, uint32_t tiling) return true; } +static bool +can_fast_copy_blit(struct brw_context *brw, + drm_intel_bo *src_buffer, + int16_t src_x, int16_t src_y, + uintptr_t src_offset, uint32_t src_pitch, + uint32_t src_tiling, uint32_t src_tr_mode, + drm_intel_bo *dst_buffer, + int16_t dst_x, int16_t dst_y, + uintptr_t dst_offset, uint32_t dst_pitch, + uint32_t dst_tiling, uint32_t dst_tr_mode, + int16_t w, int16_t h, uint32_t cpp) +{ + const bool dst_tiling_none = dst_tiling == I915_TILING_NONE; + const bool src_tiling_none = src_tiling == I915_TILING_NONE; + + if (brw->gen < 9) + return false; + + if (src_buffer->handle == dst_buffer->handle && + _mesa_regions_overlap(src_x, src_y, src_x + w, src_y + h, + dst_x, dst_y, dst_x + w, dst_y + h)) + return false; + + /* Enable fast copy blit only if the surfaces are Yf/Ys tiled. + * FIXME: Based on performance data, remove this condition later to + * enable for all types of surfaces. + */ + if (src_tr_mode == INTEL_MIPTREE_TRMODE_NONE && + dst_tr_mode == INTEL_MIPTREE_TRMODE_NONE) + return false; + + /* For all surface types buffers must be cacheline-aligned. */ + if ((dst_offset | src_offset) & 63) + return false; + + /* Color depth greater than 128 bits not supported. */ + if (cpp > 16) + return false; + + /* For Fast Copy Blits the pitch cannot be a negative number. So, bit 15 + * of the destination pitch must be zero. + */ + if ((src_pitch >> 15 & 1) != 0 || (dst_pitch >> 15 & 1) != 0) + return false; + + /* For Linear surfaces, the pitch has to be an OWord (16byte) multiple. 
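+ *
+ * Illustration (editorial, not from the patch itself): a linear surface
+ * with cpp=4 and width 100 has a 400-byte pitch; 400 / 16 = 25, so this
+ * check passes. A 70-pixel-wide one has a 280-byte pitch, which is not
+ * an OWord multiple, so can_fast_copy_blit() returns false here.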
*/ + if ((src_tiling_none && src_pitch % 16 != 0) || + (dst_tiling_none && dst_pitch % 16 != 0)) + return false; + + /* For Tiled surfaces, the pitch has to be a multiple of the Tile width + * (X direction width of the Tile). This means the pitch value will + * always be Cache Line aligned (64byte multiple). + */ + if ((!dst_tiling_none && dst_pitch % 64 != 0) || + (!src_tiling_none && src_pitch % 64 != 0)) + return false; + + return true; +} + +static uint32_t +xy_blit_cmd(uint32_t src_tiling, uint32_t src_tr_mode, + uint32_t dst_tiling, uint32_t dst_tr_mode, + uint32_t cpp, bool use_fast_copy_blit) +{ + uint32_t CMD = 0; + + if (use_fast_copy_blit) { + CMD = XY_FAST_COPY_BLT_CMD; + + if (dst_tiling != I915_TILING_NONE) + SET_TILING_XY_FAST_COPY_BLT(dst_tiling, dst_tr_mode, XY_FAST_DST); + + if (src_tiling != I915_TILING_NONE) + SET_TILING_XY_FAST_COPY_BLT(src_tiling, src_tr_mode, XY_FAST_SRC); + + CMD |= get_tr_horizontal_align(src_tr_mode, cpp, true /* is_src */); + CMD |= get_tr_vertical_align(src_tr_mode, cpp, true /* is_src */); + + CMD |= get_tr_horizontal_align(dst_tr_mode, cpp, false /* is_src */); + CMD |= get_tr_vertical_align(dst_tr_mode, cpp, false /* is_src */); + + } else { + assert(cpp <= 4); + switch (cpp) { + case 1: + case 2: + CMD = XY_SRC_COPY_BLT_CMD; + break; + case 4: + CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + break; + default: + unreachable("not reached"); + } + + if (dst_tiling != I915_TILING_NONE) + CMD |= XY_DST_TILED; + + if (src_tiling != I915_TILING_NONE) + CMD |= XY_SRC_TILED; + } + return CMD; +} + /* Copy BitBlt */ bool @@ -322,10 +511,12 @@ intelEmitCopyBlit(struct brw_context *brw, drm_intel_bo *src_buffer, GLuint src_offset, uint32_t src_tiling, + uint32_t src_tr_mode, GLshort dst_pitch, drm_intel_bo *dst_buffer, GLuint dst_offset, uint32_t dst_tiling, + uint32_t dst_tr_mode, GLshort src_x, GLshort src_y, GLshort dst_x, GLshort dst_y, GLshort w, GLshort h, @@ -337,18 +528,11 @@ intelEmitCopyBlit(struct brw_context *brw, drm_intel_bo *aper_array[3]; bool dst_y_tiled = dst_tiling == I915_TILING_Y; bool src_y_tiled = src_tiling == I915_TILING_Y; - - if (!alignment_valid(brw, dst_offset, dst_tiling)) - return false; - if (!alignment_valid(brw, src_offset, src_tiling)) - return false; + bool use_fast_copy_blit = false; if ((dst_y_tiled || src_y_tiled) && brw->gen < 6) return false; - assert(!dst_y_tiled || (dst_pitch % 128) == 0); - assert(!src_y_tiled || (src_pitch % 128) == 0); - /* do space check before going any further */ do { aper_array[0] = brw->batch.bo; @@ -373,52 +557,98 @@ intelEmitCopyBlit(struct brw_context *brw, src_buffer, src_pitch, src_offset, src_x, src_y, dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); - /* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop - * the low bits. Offsets must be naturally aligned. 
- */ - if (src_pitch % 4 != 0 || src_offset % cpp != 0 || - dst_pitch % 4 != 0 || dst_offset % cpp != 0) - return false; + use_fast_copy_blit = can_fast_copy_blit(brw, + src_buffer, + src_x, src_y, + src_offset, src_pitch, + src_tiling, src_tr_mode, + dst_buffer, + dst_x, dst_y, + dst_offset, dst_pitch, + dst_tiling, dst_tr_mode, + w, h, cpp); + assert(use_fast_copy_blit || + (src_tr_mode == INTEL_MIPTREE_TRMODE_NONE && + dst_tr_mode == INTEL_MIPTREE_TRMODE_NONE)); + + if (use_fast_copy_blit) { + /* When two sequential fast copy blits have different source surfaces, + * but their destinations refer to the same destination surfaces and + * therefore destinations overlap it is imperative that a flush be + * inserted between the two blits. + * + * FIXME: Figure out a way to avoid flushing when not required. + */ + brw_emit_mi_flush(brw); + + assert(cpp <= 16); + BR13 = br13_for_cpp(cpp); + + if (src_tr_mode == INTEL_MIPTREE_TRMODE_YF) + BR13 |= XY_FAST_SRC_TRMODE_YF; + + if (dst_tr_mode == INTEL_MIPTREE_TRMODE_YF) + BR13 |= XY_FAST_DST_TRMODE_YF; + + CMD = xy_blit_cmd(src_tiling, src_tr_mode, + dst_tiling, dst_tr_mode, + cpp, use_fast_copy_blit); + + /* For tiled source and destination, pitch value should be specified + * as a number of Dwords. + */ + if (dst_tiling != I915_TILING_NONE) + dst_pitch /= 4; + + if (src_tiling != I915_TILING_NONE) + src_pitch /= 4; - /* For big formats (such as floating point), do the copy using 16 or 32bpp - * and multiply the coordinates. - */ - if (cpp > 4) { - if (cpp % 4 == 2) { - dst_x *= cpp / 2; - dst_x2 *= cpp / 2; - src_x *= cpp / 2; - cpp = 2; - } else { - assert(cpp % 4 == 0); - dst_x *= cpp / 4; - dst_x2 *= cpp / 4; - src_x *= cpp / 4; - cpp = 4; + } else { + assert(!dst_y_tiled || (dst_pitch % 128) == 0); + assert(!src_y_tiled || (src_pitch % 128) == 0); + + /* For big formats (such as floating point), do the copy using 16 or + * 32bpp and multiply the coordinates. + */ + if (cpp > 4) { + if (cpp % 4 == 2) { + dst_x *= cpp / 2; + dst_x2 *= cpp / 2; + src_x *= cpp / 2; + cpp = 2; + } else { + assert(cpp % 4 == 0); + dst_x *= cpp / 4; + dst_x2 *= cpp / 4; + src_x *= cpp / 4; + cpp = 4; + } } - } - BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16; + if (!alignment_valid(brw, dst_offset, dst_tiling)) + return false; + if (!alignment_valid(brw, src_offset, src_tiling)) + return false; - switch (cpp) { - case 1: - case 2: - CMD = XY_SRC_COPY_BLT_CMD; - break; - case 4: - CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; - break; - default: - return false; - } + /* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop + * the low bits. Offsets must be naturally aligned. 
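+ *
+ * Illustration (editorial, not from the patch): a 16bpp surface that is
+ * 31 pixels wide has a 62-byte pitch; 62 % 4 != 0, so the hardware would
+ * drop the low bits and we return false instead of emitting a corrupted
+ * blit.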
+ */ + if (src_pitch % 4 != 0 || src_offset % cpp != 0 || + dst_pitch % 4 != 0 || dst_offset % cpp != 0) + return false; - if (dst_tiling != I915_TILING_NONE) { - CMD |= XY_DST_TILED; - dst_pitch /= 4; - } - if (src_tiling != I915_TILING_NONE) { - CMD |= XY_SRC_TILED; - src_pitch /= 4; + assert(cpp <= 4); + BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16; + + CMD = xy_blit_cmd(src_tiling, src_tr_mode, + dst_tiling, dst_tr_mode, + cpp, use_fast_copy_blit); + + if (dst_tiling != I915_TILING_NONE) + dst_pitch /= 4; + + if (src_tiling != I915_TILING_NONE) + src_pitch /= 4; } if (dst_y2 <= dst_y || dst_x2 <= dst_x) { @@ -460,7 +690,7 @@ intelEmitCopyBlit(struct brw_context *brw, ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled); - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); return true; } @@ -544,7 +774,7 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw, intel_batchbuffer_data(brw, src_bits, dwords * 4, BLT_RING); - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); return true; } @@ -576,7 +806,9 @@ intel_emit_linear_blit(struct brw_context *brw, dst_x = dst_offset % 64; ok = intelEmitCopyBlit(brw, 1, pitch, src_bo, src_offset - src_x, I915_TILING_NONE, + INTEL_MIPTREE_TRMODE_NONE, pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE, + INTEL_MIPTREE_TRMODE_NONE, src_x, 0, /* src x/y */ dst_x, 0, /* dst x/y */ pitch, height, /* w, h */ @@ -595,7 +827,9 @@ intel_emit_linear_blit(struct brw_context *brw, if (size != 0) { ok = intelEmitCopyBlit(brw, 1, pitch, src_bo, src_offset - src_x, I915_TILING_NONE, + INTEL_MIPTREE_TRMODE_NONE, pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE, + INTEL_MIPTREE_TRMODE_NONE, src_x, 0, /* src x/y */ dst_x, 0, /* dst x/y */ size, 1, /* w, h */ @@ -667,5 +901,5 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw, OUT_BATCH(0xffffffff); /* white, but only alpha gets written */ ADVANCE_BATCH_TILED(dst_y_tiled, false); - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); } diff --git a/src/mesa/drivers/dri/i965/intel_blit.h b/src/mesa/drivers/dri/i965/intel_blit.h index 2287c379c4e..c3d19a5a20e 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.h +++ b/src/mesa/drivers/dri/i965/intel_blit.h @@ -32,19 +32,21 @@ bool intelEmitCopyBlit(struct brw_context *brw, - GLuint cpp, - GLshort src_pitch, - drm_intel_bo *src_buffer, - GLuint src_offset, - uint32_t src_tiling, - GLshort dst_pitch, - drm_intel_bo *dst_buffer, - GLuint dst_offset, - uint32_t dst_tiling, - GLshort srcx, GLshort srcy, - GLshort dstx, GLshort dsty, - GLshort w, GLshort h, - GLenum logicop ); + GLuint cpp, + GLshort src_pitch, + drm_intel_bo *src_buffer, + GLuint src_offset, + uint32_t src_tiling, + uint32_t src_tr_mode, + GLshort dst_pitch, + drm_intel_bo *dst_buffer, + GLuint dst_offset, + uint32_t dst_tiling, + uint32_t dst_tr_mode, + GLshort srcx, GLshort srcy, + GLshort dstx, GLshort dsty, + GLshort w, GLshort h, + GLenum logicop); bool intel_miptree_blit_compatible_formats(mesa_format src, mesa_format dst); diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c index 627c487f0e7..ff05b5cd0e7 100644 --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c @@ -560,7 +560,7 @@ brw_unmap_buffer(struct gl_context *ctx, * flush. Once again, we wish for a domain tracker in libdrm to cover * usage inside of a batchbuffer. 
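 *
 * Concretely (editorial note, not in the patch): the blit that copied the
 * temporary range_map_bo back into the buffer leaves its writes in the
 * blitter/render caches, while a later draw in the same batch may read the
 * buffer through the sampler or vertex caches, so we flush here instead of
 * tracking domains per batch.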
*/ - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); drm_intel_bo_unreference(intel_obj->range_map_bo[index]); intel_obj->range_map_bo[index] = NULL; @@ -632,7 +632,7 @@ brw_copy_buffer_subdata(struct gl_context *ctx, * flush. Once again, we wish for a domain tracker in libdrm to cover * usage inside of a batchbuffer. */ - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); } void diff --git a/src/mesa/drivers/dri/i965/intel_copy_image.c b/src/mesa/drivers/dri/i965/intel_copy_image.c index f4c7eff2904..3706704bf1a 100644 --- a/src/mesa/drivers/dri/i965/intel_copy_image.c +++ b/src/mesa/drivers/dri/i965/intel_copy_image.c @@ -126,9 +126,11 @@ copy_image_with_blitter(struct brw_context *brw, src_mt->pitch, src_mt->bo, src_mt->offset, src_mt->tiling, + src_mt->tr_mode, dst_mt->pitch, dst_mt->bo, dst_mt->offset, dst_mt->tiling, + dst_mt->tr_mode, src_x, src_y, dst_x, dst_y, src_width, src_height, diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c index 75cf7854eff..58f41bfd55d 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.c +++ b/src/mesa/drivers/dri/i965/intel_debug.c @@ -79,11 +79,13 @@ intel_debug_flag_for_shader_stage(gl_shader_stage stage) { uint64_t flags[] = { [MESA_SHADER_VERTEX] = DEBUG_VS, + [MESA_SHADER_TESS_CTRL] = 0, + [MESA_SHADER_TESS_EVAL] = 0, [MESA_SHADER_GEOMETRY] = DEBUG_GS, [MESA_SHADER_FRAGMENT] = DEBUG_WM, [MESA_SHADER_COMPUTE] = DEBUG_CS, }; - STATIC_ASSERT(MESA_SHADER_STAGES == 4); + STATIC_ASSERT(MESA_SHADER_STAGES == 6); return flags[stage]; } diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index c99677c7197..3bc28a12026 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -64,10 +64,10 @@ can_do_pipelined_register_writes(struct brw_context *brw) /* Set a value in a BO to a known quantity. The workaround BO already * exists and doesn't contain anything important, so we may as well use it. */ - drm_intel_bo_map(brw->batch.workaround_bo, true); - data = brw->batch.workaround_bo->virtual; + drm_intel_bo_map(brw->workaround_bo, true); + data = brw->workaround_bo->virtual; data[offset] = 0xffffffff; - drm_intel_bo_unmap(brw->batch.workaround_bo); + drm_intel_bo_unmap(brw->workaround_bo); /* Write the register. */ BEGIN_BATCH(3); @@ -76,13 +76,13 @@ can_do_pipelined_register_writes(struct brw_context *brw) OUT_BATCH(expected_value); ADVANCE_BATCH(); - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); /* Save the register's value back to the buffer. */ BEGIN_BATCH(3); OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); OUT_BATCH(reg); - OUT_RELOC(brw->batch.workaround_bo, + OUT_RELOC(brw->workaround_bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, offset * sizeof(uint32_t)); ADVANCE_BATCH(); @@ -90,10 +90,10 @@ can_do_pipelined_register_writes(struct brw_context *brw) intel_batchbuffer_flush(brw); /* Check whether the value got written. */ - drm_intel_bo_map(brw->batch.workaround_bo, false); - data = brw->batch.workaround_bo->virtual; + drm_intel_bo_map(brw->workaround_bo, false); + data = brw->workaround_bo->virtual; bool success = data[offset] == expected_value; - drm_intel_bo_unmap(brw->batch.workaround_bo); + drm_intel_bo_unmap(brw->workaround_bo); result = success; @@ -120,10 +120,10 @@ can_write_oacontrol(struct brw_context *brw) /* Set a value in a BO to a known quantity. 
The workaround BO already * exists and doesn't contain anything important, so we may as well use it. */ - drm_intel_bo_map(brw->batch.workaround_bo, true); - data = brw->batch.workaround_bo->virtual; + drm_intel_bo_map(brw->workaround_bo, true); + data = brw->workaround_bo->virtual; data[offset] = 0xffffffff; - drm_intel_bo_unmap(brw->batch.workaround_bo); + drm_intel_bo_unmap(brw->workaround_bo); /* Write OACONTROL. */ BEGIN_BATCH(3); @@ -132,18 +132,18 @@ can_write_oacontrol(struct brw_context *brw) OUT_BATCH(expected_value); ADVANCE_BATCH(); - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); /* Save the register's value back to the buffer. */ BEGIN_BATCH(3); OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); OUT_BATCH(OACONTROL); - OUT_RELOC(brw->batch.workaround_bo, + OUT_RELOC(brw->workaround_bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, offset * sizeof(uint32_t)); ADVANCE_BATCH(); - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); /* Set OACONTROL back to zero (everything off). */ BEGIN_BATCH(3); @@ -155,10 +155,10 @@ can_write_oacontrol(struct brw_context *brw) intel_batchbuffer_flush(brw); /* Check whether the value got written. */ - drm_intel_bo_map(brw->batch.workaround_bo, false); - data = brw->batch.workaround_bo->virtual; + drm_intel_bo_map(brw->workaround_bo, false); + data = brw->workaround_bo->virtual; bool success = data[offset] == expected_value; - drm_intel_bo_unmap(brw->batch.workaround_bo); + drm_intel_bo_unmap(brw->workaround_bo); result = success; @@ -284,8 +284,6 @@ intelInitExtensions(struct gl_context *ctx) } if (brw->gen >= 6) { - uint64_t dummy; - ctx->Extensions.ARB_blend_func_extended = brw->optionCache.info == NULL || !driQueryOptionb(&brw->optionCache, "disable_blend_func_extended"); @@ -311,13 +309,14 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.OES_depth_texture_cube_map = true; /* Test if the kernel has the ioctl. */ - if (brw->bufmgr && drm_intel_reg_read(brw->bufmgr, TIMESTAMP, &dummy) == 0) + if (brw->intelScreen->hw_has_timestamp) ctx->Extensions.ARB_timer_query = true; /* Only enable this in core profile because other parts of Mesa behave * slightly differently when the extension is enabled. 
*/ if (ctx->API == API_OPENGL_CORE) { + ctx->Extensions.ARB_shader_subroutine = true; ctx->Extensions.ARB_viewport_array = true; ctx->Extensions.AMD_vertex_shader_viewport_index = true; } @@ -331,6 +330,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_framebuffer_no_attachments = true; ctx->Extensions.ARB_gpu_shader5 = true; ctx->Extensions.ARB_shader_atomic_counters = true; + ctx->Extensions.ARB_shader_image_load_store = true; ctx->Extensions.ARB_texture_compression_bptc = true; ctx->Extensions.ARB_texture_view = true; @@ -351,6 +351,7 @@ intelInitExtensions(struct gl_context *ctx) if (ctx->API == API_OPENGL_CORE) { ctx->Extensions.ARB_viewport_array = true; ctx->Extensions.AMD_vertex_shader_viewport_index = true; + ctx->Extensions.ARB_shader_subroutine = true; } } diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index 1b3a72f3ec2..72648b01e33 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -310,7 +310,7 @@ intel_alloc_private_renderbuffer_storage(struct gl_context * ctx, struct gl_rend intel_miptree_release(&irb->mt); DBG("%s: %s: %s (%dx%d)\n", __func__, - _mesa_lookup_enum_by_nr(internalFormat), + _mesa_enum_to_string(internalFormat), _mesa_get_format_name(rb->Format), width, height); if (width == 0 || height == 0) @@ -551,10 +551,12 @@ intel_renderbuffer_update_wrapper(struct brw_context *brw, irb->mt_layer = layer_multiplier * layer; - if (layered) { - irb->layer_count = image->TexObject->NumLayers ?: mt->level[level].depth / layer_multiplier; - } else { + if (!layered) { irb->layer_count = 1; + } else if (image->TexObject->NumLayers > 0) { + irb->layer_count = image->TexObject->NumLayers; + } else { + irb->layer_count = mt->level[level].depth / layer_multiplier; } intel_miptree_reference(&irb->mt, mt); @@ -1020,6 +1022,9 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw, struct intel_mipmap_tree *new_mt; int width, height, depth; + uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD | + MIPTREE_LAYOUT_TILING_ANY; + intel_miptree_get_dimensions_for_image(rb->TexImage, &width, &height, &depth); new_mt = intel_miptree_create(brw, rb->TexImage->TexObject->Target, @@ -1028,8 +1033,7 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw, intel_image->base.Base.Level, width, height, depth, irb->mt->num_samples, - INTEL_MIPTREE_TILING_ANY, - MIPTREE_LAYOUT_ACCELERATED_UPLOAD); + layout_flags); if (intel_miptree_wants_hiz_buffer(brw, new_mt)) { intel_miptree_alloc_hiz(brw, new_mt); @@ -1076,7 +1080,7 @@ brw_render_cache_set_check_flush(struct brw_context *brw, drm_intel_bo *bo) if (!_mesa_set_search(brw->render_cache, bo)) return; - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); } /** diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 6aa969a4930..e85c3f00c7b 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -272,7 +272,6 @@ intel_miptree_create_layout(struct brw_context *brw, GLuint height0, GLuint depth0, GLuint num_samples, - enum intel_miptree_tiling_mode requested, uint32_t layout_flags) { struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); @@ -280,7 +279,7 @@ intel_miptree_create_layout(struct brw_context *brw, return NULL; DBG("%s target %s format %s level %d..%d slices %d <-- %p\n", __func__, - _mesa_lookup_enum_by_nr(target), + _mesa_enum_to_string(target), _mesa_get_format_name(format), first_level, 
last_level, depth0, mt); @@ -454,8 +453,10 @@ intel_miptree_create_layout(struct brw_context *brw, (brw->has_separate_stencil && intel_miptree_wants_hiz_buffer(brw, mt)))) { uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD; - if (brw->gen == 6) - stencil_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD; + if (brw->gen == 6) { + stencil_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD | + MIPTREE_LAYOUT_TILING_ANY; + } mt->stencil_mt = intel_miptree_create(brw, mt->target, @@ -466,7 +467,6 @@ intel_miptree_create_layout(struct brw_context *brw, mt->logical_height0, mt->logical_depth0, num_samples, - INTEL_MIPTREE_TILING_ANY, stencil_flags); if (!mt->stencil_mt) { @@ -510,7 +510,7 @@ intel_miptree_create_layout(struct brw_context *brw, assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0); } - brw_miptree_layout(brw, mt, requested, layout_flags); + brw_miptree_layout(brw, mt, layout_flags); if (mt->disable_aux_buffers) assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS); @@ -558,6 +558,53 @@ intel_lower_compressed_format(struct brw_context *brw, mesa_format format) } } +/* This function computes Yf/Ys tiled bo size, alignment and pitch. */ +static unsigned long +intel_get_yf_ys_bo_size(struct intel_mipmap_tree *mt, unsigned *alignment, + unsigned long *pitch) +{ + const uint32_t bpp = mt->cpp * 8; + const uint32_t aspect_ratio = (bpp == 16 || bpp == 64) ? 2 : 1; + uint32_t tile_width, tile_height; + unsigned long stride, size, aligned_y; + + assert(mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE); + + switch (bpp) { + case 8: + tile_height = 64; + break; + case 16: + case 32: + tile_height = 32; + break; + case 64: + case 128: + tile_height = 16; + break; + default: + unreachable("not reached"); + } + + if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YS) + tile_height *= 4; + + aligned_y = ALIGN(mt->total_height, tile_height); + stride = mt->total_width * mt->cpp; + tile_width = tile_height * mt->cpp * aspect_ratio; + stride = ALIGN(stride, tile_width); + size = stride * aligned_y; + + if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YF) { + assert(size % 4096 == 0); + *alignment = 4096; + } else { + assert(size % (64 * 1024) == 0); + *alignment = 64 * 1024; + } + *pitch = stride; + return size; +} struct intel_mipmap_tree * intel_miptree_create(struct brw_context *brw, @@ -569,7 +616,6 @@ intel_miptree_create(struct brw_context *brw, GLuint height0, GLuint depth0, GLuint num_samples, - enum intel_miptree_tiling_mode requested_tiling, uint32_t layout_flags) { struct intel_mipmap_tree *mt; @@ -587,7 +633,7 @@ intel_miptree_create(struct brw_context *brw, mt = intel_miptree_create_layout(brw, target, format, first_level, last_level, width0, height0, depth0, num_samples, - requested_tiling, layout_flags); + layout_flags); /* * pitch == 0 || height == 0 indicates the null texture */ @@ -616,10 +662,22 @@ intel_miptree_create(struct brw_context *brw, alloc_flags |= BO_ALLOC_FOR_RENDER; unsigned long pitch; - mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree", total_width, - total_height, mt->cpp, &mt->tiling, - &pitch, alloc_flags); mt->etc_format = etc_format; + + if (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) { + unsigned alignment = 0; + unsigned long size; + size = intel_get_yf_ys_bo_size(mt, &alignment, &pitch); + assert(size); + mt->bo = drm_intel_bo_alloc_for_render(brw->bufmgr, "miptree", + size, alignment); + } else { + mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree", + total_width, total_height, mt->cpp, + &mt->tiling, &pitch, + alloc_flags); + } + mt->pitch = pitch; /* If the BO is too 
large to fit in the aperture, we need to use the @@ -698,17 +756,16 @@ intel_miptree_create_for_bo(struct brw_context *brw, target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D; - /* 'requested' parameter of intel_miptree_create_layout() is relevant - * only for non bo miptree. Tiling for bo is already computed above. - * So, the tiling requested (INTEL_MIPTREE_TILING_ANY) below is - * just a place holder and will not make any change to the miptree - * tiling format. + /* The BO already has a tiling format and we shouldn't confuse the lower + * layers by making it try to find a tiling format again. */ + assert((layout_flags & MIPTREE_LAYOUT_TILING_ANY) == 0); + assert((layout_flags & MIPTREE_LAYOUT_TILING_NONE) == 0); + layout_flags |= MIPTREE_LAYOUT_FOR_BO; mt = intel_miptree_create_layout(brw, target, format, 0, 0, width, height, depth, 0, - INTEL_MIPTREE_TILING_ANY, layout_flags); if (!mt) return NULL; @@ -816,11 +873,13 @@ intel_miptree_create_for_renderbuffer(struct brw_context *brw, uint32_t depth = 1; bool ok; GLenum target = num_samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; + const uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD | + MIPTREE_LAYOUT_TILING_ANY; + mt = intel_miptree_create(brw, target, format, 0, 0, width, height, depth, num_samples, - INTEL_MIPTREE_TILING_ANY, - MIPTREE_LAYOUT_ACCELERATED_UPLOAD); + layout_flags); if (!mt) goto fail; @@ -1325,6 +1384,8 @@ intel_miptree_alloc_mcs(struct brw_context *brw, * * "The MCS surface must be stored as Tile Y." */ + const uint32_t mcs_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD | + MIPTREE_LAYOUT_TILING_Y; mt->mcs_mt = intel_miptree_create(brw, mt->target, format, @@ -1334,8 +1395,7 @@ intel_miptree_alloc_mcs(struct brw_context *brw, mt->logical_height0, mt->logical_depth0, 0 /* num_samples */, - INTEL_MIPTREE_TILING_Y, - MIPTREE_LAYOUT_ACCELERATED_UPLOAD); + mcs_flags); /* From the Ivy Bridge PRM, Vol 2 Part 1 p326: * @@ -1383,9 +1443,11 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, unsigned mcs_height = ALIGN(mt->logical_height0, height_divisor) / height_divisor; assert(mt->logical_depth0 == 1); - uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD; - if (brw->gen >= 8) + uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD | + MIPTREE_LAYOUT_TILING_Y; + if (brw->gen >= 8) { layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16; + } mt->mcs_mt = intel_miptree_create(brw, mt->target, format, @@ -1395,7 +1457,6 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, mcs_height, mt->logical_depth0, 0 /* num_samples */, - INTEL_MIPTREE_TILING_Y, layout_flags); return mt->mcs_mt; @@ -1456,21 +1517,23 @@ intel_gen7_hiz_buf_create(struct brw_context *brw, /* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents * adjustments required for Z_Height and Z_Width based on multisampling. 
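 *
 * Worked example (editorial, not part of the patch): a 1024x768 depth
 * miptree with 4 samples is sized as if it were 2048x1536 before the HiZ
 * dimensions are derived on Gen7/8; with the change below, Gen9 skips
 * this doubling entirely.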
*/ - switch (mt->num_samples) { - case 0: - case 1: - break; - case 2: - case 4: - z_width *= 2; - z_height *= 2; - break; - case 8: - z_width *= 4; - z_height *= 2; - break; - default: - unreachable("unsupported sample count"); + if (brw->gen < 9) { + switch (mt->num_samples) { + case 0: + case 1: + break; + case 2: + case 4: + z_width *= 2; + z_height *= 2; + break; + case 8: + z_width *= 4; + z_height *= 2; + break; + default: + unreachable("unsupported sample count"); + } } const unsigned vertical_align = 8; /* 'j' in the docs */ @@ -1646,6 +1709,7 @@ intel_hiz_miptree_buf_create(struct brw_context *brw, if (!buf) return NULL; + layout_flags |= MIPTREE_LAYOUT_TILING_ANY; buf->mt = intel_miptree_create(brw, mt->target, mt->format, @@ -1655,7 +1719,6 @@ intel_hiz_miptree_buf_create(struct brw_context *brw, mt->logical_height0, mt->logical_depth0, mt->num_samples, - INTEL_MIPTREE_TILING_ANY, layout_flags); if (!buf->mt) { free(buf); @@ -2086,7 +2149,7 @@ intel_miptree_map_blit(struct brw_context *brw, map->mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format, 0, 0, map->w, map->h, 1, - 0, INTEL_MIPTREE_TILING_NONE, 0); + 0, MIPTREE_LAYOUT_TILING_NONE); if (!map->mt) { fprintf(stderr, "Failed to allocate blit temporary\n"); diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index bde6daa4e2d..790d3129207 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -516,12 +516,6 @@ struct intel_mipmap_tree GLuint refcount; }; -enum intel_miptree_tiling_mode { - INTEL_MIPTREE_TILING_ANY, - INTEL_MIPTREE_TILING_Y, - INTEL_MIPTREE_TILING_NONE, -}; - void intel_get_non_msrt_mcs_alignment(struct brw_context *brw, struct intel_mipmap_tree *mt, @@ -541,6 +535,11 @@ enum { MIPTREE_LAYOUT_FOR_BO = 1 << 2, MIPTREE_LAYOUT_DISABLE_AUX = 1 << 3, MIPTREE_LAYOUT_FORCE_HALIGN16 = 1 << 4, + + MIPTREE_LAYOUT_TILING_Y = 1 << 5, + MIPTREE_LAYOUT_TILING_NONE = 1 << 6, + MIPTREE_LAYOUT_TILING_ANY = MIPTREE_LAYOUT_TILING_Y | + MIPTREE_LAYOUT_TILING_NONE, }; struct intel_mipmap_tree *intel_miptree_create(struct brw_context *brw, @@ -552,7 +551,6 @@ struct intel_mipmap_tree *intel_miptree_create(struct brw_context *brw, GLuint height0, GLuint depth0, GLuint num_samples, - enum intel_miptree_tiling_mode, uint32_t flags); struct intel_mipmap_tree * @@ -771,7 +769,6 @@ brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw, void brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt, - enum intel_miptree_tiling_mode requested, uint32_t layout_flags); void *intel_miptree_map_raw(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c index 30380570d62..3fe506e3cf1 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c @@ -247,7 +247,7 @@ intelReadPixels(struct gl_context * ctx, * rendered to via a PBO at any point, so it seems better to just * flush here unconditionally. */ - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); return; } diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h index bd14e189da3..b4283da9633 100644 --- a/src/mesa/drivers/dri/i965/intel_reg.h +++ b/src/mesa/drivers/dri/i965/intel_reg.h @@ -47,6 +47,9 @@ /* Load a value from memory into a register. Only available on Gen7+. 
*/ #define GEN7_MI_LOAD_REGISTER_MEM (CMD_MI | (0x29 << 23)) # define MI_LOAD_REGISTER_MEM_USE_GGTT (1 << 22) +/* Haswell RS control */ +#define MI_RS_CONTROL (CMD_MI | (0x6 << 23)) +#define MI_RS_STORE_DATA_IMM (CMD_MI | (0x2b << 23)) /* Manipulate the predicate bit based on some register values. Only on Gen7+ */ #define GEN7_MI_PREDICATE (CMD_MI | (0xC << 23)) @@ -102,6 +105,8 @@ #define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22)) +#define XY_FAST_COPY_BLT_CMD (CMD_2D | (0x42 << 22)) + #define XY_TEXT_IMMEDIATE_BLIT_CMD (CMD_2D | (0x31 << 22)) # define XY_TEXT_BYTE_PACKED (1 << 16) @@ -111,10 +116,24 @@ #define XY_SRC_TILED (1 << 15) #define XY_DST_TILED (1 << 11) +/* BR00 */ +#define XY_FAST_SRC_TILED_64K (3 << 20) +#define XY_FAST_SRC_TILED_Y (2 << 20) +#define XY_FAST_SRC_TILED_X (1 << 20) + +#define XY_FAST_DST_TILED_64K (3 << 13) +#define XY_FAST_DST_TILED_Y (2 << 13) +#define XY_FAST_DST_TILED_X (1 << 13) + /* BR13 */ #define BR13_8 (0x0 << 24) #define BR13_565 (0x1 << 24) #define BR13_8888 (0x3 << 24) +#define BR13_16161616 (0x4 << 24) +#define BR13_32323232 (0x5 << 24) + +#define XY_FAST_SRC_TRMODE_YF (1 << 31) +#define XY_FAST_DST_TRMODE_YF (1 << 30) /* Pipeline Statistics Counter Registers */ #define IA_VERTICES_COUNT 0x2310 diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index de14696bd76..a164c6985dc 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -229,6 +229,12 @@ static struct intel_image_format intel_image_formats[] = { { __DRI_IMAGE_FOURCC_RGB565, __DRI_IMAGE_COMPONENTS_RGB, 1, { { 0, 0, 0, __DRI_IMAGE_FORMAT_RGB565, 2 } } }, + { __DRI_IMAGE_FOURCC_R8, __DRI_IMAGE_COMPONENTS_R, 1, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, } }, + + { __DRI_IMAGE_FOURCC_GR88, __DRI_IMAGE_COMPONENTS_RG, 1, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 }, } }, + { __DRI_IMAGE_FOURCC_YUV410, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, { 1, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 }, @@ -1123,6 +1129,50 @@ intel_detect_swizzling(struct intel_screen *screen) return true; } +static int +intel_detect_timestamp(struct intel_screen *screen) +{ + uint64_t dummy = 0, last = 0; + int upper, lower, loops; + + /* On 64bit systems, some old kernels trigger a hw bug resulting in the + * TIMESTAMP register being shifted and the low 32bits always zero. + * + * More recent kernels offer an interface to read the full 36bits + * everywhere. + */ + if (drm_intel_reg_read(screen->bufmgr, TIMESTAMP | 1, &dummy) == 0) + return 3; + + /* Determine if we have a 32bit or 64bit kernel by inspecting the + * upper 32bits for a rapidly changing timestamp. + */ + if (drm_intel_reg_read(screen->bufmgr, TIMESTAMP, &last)) + return 0; + + upper = lower = 0; + for (loops = 0; loops < 10; loops++) { + /* The TIMESTAMP should change every 80ns, so several round trips + * through the kernel should be enough to advance it. + */ + if (drm_intel_reg_read(screen->bufmgr, TIMESTAMP, &dummy)) + return 0; + + upper += (dummy >> 32) != (last >> 32); + if (upper > 1) /* beware 32bit counter overflow */ + return 2; /* upper dword holds the low 32bits of the timestamp */ + + lower += (dummy & 0xffffffff) != (last & 0xffffffff); + if (lower > 1) + return 1; /* timestamp is unshifted */ + + last = dummy; + } + + /* No advancement? No timestamp! */ + return 0; +} + /** * Return array of MSAA modes supported by the hardware. The array is * zero-terminated and sorted in decreasing order. 
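/* Editorial aside -- an illustrative sketch, not part of this commit: one way
 * a caller could consume the code stored in screen->hw_has_timestamp by
 * intel_detect_timestamp() above. The helper name and locals are invented;
 * only drm_intel_reg_read() and TIMESTAMP are taken from the code above.
 */
static uint64_t
read_gpu_timestamp(struct intel_screen *screen)
{
   uint64_t raw = 0;

   switch (screen->hw_has_timestamp) {
   case 3: /* kernel exposes the full 36-bit register */
      drm_intel_reg_read(screen->bufmgr, TIMESTAMP | 1, &raw);
      return raw;
   case 2: /* 64-bit kernel hit by the hw bug: the low 32 bits of the
            * counter land in the upper dword */
      drm_intel_reg_read(screen->bufmgr, TIMESTAMP, &raw);
      return raw >> 32;
   case 1: /* unshifted: the low dword is the counter */
      drm_intel_reg_read(screen->bufmgr, TIMESTAMP, &raw);
      return raw & 0xffffffff;
   default: /* 0: no usable TIMESTAMP register */
      return 0;
   }
}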
@@ -1309,11 +1359,6 @@ set_max_gl_versions(struct intel_screen *screen) } } -/* drop when libdrm 2.4.61 is released */ -#ifndef I915_PARAM_REVISION -#define I915_PARAM_REVISION 32 -#endif - static int brw_get_revision(int fd) { @@ -1332,6 +1377,11 @@ brw_get_revision(int fd) return revision; } +/* Drop when RS headers get pulled to libdrm */ +#ifndef I915_PARAM_HAS_RESOURCE_STREAMER +#define I915_PARAM_HAS_RESOURCE_STREAMER 36 +#endif + /** * This is the driver specific part of the createNewScreen entry point. * Called when using DRI2. @@ -1378,6 +1428,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) intelScreen->hw_must_use_separate_stencil = intelScreen->devinfo->gen >= 7; intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen); + intelScreen->hw_has_timestamp = intel_detect_timestamp(intelScreen); const char *force_msaa = getenv("INTEL_FORCE_MSAA"); if (force_msaa) { @@ -1423,6 +1474,15 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) intelScreen->compiler = brw_compiler_create(intelScreen, intelScreen->devinfo); + if (intelScreen->devinfo->has_resource_streamer) { + int val = -1; + getparam.param = I915_PARAM_HAS_RESOURCE_STREAMER; + getparam.value = &val; + + drmIoctl(psp->fd, DRM_IOCTL_I915_GETPARAM, &getparam); + intelScreen->has_resource_streamer = val > 0; + } + return (const __DRIconfig**) intel_screen_make_configs(psp); } diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h index 742b3d30eee..fd5143eecba 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.h +++ b/src/mesa/drivers/dri/i965/intel_screen.h @@ -52,6 +52,13 @@ struct intel_screen bool hw_has_swizzling; + int hw_has_timestamp; + + /** + * Does the kernel support resource streamer? + */ + bool has_resource_streamer; + /** * Does the kernel support context reset notifications? 
*/ diff --git a/src/mesa/drivers/dri/i965/intel_syncobj.c b/src/mesa/drivers/dri/i965/intel_syncobj.c index 3cfa7e593ab..c44c4beceef 100644 --- a/src/mesa/drivers/dri/i965/intel_syncobj.c +++ b/src/mesa/drivers/dri/i965/intel_syncobj.c @@ -69,7 +69,7 @@ brw_fence_insert(struct brw_context *brw, struct brw_fence *fence) assert(!fence->batch_bo); assert(!fence->signalled); - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); fence->batch_bo = brw->batch.bo; drm_intel_bo_reference(fence->batch_bo); intel_batchbuffer_flush(brw); diff --git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c index b0181ad1d75..e16b0def0d4 100644 --- a/src/mesa/drivers/dri/i965/intel_tex.c +++ b/src/mesa/drivers/dri/i965/intel_tex.c @@ -145,7 +145,7 @@ intel_alloc_texture_storage(struct gl_context *ctx, 0, levels - 1, width, height, depth, num_samples, - INTEL_MIPTREE_TILING_ANY, 0); + MIPTREE_LAYOUT_TILING_ANY); if (intel_texobj->mt == NULL) { return false; diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c index ebe84b664d4..93a8cdee0cb 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_image.c +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -80,8 +80,7 @@ intel_miptree_create_for_teximage(struct brw_context *brw, height, depth, intelImage->base.Base.NumSamples, - INTEL_MIPTREE_TILING_ANY, - layout_flags); + layout_flags | MIPTREE_LAYOUT_TILING_ANY); } static void @@ -98,8 +97,8 @@ intelTexImage(struct gl_context * ctx, DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n", __func__, _mesa_get_format_name(texImage->TexFormat), - _mesa_lookup_enum_by_nr(texImage->TexObject->Target), - _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type), + _mesa_enum_to_string(texImage->TexObject->Target), + _mesa_enum_to_string(format), _mesa_enum_to_string(type), texImage->Level, texImage->Width, texImage->Height, texImage->Depth); /* Allocate storage for texture data. */ @@ -472,39 +471,44 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx, } static void -intel_get_tex_image(struct gl_context *ctx, - GLenum format, GLenum type, GLvoid *pixels, - struct gl_texture_image *texImage) { +intel_get_tex_sub_image(struct gl_context *ctx, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLint depth, + GLenum format, GLenum type, GLvoid *pixels, + struct gl_texture_image *texImage) +{ struct brw_context *brw = brw_context(ctx); bool ok; DBG("%s\n", __func__); if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) { - if (_mesa_meta_pbo_GetTexSubImage(ctx, 3, texImage, 0, 0, 0, - texImage->Width, texImage->Height, - texImage->Depth, format, type, + if (_mesa_meta_pbo_GetTexSubImage(ctx, 3, texImage, + xoffset, yoffset, zoffset, + width, height, depth, format, type, pixels, &ctx->Pack)) { /* Flush to guarantee coherency between the render cache and other * caches the PBO could potentially be bound to after this point. * See the related comment in intelReadPixels() for a more detailed * explanation. 
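 *
 * For example (editorial note, not in the patch): the PBO just written by
 * the blit may immediately be re-bound as a pixel unpack buffer and read
 * by a texture upload, or mapped by the CPU, so the render cache must be
 * flushed before those reads can be trusted.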
*/ - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); return; } perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__); } - ok = intel_gettexsubimage_tiled_memcpy(ctx, texImage, 0, 0, - texImage->Width, texImage->Height, + ok = intel_gettexsubimage_tiled_memcpy(ctx, texImage, xoffset, yoffset, + width, height, format, type, pixels, &ctx->Pack); if(ok) return; - _mesa_meta_GetTexImage(ctx, format, type, pixels, texImage); + _mesa_meta_GetTexSubImage(ctx, xoffset, yoffset, zoffset, + width, height, depth, + format, type, pixels, texImage); DBG("%s - DONE\n", __func__); } @@ -515,5 +519,5 @@ intelInitTextureImageFuncs(struct dd_function_table *functions) functions->TexImage = intelTexImage; functions->EGLImageTargetTexture2D = intel_image_target_texture_2d; functions->BindRenderbufferTexImage = intel_bind_renderbuffer_tex_image; - functions->GetTexImage = intel_get_tex_image; + functions->GetTexSubImage = intel_get_tex_sub_image; } diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c index 7507f7669a0..31e511f0b7b 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c @@ -206,8 +206,8 @@ intelTexSubImage(struct gl_context * ctx, DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n", __func__, _mesa_get_format_name(texImage->TexFormat), - _mesa_lookup_enum_by_nr(texImage->TexObject->Target), - _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type), + _mesa_enum_to_string(texImage->TexObject->Target), + _mesa_enum_to_string(format), _mesa_enum_to_string(type), texImage->Level, texImage->Width, texImage->Height, texImage->Depth); ok = _mesa_meta_pbo_TexSubImage(ctx, dims, texImage, diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c b/src/mesa/drivers/dri/i965/intel_tex_validate.c index 4991c2997ef..d3fb252b5d5 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_validate.c +++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c @@ -136,6 +136,8 @@ intel_finalize_mipmap_tree(struct brw_context *brw, GLuint unit) _mesa_get_format_name(firstImage->base.Base.TexFormat), width, height, depth, validate_last_level + 1); + const uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD | + MIPTREE_LAYOUT_TILING_ANY; intelObj->mt = intel_miptree_create(brw, intelObj->base.Target, firstImage->base.Base.TexFormat, @@ -145,8 +147,7 @@ intel_finalize_mipmap_tree(struct brw_context *brw, GLuint unit) height, depth, 0 /* num_samples */, - INTEL_MIPTREE_TILING_ANY, - MIPTREE_LAYOUT_ACCELERATED_UPLOAD); + layout_flags); if (!intelObj->mt) return false; } diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp index 8010fb4f610..ba67bc59e19 100644 --- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp @@ -283,10 +283,10 @@ TEST_F(cmod_propagation_test, intervening_dest_write) fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg src2 = v->vgrf(glsl_type::vec2_type); fs_reg zero(0.0f); - bld.ADD(offset(dest, 2), src0, src1); + bld.ADD(offset(dest, bld, 2), src0, src1); bld.emit(SHADER_OPCODE_TEX, dest, src2) ->regs_written = 4; - bld.CMP(bld.null_reg_f(), offset(dest, 2), zero, BRW_CONDITIONAL_GE); + bld.CMP(bld.null_reg_f(), offset(dest, bld, 2), zero, BRW_CONDITIONAL_GE); /* = Before = * diff --git a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp 
b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp index 3ef0cb319eb..1caa0b50ec6 100644 --- a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp @@ -367,10 +367,10 @@ TEST_F(saturate_propagation_test, intervening_dest_write) fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); fs_reg src2 = v->vgrf(glsl_type::vec2_type); - bld.ADD(offset(dst0, 2), src0, src1); + bld.ADD(offset(dst0, bld, 2), src0, src1); bld.emit(SHADER_OPCODE_TEX, dst0, src2) ->regs_written = 4; - set_saturate(true, bld.MOV(dst1, offset(dst0, 2))); + set_saturate(true, bld.MOV(dst1, offset(dst0, bld, 2))); /* = Before = * diff --git a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp index 84e43fa75cd..fbd9fa8f19b 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp @@ -53,7 +53,8 @@ public: } protected: - virtual dst_reg *make_reg_for_system_value(ir_variable *ir) + virtual dst_reg *make_reg_for_system_value(int location, + const glsl_type *type) { unreachable("Not reached"); } diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp index de2afd39cfe..a3055fcc851 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp @@ -56,7 +56,8 @@ public: } protected: - virtual dst_reg *make_reg_for_system_value(ir_variable *ir) + virtual dst_reg *make_reg_for_system_value(int location, + const glsl_type *type) { unreachable("Not reached"); } diff --git a/src/mesa/drivers/dri/nouveau/Makefile.am b/src/mesa/drivers/dri/nouveau/Makefile.am index 61af95a7dbc..01e34a8e3c3 100644 --- a/src/mesa/drivers/dri/nouveau/Makefile.am +++ b/src/mesa/drivers/dri/nouveau/Makefile.am @@ -38,8 +38,8 @@ AM_CFLAGS = \ -I$(top_srcdir)/src/mesa/drivers/dri/common \ $(DEFINES) \ $(VISIBILITY_CFLAGS) \ - $(NOUVEAU_CFLAGS) + $(NVVIEUX_CFLAGS) noinst_LTLIBRARIES = libnouveau_dri.la libnouveau_dri_la_SOURCES = $(NOUVEAU_C_FILES) -libnouveau_dri_la_LIBADD = $(NOUVEAU_LIBS) +libnouveau_dri_la_LIBADD = $(NVVIEUX_LIBS) diff --git a/src/mesa/drivers/dri/nouveau/nouveau_swtnl_t.c b/src/mesa/drivers/dri/nouveau/nouveau_swtnl_t.c index 0753c3a0019..755de2c4b68 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_swtnl_t.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_swtnl_t.c @@ -338,7 +338,6 @@ TAG(swtnl_init)(struct gl_context *ctx) NUM_VERTEX_ATTRS * 4 * sizeof(GLfloat)); _tnl_need_projected_coords(ctx, GL_FALSE); _tnl_allow_vertex_fog(ctx, GL_FALSE); - _tnl_wakeup(ctx); swtnl_alloc_vertices(ctx); } diff --git a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c index c85acec1268..a3fbad07e66 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c @@ -223,6 +223,7 @@ TAG(vbo_render_prims)(struct gl_context *ctx, GLboolean index_bounds_valid, GLuint min_index, GLuint max_index, struct gl_transform_feedback_object *tfb_vertcount, + unsigned stream, struct gl_buffer_object *indirect); static GLboolean @@ -455,6 +456,7 @@ TAG(vbo_render_prims)(struct gl_context *ctx, GLboolean index_bounds_valid, GLuint min_index, GLuint max_index, struct gl_transform_feedback_object *tfb_vertcount, + unsigned stream, struct gl_buffer_object *indirect) { struct nouveau_render_state 
*render = to_render_state(ctx); @@ -492,6 +494,7 @@ TAG(vbo_check_render_prims)(struct gl_context *ctx, GLboolean index_bounds_valid, GLuint min_index, GLuint max_index, struct gl_transform_feedback_object *tfb_vertcount, + unsigned stream, struct gl_buffer_object *indirect) { struct nouveau_context *nctx = to_nouveau_context(ctx); @@ -501,12 +504,12 @@ TAG(vbo_check_render_prims)(struct gl_context *ctx, if (nctx->fallback == HWTNL) TAG(vbo_render_prims)(ctx, prims, nr_prims, ib, index_bounds_valid, min_index, max_index, - tfb_vertcount, indirect); + tfb_vertcount, stream, indirect); if (nctx->fallback == SWTNL) _tnl_draw_prims(ctx, prims, nr_prims, ib, index_bounds_valid, min_index, max_index, - tfb_vertcount, indirect); + tfb_vertcount, stream, indirect); } void diff --git a/src/mesa/drivers/dri/nouveau/nv04_render.c b/src/mesa/drivers/dri/nouveau/nv04_render.c index 30e9f9aad96..3b7f7829044 100644 --- a/src/mesa/drivers/dri/nouveau/nv04_render.c +++ b/src/mesa/drivers/dri/nouveau/nv04_render.c @@ -285,7 +285,6 @@ nv04_render_init(struct gl_context *ctx) _tnl_init_vertices(ctx, tnl->vb.Size, NUM_VERTEX_ATTRS * 4 * sizeof(GLfloat)); _tnl_allow_pixel_fog(ctx, GL_FALSE); - _tnl_wakeup(ctx); } void diff --git a/src/mesa/drivers/dri/r200/r200_blit.c b/src/mesa/drivers/dri/r200/r200_blit.c index 3adc69423cd..d68a53e67f7 100644 --- a/src/mesa/drivers/dri/r200/r200_blit.c +++ b/src/mesa/drivers/dri/r200/r200_blit.c @@ -28,6 +28,7 @@ #include "radeon_common.h" #include "r200_context.h" #include "r200_blit.h" +#include "r200_tex.h" static inline uint32_t cmdpacket0(struct radeon_screen *rscrn, int reg, int count) @@ -40,22 +41,42 @@ static inline uint32_t cmdpacket0(struct radeon_screen *rscrn, /* common formats supported as both textures and render targets */ unsigned r200_check_blit(mesa_format mesa_format, uint32_t dst_pitch) { - /* XXX others? BE/LE? */ - switch (mesa_format) { - case MESA_FORMAT_B8G8R8A8_UNORM: - case MESA_FORMAT_B8G8R8X8_UNORM: - case MESA_FORMAT_B5G6R5_UNORM: - case MESA_FORMAT_B4G4R4A4_UNORM: - case MESA_FORMAT_B5G5R5A1_UNORM: - case MESA_FORMAT_A_UNORM8: - case MESA_FORMAT_L_UNORM8: - case MESA_FORMAT_I_UNORM8: - /* swizzled */ - case MESA_FORMAT_A8B8G8R8_UNORM: - case MESA_FORMAT_R8G8B8A8_UNORM: + /* XXX others? */ + if (_mesa_little_endian()) { + switch (mesa_format) { + case MESA_FORMAT_B8G8R8A8_UNORM: + case MESA_FORMAT_B8G8R8X8_UNORM: + case MESA_FORMAT_B5G6R5_UNORM: + case MESA_FORMAT_B4G4R4A4_UNORM: + case MESA_FORMAT_B5G5R5A1_UNORM: + case MESA_FORMAT_A_UNORM8: + case MESA_FORMAT_L_UNORM8: + case MESA_FORMAT_I_UNORM8: + /* swizzled - probably can't happen with the disabled Choose8888TexFormat code */ + case MESA_FORMAT_A8B8G8R8_UNORM: + case MESA_FORMAT_R8G8B8A8_UNORM: break; - default: + default: return 0; + } + } + else { + switch (mesa_format) { + case MESA_FORMAT_A8R8G8B8_UNORM: + case MESA_FORMAT_X8R8G8B8_UNORM: + case MESA_FORMAT_R5G6B5_UNORM: + case MESA_FORMAT_A4R4G4B4_UNORM: + case MESA_FORMAT_A1R5G5B5_UNORM: + case MESA_FORMAT_A_UNORM8: + case MESA_FORMAT_L_UNORM8: + case MESA_FORMAT_I_UNORM8: + /* swizzled - probably can't happen with the disabled Choose8888TexFormat code */ + case MESA_FORMAT_R8G8B8A8_UNORM: + case MESA_FORMAT_A8B8G8R8_UNORM: + break; + default: + return 0; + } } /* Rendering to small buffer doesn't work. @@ -112,41 +133,11 @@ static void inline emit_tx_setup(struct r200_context *r200, assert(height <= 2048); assert(offset % 32 == 0); - /* XXX others? BE/LE? 
*/ - switch (src_mesa_format) { - case MESA_FORMAT_B8G8R8A8_UNORM: - txformat |= R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP; - break; - case MESA_FORMAT_A8B8G8R8_UNORM: - txformat |= R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP; - break; - case MESA_FORMAT_R8G8B8A8_UNORM: - txformat |= R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP; - break; - case MESA_FORMAT_B8G8R8X8_UNORM: - txformat |= R200_TXFORMAT_ARGB8888; - break; - case MESA_FORMAT_B5G6R5_UNORM: - txformat |= R200_TXFORMAT_RGB565; - break; - case MESA_FORMAT_B4G4R4A4_UNORM: - txformat |= R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP; - break; - case MESA_FORMAT_B5G5R5A1_UNORM: - txformat |= R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP; - break; - case MESA_FORMAT_A_UNORM8: - case MESA_FORMAT_I_UNORM8: - txformat |= R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP; - break; - case MESA_FORMAT_L_UNORM8: - txformat |= R200_TXFORMAT_I8; - break; - case MESA_FORMAT_L8A8_UNORM: - txformat |= R200_TXFORMAT_AI88 | R200_TXFORMAT_ALPHA_IN_MAP; - break; - default: - break; + if (_mesa_little_endian()) { + txformat |= tx_table_le[src_mesa_format].format; + } + else { + txformat |= tx_table_be[src_mesa_format].format; } if (bo->flags & RADEON_BO_FLAGS_MACRO_TILE) @@ -155,11 +146,19 @@ static void inline emit_tx_setup(struct r200_context *r200, offset |= R200_TXO_MICRO_TILE; switch (dst_mesa_format) { + /* le */ case MESA_FORMAT_B8G8R8A8_UNORM: case MESA_FORMAT_B8G8R8X8_UNORM: case MESA_FORMAT_B5G6R5_UNORM: case MESA_FORMAT_B4G4R4A4_UNORM: case MESA_FORMAT_B5G5R5A1_UNORM: + /* be */ + case MESA_FORMAT_A8R8G8B8_UNORM: + case MESA_FORMAT_X8R8G8B8_UNORM: + case MESA_FORMAT_R5G6B5_UNORM: + case MESA_FORMAT_A4R4G4B4_UNORM: + case MESA_FORMAT_A1R5G5B5_UNORM: + /* little and big */ case MESA_FORMAT_A_UNORM8: case MESA_FORMAT_L_UNORM8: case MESA_FORMAT_I_UNORM8: @@ -183,6 +182,9 @@ static void inline emit_tx_setup(struct r200_context *r200, END_BATCH(); break; case MESA_FORMAT_A8B8G8R8_UNORM: + case MESA_FORMAT_R8G8B8A8_UNORM: + if ((dst_mesa_format == MESA_FORMAT_A8B8G8R8_UNORM && _mesa_little_endian()) || + (dst_mesa_format == MESA_FORMAT_R8G8B8A8_UNORM && !_mesa_little_endian())) { BEGIN_BATCH(10); OUT_BATCH_REGVAL(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE)); @@ -190,6 +192,8 @@ static void inline emit_tx_setup(struct r200_context *r200, R200_TXC_ARG_B_ZERO | R200_TXC_ARG_C_R0_COLOR | R200_TXC_OP_MADD)); + /* XXX I don't think this can work. This is output rotation, and alpha contains + * red, not alpha (we'd write gbrr). */ OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_0, (R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_ROTATE_GBA | R200_TXC_OUTPUT_REG_R0)); @@ -201,8 +205,16 @@ static void inline emit_tx_setup(struct r200_context *r200, (R200_TXA_REPL_RED << R200_TXA_REPL_ARG_C_SHIFT) | R200_TXA_OUTPUT_REG_R0)); END_BATCH(); - break; - case MESA_FORMAT_R8G8B8A8_UNORM: + } + else { + /* XXX pretty sure could do this with just 2 instead of 4 instructions. + * Like so: + * 1st: use RGA output rotation, rgb arg replicate b, a arg r, write mask rb. + * That's just one instruction in fact but I'm not entirely sure it works + * if some of those incoming r0 components are never written (due to mask) + * in the shader itself to r0. + * In any case this case (and the one above) may not be reachable with + * disabled Choose8888TexFormat code. 
*/ BEGIN_BATCH(34); OUT_BATCH_REGVAL(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE | @@ -272,7 +284,8 @@ static void inline emit_tx_setup(struct r200_context *r200, OUT_BATCH_REGVAL(R200_PP_TXABLEND2_3, (R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0)); END_BATCH(); - break; + } + break; } BEGIN_BATCH(18); @@ -306,21 +319,27 @@ static inline void emit_cb_setup(struct r200_context *r200, uint32_t dst_format = 0; BATCH_LOCALS(&r200->radeon); - /* XXX others? BE/LE? */ switch (mesa_format) { + /* The first of each pair is for little, the second for big endian */ case MESA_FORMAT_B8G8R8A8_UNORM: + case MESA_FORMAT_A8R8G8B8_UNORM: case MESA_FORMAT_B8G8R8X8_UNORM: + case MESA_FORMAT_X8R8G8B8_UNORM: + /* These two are valid both for little and big endian (swizzled) */ case MESA_FORMAT_A8B8G8R8_UNORM: case MESA_FORMAT_R8G8B8A8_UNORM: dst_format = RADEON_COLOR_FORMAT_ARGB8888; break; case MESA_FORMAT_B5G6R5_UNORM: + case MESA_FORMAT_R5G6B5_UNORM: dst_format = RADEON_COLOR_FORMAT_RGB565; break; case MESA_FORMAT_B4G4R4A4_UNORM: + case MESA_FORMAT_A4R4G4B4_UNORM: dst_format = RADEON_COLOR_FORMAT_ARGB4444; break; case MESA_FORMAT_B5G5R5A1_UNORM: + case MESA_FORMAT_A1R5G5B5_UNORM: dst_format = RADEON_COLOR_FORMAT_ARGB1555; break; case MESA_FORMAT_A_UNORM8: @@ -547,5 +566,21 @@ unsigned r200_blit(struct gl_context *ctx, radeonFlush(ctx); + /* We submitted those packets outside our state atom mechanism. Thus + * make sure the atoms are resubmitted the next time. */ + r200->hw.cst.dirty = GL_TRUE; + r200->hw.ctx.dirty = GL_TRUE; + r200->hw.vap.dirty = GL_TRUE; + r200->hw.msk.dirty = GL_TRUE; + r200->hw.pix[0].dirty = GL_TRUE; + r200->hw.pix[1].dirty = GL_TRUE; + r200->hw.pix[2].dirty = GL_TRUE; + r200->hw.pix[3].dirty = GL_TRUE; + r200->hw.sci.dirty = GL_TRUE; + r200->hw.set.dirty = GL_TRUE; + r200->hw.tex[0].dirty = GL_TRUE; + r200->hw.vte.dirty = GL_TRUE; + r200->hw.vtx.dirty = GL_TRUE; + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index fb15082114f..2a42ab3f4c8 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -225,18 +225,9 @@ GLboolean r200CreateContext( gl_api api, rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache, "def_max_anisotropy"); - if ( sPriv->drm_version.major == 1 - && driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) { - if ( sPriv->drm_version.minor < 13 ) - fprintf( stderr, "DRM version 1.%d too old to support HyperZ, " - "disabling.\n", sPriv->drm_version.minor ); - else - rmesa->using_hyperz = GL_TRUE; - } + if (driQueryOptionb( &rmesa->radeon.optionCache, "hyperz")) + rmesa->using_hyperz = GL_TRUE; - if ( sPriv->drm_version.minor >= 15 ) - rmesa->texmicrotile = GL_TRUE; - /* Init default driver functions then plug in our R200-specific functions * (the texture functions are especially important) */ diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h index eb498f7406b..c02a4f399ee 100644 --- a/src/mesa/drivers/dri/r200/r200_context.h +++ b/src/mesa/drivers/dri/r200/r200_context.h @@ -109,7 +109,6 @@ struct r200_texture_state { #define CTX_RB3D_COLOROFFSET 11 #define CTX_CMD_2 12 /* why */ #define CTX_RB3D_COLORPITCH 13 /* why */ -#define CTX_STATE_SIZE_OLDDRM 14 #define CTX_CMD_3 14 #define CTX_RB3D_BLENDCOLOR 15 #define CTX_RB3D_ABLENDCNTL 16 @@ -167,9 +166,6 @@ struct r200_texture_state { #define TEX_PP_TXSIZE 4 /*2c0c*/ #define TEX_PP_TXPITCH 5 /*2c10*/ #define 
TEX_PP_BORDER_COLOR 6 /*2c14*/ -#define TEX_CMD_1_OLDDRM 7 -#define TEX_PP_TXOFFSET_OLDDRM 8 /*2d00 */ -#define TEX_STATE_SIZE_OLDDRM 9 #define TEX_PP_CUBIC_FACES 7 #define TEX_PP_TXMULTI_CTL 8 #define TEX_CMD_1_NEWDRM 9 @@ -618,7 +614,6 @@ struct r200_context { struct r200_swtcl_info swtcl; GLboolean using_hyperz; - GLboolean texmicrotile; struct ati_fragment_shader *afs_loaded; }; diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index 6fe70b5c9d0..cca176d7f9b 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -1546,7 +1546,7 @@ void r200UpdateWindow( struct gl_context *ctx ) GLfloat xoffset = 0; GLfloat yoffset = dPriv ? (GLfloat) dPriv->h : 0; const GLboolean render_to_fbo = (ctx->DrawBuffer ? _mesa_is_user_fbo(ctx->DrawBuffer) : 0); - double scale[3], translate[3]; + float scale[3], translate[3]; GLfloat y_scale, y_bias; if (render_to_fbo) { @@ -1669,7 +1669,7 @@ static void r200Enable( struct gl_context *ctx, GLenum cap, GLboolean state ) if ( R200_DEBUG & RADEON_STATE ) fprintf( stderr, "%s( %s = %s )\n", __func__, - _mesa_lookup_enum_by_nr( cap ), + _mesa_enum_to_string( cap ), state ? "GL_TRUE" : "GL_FALSE" ); switch ( cap ) { diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c index d9d1a0ed227..ad64f788b9f 100644 --- a/src/mesa/drivers/dri/r200/r200_state_init.c +++ b/src/mesa/drivers/dri/r200/r200_state_init.c @@ -254,7 +254,7 @@ CHECK( never, GL_FALSE, 0 ) CHECK( tex_any, ctx->Texture._MaxEnabledTexImageUnit != -1, 0 ) CHECK( tf, (ctx->Texture._MaxEnabledTexImageUnit != -1 && !ctx->ATIFragmentShader._Enabled), 0 ); CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled, 0 ) - CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled), 0 ) +CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled), 0 ) CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)), 0 ) CHECK( afs, ctx->ATIFragmentShader._Enabled, 0 ) CHECK( tex_cube, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT, 3 + 3*5 - CUBE_STATE_SIZE ) @@ -453,12 +453,15 @@ static void ctx_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom) atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888; else switch (rrb->base.Base.Format) { case MESA_FORMAT_B5G6R5_UNORM: + case MESA_FORMAT_R5G6B5_UNORM: atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565; break; case MESA_FORMAT_B4G4R4A4_UNORM: + case MESA_FORMAT_A4R4G4B4_UNORM: atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444; break; case MESA_FORMAT_B5G5R5A1_UNORM: + case MESA_FORMAT_A1R5G5B5_UNORM: atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555; break; default: diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c index 083a1840d9e..feee0b2ba3f 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.c +++ b/src/mesa/drivers/dri/r200/r200_tex.c @@ -68,9 +68,9 @@ static void r200SetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap, GLenu radeon_print(RADEON_TEXTURE, RADEON_TRACE, "%s(tex %p) sw %s, tw %s, rw %s\n", __func__, t, - _mesa_lookup_enum_by_nr(swrap), - _mesa_lookup_enum_by_nr(twrap), - _mesa_lookup_enum_by_nr(rwrap)); + _mesa_enum_to_string(swrap), + _mesa_enum_to_string(twrap), + _mesa_enum_to_string(rwrap)); t->pp_txfilter &= ~(R200_CLAMP_S_MASK | R200_CLAMP_T_MASK | R200_BORDER_MODE_D3D); @@ -225,8 +225,8 @@ static void 
r200SetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf ) radeon_print(RADEON_TEXTURE, RADEON_TRACE, "%s(tex %p) minf %s, maxf %s, anisotropy %d.\n", __func__, t, - _mesa_lookup_enum_by_nr(minf), - _mesa_lookup_enum_by_nr(magf), + _mesa_enum_to_string(minf), + _mesa_enum_to_string(magf), anisotropy); if ( anisotropy == R200_MAX_ANISO_1_TO_1 ) { @@ -302,7 +302,7 @@ static void r200TexEnv( struct gl_context *ctx, GLenum target, struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; radeon_print(RADEON_TEXTURE | RADEON_STATE, RADEON_VERBOSE, "%s( %s )\n", - __func__, _mesa_lookup_enum_by_nr( pname ) ); + __func__, _mesa_enum_to_string( pname ) ); /* This is incorrect: Need to maintain this data for each of * GL_TEXTURE_{123}D, GL_TEXTURE_RECTANGLE_NV, etc, and switch @@ -384,7 +384,7 @@ static void r200TexParameter( struct gl_context *ctx, radeon_print(RADEON_TEXTURE | RADEON_STATE, RADEON_VERBOSE, "%s(%p, tex %p) pname %s\n", __func__, ctx, texObj, - _mesa_lookup_enum_by_nr( pname ) ); + _mesa_enum_to_string( pname ) ); switch ( pname ) { case GL_TEXTURE_MIN_FILTER: @@ -415,7 +415,7 @@ static void r200DeleteTexture(struct gl_context * ctx, struct gl_texture_object radeon_print(RADEON_TEXTURE | RADEON_STATE, RADEON_NORMAL, "%s( %p (target = %s) )\n", __func__, (void *)texObj, - _mesa_lookup_enum_by_nr(texObj->Target)); + _mesa_enum_to_string(texObj->Target)); if (rmesa) { int i; @@ -473,7 +473,7 @@ static struct gl_texture_object *r200NewTextureObject(struct gl_context * ctx, radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_NORMAL, "%s(%p) target %s, new texture %p.\n", __func__, ctx, - _mesa_lookup_enum_by_nr(target), t); + _mesa_enum_to_string(target), t); _mesa_initialize_texture_object(ctx, &t->base, name, target); t->base.Sampler.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy; diff --git a/src/mesa/drivers/dri/r200/r200_tex.h b/src/mesa/drivers/dri/r200/r200_tex.h index d7e91d1a0c8..a8c31b741ed 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.h +++ b/src/mesa/drivers/dri/r200/r200_tex.h @@ -52,4 +52,68 @@ extern void r200TexUpdateParameters(struct gl_context *ctx, GLuint unit); extern void set_re_cntl_d3d( struct gl_context *ctx, int unit, GLboolean use_d3d ); +struct tx_table { + GLuint format, filter; +}; + +/* Note the tables (have to) contain invalid entries (if they are only valid + * for either be/le) */ +static const struct tx_table tx_table_be[] = +{ + [ MESA_FORMAT_A8B8G8R8_UNORM ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_R8G8B8A8_UNORM ] = { R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_B8G8R8A8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_A8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_BGR_UNORM8 ] = { 0xffffffff, 0 }, + [ MESA_FORMAT_B5G6R5_UNORM ] = { R200_TXFORMAT_RGB565, 0 }, + [ MESA_FORMAT_R5G6B5_UNORM ] = { R200_TXFORMAT_RGB565, 0 }, + [ MESA_FORMAT_B4G4R4A4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_A4R4G4B4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_B5G5R5A1_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_A1R5G5B5_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_L8A8_UNORM ] = { R200_TXFORMAT_AI88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_A8L8_UNORM ] = { R200_TXFORMAT_AI88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_A_UNORM8 ] 
= { R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_L_UNORM8 ] = { R200_TXFORMAT_I8, 0 }, + [ MESA_FORMAT_I_UNORM8 ] = { R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_YCBCR ] = { R200_TXFORMAT_YVYU422, R200_YUV_TO_RGB }, + [ MESA_FORMAT_YCBCR_REV ] = { R200_TXFORMAT_VYUY422, R200_YUV_TO_RGB }, + [ MESA_FORMAT_RGB_FXT1 ] = { 0xffffffff, 0 }, + [ MESA_FORMAT_RGBA_FXT1 ] = { 0xffffffff, 0 }, + [ MESA_FORMAT_RGB_DXT1 ] = { R200_TXFORMAT_DXT1, 0 }, + [ MESA_FORMAT_RGBA_DXT1 ] = { R200_TXFORMAT_DXT1 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_RGBA_DXT3 ] = { R200_TXFORMAT_DXT23 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_RGBA_DXT5 ] = { R200_TXFORMAT_DXT45 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, +}; + +static const struct tx_table tx_table_le[] = +{ + [ MESA_FORMAT_A8B8G8R8_UNORM ] = { R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_R8G8B8A8_UNORM ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_B8G8R8A8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_A8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_BGR_UNORM8 ] = { R200_TXFORMAT_ARGB8888, 0 }, + [ MESA_FORMAT_B5G6R5_UNORM ] = { R200_TXFORMAT_RGB565, 0 }, + [ MESA_FORMAT_R5G6B5_UNORM ] = { R200_TXFORMAT_RGB565, 0 }, + [ MESA_FORMAT_B4G4R4A4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_A4R4G4B4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_B5G5R5A1_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_A1R5G5B5_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_L8A8_UNORM ] = { R200_TXFORMAT_AI88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_A8L8_UNORM ] = { R200_TXFORMAT_AI88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_A_UNORM8 ] = { R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_L_UNORM8 ] = { R200_TXFORMAT_I8, 0 }, + [ MESA_FORMAT_I_UNORM8 ] = { R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_YCBCR ] = { R200_TXFORMAT_YVYU422, R200_YUV_TO_RGB }, + [ MESA_FORMAT_YCBCR_REV ] = { R200_TXFORMAT_VYUY422, R200_YUV_TO_RGB }, + [ MESA_FORMAT_RGB_FXT1 ] = { 0xffffffff, 0 }, + [ MESA_FORMAT_RGBA_FXT1 ] = { 0xffffffff, 0 }, + [ MESA_FORMAT_RGB_DXT1 ] = { R200_TXFORMAT_DXT1, 0 }, + [ MESA_FORMAT_RGBA_DXT1 ] = { R200_TXFORMAT_DXT1 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_RGBA_DXT3 ] = { R200_TXFORMAT_DXT23 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_RGBA_DXT5 ] = { R200_TXFORMAT_DXT45 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, +}; + + + #endif /* __R200_TEX_H__ */ diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c index ab84d1752ba..441ac730d4c 100644 --- a/src/mesa/drivers/dri/r200/r200_texstate.c +++ b/src/mesa/drivers/dri/r200/r200_texstate.c @@ -49,80 +49,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
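As the comment above notes, both tables intentionally contain 0xffffffff placeholder entries for formats that are only meaningful on one byte order, so lookups need a validity guard before the value is used. A sketch of such a guard, modelled on the VALID_FORMAT macro that r200_texstate.c keeps (shown just below); the function name is illustrative and the declarations are assumed to come from r200_tex.h:

/* Reject formats beyond the table range as well as sentinel entries. */
static GLboolean r200_txformat_supported(mesa_format f)
{
   const struct tx_table *tab =
      _mesa_little_endian() ? tx_table_le : tx_table_be;

   return f <= MESA_FORMAT_RGBA_DXT5 && tab[f].format != 0xffffffff;
}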
#include "r200_tex.h" #include "r200_tcl.h" - -#define R200_TXFORMAT_A8 R200_TXFORMAT_I8 -#define R200_TXFORMAT_L8 R200_TXFORMAT_I8 -#define R200_TXFORMAT_AL88 R200_TXFORMAT_AI88 -#define R200_TXFORMAT_YCBCR R200_TXFORMAT_YVYU422 -#define R200_TXFORMAT_YCBCR_REV R200_TXFORMAT_VYUY422 -#define R200_TXFORMAT_RGB_DXT1 R200_TXFORMAT_DXT1 -#define R200_TXFORMAT_RGBA_DXT1 R200_TXFORMAT_DXT1 -#define R200_TXFORMAT_RGBA_DXT3 R200_TXFORMAT_DXT23 -#define R200_TXFORMAT_RGBA_DXT5 R200_TXFORMAT_DXT45 - #define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \ && (tx_table_be[f].format != 0xffffffff) ) -struct tx_table { - GLuint format, filter; -}; - -static const struct tx_table tx_table_be[] = -{ - [ MESA_FORMAT_A8B8G8R8_UNORM ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_R8G8B8A8_UNORM ] = { R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_B8G8R8A8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_A8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_BGR_UNORM8 ] = { 0xffffffff, 0 }, - [ MESA_FORMAT_B5G6R5_UNORM ] = { R200_TXFORMAT_RGB565, 0 }, - [ MESA_FORMAT_R5G6B5_UNORM ] = { R200_TXFORMAT_RGB565, 0 }, - [ MESA_FORMAT_B4G4R4A4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_A4R4G4B4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_B5G5R5A1_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_A1R5G5B5_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_L8A8_UNORM ] = { R200_TXFORMAT_AL88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_A8L8_UNORM ] = { R200_TXFORMAT_AL88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_A_UNORM8 ] = { R200_TXFORMAT_A8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_L_UNORM8 ] = { R200_TXFORMAT_L8, 0 }, - [ MESA_FORMAT_I_UNORM8 ] = { R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_YCBCR ] = { R200_TXFORMAT_YCBCR, R200_YUV_TO_RGB }, - [ MESA_FORMAT_YCBCR_REV ] = { R200_TXFORMAT_YCBCR_REV, R200_YUV_TO_RGB }, - [ MESA_FORMAT_RGB_FXT1 ] = { 0xffffffff, 0 }, - [ MESA_FORMAT_RGBA_FXT1 ] = { 0xffffffff, 0 }, - [ MESA_FORMAT_RGB_DXT1 ] = { R200_TXFORMAT_RGB_DXT1, 0 }, - [ MESA_FORMAT_RGBA_DXT1 ] = { R200_TXFORMAT_RGBA_DXT1 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_RGBA_DXT3 ] = { R200_TXFORMAT_RGBA_DXT3 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_RGBA_DXT5 ] = { R200_TXFORMAT_RGBA_DXT5 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, -}; - -static const struct tx_table tx_table_le[] = -{ - [ MESA_FORMAT_A8B8G8R8_UNORM ] = { R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_R8G8B8A8_UNORM ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_B8G8R8A8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_A8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_BGR_UNORM8 ] = { R200_TXFORMAT_ARGB8888, 0 }, - [ MESA_FORMAT_B5G6R5_UNORM ] = { R200_TXFORMAT_RGB565, 0 }, - [ MESA_FORMAT_R5G6B5_UNORM ] = { R200_TXFORMAT_RGB565, 0 }, - [ MESA_FORMAT_B4G4R4A4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_A4R4G4B4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_B5G5R5A1_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_A1R5G5B5_UNORM ] = { 
R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_L8A8_UNORM ] = { R200_TXFORMAT_AL88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_A8L8_UNORM ] = { R200_TXFORMAT_AL88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_A_UNORM8 ] = { R200_TXFORMAT_A8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_L_UNORM8 ] = { R200_TXFORMAT_L8, 0 }, - [ MESA_FORMAT_I_UNORM8 ] = { R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_YCBCR ] = { R200_TXFORMAT_YCBCR, R200_YUV_TO_RGB }, - [ MESA_FORMAT_YCBCR_REV ] = { R200_TXFORMAT_YCBCR_REV, R200_YUV_TO_RGB }, - [ MESA_FORMAT_RGB_FXT1 ] = { 0xffffffff, 0 }, - [ MESA_FORMAT_RGBA_FXT1 ] = { 0xffffffff, 0 }, - [ MESA_FORMAT_RGB_DXT1 ] = { R200_TXFORMAT_RGB_DXT1, 0 }, - [ MESA_FORMAT_RGBA_DXT1 ] = { R200_TXFORMAT_RGBA_DXT1 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_RGBA_DXT3 ] = { R200_TXFORMAT_RGBA_DXT3 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_RGBA_DXT5 ] = { R200_TXFORMAT_RGBA_DXT5 | R200_TXFORMAT_ALPHA_IN_MAP, 0 }, -}; - /* ================================================================ * Texture combine functions */ diff --git a/src/mesa/drivers/dri/radeon/radeon_blit.c b/src/mesa/drivers/dri/radeon/radeon_blit.c index 0de17514e05..0b0f06f0edb 100644 --- a/src/mesa/drivers/dri/radeon/radeon_blit.c +++ b/src/mesa/drivers/dri/radeon/radeon_blit.c @@ -28,6 +28,7 @@ #include "radeon_common.h" #include "radeon_context.h" #include "radeon_blit.h" +#include "radeon_tex.h" static inline uint32_t cmdpacket0(struct radeon_screen *rscrn, int reg, int count) @@ -40,19 +41,36 @@ static inline uint32_t cmdpacket0(struct radeon_screen *rscrn, /* common formats supported as both textures and render targets */ unsigned r100_check_blit(mesa_format mesa_format, uint32_t dst_pitch) { - /* XXX others? BE/LE? */ - switch (mesa_format) { - case MESA_FORMAT_B8G8R8A8_UNORM: - case MESA_FORMAT_B8G8R8X8_UNORM: - case MESA_FORMAT_B5G6R5_UNORM: - case MESA_FORMAT_B4G4R4A4_UNORM: - case MESA_FORMAT_B5G5R5A1_UNORM: - case MESA_FORMAT_A_UNORM8: - case MESA_FORMAT_L_UNORM8: - case MESA_FORMAT_I_UNORM8: + /* XXX others? */ + if (_mesa_little_endian()) { + switch (mesa_format) { + case MESA_FORMAT_B8G8R8A8_UNORM: + case MESA_FORMAT_B8G8R8X8_UNORM: + case MESA_FORMAT_B5G6R5_UNORM: + case MESA_FORMAT_B4G4R4A4_UNORM: + case MESA_FORMAT_B5G5R5A1_UNORM: + case MESA_FORMAT_A_UNORM8: + case MESA_FORMAT_L_UNORM8: + case MESA_FORMAT_I_UNORM8: break; - default: + default: + return 0; + } + } + else { + switch (mesa_format) { + case MESA_FORMAT_A8R8G8B8_UNORM: + case MESA_FORMAT_X8R8G8B8_UNORM: + case MESA_FORMAT_R5G6B5_UNORM: + case MESA_FORMAT_A4R4G4B4_UNORM: + case MESA_FORMAT_A1R5G5B5_UNORM: + case MESA_FORMAT_A_UNORM8: + case MESA_FORMAT_L_UNORM8: + case MESA_FORMAT_I_UNORM8: + break; + default: return 0; + } } /* Rendering to small buffer doesn't work. @@ -106,40 +124,8 @@ static void inline emit_tx_setup(struct r100_context *r100, assert(height <= 2048); assert(offset % 32 == 0); - /* XXX others? BE/LE? 
*/ - switch (mesa_format) { - case MESA_FORMAT_B8G8R8A8_UNORM: - txformat |= RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP; - break; - case MESA_FORMAT_A8B8G8R8_UNORM: - txformat |= RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP; - break; - case MESA_FORMAT_B8G8R8X8_UNORM: - txformat |= RADEON_TXFORMAT_ARGB8888; - break; - case MESA_FORMAT_B5G6R5_UNORM: - txformat |= RADEON_TXFORMAT_RGB565; - break; - case MESA_FORMAT_B4G4R4A4_UNORM: - txformat |= RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP; - break; - case MESA_FORMAT_B5G5R5A1_UNORM: - txformat |= RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP; - break; - case MESA_FORMAT_A_UNORM8: - case MESA_FORMAT_I_UNORM8: - txformat |= RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP; - break; - case MESA_FORMAT_L_UNORM8: - txformat |= RADEON_TXFORMAT_I8; - break; - case MESA_FORMAT_L8A8_UNORM: - txformat |= RADEON_TXFORMAT_AI88 | RADEON_TXFORMAT_ALPHA_IN_MAP; - break; - default: - break; - } - + txformat |= tx_table[mesa_format].format; + if (bo->flags & RADEON_BO_FLAGS_MACRO_TILE) offset |= RADEON_TXO_MACRO_TILE; if (bo->flags & RADEON_BO_FLAGS_MICRO_TILE) @@ -184,19 +170,25 @@ static inline void emit_cb_setup(struct r100_context *r100, uint32_t dst_format = 0; BATCH_LOCALS(&r100->radeon); - /* XXX others? BE/LE? */ + /* XXX others? */ switch (mesa_format) { + /* The first of each pair is for little, the second for big endian. */ case MESA_FORMAT_B8G8R8A8_UNORM: + case MESA_FORMAT_A8R8G8B8_UNORM: case MESA_FORMAT_B8G8R8X8_UNORM: + case MESA_FORMAT_X8R8G8B8_UNORM: dst_format = RADEON_COLOR_FORMAT_ARGB8888; break; case MESA_FORMAT_B5G6R5_UNORM: + case MESA_FORMAT_R5G6B5_UNORM: dst_format = RADEON_COLOR_FORMAT_RGB565; break; case MESA_FORMAT_B4G4R4A4_UNORM: + case MESA_FORMAT_A4R4G4B4_UNORM: dst_format = RADEON_COLOR_FORMAT_ARGB4444; break; case MESA_FORMAT_B5G5R5A1_UNORM: + case MESA_FORMAT_A1R5G5B5_UNORM: dst_format = RADEON_COLOR_FORMAT_ARGB1555; break; case MESA_FORMAT_A_UNORM8: @@ -425,5 +417,13 @@ unsigned r100_blit(struct gl_context *ctx, radeonFlush(ctx); + /* We submitted those packets outside our state atom mechanism. Thus + * make sure they are all resubmitted the next time. */ + r100->hw.ctx.dirty = GL_TRUE; + r100->hw.msk.dirty = GL_TRUE; + r100->hw.set.dirty = GL_TRUE; + r100->hw.tex[0].dirty = GL_TRUE; + r100->hw.txr[0].dirty = GL_TRUE; + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index 2a8bd6c9edc..fde89214ed2 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -164,7 +164,7 @@ uint32_t radeonGetAge(radeonContextPtr radeon) gp.param = RADEON_PARAM_LAST_CLEAR; gp.value = (int *)&age; - ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM, + ret = drmCommandWriteRead(radeon->radeonScreen->driScreen->fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp)); if (ret) { fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __func__, @@ -343,7 +343,7 @@ void radeonDrawBuffer( struct gl_context *ctx, GLenum mode ) { if (RADEON_DEBUG & RADEON_DRI) fprintf(stderr, "%s %s\n", __func__, - _mesa_lookup_enum_by_nr( mode )); + _mesa_enum_to_string( mode )); if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) { radeonContextPtr radeon = RADEON_CONTEXT(ctx); @@ -358,8 +358,8 @@ void radeonDrawBuffer( struct gl_context *ctx, GLenum mode ) * that the front-buffer has actually been allocated. 
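The dirty flags set at the end of r100_blit() above (and their r200 counterpart earlier in this patch) follow one rule: every state atom whose registers the blit programmed directly, bypassing the atom tracking, is flagged so the next validation pass re-emits it. A condensed sketch of that idea; the helper name is made up and the per-atom comments are interpretive:

/* Mark the atoms clobbered by the out-of-band blit packets so they are
 * resubmitted on the next state validation. */
static void r100_blit_mark_atoms_dirty(struct r100_context *r100)
{
   r100->hw.ctx.dirty    = GL_TRUE;   /* color buffer / context registers */
   r100->hw.msk.dirty    = GL_TRUE;   /* write masks */
   r100->hw.set.dirty    = GL_TRUE;   /* setup state */
   r100->hw.tex[0].dirty = GL_TRUE;   /* texture unit 0, the blit source */
   r100->hw.txr[0].dirty = GL_TRUE;   /* its offset registers */
}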
*/ if (!was_front_buffer_rendering && radeon->is_front_buffer_rendering) { - radeon_update_renderbuffers(radeon->dri.context, - radeon->dri.context->driDrawablePriv, GL_FALSE); + radeon_update_renderbuffers(radeon->driContext, + radeon->driContext->driDrawablePriv, GL_FALSE); } } @@ -375,8 +375,8 @@ void radeonReadBuffer( struct gl_context *ctx, GLenum mode ) || (mode == GL_FRONT); if (!was_front_buffer_reading && rmesa->is_front_buffer_reading) { - radeon_update_renderbuffers(rmesa->dri.context, - rmesa->dri.context->driReadablePriv, GL_FALSE); + radeon_update_renderbuffers(rmesa->driContext, + rmesa->driContext->driReadablePriv, GL_FALSE); } } /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */ @@ -399,7 +399,7 @@ void radeon_window_moved(radeonContextPtr radeon) void radeon_viewport(struct gl_context *ctx) { radeonContextPtr radeon = RADEON_CONTEXT(ctx); - __DRIcontext *driContext = radeon->dri.context; + __DRIcontext *driContext = radeon->driContext; void (*old_viewport)(struct gl_context *ctx); if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) { @@ -693,6 +693,7 @@ void rcommonInitCmdBuf(radeonContextPtr rmesa) { GLuint size; struct drm_radeon_gem_info mminfo = { 0 }; + int fd = rmesa->radeonScreen->driScreen->fd; /* Initialize command buffer */ size = 256 * driQueryOptioni(&rmesa->optionCache, @@ -711,8 +712,7 @@ void rcommonInitCmdBuf(radeonContextPtr rmesa) "Allocating %d bytes command buffer (max state is %d bytes)\n", size * 4, rmesa->hw.max_state_size * 4); - rmesa->cmdbuf.csm = - radeon_cs_manager_gem_ctor(rmesa->radeonScreen->driScreen->fd); + rmesa->cmdbuf.csm = radeon_cs_manager_gem_ctor(fd); if (rmesa->cmdbuf.csm == NULL) { /* FIXME: fatal error */ return; @@ -725,7 +725,7 @@ void rcommonInitCmdBuf(radeonContextPtr rmesa) (void (*)(void *))rmesa->glCtx.Driver.Flush, &rmesa->glCtx); - if (!drmCommandWriteRead(rmesa->dri.fd, DRM_RADEON_GEM_INFO, + if (!drmCommandWriteRead(fd, DRM_RADEON_GEM_INFO, &mminfo, sizeof(mminfo))) { radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, mminfo.vram_visible); diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 9699dcbfcdc..4660d98c9a2 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -162,10 +162,7 @@ GLboolean radeonInitContext(radeonContextPtr radeon, _mesa_meta_init(ctx); /* DRI fields */ - radeon->dri.context = driContextPriv; - radeon->dri.screen = sPriv; - radeon->dri.fd = sPriv->fd; - radeon->dri.drmMinor = sPriv->drm_version.minor; + radeon->driContext = driContextPriv; /* Setup IRQs */ fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode"); @@ -194,6 +191,29 @@ GLboolean radeonInitContext(radeonContextPtr radeon, radeon_init_dma(radeon); + /* _mesa_initialize_context calls _mesa_init_queryobj which + * initializes all of the counter sizes to 64. The counters on r100 + * and r200 are only 32-bits for occlusion queries. Those are the + * only counters, so set the other sizes to zero. 
+ */ + radeon->glCtx.Const.QueryCounterBits.SamplesPassed = 32; + + radeon->glCtx.Const.QueryCounterBits.TimeElapsed = 0; + radeon->glCtx.Const.QueryCounterBits.Timestamp = 0; + radeon->glCtx.Const.QueryCounterBits.PrimitivesGenerated = 0; + radeon->glCtx.Const.QueryCounterBits.PrimitivesWritten = 0; + radeon->glCtx.Const.QueryCounterBits.VerticesSubmitted = 0; + radeon->glCtx.Const.QueryCounterBits.PrimitivesSubmitted = 0; + radeon->glCtx.Const.QueryCounterBits.VsInvocations = 0; + radeon->glCtx.Const.QueryCounterBits.TessPatches = 0; + radeon->glCtx.Const.QueryCounterBits.TessInvocations = 0; + radeon->glCtx.Const.QueryCounterBits.GsInvocations = 0; + radeon->glCtx.Const.QueryCounterBits.GsPrimitives = 0; + radeon->glCtx.Const.QueryCounterBits.FsInvocations = 0; + radeon->glCtx.Const.QueryCounterBits.ComputeInvocations = 0; + radeon->glCtx.Const.QueryCounterBits.ClInPrimitives = 0; + radeon->glCtx.Const.QueryCounterBits.ClOutPrimitives = 0; + return GL_TRUE; } @@ -302,7 +322,7 @@ radeon_bits_per_pixel(const struct radeon_renderbuffer *rb) */ void radeon_prepare_render(radeonContextPtr radeon) { - __DRIcontext *driContext = radeon->dri.context; + __DRIcontext *driContext = radeon->driContext; __DRIdrawable *drawable; __DRIscreen *screen; diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index dc72592b90c..d142a871b40 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -342,17 +342,6 @@ struct radeon_store { int elts_start; }; -struct radeon_dri_mirror { - __DRIcontext *context; /* DRI context */ - __DRIscreen *screen; /* DRI screen */ - - drm_context_t hwContext; - drm_hw_lock_t *hwLock; - int hwLockCount; - int fd; - int drmMinor; -}; - typedef void (*radeon_tri_func) (radeonContextPtr, radeonVertex *, radeonVertex *, radeonVertex *); @@ -385,6 +374,7 @@ struct radeon_cmdbuf { struct radeon_context { struct gl_context glCtx; /**< base class, must be first */ + __DRIcontext *driContext; /* DRI context */ radeonScreenPtr radeonScreen; /* Screen private DRI data */ /* Texture object bookkeeping @@ -407,9 +397,6 @@ struct radeon_context { /* Drawable information */ unsigned int lastStamp; - /* Mirrors of some DRI state */ - struct radeon_dri_mirror dri; - /* Busy waiting */ GLuint do_usleeps; GLuint do_irqs; @@ -502,12 +489,12 @@ static inline radeonContextPtr RADEON_CONTEXT(struct gl_context *ctx) static inline __DRIdrawable* radeon_get_drawable(radeonContextPtr radeon) { - return radeon->dri.context->driDrawablePriv; + return radeon->driContext->driDrawablePriv; } static inline __DRIdrawable* radeon_get_readable(radeonContextPtr radeon) { - return radeon->dri.context->driReadablePriv; + return radeon->driContext->driReadablePriv; } extern const char const *radeonVendorString; diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c index d4d19354b6d..a9e2ab563d3 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_context.c @@ -191,16 +191,8 @@ r100CreateContext( gl_api api, rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache, "def_max_anisotropy"); - if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) { - if ( sPriv->drm_version.minor < 13 ) - fprintf( stderr, "DRM version 1.%d too old to support HyperZ, " - "disabling.\n", sPriv->drm_version.minor ); - else - rmesa->using_hyperz = GL_TRUE; - } - - if ( 
sPriv->drm_version.minor >= 15 ) - rmesa->texmicrotile = GL_TRUE; + if (driQueryOptionb(&rmesa->radeon.optionCache, "hyperz")) + rmesa->using_hyperz = GL_TRUE; /* Init default driver functions then plug in our Radeon-specific functions * (the texture functions are especially important) diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h index 40325327813..badabd9508c 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_context.h @@ -426,7 +426,6 @@ struct r100_context { struct r100_swtcl_info swtcl; GLboolean using_hyperz; - GLboolean texmicrotile; /* Performance counters */ diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c index ef62d097bae..5eece518c95 100644 --- a/src/mesa/drivers/dri/radeon/radeon_fbo.c +++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c @@ -169,6 +169,7 @@ radeon_map_renderbuffer_s8z24(struct gl_context *ctx, rrb->map_buffer = malloc(w * h * 4); ret = radeon_bo_map(rrb->bo, !!(mode & GL_MAP_WRITE_BIT)); assert(!ret); + (void) ret; untiled_s8z24_map = rrb->map_buffer; tiled_s8z24_map = rrb->bo->ptr; @@ -207,6 +208,7 @@ radeon_map_renderbuffer_z16(struct gl_context *ctx, rrb->map_buffer = malloc(w * h * 2); ret = radeon_bo_map(rrb->bo, !!(mode & GL_MAP_WRITE_BIT)); assert(!ret); + (void) ret; untiled_z16_map = rrb->map_buffer; tiled_z16_map = rrb->bo->ptr; @@ -324,6 +326,7 @@ radeon_map_renderbuffer(struct gl_context *ctx, ret = radeon_bo_map(rrb->bo, !!(mode & GL_MAP_WRITE_BIT)); assert(!ret); + (void) ret; map = rrb->bo->ptr; stride = rrb->map_pitch; @@ -416,7 +419,6 @@ radeon_unmap_renderbuffer(struct gl_context *ctx, { struct radeon_context *const rmesa = RADEON_CONTEXT(ctx); struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb); - GLboolean ok; if ((rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_DEPTH_ALWAYS_TILED) && !rrb->has_surface) { if (rb->Format == MESA_FORMAT_Z24_UNORM_S8_UINT || rb->Format == MESA_FORMAT_Z24_UNORM_X8_UINT) { @@ -438,6 +440,7 @@ radeon_unmap_renderbuffer(struct gl_context *ctx, radeon_bo_unmap(rrb->map_bo); if (rrb->map_mode & GL_MAP_WRITE_BIT) { + GLboolean ok; ok = rmesa->vtbl.blit(ctx, rrb->map_bo, 0, rb->Format, rrb->map_pitch / rrb->cpp, rrb->map_w, rrb->map_h, @@ -449,6 +452,7 @@ radeon_unmap_renderbuffer(struct gl_context *ctx, rrb->map_w, rrb->map_h, GL_FALSE); assert(ok); + (void) ok; } radeon_bo_unref(rrb->map_bo); @@ -700,7 +704,7 @@ radeon_bind_framebuffer(struct gl_context * ctx, GLenum target, radeon_print(RADEON_TEXTURE, RADEON_TRACE, "%s(%p, fb %p, target %s) \n", __func__, ctx, fb, - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); if (target == GL_FRAMEBUFFER_EXT || target == GL_DRAW_FRAMEBUFFER_EXT) { radeon_draw_buffer(ctx, fb); diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c index 28591cad895..c71766d0a3e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c @@ -276,7 +276,7 @@ static void calculate_min_max_lod(struct gl_sampler_object *samp, struct gl_text radeon_print(RADEON_TEXTURE, RADEON_TRACE, "%s(%p) target %s, min %d, max %d.\n", __func__, tObj, - _mesa_lookup_enum_by_nr(tObj->Target), + _mesa_enum_to_string(tObj->Target), minLod, maxLod); /* save these values */ diff --git a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c index 6998444fb66..e115b749da5 100644 --- 
a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c +++ b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c @@ -212,7 +212,7 @@ radeonReadPixels(struct gl_context * ctx, */ radeon_print(RADEON_FALLBACKS, RADEON_NORMAL, "Falling back to sw for ReadPixels (format %s, type %s)\n", - _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type)); + _mesa_enum_to_string(format), _mesa_enum_to_string(type)); if (ctx->NewState) _mesa_update_state(ctx); diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 45d9b2b8c0b..98b4741b456 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -135,36 +135,26 @@ DRI_CONF_END static int radeonGetParam(__DRIscreen *sPriv, int param, void *value) { - int ret; - drm_radeon_getparam_t gp = { 0 }; struct drm_radeon_info info = { 0 }; - if (sPriv->drm_version.major >= 2) { - info.value = (uint64_t)(uintptr_t)value; - switch (param) { - case RADEON_PARAM_DEVICE_ID: - info.request = RADEON_INFO_DEVICE_ID; - break; - case RADEON_PARAM_NUM_GB_PIPES: - info.request = RADEON_INFO_NUM_GB_PIPES; - break; - case RADEON_PARAM_NUM_Z_PIPES: - info.request = RADEON_INFO_NUM_Z_PIPES; - break; - case RADEON_INFO_TILE_CONFIG: - info.request = RADEON_INFO_TILE_CONFIG; - break; - default: - return -EINVAL; - } - ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_INFO, &info, sizeof(info)); - } else { - gp.param = param; - gp.value = value; - - ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp)); + info.value = (uint64_t)(uintptr_t)value; + switch (param) { + case RADEON_PARAM_DEVICE_ID: + info.request = RADEON_INFO_DEVICE_ID; + break; + case RADEON_PARAM_NUM_GB_PIPES: + info.request = RADEON_INFO_NUM_GB_PIPES; + break; + case RADEON_PARAM_NUM_Z_PIPES: + info.request = RADEON_INFO_NUM_Z_PIPES; + break; + case RADEON_INFO_TILE_CONFIG: + info.request = RADEON_INFO_TILE_CONFIG; + break; + default: + return -EINVAL; } - return ret; + return drmCommandWriteRead(sPriv->fd, DRM_RADEON_INFO, &info, sizeof(info)); } #if defined(RADEON_R100) diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c index cba3d9c9689..74c1fc6c902 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -1354,7 +1354,7 @@ void radeonUpdateWindow( struct gl_context *ctx ) GLfloat xoffset = 0.0; GLfloat yoffset = dPriv ? (GLfloat) dPriv->h : 0; const GLboolean render_to_fbo = (ctx->DrawBuffer ? _mesa_is_user_fbo(ctx->DrawBuffer) : 0); - double scale[3], translate[3]; + float scale[3], translate[3]; GLfloat y_scale, y_bias; if (render_to_fbo) { @@ -1452,7 +1452,7 @@ static void radeonEnable( struct gl_context *ctx, GLenum cap, GLboolean state ) if ( RADEON_DEBUG & RADEON_STATE ) fprintf( stderr, "%s( %s = %s )\n", __func__, - _mesa_lookup_enum_by_nr( cap ), + _mesa_enum_to_string( cap ), state ? 
"GL_TRUE" : "GL_FALSE" ); switch ( cap ) { diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c index c800edfc7be..5e2f41fdb4a 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state_init.c +++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c @@ -336,12 +336,15 @@ static void ctx_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom) atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888; else switch (rrb->base.Base.Format) { case MESA_FORMAT_B5G6R5_UNORM: + case MESA_FORMAT_R5G6B5_UNORM: atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565; break; case MESA_FORMAT_B4G4R4A4_UNORM: + case MESA_FORMAT_A4R4G4B4_UNORM: atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444; break; case MESA_FORMAT_B5G5R5A1_UNORM: + case MESA_FORMAT_A1R5G5B5_UNORM: atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555; break; default: diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c index 8a1fbab39f8..2fbd353297b 100644 --- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c @@ -442,7 +442,7 @@ static GLboolean radeon_run_render( struct gl_context *ctx, radeon_print(RADEON_SWRENDER, RADEON_NORMAL, "radeon_render.c: prim %s %d..%d\n", - _mesa_lookup_enum_by_nr(prim & PRIM_MODE_MASK), + _mesa_enum_to_string(prim & PRIM_MODE_MASK), start, start+length); if (length) diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c index 353fdb00ec8..0955a135de8 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex.c @@ -263,7 +263,7 @@ static void radeonTexEnv( struct gl_context *ctx, GLenum target, if ( RADEON_DEBUG & RADEON_STATE ) { fprintf( stderr, "%s( %s )\n", - __func__, _mesa_lookup_enum_by_nr( pname ) ); + __func__, _mesa_enum_to_string( pname ) ); } switch ( pname ) { @@ -335,7 +335,7 @@ static void radeonTexParameter( struct gl_context *ctx, radeonTexObj* t = radeon_tex_obj(texObj); radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, "%s( %s )\n", __func__, - _mesa_lookup_enum_by_nr( pname ) ); + _mesa_enum_to_string( pname ) ); switch ( pname ) { case GL_TEXTURE_BASE_LEVEL: @@ -359,7 +359,7 @@ static void radeonDeleteTexture( struct gl_context *ctx, radeon_print(RADEON_TEXTURE, RADEON_NORMAL, "%s( %p (target = %s) )\n", __func__, (void *)texObj, - _mesa_lookup_enum_by_nr( texObj->Target ) ); + _mesa_enum_to_string( texObj->Target ) ); if ( rmesa ) { radeon_firevertices(&rmesa->radeon); diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.h b/src/mesa/drivers/dri/radeon/radeon_tex.h index fa57c08987d..f8ec432755a 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.h +++ b/src/mesa/drivers/dri/radeon/radeon_tex.h @@ -51,4 +51,39 @@ extern void radeonTexUpdateParameters(struct gl_context *ctx, GLuint unit); extern void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions ); +struct tx_table { + GLuint format, filter; +}; + +/* XXX verify this table against MESA_FORMAT_x values */ +static const struct tx_table tx_table[] = +{ + [ MESA_FORMAT_NONE ] = { 0xffffffff, 0 }, + [ MESA_FORMAT_A8B8G8R8_UNORM ] = { RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_R8G8B8A8_UNORM ] = { RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_B8G8R8A8_UNORM ] = { RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_A8R8G8B8_UNORM ] = { RADEON_TXFORMAT_ARGB8888 | 
RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_BGR_UNORM8 ] = { RADEON_TXFORMAT_ARGB8888, 0 }, + [ MESA_FORMAT_B5G6R5_UNORM ] = { RADEON_TXFORMAT_RGB565, 0 }, + [ MESA_FORMAT_R5G6B5_UNORM ] = { RADEON_TXFORMAT_RGB565, 0 }, + [ MESA_FORMAT_B4G4R4A4_UNORM ] = { RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_A4R4G4B4_UNORM ] = { RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_B5G5R5A1_UNORM ] = { RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_A1R5G5B5_UNORM ] = { RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_L8A8_UNORM ] = { RADEON_TXFORMAT_AI88 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_A8L8_UNORM ] = { RADEON_TXFORMAT_AI88 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_A_UNORM8 ] = { RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_L_UNORM8 ] = { RADEON_TXFORMAT_I8, 0 }, + [ MESA_FORMAT_I_UNORM8 ] = { RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_YCBCR ] = { RADEON_TXFORMAT_YVYU422, RADEON_YUV_TO_RGB }, + [ MESA_FORMAT_YCBCR_REV ] = { RADEON_TXFORMAT_VYUY422, RADEON_YUV_TO_RGB }, + [ MESA_FORMAT_RGB_FXT1 ] = { 0xffffffff, 0 }, + [ MESA_FORMAT_RGBA_FXT1 ] = { 0xffffffff, 0 }, + [ MESA_FORMAT_RGB_DXT1 ] = { RADEON_TXFORMAT_DXT1, 0 }, + [ MESA_FORMAT_RGBA_DXT1 ] = { RADEON_TXFORMAT_DXT1 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_RGBA_DXT3 ] = { RADEON_TXFORMAT_DXT23 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, + [ MESA_FORMAT_RGBA_DXT5 ] = { RADEON_TXFORMAT_DXT45 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, +}; + + #endif /* __RADEON_TEX_H__ */ diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index 45667efb65f..ec835f248eb 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -53,53 +53,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
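The table above (and the r200 pair earlier in this patch) relies on C99 designated array initializers keyed by mesa_format values, so any format that is not listed ends up as a zero-filled entry. A standalone illustration of the pattern; the enum and register values here are invented for the example and unrelated to the real RADEON_TXFORMAT_* bits:

#include <stdio.h>

enum fmt { FMT_NONE, FMT_RGB565, FMT_ARGB8888, FMT_COUNT };

struct fmt_entry { unsigned hw_bits, filter_bits; };

/* Entries are placed by enum value; anything not named (FMT_NONE here)
 * is implicitly zero-initialized. */
static const struct fmt_entry fmt_table[FMT_COUNT] = {
   [FMT_RGB565]   = { 0x03, 0 },
   [FMT_ARGB8888] = { 0x06, 0 },
};

int main(void)
{
   printf("RGB565 -> 0x%02x\n", fmt_table[FMT_RGB565].hw_bits);   /* 0x03 */
   printf("NONE   -> 0x%02x\n", fmt_table[FMT_NONE].hw_bits);     /* 0x00 */
   return 0;
}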
#include "radeon_tcl.h" -#define RADEON_TXFORMAT_A8 RADEON_TXFORMAT_I8 -#define RADEON_TXFORMAT_L8 RADEON_TXFORMAT_I8 -#define RADEON_TXFORMAT_AL88 RADEON_TXFORMAT_AI88 -#define RADEON_TXFORMAT_YCBCR RADEON_TXFORMAT_YVYU422 -#define RADEON_TXFORMAT_YCBCR_REV RADEON_TXFORMAT_VYUY422 -#define RADEON_TXFORMAT_RGB_DXT1 RADEON_TXFORMAT_DXT1 -#define RADEON_TXFORMAT_RGBA_DXT1 RADEON_TXFORMAT_DXT1 -#define RADEON_TXFORMAT_RGBA_DXT3 RADEON_TXFORMAT_DXT23 -#define RADEON_TXFORMAT_RGBA_DXT5 RADEON_TXFORMAT_DXT45 - #define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \ && (tx_table[f].format != 0xffffffff) ) -struct tx_table { - GLuint format, filter; -}; - -/* XXX verify this table against MESA_FORMAT_x values */ -static const struct tx_table tx_table[] = -{ - [ MESA_FORMAT_NONE ] = { 0xffffffff, 0 }, - [ MESA_FORMAT_A8B8G8R8_UNORM ] = { RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_R8G8B8A8_UNORM ] = { RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_B8G8R8A8_UNORM ] = { RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_A8R8G8B8_UNORM ] = { RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_BGR_UNORM8 ] = { RADEON_TXFORMAT_ARGB8888, 0 }, - [ MESA_FORMAT_B5G6R5_UNORM ] = { RADEON_TXFORMAT_RGB565, 0 }, - [ MESA_FORMAT_R5G6B5_UNORM ] = { RADEON_TXFORMAT_RGB565, 0 }, - [ MESA_FORMAT_B4G4R4A4_UNORM ] = { RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_A4R4G4B4_UNORM ] = { RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_B5G5R5A1_UNORM ] = { RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_A1R5G5B5_UNORM ] = { RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_L8A8_UNORM ] = { RADEON_TXFORMAT_AL88 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_A8L8_UNORM ] = { RADEON_TXFORMAT_AL88 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_A_UNORM8 ] = { RADEON_TXFORMAT_A8 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_L_UNORM8 ] = { RADEON_TXFORMAT_L8, 0 }, - [ MESA_FORMAT_I_UNORM8 ] = { RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_YCBCR ] = { RADEON_TXFORMAT_YCBCR, RADEON_YUV_TO_RGB }, - [ MESA_FORMAT_YCBCR_REV ] = { RADEON_TXFORMAT_YCBCR_REV, RADEON_YUV_TO_RGB }, - [ MESA_FORMAT_RGB_FXT1 ] = { 0xffffffff, 0 }, - [ MESA_FORMAT_RGBA_FXT1 ] = { 0xffffffff, 0 }, - [ MESA_FORMAT_RGB_DXT1 ] = { RADEON_TXFORMAT_RGB_DXT1, 0 }, - [ MESA_FORMAT_RGBA_DXT1 ] = { RADEON_TXFORMAT_RGBA_DXT1 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_RGBA_DXT3 ] = { RADEON_TXFORMAT_RGBA_DXT3 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, - [ MESA_FORMAT_RGBA_DXT5 ] = { RADEON_TXFORMAT_RGBA_DXT5 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }, -}; - /* ================================================================ * Texture combine functions */ diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index edfd48b283b..4794ddae069 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -224,7 +224,19 @@ static mesa_format radeonChoose8888TexFormat(radeonContextPtr rmesa, const GLuint ui = 1; const GLubyte littleEndian = *((const GLubyte *)&ui); - if (fbo) + + /* Unfortunately, regardless the fbo flag, we might still be asked to + * attach a texture to a fbo later, which then won't succeed if we chose + * one which isn't renderable. 
And unlike more exotic formats, apps aren't + * really prepared for the incomplete framebuffer this results in (they'd + * have to retry with same internalFormat even, just different + * srcFormat/srcType, which can't really be expected anyway). + * Ideally, we'd defer format selection until later (if the texture is + * used as a rt it's likely there's never data uploaded to it before attached + * to a fbo), but this isn't really possible, so for now just always use + * a renderable format. + */ + if (1 || fbo) return _radeon_texformat_argb8888; if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) || @@ -267,8 +279,8 @@ mesa_format radeonChooseTextureFormat(struct gl_context * ctx, radeon_print(RADEON_TEXTURE, RADEON_TRACE, "%s InternalFormat=%s(%d) type=%s format=%s\n", __func__, - _mesa_lookup_enum_by_nr(internalFormat), internalFormat, - _mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format)); + _mesa_enum_to_string(internalFormat), internalFormat, + _mesa_enum_to_string(type), _mesa_enum_to_string(format)); radeon_print(RADEON_TEXTURE, RADEON_TRACE, "%s do32bpt=%d force16bpt=%d\n", __func__, do32bpt, force16bpt); @@ -531,7 +543,7 @@ void radeon_image_target_texture_2d(struct gl_context *ctx, GLenum target, __DRIscreen *screen; __DRIimage *image; - screen = radeon->dri.screen; + screen = radeon->radeonScreen->driScreen; image = screen->dri2.image->lookupEGLImage(screen, image_handle, screen->loaderPrivate); if (image == NULL) diff --git a/src/mesa/drivers/dri/swrast/Makefile.am b/src/mesa/drivers/dri/swrast/Makefile.am index bfc3c10e334..9d21d9ea4dc 100644 --- a/src/mesa/drivers/dri/swrast/Makefile.am +++ b/src/mesa/drivers/dri/swrast/Makefile.am @@ -24,7 +24,6 @@ include Makefile.sources AM_CFLAGS = \ - -D__NOT_HAVE_DRM_H \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src/ \ -I$(top_srcdir)/src/mapi \ @@ -33,6 +32,7 @@ AM_CFLAGS = \ -I$(top_srcdir)/src/gallium/auxiliary \ -I$(top_srcdir)/src/mesa/drivers/dri/common \ -I$(top_builddir)/src/mesa/drivers/dri/common \ + $(LIBDRM_CFLAGS) \ $(DEFINES) \ $(VISIBILITY_CFLAGS) diff --git a/src/mesa/drivers/osmesa/osmesa.c b/src/mesa/drivers/osmesa/osmesa.c index 022523eb00b..5c7dcac3841 100644 --- a/src/mesa/drivers/osmesa/osmesa.c +++ b/src/mesa/drivers/osmesa/osmesa.c @@ -1124,7 +1124,7 @@ static struct name_function functions[] = { { "OSMesaDestroyContext", (OSMESAproc) OSMesaDestroyContext }, { "OSMesaMakeCurrent", (OSMESAproc) OSMesaMakeCurrent }, { "OSMesaGetCurrentContext", (OSMESAproc) OSMesaGetCurrentContext }, - { "OSMesaPixelsStore", (OSMESAproc) OSMesaPixelStore }, + { "OSMesaPixelStore", (OSMESAproc) OSMesaPixelStore }, { "OSMesaGetIntegerv", (OSMESAproc) OSMesaGetIntegerv }, { "OSMesaGetDepthBuffer", (OSMESAproc) OSMesaGetDepthBuffer }, { "OSMesaGetColorBuffer", (OSMESAproc) OSMesaGetColorBuffer }, diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c index 9c2e29e6472..53c8fb893b5 100644 --- a/src/mesa/main/api_validate.c +++ b/src/mesa/main/api_validate.c @@ -69,6 +69,25 @@ check_valid_to_render(struct gl_context *ctx, const char *function) return false; } + /* The spec argues that this is allowed because a tess ctrl shader + * without a tess eval shader can be used with transform feedback. + * However, glBeginTransformFeedback doesn't allow GL_PATCHES and + * therefore doesn't allow tessellation. 
+ * + * Further investigation showed that this is indeed a spec bug and + * a tess ctrl shader without a tess eval shader shouldn't have been + * allowed, because there is no API in GL 4.0 that can make use this + * to produce something useful. + * + * Also, all vendors except one don't support a tess ctrl shader without + * a tess eval shader anyway. + */ + if (ctx->TessCtrlProgram._Current && !ctx->TessEvalProgram._Current) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(tess eval shader is missing)", function); + return false; + } + /* Section 7.3 (Program Objects) of the OpenGL 4.5 Core Profile spec * says: * @@ -127,6 +146,9 @@ _mesa_is_valid_prim_mode(struct gl_context *ctx, GLenum mode) if (mode <= GL_TRIANGLE_STRIP_ADJACENCY) return _mesa_has_geometry_shaders(ctx); + if (mode == GL_PATCHES) + return _mesa_has_tessellation(ctx); + return false; } @@ -136,6 +158,7 @@ _mesa_is_valid_prim_mode(struct gl_context *ctx, GLenum mode) * etc? Also, do additional checking related to transformation feedback. * Note: this function cannot be called during glNewList(GL_COMPILE) because * this code depends on current transform feedback state. + * Also, do additional checking related to tessellation shaders. */ GLboolean _mesa_valid_prim_mode(struct gl_context *ctx, GLenum mode, const char *name) @@ -170,11 +193,29 @@ _mesa_valid_prim_mode(struct gl_context *ctx, GLenum mode, const char *name) * TRIANGLES_ADJACENCY_ARB and <mode> is not * TRIANGLES_ADJACENCY_ARB or TRIANGLE_STRIP_ADJACENCY_ARB. * + * The GL spec doesn't mention any interaction with tessellation, which + * is clearly a spec bug. The same rule should apply, but instead of + * the draw primitive mode, the tessellation evaluation shader primitive + * mode should be used for the checking. */ if (ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]) { const GLenum geom_mode = ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]->Geom.InputType; - switch (mode) { + struct gl_shader_program *tes = + ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL]; + GLenum mode_before_gs = mode; + + if (tes) { + if (tes->TessEval.PointMode) + mode_before_gs = GL_POINTS; + else if (tes->TessEval.PrimitiveMode == GL_ISOLINES) + mode_before_gs = GL_LINES; + else + /* the GL_QUADS mode generates triangles too */ + mode_before_gs = GL_TRIANGLES; + } + + switch (mode_before_gs) { case GL_POINTS: valid_enum = (geom_mode == GL_POINTS); break; @@ -209,12 +250,42 @@ _mesa_valid_prim_mode(struct gl_context *ctx, GLenum mode, const char *name) _mesa_error(ctx, GL_INVALID_OPERATION, "%s(mode=%s vs geometry shader input %s)", name, - _mesa_lookup_prim_by_nr(mode), + _mesa_lookup_prim_by_nr(mode_before_gs), _mesa_lookup_prim_by_nr(geom_mode)); return GL_FALSE; } } + /* From the OpenGL 4.0 (Core Profile) spec (section 2.12): + * + * "Tessellation operates only on patch primitives. If tessellation is + * active, any command that transfers vertices to the GL will + * generate an INVALID_OPERATION error if the primitive mode is not + * PATCHES. + * Patch primitives are not supported by pipeline stages below the + * tessellation evaluation shader. If there is no active program + * object or the active program object does not contain a tessellation + * evaluation shader, the error INVALID_OPERATION is generated by any + * command that transfers vertices to the GL if the primitive mode is + * PATCHES." 
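The geometry shader compatibility check above is now made against mode_before_gs, the primitive type that tessellation actually feeds to the next stage, rather than the GL_PATCHES mode passed to the draw call; the same mapping is reused further down for the transform feedback mode check. A compact sketch of that mapping using the fields referenced in the patch; the helper function itself is illustrative:

/* Primitive type leaving tessellation, as used for both the geometry
 * shader input check and the transform feedback mode check. */
static GLenum prim_mode_after_tess(const struct gl_shader_program *tes,
                                   GLenum draw_mode)
{
   if (!tes)
      return draw_mode;                 /* no tess eval shader bound */
   if (tes->TessEval.PointMode)
      return GL_POINTS;                 /* point mode wins */
   if (tes->TessEval.PrimitiveMode == GL_ISOLINES)
      return GL_LINES;
   return GL_TRIANGLES;                 /* triangles and quads both yield triangles */
}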
+ * + */ + if (ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL] || + ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL]) { + if (mode != GL_PATCHES) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "only GL_PATCHES valid with tessellation"); + return GL_FALSE; + } + } + else { + if (mode == GL_PATCHES) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "GL_PATCHES only valid with tessellation"); + return GL_FALSE; + } + } + /* From the GL_EXT_transform_feedback spec: * * "The error INVALID_OPERATION is generated if Begin, or any command @@ -247,6 +318,17 @@ _mesa_valid_prim_mode(struct gl_context *ctx, GLenum mode, const char *name) pass = GL_FALSE; } } + else if (ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL]) { + struct gl_shader_program *tes = + ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL]; + + if (tes->TessEval.PointMode) + pass = ctx->TransformFeedback.Mode == GL_POINTS; + else if (tes->TessEval.PrimitiveMode == GL_ISOLINES) + pass = ctx->TransformFeedback.Mode == GL_LINES; + else + pass = ctx->TransformFeedback.Mode == GL_TRIANGLES; + } else { switch (mode) { case GL_POINTS: @@ -291,7 +373,7 @@ valid_elements_type(struct gl_context *ctx, GLenum type, const char *name) default: _mesa_error(ctx, GL_INVALID_ENUM, "%s(type = %s)", name, - _mesa_lookup_enum_by_nr(type)); + _mesa_enum_to_string(type)); return false; } } diff --git a/src/mesa/main/atifragshader.c b/src/mesa/main/atifragshader.c index 9fc35520a38..935ba05b7cc 100644 --- a/src/mesa/main/atifragshader.c +++ b/src/mesa/main/atifragshader.c @@ -132,21 +132,21 @@ static void debug_op(GLint optype, GLuint arg_count, GLenum op, GLuint dst, op_name = atifs_ops[(arg_count-1)+(optype?3:0)]; - fprintf(stderr, "%s(%s, %s", op_name, _mesa_lookup_enum_by_nr(op), - _mesa_lookup_enum_by_nr(dst)); + fprintf(stderr, "%s(%s, %s", op_name, _mesa_enum_to_string(op), + _mesa_enum_to_string(dst)); if (!optype) fprintf(stderr, ", %d", dstMask); fprintf(stderr, ", %s", create_dst_mod_str(dstMod)); - fprintf(stderr, ", %s, %s, %d", _mesa_lookup_enum_by_nr(arg1), - _mesa_lookup_enum_by_nr(arg1Rep), arg1Mod); + fprintf(stderr, ", %s, %s, %d", _mesa_enum_to_string(arg1), + _mesa_enum_to_string(arg1Rep), arg1Mod); if (arg_count>1) - fprintf(stderr, ", %s, %s, %d", _mesa_lookup_enum_by_nr(arg2), - _mesa_lookup_enum_by_nr(arg2Rep), arg2Mod); + fprintf(stderr, ", %s, %s, %d", _mesa_enum_to_string(arg2), + _mesa_enum_to_string(arg2Rep), arg2Mod); if (arg_count>2) - fprintf(stderr, ", %s, %s, %d", _mesa_lookup_enum_by_nr(arg3), - _mesa_lookup_enum_by_nr(arg3Rep), arg3Mod); + fprintf(stderr, ", %s, %s, %d", _mesa_enum_to_string(arg3), + _mesa_enum_to_string(arg3Rep), arg3Mod); fprintf(stderr,")\n"); @@ -383,7 +383,7 @@ _mesa_EndFragmentShaderATI(void) for (j = 0; j < MAX_NUM_PASSES_ATI; j++) { for (i = 0; i < MAX_NUM_FRAGMENT_REGISTERS_ATI; i++) { GLuint op = curProg->SetupInst[j][i].Opcode; - const char *op_enum = op > 5 ? _mesa_lookup_enum_by_nr(op) : "0"; + const char *op_enum = op > 5 ? _mesa_enum_to_string(op) : "0"; GLuint src = curProg->SetupInst[j][i].src; GLuint swizzle = curProg->SetupInst[j][i].swizzle; fprintf(stderr, "%2d %04X %s %d %04X\n", i, op, op_enum, src, @@ -392,8 +392,8 @@ _mesa_EndFragmentShaderATI(void) for (i = 0; i < curProg->numArithInstr[j]; i++) { GLuint op0 = curProg->Instructions[j][i].Opcode[0]; GLuint op1 = curProg->Instructions[j][i].Opcode[1]; - const char *op0_enum = op0 > 5 ? _mesa_lookup_enum_by_nr(op0) : "0"; - const char *op1_enum = op1 > 5 ? _mesa_lookup_enum_by_nr(op1) : "0"; + const char *op0_enum = op0 > 5 ? 
_mesa_enum_to_string(op0) : "0"; + const char *op1_enum = op1 > 5 ? _mesa_enum_to_string(op1) : "0"; GLuint count0 = curProg->Instructions[j][i].ArgCount[0]; GLuint count1 = curProg->Instructions[j][i].ArgCount[1]; fprintf(stderr, "%2d %04X %s %d %04X %s %d\n", i, op0, op0_enum, count0, @@ -477,8 +477,8 @@ _mesa_PassTexCoordATI(GLuint dst, GLuint coord, GLenum swizzle) #if MESA_DEBUG_ATI_FS _mesa_debug(ctx, "%s(%s, %s, %s)\n", __func__, - _mesa_lookup_enum_by_nr(dst), _mesa_lookup_enum_by_nr(coord), - _mesa_lookup_enum_by_nr(swizzle)); + _mesa_enum_to_string(dst), _mesa_enum_to_string(coord), + _mesa_enum_to_string(swizzle)); #endif } @@ -550,8 +550,8 @@ _mesa_SampleMapATI(GLuint dst, GLuint interp, GLenum swizzle) #if MESA_DEBUG_ATI_FS _mesa_debug(ctx, "%s(%s, %s, %s)\n", __func__, - _mesa_lookup_enum_by_nr(dst), _mesa_lookup_enum_by_nr(interp), - _mesa_lookup_enum_by_nr(swizzle)); + _mesa_enum_to_string(dst), _mesa_enum_to_string(interp), + _mesa_enum_to_string(swizzle)); #endif } diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c index 53626e38be9..08f13178f84 100644 --- a/src/mesa/main/attrib.c +++ b/src/mesa/main/attrib.c @@ -937,7 +937,7 @@ _mesa_PopAttrib(void) if (MESA_VERBOSE & VERBOSE_API) { _mesa_debug(ctx, "glPopAttrib %s\n", - _mesa_lookup_enum_by_nr(attr->kind)); + _mesa_enum_to_string(attr->kind)); } switch (attr->kind) { diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c index d869fa2aa09..4fc32962425 100644 --- a/src/mesa/main/blend.c +++ b/src/mesa/main/blend.c @@ -128,28 +128,28 @@ validate_blend_factors(struct gl_context *ctx, const char *func, if (!legal_src_factor(ctx, sfactorRGB)) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(sfactorRGB = %s)", func, - _mesa_lookup_enum_by_nr(sfactorRGB)); + _mesa_enum_to_string(sfactorRGB)); return GL_FALSE; } if (!legal_dst_factor(ctx, dfactorRGB)) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(dfactorRGB = %s)", func, - _mesa_lookup_enum_by_nr(dfactorRGB)); + _mesa_enum_to_string(dfactorRGB)); return GL_FALSE; } if (sfactorA != sfactorRGB && !legal_src_factor(ctx, sfactorA)) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(sfactorA = %s)", func, - _mesa_lookup_enum_by_nr(sfactorA)); + _mesa_enum_to_string(sfactorA)); return GL_FALSE; } if (dfactorA != dfactorRGB && !legal_dst_factor(ctx, dfactorA)) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(dfactorA = %s)", func, - _mesa_lookup_enum_by_nr(dfactorA)); + _mesa_enum_to_string(dfactorA)); return GL_FALSE; } @@ -208,10 +208,10 @@ _mesa_BlendFuncSeparate( GLenum sfactorRGB, GLenum dfactorRGB, if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glBlendFuncSeparate %s %s %s %s\n", - _mesa_lookup_enum_by_nr(sfactorRGB), - _mesa_lookup_enum_by_nr(dfactorRGB), - _mesa_lookup_enum_by_nr(sfactorA), - _mesa_lookup_enum_by_nr(dfactorA)); + _mesa_enum_to_string(sfactorRGB), + _mesa_enum_to_string(dfactorRGB), + _mesa_enum_to_string(sfactorA), + _mesa_enum_to_string(dfactorA)); if (!validate_blend_factors(ctx, "glBlendFuncSeparate", sfactorRGB, dfactorRGB, @@ -342,7 +342,7 @@ _mesa_BlendEquation( GLenum mode ) if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glBlendEquation(%s)\n", - _mesa_lookup_enum_by_nr(mode)); + _mesa_enum_to_string(mode)); if (!legal_blend_equation(ctx, mode)) { _mesa_error(ctx, GL_INVALID_ENUM, "glBlendEquation"); @@ -385,7 +385,7 @@ _mesa_BlendEquationiARB(GLuint buf, GLenum mode) if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glBlendEquationi(%u, %s)\n", - buf, _mesa_lookup_enum_by_nr(mode)); + buf, _mesa_enum_to_string(mode)); if (buf >= ctx->Const.MaxDrawBuffers) { 
_mesa_error(ctx, GL_INVALID_VALUE, "glBlendFuncSeparatei(buffer=%u)", @@ -421,8 +421,8 @@ _mesa_BlendEquationSeparate( GLenum modeRGB, GLenum modeA ) if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glBlendEquationSeparateEXT(%s %s)\n", - _mesa_lookup_enum_by_nr(modeRGB), - _mesa_lookup_enum_by_nr(modeA)); + _mesa_enum_to_string(modeRGB), + _mesa_enum_to_string(modeA)); if ( (modeRGB != modeA) && !ctx->Extensions.EXT_blend_equation_separate ) { _mesa_error(ctx, GL_INVALID_OPERATION, @@ -476,8 +476,8 @@ _mesa_BlendEquationSeparateiARB(GLuint buf, GLenum modeRGB, GLenum modeA) if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glBlendEquationSeparatei(%u, %s %s)\n", buf, - _mesa_lookup_enum_by_nr(modeRGB), - _mesa_lookup_enum_by_nr(modeA)); + _mesa_enum_to_string(modeRGB), + _mesa_enum_to_string(modeA)); if (buf >= ctx->Const.MaxDrawBuffers) { _mesa_error(ctx, GL_INVALID_VALUE, "glBlendEquationSeparatei(buffer=%u)", @@ -567,7 +567,10 @@ _mesa_AlphaFunc( GLenum func, GLclampf ref ) if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glAlphaFunc(%s, %f)\n", - _mesa_lookup_enum_by_nr(func), ref); + _mesa_enum_to_string(func), ref); + + if (ctx->Color.AlphaFunc == func && ctx->Color.AlphaRefUnclamped == ref) + return; /* no change */ switch (func) { case GL_NEVER: @@ -578,9 +581,6 @@ _mesa_AlphaFunc( GLenum func, GLclampf ref ) case GL_NOTEQUAL: case GL_GEQUAL: case GL_ALWAYS: - if (ctx->Color.AlphaFunc == func && ctx->Color.AlphaRefUnclamped == ref) - return; /* no change */ - FLUSH_VERTICES(ctx, _NEW_COLOR); ctx->Color.AlphaFunc = func; ctx->Color.AlphaRefUnclamped = ref; @@ -613,7 +613,7 @@ _mesa_LogicOp( GLenum opcode ) GET_CURRENT_CONTEXT(ctx); if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "glLogicOp(%s)\n", _mesa_lookup_enum_by_nr(opcode)); + _mesa_debug(ctx, "glLogicOp(%s)\n", _mesa_enum_to_string(opcode)); switch (opcode) { case GL_CLEAR: @@ -790,7 +790,7 @@ _mesa_ClampColor(GLenum target, GLenum clamp) invalid_enum: _mesa_error(ctx, GL_INVALID_ENUM, "glClampColor(%s)", - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); } static GLboolean @@ -930,12 +930,10 @@ void _mesa_init_color( struct gl_context * ctx ) ctx->Color._ClampFragmentColor = GL_FALSE; ctx->Color.ClampReadColor = GL_FIXED_ONLY_ARB; - if (ctx->API == API_OPENGLES2) { - /* GLES 3 behaves as though GL_FRAMEBUFFER_SRGB is always enabled. */ - ctx->Color.sRGBEnabled = GL_TRUE; - } else { - ctx->Color.sRGBEnabled = GL_FALSE; - } + /* GLES 1/2/3 behaves as though GL_FRAMEBUFFER_SRGB is always enabled + * if EGL_KHR_gl_colorspace has been used to request sRGB. 
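
A usage sketch, not part of this change: the GLES sRGB behaviour referred to above only applies when the window-system surface was created with an sRGB colorspace via EGL_KHR_gl_colorspace. The helper below is illustrative; the display, config and native window handles are assumed to come from the usual EGL setup.

#include <EGL/egl.h>
#include <EGL/eglext.h>

/* Illustrative only: ask EGL for an sRGB-encoded window surface so that the
 * "GL_FRAMEBUFFER_SRGB always enabled" GLES behaviour described above applies. */
static EGLSurface
create_srgb_window_surface(EGLDisplay dpy, EGLConfig cfg, EGLNativeWindowType win)
{
   static const EGLint attribs[] = {
      EGL_GL_COLORSPACE_KHR, EGL_GL_COLORSPACE_SRGB_KHR,
      EGL_NONE
   };
   return eglCreateWindowSurface(dpy, cfg, win, attribs);
}
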
+ */ + ctx->Color.sRGBEnabled = _mesa_is_gles(ctx); } /*@}*/ diff --git a/src/mesa/main/blit.c b/src/mesa/main/blit.c index db8fee5a414..a32f1a42aea 100644 --- a/src/mesa/main/blit.c +++ b/src/mesa/main/blit.c @@ -37,6 +37,7 @@ #include "framebuffer.h" #include "glformats.h" #include "mtypes.h" +#include "macros.h" #include "state.h" @@ -59,6 +60,31 @@ find_attachment(const struct gl_framebuffer *fb, /** + * \return true if two regions overlap, false otherwise + */ +bool +_mesa_regions_overlap(int srcX0, int srcY0, + int srcX1, int srcY1, + int dstX0, int dstY0, + int dstX1, int dstY1) +{ + if (MAX2(srcX0, srcX1) < MIN2(dstX0, dstX1)) + return false; /* dst completely right of src */ + + if (MAX2(dstX0, dstX1) < MIN2(srcX0, srcX1)) + return false; /* dst completely left of src */ + + if (MAX2(srcY0, srcY1) < MIN2(dstY0, dstY1)) + return false; /* dst completely above src */ + + if (MAX2(dstY0, dstY1) < MIN2(srcY0, srcY1)) + return false; /* dst completely below src */ + + return true; /* some overlap */ +} + + +/** * Helper function for checking if the datatypes of color buffers are * compatible for glBlitFramebuffer. From the 3.1 spec, page 198: * @@ -186,7 +212,7 @@ _mesa_blit_framebuffer(struct gl_context *ctx, if (!is_valid_blit_filter(ctx, filter)) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid filter %s)", func, - _mesa_lookup_enum_by_nr(filter)); + _mesa_enum_to_string(filter)); return; } @@ -194,7 +220,7 @@ _mesa_blit_framebuffer(struct gl_context *ctx, filter == GL_SCALED_RESOLVE_NICEST_EXT) && (readFb->Visual.samples == 0 || drawFb->Visual.samples > 0)) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(%s: invalid samples)", func, - _mesa_lookup_enum_by_nr(filter)); + _mesa_enum_to_string(filter)); return; } @@ -522,7 +548,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, " %d, %d, %d, %d, 0x%x, %s)\n", srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, - mask, _mesa_lookup_enum_by_nr(filter)); + mask, _mesa_enum_to_string(filter)); _mesa_blit_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer, srcX0, srcY0, srcX1, srcY1, @@ -547,7 +573,7 @@ _mesa_BlitNamedFramebuffer(GLuint readFramebuffer, GLuint drawFramebuffer, readFramebuffer, drawFramebuffer, srcX0, srcY0, srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, - mask, _mesa_lookup_enum_by_nr(filter)); + mask, _mesa_enum_to_string(filter)); /* * According to PDF page 533 of the OpenGL 4.5 core spec (30.10.2014, diff --git a/src/mesa/main/blit.h b/src/mesa/main/blit.h index 54b946e3192..88dd4a9ec8d 100644 --- a/src/mesa/main/blit.h +++ b/src/mesa/main/blit.h @@ -28,6 +28,12 @@ #include "glheader.h" +extern bool +_mesa_regions_overlap(int srcX0, int srcY0, + int srcX1, int srcY1, + int dstX0, int dstY0, + int dstX1, int dstY1); + extern void _mesa_blit_framebuffer(struct gl_context *ctx, struct gl_framebuffer *readFb, diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index 66dee680258..1cdea937f91 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -91,8 +91,9 @@ get_buffer_target(struct gl_context *ctx, GLenum target) case GL_COPY_WRITE_BUFFER: return &ctx->CopyWriteBuffer; case GL_DRAW_INDIRECT_BUFFER: - if (ctx->API == API_OPENGL_CORE && - ctx->Extensions.ARB_draw_indirect) { + if ((ctx->API == API_OPENGL_CORE && + ctx->Extensions.ARB_draw_indirect) || + _mesa_is_gles31(ctx)) { return &ctx->DrawIndirectBuffer; } break; @@ -112,6 +113,11 @@ get_buffer_target(struct gl_context *ctx, GLenum target) return &ctx->UniformBuffer; } break; + case GL_SHADER_STORAGE_BUFFER: + if 
(ctx->Extensions.ARB_shader_storage_buffer_object) { + return &ctx->ShaderStorageBuffer; + } + break; case GL_ATOMIC_COUNTER_BUFFER: if (ctx->Extensions.ARB_shader_atomic_counters) { return &ctx->AtomicBuffer; @@ -831,6 +837,9 @@ _mesa_init_buffer_objects( struct gl_context *ctx ) _mesa_reference_buffer_object(ctx, &ctx->UniformBuffer, ctx->Shared->NullBufferObj); + _mesa_reference_buffer_object(ctx, &ctx->ShaderStorageBuffer, + ctx->Shared->NullBufferObj); + _mesa_reference_buffer_object(ctx, &ctx->AtomicBuffer, ctx->Shared->NullBufferObj); @@ -845,6 +854,14 @@ _mesa_init_buffer_objects( struct gl_context *ctx ) ctx->UniformBufferBindings[i].Size = -1; } + for (i = 0; i < MAX_COMBINED_SHADER_STORAGE_BUFFERS; i++) { + _mesa_reference_buffer_object(ctx, + &ctx->ShaderStorageBufferBindings[i].BufferObject, + ctx->Shared->NullBufferObj); + ctx->ShaderStorageBufferBindings[i].Offset = -1; + ctx->ShaderStorageBufferBindings[i].Size = -1; + } + for (i = 0; i < MAX_COMBINED_ATOMIC_BUFFERS; i++) { _mesa_reference_buffer_object(ctx, &ctx->AtomicBufferBindings[i].BufferObject, @@ -867,6 +884,8 @@ _mesa_free_buffer_objects( struct gl_context *ctx ) _mesa_reference_buffer_object(ctx, &ctx->UniformBuffer, NULL); + _mesa_reference_buffer_object(ctx, &ctx->ShaderStorageBuffer, NULL); + _mesa_reference_buffer_object(ctx, &ctx->AtomicBuffer, NULL); _mesa_reference_buffer_object(ctx, &ctx->DrawIndirectBuffer, NULL); @@ -877,6 +896,12 @@ _mesa_free_buffer_objects( struct gl_context *ctx ) NULL); } + for (i = 0; i < MAX_COMBINED_SHADER_STORAGE_BUFFERS; i++) { + _mesa_reference_buffer_object(ctx, + &ctx->ShaderStorageBufferBindings[i].BufferObject, + NULL); + } + for (i = 0; i < MAX_COMBINED_ATOMIC_BUFFERS; i++) { _mesa_reference_buffer_object(ctx, &ctx->AtomicBufferBindings[i].BufferObject, @@ -1158,7 +1183,7 @@ _mesa_BindBuffer(GLenum target, GLuint buffer) if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glBindBuffer(%s, %u)\n", - _mesa_lookup_enum_by_nr(target), buffer); + _mesa_enum_to_string(target), buffer); bind_buffer_object(ctx, target, buffer); } @@ -1240,6 +1265,17 @@ _mesa_DeleteBuffers(GLsizei n, const GLuint *ids) _mesa_BindBuffer( GL_UNIFORM_BUFFER, 0 ); } + /* unbind SSBO binding points */ + for (j = 0; j < ctx->Const.MaxShaderStorageBufferBindings; j++) { + if (ctx->ShaderStorageBufferBindings[j].BufferObject == bufObj) { + _mesa_BindBufferBase(GL_SHADER_STORAGE_BUFFER, j, 0); + } + } + + if (ctx->ShaderStorageBuffer == bufObj) { + _mesa_BindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + } + /* unbind Atomci Buffer binding points */ for (j = 0; j < ctx->Const.MaxAtomicBufferBindings; j++) { if (ctx->AtomicBufferBindings[j].BufferObject == bufObj) { @@ -1500,9 +1536,9 @@ _mesa_buffer_data(struct gl_context *ctx, struct gl_buffer_object *bufObj, if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "%s(%s, %ld, %p, %s)\n", func, - _mesa_lookup_enum_by_nr(target), + _mesa_enum_to_string(target), (long int) size, data, - _mesa_lookup_enum_by_nr(usage)); + _mesa_enum_to_string(usage)); if (size < 0) { _mesa_error(ctx, GL_INVALID_VALUE, "%s(size < 0)", func); @@ -1535,7 +1571,7 @@ _mesa_buffer_data(struct gl_context *ctx, struct gl_buffer_object *bufObj, if (!valid_usage) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid usage: %s)", func, - _mesa_lookup_enum_by_nr(usage)); + _mesa_enum_to_string(usage)); return; } @@ -1990,7 +2026,7 @@ get_buffer_parameter(struct gl_context *ctx, invalid_pname: _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid pname: %s)", func, - _mesa_lookup_enum_by_nr(pname)); + 
_mesa_enum_to_string(pname)); return false; } @@ -2337,7 +2373,7 @@ _mesa_map_buffer_range(struct gl_context *ctx, if (offset + length > bufObj->Size) { _mesa_error(ctx, GL_INVALID_VALUE, - "%s(offset %ld + length %ld > buffer_size %ld)", func, + "%s(offset %td + length %td > buffer_size %td)", func, offset, length, bufObj->Size); return NULL; } @@ -2999,6 +3035,33 @@ set_ubo_binding(struct gl_context *ctx, } /** + * Binds a buffer object to a shader storage buffer binding point. + * + * The caller is responsible for flushing vertices and updating + * NewDriverState. + */ +static void +set_ssbo_binding(struct gl_context *ctx, + struct gl_shader_storage_buffer_binding *binding, + struct gl_buffer_object *bufObj, + GLintptr offset, + GLsizeiptr size, + GLboolean autoSize) +{ + _mesa_reference_buffer_object(ctx, &binding->BufferObject, bufObj); + + binding->Offset = offset; + binding->Size = size; + binding->AutomaticSize = autoSize; + + /* If this is a real buffer object, mark it has having been used + * at some point as a SSBO. + */ + if (size >= 0) + bufObj->UsageHistory |= USAGE_SHADER_STORAGE_BUFFER; +} + +/** * Binds a buffer object to a uniform buffer binding point. * * Unlike set_ubo_binding(), this function also flushes vertices @@ -3030,6 +3093,37 @@ bind_uniform_buffer(struct gl_context *ctx, } /** + * Binds a buffer object to a shader storage buffer binding point. + * + * Unlike set_ssbo_binding(), this function also flushes vertices + * and updates NewDriverState. It also checks if the binding + * has actually changed before updating it. + */ +static void +bind_shader_storage_buffer(struct gl_context *ctx, + GLuint index, + struct gl_buffer_object *bufObj, + GLintptr offset, + GLsizeiptr size, + GLboolean autoSize) +{ + struct gl_shader_storage_buffer_binding *binding = + &ctx->ShaderStorageBufferBindings[index]; + + if (binding->BufferObject == bufObj && + binding->Offset == offset && + binding->Size == size && + binding->AutomaticSize == autoSize) { + return; + } + + FLUSH_VERTICES(ctx, 0); + ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer; + + set_ssbo_binding(ctx, binding, bufObj, offset, size, autoSize); +} + +/** * Bind a region of a buffer object to a uniform block binding point. * \param index the uniform buffer binding point index * \param bufObj the buffer object @@ -3064,6 +3158,40 @@ bind_buffer_range_uniform_buffer(struct gl_context *ctx, bind_uniform_buffer(ctx, index, bufObj, offset, size, GL_FALSE); } +/** + * Bind a region of a buffer object to a shader storage block binding point. 
+ * \param index the shader storage buffer binding point index + * \param bufObj the buffer object + * \param offset offset to the start of buffer object region + * \param size size of the buffer object region + */ +static void +bind_buffer_range_shader_storage_buffer(struct gl_context *ctx, + GLuint index, + struct gl_buffer_object *bufObj, + GLintptr offset, + GLsizeiptr size) +{ + if (index >= ctx->Const.MaxShaderStorageBufferBindings) { + _mesa_error(ctx, GL_INVALID_VALUE, "glBindBufferRange(index=%d)", index); + return; + } + + if (offset & (ctx->Const.ShaderStorageBufferOffsetAlignment - 1)) { + _mesa_error(ctx, GL_INVALID_VALUE, + "glBindBufferRange(offset misaligned %d/%d)", (int) offset, + ctx->Const.ShaderStorageBufferOffsetAlignment); + return; + } + + if (bufObj == ctx->Shared->NullBufferObj) { + offset = -1; + size = -1; + } + + _mesa_reference_buffer_object(ctx, &ctx->ShaderStorageBuffer, bufObj); + bind_shader_storage_buffer(ctx, index, bufObj, offset, size, GL_FALSE); +} /** * Bind a buffer object to a uniform block binding point. @@ -3088,6 +3216,28 @@ bind_buffer_base_uniform_buffer(struct gl_context *ctx, } /** + * Bind a buffer object to a shader storage block binding point. + * As above, but offset = 0. + */ +static void +bind_buffer_base_shader_storage_buffer(struct gl_context *ctx, + GLuint index, + struct gl_buffer_object *bufObj) +{ + if (index >= ctx->Const.MaxShaderStorageBufferBindings) { + _mesa_error(ctx, GL_INVALID_VALUE, "glBindBufferBase(index=%d)", index); + return; + } + + _mesa_reference_buffer_object(ctx, &ctx->ShaderStorageBuffer, bufObj); + + if (bufObj == ctx->Shared->NullBufferObj) + bind_shader_storage_buffer(ctx, index, bufObj, -1, -1, GL_TRUE); + else + bind_shader_storage_buffer(ctx, index, bufObj, 0, 0, GL_TRUE); +} + +/** * Binds a buffer object to an atomic buffer binding point. * * The caller is responsible for validating the offset, @@ -3219,6 +3369,35 @@ error_check_bind_uniform_buffers(struct gl_context *ctx, return true; } +static bool +error_check_bind_shader_storage_buffers(struct gl_context *ctx, + GLuint first, GLsizei count, + const char *caller) +{ + if (!ctx->Extensions.ARB_shader_storage_buffer_object) { + _mesa_error(ctx, GL_INVALID_ENUM, + "%s(target=GL_SHADER_STORAGE_BUFFER)", caller); + return false; + } + + /* The ARB_multi_bind_spec says: + * + * "An INVALID_OPERATION error is generated if <first> + <count> is + * greater than the number of target-specific indexed binding points, + * as described in section 6.7.1." 
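
A client-side usage sketch, not part of this change, of the range binding validated here: offsets passed to glBindBufferRange(GL_SHADER_STORAGE_BUFFER, ...) must be multiples of GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT. The helper name and the choice of binding point 0 are illustrative.

#include <epoxy/gl.h>   /* any loader exposing the GL 4.3 prototypes will do */

/* Illustrative only: bind the upper half of an SSBO to binding point 0,
 * rounding the offset up to the queried alignment (assumed to be a power of
 * two and much smaller than buf_size). */
static void
bind_upper_half_as_ssbo(GLuint ssbo, GLsizeiptr buf_size)
{
   GLint align = 1;
   glGetIntegerv(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT, &align);

   GLintptr offset = (buf_size / 2 + align - 1) & ~((GLintptr) align - 1);
   glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, ssbo,
                     offset, buf_size - offset);
}
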
+ */ + if (first + count > ctx->Const.MaxShaderStorageBufferBindings) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(first=%u + count=%d > the value of " + "GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS=%u)", + caller, first, count, + ctx->Const.MaxShaderStorageBufferBindings); + return false; + } + + return true; +} + /** * Unbind all uniform buffers in the range * <first> through <first>+<count>-1 @@ -3234,6 +3413,22 @@ unbind_uniform_buffers(struct gl_context *ctx, GLuint first, GLsizei count) bufObj, -1, -1, GL_TRUE); } +/** + * Unbind all shader storage buffers in the range + * <first> through <first>+<count>-1 + */ +static void +unbind_shader_storage_buffers(struct gl_context *ctx, GLuint first, + GLsizei count) +{ + struct gl_buffer_object *bufObj = ctx->Shared->NullBufferObj; + GLint i; + + for (i = 0; i < count; i++) + set_ssbo_binding(ctx, &ctx->ShaderStorageBufferBindings[first + i], + bufObj, -1, -1, GL_TRUE); +} + static void bind_uniform_buffers_base(struct gl_context *ctx, GLuint first, GLsizei count, const GLuint *buffers) @@ -3301,6 +3496,73 @@ bind_uniform_buffers_base(struct gl_context *ctx, GLuint first, GLsizei count, } static void +bind_shader_storage_buffers_base(struct gl_context *ctx, GLuint first, + GLsizei count, const GLuint *buffers) +{ + GLint i; + + if (!error_check_bind_shader_storage_buffers(ctx, first, count, + "glBindBuffersBase")) + return; + + /* Assume that at least one binding will be changed */ + FLUSH_VERTICES(ctx, 0); + ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer; + + if (!buffers) { + /* The ARB_multi_bind spec says: + * + * "If <buffers> is NULL, all bindings from <first> through + * <first>+<count>-1 are reset to their unbound (zero) state." + */ + unbind_shader_storage_buffers(ctx, first, count); + return; + } + + /* Note that the error semantics for multi-bind commands differ from + * those of other GL commands. + * + * The Issues section in the ARB_multi_bind spec says: + * + * "(11) Typically, OpenGL specifies that if an error is generated by a + * command, that command has no effect. This is somewhat + * unfortunate for multi-bind commands, because it would require a + * first pass to scan the entire list of bound objects for errors + * and then a second pass to actually perform the bindings. + * Should we have different error semantics? + * + * RESOLVED: Yes. In this specification, when the parameters for + * one of the <count> binding points are invalid, that binding point + * is not updated and an error will be generated. However, other + * binding points in the same command will be updated if their + * parameters are valid and no other error occurs." 
+ */ + + _mesa_begin_bufferobj_lookups(ctx); + + for (i = 0; i < count; i++) { + struct gl_shader_storage_buffer_binding *binding = + &ctx->ShaderStorageBufferBindings[first + i]; + struct gl_buffer_object *bufObj; + + if (binding->BufferObject && binding->BufferObject->Name == buffers[i]) + bufObj = binding->BufferObject; + else + bufObj = _mesa_multi_bind_lookup_bufferobj(ctx, buffers, i, + "glBindBuffersBase"); + + if (bufObj) { + if (bufObj == ctx->Shared->NullBufferObj) + set_ssbo_binding(ctx, binding, bufObj, -1, -1, GL_TRUE); + else + set_ssbo_binding(ctx, binding, bufObj, 0, 0, GL_TRUE); + } + } + + _mesa_end_bufferobj_lookups(ctx); +} + +static void bind_uniform_buffers_range(struct gl_context *ctx, GLuint first, GLsizei count, const GLuint *buffers, const GLintptr *offsets, const GLsizeiptr *sizes) @@ -3405,6 +3667,112 @@ bind_uniform_buffers_range(struct gl_context *ctx, GLuint first, GLsizei count, _mesa_end_bufferobj_lookups(ctx); } +static void +bind_shader_storage_buffers_range(struct gl_context *ctx, GLuint first, + GLsizei count, const GLuint *buffers, + const GLintptr *offsets, + const GLsizeiptr *sizes) +{ + GLint i; + + if (!error_check_bind_shader_storage_buffers(ctx, first, count, + "glBindBuffersRange")) + return; + + /* Assume that at least one binding will be changed */ + FLUSH_VERTICES(ctx, 0); + ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer; + + if (!buffers) { + /* The ARB_multi_bind spec says: + * + * "If <buffers> is NULL, all bindings from <first> through + * <first>+<count>-1 are reset to their unbound (zero) state. + * In this case, the offsets and sizes associated with the + * binding points are set to default values, ignoring + * <offsets> and <sizes>." + */ + unbind_shader_storage_buffers(ctx, first, count); + return; + } + + /* Note that the error semantics for multi-bind commands differ from + * those of other GL commands. + * + * The Issues section in the ARB_multi_bind spec says: + * + * "(11) Typically, OpenGL specifies that if an error is generated by a + * command, that command has no effect. This is somewhat + * unfortunate for multi-bind commands, because it would require a + * first pass to scan the entire list of bound objects for errors + * and then a second pass to actually perform the bindings. + * Should we have different error semantics? + * + * RESOLVED: Yes. In this specification, when the parameters for + * one of the <count> binding points are invalid, that binding point + * is not updated and an error will be generated. However, other + * binding points in the same command will be updated if their + * parameters are valid and no other error occurs." + */ + + _mesa_begin_bufferobj_lookups(ctx); + + for (i = 0; i < count; i++) { + struct gl_shader_storage_buffer_binding *binding = + &ctx->ShaderStorageBufferBindings[first + i]; + struct gl_buffer_object *bufObj; + + if (!bind_buffers_check_offset_and_size(ctx, i, offsets, sizes)) + continue; + + /* The ARB_multi_bind spec says: + * + * "An INVALID_VALUE error is generated by BindBuffersRange if any + * pair of values in <offsets> and <sizes> does not respectively + * satisfy the constraints described for those parameters for the + * specified target, as described in section 6.7.1 (per binding)." + * + * Section 6.7.1 refers to table 6.5, which says: + * + * "┌───────────────────────────────────────────────────────────────┐ + * │ Shader storage buffer array bindings (see sec. 7.8) │ + * ├─────────────────────┬─────────────────────────────────────────┤ + * │ ... │ ... 
│ + * │ offset restriction │ multiple of value of SHADER_STORAGE_- │ + * │ │ BUFFER_OFFSET_ALIGNMENT │ + * │ ... │ ... │ + * │ size restriction │ none │ + * └─────────────────────┴─────────────────────────────────────────┘" + */ + if (offsets[i] & (ctx->Const.ShaderStorageBufferOffsetAlignment - 1)) { + _mesa_error(ctx, GL_INVALID_VALUE, + "glBindBuffersRange(offsets[%u]=%" PRId64 + " is misaligned; it must be a multiple of the value of " + "GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT=%u when " + "target=GL_SHADER_STORAGE_BUFFER)", + i, (int64_t) offsets[i], + ctx->Const.ShaderStorageBufferOffsetAlignment); + continue; + } + + if (binding->BufferObject && binding->BufferObject->Name == buffers[i]) + bufObj = binding->BufferObject; + else + bufObj = _mesa_multi_bind_lookup_bufferobj(ctx, buffers, i, + "glBindBuffersRange"); + + if (bufObj) { + if (bufObj == ctx->Shared->NullBufferObj) + set_ssbo_binding(ctx, binding, bufObj, -1, -1, GL_FALSE); + else + set_ssbo_binding(ctx, binding, bufObj, + offsets[i], sizes[i], GL_FALSE); + } + } + + _mesa_end_bufferobj_lookups(ctx); +} + static bool error_check_bind_xfb_buffers(struct gl_context *ctx, struct gl_transform_feedback_object *tfObj, @@ -3894,6 +4262,9 @@ _mesa_BindBufferRange(GLenum target, GLuint index, case GL_UNIFORM_BUFFER: bind_buffer_range_uniform_buffer(ctx, index, bufObj, offset, size); return; + case GL_SHADER_STORAGE_BUFFER: + bind_buffer_range_shader_storage_buffer(ctx, index, bufObj, offset, size); + return; case GL_ATOMIC_COUNTER_BUFFER: bind_atomic_buffer(ctx, index, bufObj, offset, size, "glBindBufferRange"); @@ -3960,6 +4331,9 @@ _mesa_BindBufferBase(GLenum target, GLuint index, GLuint buffer) case GL_UNIFORM_BUFFER: bind_buffer_base_uniform_buffer(ctx, index, bufObj); return; + case GL_SHADER_STORAGE_BUFFER: + bind_buffer_base_shader_storage_buffer(ctx, index, bufObj); + return; case GL_ATOMIC_COUNTER_BUFFER: bind_atomic_buffer(ctx, index, bufObj, 0, 0, "glBindBufferBase"); @@ -3984,13 +4358,17 @@ _mesa_BindBuffersRange(GLenum target, GLuint first, GLsizei count, case GL_UNIFORM_BUFFER: bind_uniform_buffers_range(ctx, first, count, buffers, offsets, sizes); return; + case GL_SHADER_STORAGE_BUFFER: + bind_shader_storage_buffers_range(ctx, first, count, buffers, offsets, + sizes); + return; case GL_ATOMIC_COUNTER_BUFFER: bind_atomic_buffers_range(ctx, first, count, buffers, offsets, sizes); return; default: _mesa_error(ctx, GL_INVALID_ENUM, "glBindBuffersRange(target=%s)", - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); break; } } @@ -4008,12 +4386,15 @@ _mesa_BindBuffersBase(GLenum target, GLuint first, GLsizei count, case GL_UNIFORM_BUFFER: bind_uniform_buffers_base(ctx, first, count, buffers); return; + case GL_SHADER_STORAGE_BUFFER: + bind_shader_storage_buffers_base(ctx, first, count, buffers); + return; case GL_ATOMIC_COUNTER_BUFFER: bind_atomic_buffers_base(ctx, first, count, buffers); return; default: _mesa_error(ctx, GL_INVALID_ENUM, "glBindBuffersBase(target=%s)", - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); break; } } diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index 0536266d756..93588a2ee18 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -251,7 +251,7 @@ _mesa_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, FLUSH_VERTICES(ctx, 0); if (MESA_VERBOSE & VERBOSE_API) { - _mesa_debug(ctx, "%s %s\n", caller, _mesa_lookup_enum_by_nr(buffer)); + _mesa_debug(ctx, "%s %s\n", caller, _mesa_enum_to_string(buffer)); } if (buffer == 
GL_NONE) { @@ -264,14 +264,14 @@ _mesa_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, if (destMask == BAD_MASK) { /* totally bogus buffer */ _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)", caller, - _mesa_lookup_enum_by_nr(buffer)); + _mesa_enum_to_string(buffer)); return; } destMask &= supportedMask; if (destMask == 0x0) { /* none of the named color buffers exist! */ _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid buffer %s)", - caller, _mesa_lookup_enum_by_nr(buffer)); + caller, _mesa_enum_to_string(buffer)); return; } } @@ -411,7 +411,7 @@ _mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb, */ if (destMask[output] == BAD_MASK) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)", - caller, _mesa_lookup_enum_by_nr(buffers[output])); + caller, _mesa_enum_to_string(buffers[output])); return; } @@ -427,7 +427,7 @@ _mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb, */ if (_mesa_bitcount(destMask[output]) > 1) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)", - caller, _mesa_lookup_enum_by_nr(buffers[output])); + caller, _mesa_enum_to_string(buffers[output])); return; } @@ -445,7 +445,7 @@ _mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb, if (destMask[output] == 0) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(unsupported buffer %s)", - caller, _mesa_lookup_enum_by_nr(buffers[output])); + caller, _mesa_enum_to_string(buffers[output])); return; } @@ -459,7 +459,7 @@ _mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb, buffers[output] != GL_COLOR_ATTACHMENT0 + output) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(unsupported buffer %s)", - caller, _mesa_lookup_enum_by_nr(buffers[output])); + caller, _mesa_enum_to_string(buffers[output])); return; } @@ -471,7 +471,7 @@ _mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb, if (destMask[output] & usedBufferMask) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(duplicated buffer %s)", - caller, _mesa_lookup_enum_by_nr(buffers[output])); + caller, _mesa_enum_to_string(buffers[output])); return; } @@ -700,7 +700,7 @@ _mesa_read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, FLUSH_VERTICES(ctx, 0); if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "%s %s\n", caller, _mesa_lookup_enum_by_nr(buffer)); + _mesa_debug(ctx, "%s %s\n", caller, _mesa_enum_to_string(buffer)); if (buffer == GL_NONE) { /* This is legal--it means that no buffer should be bound for reading. */ @@ -712,14 +712,14 @@ _mesa_read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, if (srcBuffer == -1) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)", caller, - _mesa_lookup_enum_by_nr(buffer)); + _mesa_enum_to_string(buffer)); return; } supportedMask = supported_buffer_bitmask(ctx, fb); if (((1 << srcBuffer) & supportedMask) == 0) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid buffer %s)", caller, - _mesa_lookup_enum_by_nr(buffer)); + _mesa_enum_to_string(buffer)); return; } } diff --git a/src/mesa/main/clear.c b/src/mesa/main/clear.c index 426caea4709..3bfcc5c0e39 100644 --- a/src/mesa/main/clear.c +++ b/src/mesa/main/clear.c @@ -325,6 +325,18 @@ _mesa_ClearBufferiv(GLenum buffer, GLint drawbuffer, const GLint *value) _mesa_update_state( ctx ); } + /* Page 498 of the PDF, section '17.4.3.1 Clearing Individual Buffers' + * of the OpenGL 4.5 spec states: + * + * "An INVALID_ENUM error is generated by ClearBufferiv and + * ClearNamedFramebufferiv if buffer is not COLOR or STENCIL." 
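
A usage sketch, not part of this change, matching the spec language quoted above: integer clears of depth or packed depth/stencil are rejected, so applications use the fv/fi entry points for those aspects. The function name is illustrative; the bound framebuffer is assumed to have color, depth and stencil attachments.

#include <epoxy/gl.h>   /* any loader exposing the GL 3.0 prototypes will do */

/* Illustrative only: glClearBufferiv handles COLOR (integer formats) and
 * STENCIL; depth and depth+stencil go through glClearBufferfv/fi. */
static void
clear_attachments(void)
{
   const GLfloat black[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
   const GLint zero = 0;

   glClearBufferfv(GL_COLOR, 0, black);            /* draw buffer 0 */
   glClearBufferiv(GL_STENCIL, 0, &zero);          /* stencil only */
   glClearBufferfi(GL_DEPTH_STENCIL, 0, 1.0f, 0);  /* depth + stencil */
}
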
+ */ + if (buffer == GL_DEPTH || buffer == GL_DEPTH_STENCIL) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glClearBufferiv(buffer=GL_DEPTH || GL_DEPTH_STENCIL)"); + return; + } + switch (buffer) { case GL_STENCIL: /* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says: @@ -395,7 +407,7 @@ _mesa_ClearBufferiv(GLenum buffer, GLint drawbuffer, const GLint *value) return; default: _mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferiv(buffer=%s)", - _mesa_lookup_enum_by_nr(buffer)); + _mesa_enum_to_string(buffer)); return; } } @@ -485,7 +497,7 @@ _mesa_ClearBufferuiv(GLenum buffer, GLint drawbuffer, const GLuint *value) return; default: _mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferuiv(buffer=%s)", - _mesa_lookup_enum_by_nr(buffer)); + _mesa_enum_to_string(buffer)); return; } } @@ -596,7 +608,7 @@ _mesa_ClearBufferfv(GLenum buffer, GLint drawbuffer, const GLfloat *value) return; default: _mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferfv(buffer=%s)", - _mesa_lookup_enum_by_nr(buffer)); + _mesa_enum_to_string(buffer)); return; } } @@ -636,7 +648,7 @@ _mesa_ClearBufferfi(GLenum buffer, GLint drawbuffer, if (buffer != GL_DEPTH_STENCIL) { _mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferfi(buffer=%s)", - _mesa_lookup_enum_by_nr(buffer)); + _mesa_enum_to_string(buffer)); return; } diff --git a/src/mesa/main/condrender.c b/src/mesa/main/condrender.c index 77e4b95ee8f..46c6036d2a5 100644 --- a/src/mesa/main/condrender.c +++ b/src/mesa/main/condrender.c @@ -87,7 +87,7 @@ _mesa_BeginConditionalRender(GLuint queryId, GLenum mode) /* fallthrough - invalid */ default: _mesa_error(ctx, GL_INVALID_ENUM, "glBeginConditionalRender(mode=%s)", - _mesa_lookup_enum_by_nr(mode)); + _mesa_enum_to_string(mode)); return; } @@ -184,7 +184,7 @@ _mesa_check_conditional_render(struct gl_context *ctx) default: _mesa_problem(ctx, "Bad cond render mode %s in " " _mesa_check_conditional_render()", - _mesa_lookup_enum_by_nr(ctx->Query.CondRenderMode)); + _mesa_enum_to_string(ctx->Query.CondRenderMode)); return GL_TRUE; } } diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h index 9c3baf4c6aa..b35031db3c9 100644 --- a/src/mesa/main/config.h +++ b/src/mesa/main/config.h @@ -171,8 +171,10 @@ #define MAX_PROGRAM_LOCAL_PARAMS 4096 #define MAX_UNIFORMS 4096 #define MAX_UNIFORM_BUFFERS 15 /* + 1 default uniform buffer */ +#define MAX_SHADER_STORAGE_BUFFERS 7 /* + 1 default shader storage buffer */ /* 6 is for vertex, hull, domain, geometry, fragment, and compute shader. */ #define MAX_COMBINED_UNIFORM_BUFFERS (MAX_UNIFORM_BUFFERS * 6) +#define MAX_COMBINED_SHADER_STORAGE_BUFFERS (MAX_SHADER_STORAGE_BUFFERS * 6) #define MAX_ATOMIC_COUNTERS 4096 /* 6 is for vertex, hull, domain, geometry, fragment, and compute shader. 
*/ #define MAX_COMBINED_ATOMIC_BUFFERS (MAX_UNIFORM_BUFFERS * 6) @@ -272,6 +274,12 @@ #define MAX_VERTEX_STREAMS 4 /*@}*/ +/** For GL_ARB_shader_subroutine */ +/*@{*/ +#define MAX_SUBROUTINES 256 +#define MAX_SUBROUTINE_UNIFORM_LOCATIONS 1024 +/*@}*/ + /** For GL_INTEL_performance_query */ /*@{*/ #define MAX_PERFQUERY_QUERY_NAME_LENGTH 256 @@ -294,6 +302,14 @@ /** For GL_ARB_pipeline_statistics_query */ #define MAX_PIPELINE_STATISTICS 11 +/** For GL_ARB_tessellation_shader */ +/*@{*/ +#define MAX_TESS_GEN_LEVEL 64 +#define MAX_PATCH_VERTICES 32 +#define MAX_TESS_PATCH_COMPONENTS 120 +#define MAX_TESS_CONTROL_TOTAL_OUTPUT_COMPONENTS 4096 +/*@}*/ + /* * Color channel component order * diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 79fa01849e0..888c461d1c2 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -120,6 +120,7 @@ #include "shaderobj.h" #include "shaderimage.h" #include "util/simple_list.h" +#include "util/strtod.h" #include "state.h" #include "stencil.h" #include "texcompress_s3tc.h" @@ -338,31 +339,6 @@ _mesa_destroy_visual( struct gl_config *vis ) /** - * This is lame. gdb only seems to recognize enum types that are - * actually used somewhere. We want to be able to print/use enum - * values such as TEXTURE_2D_INDEX in gdb. But we don't actually use - * the gl_texture_index type anywhere. Thus, this lame function. - */ -static void -dummy_enum_func(void) -{ - gl_buffer_index bi = BUFFER_FRONT_LEFT; - gl_face_index fi = FACE_POS_X; - gl_frag_result fr = FRAG_RESULT_DEPTH; - gl_texture_index ti = TEXTURE_2D_ARRAY_INDEX; - gl_vert_attrib va = VERT_ATTRIB_POS; - gl_varying_slot vs = VARYING_SLOT_POS; - - (void) bi; - (void) fi; - (void) fr; - (void) ti; - (void) va; - (void) vs; -} - - -/** * One-time initialization mutex lock. * * \sa Used by one_time_init(). @@ -370,6 +346,16 @@ dummy_enum_func(void) mtx_t OneTimeLock = _MTX_INITIALIZER_NP; +/** + * Calls all the various one-time-fini functions in Mesa + */ + +static void +one_time_fini(void) +{ + _mesa_destroy_shader_compiler(); + _mesa_locale_fini(); +} /** * Calls all the various one-time-init functions in Mesa. @@ -391,13 +377,14 @@ one_time_init( struct gl_context *ctx ) if (!api_init_mask) { GLuint i; - /* do some implementation tests */ - assert( sizeof(GLbyte) == 1 ); - assert( sizeof(GLubyte) == 1 ); - assert( sizeof(GLshort) == 2 ); - assert( sizeof(GLushort) == 2 ); - assert( sizeof(GLint) == 4 ); - assert( sizeof(GLuint) == 4 ); + STATIC_ASSERT(sizeof(GLbyte) == 1); + STATIC_ASSERT(sizeof(GLubyte) == 1); + STATIC_ASSERT(sizeof(GLshort) == 2); + STATIC_ASSERT(sizeof(GLushort) == 2); + STATIC_ASSERT(sizeof(GLint) == 4); + STATIC_ASSERT(sizeof(GLuint) == 4); + + _mesa_locale_init(); _mesa_one_time_init_extension_overrides(); @@ -407,6 +394,8 @@ one_time_init( struct gl_context *ctx ) _mesa_ubyte_to_float_color_tab[i] = (float) i / 255.0F; } + atexit(one_time_fini); + #if defined(DEBUG) && defined(__DATE__) && defined(__TIME__) if (MESA_VERBOSE != 0) { _mesa_debug(ctx, "Mesa %s DEBUG build %s %s\n", @@ -429,13 +418,6 @@ one_time_init( struct gl_context *ctx ) api_init_mask |= 1 << ctx->API; mtx_unlock(&OneTimeLock); - - /* Hopefully atexit() is widely available. If not, we may need some - * #ifdef tests here. 
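
A generic sketch, not part of this change, of the once-guarded init/teardown pattern being introduced here, written with plain C11 primitives rather than Mesa's wrappers; all names below are placeholders.

#include <stdlib.h>
#include <threads.h>

static void
module_fini(void)
{
   /* free process-wide caches here */
}

static void
module_init_once(void)
{
   /* build process-wide tables here, then register the teardown so it
    * runs exactly once, at process exit */
   atexit(module_fini);
}

static once_flag module_once = ONCE_FLAG_INIT;

static void
module_init(void)
{
   call_once(&module_once, module_init_once);
}
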
- */ - atexit(_mesa_destroy_shader_compiler); - - dummy_enum_func(); } @@ -496,6 +478,8 @@ init_program_limits(struct gl_constants *consts, gl_shader_stage stage, prog->MaxInputComponents = 16 * 4; /* old limit not to break tnl and swrast */ prog->MaxOutputComponents = 0; /* value not used */ break; + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: case MESA_SHADER_GEOMETRY: prog->MaxParameters = MAX_VERTEX_PROGRAM_PARAMS; prog->MaxAttribs = MAX_VERTEX_GENERIC_ATTRIBS; @@ -554,6 +538,8 @@ init_program_limits(struct gl_constants *consts, gl_shader_stage stage, prog->MaxAtomicBuffers = 0; prog->MaxAtomicCounters = 0; + + prog->MaxShaderStorageBlocks = 8; } @@ -615,6 +601,12 @@ _mesa_init_constants(struct gl_constants *consts, gl_api api) consts->MaxUniformBlockSize = 16384; consts->UniformBufferOffsetAlignment = 1; + /** GL_ARB_shader_storage_buffer_object */ + consts->MaxCombinedShaderStorageBlocks = 8; + consts->MaxShaderStorageBufferBindings = 8; + consts->MaxShaderStorageBlockSize = 128 * 1024 * 1024; /* 2^27 */ + consts->ShaderStorageBufferOffsetAlignment = 256; + /* GL_ARB_explicit_uniform_location, GL_MAX_UNIFORM_LOCATIONS */ consts->MaxUserAssignableUniformLocations = 4 * MESA_SHADER_STAGES * MAX_UNIFORMS; @@ -724,6 +716,14 @@ _mesa_init_constants(struct gl_constants *consts, gl_api api) /** GL_KHR_context_flush_control */ consts->ContextReleaseBehavior = GL_CONTEXT_RELEASE_BEHAVIOR_FLUSH; + + /** GL_ARB_tessellation_shader */ + consts->MaxTessGenLevel = MAX_TESS_GEN_LEVEL; + consts->MaxPatchVertices = MAX_PATCH_VERTICES; + consts->Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits = MAX_TEXTURE_IMAGE_UNITS; + consts->Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits = MAX_TEXTURE_IMAGE_UNITS; + consts->MaxTessPatchComponents = MAX_TESS_PATCH_COMPONENTS; + consts->MaxTessControlTotalOutputComponents = MAX_TESS_CONTROL_TOTAL_OUTPUT_COMPONENTS; } @@ -1331,6 +1331,8 @@ _mesa_free_context_data( struct gl_context *ctx ) _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, NULL); _mesa_reference_vertprog(ctx, &ctx->VertexProgram._TnlProgram, NULL); + _mesa_reference_tesscprog(ctx, &ctx->TessCtrlProgram._Current, NULL); + _mesa_reference_tesseprog(ctx, &ctx->TessEvalProgram._Current, NULL); _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, NULL); _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, NULL); diff --git a/src/mesa/main/context.h b/src/mesa/main/context.h index 6f3c941016f..0f7529ad975 100644 --- a/src/mesa/main/context.h +++ b/src/mesa/main/context.h @@ -343,6 +343,26 @@ _mesa_has_compute_shaders(const struct gl_context *ctx) (ctx->API == API_OPENGLES2 && ctx->Version >= 31); } +/** + * Checks if the context supports shader subroutines. + */ +static inline bool +_mesa_has_shader_subroutine(const struct gl_context *ctx) +{ + return ctx->API == API_OPENGL_CORE && + (ctx->Version >= 40 || ctx->Extensions.ARB_shader_subroutine); +} + +/** + * Checks if the context supports tessellation. 
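
A client-side usage sketch, not part of this change, of the tessellation rules wired up in this series (GL_PATCHES-only drawing, the GL_MAX_PATCH_VERTICES limit). The program/VAO parameters and the patch size of 3 are illustrative.

#include <epoxy/gl.h>   /* any loader exposing the GL 4.0 prototypes will do */

/* Illustrative only: with TCS/TES stages in the current program, GL_PATCHES
 * is the only valid primitive type; anything else is GL_INVALID_OPERATION. */
static void
draw_patches(GLuint tess_program, GLuint vao, GLsizei vertex_count)
{
   GLint max_patch_vertices = 0;
   glGetIntegerv(GL_MAX_PATCH_VERTICES, &max_patch_vertices);

   glUseProgram(tess_program);
   glBindVertexArray(vao);
   glPatchParameteri(GL_PATCH_VERTICES, 3);   /* must not exceed the max above */
   glDrawArrays(GL_PATCHES, 0, vertex_count);
}
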
+ */ +static inline GLboolean +_mesa_has_tessellation(const struct gl_context *ctx) +{ + return ctx->API == API_OPENGL_CORE && + ctx->Extensions.ARB_tessellation_shader; +} + #ifdef __cplusplus } diff --git a/src/mesa/main/copyimage.c b/src/mesa/main/copyimage.c index e8732c6175b..05bc50dd2c6 100644 --- a/src/mesa/main/copyimage.c +++ b/src/mesa/main/copyimage.c @@ -93,7 +93,7 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level, default: _mesa_error(ctx, GL_INVALID_ENUM, "glCopyImageSubData(%sTarget = %s)", dbg_prefix, - _mesa_lookup_enum_by_nr(*target)); + _mesa_enum_to_string(*target)); return false; } @@ -159,7 +159,7 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level, if ((*tex_obj)->Target != *target) { _mesa_error(ctx, GL_INVALID_ENUM, "glCopyImageSubData(%sTarget = %s)", dbg_prefix, - _mesa_lookup_enum_by_nr(*target)); + _mesa_enum_to_string(*target)); return false; } @@ -416,9 +416,9 @@ _mesa_CopyImageSubData(GLuint srcName, GLenum srcTarget, GLint srcLevel, _mesa_debug(ctx, "glCopyImageSubData(%u, %s, %d, %d, %d, %d, " "%u, %s, %d, %d, %d, %d, " "%d, %d, %d)\n", - srcName, _mesa_lookup_enum_by_nr(srcTarget), srcLevel, + srcName, _mesa_enum_to_string(srcTarget), srcLevel, srcX, srcY, srcZ, - dstName, _mesa_lookup_enum_by_nr(dstTarget), dstLevel, + dstName, _mesa_enum_to_string(dstTarget), dstLevel, dstX, dstY, dstZ, srcWidth, srcHeight, srcWidth); diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index d783e34222f..87eb63ea374 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -232,11 +232,13 @@ struct dd_function_table { /** - * Called by glGetTexImage(). + * Called by glGetTexImage(), glGetTextureSubImage(). */ - void (*GetTexImage)( struct gl_context *ctx, - GLenum format, GLenum type, GLvoid *pixels, - struct gl_texture_image *texImage ); + void (*GetTexSubImage)(struct gl_context *ctx, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, GLvoid *pixels, + struct gl_texture_image *texImage); /** * Called by glClearTex[Sub]Image @@ -326,16 +328,19 @@ struct dd_function_table { void (*CompressedTexSubImage)(struct gl_context *ctx, GLuint dims, struct gl_texture_image *texImage, GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLint height, GLint depth, + GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const GLvoid *data); /** * Called by glGetCompressedTexImage. 
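
A client-side usage sketch, not part of this change, of the sub-image readback that the reworked GetTexSubImage/GetCompressedTexSubImage hooks serve (GL 4.5 / ARB_get_texture_sub_image). The texture is assumed to be a non-compressed RGBA8 2D texture; the helper name is illustrative.

#include <stdlib.h>
#include <epoxy/gl.h>   /* any loader exposing the GL 4.5 prototypes will do */

/* Illustrative only: read back just a w x h rectangle of level 0 rather than
 * the whole image. */
static void *
read_back_subrect(GLuint tex, GLint x, GLint y, GLsizei w, GLsizei h)
{
   GLsizei buf_size = w * h * 4;             /* tightly packed RGBA8 */
   void *pixels = malloc(buf_size);

   glGetTextureSubImage(tex, 0,              /* texture, level            */
                        x, y, 0,             /* xoffset, yoffset, zoffset */
                        w, h, 1,             /* width, height, depth      */
                        GL_RGBA, GL_UNSIGNED_BYTE,
                        buf_size, pixels);
   return pixels;
}
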
*/ - void (*GetCompressedTexImage)(struct gl_context *ctx, - struct gl_texture_image *texImage, - GLvoid *data); + void (*GetCompressedTexSubImage)(struct gl_context *ctx, + struct gl_texture_image *texImage, + GLint xoffset, GLint yoffset, + GLint zoffset, GLsizei width, + GLsizei height, GLsizei depth, + GLvoid *data); /*@}*/ /** diff --git a/src/mesa/main/debug.c b/src/mesa/main/debug.c index c93e84a04d0..5ca7d5ce500 100644 --- a/src/mesa/main/debug.c +++ b/src/mesa/main/debug.c @@ -272,7 +272,9 @@ write_texture_image(struct gl_texture_object *texObj, store = ctx->Pack; /* save */ ctx->Pack = ctx->DefaultPacking; - ctx->Driver.GetTexImage(ctx, GL_RGBA, GL_UNSIGNED_BYTE, buffer, img); + ctx->Driver.GetTexSubImage(ctx, + 0, 0, 0, img->Width, img->Height, img->Depth, + GL_RGBA, GL_UNSIGNED_BYTE, buffer, img); /* make filename */ _mesa_snprintf(s, sizeof(s), "/tmp/tex%u.l%u.f%u.ppm", texObj->Name, level, face); @@ -411,7 +413,7 @@ dump_renderbuffer(const struct gl_renderbuffer *rb, GLboolean writeImage) { printf("Renderbuffer %u: %u x %u IntFormat = %s\n", rb->Name, rb->Width, rb->Height, - _mesa_lookup_enum_by_nr(rb->InternalFormat)); + _mesa_enum_to_string(rb->InternalFormat)); if (writeImage) { _mesa_write_renderbuffer_image(rb); } diff --git a/src/mesa/main/depth.c b/src/mesa/main/depth.c index bb4591cf152..c3534407599 100644 --- a/src/mesa/main/depth.c +++ b/src/mesa/main/depth.c @@ -63,7 +63,7 @@ _mesa_DepthFunc( GLenum func ) GET_CURRENT_CONTEXT(ctx); if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "glDepthFunc %s\n", _mesa_lookup_enum_by_nr(func)); + _mesa_debug(ctx, "glDepthFunc %s\n", _mesa_enum_to_string(func)); if (ctx->Depth.Func == func) return; diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index aafe486fb60..5554738d1a3 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -9000,7 +9000,7 @@ _mesa_NewList(GLuint name, GLenum mode) if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glNewList %u %s\n", name, - _mesa_lookup_enum_by_nr(mode)); + _mesa_enum_to_string(mode)); if (name == 0) { _mesa_error(ctx, GL_INVALID_VALUE, "glNewList"); @@ -9688,7 +9688,7 @@ _mesa_initialize_save_table(const struct gl_context *ctx) static const char * enum_string(GLenum k) { - return _mesa_lookup_enum_by_nr(k); + return _mesa_enum_to_string(k); } @@ -9827,19 +9827,19 @@ print_list(struct gl_context *ctx, GLuint list, const char *fname) break; case OPCODE_BIND_TEXTURE: fprintf(f, "BindTexture %s %d\n", - _mesa_lookup_enum_by_nr(n[1].ui), n[2].ui); + _mesa_enum_to_string(n[1].ui), n[2].ui); break; case OPCODE_SHADE_MODEL: - fprintf(f, "ShadeModel %s\n", _mesa_lookup_enum_by_nr(n[1].ui)); + fprintf(f, "ShadeModel %s\n", _mesa_enum_to_string(n[1].ui)); break; case OPCODE_MAP1: fprintf(f, "Map1 %s %.3f %.3f %d %d\n", - _mesa_lookup_enum_by_nr(n[1].ui), + _mesa_enum_to_string(n[1].ui), n[2].f, n[3].f, n[4].i, n[5].i); break; case OPCODE_MAP2: fprintf(f, "Map2 %s %.3f %.3f %.3f %.3f %d %d %d %d\n", - _mesa_lookup_enum_by_nr(n[1].ui), + _mesa_enum_to_string(n[1].ui), n[2].f, n[3].f, n[4].f, n[5].f, n[6].i, n[7].i, n[8].i, n[9].i); break; @@ -9918,7 +9918,7 @@ print_list(struct gl_context *ctx, GLuint list, const char *fname) case OPCODE_PROVOKING_VERTEX: fprintf(f, "ProvokingVertex %s\n", - _mesa_lookup_enum_by_nr(n[1].ui)); + _mesa_enum_to_string(n[1].ui)); break; /* diff --git a/src/mesa/main/drawpix.c b/src/mesa/main/drawpix.c index 55035f214b3..720a082ce6d 100644 --- a/src/mesa/main/drawpix.c +++ b/src/mesa/main/drawpix.c @@ -53,10 +53,10 @@ _mesa_DrawPixels( GLsizei 
width, GLsizei height, if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glDrawPixels(%d, %d, %s, %s, %p) // to %s at %d, %d\n", width, height, - _mesa_lookup_enum_by_nr(format), - _mesa_lookup_enum_by_nr(type), + _mesa_enum_to_string(format), + _mesa_enum_to_string(type), pixels, - _mesa_lookup_enum_by_nr(ctx->DrawBuffer->ColorDrawBuffer[0]), + _mesa_enum_to_string(ctx->DrawBuffer->ColorDrawBuffer[0]), IROUND(ctx->Current.RasterPos[0]), IROUND(ctx->Current.RasterPos[1])); @@ -96,8 +96,8 @@ _mesa_DrawPixels( GLsizei width, GLsizei height, err = _mesa_error_check_format_and_type(ctx, format, type); if (err != GL_NO_ERROR) { _mesa_error(ctx, err, "glDrawPixels(invalid format %s and/or type %s)", - _mesa_lookup_enum_by_nr(format), - _mesa_lookup_enum_by_nr(type)); + _mesa_enum_to_string(format), + _mesa_enum_to_string(type)); goto end; } @@ -198,9 +198,9 @@ _mesa_CopyPixels( GLint srcx, GLint srcy, GLsizei width, GLsizei height, _mesa_debug(ctx, "glCopyPixels(%d, %d, %d, %d, %s) // from %s to %s at %d, %d\n", srcx, srcy, width, height, - _mesa_lookup_enum_by_nr(type), - _mesa_lookup_enum_by_nr(ctx->ReadBuffer->ColorReadBuffer), - _mesa_lookup_enum_by_nr(ctx->DrawBuffer->ColorDrawBuffer[0]), + _mesa_enum_to_string(type), + _mesa_enum_to_string(ctx->ReadBuffer->ColorReadBuffer), + _mesa_enum_to_string(ctx->DrawBuffer->ColorDrawBuffer[0]), IROUND(ctx->Current.RasterPos[0]), IROUND(ctx->Current.RasterPos[1])); @@ -218,7 +218,7 @@ _mesa_CopyPixels( GLint srcx, GLint srcy, GLsizei width, GLsizei height, type != GL_STENCIL && type != GL_DEPTH_STENCIL) { _mesa_error(ctx, GL_INVALID_ENUM, "glCopyPixels(type=%s)", - _mesa_lookup_enum_by_nr(type)); + _mesa_enum_to_string(type)); return; } diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c index 9008a386343..42f67990784 100644 --- a/src/mesa/main/enable.c +++ b/src/mesa/main/enable.c @@ -146,7 +146,7 @@ client_state(struct gl_context *ctx, GLenum cap, GLboolean state) invalid_enum_error: _mesa_error(ctx, GL_INVALID_ENUM, "gl%sClientState(%s)", - state ? "Enable" : "Disable", _mesa_lookup_enum_by_nr(cap)); + state ? "Enable" : "Disable", _mesa_enum_to_string(cap)); } @@ -283,7 +283,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "%s %s (newstate is %x)\n", state ? "glEnable" : "glDisable", - _mesa_lookup_enum_by_nr(cap), + _mesa_enum_to_string(cap), ctx->NewState); switch (cap) { @@ -1001,7 +1001,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) /* ARB_texture_multisample */ case GL_SAMPLE_MASK: - if (!_mesa_is_desktop_gl(ctx)) + if (!_mesa_is_desktop_gl(ctx) && !_mesa_is_gles31(ctx)) goto invalid_enum_error; CHECK_EXTENSION(ARB_texture_multisample, cap); if (ctx->Multisample.SampleMask == state) @@ -1022,7 +1022,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) invalid_enum_error: _mesa_error(ctx, GL_INVALID_ENUM, "gl%s(%s)", - state ? "Enable" : "Disable", _mesa_lookup_enum_by_nr(cap)); + state ? "Enable" : "Disable", _mesa_enum_to_string(cap)); } @@ -1101,7 +1101,7 @@ _mesa_set_enablei(struct gl_context *ctx, GLenum cap, invalid_enum_error: _mesa_error(ctx, GL_INVALID_ENUM, "%s(cap=%s)", state ? 
"glEnablei" : "glDisablei", - _mesa_lookup_enum_by_nr(cap)); + _mesa_enum_to_string(cap)); } @@ -1143,7 +1143,7 @@ _mesa_IsEnabledi( GLenum cap, GLuint index ) return (ctx->Scissor.EnableFlags >> index) & 1; default: _mesa_error(ctx, GL_INVALID_ENUM, "glIsEnabledIndexed(cap=%s)", - _mesa_lookup_enum_by_nr(cap)); + _mesa_enum_to_string(cap)); return GL_FALSE; } } @@ -1603,7 +1603,7 @@ _mesa_IsEnabled( GLenum cap ) /* ARB_texture_multisample */ case GL_SAMPLE_MASK: - if (!_mesa_is_desktop_gl(ctx)) + if (!_mesa_is_desktop_gl(ctx) && !_mesa_is_gles31(ctx)) goto invalid_enum_error; CHECK_EXTENSION(ARB_texture_multisample); return ctx->Multisample.SampleMask; @@ -1623,6 +1623,6 @@ _mesa_IsEnabled( GLenum cap ) invalid_enum_error: _mesa_error(ctx, GL_INVALID_ENUM, "glIsEnabled(%s)", - _mesa_lookup_enum_by_nr(cap)); + _mesa_enum_to_string(cap)); return GL_FALSE; } diff --git a/src/mesa/main/enums.h b/src/mesa/main/enums.h index 66bdd53bbab..0e18cd407e9 100644 --- a/src/mesa/main/enums.h +++ b/src/mesa/main/enums.h @@ -42,7 +42,7 @@ extern "C" { #endif -extern const char *_mesa_lookup_enum_by_nr( int nr ); +extern const char *_mesa_enum_to_string( int nr ); /* Get the name of an enum given that it is a primitive type. Avoids * GL_FALSE/GL_POINTS ambiguity and others. diff --git a/src/mesa/main/errors.c b/src/mesa/main/errors.c index b3406665d94..f720de316e4 100644 --- a/src/mesa/main/errors.c +++ b/src/mesa/main/errors.c @@ -1314,7 +1314,7 @@ flush_delayed_errors( struct gl_context *ctx ) if (ctx->ErrorDebugCount) { _mesa_snprintf(s, MAX_DEBUG_MESSAGE_LENGTH, "%d similar %s errors", ctx->ErrorDebugCount, - _mesa_lookup_enum_by_nr(ctx->ErrorValue)); + _mesa_enum_to_string(ctx->ErrorValue)); output_if_debug("Mesa", s, GL_TRUE); @@ -1503,7 +1503,7 @@ _mesa_error( struct gl_context *ctx, GLenum error, const char *fmtString, ... ) } len = _mesa_snprintf(s2, MAX_DEBUG_MESSAGE_LENGTH, "%s in %s", - _mesa_lookup_enum_by_nr(error), s); + _mesa_enum_to_string(error), s); if (len >= MAX_DEBUG_MESSAGE_LENGTH) { /* Same as above. 
*/ assert(0); diff --git a/src/mesa/main/errors.h b/src/mesa/main/errors.h index 24f234f7f10..81e47a8b8c1 100644 --- a/src/mesa/main/errors.h +++ b/src/mesa/main/errors.h @@ -37,6 +37,7 @@ #include <stdio.h> +#include <stdarg.h> #include "compiler.h" #include "glheader.h" #include "mtypes.h" diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index 4176a69ed7c..d934d19c3e7 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -121,6 +121,7 @@ static const struct extension extension_table[] = { { "GL_ARB_framebuffer_object", o(ARB_framebuffer_object), GL, 2005 }, { "GL_ARB_framebuffer_sRGB", o(EXT_framebuffer_sRGB), GL, 1998 }, { "GL_ARB_get_program_binary", o(dummy_true), GL, 2010 }, + { "GL_ARB_get_texture_sub_image", o(dummy_true), GL, 2014 }, { "GL_ARB_gpu_shader5", o(ARB_gpu_shader5), GLC, 2010 }, { "GL_ARB_gpu_shader_fp64", o(ARB_gpu_shader_fp64), GLC, 2010 }, { "GL_ARB_half_float_pixel", o(dummy_true), GL, 2003 }, @@ -154,6 +155,8 @@ static const struct extension extension_table[] = { { "GL_ARB_shader_objects", o(dummy_true), GL, 2002 }, { "GL_ARB_shader_precision", o(ARB_shader_precision), GL, 2010 }, { "GL_ARB_shader_stencil_export", o(ARB_shader_stencil_export), GL, 2009 }, + { "GL_ARB_shader_storage_buffer_object", o(ARB_shader_storage_buffer_object), GL, 2012 }, + { "GL_ARB_shader_subroutine", o(ARB_shader_subroutine), GLC, 2010 }, { "GL_ARB_shader_texture_lod", o(ARB_shader_texture_lod), GL, 2009 }, { "GL_ARB_shading_language_100", o(dummy_true), GLL, 2003 }, { "GL_ARB_shading_language_packing", o(ARB_shading_language_packing), GL, 2011 }, @@ -382,6 +385,9 @@ static const struct extension extension_table[] = { { "GL_NV_point_sprite", o(NV_point_sprite), GL, 2001 }, { "GL_NV_primitive_restart", o(NV_primitive_restart), GLL, 2002 }, { "GL_NV_read_buffer", o(dummy_true), ES2, 2011 }, + { "GL_NV_read_depth", o(dummy_true), ES2, 2011 }, + { "GL_NV_read_depth_stencil", o(dummy_true), ES2, 2011 }, + { "GL_NV_read_stencil", o(dummy_true), ES2, 2011 }, { "GL_NV_texgen_reflection", o(dummy_true), GLL, 1999 }, { "GL_NV_texture_barrier", o(NV_texture_barrier), GL, 2009 }, { "GL_NV_texture_env_combine4", o(NV_texture_env_combine4), GLL, 1999 }, diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index f8dcf122d99..841834030df 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2007,7 +2007,7 @@ renderbuffer_storage(struct gl_context *ctx, struct gl_renderbuffer *rb, baseFormat = _mesa_base_fbo_format(ctx, internalFormat); if (baseFormat == 0) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(internalFormat=%s)", - func, _mesa_lookup_enum_by_nr(internalFormat)); + func, _mesa_enum_to_string(internalFormat)); return; } @@ -2095,12 +2095,12 @@ renderbuffer_storage_named(GLuint renderbuffer, GLenum internalFormat, if (samples == NO_SAMPLES) _mesa_debug(ctx, "%s(%u, %s, %d, %d)\n", func, renderbuffer, - _mesa_lookup_enum_by_nr(internalFormat), + _mesa_enum_to_string(internalFormat), width, height); else _mesa_debug(ctx, "%s(%u, %s, %d, %d, %d)\n", func, renderbuffer, - _mesa_lookup_enum_by_nr(internalFormat), + _mesa_enum_to_string(internalFormat), width, height, samples); } @@ -2131,14 +2131,14 @@ renderbuffer_storage_target(GLenum target, GLenum internalFormat, if (samples == NO_SAMPLES) _mesa_debug(ctx, "%s(%s, %s, %d, %d)\n", func, - _mesa_lookup_enum_by_nr(target), - _mesa_lookup_enum_by_nr(internalFormat), + _mesa_enum_to_string(target), + _mesa_enum_to_string(internalFormat), width, height); else _mesa_debug(ctx, "%s(%s, %s, 
%d, %d, %d)\n", func, - _mesa_lookup_enum_by_nr(target), - _mesa_lookup_enum_by_nr(internalFormat), + _mesa_enum_to_string(target), + _mesa_enum_to_string(internalFormat), width, height, samples); } @@ -2311,7 +2311,7 @@ get_render_buffer_parameteriv(struct gl_context *ctx, /* fallthrough */ default: _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid pname=%s)", func, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); return; } } @@ -2694,13 +2694,13 @@ _mesa_CheckFramebufferStatus(GLenum target) if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glCheckFramebufferStatus(%s)\n", - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); fb = get_framebuffer_target(ctx, target); if (!fb) { _mesa_error(ctx, GL_INVALID_ENUM, "glCheckFramebufferStatus(invalid target %s)", - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return 0; } @@ -2732,7 +2732,7 @@ _mesa_CheckNamedFramebufferStatus(GLuint framebuffer, GLenum target) default: _mesa_error(ctx, GL_INVALID_ENUM, "glCheckNamedFramebufferStatus(invalid target %s)", - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return 0; } @@ -2851,7 +2851,7 @@ check_layered_texture_target(struct gl_context *ctx, GLenum target, _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture target %s)", caller, - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return false; } @@ -2893,7 +2893,7 @@ check_texture_target(struct gl_context *ctx, GLenum target, _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture target %s)", caller, - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return false; } @@ -2944,8 +2944,9 @@ check_textarget(struct gl_context *ctx, int dims, GLenum target, break; case GL_TEXTURE_2D_MULTISAMPLE: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - err = _mesa_is_gles(ctx) - || !ctx->Extensions.ARB_texture_multisample; + err = (_mesa_is_gles(ctx) || + !ctx->Extensions.ARB_texture_multisample) && + !_mesa_is_gles31(ctx); break; default: err = true; @@ -2962,7 +2963,7 @@ check_textarget(struct gl_context *ctx, int dims, GLenum target, if (err) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid textarget %s)", - caller, _mesa_lookup_enum_by_nr(textarget)); + caller, _mesa_enum_to_string(textarget)); return false; } @@ -3074,7 +3075,7 @@ _mesa_framebuffer_texture(struct gl_context *ctx, struct gl_framebuffer *fb, att = get_attachment(ctx, fb, attachment); if (att == NULL) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", caller, - _mesa_lookup_enum_by_nr(attachment)); + _mesa_enum_to_string(attachment)); return; } @@ -3157,7 +3158,7 @@ framebuffer_texture_with_dims(int dims, GLenum target, fb = get_framebuffer_target(ctx, target); if (!fb) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", caller, - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return; } @@ -3225,7 +3226,7 @@ _mesa_FramebufferTextureLayer(GLenum target, GLenum attachment, if (!fb) { _mesa_error(ctx, GL_INVALID_ENUM, "glFramebufferTextureLayer(invalid target %s)", - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return; } @@ -3304,7 +3305,7 @@ _mesa_FramebufferTexture(GLenum target, GLenum attachment, GET_CURRENT_CONTEXT(ctx); struct gl_framebuffer *fb; struct gl_texture_object *texObj; - GLboolean layered; + GLboolean layered = GL_FALSE; const char *func = "FramebufferTexture"; @@ -3319,7 +3320,7 @@ _mesa_FramebufferTexture(GLenum target, GLenum attachment, if (!fb) { _mesa_error(ctx, GL_INVALID_ENUM, 
"glFramebufferTexture(invalid target %s)", - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return; } @@ -3347,7 +3348,7 @@ _mesa_NamedFramebufferTexture(GLuint framebuffer, GLenum attachment, GET_CURRENT_CONTEXT(ctx); struct gl_framebuffer *fb; struct gl_texture_object *texObj; - GLboolean layered; + GLboolean layered = GL_FALSE; const char *func = "glNamedFramebufferTexture"; @@ -3400,7 +3401,7 @@ _mesa_framebuffer_renderbuffer(struct gl_context *ctx, if (att == NULL) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", func, - _mesa_lookup_enum_by_nr(attachment)); + _mesa_enum_to_string(attachment)); return; } @@ -3440,7 +3441,7 @@ _mesa_FramebufferRenderbuffer(GLenum target, GLenum attachment, if (!fb) { _mesa_error(ctx, GL_INVALID_ENUM, "glFramebufferRenderbuffer(invalid target %s)", - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return; } @@ -3539,7 +3540,7 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx, attachment != GL_DEPTH && attachment != GL_STENCIL) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", caller, - _mesa_lookup_enum_by_nr(attachment)); + _mesa_enum_to_string(attachment)); return; } /* the default / window-system FBO */ @@ -3552,7 +3553,7 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx, if (att == NULL) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", caller, - _mesa_lookup_enum_by_nr(attachment)); + _mesa_enum_to_string(attachment)); return; } @@ -3609,7 +3610,7 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx, } else if (att->Type == GL_NONE) { _mesa_error(ctx, err, "%s(invalid pname %s)", caller, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); } else { goto invalid_pname_enum; @@ -3626,7 +3627,7 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx, } else if (att->Type == GL_NONE) { _mesa_error(ctx, err, "%s(invalid pname %s)", caller, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); } else { goto invalid_pname_enum; @@ -3637,7 +3638,7 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx, goto invalid_pname_enum; } else if (att->Type == GL_NONE) { _mesa_error(ctx, err, "%s(invalid pname %s)", caller, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); } else if (att->Type == GL_TEXTURE) { if (att->Texture && (att->Texture->Target == GL_TEXTURE_3D || att->Texture->Target == GL_TEXTURE_2D_ARRAY)) { @@ -3659,7 +3660,7 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx, } else if (att->Type == GL_NONE) { _mesa_error(ctx, err, "%s(invalid pname %s)", caller, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); } else { if (ctx->Extensions.EXT_framebuffer_sRGB) { @@ -3682,7 +3683,7 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx, } else if (att->Type == GL_NONE) { _mesa_error(ctx, err, "%s(invalid pname %s)", caller, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); } else { mesa_format format = att->Renderbuffer->Format; @@ -3734,7 +3735,7 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx, } else if (att->Type == GL_NONE) { _mesa_error(ctx, err, "%s(invalid pname %s)", caller, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); } else if (att->Texture) { const struct gl_texture_image *texImage = @@ -3763,7 +3764,7 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx, *params = att->Layered; } else if (att->Type == GL_NONE) { 
_mesa_error(ctx, err, "%s(invalid pname %s)", caller, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); } else { goto invalid_pname_enum; } @@ -3776,7 +3777,7 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx, invalid_pname_enum: _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid pname %s)", caller, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); return; } @@ -3792,7 +3793,7 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, if (!buffer) { _mesa_error(ctx, GL_INVALID_ENUM, "glGetFramebufferAttachmentParameteriv(invalid target %s)", - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return; } @@ -4009,7 +4010,7 @@ invalidate_framebuffer_storage(struct gl_context *ctx, invalid_enum: _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", name, - _mesa_lookup_enum_by_nr(attachments[i])); + _mesa_enum_to_string(attachments[i])); return; } @@ -4026,7 +4027,7 @@ _mesa_InvalidateSubFramebuffer(GLenum target, GLsizei numAttachments, if (!fb) { _mesa_error(ctx, GL_INVALID_ENUM, "glInvalidateSubFramebuffer(invalid target %s)", - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return; } @@ -4076,7 +4077,7 @@ _mesa_InvalidateFramebuffer(GLenum target, GLsizei numAttachments, if (!fb) { _mesa_error(ctx, GL_INVALID_ENUM, "glInvalidateFramebuffer(invalid target %s)", - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return; } @@ -4152,7 +4153,7 @@ _mesa_DiscardFramebufferEXT(GLenum target, GLsizei numAttachments, if (!fb) { _mesa_error(ctx, GL_INVALID_ENUM, "glDiscardFramebufferEXT(target %s)", - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return; } @@ -4189,5 +4190,5 @@ _mesa_DiscardFramebufferEXT(GLenum target, GLsizei numAttachments, invalid_enum: _mesa_error(ctx, GL_INVALID_ENUM, "glDiscardFramebufferEXT(attachment %s)", - _mesa_lookup_enum_by_nr(attachments[i])); + _mesa_enum_to_string(attachments[i])); } diff --git a/src/mesa/main/feedback.c b/src/mesa/main/feedback.c index 6bc4294f9c7..699e2a855a3 100644 --- a/src/mesa/main/feedback.c +++ b/src/mesa/main/feedback.c @@ -415,7 +415,7 @@ _mesa_RenderMode( GLenum mode ) ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0); if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "glRenderMode %s\n", _mesa_lookup_enum_by_nr(mode)); + _mesa_debug(ctx, "glRenderMode %s\n", _mesa_enum_to_string(mode)); FLUSH_VERTICES(ctx, _NEW_RENDERMODE); diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c index 70adaf88551..95b428dca3e 100644 --- a/src/mesa/main/ffvertex_prog.c +++ b/src/mesa/main/ffvertex_prog.c @@ -189,15 +189,15 @@ static void make_state_key( struct gl_context *ctx, struct state_key *key ) if (light->Enabled) { key->unit[i].light_enabled = 1; - if (light->EyePosition[3] == 0.0) + if (light->EyePosition[3] == 0.0F) key->unit[i].light_eyepos3_is_zero = 1; - if (light->SpotCutoff == 180.0) + if (light->SpotCutoff == 180.0F) key->unit[i].light_spotcutoff_is_180 = 1; - if (light->ConstantAttenuation != 1.0 || - light->LinearAttenuation != 0.0 || - light->QuadraticAttenuation != 0.0) + if (light->ConstantAttenuation != 1.0F || + light->LinearAttenuation != 0.0F || + light->QuadraticAttenuation != 0.0F) key->unit[i].light_attenuated = 1; } } diff --git a/src/mesa/main/fog.c b/src/mesa/main/fog.c index 3bce289e785..45f343d61c8 100644 --- a/src/mesa/main/fog.c +++ b/src/mesa/main/fog.c @@ -115,7 +115,7 @@ _mesa_Fogfv( GLenum pname, const GLfloat *params ) ctx->Fog.Mode = m; break; case 
GL_FOG_DENSITY: - if (*params<0.0) { + if (*params<0.0F) { _mesa_error( ctx, GL_INVALID_VALUE, "glFog" ); return; } diff --git a/src/mesa/main/format_parser.py b/src/mesa/main/format_parser.py index 11184f78e2c..799b14f0b1c 100755 --- a/src/mesa/main/format_parser.py +++ b/src/mesa/main/format_parser.py @@ -40,9 +40,6 @@ SRGB = 'srgb' YUV = 'yuv' ZS = 'zs' -def is_power_of_two(x): - return not bool(x & (x - 1)) - VERY_LARGE = 99999999999999999999999 class Channel: @@ -100,10 +97,6 @@ class Channel: else: return 1 - def is_power_of_two(self): - """Returns true if the size of this channel is a power of two.""" - return is_power_of_two(self.size) - def datatype(self): """Returns the datatype corresponding to a channel type and size""" return _get_datatype(self.type, self.size) diff --git a/src/mesa/main/format_utils.h b/src/mesa/main/format_utils.h index 7f500ec78da..618f43d0aaa 100644 --- a/src/mesa/main/format_utils.h +++ b/src/mesa/main/format_utils.h @@ -33,6 +33,7 @@ #include "imports.h" #include "macros.h" +#include "util/rounding.h" extern const mesa_array_format RGBA32_FLOAT; extern const mesa_array_format RGBA8_UBYTE; @@ -84,7 +85,7 @@ _mesa_float_to_unorm(float x, unsigned dst_bits) else if (x > 1.0f) return MAX_UINT(dst_bits); else - return F_TO_I(x * MAX_UINT(dst_bits)); + return _mesa_lroundevenf(x * MAX_UINT(dst_bits)); } static inline unsigned @@ -98,7 +99,7 @@ _mesa_unorm_to_unorm(unsigned x, unsigned src_bits, unsigned dst_bits) { if (src_bits < dst_bits) { return EXTEND_NORMALIZED_INT(x, src_bits, dst_bits); - } else { + } else if (src_bits > dst_bits) { unsigned src_half = (1 << (src_bits - 1)) - 1; if (src_bits + dst_bits > sizeof(x) * 8) { @@ -108,6 +109,8 @@ _mesa_unorm_to_unorm(unsigned x, unsigned src_bits, unsigned dst_bits) } else { return (x * MAX_UINT(dst_bits) + src_half) / MAX_UINT(src_bits); } + } else { + return x; } } @@ -128,7 +131,7 @@ _mesa_float_to_snorm(float x, unsigned dst_bits) else if (x > 1.0f) return MAX_INT(dst_bits); else - return F_TO_I(x * MAX_INT(dst_bits)); + return _mesa_lroundevenf(x * MAX_INT(dst_bits)); } static inline int diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c index 7741cabada1..85f7b6b5664 100644 --- a/src/mesa/main/formatquery.c +++ b/src/mesa/main/formatquery.c @@ -74,13 +74,15 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname, case GL_TEXTURE_2D_MULTISAMPLE: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: /* These enums are only valid if ARB_texture_multisample is supported */ - if (_mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample) + if ((_mesa_is_desktop_gl(ctx) && + ctx->Extensions.ARB_texture_multisample) || + _mesa_is_gles31(ctx)) break; default: _mesa_error(ctx, GL_INVALID_ENUM, "glGetInternalformativ(target=%s)", - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return; } @@ -107,7 +109,7 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname, _mesa_base_fbo_format(ctx, internalformat) == 0) { _mesa_error(ctx, GL_INVALID_ENUM, "glGetInternalformativ(internalformat=%s)", - _mesa_lookup_enum_by_nr(internalformat)); + _mesa_enum_to_string(internalformat)); return; } @@ -119,7 +121,7 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname, if (bufSize < 0) { _mesa_error(ctx, GL_INVALID_VALUE, "glGetInternalformativ(target=%s)", - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return; } @@ -168,7 +170,7 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum 
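/*
 * Editorial note: the format_utils.h hunk above makes the src_bits == dst_bits
 * case of _mesa_unorm_to_unorm() an explicit fall-through and moves the float
 * paths onto round-to-nearest-even.  A minimal standalone sketch of the
 * widen / narrow / identity behaviour follows; the rescale used here is only
 * illustrative (Mesa widens via bit replication through EXTEND_NORMALIZED_INT,
 * which is not reproduced), and max_uint() is a stand-in for MAX_UINT().
 */
#include <stdio.h>

static unsigned max_uint(unsigned bits)          /* 2^bits - 1, bits < 32 */
{
   return (1u << bits) - 1;
}

static unsigned unorm_to_unorm(unsigned x, unsigned src_bits, unsigned dst_bits)
{
   if (src_bits == dst_bits)
      return x;                                  /* the case the patch makes explicit */
   /* rescale with rounding to the nearest representable dst value */
   return (x * max_uint(dst_bits) + max_uint(src_bits) / 2) / max_uint(src_bits);
}

int main(void)
{
   printf("%u %u %u\n",
          unorm_to_unorm(31, 5, 8),              /* widen:   31/31  -> 255/255 */
          unorm_to_unorm(128, 8, 5),             /* narrow: 128/255 ->  16/31  */
          unorm_to_unorm(200, 8, 8));            /* identity: 200 stays 200    */
   return 0;
}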
pname, default: _mesa_error(ctx, GL_INVALID_ENUM, "glGetInternalformativ(pname=%s)", - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); return; } diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c index baeb1bfe5de..d7b2bae59e7 100644 --- a/src/mesa/main/formats.c +++ b/src/mesa/main/formats.c @@ -354,14 +354,22 @@ _mesa_array_format_flip_channels(mesa_array_format format) return format; if (num_channels == 2) { - _mesa_array_format_set_swizzle(&format, swizzle[1], swizzle[0], - swizzle[2], swizzle[3]); + /* Assert that the swizzle makes sense for 2 channels */ + for (unsigned i = 0; i < 4; i++) + assert(swizzle[i] != 2 && swizzle[i] != 3); + + static const uint8_t flip_xy[6] = { 1, 0, 2, 3, 4, 5 }; + _mesa_array_format_set_swizzle(&format, + flip_xy[swizzle[0]], flip_xy[swizzle[1]], + flip_xy[swizzle[2]], flip_xy[swizzle[3]]); return format; } if (num_channels == 4) { - _mesa_array_format_set_swizzle(&format, swizzle[3], swizzle[2], - swizzle[1], swizzle[0]); + static const uint8_t flip[6] = { 3, 2, 1, 0, 4, 5 }; + _mesa_array_format_set_swizzle(&format, + flip[swizzle[0]], flip[swizzle[1]], + flip[swizzle[2]], flip[swizzle[3]]); return format; } @@ -372,10 +380,11 @@ uint32_t _mesa_format_to_array_format(mesa_format format) { const struct gl_format_info *info = _mesa_get_format_info(format); - if (_mesa_little_endian()) - return info->ArrayFormat; - else + if (info->ArrayFormat && !_mesa_little_endian() && + info->Layout == MESA_FORMAT_LAYOUT_PACKED) return _mesa_array_format_flip_channels(info->ArrayFormat); + else + return info->ArrayFormat; } static struct hash_table *format_array_format_table; diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h index 7e451caf0ff..d938e6ad513 100644 --- a/src/mesa/main/formats.h +++ b/src/mesa/main/formats.h @@ -191,6 +191,11 @@ static inline void _mesa_array_format_set_swizzle(mesa_array_format *f, int32_t x, int32_t y, int32_t z, int32_t w) { + *f &= ~(MESA_ARRAY_FORMAT_SWIZZLE_X_MASK | + MESA_ARRAY_FORMAT_SWIZZLE_Y_MASK | + MESA_ARRAY_FORMAT_SWIZZLE_Z_MASK | + MESA_ARRAY_FORMAT_SWIZZLE_W_MASK); + *f |= ((x << 8 ) & MESA_ARRAY_FORMAT_SWIZZLE_X_MASK) | ((y << 11) & MESA_ARRAY_FORMAT_SWIZZLE_Y_MASK) | ((z << 14) & MESA_ARRAY_FORMAT_SWIZZLE_Z_MASK) | diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 77c04b8dab8..37e2c29c89c 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -938,7 +938,7 @@ _mesa_print_framebuffer(const struct gl_framebuffer *fb) fprintf(stderr, "Mesa Framebuffer %u at %p\n", fb->Name, (void *) fb); fprintf(stderr, " Size: %u x %u Status: %s\n", fb->Width, fb->Height, - _mesa_lookup_enum_by_nr(fb->_Status)); + _mesa_enum_to_string(fb->_Status)); fprintf(stderr, " Attachments:\n"); for (i = 0; i < BUFFER_COUNT; i++) { diff --git a/src/mesa/main/genmipmap.c b/src/mesa/main/genmipmap.c index 9aef090194e..c18f9d5223f 100644 --- a/src/mesa/main/genmipmap.c +++ b/src/mesa/main/genmipmap.c @@ -83,7 +83,7 @@ _mesa_generate_texture_mipmap(struct gl_context *ctx, if (error) { _mesa_error(ctx, GL_INVALID_ENUM, "glGenerate%sMipmap(target=%s)", - suffix, _mesa_lookup_enum_by_nr(target)); + suffix, _mesa_enum_to_string(target)); return; } diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index 3d6d63916b3..307a5ffbd1c 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -149,6 +149,8 @@ enum value_extra { EXTRA_EXT_UBO_GS4, EXTRA_EXT_ATOMICS_GS4, EXTRA_EXT_SHADER_IMAGE_GS4, + EXTRA_EXT_ATOMICS_TESS, + EXTRA_EXT_SHADER_IMAGE_TESS, }; #define 
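/*
 * Editorial note: the formats.h hunk above fixes _mesa_array_format_set_swizzle()
 * to clear the previous swizzle bits before OR-ing in the new ones, which the
 * flip_channels() rework in formats.c relies on.  A toy 4x3-bit swizzle field
 * showing why the mask-then-set matters; the field layout below is invented
 * for illustration and is not Mesa's MESA_ARRAY_FORMAT layout.
 */
#include <assert.h>
#include <stdint.h>

#define SWZ_MASK(i)  (0x7u << (3 * (i)))

static void set_swizzle(uint32_t *f, unsigned x, unsigned y, unsigned z, unsigned w)
{
   /* without this clear, bits of any previously stored swizzle would bleed
    * through the OR below */
   *f &= ~(SWZ_MASK(0) | SWZ_MASK(1) | SWZ_MASK(2) | SWZ_MASK(3));
   *f |= (x << 0) | (y << 3) | (z << 6) | (w << 9);
}

int main(void)
{
   uint32_t f = 0;
   set_swizzle(&f, 3, 2, 1, 0);                  /* a BGRA-style swizzle */
   set_swizzle(&f, 0, 1, 2, 3);                  /* overwrite with identity */
   assert(f == ((1u << 3) | (2u << 6) | (3u << 9)));
   return 0;
}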
NO_EXTRA NULL @@ -349,12 +351,58 @@ static const int extra_ARB_shader_image_load_store_and_geometry_shader[] = { EXTRA_END }; +static const int extra_ARB_shader_atomic_counters_and_tessellation[] = { + EXTRA_EXT_ATOMICS_TESS, + EXTRA_END +}; + +static const int extra_ARB_shader_image_load_store_and_tessellation[] = { + EXTRA_EXT_SHADER_IMAGE_TESS, + EXTRA_END +}; + static const int extra_ARB_draw_indirect_es31[] = { EXT(ARB_draw_indirect), EXTRA_API_ES31, EXTRA_END }; +static const int extra_ARB_shader_image_load_store_es31[] = { + EXT(ARB_shader_image_load_store), + EXTRA_API_ES31, + EXTRA_END +}; + +static const int extra_ARB_shader_atomic_counters_es31[] = { + EXT(ARB_shader_atomic_counters), + EXTRA_API_ES31, + EXTRA_END +}; + +static const int extra_ARB_texture_multisample_es31[] = { + EXT(ARB_texture_multisample), + EXTRA_API_ES31, + EXTRA_END +}; + +static const int extra_ARB_texture_gather_es31[] = { + EXT(ARB_texture_gather), + EXTRA_API_ES31, + EXTRA_END +}; + +static const int extra_ARB_compute_shader_es31[] = { + EXT(ARB_compute_shader), + EXTRA_API_ES31, + EXTRA_END +}; + +static const int extra_ARB_explicit_uniform_location_es31[] = { + EXT(ARB_explicit_uniform_location), + EXTRA_API_ES31, + EXTRA_END +}; + EXTRA_EXT(ARB_texture_cube_map); EXTRA_EXT(EXT_texture_array); EXTRA_EXT(NV_fog_distance); @@ -401,6 +449,8 @@ EXTRA_EXT(ARB_explicit_uniform_location); EXTRA_EXT(ARB_clip_control); EXTRA_EXT(EXT_polygon_offset_clamp); EXTRA_EXT(ARB_framebuffer_no_attachments); +EXTRA_EXT(ARB_tessellation_shader); +EXTRA_EXT(ARB_shader_subroutine); static const int extra_ARB_color_buffer_float_or_glcore[] = { @@ -626,7 +676,7 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu break; case GL_EDGE_FLAG: - v->value_bool = ctx->Current.Attrib[VERT_ATTRIB_EDGEFLAG][0] == 1.0; + v->value_bool = ctx->Current.Attrib[VERT_ATTRIB_EDGEFLAG][0] == 1.0F; break; case GL_READ_BUFFER: @@ -1149,6 +1199,16 @@ check_extra(struct gl_context *ctx, const char *func, const struct value_desc *d api_found = (ctx->Extensions.ARB_shader_image_load_store && _mesa_has_geometry_shaders(ctx)); break; + case EXTRA_EXT_ATOMICS_TESS: + api_check = GL_TRUE; + api_found = ctx->Extensions.ARB_shader_atomic_counters && + _mesa_has_tessellation(ctx); + break; + case EXTRA_EXT_SHADER_IMAGE_TESS: + api_check = GL_TRUE; + api_found = ctx->Extensions.ARB_shader_image_load_store && + _mesa_has_tessellation(ctx); + break; case EXTRA_END: break; default: /* *e is a offset into the extension struct */ @@ -1161,7 +1221,7 @@ check_extra(struct gl_context *ctx, const char *func, const struct value_desc *d if (api_check && !api_found) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)", func, - _mesa_lookup_enum_by_nr(d->pname)); + _mesa_enum_to_string(d->pname)); return GL_FALSE; } @@ -1208,10 +1268,13 @@ find_value(const char *func, GLenum pname, void **p, union value *v) * value since it's compatible with GLES2 its entry in table_set[] is at the * end. */ - STATIC_ASSERT(ARRAY_SIZE(table_set) == API_OPENGL_LAST + 2); + STATIC_ASSERT(ARRAY_SIZE(table_set) == API_OPENGL_LAST + 3); if (_mesa_is_gles3(ctx)) { api = API_OPENGL_LAST + 1; } + if (_mesa_is_gles31(ctx)) { + api = API_OPENGL_LAST + 2; + } mask = ARRAY_SIZE(table(api)) - 1; hash = (pname * prime_factor); while (1) { @@ -1222,7 +1285,7 @@ find_value(const char *func, GLenum pname, void **p, union value *v) * any valid enum. 
*/ if (unlikely(idx == 0)) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)", func, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); return &error_value; } @@ -2004,11 +2067,11 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v) invalid_enum: _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)", func, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); return TYPE_INVALID; invalid_value: _mesa_error(ctx, GL_INVALID_VALUE, "%s(pname=%s)", func, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); return TYPE_INVALID; } diff --git a/src/mesa/main/get_hash_generator.py b/src/mesa/main/get_hash_generator.py index b200d197341..c777b782442 100644 --- a/src/mesa/main/get_hash_generator.py +++ b/src/mesa/main/get_hash_generator.py @@ -44,7 +44,7 @@ prime_factor = 89 prime_step = 281 hash_table_size = 1024 -gl_apis=set(["GL", "GL_CORE", "GLES", "GLES2", "GLES3"]) +gl_apis=set(["GL", "GL_CORE", "GLES", "GLES2", "GLES3", "GLES31"]) def print_header(): print "typedef const unsigned short table_t[%d];\n" % (hash_table_size) @@ -68,6 +68,7 @@ api_enum = [ 'GLES2', 'GL_CORE', 'GLES3', # Not in gl_api enum in mtypes.h + 'GLES31', # Not in gl_api enum in mtypes.h ] def api_index(api): @@ -167,10 +168,13 @@ def generate_hash_tables(enum_list, enabled_apis, param_descriptors): for api in valid_apis: add_to_hash_table(tables[api], hash_val, len(params)) - # Also add GLES2 items to the GLES3 hash table + # Also add GLES2 items to the GLES3 and GLES31 hash table if api == "GLES2": add_to_hash_table(tables["GLES3"], hash_val, len(params)) - + add_to_hash_table(tables["GLES31"], hash_val, len(params)) + # Also add GLES3 items to the GLES31 hash table + if api == "GLES3": + add_to_hash_table(tables["GLES31"], hash_val, len(params)) params.append(["GL_" + enum_name, param[1]]) sorted_tables={} @@ -206,7 +210,7 @@ if __name__ == '__main__': die("missing descriptor file (-f)\n") # generate the code for all APIs - enabled_apis = set(["GLES", "GLES2", "GLES3", "GL", "GL_CORE"]) + enabled_apis = set(["GLES", "GLES2", "GLES3", "GLES31", "GL", "GL_CORE"]) try: api_desc = gl_XML.parse_GL_API(api_desc_file) diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index 74ff3ba6619..7dc92f10100 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -351,6 +351,9 @@ descriptor=[ # GL_ARB_framebuffer_object [ "MAX_SAMPLES", "CONTEXT_INT(Const.MaxSamples), extra_ARB_framebuffer_object_EXT_framebuffer_multisample" ], +# GL_ARB_sampler_objects / GL 3.3 / GLES 3.0 + [ "SAMPLER_BINDING", "LOC_CUSTOM, TYPE_INT, GL_SAMPLER_BINDING, NO_EXTRA" ], + # GL_ARB_sync [ "MAX_SERVER_WAIT_TIMEOUT", "CONTEXT_INT64(Const.MaxServerWaitTimeout), extra_ARB_sync" ], @@ -404,9 +407,49 @@ descriptor=[ [ "TEXTURE_EXTERNAL_OES", "LOC_CUSTOM, TYPE_BOOLEAN, 0, extra_OES_EGL_image_external" ], ]}, -{ "apis": ["GL", "GL_CORE", "GLES3"], "params": [ -# GL_ARB_sampler_objects / GL 3.3 / GLES 3.0 - [ "SAMPLER_BINDING", "LOC_CUSTOM, TYPE_INT, GL_SAMPLER_BINDING, NO_EXTRA" ], +# Enums in OpenGL and ES 3.1 +{ "apis": ["GL", "GL_CORE", "GLES31"], "params": [ +# GL_ARB_shader_image_load_store / GLES 3.1 + [ "MAX_IMAGE_UNITS", "CONTEXT_INT(Const.MaxImageUnits), extra_ARB_shader_image_load_store_es31" ], + [ "MAX_VERTEX_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms), extra_ARB_shader_image_load_store_es31" ], + [ "MAX_FRAGMENT_IMAGE_UNIFORMS", 
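/*
 * Editorial note: the get.c / get_hash_generator.py hunks above add a GLES31
 * lookup table alongside the existing per-API tables.  find_value() walks
 * those tables with a multiply-by-prime hash and a prime probe step over a
 * power-of-two table.  The sketch below shows the same probing scheme in
 * isolation; storing the pname directly per slot is a simplification (Mesa
 * stores an index into the value-descriptor array instead), and the empty
 * tables here are placeholders, not the generated ones.
 */
#include <stdio.h>

#define TABLE_SIZE   1024                        /* must stay a power of two */
#define PRIME_FACTOR 89
#define PRIME_STEP   281

static unsigned       table_pname[TABLE_SIZE];   /* 0 means "empty slot" */
static unsigned short table_value[TABLE_SIZE];

static int find_slot(unsigned pname)
{
   const unsigned mask = TABLE_SIZE - 1;
   unsigned hash = pname * PRIME_FACTOR;

   while (1) {
      const unsigned slot = hash & mask;
      if (table_pname[slot] == 0)
         return -1;                              /* unknown pname -> GL_INVALID_ENUM in Mesa */
      if (table_pname[slot] == pname)
         return table_value[slot];               /* index of the matching descriptor */
      hash += PRIME_STEP;                        /* collision: keep probing */
   }
}

int main(void)
{
   /* with empty tables every query misses, as an unrecognized enum would */
   printf("%d\n", find_slot(0x0B21 /* GL_LINE_WIDTH */));
   return 0;
}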
"CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms), extra_ARB_shader_image_load_store_es31" ], + [ "MAX_COMBINED_IMAGE_UNIFORMS", "CONTEXT_INT(Const.MaxCombinedImageUniforms), extra_ARB_shader_image_load_store_es31" ], + +# GL_ARB_shader_atomic_counters / GLES 3.1 + [ "ATOMIC_COUNTER_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_shader_atomic_counters_es31" ], + [ "MAX_ATOMIC_COUNTER_BUFFER_BINDINGS", "CONTEXT_INT(Const.MaxAtomicBufferBindings), extra_ARB_shader_atomic_counters_es31" ], + [ "MAX_ATOMIC_COUNTER_BUFFER_SIZE", "CONTEXT_INT(Const.MaxAtomicBufferSize), extra_ARB_shader_atomic_counters_es31" ], + [ "MAX_VERTEX_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_es31" ], + [ "MAX_VERTEX_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters), extra_ARB_shader_atomic_counters_es31" ], + [ "MAX_FRAGMENT_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_es31" ], + [ "MAX_FRAGMENT_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters), extra_ARB_shader_atomic_counters_es31" ], + [ "MAX_COMBINED_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.MaxCombinedAtomicBuffers), extra_ARB_shader_atomic_counters_es31" ], + [ "MAX_COMBINED_ATOMIC_COUNTERS", "CONTEXT_INT(Const.MaxCombinedAtomicCounters), extra_ARB_shader_atomic_counters_es31" ], + +# GL_ARB_texture_multisample / GLES 3.1 + [ "TEXTURE_BINDING_2D_MULTISAMPLE", "LOC_CUSTOM, TYPE_INT, TEXTURE_2D_MULTISAMPLE_INDEX, extra_ARB_texture_multisample_es31" ], + [ "MAX_COLOR_TEXTURE_SAMPLES", "CONTEXT_INT(Const.MaxColorTextureSamples), extra_ARB_texture_multisample_es31" ], + [ "MAX_DEPTH_TEXTURE_SAMPLES", "CONTEXT_INT(Const.MaxDepthTextureSamples), extra_ARB_texture_multisample_es31" ], + [ "MAX_INTEGER_SAMPLES", "CONTEXT_INT(Const.MaxIntegerSamples), extra_ARB_texture_multisample_es31" ], + [ "SAMPLE_MASK", "CONTEXT_BOOL(Multisample.SampleMask), extra_ARB_texture_multisample_es31" ], + [ "MAX_SAMPLE_MASK_WORDS", "CONST(1), extra_ARB_texture_multisample_es31" ], + +# GL_ARB_texture_gather / GLES 3.1 + [ "MIN_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MinProgramTextureGatherOffset), extra_ARB_texture_gather_es31"], + [ "MAX_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MaxProgramTextureGatherOffset), extra_ARB_texture_gather_es31"], + +# GL_ARB_compute_shader / GLES 3.1 + [ "MAX_COMPUTE_WORK_GROUP_INVOCATIONS", "CONTEXT_INT(Const.MaxComputeWorkGroupInvocations), extra_ARB_compute_shader_es31" ], + [ "MAX_COMPUTE_UNIFORM_BLOCKS", "CONST(MAX_COMPUTE_UNIFORM_BLOCKS), extra_ARB_compute_shader_es31" ], + [ "MAX_COMPUTE_TEXTURE_IMAGE_UNITS", "CONST(MAX_COMPUTE_TEXTURE_IMAGE_UNITS), extra_ARB_compute_shader_es31" ], + [ "MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS", "CONST(MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS), extra_ARB_compute_shader_es31" ], + [ "MAX_COMPUTE_ATOMIC_COUNTERS", "CONST(MAX_COMPUTE_ATOMIC_COUNTERS), extra_ARB_compute_shader_es31" ], + [ "MAX_COMPUTE_SHARED_MEMORY_SIZE", "CONST(MAX_COMPUTE_SHARED_MEMORY_SIZE), extra_ARB_compute_shader_es31" ], + [ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONST(MAX_COMPUTE_UNIFORM_COMPONENTS), extra_ARB_compute_shader_es31" ], + [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONST(MAX_COMPUTE_IMAGE_UNIFORMS), extra_ARB_compute_shader_es31" ], + +# GL_ARB_explicit_uniform_location / GLES 3.1 + [ "MAX_UNIFORM_LOCATIONS", "CONTEXT_INT(Const.MaxUserAssignableUniformLocations), 
extra_ARB_explicit_uniform_location_es31" ], ]}, # Enums in OpenGL Core profile and ES 3.1 @@ -498,7 +541,6 @@ descriptor=[ [ "MAX_LIST_NESTING", "CONST(MAX_LIST_NESTING), NO_EXTRA" ], [ "MAX_NAME_STACK_DEPTH", "CONST(MAX_NAME_STACK_DEPTH), NO_EXTRA" ], [ "MAX_PIXEL_MAP_TABLE", "CONST(MAX_PIXEL_MAP_TABLE), NO_EXTRA" ], - [ "MAX_UNIFORM_LOCATIONS", "CONTEXT_INT(Const.MaxUserAssignableUniformLocations), extra_ARB_explicit_uniform_location" ], [ "NAME_STACK_DEPTH", "CONTEXT_INT(Select.NameStackDepth), NO_EXTRA" ], [ "PACK_LSB_FIRST", "CONTEXT_BOOL(Pack.LsbFirst), NO_EXTRA" ], [ "PACK_SWAP_BYTES", "CONTEXT_BOOL(Pack.SwapBytes), NO_EXTRA" ], @@ -699,13 +741,7 @@ descriptor=[ [ "TEXTURE_BUFFER_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_texture_buffer_object" ], # GL_ARB_texture_multisample / GL 3.2 - [ "TEXTURE_BINDING_2D_MULTISAMPLE", "LOC_CUSTOM, TYPE_INT, TEXTURE_2D_MULTISAMPLE_INDEX, extra_ARB_texture_multisample" ], [ "TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY", "LOC_CUSTOM, TYPE_INT, TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX, extra_ARB_texture_multisample" ], - [ "MAX_COLOR_TEXTURE_SAMPLES", "CONTEXT_INT(Const.MaxColorTextureSamples), extra_ARB_texture_multisample" ], - [ "MAX_DEPTH_TEXTURE_SAMPLES", "CONTEXT_INT(Const.MaxDepthTextureSamples), extra_ARB_texture_multisample" ], - [ "MAX_INTEGER_SAMPLES", "CONTEXT_INT(Const.MaxIntegerSamples), extra_ARB_texture_multisample" ], - [ "SAMPLE_MASK", "CONTEXT_BOOL(Multisample.SampleMask), extra_ARB_texture_multisample" ], - [ "MAX_SAMPLE_MASK_WORDS", "CONST(1), extra_ARB_texture_multisample" ], # GL 3.0 [ "CONTEXT_FLAGS", "CONTEXT_INT(Const.ContextFlags), extra_version_30" ], @@ -756,48 +792,23 @@ descriptor=[ [ "TEXTURE_BINDING_CUBE_MAP_ARRAY_ARB", "LOC_CUSTOM, TYPE_INT, TEXTURE_CUBE_ARRAY_INDEX, extra_ARB_texture_cube_map_array" ], # GL_ARB_texture_gather - [ "MIN_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MinProgramTextureGatherOffset), extra_ARB_texture_gather"], - [ "MAX_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MaxProgramTextureGatherOffset), extra_ARB_texture_gather"], [ "MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB", "CONTEXT_INT(Const.MaxProgramTextureGatherComponents), extra_ARB_texture_gather"], # GL_ARB_separate_shader_objects [ "PROGRAM_PIPELINE_BINDING", "LOC_CUSTOM, TYPE_INT, GL_PROGRAM_PIPELINE_BINDING, NO_EXTRA" ], # GL_ARB_shader_atomic_counters - [ "ATOMIC_COUNTER_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_shader_atomic_counters" ], - [ "MAX_ATOMIC_COUNTER_BUFFER_BINDINGS", "CONTEXT_INT(Const.MaxAtomicBufferBindings), extra_ARB_shader_atomic_counters" ], - [ "MAX_ATOMIC_COUNTER_BUFFER_SIZE", "CONTEXT_INT(Const.MaxAtomicBufferSize), extra_ARB_shader_atomic_counters" ], - [ "MAX_VERTEX_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers), extra_ARB_shader_atomic_counters" ], - [ "MAX_VERTEX_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters), extra_ARB_shader_atomic_counters" ], - [ "MAX_FRAGMENT_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers), extra_ARB_shader_atomic_counters" ], - [ "MAX_FRAGMENT_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters), extra_ARB_shader_atomic_counters" ], [ "MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_and_geometry_shader" ], [ "MAX_GEOMETRY_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters), 
extra_ARB_shader_atomic_counters_and_geometry_shader" ], - [ "MAX_COMBINED_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.MaxCombinedAtomicBuffers), extra_ARB_shader_atomic_counters" ], - [ "MAX_COMBINED_ATOMIC_COUNTERS", "CONTEXT_INT(Const.MaxCombinedAtomicCounters), extra_ARB_shader_atomic_counters" ], # GL_ARB_vertex_attrib_binding [ "MAX_VERTEX_ATTRIB_RELATIVE_OFFSET", "CONTEXT_ENUM(Const.MaxVertexAttribRelativeOffset), NO_EXTRA" ], [ "MAX_VERTEX_ATTRIB_BINDINGS", "CONTEXT_ENUM(Const.MaxVertexAttribBindings), NO_EXTRA" ], # GL_ARB_shader_image_load_store - [ "MAX_IMAGE_UNITS", "CONTEXT_INT(Const.MaxImageUnits), extra_ARB_shader_image_load_store"], - [ "MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS", "CONTEXT_INT(Const.MaxCombinedImageUnitsAndFragmentOutputs), extra_ARB_shader_image_load_store"], - [ "MAX_IMAGE_SAMPLES", "CONTEXT_INT(Const.MaxImageSamples), extra_ARB_shader_image_load_store"], - [ "MAX_VERTEX_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms), extra_ARB_shader_image_load_store"], + [ "MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS", "CONTEXT_INT(Const.MaxCombinedImageUnitsAndFragmentOutputs), extra_ARB_shader_image_load_store" ], + [ "MAX_IMAGE_SAMPLES", "CONTEXT_INT(Const.MaxImageSamples), extra_ARB_shader_image_load_store" ], [ "MAX_GEOMETRY_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms), extra_ARB_shader_image_load_store_and_geometry_shader"], - [ "MAX_FRAGMENT_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms), extra_ARB_shader_image_load_store"], - [ "MAX_COMBINED_IMAGE_UNIFORMS", "CONTEXT_INT(Const.MaxCombinedImageUniforms), extra_ARB_shader_image_load_store"], - -# GL_ARB_compute_shader - [ "MAX_COMPUTE_WORK_GROUP_INVOCATIONS", "CONTEXT_INT(Const.MaxComputeWorkGroupInvocations), extra_ARB_compute_shader" ], - [ "MAX_COMPUTE_UNIFORM_BLOCKS", "CONST(MAX_COMPUTE_UNIFORM_BLOCKS), extra_ARB_compute_shader" ], - [ "MAX_COMPUTE_TEXTURE_IMAGE_UNITS", "CONST(MAX_COMPUTE_TEXTURE_IMAGE_UNITS), extra_ARB_compute_shader" ], - [ "MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS", "CONST(MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS), extra_ARB_compute_shader" ], - [ "MAX_COMPUTE_ATOMIC_COUNTERS", "CONST(MAX_COMPUTE_ATOMIC_COUNTERS), extra_ARB_compute_shader" ], - [ "MAX_COMPUTE_SHARED_MEMORY_SIZE", "CONST(MAX_COMPUTE_SHARED_MEMORY_SIZE), extra_ARB_compute_shader" ], - [ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONST(MAX_COMPUTE_UNIFORM_COMPONENTS), extra_ARB_compute_shader" ], - [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONST(MAX_COMPUTE_IMAGE_UNIFORMS), extra_ARB_compute_shader" ], # GL_ARB_framebuffer_no_attachments ["MAX_FRAMEBUFFER_WIDTH", "CONTEXT_INT(Const.MaxFramebufferWidth), extra_ARB_framebuffer_no_attachments"], @@ -826,6 +837,38 @@ descriptor=[ [ "MIN_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MinFragmentInterpolationOffset), extra_ARB_gpu_shader5" ], [ "MAX_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MaxFragmentInterpolationOffset), extra_ARB_gpu_shader5" ], [ "FRAGMENT_INTERPOLATION_OFFSET_BITS", "CONST(FRAGMENT_INTERPOLATION_OFFSET_BITS), extra_ARB_gpu_shader5" ], + +# GL_ARB_tessellation_shader + [ "PATCH_VERTICES", "CONTEXT_INT(TessCtrlProgram.patch_vertices), extra_ARB_tessellation_shader" ], + [ "PATCH_DEFAULT_OUTER_LEVEL", "CONTEXT_FLOAT4(TessCtrlProgram.patch_default_outer_level), extra_ARB_tessellation_shader" ], + [ "PATCH_DEFAULT_INNER_LEVEL", "CONTEXT_FLOAT2(TessCtrlProgram.patch_default_inner_level), extra_ARB_tessellation_shader" ], + [ "MAX_TESS_GEN_LEVEL", 
"CONTEXT_INT(Const.MaxTessGenLevel), extra_ARB_tessellation_shader" ], + [ "MAX_PATCH_VERTICES", "CONTEXT_INT(Const.MaxPatchVertices), extra_ARB_tessellation_shader" ], + [ "MAX_TESS_CONTROL_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxUniformComponents), extra_ARB_tessellation_shader" ], + [ "MAX_TESS_EVALUATION_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxUniformComponents), extra_ARB_tessellation_shader" ], + [ "MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits), extra_ARB_tessellation_shader" ], + [ "MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits), extra_ARB_tessellation_shader" ], + [ "MAX_TESS_CONTROL_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents), extra_ARB_tessellation_shader" ], + [ "MAX_TESS_PATCH_COMPONENTS", "CONTEXT_INT(Const.MaxTessPatchComponents), extra_ARB_tessellation_shader" ], + [ "MAX_TESS_CONTROL_TOTAL_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.MaxTessControlTotalOutputComponents), extra_ARB_tessellation_shader" ], + [ "MAX_TESS_EVALUATION_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents), extra_ARB_tessellation_shader" ], + [ "MAX_TESS_CONTROL_INPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents), extra_ARB_tessellation_shader" ], + [ "MAX_TESS_EVALUATION_INPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents), extra_ARB_tessellation_shader" ], + [ "MAX_TESS_CONTROL_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxUniformBlocks), extra_ARB_tessellation_shader" ], + [ "MAX_TESS_EVALUATION_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxUniformBlocks), extra_ARB_tessellation_shader" ], + [ "MAX_COMBINED_TESS_CONTROL_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxCombinedUniformComponents), extra_ARB_tessellation_shader" ], + [ "MAX_COMBINED_TESS_EVALUATION_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxCombinedUniformComponents), extra_ARB_tessellation_shader" ], +# Dependencies on GL_ARB_tessellation_shader + [ "MAX_TESS_CONTROL_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_and_tessellation" ], + [ "MAX_TESS_CONTROL_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_tessellation" ], + [ "MAX_TESS_EVALUATION_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_and_tessellation" ], + [ "MAX_TESS_EVALUATION_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_tessellation" ], + [ "MAX_TESS_CONTROL_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxImageUniforms), extra_ARB_shader_image_load_store_and_tessellation"], + [ "MAX_TESS_EVALUATION_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxImageUniforms), extra_ARB_shader_image_load_store_and_tessellation"], + +# GL_ARB_shader_subroutine + [ "MAX_SUBROUTINES", "CONST(MAX_SUBROUTINES), extra_ARB_shader_subroutine" ], + [ "MAX_SUBROUTINE_UNIFORM_LOCATIONS", "CONST(MAX_SUBROUTINE_UNIFORM_LOCATIONS), extra_ARB_shader_subroutine" ], ]} ] diff --git 
a/src/mesa/main/getstring.c b/src/mesa/main/getstring.c index 72d99ca4e22..9873fdbf1a4 100644 --- a/src/mesa/main/getstring.c +++ b/src/mesa/main/getstring.c @@ -208,7 +208,7 @@ _mesa_GetPointerv( GLenum pname, GLvoid **params ) return; if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "glGetPointerv %s\n", _mesa_lookup_enum_by_nr(pname)); + _mesa_debug(ctx, "glGetPointerv %s\n", _mesa_enum_to_string(pname)); switch (pname) { case GL_VERTEX_ARRAY_POINTER: @@ -299,7 +299,7 @@ _mesa_GetError( void ) ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0); if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "glGetError <-- %s\n", _mesa_lookup_enum_by_nr(e)); + _mesa_debug(ctx, "glGetError <-- %s\n", _mesa_enum_to_string(e)); ctx->ErrorValue = (GLenum) GL_NO_ERROR; ctx->ErrorDebugCount = 0; diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c index ac69fabccaa..3eb66dab7f8 100644 --- a/src/mesa/main/glformats.c +++ b/src/mesa/main/glformats.c @@ -186,7 +186,7 @@ get_map_idx(GLenum value) return IDX_RG; default: _mesa_problem(NULL, "Unexpected inFormat %s", - _mesa_lookup_enum_by_nr(value)); + _mesa_enum_to_string(value)); return 0; } } @@ -216,8 +216,8 @@ _mesa_compute_component_mapping(GLenum inFormat, GLenum outFormat, GLubyte *map) #if 0 printf("from %x/%s to %x/%s map %d %d %d %d %d %d\n", - inFormat, _mesa_lookup_enum_by_nr(inFormat), - outFormat, _mesa_lookup_enum_by_nr(outFormat), + inFormat, _mesa_enum_to_string(inFormat), + outFormat, _mesa_enum_to_string(outFormat), map[0], map[1], map[2], @@ -1278,9 +1278,53 @@ _mesa_is_compressed_format(const struct gl_context *ctx, GLenum format) } } +/** + * Convert various unpack formats to the corresponding base format. + */ +GLenum +_mesa_unpack_format_to_base_format(GLenum format) +{ + switch(format) { + case GL_RED_INTEGER: + return GL_RED; + case GL_GREEN_INTEGER: + return GL_GREEN; + case GL_BLUE_INTEGER: + return GL_BLUE; + case GL_ALPHA_INTEGER: + return GL_ALPHA; + case GL_RG_INTEGER: + return GL_RG; + case GL_RGB_INTEGER: + return GL_RGB; + case GL_RGBA_INTEGER: + return GL_RGBA; + case GL_BGR_INTEGER: + return GL_BGR; + case GL_BGRA_INTEGER: + return GL_BGRA; + case GL_LUMINANCE_INTEGER_EXT: + return GL_LUMINANCE; + case GL_LUMINANCE_ALPHA_INTEGER_EXT: + return GL_LUMINANCE_ALPHA; + case GL_RED: + case GL_GREEN: + case GL_BLUE: + case GL_RG: + case GL_RGB: + case GL_RGBA: + case GL_BGR: + case GL_BGRA: + case GL_ALPHA: + case GL_LUMINANCE: + case GL_LUMINANCE_ALPHA: + default: + return format; + } +} /** - * Convert various base formats to the cooresponding integer format. + * Convert various base formats to the corresponding integer format. 
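/*
 * Editorial note: the glformats.c hunk above introduces
 * _mesa_unpack_format_to_base_format(), which strips the *_INTEGER variant
 * off a client unpack format.  A condensed sketch of how a caller might use
 * such a mapping when checking that an upload's channel layout matches an
 * image; it assumes the GL headers for the enum tokens, covers only a few
 * formats, and is not Mesa's actual validation path.
 */
#include <stdbool.h>
#include <GL/gl.h>
#include <GL/glext.h>

static GLenum unpack_to_base(GLenum format)
{
   switch (format) {
   case GL_RED_INTEGER:  return GL_RED;
   case GL_RG_INTEGER:   return GL_RG;
   case GL_RGB_INTEGER:  return GL_RGB;
   case GL_RGBA_INTEGER: return GL_RGBA;
   default:              return format;          /* already a base format */
   }
}

/* e.g. both GL_RGBA and GL_RGBA_INTEGER uploads feed an RGBA-shaped image */
static bool channels_match(GLenum unpack_format, GLenum base_internal_format)
{
   return unpack_to_base(unpack_format) == base_internal_format;
}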
*/ GLenum _mesa_base_format_to_integer_format(GLenum format) @@ -2605,8 +2649,6 @@ get_swizzle_from_gl_format(GLenum format, uint8_t *swizzle) uint32_t _mesa_format_from_format_and_type(GLenum format, GLenum type) { - mesa_array_format array_format; - bool is_array_format = true; uint8_t swizzle[4]; bool normalized = false, is_float = false, is_signed = false; @@ -2662,15 +2704,9 @@ _mesa_format_from_format_and_type(GLenum format, GLenum type) normalized = !_mesa_is_enum_format_integer(format); num_channels = _mesa_components_in_format(format); - array_format = - MESA_ARRAY_FORMAT(type_size, is_signed, is_float, - normalized, num_channels, - swizzle[0], swizzle[1], swizzle[2], swizzle[3]); - - if (!_mesa_little_endian()) - array_format = _mesa_array_format_flip_channels(array_format); - - return array_format; + return MESA_ARRAY_FORMAT(type_size, is_signed, is_float, + normalized, num_channels, + swizzle[0], swizzle[1], swizzle[2], swizzle[3]); } /* Otherwise this is not an array format, so return the mesa_format diff --git a/src/mesa/main/glformats.h b/src/mesa/main/glformats.h index 8881cb7d86b..419955a6033 100644 --- a/src/mesa/main/glformats.h +++ b/src/mesa/main/glformats.h @@ -101,6 +101,9 @@ _mesa_is_compressed_format(const struct gl_context *ctx, GLenum format); extern GLenum _mesa_base_format_to_integer_format(GLenum format); +extern GLenum +_mesa_unpack_format_to_base_format(GLenum format); + extern GLboolean _mesa_base_format_has_channel(GLenum base_format, GLenum pname); diff --git a/src/mesa/main/hint.c b/src/mesa/main/hint.c index 3e056ebaf13..984239a7276 100644 --- a/src/mesa/main/hint.c +++ b/src/mesa/main/hint.c @@ -40,8 +40,8 @@ _mesa_Hint( GLenum target, GLenum mode ) if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glHint %s %s\n", - _mesa_lookup_enum_by_nr(target), - _mesa_lookup_enum_by_nr(mode)); + _mesa_enum_to_string(target), + _mesa_enum_to_string(mode)); if (mode != GL_NICEST && mode != GL_FASTEST && mode != GL_DONT_CARE) { _mesa_error(ctx, GL_INVALID_ENUM, "glHint(mode)"); diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c index 68c7316575c..350e6752c8b 100644 --- a/src/mesa/main/imports.c +++ b/src/mesa/main/imports.c @@ -369,7 +369,7 @@ _mesa_float_to_half(float val) * or normal. */ e = 0; - m = (int) _mesa_roundevenf((1 << 24) * fabsf(fi.f)); + m = _mesa_lroundevenf((1 << 24) * fabsf(fi.f)); } else if (new_exp > 15) { /* map this value to infinity */ @@ -383,7 +383,7 @@ _mesa_float_to_half(float val) * either normal or infinite. */ e = new_exp + 15; - m = (int) _mesa_roundevenf(flt_m / (float) (1 << 13)); + m = _mesa_lroundevenf(flt_m / (float) (1 << 13)); } } diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h index 9ffe3decd0f..d61279ac4e5 100644 --- a/src/mesa/main/imports.h +++ b/src/mesa/main/imports.h @@ -170,34 +170,6 @@ static inline int IROUND_POS(float f) return (int) (f + 0.5F); } -#ifdef __x86_64__ -# include <xmmintrin.h> -#endif - -/** - * Convert float to int using a fast method. The rounding mode may vary. 
- */ -static inline int F_TO_I(float f) -{ -#if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__) - int r; - __asm__ ("fistpl %0" : "=m" (r) : "t" (f) : "st"); - return r; -#elif defined(USE_X86_ASM) && defined(_MSC_VER) - int r; - _asm { - fld f - fistp r - } - return r; -#elif defined(__x86_64__) - return _mm_cvt_ss2si(_mm_load_ss(&f)); -#else - return IROUND(f); -#endif -} - - /** Return (as an integer) floor of float */ static inline int IFLOOR(float f) { diff --git a/src/mesa/main/light.c b/src/mesa/main/light.c index 4021dbef922..14b4b04162b 100644 --- a/src/mesa/main/light.c +++ b/src/mesa/main/light.c @@ -42,16 +42,16 @@ _mesa_ShadeModel( GLenum mode ) GET_CURRENT_CONTEXT(ctx); if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "glShadeModel %s\n", _mesa_lookup_enum_by_nr(mode)); + _mesa_debug(ctx, "glShadeModel %s\n", _mesa_enum_to_string(mode)); + + if (ctx->Light.ShadeModel == mode) + return; if (mode != GL_FLAT && mode != GL_SMOOTH) { _mesa_error(ctx, GL_INVALID_ENUM, "glShadeModel"); return; } - if (ctx->Light.ShadeModel == mode) - return; - FLUSH_VERTICES(ctx, _NEW_LIGHT); ctx->Light.ShadeModel = mode; @@ -143,7 +143,7 @@ _mesa_light(struct gl_context *ctx, GLuint lnum, GLenum pname, const GLfloat *pa COPY_3V(light->SpotDirection, params); break; case GL_SPOT_EXPONENT: - assert(params[0] >= 0.0); + assert(params[0] >= 0.0F); assert(params[0] <= ctx->Const.MaxSpotExponent); if (light->SpotExponent == params[0]) return; @@ -151,12 +151,12 @@ _mesa_light(struct gl_context *ctx, GLuint lnum, GLenum pname, const GLfloat *pa light->SpotExponent = params[0]; break; case GL_SPOT_CUTOFF: - assert(params[0] == 180.0 || (params[0] >= 0.0 && params[0] <= 90.0)); + assert(params[0] == 180.0F || (params[0] >= 0.0F && params[0] <= 90.0F)); if (light->SpotCutoff == params[0]) return; FLUSH_VERTICES(ctx, _NEW_LIGHT); light->SpotCutoff = params[0]; - light->_CosCutoff = (GLfloat) (cos(light->SpotCutoff * M_PI / 180.0)); + light->_CosCutoff = (cosf(light->SpotCutoff * M_PI / 180.0)); if (light->_CosCutoff < 0) light->_CosCutoff = 0; if (light->SpotCutoff != 180.0F) @@ -165,21 +165,21 @@ _mesa_light(struct gl_context *ctx, GLuint lnum, GLenum pname, const GLfloat *pa light->_Flags &= ~LIGHT_SPOT; break; case GL_CONSTANT_ATTENUATION: - assert(params[0] >= 0.0); + assert(params[0] >= 0.0F); if (light->ConstantAttenuation == params[0]) return; FLUSH_VERTICES(ctx, _NEW_LIGHT); light->ConstantAttenuation = params[0]; break; case GL_LINEAR_ATTENUATION: - assert(params[0] >= 0.0); + assert(params[0] >= 0.0F); if (light->LinearAttenuation == params[0]) return; FLUSH_VERTICES(ctx, _NEW_LIGHT); light->LinearAttenuation = params[0]; break; case GL_QUADRATIC_ATTENUATION: - assert(params[0] >= 0.0); + assert(params[0] >= 0.0F); if (light->QuadraticAttenuation == params[0]) return; FLUSH_VERTICES(ctx, _NEW_LIGHT); @@ -238,31 +238,31 @@ _mesa_Lightfv( GLenum light, GLenum pname, const GLfloat *params ) params = temp; break; case GL_SPOT_EXPONENT: - if (params[0] < 0.0 || params[0] > ctx->Const.MaxSpotExponent) { + if (params[0] < 0.0F || params[0] > ctx->Const.MaxSpotExponent) { _mesa_error(ctx, GL_INVALID_VALUE, "glLight"); return; } break; case GL_SPOT_CUTOFF: - if ((params[0] < 0.0 || params[0] > 90.0) && params[0] != 180.0) { + if ((params[0] < 0.0F || params[0] > 90.0F) && params[0] != 180.0F) { _mesa_error(ctx, GL_INVALID_VALUE, "glLight"); return; } break; case GL_CONSTANT_ATTENUATION: - if (params[0] < 0.0) { + if (params[0] < 0.0F) { _mesa_error(ctx, GL_INVALID_VALUE, "glLight"); return; 
} break; case GL_LINEAR_ATTENUATION: - if (params[0] < 0.0) { + if (params[0] < 0.0F) { _mesa_error(ctx, GL_INVALID_VALUE, "glLight"); return; } break; case GL_QUADRATIC_ATTENUATION: - if (params[0] < 0.0) { + if (params[0] < 0.0F) { _mesa_error(ctx, GL_INVALID_VALUE, "glLight"); return; } @@ -463,14 +463,14 @@ _mesa_LightModelfv( GLenum pname, const GLfloat *params ) case GL_LIGHT_MODEL_LOCAL_VIEWER: if (ctx->API != API_OPENGL_COMPAT) goto invalid_pname; - newbool = (params[0]!=0.0); + newbool = (params[0] != 0.0F); if (ctx->Light.Model.LocalViewer == newbool) return; FLUSH_VERTICES(ctx, _NEW_LIGHT); ctx->Light.Model.LocalViewer = newbool; break; case GL_LIGHT_MODEL_TWO_SIDE: - newbool = (params[0]!=0.0); + newbool = (params[0] != 0.0F); if (ctx->Light.Model.TwoSide == newbool) return; FLUSH_VERTICES(ctx, _NEW_LIGHT); @@ -723,8 +723,8 @@ _mesa_ColorMaterial( GLenum face, GLenum mode ) if (MESA_VERBOSE&VERBOSE_API) _mesa_debug(ctx, "glColorMaterial %s %s\n", - _mesa_lookup_enum_by_nr(face), - _mesa_lookup_enum_by_nr(mode)); + _mesa_enum_to_string(face), + _mesa_enum_to_string(mode)); bitmask = _mesa_material_bitmask(ctx, face, mode, legal, "glColorMaterial"); if (bitmask == 0) @@ -975,7 +975,7 @@ compute_light_positions( struct gl_context *ctx ) } else { /* positional light w/ homogeneous coordinate, divide by W */ - GLfloat wInv = (GLfloat)1.0 / light->_Position[3]; + GLfloat wInv = 1.0F / light->_Position[3]; light->_Position[0] *= wInv; light->_Position[1] *= wInv; light->_Position[2] *= wInv; @@ -1024,7 +1024,7 @@ update_modelview_scale( struct gl_context *ctx ) if (!_math_matrix_is_length_preserving(ctx->ModelviewMatrixStack.Top)) { const GLfloat *m = ctx->ModelviewMatrixStack.Top->inv; GLfloat f = m[2] * m[2] + m[6] * m[6] + m[10] * m[10]; - if (f < 1e-12) f = 1.0; + if (f < 1e-12f) f = 1.0f; if (ctx->_NeedEyeCoords) ctx->_ModelViewInvScale = 1.0f / sqrtf(f); else diff --git a/src/mesa/main/lines.c b/src/mesa/main/lines.c index 3c08ed2e713..c020fb3eb9e 100644 --- a/src/mesa/main/lines.c +++ b/src/mesa/main/lines.c @@ -45,7 +45,7 @@ _mesa_LineWidth( GLfloat width ) if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glLineWidth %f\n", width); - if (width<=0.0) { + if (width <= 0.0F) { _mesa_error( ctx, GL_INVALID_VALUE, "glLineWidth" ); return; } @@ -63,7 +63,7 @@ _mesa_LineWidth( GLfloat width ) if (ctx->API == API_OPENGL_CORE && ((ctx->Const.ContextFlags & GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT) != 0) - && width > 1.0) { + && width > 1.0F) { _mesa_error( ctx, GL_INVALID_VALUE, "glLineWidth" ); return; } diff --git a/src/mesa/main/macros.h b/src/mesa/main/macros.h index 0608650aeb4..54df50c9cfe 100644 --- a/src/mesa/main/macros.h +++ b/src/mesa/main/macros.h @@ -33,6 +33,7 @@ #include "util/macros.h" #include "util/u_math.h" +#include "util/rounding.h" #include "imports.h" @@ -131,12 +132,12 @@ extern GLfloat _mesa_ubyte_to_float_color_tab[256]; #define INT_TO_USHORT(i) ((i) < 0 ? 0 : ((GLushort) ((i) >> 15))) #define UINT_TO_USHORT(i) ((i) < 0 ? 
0 : ((GLushort) ((i) >> 16))) #define UNCLAMPED_FLOAT_TO_USHORT(us, f) \ - us = ( (GLushort) F_TO_I( CLAMP((f), 0.0F, 1.0F) * 65535.0F) ) + us = ( (GLushort) _mesa_lroundevenf( CLAMP((f), 0.0F, 1.0F) * 65535.0F) ) #define CLAMPED_FLOAT_TO_USHORT(us, f) \ - us = ( (GLushort) F_TO_I( (f) * 65535.0F) ) + us = ( (GLushort) _mesa_lroundevenf( (f) * 65535.0F) ) #define UNCLAMPED_FLOAT_TO_SHORT(s, f) \ - s = ( (GLshort) F_TO_I( CLAMP((f), -1.0F, 1.0F) * 32767.0F) ) + s = ( (GLshort) _mesa_lroundevenf( CLAMP((f), -1.0F, 1.0F) * 32767.0F) ) /*** *** UNCLAMPED_FLOAT_TO_UBYTE: clamp float to [0,1] and map to ubyte in [0,255] @@ -167,9 +168,9 @@ extern GLfloat _mesa_ubyte_to_float_color_tab[256]; } while (0) #else #define UNCLAMPED_FLOAT_TO_UBYTE(ub, f) \ - ub = ((GLubyte) F_TO_I(CLAMP((f), 0.0F, 1.0F) * 255.0F)) + ub = ((GLubyte) _mesa_lroundevenf(CLAMP((f), 0.0F, 1.0F) * 255.0F)) #define CLAMPED_FLOAT_TO_UBYTE(ub, f) \ - ub = ((GLubyte) F_TO_I((f) * 255.0F)) + ub = ((GLubyte) _mesa_lroundevenf((f) * 255.0F)) #endif static fi_type UINT_AS_UNION(GLuint u) @@ -679,17 +680,6 @@ minify(unsigned value, unsigned levels) } /** - * Return true if the given value is a power of two. - * - * Note that this considers 0 a power of two. - */ -static inline bool -is_power_of_two(unsigned value) -{ - return (value & (value - 1)) == 0; -} - -/** * Align a value up to an alignment value * * If \c value is not already aligned to the requested alignment value, it diff --git a/src/mesa/main/matrix.c b/src/mesa/main/matrix.c index 80c8a248ce4..2b8016a4a72 100644 --- a/src/mesa/main/matrix.c +++ b/src/mesa/main/matrix.c @@ -229,7 +229,7 @@ _mesa_PushMatrix( void ) if (MESA_VERBOSE&VERBOSE_API) _mesa_debug(ctx, "glPushMatrix %s\n", - _mesa_lookup_enum_by_nr(ctx->Transform.MatrixMode)); + _mesa_enum_to_string(ctx->Transform.MatrixMode)); if (stack->Depth + 1 >= stack->MaxDepth) { if (ctx->Transform.MatrixMode == GL_TEXTURE) { @@ -239,7 +239,7 @@ _mesa_PushMatrix( void ) } else { _mesa_error(ctx, GL_STACK_OVERFLOW, "glPushMatrix(mode=%s)", - _mesa_lookup_enum_by_nr(ctx->Transform.MatrixMode)); + _mesa_enum_to_string(ctx->Transform.MatrixMode)); } return; } @@ -270,7 +270,7 @@ _mesa_PopMatrix( void ) if (MESA_VERBOSE&VERBOSE_API) _mesa_debug(ctx, "glPopMatrix %s\n", - _mesa_lookup_enum_by_nr(ctx->Transform.MatrixMode)); + _mesa_enum_to_string(ctx->Transform.MatrixMode)); if (stack->Depth == 0) { if (ctx->Transform.MatrixMode == GL_TEXTURE) { @@ -280,7 +280,7 @@ _mesa_PopMatrix( void ) } else { _mesa_error(ctx, GL_STACK_UNDERFLOW, "glPopMatrix(mode=%s)", - _mesa_lookup_enum_by_nr(ctx->Transform.MatrixMode)); + _mesa_enum_to_string(ctx->Transform.MatrixMode)); } return; } diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c index 7732d09b2ec..1e22f930092 100644 --- a/src/mesa/main/mipmap.c +++ b/src/mesa/main/mipmap.c @@ -2077,9 +2077,12 @@ generate_mipmap_compressed(struct gl_context *ctx, GLenum target, /* Get the uncompressed image */ assert(srcImage->Level == texObj->BaseLevel); - ctx->Driver.GetTexImage(ctx, - temp_base_format, temp_datatype, - temp_src, srcImage); + ctx->Driver.GetTexSubImage(ctx, + 0, 0, 0, + srcImage->Width, srcImage->Height, + srcImage->Depth, + temp_base_format, temp_datatype, + temp_src, srcImage); /* restore packing mode */ ctx->Pack = save; } diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 2d285b87a78..83f3717754d 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -90,7 +90,7 @@ struct vbo_context; /** Extra draw modes beyond GL_POINTS, GL_TRIANGLE_FAN, 
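/*
 * Editorial note: the macros.h hunk above retires F_TO_I(), whose rounding
 * depended on which x87/SSE path was compiled in, and moves the float-to-norm
 * macros onto _mesa_lroundevenf(), i.e. round-half-to-even.  Under the default
 * FE_TONEAREST mode the standard lrintf() rounds the same way, which this
 * sketch uses to show what UNCLAMPED_FLOAT_TO_UBYTE now computes.
 */
#include <math.h>
#include <stdio.h>

static unsigned char float_to_ubyte(float f)
{
   if (f < 0.0f) f = 0.0f;
   if (f > 1.0f) f = 1.0f;
   return (unsigned char) lrintf(f * 255.0f);    /* ties round to even */
}

int main(void)
{
   /* 0.5f * 255 = 127.5: ties-to-even picks 128 (even), whereas plain
    * truncation would have produced 127 */
   printf("%u %u %u\n",
          float_to_ubyte(0.5f), float_to_ubyte(1.0f), float_to_ubyte(-0.25f));
   return 0;
}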
etc */ -#define PRIM_MAX GL_TRIANGLE_STRIP_ADJACENCY +#define PRIM_MAX GL_PATCHES #define PRIM_OUTSIDE_BEGIN_END (PRIM_MAX + 1) #define PRIM_UNKNOWN (PRIM_MAX + 2) @@ -109,6 +109,8 @@ _mesa_varying_slot_in_fs(gl_varying_slot slot) case VARYING_SLOT_EDGE: case VARYING_SLOT_CLIP_VERTEX: case VARYING_SLOT_LAYER: + case VARYING_SLOT_TESS_LEVEL_OUTER: + case VARYING_SLOT_TESS_LEVEL_INNER: return GL_FALSE; default: return GL_TRUE; @@ -1254,6 +1256,7 @@ typedef enum { USAGE_UNIFORM_BUFFER = 0x1, USAGE_TEXTURE_BUFFER = 0x2, USAGE_ATOMIC_COUNTER_BUFFER = 0x4, + USAGE_SHADER_STORAGE_BUFFER = 0x8, } gl_buffer_usage; @@ -1654,6 +1657,11 @@ struct gl_transform_feedback_info * multiple transform feedback outputs in the same buffer. */ unsigned BufferStride[MAX_FEEDBACK_BUFFERS]; + + /** + * Which transform feedback stream this buffer binding is associated with. + */ + unsigned BufferStream[MAX_FEEDBACK_BUFFERS]; }; @@ -1891,6 +1899,8 @@ struct gl_program GLbitfield64 InputsRead; /**< Bitmask of which input regs are read */ GLbitfield64 DoubleInputsRead; /**< Bitmask of which input regs are read and are doubles */ GLbitfield64 OutputsWritten; /**< Bitmask of which output regs are written */ + GLbitfield PatchInputsRead; /**< VAR[0..31] usage for patch inputs (user-defined only) */ + GLbitfield PatchOutputsWritten; /**< VAR[0..31] usage for patch outputs (user-defined only) */ GLbitfield SystemValuesRead; /**< Bitmask of SYSTEM_VALUE_x inputs used */ GLbitfield TexturesUsed[MAX_COMBINED_TEXTURE_IMAGE_UNITS]; /**< TEXTURE_x_BIT bitmask */ GLbitfield SamplersUsed; /**< Bitfield of which samplers are used */ @@ -1958,6 +1968,29 @@ struct gl_vertex_program }; +/** Tessellation control program object */ +struct gl_tess_ctrl_program +{ + struct gl_program Base; /**< base class */ + + /* output layout */ + GLint VerticesOut; +}; + + +/** Tessellation evaluation program object */ +struct gl_tess_eval_program +{ + struct gl_program Base; /**< base class */ + + /* input layout */ + GLenum PrimitiveMode; /* GL_TRIANGLES, GL_QUADS or GL_ISOLINES */ + GLenum Spacing; /* GL_EQUAL, GL_FRACTIONAL_EVEN, GL_FRACTIONAL_ODD */ + GLenum VertexOrder; /* GL_CW or GL_CCW */ + bool PointMode; +}; + + /** Geometry program object */ struct gl_geometry_program { @@ -2060,6 +2093,27 @@ struct gl_vertex_program_state GLboolean _Overriden; }; +/** + * Context state for tessellation control programs. + */ +struct gl_tess_ctrl_program_state +{ + /** Currently bound and valid shader. */ + struct gl_tess_ctrl_program *_Current; + + GLint patch_vertices; + GLfloat patch_default_outer_level[4]; + GLfloat patch_default_inner_level[2]; +}; + +/** + * Context state for tessellation evaluation programs. + */ +struct gl_tess_eval_program_state +{ + /** Currently bound and valid shader. */ + struct gl_tess_eval_program *_Current; +}; /** * Context state for geometry programs. @@ -2154,13 +2208,23 @@ struct gl_ati_fragment_shader_state struct ati_fragment_shader *Current; }; +/** + * Shader subroutine function definition + */ +struct gl_subroutine_function +{ + char *name; + int num_compat_types; + const struct glsl_type **types; +}; /** * A GLSL vertex or fragment shader object. */ struct gl_shader { - /** GL_FRAGMENT_SHADER || GL_VERTEX_SHADER || GL_GEOMETRY_SHADER_ARB. + /** GL_FRAGMENT_SHADER || GL_VERTEX_SHADER || GL_GEOMETRY_SHADER_ARB || + * GL_TESS_CONTROL_SHADER || GL_TESS_EVALUATION_SHADER. * Must be the first field. 
*/ GLenum Type; @@ -2240,6 +2304,41 @@ struct gl_shader bool pixel_center_integer; /** + * Tessellation Control shader state from layout qualifiers. + */ + struct { + /** + * 0 - vertices not declared in shader, or + * 1 .. GL_MAX_PATCH_VERTICES + */ + GLint VerticesOut; + } TessCtrl; + + /** + * Tessellation Evaluation shader state from layout qualifiers. + */ + struct { + /** + * GL_TRIANGLES, GL_QUADS, GL_ISOLINES or PRIM_UNKNOWN if it's not set + * in this shader. + */ + GLenum PrimitiveMode; + /** + * GL_EQUAL, GL_FRACTIONAL_ODD, GL_FRACTIONAL_EVEN, or 0 if it's not set + * in this shader. + */ + GLenum Spacing; + /** + * GL_CW, GL_CCW, or 0 if it's not set in this shader. + */ + GLenum VertexOrder; + /** + * 1, 0, or -1 if it's not set in this shader. + */ + int PointMode; + } TessEval; + + /** * Geometry shader state from GLSL 1.50 layout qualifiers. */ struct { @@ -2304,6 +2403,25 @@ struct gl_shader */ unsigned LocalSize[3]; } Comp; + + /** + * Number of types for subroutine uniforms. + */ + GLuint NumSubroutineUniformTypes; + + /** + * Subroutine uniform remap table + * based on the program level uniform remap table. + */ + GLuint NumSubroutineUniformRemapTable; + struct gl_uniform_storage **SubroutineUniformRemapTable; + + /** + * Num of subroutine functions for this stage + * and storage for them. + */ + GLuint NumSubroutineFunctions; + struct gl_subroutine_function *SubroutineFunctions; }; @@ -2365,6 +2483,11 @@ struct gl_uniform_block GLuint UniformBufferSize; /** + * Is this actually an interface block for a shader storage buffer? + */ + bool IsShaderStorage; + + /** * Layout specified in the shader * * This isn't accessible through the API, but it is used while @@ -2468,6 +2591,37 @@ struct gl_shader_program enum gl_frag_depth_layout FragDepthLayout; /** + * Tessellation Control shader state from layout qualifiers. + */ + struct { + /** + * 0 - vertices not declared in shader, or + * 1 .. GL_MAX_PATCH_VERTICES + */ + GLint VerticesOut; + } TessCtrl; + + /** + * Tessellation Evaluation shader state from layout qualifiers. + */ + struct { + /** GL_TRIANGLES, GL_QUADS or GL_ISOLINES */ + GLenum PrimitiveMode; + /** GL_EQUAL, GL_FRACTIONAL_ODD or GL_FRACTIONAL_EVEN */ + GLenum Spacing; + /** GL_CW or GL_CCW */ + GLenum VertexOrder; + bool PointMode; + /** + * True if gl_ClipDistance is written to. Copied into + * gl_tess_eval_program by _mesa_copy_linked_program_data(). + */ + GLboolean UsesClipDistance; + GLuint ClipDistanceArraySize; /**< Size of the gl_ClipDistance array, or + 0 if not present. */ + } TessEval; + + /** * Geometry shader state - copied into gl_geometry_program by * _mesa_copy_linked_program_data(). 
*/ @@ -2681,6 +2835,7 @@ struct gl_shader_compiler_options GLboolean EmitNoIndirectOutput; /**< No indirect addressing of outputs */ GLboolean EmitNoIndirectTemp; /**< No indirect addressing of temps */ GLboolean EmitNoIndirectUniform; /**< No indirect addressing of constants */ + GLboolean EmitNoIndirectSampler; /**< No indirect addressing of samplers */ /*@}*/ GLuint MaxIfDepth; /**< Maximum nested IF blocks */ @@ -3100,6 +3255,9 @@ struct gl_program_constants /* GL_ARB_shader_image_load_store */ GLuint MaxImageUniforms; + + /* GL_ARB_shader_storage_buffer_object */ + GLuint MaxShaderStorageBlocks; }; @@ -3197,6 +3355,15 @@ struct gl_constants GLuint UniformBufferOffsetAlignment; /** @} */ + /** @{ + * GL_ARB_shader_storage_buffer_object + */ + GLuint MaxCombinedShaderStorageBlocks; + GLuint MaxShaderStorageBufferBindings; + GLuint MaxShaderStorageBlockSize; + GLuint ShaderStorageBufferOffsetAlignment; + /** @} */ + /** * GL_ARB_explicit_uniform_location */ @@ -3423,6 +3590,13 @@ struct gl_constants GLenum ContextReleaseBehavior; struct gl_shader_compiler_options ShaderCompilerOptions[MESA_SHADER_STAGES]; + + /** GL_ARB_tessellation_shader */ + GLuint MaxPatchVertices; + GLuint MaxTessGenLevel; + GLuint MaxTessPatchComponents; + GLuint MaxTessControlTotalOutputComponents; + bool LowerTessLevel; /**< Lower gl_TessLevel* from float[n] to vecn? */ }; @@ -3484,6 +3658,8 @@ struct gl_extensions GLboolean ARB_shader_image_load_store; GLboolean ARB_shader_precision; GLboolean ARB_shader_stencil_export; + GLboolean ARB_shader_storage_buffer_object; + GLboolean ARB_shader_subroutine; GLboolean ARB_shader_texture_lod; GLboolean ARB_shading_language_packing; GLboolean ARB_shading_language_420pack; @@ -3815,6 +3991,12 @@ struct gl_driver_flags */ uint64_t NewUniformBuffer; + /** + * gl_context::ShaderStorageBufferBindings + * gl_shader_program::ShaderStorageBlocks + */ + uint64_t NewShaderStorageBuffer; + uint64_t NewTextureBuffer; /** @@ -3826,6 +4008,11 @@ struct gl_driver_flags * gl_context::ImageUnits */ uint64_t NewImageUnits; + + /** + * gl_context::TessCtrlProgram::patch_default_* + */ + uint64_t NewDefaultTessLevels; }; struct gl_uniform_buffer_binding @@ -3842,6 +4029,20 @@ struct gl_uniform_buffer_binding GLboolean AutomaticSize; }; +struct gl_shader_storage_buffer_binding +{ + struct gl_buffer_object *BufferObject; + /** Start of shader storage block data in the buffer */ + GLintptr Offset; + /** Size of data allowed to be referenced from the buffer (in bytes) */ + GLsizeiptr Size; + /** + * glBindBufferBase() indicates that the Size should be ignored and only + * limited by the current size of the BufferObject. + */ + GLboolean AutomaticSize; +}; + /** * ARB_shader_image_load_store image unit. */ @@ -4047,6 +4248,8 @@ struct gl_context struct gl_fragment_program_state FragmentProgram; struct gl_geometry_program_state GeometryProgram; struct gl_compute_program_state ComputeProgram; + struct gl_tess_ctrl_program_state TessCtrlProgram; + struct gl_tess_eval_program_state TessEvalProgram; struct gl_ati_fragment_shader_state ATIFragmentShader; struct gl_pipeline_shader_state Pipeline; /**< GLSL pipeline shader object state */ @@ -4089,6 +4292,12 @@ struct gl_context struct gl_buffer_object *UniformBuffer; /** + * Current GL_ARB_shader_storage_buffer_object binding referenced by + * GL_SHADER_STORAGE_BUFFER target for glBufferData, glMapBuffer, etc. + */ + struct gl_buffer_object *ShaderStorageBuffer; + + /** * Array of uniform buffers for GL_ARB_uniform_buffer_object and GL 3.1. 
* This is set up using glBindBufferRange() or glBindBufferBase(). They are * associated with uniform blocks by glUniformBlockBinding()'s state in the @@ -4098,6 +4307,15 @@ struct gl_context UniformBufferBindings[MAX_COMBINED_UNIFORM_BUFFERS]; /** + * Array of shader storage buffers for ARB_shader_storage_buffer_object + * and GL 4.3. This is set up using glBindBufferRange() or + * glBindBufferBase(). They are associated with shader storage blocks by + * glShaderStorageBlockBinding()'s state in the shader program. + */ + struct gl_shader_storage_buffer_binding + ShaderStorageBufferBindings[MAX_COMBINED_SHADER_STORAGE_BUFFERS]; + + /** * Object currently associated with the GL_ATOMIC_COUNTER_BUFFER * target. */ diff --git a/src/mesa/main/multisample.c b/src/mesa/main/multisample.c index 816837b95bd..09e6154f7ec 100644 --- a/src/mesa/main/multisample.c +++ b/src/mesa/main/multisample.c @@ -43,7 +43,7 @@ _mesa_SampleCoverage(GLclampf value, GLboolean invert) FLUSH_VERTICES(ctx, 0); - ctx->Multisample.SampleCoverageValue = (GLfloat) CLAMP(value, 0.0, 1.0); + ctx->Multisample.SampleCoverageValue = CLAMP(value, 0.0f, 1.0f); ctx->Multisample.SampleCoverageInvert = invert; ctx->NewState |= _NEW_MULTISAMPLE; } @@ -134,7 +134,7 @@ _mesa_MinSampleShading(GLclampf value) FLUSH_VERTICES(ctx, 0); - ctx->Multisample.MinSampleShadingValue = CLAMP(value, 0.0, 1.0); + ctx->Multisample.MinSampleShadingValue = CLAMP(value, 0.0f, 1.0f); ctx->NewState |= _NEW_MULTISAMPLE; } @@ -164,8 +164,11 @@ _mesa_check_sample_count(struct gl_context *ctx, GLenum target, * * "If internalformat is a signed or unsigned integer format and samples * is greater than zero, then the error INVALID_OPERATION is generated." + * + * This restriction is relaxed for OpenGL ES 3.1. */ - if (_mesa_is_gles3(ctx) && _mesa_is_enum_format_integer(internalFormat) + if ((ctx->API == API_OPENGLES2 && ctx->Version == 30) && + _mesa_is_enum_format_integer(internalFormat) && samples > 0) { return GL_INVALID_OPERATION; } diff --git a/src/mesa/main/objectlabel.c b/src/mesa/main/objectlabel.c index 5626054687b..1019f893ba8 100644 --- a/src/mesa/main/objectlabel.c +++ b/src/mesa/main/objectlabel.c @@ -234,7 +234,7 @@ get_label_pointer(struct gl_context *ctx, GLenum identifier, GLuint name, invalid_enum: _mesa_error(ctx, GL_INVALID_ENUM, "%s(identifier = %s)", - caller, _mesa_lookup_enum_by_nr(identifier)); + caller, _mesa_enum_to_string(identifier)); return NULL; } diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index f72360817e9..7147fd6e4fe 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -470,7 +470,7 @@ extract_uint_indexes(GLuint n, GLuint indexes[], static inline GLuint clamp_float_to_uint(GLfloat f) { - return f < 0.0F ? 0 : F_TO_I(f); + return f < 0.0F ? 0 : _mesa_lroundevenf(f); } @@ -478,7 +478,7 @@ static inline GLuint clamp_half_to_uint(GLhalfARB h) { GLfloat f = _mesa_half_to_float(h); - return f < 0.0F ? 0 : F_TO_I(f); + return f < 0.0F ? 0 : _mesa_lroundevenf(f); } @@ -796,7 +796,7 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n, * back to an int type can introduce errors that will show up as * artifacts in things like depth peeling which uses glCopyTexImage. 
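As a hedged sketch of the client calls that populate the ShaderStorageBuffer binding point and the ShaderStorageBufferBindings array described above (the matching GLSL block is assumed to declare layout(binding = 0), so no further API-side association is needed):

#include <epoxy/gl.h>   /* assumed loader header */

static GLuint
make_storage_buffer(void)
{
   GLuint ssbo;

   glGenBuffers(1, &ssbo);
   glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);   /* sets ctx->ShaderStorageBuffer */
   glBufferData(GL_SHADER_STORAGE_BUFFER, 4096, NULL, GL_DYNAMIC_COPY);

   /* Fills ShaderStorageBufferBindings[0]; AutomaticSize is set because the
    * whole buffer is bound. A sub-range could be bound instead with
    * glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, ssbo, 0, 1024). */
   glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo);

   return ssbo;
}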
*/ - if (ctx->Pixel.DepthScale == 1.0 && ctx->Pixel.DepthBias == 0.0) { + if (ctx->Pixel.DepthScale == 1.0F && ctx->Pixel.DepthBias == 0.0F) { if (srcType == GL_UNSIGNED_INT && dstType == GL_UNSIGNED_SHORT) { const GLuint *src = (const GLuint *) source; GLushort *dst = (GLushort *) dest; @@ -874,8 +874,8 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n, case GL_UNSIGNED_INT_24_8_EXT: /* GL_EXT_packed_depth_stencil */ if (dstType == GL_UNSIGNED_INT_24_8_EXT && depthMax == 0xffffff && - ctx->Pixel.DepthScale == 1.0 && - ctx->Pixel.DepthBias == 0.0) { + ctx->Pixel.DepthScale == 1.0F && + ctx->Pixel.DepthBias == 0.0F) { const GLuint *src = (const GLuint *) source; GLuint *zValues = (GLuint *) dest; GLuint i; @@ -945,7 +945,7 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n, { const GLfloat scale = ctx->Pixel.DepthScale; const GLfloat bias = ctx->Pixel.DepthBias; - if (scale != 1.0 || bias != 0.0) { + if (scale != 1.0F || bias != 0.0F) { GLuint i; for (i = 0; i < n; i++) { depthValues[i] = depthValues[i] * scale + bias; @@ -958,7 +958,7 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n, if (needClamp) { GLuint i; for (i = 0; i < n; i++) { - depthValues[i] = (GLfloat)CLAMP(depthValues[i], 0.0, 1.0); + depthValues[i] = CLAMP(depthValues[i], 0.0F, 1.0F); } } @@ -1025,7 +1025,7 @@ _mesa_pack_depth_span( struct gl_context *ctx, GLuint n, GLvoid *dest, return; } - if (ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0) { + if (ctx->Pixel.DepthScale != 1.0F || ctx->Pixel.DepthBias != 0.0F) { memcpy(depthCopy, depthSpan, n * sizeof(GLfloat)); _mesa_scale_and_bias_depth(ctx, n, depthCopy); depthSpan = depthCopy; @@ -1153,7 +1153,7 @@ _mesa_pack_depth_stencil_span(struct gl_context *ctx,GLuint n, return; } - if (ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0) { + if (ctx->Pixel.DepthScale != 1.0F || ctx->Pixel.DepthBias != 0.0F) { memcpy(depthCopy, depthVals, n * sizeof(GLfloat)); _mesa_scale_and_bias_depth(ctx, n, depthCopy); depthVals = depthCopy; diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c index 279ae2078fe..07acbf10c1d 100644 --- a/src/mesa/main/pipelineobj.c +++ b/src/mesa/main/pipelineobj.c @@ -244,14 +244,13 @@ _mesa_UseProgramStages(GLuint pipeline, GLbitfield stages, GLuint program) * * "If stages is not the special value ALL_SHADER_BITS, and has a bit * set that is not recognized, the error INVALID_VALUE is generated." - * - * NOT YET SUPPORTED: - * GL_TESS_CONTROL_SHADER_BIT - * GL_TESS_EVALUATION_SHADER_BIT */ any_valid_stages = GL_VERTEX_SHADER_BIT | GL_FRAGMENT_SHADER_BIT; if (_mesa_has_geometry_shaders(ctx)) any_valid_stages |= GL_GEOMETRY_SHADER_BIT; + if (_mesa_has_tessellation(ctx)) + any_valid_stages |= GL_TESS_CONTROL_SHADER_BIT | + GL_TESS_EVALUATION_SHADER_BIT; if (stages != GL_ALL_SHADER_BITS && (stages & ~any_valid_stages) != 0) { _mesa_error(ctx, GL_INVALID_VALUE, "glUseProgramStages(Stages)"); @@ -327,6 +326,12 @@ _mesa_UseProgramStages(GLuint pipeline, GLbitfield stages, GLuint program) if ((stages & GL_GEOMETRY_SHADER_BIT) != 0) _mesa_use_shader_program(ctx, GL_GEOMETRY_SHADER, shProg, pipe); + + if ((stages & GL_TESS_CONTROL_SHADER_BIT) != 0) + _mesa_use_shader_program(ctx, GL_TESS_CONTROL_SHADER, shProg, pipe); + + if ((stages & GL_TESS_EVALUATION_SHADER_BIT) != 0) + _mesa_use_shader_program(ctx, GL_TESS_EVALUATION_SHADER, shProg, pipe); } /** @@ -588,6 +593,7 @@ _mesa_GetProgramPipelineiv(GLuint pipeline, GLenum pname, GLint *params) /* Are geometry shaders available in this context? 
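With glUseProgramStages now accepting the tessellation stage bits, a separate-shader-objects client can route those stages through a pipeline object. A minimal sketch, assuming 'prog' is a linked program with GL_PROGRAM_SEPARABLE set that contains TCS and TES stages:

#include <epoxy/gl.h>   /* assumed loader header */

static GLuint
attach_tess_stages(GLuint prog)
{
   GLuint pipe;

   glGenProgramPipelines(1, &pipe);
   glUseProgramStages(pipe,
                      GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT,
                      prog);
   glBindProgramPipeline(pipe);
   return pipe;
}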
*/ const bool has_gs = _mesa_has_geometry_shaders(ctx); + const bool has_tess = _mesa_has_tessellation(ctx);; if (!pipe) { _mesa_error(ctx, GL_INVALID_OPERATION, @@ -615,11 +621,17 @@ _mesa_GetProgramPipelineiv(GLuint pipeline, GLenum pname, GLint *params) ? pipe->CurrentProgram[MESA_SHADER_VERTEX]->Name : 0; return; case GL_TESS_EVALUATION_SHADER: - /* NOT YET SUPPORTED */ - break; + if (!has_tess) + break; + *params = pipe->CurrentProgram[MESA_SHADER_TESS_EVAL] + ? pipe->CurrentProgram[MESA_SHADER_TESS_EVAL]->Name : 0; + return; case GL_TESS_CONTROL_SHADER: - /* NOT YET SUPPORTED */ - break; + if (!has_tess) + break; + *params = pipe->CurrentProgram[MESA_SHADER_TESS_CTRL] + ? pipe->CurrentProgram[MESA_SHADER_TESS_CTRL]->Name : 0; + return; case GL_GEOMETRY_SHADER: if (!has_gs) break; @@ -635,7 +647,7 @@ _mesa_GetProgramPipelineiv(GLuint pipeline, GLenum pname, GLint *params) } _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramPipelineiv(pname=%s)", - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); } /** @@ -777,7 +789,9 @@ _mesa_validate_program_pipeline(struct gl_context* ctx, * executable vertex shader." */ if (!pipe->CurrentProgram[MESA_SHADER_VERTEX] - && pipe->CurrentProgram[MESA_SHADER_GEOMETRY]) { + && (pipe->CurrentProgram[MESA_SHADER_GEOMETRY] || + pipe->CurrentProgram[MESA_SHADER_TESS_CTRL] || + pipe->CurrentProgram[MESA_SHADER_TESS_EVAL])) { pipe->InfoLog = ralloc_strdup(pipe, "Program lacks a vertex shader"); goto err; } diff --git a/src/mesa/main/pixel.c b/src/mesa/main/pixel.c index ecda2694fc8..608a5454702 100644 --- a/src/mesa/main/pixel.c +++ b/src/mesa/main/pixel.c @@ -455,12 +455,12 @@ _mesa_GetnPixelMapusvARB( GLenum map, GLsizei bufSize, GLushort *values ) /* special cases */ case GL_PIXEL_MAP_I_TO_I: for (i = 0; i < mapsize; i++) { - values[i] = (GLushort) CLAMP(ctx->PixelMaps.ItoI.Map[i], 0.0, 65535.); + values[i] = (GLushort) CLAMP(ctx->PixelMaps.ItoI.Map[i], 0.0F, 65535.0F); } break; case GL_PIXEL_MAP_S_TO_S: for (i = 0; i < mapsize; i++) { - values[i] = (GLushort) CLAMP(ctx->PixelMaps.StoS.Map[i], 0.0, 65535.); + values[i] = (GLushort) CLAMP(ctx->PixelMaps.StoS.Map[i], 0.0F, 65535.0F); } break; default: diff --git a/src/mesa/main/pixeltransfer.c b/src/mesa/main/pixeltransfer.c index 94464ea6709..22eac00a7df 100644 --- a/src/mesa/main/pixeltransfer.c +++ b/src/mesa/main/pixeltransfer.c @@ -35,6 +35,7 @@ #include "pixeltransfer.h" #include "imports.h" #include "mtypes.h" +#include "util/rounding.h" /* @@ -47,25 +48,25 @@ _mesa_scale_and_bias_rgba(GLuint n, GLfloat rgba[][4], GLfloat rBias, GLfloat gBias, GLfloat bBias, GLfloat aBias) { - if (rScale != 1.0 || rBias != 0.0) { + if (rScale != 1.0F || rBias != 0.0F) { GLuint i; for (i = 0; i < n; i++) { rgba[i][RCOMP] = rgba[i][RCOMP] * rScale + rBias; } } - if (gScale != 1.0 || gBias != 0.0) { + if (gScale != 1.0F || gBias != 0.0F) { GLuint i; for (i = 0; i < n; i++) { rgba[i][GCOMP] = rgba[i][GCOMP] * gScale + gBias; } } - if (bScale != 1.0 || bBias != 0.0) { + if (bScale != 1.0F || bBias != 0.0F) { GLuint i; for (i = 0; i < n; i++) { rgba[i][BCOMP] = rgba[i][BCOMP] * bScale + bBias; } } - if (aScale != 1.0 || aBias != 0.0) { + if (aScale != 1.0F || aBias != 0.0F) { GLuint i; for (i = 0; i < n; i++) { rgba[i][ACOMP] = rgba[i][ACOMP] * aScale + aBias; @@ -94,10 +95,10 @@ _mesa_map_rgba( const struct gl_context *ctx, GLuint n, GLfloat rgba[][4] ) GLfloat g = CLAMP(rgba[i][GCOMP], 0.0F, 1.0F); GLfloat b = CLAMP(rgba[i][BCOMP], 0.0F, 1.0F); GLfloat a = CLAMP(rgba[i][ACOMP], 0.0F, 1.0F); - rgba[i][RCOMP] = 
rMap[F_TO_I(r * rscale)]; - rgba[i][GCOMP] = gMap[F_TO_I(g * gscale)]; - rgba[i][BCOMP] = bMap[F_TO_I(b * bscale)]; - rgba[i][ACOMP] = aMap[F_TO_I(a * ascale)]; + rgba[i][RCOMP] = rMap[(int)_mesa_lroundevenf(r * rscale)]; + rgba[i][GCOMP] = gMap[(int)_mesa_lroundevenf(g * gscale)]; + rgba[i][BCOMP] = bMap[(int)_mesa_lroundevenf(b * bscale)]; + rgba[i][ACOMP] = aMap[(int)_mesa_lroundevenf(a * ascale)]; } } @@ -236,7 +237,7 @@ _mesa_apply_ci_transfer_ops(const struct gl_context *ctx, GLuint i; for (i = 0; i < n; i++) { const GLuint j = indexes[i] & mask; - indexes[i] = F_TO_I(ctx->PixelMaps.ItoI.Map[j]); + indexes[i] = _mesa_lroundevenf(ctx->PixelMaps.ItoI.Map[j]); } } } diff --git a/src/mesa/main/points.c b/src/mesa/main/points.c index 5ad1f38f366..863e3c1af32 100644 --- a/src/mesa/main/points.c +++ b/src/mesa/main/points.c @@ -45,7 +45,7 @@ _mesa_PointSize( GLfloat size ) { GET_CURRENT_CONTEXT(ctx); - if (size <= 0.0) { + if (size <= 0.0F) { _mesa_error( ctx, GL_INVALID_VALUE, "glPointSize" ); return; } @@ -119,9 +119,9 @@ _mesa_PointParameterfv( GLenum pname, const GLfloat *params) return; FLUSH_VERTICES(ctx, _NEW_POINT); COPY_3V(ctx->Point.Params, params); - ctx->Point._Attenuated = (ctx->Point.Params[0] != 1.0 || - ctx->Point.Params[1] != 0.0 || - ctx->Point.Params[2] != 0.0); + ctx->Point._Attenuated = (ctx->Point.Params[0] != 1.0F || + ctx->Point.Params[1] != 0.0F || + ctx->Point.Params[2] != 0.0F); break; case GL_POINT_SIZE_MIN_EXT: if (params[0] < 0.0F) { diff --git a/src/mesa/main/polygon.c b/src/mesa/main/polygon.c index a1f0aa02da1..60af88f9857 100644 --- a/src/mesa/main/polygon.c +++ b/src/mesa/main/polygon.c @@ -56,7 +56,7 @@ _mesa_CullFace( GLenum mode ) GET_CURRENT_CONTEXT(ctx); if (MESA_VERBOSE&VERBOSE_API) - _mesa_debug(ctx, "glCullFace %s\n", _mesa_lookup_enum_by_nr(mode)); + _mesa_debug(ctx, "glCullFace %s\n", _mesa_enum_to_string(mode)); if (mode!=GL_FRONT && mode!=GL_BACK && mode!=GL_FRONT_AND_BACK) { _mesa_error( ctx, GL_INVALID_ENUM, "glCullFace" ); @@ -91,16 +91,16 @@ _mesa_FrontFace( GLenum mode ) GET_CURRENT_CONTEXT(ctx); if (MESA_VERBOSE&VERBOSE_API) - _mesa_debug(ctx, "glFrontFace %s\n", _mesa_lookup_enum_by_nr(mode)); + _mesa_debug(ctx, "glFrontFace %s\n", _mesa_enum_to_string(mode)); + + if (ctx->Polygon.FrontFace == mode) + return; if (mode!=GL_CW && mode!=GL_CCW) { _mesa_error( ctx, GL_INVALID_ENUM, "glFrontFace" ); return; } - if (ctx->Polygon.FrontFace == mode) - return; - FLUSH_VERTICES(ctx, _NEW_POLYGON); ctx->Polygon.FrontFace = mode; @@ -128,8 +128,8 @@ _mesa_PolygonMode( GLenum face, GLenum mode ) if (MESA_VERBOSE&VERBOSE_API) _mesa_debug(ctx, "glPolygonMode %s %s\n", - _mesa_lookup_enum_by_nr(face), - _mesa_lookup_enum_by_nr(mode)); + _mesa_enum_to_string(face), + _mesa_enum_to_string(mode)); if (mode!=GL_POINT && mode!=GL_LINE && mode!=GL_FILL) { _mesa_error( ctx, GL_INVALID_ENUM, "glPolygonMode(mode)" ); diff --git a/src/mesa/main/program_resource.c b/src/mesa/main/program_resource.c index d857b84e60d..23d2b4d2da0 100644 --- a/src/mesa/main/program_resource.c +++ b/src/mesa/main/program_resource.c @@ -28,10 +28,11 @@ #include "main/mtypes.h" #include "main/shaderapi.h" #include "main/shaderobj.h" +#include "main/context.h" #include "program_resource.h" - +#include "ir_uniform.h" static bool -supported_interface_enum(GLenum iface) +supported_interface_enum(struct gl_context *ctx, GLenum iface) { switch (iface) { case GL_UNIFORM: @@ -42,17 +43,21 @@ supported_interface_enum(GLenum iface) case GL_ATOMIC_COUNTER_BUFFER: return true; case 
GL_VERTEX_SUBROUTINE: - case GL_TESS_CONTROL_SUBROUTINE: - case GL_TESS_EVALUATION_SUBROUTINE: - case GL_GEOMETRY_SUBROUTINE: case GL_FRAGMENT_SUBROUTINE: - case GL_COMPUTE_SUBROUTINE: case GL_VERTEX_SUBROUTINE_UNIFORM: - case GL_TESS_CONTROL_SUBROUTINE_UNIFORM: - case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: - case GL_GEOMETRY_SUBROUTINE_UNIFORM: case GL_FRAGMENT_SUBROUTINE_UNIFORM: + return _mesa_has_shader_subroutine(ctx); + case GL_GEOMETRY_SUBROUTINE: + case GL_GEOMETRY_SUBROUTINE_UNIFORM: + return _mesa_has_geometry_shaders(ctx) && _mesa_has_shader_subroutine(ctx); + case GL_COMPUTE_SUBROUTINE: case GL_COMPUTE_SUBROUTINE_UNIFORM: + return _mesa_has_compute_shaders(ctx) && _mesa_has_shader_subroutine(ctx); + case GL_TESS_CONTROL_SUBROUTINE: + case GL_TESS_EVALUATION_SUBROUTINE: + case GL_TESS_CONTROL_SUBROUTINE_UNIFORM: + case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: + return _mesa_has_tessellation(ctx) && _mesa_has_shader_subroutine(ctx); case GL_BUFFER_VARIABLE: case GL_SHADER_STORAGE_BLOCK: default: @@ -79,9 +84,9 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface, } /* Validate interface. */ - if (!supported_interface_enum(programInterface)) { + if (!supported_interface_enum(ctx, programInterface)) { _mesa_error(ctx, GL_INVALID_OPERATION, "glGetProgramInterfaceiv(%s)", - _mesa_lookup_enum_by_nr(programInterface)); + _mesa_enum_to_string(programInterface)); return; } @@ -96,8 +101,8 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface, if (programInterface == GL_ATOMIC_COUNTER_BUFFER) { _mesa_error(ctx, GL_INVALID_OPERATION, "glGetProgramInterfaceiv(%s pname %s)", - _mesa_lookup_enum_by_nr(programInterface), - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(programInterface), + _mesa_enum_to_string(pname)); return; } /* Name length consists of base name, 3 additional chars '[0]' if @@ -138,15 +143,40 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface, default: _mesa_error(ctx, GL_INVALID_OPERATION, "glGetProgramInterfaceiv(%s pname %s)", - _mesa_lookup_enum_by_nr(programInterface), - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(programInterface), + _mesa_enum_to_string(pname)); }; break; case GL_MAX_NUM_COMPATIBLE_SUBROUTINES: + switch (programInterface) { + case GL_VERTEX_SUBROUTINE_UNIFORM: + case GL_FRAGMENT_SUBROUTINE_UNIFORM: + case GL_GEOMETRY_SUBROUTINE_UNIFORM: + case GL_COMPUTE_SUBROUTINE_UNIFORM: + case GL_TESS_CONTROL_SUBROUTINE_UNIFORM: + case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: { + for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) { + if (shProg->ProgramResourceList[i].Type == programInterface) { + struct gl_uniform_storage *uni = + (struct gl_uniform_storage *) + shProg->ProgramResourceList[i].Data; + *params = MAX2(*params, uni->num_compatible_subroutines); + } + } + break; + } + + default: + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetProgramInterfaceiv(%s pname %s)", + _mesa_enum_to_string(programInterface), + _mesa_enum_to_string(pname)); + } + break; default: _mesa_error(ctx, GL_INVALID_OPERATION, "glGetProgramInterfaceiv(pname %s)", - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); } } @@ -173,32 +203,12 @@ is_xfb_marker(const char *str) return false; } -/** - * Checks if given name index is legal for GetProgramResourceIndex, - * check is written to be compatible with GL_ARB_array_of_arrays. 
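With the subroutine interfaces accepted by supported_interface_enum and glGetProgramInterfaceiv above, an application can enumerate subroutine uniforms through the program interface query path. An illustrative sketch; "shade" is an assumed uniform name, not anything defined by this change:

#include <epoxy/gl.h>   /* assumed loader header */

static void
inspect_subroutine_uniforms(GLuint prog, GLint *active, GLint *max_compat,
                            GLuint *shade_index)
{
   glGetProgramInterfaceiv(prog, GL_FRAGMENT_SUBROUTINE_UNIFORM,
                           GL_ACTIVE_RESOURCES, active);
   glGetProgramInterfaceiv(prog, GL_FRAGMENT_SUBROUTINE_UNIFORM,
                           GL_MAX_NUM_COMPATIBLE_SUBROUTINES, max_compat);

   /* "shade" is an assumed subroutine uniform name in the fragment shader. */
   *shade_index = glGetProgramResourceIndex(prog, GL_FRAGMENT_SUBROUTINE_UNIFORM,
                                            "shade");
}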
- */ -static bool -valid_program_resource_index_name(const GLchar *name) -{ - const char *array = strstr(name, "["); - const char *close = strrchr(name, ']'); - - /* Not array, no need for the check. */ - if (!array) - return true; - - /* Last array index has to be zero. */ - if (!close || *--close != '0') - return false; - - return true; -} - GLuint GLAPIENTRY _mesa_GetProgramResourceIndex(GLuint program, GLenum programInterface, const GLchar *name) { GET_CURRENT_CONTEXT(ctx); + unsigned array_index = 0; struct gl_program_resource *res; struct gl_shader_program *shProg = _mesa_lookup_shader_program_err(ctx, program, @@ -206,6 +216,11 @@ _mesa_GetProgramResourceIndex(GLuint program, GLenum programInterface, if (!shProg || !name) return GL_INVALID_INDEX; + if (!supported_interface_enum(ctx, programInterface)) { + _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramResourceIndex(%s)", + _mesa_enum_to_string(programInterface)); + return GL_INVALID_INDEX; + } /* * For the interface TRANSFORM_FEEDBACK_VARYING, the value INVALID_INDEX * should be returned when querying the index assigned to the special names @@ -217,24 +232,33 @@ _mesa_GetProgramResourceIndex(GLuint program, GLenum programInterface, return GL_INVALID_INDEX; switch (programInterface) { + case GL_TESS_CONTROL_SUBROUTINE: + case GL_TESS_CONTROL_SUBROUTINE_UNIFORM: + case GL_TESS_EVALUATION_SUBROUTINE: + case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: + case GL_COMPUTE_SUBROUTINE: + case GL_COMPUTE_SUBROUTINE_UNIFORM: + case GL_GEOMETRY_SUBROUTINE: + case GL_GEOMETRY_SUBROUTINE_UNIFORM: + case GL_VERTEX_SUBROUTINE: + case GL_FRAGMENT_SUBROUTINE: + case GL_VERTEX_SUBROUTINE_UNIFORM: + case GL_FRAGMENT_SUBROUTINE_UNIFORM: case GL_PROGRAM_INPUT: case GL_PROGRAM_OUTPUT: case GL_UNIFORM: case GL_TRANSFORM_FEEDBACK_VARYING: - /* Validate name syntax for array variables */ - if (!valid_program_resource_index_name(name)) - return GL_INVALID_INDEX; - /* fall-through */ case GL_UNIFORM_BLOCK: - res = _mesa_program_resource_find_name(shProg, programInterface, name); - if (!res) + res = _mesa_program_resource_find_name(shProg, programInterface, name, + &array_index); + if (!res || array_index > 0) return GL_INVALID_INDEX; return _mesa_program_resource_index(shProg, res); case GL_ATOMIC_COUNTER_BUFFER: default: _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramResourceIndex(%s)", - _mesa_lookup_enum_by_nr(programInterface)); + _mesa_enum_to_string(programInterface)); } return GL_INVALID_INDEX; @@ -250,19 +274,13 @@ _mesa_GetProgramResourceName(GLuint program, GLenum programInterface, _mesa_lookup_shader_program_err(ctx, program, "glGetProgramResourceName"); - /* Set user friendly return values in case of errors. 
*/ - if (name) - *name = '\0'; - if (length) - *length = 0; - if (!shProg || !name) return; if (programInterface == GL_ATOMIC_COUNTER_BUFFER || - !supported_interface_enum(programInterface)) { + !supported_interface_enum(ctx, programInterface)) { _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramResourceName(%s)", - _mesa_lookup_enum_by_nr(programInterface)); + _mesa_enum_to_string(programInterface)); return; } @@ -300,36 +318,6 @@ _mesa_GetProgramResourceiv(GLuint program, GLenum programInterface, propCount, props, bufSize, length, params); } -/** - * Function verifies syntax of given name for GetProgramResourceLocation - * and GetProgramResourceLocationIndex for the following cases: - * - * "array element portion of a string passed to GetProgramResourceLocation - * or GetProgramResourceLocationIndex must not have, a "+" sign, extra - * leading zeroes, or whitespace". - * - * Check is written to be compatible with GL_ARB_array_of_arrays. - */ -static bool -invalid_array_element_syntax(const GLchar *name) -{ - char *first = strchr(name, '['); - char *last = strrchr(name, '['); - - if (!first) - return false; - - /* No '+' or ' ' allowed anywhere. */ - if (strchr(first, '+') || strchr(first, ' ')) - return true; - - /* Check that last array index is 0. */ - if (last[1] == '0' && last[2] != ']') - return true; - - return false; -} - static struct gl_shader_program * lookup_linked_program(GLuint program, const char *caller) { @@ -356,7 +344,7 @@ _mesa_GetProgramResourceLocation(GLuint program, GLenum programInterface, struct gl_shader_program *shProg = lookup_linked_program(program, "glGetProgramResourceLocation"); - if (!shProg || !name || invalid_array_element_syntax(name)) + if (!shProg || !name) return -1; /* Validate programInterface. */ @@ -366,24 +354,33 @@ _mesa_GetProgramResourceLocation(GLuint program, GLenum programInterface, case GL_PROGRAM_OUTPUT: break; - /* For reference valid cases requiring additional extension support: - * GL_ARB_shader_subroutine - * GL_ARB_tessellation_shader - * GL_ARB_compute_shader - */ case GL_VERTEX_SUBROUTINE_UNIFORM: - case GL_TESS_CONTROL_SUBROUTINE_UNIFORM: - case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: - case GL_GEOMETRY_SUBROUTINE_UNIFORM: case GL_FRAGMENT_SUBROUTINE_UNIFORM: + if (!_mesa_has_shader_subroutine(ctx)) + goto fail; + break; + case GL_GEOMETRY_SUBROUTINE_UNIFORM: + if (!_mesa_has_geometry_shaders(ctx) || !_mesa_has_shader_subroutine(ctx)) + goto fail; + break; case GL_COMPUTE_SUBROUTINE_UNIFORM: - + if (!_mesa_has_compute_shaders(ctx) || !_mesa_has_shader_subroutine(ctx)) + goto fail; + break; + case GL_TESS_CONTROL_SUBROUTINE_UNIFORM: + case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: + if (!_mesa_has_tessellation(ctx) || !_mesa_has_shader_subroutine(ctx)) + goto fail; + break; default: - _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramResourceLocation(%s %s)", - _mesa_lookup_enum_by_nr(programInterface), name); + goto fail; } return _mesa_program_resource_location(shProg, programInterface, name); +fail: + _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramResourceLocation(%s %s)", + _mesa_enum_to_string(programInterface), name); + return -1; } /** @@ -397,7 +394,7 @@ _mesa_GetProgramResourceLocationIndex(GLuint program, GLenum programInterface, struct gl_shader_program *shProg = lookup_linked_program(program, "glGetProgramResourceLocationIndex"); - if (!shProg || !name || invalid_array_element_syntax(name)) + if (!shProg || !name) return -1; /* From the GL_ARB_program_interface_query spec: @@ -408,7 +405,7 @@ 
_mesa_GetProgramResourceLocationIndex(GLuint program, GLenum programInterface, if (programInterface != GL_PROGRAM_OUTPUT) { _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramResourceLocationIndex(%s)", - _mesa_lookup_enum_by_nr(programInterface)); + _mesa_enum_to_string(programInterface)); return -1; } diff --git a/src/mesa/main/queryobj.c b/src/mesa/main/queryobj.c index 5ff1b953231..98366857f62 100644 --- a/src/mesa/main/queryobj.c +++ b/src/mesa/main/queryobj.c @@ -217,7 +217,7 @@ get_query_binding_point(struct gl_context *ctx, GLenum target, GLuint index) case GL_TESS_CONTROL_SHADER_PATCHES_ARB: case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: - if (ctx->Extensions.ARB_tessellation_shader) + if (_mesa_has_tessellation(ctx)) return get_pipe_stats_binding_point(ctx, target); else return NULL; @@ -295,7 +295,7 @@ _mesa_CreateQueries(GLenum target, GLsizei n, GLuint *ids) break; default: _mesa_error(ctx, GL_INVALID_ENUM, "glCreateQueries(invalid target = %s)", - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return; } @@ -390,7 +390,7 @@ _mesa_BeginQueryIndexed(GLenum target, GLuint index, GLuint id) if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glBeginQueryIndexed(%s, %u, %u)\n", - _mesa_lookup_enum_by_nr(target), index, id); + _mesa_enum_to_string(target), index, id); if (!query_error_check_index(ctx, target, index)) return; @@ -412,7 +412,7 @@ _mesa_BeginQueryIndexed(GLenum target, GLuint index, GLuint id) if (*bindpt) { _mesa_error(ctx, GL_INVALID_OPERATION, "glBeginQuery{Indexed}(target=%s is active)", - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return; } @@ -496,7 +496,7 @@ _mesa_EndQueryIndexed(GLenum target, GLuint index) if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glEndQueryIndexed(%s, %u)\n", - _mesa_lookup_enum_by_nr(target), index); + _mesa_enum_to_string(target), index); if (!query_error_check_index(ctx, target, index)) return; @@ -516,8 +516,8 @@ _mesa_EndQueryIndexed(GLenum target, GLuint index) if (q && q->Target != target) { _mesa_error(ctx, GL_INVALID_OPERATION, "glEndQuery(target=%s with active query of target %s)", - _mesa_lookup_enum_by_nr(target), - _mesa_lookup_enum_by_nr(q->Target)); + _mesa_enum_to_string(target), + _mesa_enum_to_string(q->Target)); return; } @@ -553,7 +553,7 @@ _mesa_QueryCounter(GLuint id, GLenum target) if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glQueryCounter(%u, %s)\n", id, - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); /* error checking */ if (target != GL_TIMESTAMP) { @@ -628,9 +628,9 @@ _mesa_GetQueryIndexediv(GLenum target, GLuint index, GLenum pname, if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glGetQueryIndexediv(%s, %u, %s)\n", - _mesa_lookup_enum_by_nr(target), + _mesa_enum_to_string(target), index, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); if (!query_error_check_index(ctx, target, index)) return; @@ -712,7 +712,7 @@ _mesa_GetQueryIndexediv(GLenum target, GLuint index, GLenum pname, default: _mesa_problem(ctx, "Unknown target in glGetQueryIndexediv(target = %s)", - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); *params = 0; break; } @@ -740,7 +740,7 @@ _mesa_GetQueryObjectiv(GLuint id, GLenum pname, GLint *params) if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glGetQueryObjectiv(%u, %s)\n", id, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); if (id) q = _mesa_lookup_query_object(ctx, id); @@ -794,7 +794,7 @@ _mesa_GetQueryObjectuiv(GLuint id, GLenum pname, GLuint *params) if 
(MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glGetQueryObjectuiv(%u, %s)\n", id, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); if (id) q = _mesa_lookup_query_object(ctx, id); @@ -851,7 +851,7 @@ _mesa_GetQueryObjecti64v(GLuint id, GLenum pname, GLint64EXT *params) if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glGetQueryObjecti64v(%u, %s)\n", id, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); if (id) q = _mesa_lookup_query_object(ctx, id); @@ -894,7 +894,7 @@ _mesa_GetQueryObjectui64v(GLuint id, GLenum pname, GLuint64EXT *params) if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glGetQueryObjectui64v(%u, %s)\n", id, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); if (id) q = _mesa_lookup_query_object(ctx, id); diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index a3357cd6419..d826ecfc3d5 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -47,28 +47,47 @@ * Return true if the conversion L=R+G+B is needed. */ GLboolean -_mesa_need_rgb_to_luminance_conversion(mesa_format texFormat, GLenum format) +_mesa_need_rgb_to_luminance_conversion(GLenum srcBaseFormat, + GLenum dstBaseFormat) { - GLenum baseTexFormat = _mesa_get_format_base_format(texFormat); - - return (baseTexFormat == GL_RG || - baseTexFormat == GL_RGB || - baseTexFormat == GL_RGBA) && - (format == GL_LUMINANCE || - format == GL_LUMINANCE_ALPHA || - format == GL_LUMINANCE_INTEGER_EXT || - format == GL_LUMINANCE_ALPHA_INTEGER_EXT); + return (srcBaseFormat == GL_RG || + srcBaseFormat == GL_RGB || + srcBaseFormat == GL_RGBA) && + (dstBaseFormat == GL_LUMINANCE || + dstBaseFormat == GL_LUMINANCE_ALPHA); } +/** + * Return true if the conversion L,I to RGB conversion is needed. + */ +GLboolean +_mesa_need_luminance_to_rgb_conversion(GLenum srcBaseFormat, + GLenum dstBaseFormat) +{ + return (srcBaseFormat == GL_LUMINANCE || + srcBaseFormat == GL_LUMINANCE_ALPHA || + srcBaseFormat == GL_INTENSITY) && + (dstBaseFormat == GL_GREEN || + dstBaseFormat == GL_BLUE || + dstBaseFormat == GL_RG || + dstBaseFormat == GL_RGB || + dstBaseFormat == GL_BGR || + dstBaseFormat == GL_RGBA || + dstBaseFormat == GL_BGRA); +} /** * Return transfer op flags for this ReadPixels operation. */ -static GLbitfield -get_readpixels_transfer_ops(const struct gl_context *ctx, mesa_format texFormat, - GLenum format, GLenum type, GLboolean uses_blit) +GLbitfield +_mesa_get_readpixels_transfer_ops(const struct gl_context *ctx, + mesa_format texFormat, + GLenum format, GLenum type, + GLboolean uses_blit) { GLbitfield transferOps = ctx->_ImageTransferState; + GLenum srcBaseFormat = _mesa_get_format_base_format(texFormat); + GLenum dstBaseFormat = _mesa_unpack_format_to_base_format(format); if (format == GL_DEPTH_COMPONENT || format == GL_DEPTH_STENCIL || @@ -105,7 +124,7 @@ get_readpixels_transfer_ops(const struct gl_context *ctx, mesa_format texFormat, * have any effect anyway. 
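One case that exercises the luminance conversion check above is a readback into GL_LUMINANCE from an RGBA color buffer, which forces the slow path because L = R + G + B has to be computed per pixel. A small sketch, assuming a desktop compatibility profile (GL_LUMINANCE is not a valid glReadPixels format in core profile) and an illustrative 64x64 read:

#include <epoxy/gl.h>   /* assumed loader header */

static void
read_back_luminance(GLubyte out[64 * 64])
{
   glReadPixels(0, 0, 64, 64, GL_LUMINANCE, GL_UNSIGNED_BYTE, out);
}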
*/ if (_mesa_get_format_datatype(texFormat) == GL_UNSIGNED_NORMALIZED && - !_mesa_need_rgb_to_luminance_conversion(texFormat, format)) { + !_mesa_need_rgb_to_luminance_conversion(srcBaseFormat, dstBaseFormat)) { transferOps &= ~IMAGE_CLAMP_BIT; } @@ -128,7 +147,7 @@ _mesa_readpixels_needs_slow_path(const struct gl_context *ctx, GLenum format, { struct gl_renderbuffer *rb = _mesa_get_read_renderbuffer_for_format(ctx, format); - GLenum srcType; + GLenum dstBaseFormat = _mesa_unpack_format_to_base_format(format); assert(rb); @@ -149,28 +168,14 @@ _mesa_readpixels_needs_slow_path(const struct gl_context *ctx, GLenum format, default: /* Color formats. */ - if (_mesa_need_rgb_to_luminance_conversion(rb->Format, format)) { - return GL_TRUE; - } - - /* Conversion between signed and unsigned integers needs masking - * (it isn't just memcpy). */ - srcType = _mesa_get_format_datatype(rb->Format); - - if ((srcType == GL_INT && - (type == GL_UNSIGNED_INT || - type == GL_UNSIGNED_SHORT || - type == GL_UNSIGNED_BYTE)) || - (srcType == GL_UNSIGNED_INT && - (type == GL_INT || - type == GL_SHORT || - type == GL_BYTE))) { + if (_mesa_need_rgb_to_luminance_conversion(rb->_BaseFormat, + dstBaseFormat)) { return GL_TRUE; } /* And finally, see if there are any transfer ops. */ - return get_readpixels_transfer_ops(ctx, rb->Format, format, type, - uses_blit) != 0; + return _mesa_get_readpixels_transfer_ops(ctx, rb->Format, format, type, + uses_blit) != 0; } return GL_FALSE; } @@ -263,7 +268,7 @@ read_uint_depth_pixels( struct gl_context *ctx, GLubyte *map, *dst; int stride, dstStride, j; - if (ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0) + if (ctx->Pixel.DepthScale != 1.0F || ctx->Pixel.DepthBias != 0.0F) return GL_FALSE; if (packing->SwapBytes) @@ -432,18 +437,19 @@ read_rgba_pixels( struct gl_context *ctx, uint8_t rebase_swizzle[4]; struct gl_framebuffer *fb = ctx->ReadBuffer; struct gl_renderbuffer *rb = fb->_ColorReadBuffer; + GLenum dstBaseFormat = _mesa_unpack_format_to_base_format(format); if (!rb) return; - transferOps = get_readpixels_transfer_ops(ctx, rb->Format, format, type, - GL_FALSE); + transferOps = _mesa_get_readpixels_transfer_ops(ctx, rb->Format, format, + type, GL_FALSE); /* Describe the dst format */ dst_is_integer = _mesa_is_enum_format_integer(format); dst_stride = _mesa_image_row_stride(packing, width, format, type); dst_format = _mesa_format_from_format_and_type(format, type); convert_rgb_to_lum = - _mesa_need_rgb_to_luminance_conversion(rb->Format, format); + _mesa_need_rgb_to_luminance_conversion(rb->_BaseFormat, dstBaseFormat); dst = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height, format, type, 0, 0); @@ -815,7 +821,7 @@ read_depth_stencil_pixels(struct gl_context *ctx, const struct gl_pixelstore_attrib *packing ) { const GLboolean scaleOrBias - = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0; + = ctx->Pixel.DepthScale != 1.0F || ctx->Pixel.DepthBias != 0.0F; const GLboolean stencilTransfer = ctx->Pixel.IndexShift || ctx->Pixel.IndexOffset || ctx->Pixel.MapStencilFlag; GLubyte *dst; @@ -910,10 +916,8 @@ read_pixels_es3_error_check(GLenum format, GLenum type, const GLenum data_type = _mesa_get_format_datatype(rb->Format); GLboolean is_unsigned_int = GL_FALSE; GLboolean is_signed_int = GL_FALSE; - - if (!_mesa_is_color_format(internalFormat)) { - return GL_INVALID_OPERATION; - } + GLboolean is_float_depth = (internalFormat == GL_DEPTH_COMPONENT32F) || + (internalFormat == GL_DEPTH32F_STENCIL8); is_unsigned_int = 
_mesa_is_enum_format_unsigned_int(internalFormat); if (!is_unsigned_int) { @@ -944,6 +948,43 @@ read_pixels_es3_error_check(GLenum format, GLenum type, (is_unsigned_int && type == GL_UNSIGNED_INT)) return GL_NO_ERROR; break; + case GL_DEPTH_STENCIL: + switch (type) { + case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: + if (is_float_depth) + return GL_NO_ERROR; + break; + case GL_UNSIGNED_INT_24_8: + if (!is_float_depth) + return GL_NO_ERROR; + break; + default: + return GL_INVALID_ENUM; + } + break; + case GL_DEPTH_COMPONENT: + switch (type) { + case GL_FLOAT: + if (is_float_depth) + return GL_NO_ERROR; + break; + case GL_UNSIGNED_SHORT: + case GL_UNSIGNED_INT_24_8: + if (!is_float_depth) + return GL_NO_ERROR; + break; + default: + return GL_INVALID_ENUM; + } + break; + case GL_STENCIL_INDEX: + switch (type) { + case GL_UNSIGNED_BYTE: + return GL_NO_ERROR; + default: + return GL_INVALID_ENUM; + } + break; } return GL_INVALID_OPERATION; @@ -966,8 +1007,8 @@ _mesa_ReadnPixelsARB( GLint x, GLint y, GLsizei width, GLsizei height, if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glReadPixels(%d, %d, %s, %s, %p)\n", width, height, - _mesa_lookup_enum_by_nr(format), - _mesa_lookup_enum_by_nr(type), + _mesa_enum_to_string(format), + _mesa_enum_to_string(type), pixels); if (width < 0 || height < 0) { @@ -1017,15 +1058,10 @@ _mesa_ReadnPixelsARB( GLint x, GLint y, GLsizei width, GLsizei height, err = read_pixels_es3_error_check(format, type, rb); } - if (err == GL_NO_ERROR && (format == GL_DEPTH_COMPONENT - || format == GL_DEPTH_STENCIL)) { - err = GL_INVALID_ENUM; - } - if (err != GL_NO_ERROR) { _mesa_error(ctx, err, "glReadPixels(invalid format %s and/or type %s)", - _mesa_lookup_enum_by_nr(format), - _mesa_lookup_enum_by_nr(type)); + _mesa_enum_to_string(format), + _mesa_enum_to_string(type)); return; } } @@ -1033,8 +1069,8 @@ _mesa_ReadnPixelsARB( GLint x, GLint y, GLsizei width, GLsizei height, err = _mesa_error_check_format_and_type(ctx, format, type); if (err != GL_NO_ERROR) { _mesa_error(ctx, err, "glReadPixels(invalid format %s and/or type %s)", - _mesa_lookup_enum_by_nr(format), - _mesa_lookup_enum_by_nr(type)); + _mesa_enum_to_string(format), + _mesa_enum_to_string(type)); return; } diff --git a/src/mesa/main/readpix.h b/src/mesa/main/readpix.h index 1636dd9ce3e..481ad9d9c37 100644 --- a/src/mesa/main/readpix.h +++ b/src/mesa/main/readpix.h @@ -38,7 +38,18 @@ _mesa_readpixels_needs_slow_path(const struct gl_context *ctx, GLenum format, GLenum type, GLboolean uses_blit); extern GLboolean -_mesa_need_rgb_to_luminance_conversion(mesa_format texFormat, GLenum format); +_mesa_need_rgb_to_luminance_conversion(GLenum srcBaseFormat, + GLenum dstBaseFormat); + +extern GLboolean +_mesa_need_luminance_to_rgb_conversion(GLenum srcBaseFormat, + GLenum dstBaseFormat); + +extern GLbitfield +_mesa_get_readpixels_transfer_ops(const struct gl_context *ctx, + mesa_format texFormat, + GLenum format, GLenum type, + GLboolean uses_blit); extern void _mesa_readpixels(struct gl_context *ctx, diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c index a3aacc66aa3..32180fb1ba2 100644 --- a/src/mesa/main/samplerobj.c +++ b/src/mesa/main/samplerobj.c @@ -689,7 +689,7 @@ set_sampler_max_anisotropy(struct gl_context *ctx, if (samp->MaxAnisotropy == param) return GL_FALSE; - if (param < 1.0) + if (param < 1.0F) return INVALID_VALUE; flush(ctx); @@ -813,7 +813,7 @@ _mesa_SamplerParameteri(GLuint sampler, GLenum pname, GLint param) break; case INVALID_PNAME: _mesa_error(ctx, GL_INVALID_ENUM, 
"glSamplerParameteri(pname=%s)\n", - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); break; case INVALID_PARAM: _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameteri(param=%d)\n", @@ -906,7 +906,7 @@ _mesa_SamplerParameterf(GLuint sampler, GLenum pname, GLfloat param) break; case INVALID_PNAME: _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterf(pname=%s)\n", - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); break; case INVALID_PARAM: _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterf(param=%f)\n", @@ -1006,7 +1006,7 @@ _mesa_SamplerParameteriv(GLuint sampler, GLenum pname, const GLint *params) break; case INVALID_PNAME: _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameteriv(pname=%s)\n", - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); break; case INVALID_PARAM: _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameteriv(param=%d)\n", @@ -1099,7 +1099,7 @@ _mesa_SamplerParameterfv(GLuint sampler, GLenum pname, const GLfloat *params) break; case INVALID_PNAME: _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterfv(pname=%s)\n", - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); break; case INVALID_PARAM: _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterfv(param=%f)\n", @@ -1184,7 +1184,7 @@ _mesa_SamplerParameterIiv(GLuint sampler, GLenum pname, const GLint *params) break; case INVALID_PNAME: _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterIiv(pname=%s)\n", - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); break; case INVALID_PARAM: _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterIiv(param=%d)\n", @@ -1270,7 +1270,7 @@ _mesa_SamplerParameterIuiv(GLuint sampler, GLenum pname, const GLuint *params) break; case INVALID_PNAME: _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterIuiv(pname=%s)\n", - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); break; case INVALID_PARAM: _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterIuiv(param=%u)\n", @@ -1380,7 +1380,7 @@ _mesa_GetSamplerParameteriv(GLuint sampler, GLenum pname, GLint *params) invalid_pname: _mesa_error(ctx, GL_INVALID_ENUM, "glGetSamplerParameteriv(pname=%s)", - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); } @@ -1466,7 +1466,7 @@ _mesa_GetSamplerParameterfv(GLuint sampler, GLenum pname, GLfloat *params) invalid_pname: _mesa_error(ctx, GL_INVALID_ENUM, "glGetSamplerParameterfv(pname=%s)", - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); } @@ -1545,7 +1545,7 @@ _mesa_GetSamplerParameterIiv(GLuint sampler, GLenum pname, GLint *params) invalid_pname: _mesa_error(ctx, GL_INVALID_ENUM, "glGetSamplerParameterIiv(pname=%s)", - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); } @@ -1624,7 +1624,7 @@ _mesa_GetSamplerParameterIuiv(GLuint sampler, GLenum pname, GLuint *params) invalid_pname: _mesa_error(ctx, GL_INVALID_ENUM, "glGetSamplerParameterIuiv(pname=%s)", - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); } diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index a6246a39aad..ee7320221e2 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -44,7 +44,8 @@ extern "C" { static GLint program_resource_location(struct gl_shader_program *shProg, - struct gl_program_resource *res, const char *name); + struct gl_program_resource *res, const char *name, + unsigned array_index); /** * Declare convenience functions to return resource data in a given type. 
@@ -61,6 +62,7 @@ DECL_RESOURCE_FUNC(UBO, gl_uniform_block); DECL_RESOURCE_FUNC(UNI, gl_uniform_storage); DECL_RESOURCE_FUNC(ATC, gl_active_atomic_buffer); DECL_RESOURCE_FUNC(XFB, gl_transform_feedback_varying_info); +DECL_RESOURCE_FUNC(SUB, gl_subroutine_function); void GLAPIENTRY _mesa_BindAttribLocation(GLhandleARB program, GLuint index, @@ -189,63 +191,6 @@ _mesa_GetActiveAttrib(GLhandleARB program, GLuint desired_index, (GLint *) type, "glGetActiveAttrib"); } -/* Locations associated with shader variables (array or non-array) can be - * queried using its base name or using the base name appended with the - * valid array index. For example, in case of below vertex shader, valid - * queries can be made to know the location of "xyz", "array", "array[0]", - * "array[1]", "array[2]" and "array[3]". In this example index reurned - * will be 0, 0, 0, 1, 2, 3 respectively. - * - * [Vertex Shader] - * layout(location=0) in vec4 xyz; - * layout(location=1) in vec4[4] array; - * void main() - * { } - * - * This requirement came up with the addition of ARB_program_interface_query - * to OpenGL 4.3 specification. See page 101 (page 122 of the PDF) for details. - * - * This utility function is used by: - * _mesa_GetAttribLocation - * _mesa_GetFragDataLocation - * _mesa_GetFragDataIndex - * - * Returns 0: - * if the 'name' string matches var->name. - * Returns 'matched index': - * if the 'name' string matches var->name appended with valid array index. - */ -int static inline -get_matching_index(const ir_variable *const var, const char *name) { - unsigned idx = 0; - const char *const paren = strchr(name, '['); - const unsigned len = (paren != NULL) ? paren - name : strlen(name); - - if (paren != NULL) { - if (!var->type->is_array()) - return -1; - - char *endptr; - idx = (unsigned) strtol(paren + 1, &endptr, 10); - const unsigned idx_len = endptr != (paren + 1) ? endptr - paren - 1 : 0; - - /* Validate the sub string representing index in 'name' string */ - if ((idx > 0 && paren[1] == '0') /* leading zeroes */ - || (idx == 0 && idx_len > 1) /* all zeroes */ - || paren[1] == ' ' /* whitespace */ - || endptr[0] != ']' /* closing brace */ - || endptr[1] != '\0' /* null char */ - || idx_len == 0 /* missing index */ - || idx >= var->type->length) /* exceeding array bound */ - return -1; - } - - if (strncmp(var->name, name, len) == 0 && var->name[len] == '\0') - return idx; - - return -1; -} - GLint GLAPIENTRY _mesa_GetAttribLocation(GLhandleARB program, const GLcharARB * name) { @@ -271,13 +216,15 @@ _mesa_GetAttribLocation(GLhandleARB program, const GLcharARB * name) if (shProg->_LinkedShaders[MESA_SHADER_VERTEX] == NULL) return -1; + unsigned array_index = 0; struct gl_program_resource *res = - _mesa_program_resource_find_name(shProg, GL_PROGRAM_INPUT, name); + _mesa_program_resource_find_name(shProg, GL_PROGRAM_INPUT, name, + &array_index); if (!res) return -1; - GLint loc = program_resource_location(shProg, res, name); + GLint loc = program_resource_location(shProg, res, name, array_index); /* The extra check against against 0 is made because of builtin-attribute * locations that have offset applied. 
Function program_resource_location @@ -455,13 +402,15 @@ _mesa_GetFragDataLocation(GLuint program, const GLchar *name) if (shProg->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) return -1; + unsigned array_index = 0; struct gl_program_resource *res = - _mesa_program_resource_find_name(shProg, GL_PROGRAM_OUTPUT, name); + _mesa_program_resource_find_name(shProg, GL_PROGRAM_OUTPUT, name, + &array_index); if (!res) return -1; - GLint loc = program_resource_location(shProg, res, name); + GLint loc = program_resource_location(shProg, res, name, array_index); /* The extra check against against 0 is made because of builtin-attribute * locations that have offset applied. Function program_resource_location @@ -497,6 +446,20 @@ _mesa_program_resource_name(struct gl_program_resource *res) return RESOURCE_VAR(res)->name; case GL_UNIFORM: return RESOURCE_UNI(res)->name; + case GL_VERTEX_SUBROUTINE_UNIFORM: + case GL_GEOMETRY_SUBROUTINE_UNIFORM: + case GL_FRAGMENT_SUBROUTINE_UNIFORM: + case GL_COMPUTE_SUBROUTINE_UNIFORM: + case GL_TESS_CONTROL_SUBROUTINE_UNIFORM: + case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: + return RESOURCE_UNI(res)->name + MESA_SUBROUTINE_PREFIX_LEN; + case GL_VERTEX_SUBROUTINE: + case GL_GEOMETRY_SUBROUTINE: + case GL_FRAGMENT_SUBROUTINE: + case GL_COMPUTE_SUBROUTINE: + case GL_TESS_CONTROL_SUBROUTINE: + case GL_TESS_EVALUATION_SUBROUTINE: + return RESOURCE_SUB(res)->name; default: assert(!"support for resource type not implemented"); } @@ -515,7 +478,19 @@ _mesa_program_resource_array_size(struct gl_program_resource *res) case GL_PROGRAM_OUTPUT: return RESOURCE_VAR(res)->data.max_array_access; case GL_UNIFORM: + case GL_VERTEX_SUBROUTINE_UNIFORM: + case GL_GEOMETRY_SUBROUTINE_UNIFORM: + case GL_FRAGMENT_SUBROUTINE_UNIFORM: + case GL_COMPUTE_SUBROUTINE_UNIFORM: + case GL_TESS_CONTROL_SUBROUTINE_UNIFORM: + case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: return RESOURCE_UNI(res)->array_elements; + case GL_VERTEX_SUBROUTINE: + case GL_GEOMETRY_SUBROUTINE: + case GL_FRAGMENT_SUBROUTINE: + case GL_COMPUTE_SUBROUTINE: + case GL_TESS_CONTROL_SUBROUTINE: + case GL_TESS_EVALUATION_SUBROUTINE: case GL_ATOMIC_COUNTER_BUFFER: case GL_UNIFORM_BLOCK: return 0; @@ -525,39 +500,32 @@ _mesa_program_resource_array_size(struct gl_program_resource *res) return 0; } -static int -array_index_of_resource(struct gl_program_resource *res, - const char *name) +/** + * Checks if array subscript is valid and if so sets array_index. + */ +static bool +valid_array_index(const GLchar *name, unsigned *array_index) { - assert(res->Data); + long idx = 0; + const GLchar *out_base_name_end; - switch (res->Type) { - case GL_PROGRAM_INPUT: - case GL_PROGRAM_OUTPUT: - return get_matching_index(RESOURCE_VAR(res), name); - default: - assert(!"support for resource type not implemented"); - return -1; - } + idx = parse_program_resource_name(name, &out_base_name_end); + if (idx < 0) + return false; + + if (array_index) + *array_index = idx; + + return true; } /* Find a program resource with specific name in given interface. */ struct gl_program_resource * _mesa_program_resource_find_name(struct gl_shader_program *shProg, - GLenum programInterface, const char *name) + GLenum programInterface, const char *name, + unsigned *array_index) { - GET_CURRENT_CONTEXT(ctx); - const char *full_name = name; - - /* When context has 'VertexID_is_zero_based' set, gl_VertexID has been - * lowered to gl_VertexIDMESA. 
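The array-index handling threaded through find_name and program_resource_location above means location queries accept either an array's base name or a name with a subscript. A hedged sketch of the expected behavior, assuming the vertex shader declares "layout(location = 1) in vec4 attrs[4];":

#include <epoxy/gl.h>   /* assumed loader header */

static void
query_array_attrib_locations(GLuint prog)
{
   GLint base = glGetAttribLocation(prog, "attrs");     /* expected: 1 */
   GLint elem = glGetAttribLocation(prog, "attrs[2]");  /* expected: 3 */
   GLint oob  = glGetAttribLocation(prog, "attrs[9]");  /* expected: -1, index out of bounds */

   /* Casts only silence unused-variable warnings in this sketch. */
   (void) base; (void) elem; (void) oob;
}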
- */ - if (name && ctx->Const.VertexID_is_zero_based) { - if (strcmp(name, "gl_VertexID") == 0) - full_name = "gl_VertexIDMESA"; - } - struct gl_program_resource *res = shProg->ProgramResourceList; for (unsigned i = 0; i < shProg->NumProgramResourceList; i++, res++) { if (res->Type != programInterface) @@ -567,26 +535,46 @@ _mesa_program_resource_find_name(struct gl_shader_program *shProg, const char *rname = _mesa_program_resource_name(res); unsigned baselen = strlen(rname); - switch (programInterface) { - case GL_TRANSFORM_FEEDBACK_VARYING: - case GL_UNIFORM_BLOCK: - case GL_UNIFORM: - if (strncmp(rname, name, baselen) == 0) { + if (strncmp(rname, name, baselen) == 0) { + switch (programInterface) { + case GL_UNIFORM_BLOCK: /* Basename match, check if array or struct. */ if (name[baselen] == '\0' || name[baselen] == '[' || name[baselen] == '.') { return res; } + break; + case GL_TRANSFORM_FEEDBACK_VARYING: + case GL_UNIFORM: + case GL_VERTEX_SUBROUTINE_UNIFORM: + case GL_GEOMETRY_SUBROUTINE_UNIFORM: + case GL_FRAGMENT_SUBROUTINE_UNIFORM: + case GL_COMPUTE_SUBROUTINE_UNIFORM: + case GL_TESS_CONTROL_SUBROUTINE_UNIFORM: + case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: + case GL_VERTEX_SUBROUTINE: + case GL_GEOMETRY_SUBROUTINE: + case GL_FRAGMENT_SUBROUTINE: + case GL_COMPUTE_SUBROUTINE: + case GL_TESS_CONTROL_SUBROUTINE: + case GL_TESS_EVALUATION_SUBROUTINE: + if (name[baselen] == '.') { + return res; + } + /* fall-through */ + case GL_PROGRAM_INPUT: + case GL_PROGRAM_OUTPUT: + if (name[baselen] == '\0') { + return res; + } else if (name[baselen] == '[' && + valid_array_index(name, array_index)) { + return res; + } + break; + default: + assert(!"not implemented for given interface"); } - break; - case GL_PROGRAM_INPUT: - case GL_PROGRAM_OUTPUT: - if (array_index_of_resource(res, full_name) >= 0) - return res; - break; - default: - assert(!"not implemented for given interface"); } } return NULL; @@ -651,6 +639,18 @@ _mesa_program_resource_find_index(struct gl_shader_program *shProg, case GL_PROGRAM_INPUT: case GL_PROGRAM_OUTPUT: case GL_UNIFORM: + case GL_VERTEX_SUBROUTINE_UNIFORM: + case GL_GEOMETRY_SUBROUTINE_UNIFORM: + case GL_FRAGMENT_SUBROUTINE_UNIFORM: + case GL_COMPUTE_SUBROUTINE_UNIFORM: + case GL_TESS_CONTROL_SUBROUTINE_UNIFORM: + case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: + case GL_VERTEX_SUBROUTINE: + case GL_GEOMETRY_SUBROUTINE: + case GL_FRAGMENT_SUBROUTINE: + case GL_COMPUTE_SUBROUTINE: + case GL_TESS_CONTROL_SUBROUTINE: + case GL_TESS_EVALUATION_SUBROUTINE: if (++idx == (int) index) return res; break; @@ -719,6 +719,12 @@ _mesa_get_program_resource_name(struct gl_shader_program *shProg, bool add_index = !(((programInterface == GL_PROGRAM_INPUT) && res->StageReferences & (1 << MESA_SHADER_GEOMETRY))); + /* Transform feedback varyings have array index already appended + * in their names. 
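Since transform feedback varying names already carry their array index, enumerating them with the resource-name query returns the full strings directly. An illustrative sketch, assuming 'prog' was linked with glTransformFeedbackVaryings:

#include <stdio.h>
#include <epoxy/gl.h>   /* assumed loader header */

static void
list_xfb_varyings(GLuint prog)
{
   GLint count;
   GLuint i;
   char name[128];

   glGetProgramInterfaceiv(prog, GL_TRANSFORM_FEEDBACK_VARYING,
                           GL_ACTIVE_RESOURCES, &count);
   for (i = 0; i < (GLuint) count; i++) {
      /* Names already include any array index, e.g. "foo[1]". */
      glGetProgramResourceName(prog, GL_TRANSFORM_FEEDBACK_VARYING, i,
                               sizeof(name), NULL, name);
      printf("xfb varying %u: %s\n", i, name);
   }
}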
+ */ + if (programInterface == GL_TRANSFORM_FEEDBACK_VARYING) + add_index = false; + if (add_index && _mesa_program_resource_array_size(res)) { int i; @@ -736,17 +742,9 @@ _mesa_get_program_resource_name(struct gl_shader_program *shProg, static GLint program_resource_location(struct gl_shader_program *shProg, - struct gl_program_resource *res, const char *name) + struct gl_program_resource *res, const char *name, + unsigned array_index) { - unsigned index, offset; - int array_index = -1; - - if (res->Type == GL_PROGRAM_INPUT || res->Type == GL_PROGRAM_OUTPUT) { - array_index = array_index_of_resource(res, name); - if (array_index < 0) - return -1; - } - /* Built-in locations should report GL_INVALID_INDEX. */ if (is_gl_identifier(name)) return GL_INVALID_INDEX; @@ -757,13 +755,22 @@ program_resource_location(struct gl_shader_program *shProg, */ switch (res->Type) { case GL_PROGRAM_INPUT: + /* If the input is an array, fail if the index is out of bounds. */ + if (array_index > 0 + && array_index >= RESOURCE_VAR(res)->type->length) { + return -1; + } return RESOURCE_VAR(res)->data.location + array_index - VERT_ATTRIB_GENERIC0; case GL_PROGRAM_OUTPUT: + /* If the output is an array, fail if the index is out of bounds. */ + if (array_index > 0 + && array_index >= RESOURCE_VAR(res)->type->length) { + return -1; + } return RESOURCE_VAR(res)->data.location + array_index - FRAG_RESULT_DATA0; case GL_UNIFORM: - index = _mesa_get_uniform_location(shProg, name, &offset); - - if (index == GL_INVALID_INDEX) + /* If the uniform is built-in, fail. */ + if (RESOURCE_UNI(res)->builtin) return -1; /* From the GL_ARB_uniform_buffer_object spec: @@ -777,9 +784,21 @@ program_resource_location(struct gl_shader_program *shProg, RESOURCE_UNI(res)->atomic_buffer_index != -1) return -1; - /* location in remap table + array element offset */ - return RESOURCE_UNI(res)->remap_location + offset; + /* fallthrough */ + case GL_VERTEX_SUBROUTINE_UNIFORM: + case GL_GEOMETRY_SUBROUTINE_UNIFORM: + case GL_FRAGMENT_SUBROUTINE_UNIFORM: + case GL_COMPUTE_SUBROUTINE_UNIFORM: + case GL_TESS_CONTROL_SUBROUTINE_UNIFORM: + case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: + /* If the uniform is an array, fail if the index is out of bounds. */ + if (array_index > 0 + && array_index >= RESOURCE_UNI(res)->array_elements) { + return -1; + } + /* location in remap table + array element offset */ + return RESOURCE_UNI(res)->remap_location + array_index; default: return -1; } @@ -787,22 +806,22 @@ program_resource_location(struct gl_shader_program *shProg, /** * Function implements following location queries: - * glGetAttribLocation - * glGetFragDataLocation * glGetUniformLocation */ GLint _mesa_program_resource_location(struct gl_shader_program *shProg, GLenum programInterface, const char *name) { + unsigned array_index = 0; struct gl_program_resource *res = - _mesa_program_resource_find_name(shProg, programInterface, name); + _mesa_program_resource_find_name(shProg, programInterface, name, + &array_index); /* Resource not found. 
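The subroutine uniform locations computed from the remap table above are what an application feeds back into glUniformSubroutinesuiv. A sketch under stated assumptions: the fragment shader has exactly one subroutine uniform, named "shade", with a compatible function "shade_red", and 'prog' is currently in use via glUseProgram.

#include <epoxy/gl.h>   /* assumed loader header */

static void
select_fragment_subroutine(GLuint prog)
{
   GLint loc = glGetSubroutineUniformLocation(prog, GL_FRAGMENT_SHADER, "shade");
   GLuint index = glGetSubroutineIndex(prog, GL_FRAGMENT_SHADER, "shade_red");
   GLuint selection[1];

   /* With a single subroutine uniform, loc is expected to be 0. */
   selection[loc] = index;
   glUniformSubroutinesuiv(GL_FRAGMENT_SHADER, 1, selection);
}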
*/ if (!res) return -1; - return program_resource_location(shProg, res, name); + return program_resource_location(shProg, res, name, array_index); } /** @@ -814,7 +833,7 @@ _mesa_program_resource_location_index(struct gl_shader_program *shProg, GLenum programInterface, const char *name) { struct gl_program_resource *res = - _mesa_program_resource_find_name(shProg, programInterface, name); + _mesa_program_resource_find_name(shProg, programInterface, name, NULL); /* Non-existent variable or resource is not referenced by fragment stage. */ if (!res || !(res->StageReferences & (1 << MESA_SHADER_FRAGMENT))) @@ -829,6 +848,10 @@ stage_from_enum(GLenum ref) switch (ref) { case GL_REFERENCED_BY_VERTEX_SHADER: return MESA_SHADER_VERTEX; + case GL_REFERENCED_BY_TESS_CONTROL_SHADER: + return MESA_SHADER_TESS_CTRL; + case GL_REFERENCED_BY_TESS_EVALUATION_SHADER: + return MESA_SHADER_TESS_EVAL; case GL_REFERENCED_BY_GEOMETRY_SHADER: return MESA_SHADER_GEOMETRY; case GL_REFERENCED_BY_FRAGMENT_SHADER: @@ -886,7 +909,8 @@ get_buffer_property(struct gl_shader_program *shProg, for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) { const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName; struct gl_program_resource *uni = - _mesa_program_resource_find_name(shProg, GL_UNIFORM, iname); + _mesa_program_resource_find_name(shProg, GL_UNIFORM, iname, + NULL); if (!uni) continue; (*val)++; @@ -896,7 +920,8 @@ get_buffer_property(struct gl_shader_program *shProg, for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) { const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName; struct gl_program_resource *uni = - _mesa_program_resource_find_name(shProg, GL_UNIFORM, iname); + _mesa_program_resource_find_name(shProg, GL_UNIFORM, iname, + NULL); if (!uni) continue; *val++ = @@ -925,8 +950,8 @@ get_buffer_property(struct gl_shader_program *shProg, invalid_operation: _mesa_error(ctx, GL_INVALID_OPERATION, "%s(%s prop %s)", caller, - _mesa_lookup_enum_by_nr(res->Type), - _mesa_lookup_enum_by_nr(prop)); + _mesa_enum_to_string(res->Type), + _mesa_enum_to_string(prop)); return 0; } @@ -944,11 +969,17 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg, switch(prop) { case GL_NAME_LENGTH: - if (res->Type == GL_ATOMIC_COUNTER_BUFFER) + switch (res->Type) { + case GL_ATOMIC_COUNTER_BUFFER: goto invalid_operation; - /* Base name +3 if array '[0]' + terminator. */ - *val = strlen(_mesa_program_resource_name(res)) + - (_mesa_program_resource_array_size(res) > 0 ? 3 : 0) + 1; + case GL_TRANSFORM_FEEDBACK_VARYING: + *val = strlen(_mesa_program_resource_name(res)) + 1; + break; + default: + /* Base name +3 if array '[0]' + terminator. */ + *val = strlen(_mesa_program_resource_name(res)) + + (_mesa_program_resource_array_size(res) > 0 ? 
3 : 0) + 1; + } return 1; case GL_TYPE: switch (res->Type) { @@ -1014,6 +1045,8 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg, goto invalid_enum; /* fallthrough */ case GL_REFERENCED_BY_VERTEX_SHADER: + case GL_REFERENCED_BY_TESS_CONTROL_SHADER: + case GL_REFERENCED_BY_TESS_EVALUATION_SHADER: case GL_REFERENCED_BY_GEOMETRY_SHADER: case GL_REFERENCED_BY_FRAGMENT_SHADER: switch (res->Type) { @@ -1034,7 +1067,8 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg, case GL_PROGRAM_INPUT: case GL_PROGRAM_OUTPUT: *val = program_resource_location(shProg, res, - _mesa_program_resource_name(res)); + _mesa_program_resource_name(res), + 0); return 1; default: goto invalid_operation; @@ -1045,10 +1079,54 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg, *val = RESOURCE_VAR(res)->data.index; return 1; + case GL_NUM_COMPATIBLE_SUBROUTINES: + if (res->Type != GL_VERTEX_SUBROUTINE_UNIFORM && + res->Type != GL_FRAGMENT_SUBROUTINE_UNIFORM && + res->Type != GL_GEOMETRY_SUBROUTINE_UNIFORM && + res->Type != GL_COMPUTE_SUBROUTINE_UNIFORM && + res->Type != GL_TESS_CONTROL_SUBROUTINE_UNIFORM && + res->Type != GL_TESS_EVALUATION_SUBROUTINE_UNIFORM) + goto invalid_operation; + *val = RESOURCE_UNI(res)->num_compatible_subroutines; + return 1; + case GL_COMPATIBLE_SUBROUTINES: { + const struct gl_uniform_storage *uni; + struct gl_shader *sh; + unsigned count, i; + int j; + + if (res->Type != GL_VERTEX_SUBROUTINE_UNIFORM && + res->Type != GL_FRAGMENT_SUBROUTINE_UNIFORM && + res->Type != GL_GEOMETRY_SUBROUTINE_UNIFORM && + res->Type != GL_COMPUTE_SUBROUTINE_UNIFORM && + res->Type != GL_TESS_CONTROL_SUBROUTINE_UNIFORM && + res->Type != GL_TESS_EVALUATION_SUBROUTINE_UNIFORM) + goto invalid_operation; + uni = RESOURCE_UNI(res); + + sh = shProg->_LinkedShaders[_mesa_shader_stage_from_subroutine_uniform(res->Type)]; + count = 0; + for (i = 0; i < sh->NumSubroutineFunctions; i++) { + struct gl_subroutine_function *fn = &sh->SubroutineFunctions[i]; + for (j = 0; j < fn->num_compat_types; j++) { + if (fn->types[j] == uni->type) { + val[count++] = i; + break; + } + } + } + return count; + } /* GL_ARB_tessellation_shader */ case GL_IS_PER_PATCH: - case GL_REFERENCED_BY_TESS_CONTROL_SHADER: - case GL_REFERENCED_BY_TESS_EVALUATION_SHADER: + switch (res->Type) { + case GL_PROGRAM_INPUT: + case GL_PROGRAM_OUTPUT: + *val = RESOURCE_VAR(res)->data.patch; + return 1; + default: + goto invalid_operation; + } default: goto invalid_enum; } @@ -1057,14 +1135,14 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg, invalid_enum: _mesa_error(ctx, GL_INVALID_ENUM, "%s(%s prop %s)", caller, - _mesa_lookup_enum_by_nr(res->Type), - _mesa_lookup_enum_by_nr(prop)); + _mesa_enum_to_string(res->Type), + _mesa_enum_to_string(prop)); return 0; invalid_operation: _mesa_error(ctx, GL_INVALID_OPERATION, "%s(%s prop %s)", caller, - _mesa_lookup_enum_by_nr(res->Type), - _mesa_lookup_enum_by_nr(prop)); + _mesa_enum_to_string(res->Type), + _mesa_enum_to_string(prop)); return 0; } @@ -1086,7 +1164,7 @@ _mesa_get_program_resourceiv(struct gl_shader_program *shProg, if (!res || bufSize < 0) { _mesa_error(ctx, GL_INVALID_VALUE, "glGetProgramResourceiv(%s index %d bufSize %d)", - _mesa_lookup_enum_by_nr(programInterface), index, bufSize); + _mesa_enum_to_string(programInterface), index, bufSize); return; } diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index a4296adf799..f9a7d130f9c 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -110,6 +110,7 @@ 
_mesa_init_shader_state(struct gl_context *ctx) */ struct gl_shader_compiler_options options; gl_shader_stage sh; + int i; memset(&options, 0, sizeof(options)); options.MaxUnrollIterations = 32; @@ -126,6 +127,12 @@ _mesa_init_shader_state(struct gl_context *ctx) /* Extended for ARB_separate_shader_objects */ ctx->Shader.RefCount = 1; mtx_init(&ctx->Shader.Mutex, mtx_plain); + + ctx->TessCtrlProgram.patch_vertices = 3; + for (i = 0; i < 4; ++i) + ctx->TessCtrlProgram.patch_default_outer_level[i] = 1.0; + for (i = 0; i < 2; ++i) + ctx->TessCtrlProgram.patch_default_inner_level[i] = 1.0; } @@ -199,6 +206,9 @@ _mesa_validate_shader_target(const struct gl_context *ctx, GLenum type) return ctx == NULL || ctx->Extensions.ARB_vertex_shader; case GL_GEOMETRY_SHADER_ARB: return ctx == NULL || _mesa_has_geometry_shaders(ctx); + case GL_TESS_CONTROL_SHADER: + case GL_TESS_EVALUATION_SHADER: + return ctx == NULL || _mesa_has_tessellation(ctx); case GL_COMPUTE_SHADER: return ctx == NULL || ctx->Extensions.ARB_compute_shader; default: @@ -415,6 +425,8 @@ detach_shader(struct gl_context *ctx, GLuint program, GLuint shader) /* sanity check - make sure the new list's entries are sensible */ for (j = 0; j < shProg->NumShaders; j++) { assert(shProg->Shaders[j]->Type == GL_VERTEX_SHADER || + shProg->Shaders[j]->Type == GL_TESS_CONTROL_SHADER || + shProg->Shaders[j]->Type == GL_TESS_EVALUATION_SHADER || shProg->Shaders[j]->Type == GL_GEOMETRY_SHADER || shProg->Shaders[j]->Type == GL_FRAGMENT_SHADER); assert(shProg->Shaders[j]->RefCount > 0); @@ -511,6 +523,57 @@ check_gs_query(struct gl_context *ctx, const struct gl_shader_program *shProg) /** + * Check if a tessellation control shader query is valid at this time. + * If not, report an error and return false. + * + * From GL 4.0 section 6.1.12 (Shader and Program Queries): + * + * "If TESS_CONTROL_OUTPUT_VERTICES is queried for a program which has + * not been linked successfully, or which does not contain objects to + * form a tessellation control shader, then an INVALID_OPERATION error is + * generated." + */ +static bool +check_tcs_query(struct gl_context *ctx, const struct gl_shader_program *shProg) +{ + if (shProg->LinkStatus && + shProg->_LinkedShaders[MESA_SHADER_TESS_CTRL] != NULL) { + return true; + } + + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGetProgramv(linked tessellation control shader required)"); + return false; +} + + +/** + * Check if a tessellation evaluation shader query is valid at this time. + * If not, report an error and return false. + * + * From GL 4.0 section 6.1.12 (Shader and Program Queries): + * + * "If any of the pname values in this paragraph are queried for a program + * which has not been linked successfully, or which does not contain + * objects to form a tessellation evaluation shader, then an + * INVALID_OPERATION error is generated." + * + */ +static bool +check_tes_query(struct gl_context *ctx, const struct gl_shader_program *shProg) +{ + if (shProg->LinkStatus && + shProg->_LinkedShaders[MESA_SHADER_TESS_EVAL] != NULL) { + return true; + } + + _mesa_error(ctx, GL_INVALID_OPERATION, "glGetProgramv(linked tessellation " + "evaluation shader required)"); + return false; +} + + +/** * glGetProgramiv() - get shader program state. * Note that this is for GLSL shader programs, not ARB vertex/fragment * programs (see glGetProgramivARB). 
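For context on the new tessellation program state and the check_tcs_query()/check_tes_query() guards above, here is a minimal application-side sketch of the queries they protect. It assumes a current GL 4.0 (or ARB_tessellation_shader) context compiled with GL_GLEXT_PROTOTYPES; the program handle prog and the helper name are illustrative, not part of this patch.

#define GL_GLEXT_PROTOTYPES 1
#include <stdio.h>
#include <GL/gl.h>
#include <GL/glext.h>

static void
print_tess_program_state(GLuint prog)
{
   GLint out_verts = 0, gen_mode = 0, spacing = 0;

   /* Only valid once the program is linked and contains a tessellation
    * control shader; otherwise Mesa now raises GL_INVALID_OPERATION
    * (check_tcs_query). */
   glGetProgramiv(prog, GL_TESS_CONTROL_OUTPUT_VERTICES, &out_verts);

   /* Likewise gated on a linked tessellation evaluation shader
    * (check_tes_query). */
   glGetProgramiv(prog, GL_TESS_GEN_MODE, &gen_mode);
   glGetProgramiv(prog, GL_TESS_GEN_SPACING, &spacing);

   printf("TCS vertices out: %d, gen mode: 0x%x, spacing: 0x%x\n",
          out_verts, gen_mode, spacing);

   /* The context default installed in _mesa_init_shader_state() is 3
    * vertices per patch; an application overrides it before drawing
    * GL_PATCHES. */
   glPatchParameteri(GL_PATCH_VERTICES, 4);
}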
@@ -533,6 +596,7 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname, * and GL 3.2) are available in this context */ const bool has_core_gs = _mesa_has_geometry_shaders(ctx); + const bool has_tess = _mesa_has_tessellation(ctx); /* Are uniform buffer objects available in this context? */ @@ -711,12 +775,44 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname, case GL_PROGRAM_SEPARABLE: *params = shProg->SeparateShader; return; + + /* ARB_tessellation_shader */ + case GL_TESS_CONTROL_OUTPUT_VERTICES: + if (!has_tess) + break; + if (check_tcs_query(ctx, shProg)) + *params = shProg->TessCtrl.VerticesOut; + return; + case GL_TESS_GEN_MODE: + if (!has_tess) + break; + if (check_tes_query(ctx, shProg)) + *params = shProg->TessEval.PrimitiveMode; + return; + case GL_TESS_GEN_SPACING: + if (!has_tess) + break; + if (check_tes_query(ctx, shProg)) + *params = shProg->TessEval.Spacing; + return; + case GL_TESS_GEN_VERTEX_ORDER: + if (!has_tess) + break; + if (check_tes_query(ctx, shProg)) + *params = shProg->TessEval.VertexOrder; + return; + case GL_TESS_GEN_POINT_MODE: + if (!has_tess) + break; + if (check_tes_query(ctx, shProg)) + *params = shProg->TessEval.PointMode; + return; default: break; } _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramiv(pname=%s)", - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); } @@ -992,6 +1088,12 @@ print_shader_info(const struct gl_shader_program *shProg) if (shProg->_LinkedShaders[MESA_SHADER_GEOMETRY]) printf(" geom prog %u\n", shProg->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program->Id); + if (shProg->_LinkedShaders[MESA_SHADER_TESS_CTRL]) + printf(" tesc prog %u\n", + shProg->_LinkedShaders[MESA_SHADER_TESS_CTRL]->Program->Id); + if (shProg->_LinkedShaders[MESA_SHADER_TESS_EVAL]) + printf(" tese prog %u\n", + shProg->_LinkedShaders[MESA_SHADER_TESS_EVAL]->Program->Id); } @@ -1037,11 +1139,9 @@ use_shader_program(struct gl_context *ctx, gl_shader_stage stage, */ switch (stage) { case MESA_SHADER_VERTEX: - /* Empty for now. */ - break; + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: case MESA_SHADER_GEOMETRY: - /* Empty for now. */ - break; case MESA_SHADER_COMPUTE: /* Empty for now. */ break; @@ -1071,6 +1171,7 @@ _mesa_use_program(struct gl_context *ctx, struct gl_shader_program *shProg) use_shader_program(ctx, i, shProg, &ctx->Shader); _mesa_active_program(ctx, shProg, "glUseProgram"); + _mesa_shader_program_init_subroutine_defaults(shProg); if (ctx->Driver.UseProgram) ctx->Driver.UseProgram(ctx, shProg); } @@ -1172,7 +1273,7 @@ _mesa_CreateShader(GLenum type) { GET_CURRENT_CONTEXT(ctx); if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "glCreateShader %s\n", _mesa_lookup_enum_by_nr(type)); + _mesa_debug(ctx, "glCreateShader %s\n", _mesa_enum_to_string(type)); return create_shader(ctx, type); } @@ -1331,7 +1432,7 @@ void GLAPIENTRY _mesa_GetObjectParameterfvARB(GLhandleARB object, GLenum pname, GLfloat *params) { - GLint iparams[1]; /* XXX is one element enough? */ + GLint iparams[1] = {0}; /* XXX is one element enough? 
*/ _mesa_GetObjectParameterivARB(object, pname, iparams); params[0] = (GLfloat) iparams[0]; } @@ -1460,7 +1561,7 @@ read_shader(const char *fname) */ void GLAPIENTRY _mesa_ShaderSource(GLhandleARB shaderObj, GLsizei count, - const GLcharARB * const * string, const GLint * length) + const GLcharARB * const * string, const GLint * length) { GET_CURRENT_CONTEXT(ctx); GLint *offsets; @@ -1692,12 +1793,23 @@ _mesa_ShaderBinary(GLint n, const GLuint* shaders, GLenum binaryformat, const void* binary, GLint length) { GET_CURRENT_CONTEXT(ctx); - (void) n; (void) shaders; (void) binaryformat; (void) binary; - (void) length; - _mesa_error(ctx, GL_INVALID_OPERATION, "glShaderBinary"); + + /* Page 68, section 7.2 "Shader Binaries" of the OpenGL ES 3.1, and + * page 88 of the OpenGL 4.5 specs state: + * + * "An INVALID_VALUE error is generated if count or length is negative. + * An INVALID_ENUM error is generated if binaryformat is not a supported + * format returned in SHADER_BINARY_FORMATS." + */ + if (n < 0 || length < 0) { + _mesa_error(ctx, GL_INVALID_VALUE, "glShaderBinary(count or length < 0)"); + return; + } + + _mesa_error(ctx, GL_INVALID_ENUM, "glShaderBinary(format)"); } @@ -1857,7 +1969,7 @@ _mesa_ProgramParameteri(GLuint program, GLenum pname, GLint value) default: _mesa_error(ctx, GL_INVALID_ENUM, "glProgramParameteri(pname=%s)", - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); return; } @@ -1865,7 +1977,7 @@ invalid_value: _mesa_error(ctx, GL_INVALID_VALUE, "glProgramParameteri(pname=%s, value=%d): " "value must be 0 or 1.", - _mesa_lookup_enum_by_nr(pname), + _mesa_enum_to_string(pname), value); } @@ -1885,7 +1997,8 @@ _mesa_use_shader_program(struct gl_context *ctx, GLenum type, static GLuint _mesa_create_shader_program(struct gl_context* ctx, GLboolean separate, - GLenum type, GLsizei count, const GLchar* const *strings) + GLenum type, GLsizei count, + const GLchar* const *strings) { const GLuint shader = create_shader(ctx, type); GLuint program = 0; @@ -1920,8 +2033,8 @@ _mesa_create_shader_program(struct gl_context* ctx, GLboolean separate, } #endif } - - ralloc_strcat(&shProg->InfoLog, sh->InfoLog); + if (sh->InfoLog) + ralloc_strcat(&shProg->InfoLog, sh->InfoLog); } delete_shader(ctx, shader); @@ -1944,6 +2057,22 @@ _mesa_copy_linked_program_data(gl_shader_stage type, case MESA_SHADER_VERTEX: dst->UsesClipDistanceOut = src->Vert.UsesClipDistance; break; + case MESA_SHADER_TESS_CTRL: { + struct gl_tess_ctrl_program *dst_tcp = + (struct gl_tess_ctrl_program *) dst; + dst_tcp->VerticesOut = src->TessCtrl.VerticesOut; + break; + } + case MESA_SHADER_TESS_EVAL: { + struct gl_tess_eval_program *dst_tep = + (struct gl_tess_eval_program *) dst; + dst_tep->PrimitiveMode = src->TessEval.PrimitiveMode; + dst_tep->Spacing = src->TessEval.Spacing; + dst_tep->VertexOrder = src->TessEval.VertexOrder; + dst_tep->PointMode = src->TessEval.PointMode; + dst->UsesClipDistanceOut = src->TessEval.UsesClipDistance; + break; + } case MESA_SHADER_GEOMETRY: { struct gl_geometry_program *dst_gp = (struct gl_geometry_program *) dst; dst_gp->VerticesIn = src->Geom.VerticesIn; @@ -1954,20 +2083,20 @@ _mesa_copy_linked_program_data(gl_shader_stage type, dst->UsesClipDistanceOut = src->Geom.UsesClipDistance; dst_gp->UsesEndPrimitive = src->Geom.UsesEndPrimitive; dst_gp->UsesStreams = src->Geom.UsesStreams; - } break; + } case MESA_SHADER_FRAGMENT: { struct gl_fragment_program *dst_fp = (struct gl_fragment_program *) dst; dst_fp->FragDepthLayout = src->FragDepthLayout; - } break; + } case 
MESA_SHADER_COMPUTE: { struct gl_compute_program *dst_cp = (struct gl_compute_program *) dst; int i; for (i = 0; i < 3; i++) dst_cp->LocalSize[i] = src->Comp.LocalSize[i]; - } break; + } default: break; } @@ -1984,3 +2113,568 @@ _mesa_CreateShaderProgramv(GLenum type, GLsizei count, return _mesa_create_shader_program(ctx, GL_TRUE, type, count, strings); } + + +/** + * For GL_ARB_tessellation_shader + */ +extern void GLAPIENTRY +_mesa_PatchParameteri(GLenum pname, GLint value) +{ + GET_CURRENT_CONTEXT(ctx); + + if (!_mesa_has_tessellation(ctx)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "glPatchParameteri"); + return; + } + + if (pname != GL_PATCH_VERTICES) { + _mesa_error(ctx, GL_INVALID_ENUM, "glPatchParameteri"); + return; + } + + if (value <= 0 || value > ctx->Const.MaxPatchVertices) { + _mesa_error(ctx, GL_INVALID_VALUE, "glPatchParameteri"); + return; + } + + ctx->TessCtrlProgram.patch_vertices = value; +} + + +extern void GLAPIENTRY +_mesa_PatchParameterfv(GLenum pname, const GLfloat *values) +{ + GET_CURRENT_CONTEXT(ctx); + + if (!_mesa_has_tessellation(ctx)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "glPatchParameterfv"); + return; + } + + switch(pname) { + case GL_PATCH_DEFAULT_OUTER_LEVEL: + FLUSH_VERTICES(ctx, 0); + memcpy(ctx->TessCtrlProgram.patch_default_outer_level, values, + 4 * sizeof(GLfloat)); + ctx->NewDriverState |= ctx->DriverFlags.NewDefaultTessLevels; + return; + case GL_PATCH_DEFAULT_INNER_LEVEL: + FLUSH_VERTICES(ctx, 0); + memcpy(ctx->TessCtrlProgram.patch_default_inner_level, values, + 2 * sizeof(GLfloat)); + ctx->NewDriverState |= ctx->DriverFlags.NewDefaultTessLevels; + return; + default: + _mesa_error(ctx, GL_INVALID_ENUM, "glPatchParameterfv"); + return; + } +} + +/** + * ARB_shader_subroutine + */ +GLint GLAPIENTRY +_mesa_GetSubroutineUniformLocation(GLuint program, GLenum shadertype, + const GLchar *name) +{ + GET_CURRENT_CONTEXT(ctx); + const char *api_name = "glGetSubroutineUniformLocation"; + struct gl_shader_program *shProg; + GLenum resource_type; + gl_shader_stage stage; + + if (!_mesa_has_shader_subroutine(ctx)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return -1; + } + + if (!_mesa_validate_shader_target(ctx, shadertype)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return -1; + } + + shProg = _mesa_lookup_shader_program_err(ctx, program, api_name); + if (!shProg) + return -1; + + stage = _mesa_shader_enum_to_shader_stage(shadertype); + if (!shProg->_LinkedShaders[stage]) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return -1; + } + + resource_type = _mesa_shader_stage_to_subroutine_uniform(stage); + return _mesa_program_resource_location(shProg, resource_type, name); +} + +GLuint GLAPIENTRY +_mesa_GetSubroutineIndex(GLuint program, GLenum shadertype, + const GLchar *name) +{ + GET_CURRENT_CONTEXT(ctx); + const char *api_name = "glGetSubroutineIndex"; + struct gl_shader_program *shProg; + struct gl_program_resource *res; + GLenum resource_type; + gl_shader_stage stage; + + if (!_mesa_has_shader_subroutine(ctx)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return -1; + } + + if (!_mesa_validate_shader_target(ctx, shadertype)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return -1; + } + + shProg = _mesa_lookup_shader_program_err(ctx, program, api_name); + if (!shProg) + return -1; + + stage = _mesa_shader_enum_to_shader_stage(shadertype); + if (!shProg->_LinkedShaders[stage]) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return -1; + } + + 
resource_type = _mesa_shader_stage_to_subroutine(stage); + res = _mesa_program_resource_find_name(shProg, resource_type, name, NULL); + if (!res) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return -1; + } + + return _mesa_program_resource_index(shProg, res); +} + + +GLvoid GLAPIENTRY +_mesa_GetActiveSubroutineUniformiv(GLuint program, GLenum shadertype, + GLuint index, GLenum pname, GLint *values) +{ + GET_CURRENT_CONTEXT(ctx); + const char *api_name = "glGetActiveSubroutineUniformiv"; + struct gl_shader_program *shProg; + struct gl_shader *sh; + gl_shader_stage stage; + struct gl_program_resource *res; + const struct gl_uniform_storage *uni; + GLenum resource_type; + int count, i, j; + + if (!_mesa_has_shader_subroutine(ctx)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } + + if (!_mesa_validate_shader_target(ctx, shadertype)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } + + shProg = _mesa_lookup_shader_program_err(ctx, program, api_name); + if (!shProg) + return; + + stage = _mesa_shader_enum_to_shader_stage(shadertype); + resource_type = _mesa_shader_stage_to_subroutine_uniform(stage); + + sh = shProg->_LinkedShaders[stage]; + if (!sh) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } + + switch (pname) { + case GL_NUM_COMPATIBLE_SUBROUTINES: { + res = _mesa_program_resource_find_index(shProg, resource_type, index); + if (res) { + uni = res->Data; + values[0] = uni->num_compatible_subroutines; + } + break; + } + case GL_COMPATIBLE_SUBROUTINES: { + res = _mesa_program_resource_find_index(shProg, resource_type, index); + if (res) { + uni = res->Data; + count = 0; + for (i = 0; i < sh->NumSubroutineFunctions; i++) { + struct gl_subroutine_function *fn = &sh->SubroutineFunctions[i]; + for (j = 0; j < fn->num_compat_types; j++) { + if (fn->types[j] == uni->type) { + values[count++] = i; + break; + } + } + } + } + break; + } + case GL_UNIFORM_SIZE: + res = _mesa_program_resource_find_index(shProg, resource_type, index); + if (res) { + uni = res->Data; + values[0] = uni->array_elements ? uni->array_elements : 1; + } + break; + case GL_UNIFORM_NAME_LENGTH: + res = _mesa_program_resource_find_index(shProg, resource_type, index); + if (res) { + values[0] = strlen(_mesa_program_resource_name(res)) + 1 + + ((_mesa_program_resource_array_size(res) != 0) ? 
3 : 0);; + } + break; + default: + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } +} + + +GLvoid GLAPIENTRY +_mesa_GetActiveSubroutineUniformName(GLuint program, GLenum shadertype, + GLuint index, GLsizei bufsize, + GLsizei *length, GLchar *name) +{ + GET_CURRENT_CONTEXT(ctx); + const char *api_name = "glGetActiveSubroutineUniformName"; + struct gl_shader_program *shProg; + GLenum resource_type; + gl_shader_stage stage; + + if (!_mesa_has_shader_subroutine(ctx)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } + + if (!_mesa_validate_shader_target(ctx, shadertype)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } + + shProg = _mesa_lookup_shader_program_err(ctx, program, api_name); + if (!shProg) + return; + + stage = _mesa_shader_enum_to_shader_stage(shadertype); + if (!shProg->_LinkedShaders[stage]) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } + + resource_type = _mesa_shader_stage_to_subroutine_uniform(stage); + /* get program resource name */ + _mesa_get_program_resource_name(shProg, resource_type, + index, bufsize, + length, name, api_name); +} + + +GLvoid GLAPIENTRY +_mesa_GetActiveSubroutineName(GLuint program, GLenum shadertype, + GLuint index, GLsizei bufsize, + GLsizei *length, GLchar *name) +{ + GET_CURRENT_CONTEXT(ctx); + const char *api_name = "glGetActiveSubroutineName"; + struct gl_shader_program *shProg; + GLenum resource_type; + gl_shader_stage stage; + + if (!_mesa_has_shader_subroutine(ctx)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } + + if (!_mesa_validate_shader_target(ctx, shadertype)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } + + shProg = _mesa_lookup_shader_program_err(ctx, program, api_name); + if (!shProg) + return; + + stage = _mesa_shader_enum_to_shader_stage(shadertype); + if (!shProg->_LinkedShaders[stage]) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } + resource_type = _mesa_shader_stage_to_subroutine(stage); + _mesa_get_program_resource_name(shProg, resource_type, + index, bufsize, + length, name, api_name); +} + + +GLvoid GLAPIENTRY +_mesa_UniformSubroutinesuiv(GLenum shadertype, GLsizei count, + const GLuint *indices) +{ + GET_CURRENT_CONTEXT(ctx); + const char *api_name = "glUniformSubroutinesuiv"; + struct gl_shader_program *shProg; + struct gl_shader *sh; + gl_shader_stage stage; + int i; + + if (!_mesa_has_shader_subroutine(ctx)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } + + if (!_mesa_validate_shader_target(ctx, shadertype)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } + + stage = _mesa_shader_enum_to_shader_stage(shadertype); + shProg = ctx->_Shader->CurrentProgram[stage]; + if (!shProg) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } + + sh = shProg->_LinkedShaders[stage]; + if (!sh) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } + + if (count != sh->NumSubroutineUniformRemapTable) { + _mesa_error(ctx, GL_INVALID_VALUE, "%s", api_name); + return; + } + + i = 0; + do { + struct gl_uniform_storage *uni = sh->SubroutineUniformRemapTable[i]; + int uni_count = uni->array_elements ? 
uni->array_elements : 1; + int j, k; + + for (j = i; j < i + uni_count; j++) { + struct gl_subroutine_function *subfn; + if (indices[j] >= sh->NumSubroutineFunctions) { + _mesa_error(ctx, GL_INVALID_VALUE, "%s", api_name); + return; + } + + subfn = &sh->SubroutineFunctions[indices[j]]; + for (k = 0; k < subfn->num_compat_types; k++) { + if (subfn->types[k] == uni->type) + break; + } + if (k == subfn->num_compat_types) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } + } + i += uni_count; + } while(i < count); + + FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); + i = 0; + do { + struct gl_uniform_storage *uni = sh->SubroutineUniformRemapTable[i]; + int uni_count = uni->array_elements ? uni->array_elements : 1; + + memcpy(&uni->storage[0], &indices[i], + sizeof(GLuint) * uni_count); + + uni->initialized = true; + _mesa_propagate_uniforms_to_driver_storage(uni, 0, uni_count); + i += uni_count; + } while(i < count); +} + + +GLvoid GLAPIENTRY +_mesa_GetUniformSubroutineuiv(GLenum shadertype, GLint location, + GLuint *params) +{ + GET_CURRENT_CONTEXT(ctx); + const char *api_name = "glGetUniformSubroutineuiv"; + struct gl_shader_program *shProg; + struct gl_shader *sh; + gl_shader_stage stage; + + if (!_mesa_has_shader_subroutine(ctx)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } + + if (!_mesa_validate_shader_target(ctx, shadertype)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } + + stage = _mesa_shader_enum_to_shader_stage(shadertype); + shProg = ctx->_Shader->CurrentProgram[stage]; + if (!shProg) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } + + sh = shProg->_LinkedShaders[stage]; + if (!sh) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } + + if (location >= sh->NumSubroutineUniformRemapTable) { + _mesa_error(ctx, GL_INVALID_VALUE, "%s", api_name); + return; + } + + { + struct gl_uniform_storage *uni = sh->SubroutineUniformRemapTable[location]; + int offset = location - uni->subroutine[stage].index; + memcpy(params, &uni->storage[offset], + sizeof(GLuint)); + } +} + + +GLvoid GLAPIENTRY +_mesa_GetProgramStageiv(GLuint program, GLenum shadertype, + GLenum pname, GLint *values) +{ + GET_CURRENT_CONTEXT(ctx); + const char *api_name = "glGetProgramStageiv"; + struct gl_shader_program *shProg; + struct gl_shader *sh; + gl_shader_stage stage; + + if (!_mesa_has_shader_subroutine(ctx)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } + + if (!_mesa_validate_shader_target(ctx, shadertype)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } + + shProg = _mesa_lookup_shader_program_err(ctx, program, api_name); + if (!shProg) + return; + + stage = _mesa_shader_enum_to_shader_stage(shadertype); + sh = shProg->_LinkedShaders[stage]; + if (!sh) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name); + return; + } + + switch (pname) { + case GL_ACTIVE_SUBROUTINES: + values[0] = sh->NumSubroutineFunctions; + break; + case GL_ACTIVE_SUBROUTINE_UNIFORM_LOCATIONS: + values[0] = sh->NumSubroutineUniformRemapTable; + break; + case GL_ACTIVE_SUBROUTINE_UNIFORMS: + values[0] = sh->NumSubroutineUniformTypes; + break; + case GL_ACTIVE_SUBROUTINE_MAX_LENGTH: + { + unsigned i; + GLint max_len = 0; + GLenum resource_type; + struct gl_program_resource *res; + + resource_type = _mesa_shader_stage_to_subroutine(stage); + for (i = 0; i < sh->NumSubroutineFunctions; i++) { + res = _mesa_program_resource_find_index(shProg, resource_type, i); + 
if (res) { + const GLint len = strlen(_mesa_program_resource_name(res)) + 1; + if (len > max_len) + max_len = len; + } + } + values[0] = max_len; + break; + } + case GL_ACTIVE_SUBROUTINE_UNIFORM_MAX_LENGTH: + { + unsigned i; + GLint max_len = 0; + GLenum resource_type; + struct gl_program_resource *res; + + resource_type = _mesa_shader_stage_to_subroutine_uniform(stage); + for (i = 0; i < sh->NumSubroutineUniformRemapTable; i++) { + res = _mesa_program_resource_find_index(shProg, resource_type, i); + if (res) { + const GLint len = strlen(_mesa_program_resource_name(res)) + 1 + + ((_mesa_program_resource_array_size(res) != 0) ? 3 : 0); + + if (len > max_len) + max_len = len; + } + } + values[0] = max_len; + break; + } + default: + _mesa_error(ctx, GL_INVALID_ENUM, "%s", api_name); + values[0] = -1; + break; + } +} + +static int +find_compat_subroutine(struct gl_shader *sh, const struct glsl_type *type) +{ + int i, j; + + for (i = 0; i < sh->NumSubroutineFunctions; i++) { + struct gl_subroutine_function *fn = &sh->SubroutineFunctions[i]; + for (j = 0; j < fn->num_compat_types; j++) { + if (fn->types[j] == type) + return i; + } + } + return 0; +} + +static void +_mesa_shader_init_subroutine_defaults(struct gl_shader *sh) +{ + int i, j; + + for (i = 0; i < sh->NumSubroutineUniformRemapTable; i++) { + struct gl_uniform_storage *uni = sh->SubroutineUniformRemapTable[i]; + int uni_count; + int val; + + if (!uni) + continue; + uni_count = uni->array_elements ? uni->array_elements : 1; + val = find_compat_subroutine(sh, uni->type); + + for (j = 0; j < uni_count; j++) + memcpy(&uni->storage[j], &val, sizeof(int)); + uni->initialized = true; + _mesa_propagate_uniforms_to_driver_storage(uni, 0, uni_count); + } +} + +void +_mesa_shader_program_init_subroutine_defaults(struct gl_shader_program *shProg) +{ + int i; + + if (!shProg) + return; + + for (i = 0; i < MESA_SHADER_STAGES; i++) { + if (!shProg->_LinkedShaders[i]) + continue; + + _mesa_shader_init_subroutine_defaults(shProg->_LinkedShaders[i]); + } +} diff --git a/src/mesa/main/shaderapi.h b/src/mesa/main/shaderapi.h index aba6d5d8306..0a10191684f 100644 --- a/src/mesa/main/shaderapi.h +++ b/src/mesa/main/shaderapi.h @@ -232,7 +232,8 @@ _mesa_program_resource_index(struct gl_shader_program *shProg, extern struct gl_program_resource * _mesa_program_resource_find_name(struct gl_shader_program *shProg, - GLenum programInterface, const char *name); + GLenum programInterface, const char *name, + unsigned *array_index); extern struct gl_program_resource * _mesa_program_resource_find_index(struct gl_shader_program *shProg, @@ -264,6 +265,51 @@ _mesa_get_program_resourceiv(struct gl_shader_program *shProg, GLsizei bufSize, GLsizei *length, GLint *params); +/* GL_ARB_tessellation_shader */ +extern void GLAPIENTRY +_mesa_PatchParameteri(GLenum pname, GLint value); + +extern void GLAPIENTRY +_mesa_PatchParameterfv(GLenum pname, const GLfloat *values); + +/* GL_ARB_shader_subroutine */ +void +_mesa_shader_program_init_subroutine_defaults(struct gl_shader_program *shProg); + +extern GLint GLAPIENTRY +_mesa_GetSubroutineUniformLocation(GLuint program, GLenum shadertype, + const GLchar *name); + +extern GLuint GLAPIENTRY +_mesa_GetSubroutineIndex(GLuint program, GLenum shadertype, + const GLchar *name); + +extern GLvoid GLAPIENTRY +_mesa_GetActiveSubroutineUniformiv(GLuint program, GLenum shadertype, + GLuint index, GLenum pname, GLint *values); + +extern GLvoid GLAPIENTRY +_mesa_GetActiveSubroutineUniformName(GLuint program, GLenum shadertype, + GLuint index, 
GLsizei bufsize, + GLsizei *length, GLchar *name); + +extern GLvoid GLAPIENTRY +_mesa_GetActiveSubroutineName(GLuint program, GLenum shadertype, + GLuint index, GLsizei bufsize, + GLsizei *length, GLchar *name); + +extern GLvoid GLAPIENTRY +_mesa_UniformSubroutinesuiv(GLenum shadertype, GLsizei count, + const GLuint *indices); + +extern GLvoid GLAPIENTRY +_mesa_GetUniformSubroutineuiv(GLenum shadertype, GLint location, + GLuint *params); + +extern GLvoid GLAPIENTRY +_mesa_GetProgramStageiv(GLuint program, GLenum shadertype, + GLenum pname, GLint *values); + #ifdef __cplusplus } #endif diff --git a/src/mesa/main/shaderimage.c b/src/mesa/main/shaderimage.c index 80b77275f93..a348cdb0405 100644 --- a/src/mesa/main/shaderimage.c +++ b/src/mesa/main/shaderimage.c @@ -610,7 +610,7 @@ _mesa_BindImageTextures(GLuint first, GLsizei count, const GLuint *textures) "glBindImageTextures(the internal format %s of " "the level zero texture image of textures[%d]=%u " "is not supported)", - _mesa_lookup_enum_by_nr(tex_format), + _mesa_enum_to_string(tex_format), i, texture); continue; } diff --git a/src/mesa/main/shaderobj.h b/src/mesa/main/shaderobj.h index 3d696a1887e..943044e37cd 100644 --- a/src/mesa/main/shaderobj.h +++ b/src/mesa/main/shaderobj.h @@ -111,6 +111,10 @@ _mesa_shader_enum_to_shader_stage(GLenum v) return MESA_SHADER_FRAGMENT; case GL_GEOMETRY_SHADER: return MESA_SHADER_GEOMETRY; + case GL_TESS_CONTROL_SHADER: + return MESA_SHADER_TESS_CTRL; + case GL_TESS_EVALUATION_SHADER: + return MESA_SHADER_TESS_EVAL; case GL_COMPUTE_SHADER: return MESA_SHADER_COMPUTE; default: @@ -119,6 +123,107 @@ _mesa_shader_enum_to_shader_stage(GLenum v) } } +/* 8 bytes + another underscore */ +#define MESA_SUBROUTINE_PREFIX_LEN 9 +static inline const char * +_mesa_shader_stage_to_subroutine_prefix(gl_shader_stage stage) +{ + switch (stage) { + case MESA_SHADER_VERTEX: + return "__subu_v"; + case MESA_SHADER_GEOMETRY: + return "__subu_g"; + case MESA_SHADER_FRAGMENT: + return "__subu_f"; + case MESA_SHADER_COMPUTE: + return "__subu_c"; + case MESA_SHADER_TESS_CTRL: + return "__subu_t"; + case MESA_SHADER_TESS_EVAL: + return "__subu_e"; + default: + return NULL; + } +} + +static inline gl_shader_stage +_mesa_shader_stage_from_subroutine_uniform(GLenum subuniform) +{ + switch (subuniform) { + default: + case GL_VERTEX_SUBROUTINE_UNIFORM: + return MESA_SHADER_VERTEX; + case GL_GEOMETRY_SUBROUTINE_UNIFORM: + return MESA_SHADER_GEOMETRY; + case GL_FRAGMENT_SUBROUTINE_UNIFORM: + return MESA_SHADER_FRAGMENT; + case GL_COMPUTE_SUBROUTINE_UNIFORM: + return MESA_SHADER_COMPUTE; + case GL_TESS_CONTROL_SUBROUTINE_UNIFORM: + return MESA_SHADER_TESS_CTRL; + case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: + return MESA_SHADER_TESS_EVAL; + } +} + +static inline gl_shader_stage +_mesa_shader_stage_from_subroutine(GLenum subroutine) +{ + switch (subroutine) { + case GL_VERTEX_SUBROUTINE: + return MESA_SHADER_VERTEX; + case GL_GEOMETRY_SUBROUTINE: + return MESA_SHADER_GEOMETRY; + case GL_FRAGMENT_SUBROUTINE: + return MESA_SHADER_FRAGMENT; + case GL_COMPUTE_SUBROUTINE: + return MESA_SHADER_COMPUTE; + case GL_TESS_CONTROL_SUBROUTINE: + return MESA_SHADER_TESS_CTRL; + case GL_TESS_EVALUATION_SUBROUTINE: + return MESA_SHADER_TESS_EVAL; + } +} + +static inline GLenum +_mesa_shader_stage_to_subroutine(gl_shader_stage stage) +{ + switch (stage) { + default: + case MESA_SHADER_VERTEX: + return GL_VERTEX_SUBROUTINE; + case MESA_SHADER_GEOMETRY: + return GL_GEOMETRY_SUBROUTINE; + case MESA_SHADER_FRAGMENT: + return GL_FRAGMENT_SUBROUTINE; + 
case MESA_SHADER_COMPUTE: + return GL_COMPUTE_SUBROUTINE; + case MESA_SHADER_TESS_CTRL: + return GL_TESS_CONTROL_SUBROUTINE; + case MESA_SHADER_TESS_EVAL: + return GL_TESS_EVALUATION_SUBROUTINE; + } +} + +static inline GLenum +_mesa_shader_stage_to_subroutine_uniform(gl_shader_stage stage) +{ + switch (stage) { + default: + case MESA_SHADER_VERTEX: + return GL_VERTEX_SUBROUTINE_UNIFORM; + case MESA_SHADER_GEOMETRY: + return GL_GEOMETRY_SUBROUTINE_UNIFORM; + case MESA_SHADER_FRAGMENT: + return GL_FRAGMENT_SUBROUTINE_UNIFORM; + case MESA_SHADER_COMPUTE: + return GL_COMPUTE_SUBROUTINE_UNIFORM; + case MESA_SHADER_TESS_CTRL: + return GL_TESS_CONTROL_SUBROUTINE_UNIFORM; + case MESA_SHADER_TESS_EVAL: + return GL_TESS_EVALUATION_SUBROUTINE_UNIFORM; + } +} #ifdef __cplusplus } diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c index bede7fe1d0e..d3b1c72b08d 100644 --- a/src/mesa/main/state.c +++ b/src/mesa/main/state.c @@ -79,8 +79,8 @@ update_program_enables(struct gl_context *ctx) /** - * Update the ctx->Vertex/Geometry/FragmentProgram._Current pointers to point - * to the current/active programs. Then call ctx->Driver.BindProgram() to + * Update the ctx->*Program._Current pointers to point to the + * current/active programs. Then call ctx->Driver.BindProgram() to * tell the driver which programs to use. * * Programs may come from 3 sources: GLSL shaders, ARB/NV_vertex/fragment @@ -97,6 +97,10 @@ update_program(struct gl_context *ctx) { const struct gl_shader_program *vsProg = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; + const struct gl_shader_program *tcsProg = + ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL]; + const struct gl_shader_program *tesProg = + ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL]; const struct gl_shader_program *gsProg = ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]; struct gl_shader_program *fsProg = @@ -106,6 +110,8 @@ update_program(struct gl_context *ctx) const struct gl_vertex_program *prevVP = ctx->VertexProgram._Current; const struct gl_fragment_program *prevFP = ctx->FragmentProgram._Current; const struct gl_geometry_program *prevGP = ctx->GeometryProgram._Current; + const struct gl_tess_ctrl_program *prevTCP = ctx->TessCtrlProgram._Current; + const struct gl_tess_eval_program *prevTEP = ctx->TessEvalProgram._Current; const struct gl_compute_program *prevCP = ctx->ComputeProgram._Current; GLbitfield new_state = 0x0; @@ -175,6 +181,30 @@ update_program(struct gl_context *ctx) _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, NULL); } + if (tesProg && tesProg->LinkStatus + && tesProg->_LinkedShaders[MESA_SHADER_TESS_EVAL]) { + /* Use GLSL tessellation evaluation shader */ + _mesa_reference_tesseprog(ctx, &ctx->TessEvalProgram._Current, + gl_tess_eval_program( + tesProg->_LinkedShaders[MESA_SHADER_TESS_EVAL]->Program)); + } + else { + /* No tessellation evaluation program */ + _mesa_reference_tesseprog(ctx, &ctx->TessEvalProgram._Current, NULL); + } + + if (tcsProg && tcsProg->LinkStatus + && tcsProg->_LinkedShaders[MESA_SHADER_TESS_CTRL]) { + /* Use GLSL tessellation control shader */ + _mesa_reference_tesscprog(ctx, &ctx->TessCtrlProgram._Current, + gl_tess_ctrl_program( + tcsProg->_LinkedShaders[MESA_SHADER_TESS_CTRL]->Program)); + } + else { + /* No tessellation control program */ + _mesa_reference_tesscprog(ctx, &ctx->TessCtrlProgram._Current, NULL); + } + /* Examine vertex program after fragment program as * _mesa_get_fixed_func_vertex_program() needs to know active * fragprog inputs. 
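The subroutine entry points and stage-to-enum helpers added above are easier to follow next to the application-side flow they implement. A minimal sketch, assuming a current GL 4.0 context with GL_GLEXT_PROTOTYPES, a linked program prog whose fragment shader declares exactly one subroutine uniform named "shading", and a subroutine function name supplied by the caller; all of these names are placeholders, not part of the patch.

#define GL_GLEXT_PROTOTYPES 1
#include <stdlib.h>
#include <GL/gl.h>
#include <GL/glext.h>

static void
select_fragment_subroutine(GLuint prog, const char *func_name)
{
   GLint n_locs = 0;
   GLuint *indices;
   GLint loc;
   GLuint idx;

   /* glUniformSubroutinesuiv() must receive exactly one index per active
    * subroutine uniform location, matching the count check added to
    * _mesa_UniformSubroutinesuiv(). */
   glGetProgramStageiv(prog, GL_FRAGMENT_SHADER,
                       GL_ACTIVE_SUBROUTINE_UNIFORM_LOCATIONS, &n_locs);
   if (n_locs <= 0)
      return;

   indices = calloc((size_t) n_locs, sizeof(GLuint));
   if (!indices)
      return;

   loc = glGetSubroutineUniformLocation(prog, GL_FRAGMENT_SHADER, "shading");
   idx = glGetSubroutineIndex(prog, GL_FRAGMENT_SHADER, func_name);
   if (loc < 0 || idx == GL_INVALID_INDEX) {
      free(indices);
      return;
   }

   /* With a single subroutine uniform this fills the whole table; a program
    * with several of them must give every location a compatible index. */
   indices[loc] = idx;

   /* Subroutine indices are context state tied to the active program, so the
    * program must be current; glUseProgram() also seeds compatible defaults
    * through _mesa_shader_program_init_subroutine_defaults(). */
   glUseProgram(prog);
   glUniformSubroutinesuiv(GL_FRAGMENT_SHADER, n_locs, indices);

   free(indices);
}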
@@ -230,6 +260,22 @@ update_program(struct gl_context *ctx) } } + if (ctx->TessEvalProgram._Current != prevTEP) { + new_state |= _NEW_PROGRAM; + if (ctx->Driver.BindProgram) { + ctx->Driver.BindProgram(ctx, GL_TESS_EVALUATION_PROGRAM_NV, + (struct gl_program *) ctx->TessEvalProgram._Current); + } + } + + if (ctx->TessCtrlProgram._Current != prevTCP) { + new_state |= _NEW_PROGRAM; + if (ctx->Driver.BindProgram) { + ctx->Driver.BindProgram(ctx, GL_TESS_CONTROL_PROGRAM_NV, + (struct gl_program *) ctx->TessCtrlProgram._Current); + } + } + if (ctx->VertexProgram._Current != prevVP) { new_state |= _NEW_PROGRAM; if (ctx->Driver.BindProgram) { @@ -266,8 +312,8 @@ update_program_constants(struct gl_context *ctx) } } - /* Don't handle geometry shaders here. They don't use any state - * constants. + /* Don't handle tessellation and geometry shaders here. They don't use + * any state constants. */ if (ctx->VertexProgram._Current) { diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 800720b798e..af89d2c1cfb 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -563,6 +563,8 @@ const struct function common_desktop_functions_possible[] = { /* GL 4.0 */ { "glMinSampleShading", 40, -1 }, + { "glPatchParameteri", 40, -1 }, + { "glPatchParameterfv", 40, -1 }, { "glBlendEquationi", 40, -1 }, { "glBlendEquationSeparatei", 40, -1 }, { "glBlendFunci", 40, -1 }, @@ -930,6 +932,11 @@ const struct function common_desktop_functions_possible[] = { /* GL_EXT_polygon_offset_clamp */ { "glPolygonOffsetClampEXT", 11, -1 }, + + /* GL_ARB_get_texture_sub_image */ + { "glGetTextureSubImage", 20, -1 }, + { "glGetCompressedTextureSubImage", 20, -1 }, + { NULL, 0, -1 } }; @@ -1424,6 +1431,16 @@ const struct function gl_core_functions_possible[] = { /* GL 3.2 */ { "glFramebufferTexture", 32, -1 }, + /* GL 4.0 */ + { "glGetSubroutineUniformLocation", 40, -1 }, + { "glGetSubroutineIndex", 40, -1 }, + { "glGetActiveSubroutineUniformiv", 40, -1 }, + { "glGetActiveSubroutineUniformName", 40, -1 }, + { "glGetActiveSubroutineName", 40, -1 }, + { "glUniformSubroutinesuiv", 40, -1 }, + { "glGetUniformSubroutineuiv", 40, -1 }, + { "glGetProgramStageiv", 40, -1 }, + /* GL 4.3 */ { "glIsRenderbuffer", 43, -1 }, { "glBindRenderbuffer", 43, -1 }, @@ -1562,16 +1579,6 @@ const struct function gl_core_functions_possible[] = { { "glUniformMatrix4x2dv", 40, -1 }, { "glUniformMatrix4x3dv", 40, -1 }, { "glGetUniformdv", 43, -1 }, -// { "glGetSubroutineUniformLocation", 43, -1 }, // XXX: Add to xml -// { "glGetSubroutineIndex", 43, -1 }, // XXX: Add to xml -// { "glGetActiveSubroutineUniformiv", 43, -1 }, // XXX: Add to xml -// { "glGetActiveSubroutineUniformName", 43, -1 }, // XXX: Add to xml -// { "glGetActiveSubroutineName", 43, -1 }, // XXX: Add to xml -// { "glUniformSubroutinesuiv", 43, -1 }, // XXX: Add to xml -// { "glGetUniformSubroutineuiv", 43, -1 }, // XXX: Add to xml -// { "glGetProgramStageiv", 43, -1 }, // XXX: Add to xml -// { "glPatchParameteri", 43, -1 }, // XXX: Add to xml -// { "glPatchParameterfv", 43, -1 }, // XXX: Add to xml { "glBindTransformFeedback", 43, -1 }, { "glDeleteTransformFeedbacks", 43, -1 }, diff --git a/src/mesa/main/tests/enum_strings.cpp b/src/mesa/main/tests/enum_strings.cpp index dc5fe751a86..8218cc9a685 100644 --- a/src/mesa/main/tests/enum_strings.cpp +++ b/src/mesa/main/tests/enum_strings.cpp @@ -39,13 +39,13 @@ TEST(EnumStrings, LookUpByNumber) { for (unsigned i = 0; everything[i].name != NULL; i++) { 
EXPECT_STREQ(everything[i].name, - _mesa_lookup_enum_by_nr(everything[i].value)); + _mesa_enum_to_string(everything[i].value)); } } TEST(EnumStrings, LookUpUnknownNumber) { - EXPECT_STRCASEEQ("0xEEEE", _mesa_lookup_enum_by_nr(0xEEEE)); + EXPECT_STRCASEEQ("0xEEEE", _mesa_enum_to_string(0xEEEE)); } /* Please type the name and the value. This makes it easier to detect @@ -1731,6 +1731,10 @@ const struct enum_info everything[] = { { 0x8DDF, "GL_MAX_GEOMETRY_UNIFORM_COMPONENTS" }, { 0x8DE0, "GL_MAX_GEOMETRY_OUTPUT_VERTICES" }, { 0x8DE1, "GL_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS" }, + { 0x8DE5, "GL_ACTIVE_SUBROUTINES" }, + { 0x8DE6, "GL_ACTIVE_SUBROUTINE_UNIFORMS" }, + { 0x8DE7, "GL_MAX_SUBROUTINES" }, + { 0x8DE8, "GL_MAX_SUBROUTINE_UNIFORM_LOCATIONS" }, { 0x8DF0, "GL_LOW_FLOAT" }, { 0x8DF1, "GL_MEDIUM_FLOAT" }, { 0x8DF2, "GL_HIGH_FLOAT" }, @@ -1759,6 +1763,11 @@ const struct enum_info everything[] = { { 0x8E44, "GL_TEXTURE_SWIZZLE_B" }, { 0x8E45, "GL_TEXTURE_SWIZZLE_A" }, { 0x8E46, "GL_TEXTURE_SWIZZLE_RGBA" }, + { 0x8E47, "GL_ACTIVE_SUBROUTINE_UNIFORM_LOCATIONS" }, + { 0x8E48, "GL_ACTIVE_SUBROUTINE_MAX_LENGTH" }, + { 0x8E49, "GL_ACTIVE_SUBROUTINE_UNIFORM_MAX_LENGTH" }, + { 0x8E4A, "GL_NUM_COMPATIBLE_SUBROUTINES" }, + { 0x8E4B, "GL_COMPATIBLE_SUBROUTINES" }, { 0x8E4C, "GL_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION" }, { 0x8E4D, "GL_FIRST_VERTEX_CONVENTION" }, { 0x8E4E, "GL_LAST_VERTEX_CONVENTION" }, diff --git a/src/mesa/main/texenv.c b/src/mesa/main/texenv.c index 3edafc0f776..091922161c5 100644 --- a/src/mesa/main/texenv.c +++ b/src/mesa/main/texenv.c @@ -42,7 +42,7 @@ #define TE_ERROR(errCode, msg, value) \ - _mesa_error(ctx, errCode, msg, _mesa_lookup_enum_by_nr(value)); + _mesa_error(ctx, errCode, msg, _mesa_enum_to_string(value)); /** Set texture env mode */ @@ -482,16 +482,16 @@ _mesa_TexEnvfv( GLenum target, GLenum pname, const GLfloat *param ) } else { _mesa_error(ctx, GL_INVALID_ENUM, "glTexEnv(target=%s)", - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return; } if (MESA_VERBOSE&(VERBOSE_API|VERBOSE_TEXTURE)) _mesa_debug(ctx, "glTexEnv %s %s %.1f(%s) ...\n", - _mesa_lookup_enum_by_nr(target), - _mesa_lookup_enum_by_nr(pname), + _mesa_enum_to_string(target), + _mesa_enum_to_string(pname), *param, - _mesa_lookup_enum_by_nr((GLenum) iparam0)); + _mesa_enum_to_string((GLenum) iparam0)); /* Tell device driver about the new texture environment */ if (ctx->Driver.TexEnv) { diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c index 3c4baca7026..f4d17e1bdb5 100644 --- a/src/mesa/main/texformat.c +++ b/src/mesa/main/texformat.c @@ -847,7 +847,7 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum target, } _mesa_problem(ctx, "unexpected format %s in _mesa_choose_tex_format()", - _mesa_lookup_enum_by_nr(internalFormat)); + _mesa_enum_to_string(internalFormat)); return MESA_FORMAT_NONE; } diff --git a/src/mesa/main/texgen.c b/src/mesa/main/texgen.c index 41e428b69e7..24ba295746a 100644 --- a/src/mesa/main/texgen.c +++ b/src/mesa/main/texgen.c @@ -76,10 +76,10 @@ _mesa_TexGenfv( GLenum coord, GLenum pname, const GLfloat *params ) if (MESA_VERBOSE&(VERBOSE_API|VERBOSE_TEXTURE)) _mesa_debug(ctx, "glTexGen %s %s %.1f(%s)...\n", - _mesa_lookup_enum_by_nr(coord), - _mesa_lookup_enum_by_nr(pname), + _mesa_enum_to_string(coord), + _mesa_enum_to_string(pname), *params, - _mesa_lookup_enum_by_nr((GLenum) (GLint) *params)); + _mesa_enum_to_string((GLenum) (GLint) *params)); if (ctx->Texture.CurrentUnit >= ctx->Const.MaxTextureCoordUnits) { _mesa_error(ctx, 
GL_INVALID_OPERATION, "glTexGen(current unit)"); diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index 92b4d6795c6..c0ccce3d50e 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -75,12 +75,11 @@ type_needs_clamping(GLenum type) */ static void get_tex_depth(struct gl_context *ctx, GLuint dimensions, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLint depth, GLenum format, GLenum type, GLvoid *pixels, struct gl_texture_image *texImage) { - const GLint width = texImage->Width; - GLint height = texImage->Height; - GLint depth = texImage->Depth; GLint img, row; GLfloat *depthRow = malloc(width * sizeof(GLfloat)); @@ -94,14 +93,15 @@ get_tex_depth(struct gl_context *ctx, GLuint dimensions, height = 1; } + assert(zoffset + depth <= texImage->Depth); for (img = 0; img < depth; img++) { GLubyte *srcMap; GLint srcRowStride; /* map src texture buffer */ - ctx->Driver.MapTextureImage(ctx, texImage, img, - 0, 0, width, height, GL_MAP_READ_BIT, - &srcMap, &srcRowStride); + ctx->Driver.MapTextureImage(ctx, texImage, zoffset + img, + xoffset, yoffset, width, height, + GL_MAP_READ_BIT, &srcMap, &srcRowStride); if (srcMap) { for (row = 0; row < height; row++) { @@ -113,7 +113,7 @@ get_tex_depth(struct gl_context *ctx, GLuint dimensions, _mesa_pack_depth_span(ctx, width, dest, type, depthRow, &ctx->Pack); } - ctx->Driver.UnmapTextureImage(ctx, texImage, img); + ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + img); } else { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage"); @@ -130,26 +130,26 @@ get_tex_depth(struct gl_context *ctx, GLuint dimensions, */ static void get_tex_depth_stencil(struct gl_context *ctx, GLuint dimensions, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLint depth, GLenum format, GLenum type, GLvoid *pixels, struct gl_texture_image *texImage) { - const GLint width = texImage->Width; - const GLint height = texImage->Height; - const GLint depth = texImage->Depth; GLint img, row; assert(format == GL_DEPTH_STENCIL); assert(type == GL_UNSIGNED_INT_24_8 || type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV); + assert(zoffset + depth <= texImage->Depth); for (img = 0; img < depth; img++) { GLubyte *srcMap; GLint rowstride; /* map src texture buffer */ - ctx->Driver.MapTextureImage(ctx, texImage, img, - 0, 0, width, height, GL_MAP_READ_BIT, - &srcMap, &rowstride); + ctx->Driver.MapTextureImage(ctx, texImage, zoffset + img, + xoffset, yoffset, width, height, + GL_MAP_READ_BIT, &srcMap, &rowstride); if (srcMap) { for (row = 0; row < height; row++) { @@ -166,7 +166,7 @@ get_tex_depth_stencil(struct gl_context *ctx, GLuint dimensions, } } - ctx->Driver.UnmapTextureImage(ctx, texImage, img); + ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + img); } else { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage"); @@ -180,12 +180,11 @@ get_tex_depth_stencil(struct gl_context *ctx, GLuint dimensions, */ static void get_tex_stencil(struct gl_context *ctx, GLuint dimensions, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLint depth, GLenum format, GLenum type, GLvoid *pixels, struct gl_texture_image *texImage) { - const GLint width = texImage->Width; - const GLint height = texImage->Height; - const GLint depth = texImage->Depth; GLint img, row; assert(format == GL_STENCIL_INDEX); @@ -195,8 +194,9 @@ get_tex_stencil(struct gl_context *ctx, GLuint dimensions, GLint rowstride; /* map src texture buffer */ - ctx->Driver.MapTextureImage(ctx, texImage, img, 
- 0, 0, width, height, GL_MAP_READ_BIT, + ctx->Driver.MapTextureImage(ctx, texImage, zoffset + img, + xoffset, yoffset, width, height, + GL_MAP_READ_BIT, &srcMap, &rowstride); if (srcMap) { @@ -211,7 +211,7 @@ get_tex_stencil(struct gl_context *ctx, GLuint dimensions, dest); } - ctx->Driver.UnmapTextureImage(ctx, texImage, img); + ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + img); } else { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage"); @@ -226,22 +226,22 @@ get_tex_stencil(struct gl_context *ctx, GLuint dimensions, */ static void get_tex_ycbcr(struct gl_context *ctx, GLuint dimensions, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLint depth, GLenum format, GLenum type, GLvoid *pixels, struct gl_texture_image *texImage) { - const GLint width = texImage->Width; - const GLint height = texImage->Height; - const GLint depth = texImage->Depth; GLint img, row; + assert(zoffset + depth <= texImage->Depth); for (img = 0; img < depth; img++) { GLubyte *srcMap; GLint rowstride; /* map src texture buffer */ - ctx->Driver.MapTextureImage(ctx, texImage, img, - 0, 0, width, height, GL_MAP_READ_BIT, - &srcMap, &rowstride); + ctx->Driver.MapTextureImage(ctx, texImage, zoffset + img, + xoffset, yoffset, width, height, + GL_MAP_READ_BIT, &srcMap, &rowstride); if (srcMap) { for (row = 0; row < height; row++) { @@ -264,7 +264,7 @@ get_tex_ycbcr(struct gl_context *ctx, GLuint dimensions, } } - ctx->Driver.UnmapTextureImage(ctx, texImage, img); + ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + img); } else { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage"); @@ -279,6 +279,8 @@ get_tex_ycbcr(struct gl_context *ctx, GLuint dimensions, */ static void get_tex_rgba_compressed(struct gl_context *ctx, GLuint dimensions, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLint depth, GLenum format, GLenum type, GLvoid *pixels, struct gl_texture_image *texImage, GLbitfield transferOps) @@ -287,9 +289,6 @@ get_tex_rgba_compressed(struct gl_context *ctx, GLuint dimensions, const mesa_format texFormat = _mesa_get_srgb_format_linear(texImage->TexFormat); const GLenum baseFormat = _mesa_get_format_base_format(texFormat); - const GLuint width = texImage->Width; - const GLuint height = texImage->Height; - const GLuint depth = texImage->Depth; GLfloat *tempImage, *tempSlice; GLuint slice; int srcStride, dstStride; @@ -312,15 +311,15 @@ get_tex_rgba_compressed(struct gl_context *ctx, GLuint dimensions, tempSlice = tempImage + slice * 4 * width * height; - ctx->Driver.MapTextureImage(ctx, texImage, slice, - 0, 0, width, height, + ctx->Driver.MapTextureImage(ctx, texImage, zoffset + slice, + xoffset, yoffset, width, height, GL_MAP_READ_BIT, &srcMap, &srcRowStride); if (srcMap) { _mesa_decompress_image(texFormat, width, height, srcMap, srcRowStride, tempSlice); - ctx->Driver.UnmapTextureImage(ctx, texImage, slice); + ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + slice); } else { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage"); @@ -409,6 +408,8 @@ _mesa_base_pack_format(GLenum format) */ static void get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLint depth, GLenum format, GLenum type, GLvoid *pixels, struct gl_texture_image *texImage, GLbitfield transferOps) @@ -416,9 +417,6 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions, /* don't want to apply sRGB -> RGB conversion here so override the format */ const 
mesa_format texFormat = _mesa_get_srgb_format_linear(texImage->TexFormat); - const GLuint width = texImage->Width; - GLuint height = texImage->Height; - GLuint depth = texImage->Depth; GLuint img; GLboolean dst_is_integer; uint32_t dst_format; @@ -430,6 +428,8 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions, if (texImage->TexObject->Target == GL_TEXTURE_1D_ARRAY) { depth = height; height = 1; + zoffset = yoffset; + yoffset = 0; } /* Depending on the base format involved we may need to apply a rebase @@ -449,7 +449,8 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions, rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO; rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO; rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_W; - } else if (texImage->_BaseFormat != _mesa_get_format_base_format(texFormat)) { + } else if (texImage->_BaseFormat != + _mesa_get_format_base_format(texFormat)) { needsRebase = _mesa_compute_rgba2base2rgba_component_mapping(texImage->_BaseFormat, rebaseSwizzle); @@ -480,8 +481,9 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions, uint32_t src_format; /* map src texture buffer */ - ctx->Driver.MapTextureImage(ctx, texImage, img, - 0, 0, width, height, GL_MAP_READ_BIT, + ctx->Driver.MapTextureImage(ctx, texImage, zoffset + img, + xoffset, yoffset, width, height, + GL_MAP_READ_BIT, &srcMap, &rowstride); if (!srcMap) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage"); @@ -530,8 +532,8 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions, /* If we had to rebase, we have already handled that */ needsRebase = false; - /* If we were lucky and our RGBA conversion matches the dst format, then - * we are done. + /* If we were lucky and our RGBA conversion matches the dst format, + * then we are done. 
*/ if (!need_convert) goto do_swap; @@ -568,7 +570,7 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions, } /* Unmap the src texture buffer */ - ctx->Driver.UnmapTextureImage(ctx, texImage, img); + ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + img); } done: @@ -583,6 +585,8 @@ done: */ static void get_tex_rgba(struct gl_context *ctx, GLuint dimensions, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLint depth, GLenum format, GLenum type, GLvoid *pixels, struct gl_texture_image *texImage) { @@ -604,11 +608,17 @@ get_tex_rgba(struct gl_context *ctx, GLuint dimensions, } if (_mesa_is_format_compressed(texImage->TexFormat)) { - get_tex_rgba_compressed(ctx, dimensions, format, type, + get_tex_rgba_compressed(ctx, dimensions, + xoffset, yoffset, zoffset, + width, height, depth, + format, type, pixels, texImage, transferOps); } else { - get_tex_rgba_uncompressed(ctx, dimensions, format, type, + get_tex_rgba_uncompressed(ctx, dimensions, + xoffset, yoffset, zoffset, + width, height, depth, + format, type, pixels, texImage, transferOps); } } @@ -619,8 +629,10 @@ get_tex_rgba(struct gl_context *ctx, GLuint dimensions, * \return GL_TRUE if done, GL_FALSE otherwise */ static GLboolean -get_tex_memcpy(struct gl_context *ctx, GLenum format, GLenum type, - GLvoid *pixels, +get_tex_memcpy(struct gl_context *ctx, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLint depth, + GLenum format, GLenum type, GLvoid *pixels, struct gl_texture_image *texImage) { const GLenum target = texImage->TexObject->Target; @@ -642,20 +654,25 @@ get_tex_memcpy(struct gl_context *ctx, GLenum format, GLenum type, ctx->Pack.SwapBytes); } + if (depth > 1) { + /* only a single slice is supported at this time */ + memCopy = FALSE; + } + if (memCopy) { const GLuint bpp = _mesa_get_format_bytes(texImage->TexFormat); - const GLint bytesPerRow = texImage->Width * bpp; + const GLint bytesPerRow = width * bpp; GLubyte *dst = - _mesa_image_address2d(&ctx->Pack, pixels, texImage->Width, - texImage->Height, format, type, 0, 0); + _mesa_image_address2d(&ctx->Pack, pixels, width, height, + format, type, 0, 0); const GLint dstRowStride = - _mesa_image_row_stride(&ctx->Pack, texImage->Width, format, type); + _mesa_image_row_stride(&ctx->Pack, width, format, type); GLubyte *src; GLint srcRowStride; /* map src texture buffer */ - ctx->Driver.MapTextureImage(ctx, texImage, 0, - 0, 0, texImage->Width, texImage->Height, + ctx->Driver.MapTextureImage(ctx, texImage, zoffset, + xoffset, yoffset, width, height, GL_MAP_READ_BIT, &src, &srcRowStride); if (src) { @@ -664,7 +681,7 @@ get_tex_memcpy(struct gl_context *ctx, GLenum format, GLenum type, } else { GLuint row; - for (row = 0; row < texImage->Height; row++) { + for (row = 0; row < height; row++) { memcpy(dst, src, bytesPerRow); dst += dstRowStride; src += srcRowStride; @@ -672,7 +689,7 @@ get_tex_memcpy(struct gl_context *ctx, GLenum format, GLenum type, } /* unmap src texture buffer */ - ctx->Driver.UnmapTextureImage(ctx, texImage, 0); + ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset); } else { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage"); @@ -684,15 +701,17 @@ get_tex_memcpy(struct gl_context *ctx, GLenum format, GLenum type, /** - * This is the software fallback for Driver.GetTexImage(). + * This is the software fallback for Driver.GetTexSubImage(). * All error checking will have been done before this routine is called. 
* We'll call ctx->Driver.MapTextureImage() to access the data, then * unmap with ctx->Driver.UnmapTextureImage(). */ void -_mesa_GetTexImage_sw(struct gl_context *ctx, - GLenum format, GLenum type, GLvoid *pixels, - struct gl_texture_image *texImage) +_mesa_GetTexSubImage_sw(struct gl_context *ctx, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLint depth, + GLenum format, GLenum type, GLvoid *pixels, + struct gl_texture_image *texImage) { const GLuint dimensions = _mesa_get_texture_dimensions(texImage->TexObject->Target); @@ -720,23 +739,30 @@ _mesa_GetTexImage_sw(struct gl_context *ctx, pixels = ADD_POINTERS(buf, pixels); } - if (get_tex_memcpy(ctx, format, type, pixels, texImage)) { + if (get_tex_memcpy(ctx, xoffset, yoffset, zoffset, width, height, depth, + format, type, pixels, texImage)) { /* all done */ } else if (format == GL_DEPTH_COMPONENT) { - get_tex_depth(ctx, dimensions, format, type, pixels, texImage); + get_tex_depth(ctx, dimensions, xoffset, yoffset, zoffset, + width, height, depth, format, type, pixels, texImage); } else if (format == GL_DEPTH_STENCIL_EXT) { - get_tex_depth_stencil(ctx, dimensions, format, type, pixels, texImage); + get_tex_depth_stencil(ctx, dimensions, xoffset, yoffset, zoffset, + width, height, depth, format, type, pixels, + texImage); } else if (format == GL_STENCIL_INDEX) { - get_tex_stencil(ctx, dimensions, format, type, pixels, texImage); + get_tex_stencil(ctx, dimensions, xoffset, yoffset, zoffset, + width, height, depth, format, type, pixels, texImage); } else if (format == GL_YCBCR_MESA) { - get_tex_ycbcr(ctx, dimensions, format, type, pixels, texImage); + get_tex_ycbcr(ctx, dimensions, xoffset, yoffset, zoffset, + width, height, depth, format, type, pixels, texImage); } else { - get_tex_rgba(ctx, dimensions, format, type, pixels, texImage); + get_tex_rgba(ctx, dimensions, xoffset, yoffset, zoffset, + width, height, depth, format, type, pixels, texImage); } if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) { @@ -747,13 +773,16 @@ _mesa_GetTexImage_sw(struct gl_context *ctx, /** - * This is the software fallback for Driver.GetCompressedTexImage(). + * This is the software fallback for Driver.GetCompressedTexSubImage(). * All error checking will have been done before this routine is called. 
*/ void -_mesa_GetCompressedTexImage_sw(struct gl_context *ctx, - struct gl_texture_image *texImage, - GLvoid *img) +_mesa_GetCompressedTexSubImage_sw(struct gl_context *ctx, + struct gl_texture_image *texImage, + GLint xoffset, GLint yoffset, + GLint zoffset, GLsizei width, + GLint height, GLint depth, + GLvoid *img) { const GLuint dimensions = _mesa_get_texture_dimensions(texImage->TexObject->Target); @@ -762,10 +791,8 @@ _mesa_GetCompressedTexImage_sw(struct gl_context *ctx, GLubyte *dest; _mesa_compute_compressed_pixelstore(dimensions, texImage->TexFormat, - texImage->Width, texImage->Height, - texImage->Depth, - &ctx->Pack, - &store); + width, height, depth, + &ctx->Pack, &store); if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) { /* pack texture image into a PBO */ @@ -791,8 +818,8 @@ _mesa_GetCompressedTexImage_sw(struct gl_context *ctx, GLubyte *src; /* map src texture buffer */ - ctx->Driver.MapTextureImage(ctx, texImage, slice, - 0, 0, texImage->Width, texImage->Height, + ctx->Driver.MapTextureImage(ctx, texImage, zoffset + slice, + xoffset, yoffset, width, height, GL_MAP_READ_BIT, &src, &srcRowStride); if (src) { @@ -803,10 +830,11 @@ _mesa_GetCompressedTexImage_sw(struct gl_context *ctx, src += srcRowStride; } - ctx->Driver.UnmapTextureImage(ctx, texImage, slice); + ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + slice); /* Advance to next slice */ - dest += store.TotalBytesPerRow * (store.TotalRowsPerSlice - store.CopyRowsPerSlice); + dest += store.TotalBytesPerRow * (store.TotalRowsPerSlice - + store.CopyRowsPerSlice); } else { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetCompresssedTexImage"); @@ -863,29 +891,299 @@ legal_getteximage_target(struct gl_context *ctx, GLenum target, bool dsa) /** - * Do error checking for a glGetTex(ture)Image() call. - * \return GL_TRUE if any error, GL_FALSE if no errors. + * Wrapper for _mesa_select_tex_image() which can handle target being + * GL_TEXTURE_CUBE_MAP_ARB in which case we use zoffset to select a cube face. + * This can happen for glGetTextureImage and glGetTextureSubImage (DSA + * functions). */ -static GLboolean +static struct gl_texture_image * +select_tex_image(const struct gl_texture_object *texObj, GLenum target, + GLint level, GLint zoffset) +{ + assert(level >= 0); + assert(level < MAX_TEXTURE_LEVELS); + if (target == GL_TEXTURE_CUBE_MAP) { + assert(zoffset >= 0); + assert(zoffset < 6); + target = GL_TEXTURE_CUBE_MAP_POSITIVE_X + zoffset; + } + return _mesa_select_tex_image(texObj, target, level); +} + + +/** + * Error-check the offset and size arguments to + * glGet[Compressed]TextureSubImage(). Also checks if the specified + * texture image is missing. + * \return true if error, false if no error. 
+ */ +static bool +dimensions_error_check(struct gl_context *ctx, + struct gl_texture_object *texObj, + GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + const char *caller) +{ + const struct gl_texture_image *texImage; + int i; + + if (xoffset < 0) { + _mesa_error(ctx, GL_INVALID_VALUE, "%s(xoffset = %d)", caller, xoffset); + return true; + } + + if (yoffset < 0) { + _mesa_error(ctx, GL_INVALID_VALUE, "%s(yoffset = %d)", caller, yoffset); + return true; + } + + if (zoffset < 0) { + _mesa_error(ctx, GL_INVALID_VALUE, "%s(zoffset = %d)", caller, zoffset); + return true; + } + + if (width < 0) { + _mesa_error(ctx, GL_INVALID_VALUE, "%s(width = %d)", caller, width); + return true; + } + + if (height < 0) { + _mesa_error(ctx, GL_INVALID_VALUE, "%s(height = %d)", caller, height); + return true; + } + + if (depth < 0) { + _mesa_error(ctx, GL_INVALID_VALUE, "%s(depth = %d)", caller, depth); + return true; + } + + /* do special per-target checks */ + switch (target) { + case GL_TEXTURE_1D: + if (yoffset != 0) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(1D, yoffset = %d)", caller, yoffset); + return true; + } + if (height > 1) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(1D, height = %d)", caller, height); + return true; + } + /* fall-through */ + case GL_TEXTURE_1D_ARRAY: + case GL_TEXTURE_2D: + case GL_TEXTURE_RECTANGLE: + if (zoffset != 0) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(zoffset = %d)", caller, zoffset); + return true; + } + if (depth > 1) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(depth = %d)", caller, depth); + return true; + } + break; + case GL_TEXTURE_CUBE_MAP: + /* Non-array cube maps are special because we have a gl_texture_image + * per face. + */ + if (zoffset + depth > 6) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(zoffset + depth = %d)", caller, zoffset + depth); + return true; + } + /* check that the range of faces exist */ + for (i = 0; i < depth; i++) { + GLenum face = GL_TEXTURE_CUBE_MAP_POSITIVE_X + zoffset + i; + if (!_mesa_select_tex_image(texObj, face, level)) { + /* non-existant face */ + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(missing cube face)", caller); + return true; + } + } + break; + default: + ; /* nothing */ + } + + texImage = select_tex_image(texObj, target, level, zoffset); + if (!texImage) { + /* missing texture image */ + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(missing image)", caller); + return true; + } + + if (xoffset + width > texImage->Width) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(xoffset %d + width %d > %u)", + caller, xoffset, width, texImage->Width); + return true; + } + + if (yoffset + height > texImage->Height) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(yoffset %d + height %d > %u)", + caller, yoffset, height, texImage->Height); + return true; + } + + if (target != GL_TEXTURE_CUBE_MAP) { + /* Cube map error checking was done above */ + if (zoffset + depth > texImage->Depth) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(zoffset %d + depth %d > %u)", + caller, zoffset, depth, texImage->Depth); + return true; + } + } + + /* Extra checks for compressed textures */ + { + GLuint bw, bh; + _mesa_get_format_block_size(texImage->TexFormat, &bw, &bh); + if (bw > 1 || bh > 1) { + /* offset must be multiple of block size */ + if (xoffset % bw != 0) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(xoffset = %d)", caller, xoffset); + return true; + } + if (target != GL_TEXTURE_1D && target != GL_TEXTURE_1D_ARRAY) { + if (yoffset % bh != 0) { + _mesa_error(ctx, 
GL_INVALID_VALUE, + "%s(yoffset = %d)", caller, yoffset); + return true; + } + } + + /* The size must be a multiple of bw x bh, or we must be using a + * offset+size that exactly hits the edge of the image. + */ + if ((width % bw != 0) && + (xoffset + width != (GLint) texImage->Width)) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(width = %d)", caller, width); + return true; + } + + if ((height % bh != 0) && + (yoffset + height != (GLint) texImage->Height)) { + _mesa_error(ctx, GL_INVALID_VALUE, + "%s(height = %d)", caller, height); + return true; + } + } + } + + if (width == 0 || height == 0 || depth == 0) { + /* Not an error, but nothing to do. Return 'true' so that the + * caller simply returns. + */ + return true; + } + + return false; +} + + +/** + * Do PBO-related error checking for getting uncompressed images. + * \return true if there was an error (or the GetTexImage is to be a no-op) + */ +static bool +pbo_error_check(struct gl_context *ctx, GLenum target, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, GLsizei clientMemSize, + GLvoid *pixels, + const char *caller) +{ + const GLuint dimensions = (target == GL_TEXTURE_3D) ? 3 : 2; + + if (!_mesa_validate_pbo_access(dimensions, &ctx->Pack, width, height, depth, + format, type, clientMemSize, pixels)) { + if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(out of bounds PBO access)", caller); + } else { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(out of bounds access: bufSize (%d) is too small)", + caller, clientMemSize); + } + return true; + } + + if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) { + /* PBO should not be mapped */ + if (_mesa_check_disallowed_mapping(ctx->Pack.BufferObj)) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(PBO is mapped)", caller); + return true; + } + } + + if (!_mesa_is_bufferobj(ctx->Pack.BufferObj) && !pixels) { + /* not an error, do nothing */ + return true; + } + + return false; +} + + +/** + * Do error checking for all (non-compressed) get-texture-image functions. + * \return true if any error, false if no errors. + */ +static bool getteximage_error_check(struct gl_context *ctx, - struct gl_texture_image *texImage, + struct gl_texture_object *texObj, GLenum target, GLint level, - GLenum format, GLenum type, GLsizei clientMemSize, - GLvoid *pixels, bool dsa) + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, GLsizei bufSize, + GLvoid *pixels, const char *caller) { - const GLint maxLevels = _mesa_max_texture_levels(ctx, target); - const GLuint dimensions = (target == GL_TEXTURE_3D) ? 3 : 2; - GLenum baseFormat; - const char *suffix = dsa ? 
"ture" : ""; + struct gl_texture_image *texImage; + GLenum baseFormat, err; + GLint maxLevels; - assert(texImage); - assert(maxLevels != 0); + assert(texObj); + + if (texObj->Target == 0) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture)", caller); + return true; + } + + maxLevels = _mesa_max_texture_levels(ctx, target); if (level < 0 || level >= maxLevels) { - _mesa_error(ctx, GL_INVALID_VALUE, - "glGetTex%sImage(level out of range)", suffix); - return GL_TRUE; + _mesa_error(ctx, GL_INVALID_VALUE, "%s(level = %d)", caller, level); + return true; } + err = _mesa_error_check_format_and_type(ctx, format, type); + if (err != GL_NO_ERROR) { + _mesa_error(ctx, err, "%s(format/type)", caller); + return true; + } + + if (dimensions_error_check(ctx, texObj, target, level, + xoffset, yoffset, zoffset, + width, height, depth, caller)) { + return true; + } + + if (pbo_error_check(ctx, target, width, height, depth, + format, type, bufSize, pixels, caller)) { + return true; + } + + texImage = select_tex_image(texObj, target, level, zoffset); + assert(texImage); + /* * Format and type checking has been moved up to GetnTexImage and * GetTextureImage so that it happens before getting the texImage object. @@ -899,494 +1197,579 @@ getteximage_error_check(struct gl_context *ctx, if (_mesa_is_color_format(format) && !_mesa_is_color_format(baseFormat)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetTex%sImage(format mismatch)", suffix); - return GL_TRUE; + "%s(format mismatch)", caller); + return true; } else if (_mesa_is_depth_format(format) && !_mesa_is_depth_format(baseFormat) && !_mesa_is_depthstencil_format(baseFormat)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetTex%sImage(format mismatch)", suffix); - return GL_TRUE; + "%s(format mismatch)", caller); + return true; } else if (_mesa_is_stencil_format(format) && !ctx->Extensions.ARB_texture_stencil8) { _mesa_error(ctx, GL_INVALID_ENUM, - "glGetTex%sImage(format=GL_STENCIL_INDEX)", suffix); - return GL_TRUE; + "%s(format=GL_STENCIL_INDEX)", caller); + return true; } else if (_mesa_is_ycbcr_format(format) && !_mesa_is_ycbcr_format(baseFormat)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetTex%sImage(format mismatch)", suffix); - return GL_TRUE; + "%s(format mismatch)", caller); + return true; } else if (_mesa_is_depthstencil_format(format) && !_mesa_is_depthstencil_format(baseFormat)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetTex%sImage(format mismatch)", suffix); - return GL_TRUE; + "%s(format mismatch)", caller); + return true; } - else if (!_mesa_is_stencil_format(format) && _mesa_is_enum_format_integer(format) != + else if (!_mesa_is_stencil_format(format) && + _mesa_is_enum_format_integer(format) != _mesa_is_format_integer(texImage->TexFormat)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetTex%sImage(format mismatch)", suffix); - return GL_TRUE; + "%s(format mismatch)", caller); + return true; } - if (!_mesa_validate_pbo_access(dimensions, &ctx->Pack, texImage->Width, - texImage->Height, texImage->Depth, - format, type, clientMemSize, pixels)) { - if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetTex%sImage(out of bounds PBO access)", suffix); - } else { - _mesa_error(ctx, GL_INVALID_OPERATION, - "%s(out of bounds access:" - " bufSize (%d) is too small)", - dsa ? "glGetTextureImage" : "glGetnTexImageARB", - clientMemSize); - } - return GL_TRUE; + return false; +} + + +/** + * Return the width, height and depth of a texture image. 
+ * This function must be resilient to bad parameter values since + * this is called before full error checking. + */ +static void +get_texture_image_dims(const struct gl_texture_object *texObj, + GLenum target, GLint level, + GLsizei *width, GLsizei *height, GLsizei *depth) +{ + const struct gl_texture_image *texImage = NULL; + + if (level >= 0 && level < MAX_TEXTURE_LEVELS) { + texImage = _mesa_select_tex_image(texObj, target, level); } - if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) { - /* PBO should not be mapped */ - if (_mesa_check_disallowed_mapping(ctx->Pack.BufferObj)) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetTex%sImage(PBO is mapped)", suffix); - return GL_TRUE; + if (texImage) { + *width = texImage->Width; + *height = texImage->Height; + if (target == GL_TEXTURE_CUBE_MAP) { + *depth = 6; + } + else { + *depth = texImage->Depth; } } - - return GL_FALSE; + else { + *width = *height = *depth = 0; + } } /** - * This is the implementation for glGetnTexImageARB, glGetTextureImage, - * and glGetTexImage. - * - * Requires caller to pass in texImage object because _mesa_GetTextureImage - * must handle the GL_TEXTURE_CUBE_MAP target. - * - * \param target texture target. + * Common code for all (uncompressed) get-texture-image functions. + * \param texObj the texture object (should not be null) + * \param target user-provided target, or 0 for DSA * \param level image level. * \param format pixel data format for returned image. * \param type pixel data type for returned image. * \param bufSize size of the pixels data buffer. * \param pixels returned pixel data. - * \param dsa True when the caller is an ARB_direct_state_access function, - * false otherwise + * \param caller name of calling function */ -void -_mesa_get_texture_image(struct gl_context *ctx, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage, GLenum target, - GLint level, GLenum format, GLenum type, - GLsizei bufSize, GLvoid *pixels, bool dsa) +static void +get_texture_image(struct gl_context *ctx, + struct gl_texture_object *texObj, + GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLint depth, + GLenum format, GLenum type, + GLvoid *pixels, const char *caller) { - assert(texObj); - assert(texImage); + struct gl_texture_image *texImage; + unsigned firstFace, numFaces, i; + GLint imageStride; FLUSH_VERTICES(ctx, 0); - /* - * Legal target checking has been moved up to GetnTexImage and - * GetTextureImage so that it can be caught before receiving a NULL - * texImage object and exiting. - */ - - if (getteximage_error_check(ctx, texImage, target, level, format, - type, bufSize, pixels, dsa)) { - return; - } + texImage = select_tex_image(texObj, target, level, zoffset); + assert(texImage); /* should have been error checked already */ - if (!_mesa_is_bufferobj(ctx->Pack.BufferObj) && !pixels) { - /* not an error, do nothing */ + if (_mesa_is_zero_size_texture(texImage)) { + /* no image data to return */ return; } - if (_mesa_is_zero_size_texture(texImage)) - return; - if (MESA_VERBOSE & (VERBOSE_API | VERBOSE_TEXTURE)) { - _mesa_debug(ctx, "glGetTex%sImage(tex %u) format = %s, w=%d, h=%d," + _mesa_debug(ctx, "%s(tex %u) format = %s, w=%d, h=%d," " dstFmt=0x%x, dstType=0x%x\n", - dsa ? 
"ture": "", - texObj->Name, + caller, texObj->Name, _mesa_get_format_name(texImage->TexFormat), texImage->Width, texImage->Height, format, type); } + if (target == GL_TEXTURE_CUBE_MAP) { + /* Compute stride between cube faces */ + imageStride = _mesa_image_image_stride(&ctx->Pack, width, height, + format, type); + firstFace = zoffset; + numFaces = depth; + zoffset = 0; + depth = 1; + } + else { + imageStride = 0; + firstFace = _mesa_tex_target_to_face(target); + numFaces = 1; + } + _mesa_lock_texture(ctx, texObj); - { - ctx->Driver.GetTexImage(ctx, format, type, pixels, texImage); + + for (i = 0; i < numFaces; i++) { + texImage = texObj->Image[firstFace + i][level]; + assert(texImage); + + ctx->Driver.GetTexSubImage(ctx, xoffset, yoffset, zoffset, + width, height, depth, + format, type, pixels, texImage); + + /* next cube face */ + pixels = (GLubyte *) pixels + imageStride; } + _mesa_unlock_texture(ctx, texObj); } -/** - * Get texture image. Called by glGetTexImage. - * - * \param target texture target. - * \param level image level. - * \param format pixel data format for returned image. - * \param type pixel data type for returned image. - * \param bufSize size of the pixels data buffer. - * \param pixels returned pixel data. - */ + void GLAPIENTRY -_mesa_GetnTexImageARB(GLenum target, GLint level, GLenum format, - GLenum type, GLsizei bufSize, GLvoid *pixels) +_mesa_GetnTexImageARB(GLenum target, GLint level, GLenum format, GLenum type, + GLsizei bufSize, GLvoid *pixels) { - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; - GLenum err; GET_CURRENT_CONTEXT(ctx); + static const char *caller = "glGetnTexImageARB"; + GLsizei width, height, depth; + struct gl_texture_object *texObj; - /* - * This has been moved here because a format/type mismatch can cause a NULL - * texImage object, which in turn causes the mismatch error to be - * ignored. - */ - err = _mesa_error_check_format_and_type(ctx, format, type); - if (err != GL_NO_ERROR) { - _mesa_error(ctx, err, "glGetnTexImage(format/type)"); - return; - } - - /* - * Legal target checking has been moved here to prevent exiting with a NULL - * texImage object. 
- */ if (!legal_getteximage_target(ctx, target, false)) { - _mesa_error(ctx, GL_INVALID_ENUM, "glGetnTexImage(target=0x%x)", - target); + _mesa_error(ctx, GL_INVALID_ENUM, "%s", caller); return; } texObj = _mesa_get_current_tex_object(ctx, target); - if (!texObj) - return; + assert(texObj); + + get_texture_image_dims(texObj, target, level, &width, &height, &depth); - texImage = _mesa_select_tex_image(texObj, target, level); - if (!texImage) + if (getteximage_error_check(ctx, texObj, target, level, + 0, 0, 0, width, height, depth, + format, type, bufSize, pixels, caller)) { return; + } - _mesa_get_texture_image(ctx, texObj, texImage, target, level, format, type, - bufSize, pixels, false); + get_texture_image(ctx, texObj, target, level, + 0, 0, 0, width, height, depth, + format, type, pixels, caller); } void GLAPIENTRY -_mesa_GetTexImage( GLenum target, GLint level, GLenum format, - GLenum type, GLvoid *pixels ) +_mesa_GetTexImage(GLenum target, GLint level, GLenum format, GLenum type, + GLvoid *pixels ) { - _mesa_GetnTexImageARB(target, level, format, type, INT_MAX, pixels); + GET_CURRENT_CONTEXT(ctx); + static const char *caller = "glGetTexImage"; + GLsizei width, height, depth; + struct gl_texture_object *texObj; + + if (!legal_getteximage_target(ctx, target, false)) { + _mesa_error(ctx, GL_INVALID_ENUM, "%s", caller); + return; + } + + texObj = _mesa_get_current_tex_object(ctx, target); + assert(texObj); + + get_texture_image_dims(texObj, target, level, &width, &height, &depth); + + if (getteximage_error_check(ctx, texObj, target, level, + 0, 0, 0, width, height, depth, + format, type, INT_MAX, pixels, caller)) { + return; + } + + get_texture_image(ctx, texObj, target, level, + 0, 0, 0, width, height, depth, + format, type, pixels, caller); } -/** - * Get texture image. - * - * \param texture texture name. - * \param level image level. - * \param format pixel data format for returned image. - * \param type pixel data type for returned image. - * \param bufSize size of the pixels data buffer. - * \param pixels returned pixel data. - */ + void GLAPIENTRY -_mesa_GetTextureImage(GLuint texture, GLint level, GLenum format, - GLenum type, GLsizei bufSize, GLvoid *pixels) +_mesa_GetTextureImage(GLuint texture, GLint level, GLenum format, GLenum type, + GLsizei bufSize, GLvoid *pixels) { - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; - int i; - GLint image_stride; - GLenum err; GET_CURRENT_CONTEXT(ctx); + GLsizei width, height, depth; + static const char *caller = "glGetTextureImage"; + struct gl_texture_object *texObj = + _mesa_lookup_texture_err(ctx, texture, caller); - /* - * This has been moved here because a format/type mismatch can cause a NULL - * texImage object, which in turn causes the mismatch error to be - * ignored. - */ - err = _mesa_error_check_format_and_type(ctx, format, type); - if (err != GL_NO_ERROR) { - _mesa_error(ctx, err, "glGetTextureImage(format/type)"); + if (!texObj) { return; } - texObj = _mesa_lookup_texture_err(ctx, texture, "glGetTextureImage"); - if (!texObj) - return; + get_texture_image_dims(texObj, texObj->Target, level, + &width, &height, &depth); - /* - * Legal target checking has been moved here to prevent exiting with a NULL - * texImage object. 
- */ - if (!legal_getteximage_target(ctx, texObj->Target, true)) { - _mesa_error(ctx, GL_INVALID_ENUM, "glGetTextureImage(target=%s)", - _mesa_lookup_enum_by_nr(texObj->Target)); + if (getteximage_error_check(ctx, texObj, texObj->Target, level, + 0, 0, 0, width, height, depth, + format, type, bufSize, pixels, caller)) { return; } - /* Must handle special case GL_TEXTURE_CUBE_MAP. */ - if (texObj->Target == GL_TEXTURE_CUBE_MAP) { - - /* Make sure the texture object is a proper cube. - * (See texturesubimage in teximage.c for details on why this check is - * performed.) - */ - if (!_mesa_cube_level_complete(texObj, level)) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetTextureImage(cube map incomplete)"); - return; - } + get_texture_image(ctx, texObj, texObj->Target, level, + 0, 0, 0, width, height, depth, + format, type, pixels, caller); +} - /* Copy each face. */ - for (i = 0; i < 6; ++i) { - texImage = texObj->Image[i][level]; - assert(texImage); - _mesa_get_texture_image(ctx, texObj, texImage, texObj->Target, level, - format, type, bufSize, pixels, true); +void GLAPIENTRY +_mesa_GetTextureSubImage(GLuint texture, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, GLsizei bufSize, + void *pixels) +{ + GET_CURRENT_CONTEXT(ctx); + static const char *caller = "glGetTextureSubImage"; + struct gl_texture_object *texObj = + _mesa_lookup_texture_err(ctx, texture, caller); - image_stride = _mesa_image_image_stride(&ctx->Pack, texImage->Width, - texImage->Height, format, - type); - pixels = (GLubyte *) pixels + image_stride; - bufSize -= image_stride; - } + if (!texObj) { + return; } - else { - texImage = _mesa_select_tex_image(texObj, texObj->Target, level); - if (!texImage) - return; - _mesa_get_texture_image(ctx, texObj, texImage, texObj->Target, level, - format, type, bufSize, pixels, true); + if (getteximage_error_check(ctx, texObj, texObj->Target, level, + xoffset, yoffset, zoffset, width, height, depth, + format, type, bufSize, pixels, caller)) { + return; } + + get_texture_image(ctx, texObj, texObj->Target, level, + xoffset, yoffset, zoffset, width, height, depth, + format, type, pixels, caller); } + + /** - * Do error checking for a glGetCompressedTexImage() call. - * \return GL_TRUE if any error, GL_FALSE if no errors. + * Compute the number of bytes which will be written when retrieving + * a sub-region of a compressed texture. */ -static GLboolean +static GLsizei +packed_compressed_size(GLuint dimensions, mesa_format format, + GLsizei width, GLsizei height, GLsizei depth, + const struct gl_pixelstore_attrib *packing) +{ + struct compressed_pixelstore st; + GLsizei totalBytes; + + _mesa_compute_compressed_pixelstore(dimensions, format, + width, height, depth, + packing, &st); + totalBytes = + (st.CopySlices - 1) * st.TotalRowsPerSlice * st.TotalBytesPerRow + + st.SkipBytes + + (st.CopyRowsPerSlice - 1) * st.TotalBytesPerRow + + st.CopyBytesPerRow; + + return totalBytes; +} + + +/** + * Do error checking for getting compressed texture images. + * \return true if any error, false if no errors. 
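Since _mesa_GetTextureSubImage() is the new public entry point added here (GL 4.5 / ARB_get_texture_sub_image), a hedged usage sketch may help; tex and the 16x16 window at (32, 32) are placeholder values, not taken from the diff:

    /* Read a 16x16 texel window of mip level 0 into client memory. */
    GLubyte window[16 * 16 * 4];                /* GL_RGBA + GL_UNSIGNED_BYTE */
    glGetTextureSubImage(tex, 0,                /* texture, level             */
                         32, 32, 0,             /* xoffset, yoffset, zoffset  */
                         16, 16, 1,             /* width, height, depth       */
                         GL_RGBA, GL_UNSIGNED_BYTE,
                         sizeof(window), window);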
+ */ +static bool getcompressedteximage_error_check(struct gl_context *ctx, - struct gl_texture_image *texImage, - GLenum target, - GLint level, GLsizei clientMemSize, - GLvoid *img, bool dsa) + struct gl_texture_object *texObj, + GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLsizei bufSize, GLvoid *pixels, + const char *caller) { - const GLint maxLevels = _mesa_max_texture_levels(ctx, target); - GLuint compressedSize, dimensions; - const char *suffix = dsa ? "ture" : ""; + struct gl_texture_image *texImage; + GLint maxLevels; + GLsizei totalBytes; + GLuint dimensions; - assert(texImage); + assert(texObj); - if (!legal_getteximage_target(ctx, target, dsa)) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glGetCompressedTex%sImage(target=%s)", suffix, - _mesa_lookup_enum_by_nr(target)); - return GL_TRUE; + if (texObj->Target == 0) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture)", caller); + return true; } - assert(maxLevels != 0); + maxLevels = _mesa_max_texture_levels(ctx, target); if (level < 0 || level >= maxLevels) { _mesa_error(ctx, GL_INVALID_VALUE, - "glGetCompressedTex%sImage(bad level = %d)", suffix, level); - return GL_TRUE; + "%s(bad level = %d)", caller, level); + return true; + } + + if (dimensions_error_check(ctx, texObj, target, level, + xoffset, yoffset, zoffset, + width, height, depth, caller)) { + return true; } + texImage = select_tex_image(texObj, target, level, zoffset); + assert(texImage); + if (!_mesa_is_format_compressed(texImage->TexFormat)) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetCompressedTex%sImage(texture is not compressed)", - suffix); - return GL_TRUE; + "%s(texture is not compressed)", caller); + return true; } - compressedSize = _mesa_format_image_size(texImage->TexFormat, - texImage->Width, - texImage->Height, - texImage->Depth); - /* Check for invalid pixel storage modes */ - dimensions = _mesa_get_texture_dimensions(texImage->TexObject->Target); + dimensions = _mesa_get_texture_dimensions(texObj->Target); if (!_mesa_compressed_pixel_storage_error_check(ctx, dimensions, - &ctx->Pack, dsa ? - "glGetCompressedTextureImage": - "glGetCompressedTexImage")) { - return GL_TRUE; + &ctx->Pack, + caller)) { + return true; } - if (!_mesa_is_bufferobj(ctx->Pack.BufferObj)) { - /* do bounds checking on writing to client memory */ - if (clientMemSize < (GLsizei) compressedSize) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "%s(out of bounds access: bufSize (%d) is too small)", - dsa ? 
"glGetCompressedTextureImage" : - "glGetnCompressedTexImageARB", clientMemSize); - return GL_TRUE; - } - } else { + /* Compute number of bytes that may be touched in the dest buffer */ + totalBytes = packed_compressed_size(dimensions, texImage->TexFormat, + width, height, depth, + &ctx->Pack); + + /* Do dest buffer bounds checking */ + if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) { /* do bounds checking on PBO write */ - if ((const GLubyte *) img + compressedSize > - (const GLubyte *) ctx->Pack.BufferObj->Size) { + if ((GLubyte *) pixels + totalBytes > + (GLubyte *) ctx->Pack.BufferObj->Size) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetCompressedTex%sImage(out of bounds PBO access)", - suffix); - return GL_TRUE; + "%s(out of bounds PBO access)", caller); + return true; } /* make sure PBO is not mapped */ if (_mesa_check_disallowed_mapping(ctx->Pack.BufferObj)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(PBO is mapped)", caller); + return true; + } + } + else { + /* do bounds checking on writing to client memory */ + if (totalBytes > bufSize) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetCompressedTex%sImage(PBO is mapped)", suffix); - return GL_TRUE; + "%s(out of bounds access: bufSize (%d) is too small)", + caller, bufSize); + return true; } } - return GL_FALSE; + if (!_mesa_is_bufferobj(ctx->Pack.BufferObj) && !pixels) { + /* not an error, but do nothing */ + return true; + } + + return false; } -/** Implements glGetnCompressedTexImageARB, glGetCompressedTexImage, and - * glGetCompressedTextureImage. - * - * texImage must be passed in because glGetCompressedTexImage must handle the - * target GL_TEXTURE_CUBE_MAP. + +/** + * Common helper for all glGetCompressed-teximage functions. */ -void -_mesa_get_compressed_texture_image(struct gl_context *ctx, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage, - GLenum target, GLint level, - GLsizei bufSize, GLvoid *pixels, - bool dsa) +static void +get_compressed_texture_image(struct gl_context *ctx, + struct gl_texture_object *texObj, + GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLint depth, + GLvoid *pixels, + const char *caller) { - assert(texObj); - assert(texImage); + struct gl_texture_image *texImage; + unsigned firstFace, numFaces, i, imageStride; FLUSH_VERTICES(ctx, 0); - if (getcompressedteximage_error_check(ctx, texImage, target, level, - bufSize, pixels, dsa)) { - return; - } - - if (!_mesa_is_bufferobj(ctx->Pack.BufferObj) && !pixels) { - /* not an error, do nothing */ - return; - } + texImage = select_tex_image(texObj, target, level, zoffset); + assert(texImage); /* should have been error checked already */ if (_mesa_is_zero_size_texture(texImage)) return; if (MESA_VERBOSE & (VERBOSE_API | VERBOSE_TEXTURE)) { _mesa_debug(ctx, - "glGetCompressedTex%sImage(tex %u) format = %s, w=%d, h=%d\n", - dsa ? 
"ture" : "", texObj->Name, + "%s(tex %u) format = %s, w=%d, h=%d\n", + caller, texObj->Name, _mesa_get_format_name(texImage->TexFormat), texImage->Width, texImage->Height); } + if (target == GL_TEXTURE_CUBE_MAP) { + struct compressed_pixelstore store; + + /* Compute image stride between cube faces */ + _mesa_compute_compressed_pixelstore(2, texImage->TexFormat, + width, height, depth, + &ctx->Pack, &store); + imageStride = store.TotalBytesPerRow * store.TotalRowsPerSlice; + + firstFace = zoffset; + numFaces = depth; + zoffset = 0; + depth = 1; + } + else { + imageStride = 0; + firstFace = _mesa_tex_target_to_face(target); + numFaces = 1; + } + _mesa_lock_texture(ctx, texObj); - { - ctx->Driver.GetCompressedTexImage(ctx, texImage, pixels); + + for (i = 0; i < numFaces; i++) { + texImage = texObj->Image[firstFace + i][level]; + assert(texImage); + + ctx->Driver.GetCompressedTexSubImage(ctx, texImage, + xoffset, yoffset, zoffset, + width, height, depth, pixels); + + /* next cube face */ + pixels = (GLubyte *) pixels + imageStride; } + _mesa_unlock_texture(ctx, texObj); } + void GLAPIENTRY _mesa_GetnCompressedTexImageARB(GLenum target, GLint level, GLsizei bufSize, - GLvoid *img) + GLvoid *pixels) { - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; GET_CURRENT_CONTEXT(ctx); + static const char *caller = "glGetnCompressedTexImageARB"; + GLsizei width, height, depth; + struct gl_texture_object *texObj; - texObj = _mesa_get_current_tex_object(ctx, target); - if (!texObj) + if (!legal_getteximage_target(ctx, target, false)) { + _mesa_error(ctx, GL_INVALID_ENUM, "%s", caller); return; + } - texImage = _mesa_select_tex_image(texObj, target, level); - if (!texImage) + texObj = _mesa_get_current_tex_object(ctx, target); + assert(texObj); + + get_texture_image_dims(texObj, target, level, &width, &height, &depth); + + if (getcompressedteximage_error_check(ctx, texObj, target, level, + 0, 0, 0, width, height, depth, + INT_MAX, pixels, caller)) { return; + } - _mesa_get_compressed_texture_image(ctx, texObj, texImage, target, level, - bufSize, img, false); + get_compressed_texture_image(ctx, texObj, target, level, + 0, 0, 0, width, height, depth, + pixels, caller); } + void GLAPIENTRY -_mesa_GetCompressedTexImage(GLenum target, GLint level, GLvoid *img) +_mesa_GetCompressedTexImage(GLenum target, GLint level, GLvoid *pixels) { - _mesa_GetnCompressedTexImageARB(target, level, INT_MAX, img); + GET_CURRENT_CONTEXT(ctx); + static const char *caller = "glGetCompressedTexImage"; + GLsizei width, height, depth; + struct gl_texture_object *texObj; + + if (!legal_getteximage_target(ctx, target, false)) { + _mesa_error(ctx, GL_INVALID_ENUM, "%s", caller); + return; + } + + texObj = _mesa_get_current_tex_object(ctx, target); + assert(texObj); + + get_texture_image_dims(texObj, target, level, + &width, &height, &depth); + + if (getcompressedteximage_error_check(ctx, texObj, target, level, + 0, 0, 0, width, height, depth, + INT_MAX, pixels, caller)) { + return; + } + + get_compressed_texture_image(ctx, texObj, target, level, + 0, 0, 0, width, height, depth, + pixels, caller); } -/** - * Get compressed texture image. - * - * \param texture texture name. - * \param level image level. - * \param bufSize size of the pixels data buffer. - * \param pixels returned pixel data. 
- */ + void GLAPIENTRY _mesa_GetCompressedTextureImage(GLuint texture, GLint level, GLsizei bufSize, GLvoid *pixels) { - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; - int i; - GLint image_stride; GET_CURRENT_CONTEXT(ctx); + static const char *caller = "glGetCompressedTextureImage"; + GLsizei width, height, depth; + struct gl_texture_object *texObj = + _mesa_lookup_texture_err(ctx, texture, caller); - texObj = _mesa_lookup_texture_err(ctx, texture, - "glGetCompressedTextureImage"); - if (!texObj) + if (!texObj) { return; + } - /* Must handle special case GL_TEXTURE_CUBE_MAP. */ - if (texObj->Target == GL_TEXTURE_CUBE_MAP) { + get_texture_image_dims(texObj, texObj->Target, level, + &width, &height, &depth); - /* Make sure the texture object is a proper cube. - * (See texturesubimage in teximage.c for details on why this check is - * performed.) - */ - if (!_mesa_cube_level_complete(texObj, level)) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glGetCompressedTextureImage(cube map incomplete)"); - return; - } + if (getcompressedteximage_error_check(ctx, texObj, texObj->Target, level, + 0, 0, 0, width, height, depth, + bufSize, pixels, caller)) { + return; + } - /* Copy each face. */ - for (i = 0; i < 6; ++i) { - texImage = texObj->Image[i][level]; - assert(texImage); + get_compressed_texture_image(ctx, texObj, texObj->Target, level, + 0, 0, 0, width, height, depth, + pixels, caller); +} - _mesa_get_compressed_texture_image(ctx, texObj, texImage, - texObj->Target, level, - bufSize, pixels, true); - /* Compressed images don't have a client format */ - image_stride = _mesa_format_image_size(texImage->TexFormat, - texImage->Width, - texImage->Height, 1); +void APIENTRY +_mesa_GetCompressedTextureSubImage(GLuint texture, GLint level, + GLint xoffset, GLint yoffset, + GLint zoffset, GLsizei width, + GLsizei height, GLsizei depth, + GLsizei bufSize, void *pixels) +{ + GET_CURRENT_CONTEXT(ctx); + static const char *caller = "glGetCompressedTextureImage"; + struct gl_texture_object *texObj; - pixels = (GLubyte *) pixels + image_stride; - bufSize -= image_stride; - } + texObj = _mesa_lookup_texture_err(ctx, texture, caller); + if (!texObj) { + return; } - else { - texImage = _mesa_select_tex_image(texObj, texObj->Target, level); - if (!texImage) - return; - _mesa_get_compressed_texture_image(ctx, texObj, texImage, - texObj->Target, level, bufSize, - pixels, true); + if (getcompressedteximage_error_check(ctx, texObj, texObj->Target, level, + xoffset, yoffset, zoffset, + width, height, depth, + bufSize, pixels, caller)) { + return; } + + get_compressed_texture_image(ctx, texObj, texObj->Target, level, + xoffset, yoffset, zoffset, + width, height, depth, + pixels, caller); } diff --git a/src/mesa/main/texgetimage.h b/src/mesa/main/texgetimage.h index 1fa2f59dcdc..63c75eb931d 100644 --- a/src/mesa/main/texgetimage.h +++ b/src/mesa/main/texgetimage.h @@ -37,22 +37,19 @@ extern GLenum _mesa_base_pack_format(GLenum format); extern void -_mesa_GetTexImage_sw(struct gl_context *ctx, - GLenum format, GLenum type, GLvoid *pixels, - struct gl_texture_image *texImage); - - -extern void -_mesa_GetCompressedTexImage_sw(struct gl_context *ctx, - struct gl_texture_image *texImage, - GLvoid *data); +_mesa_GetTexSubImage_sw(struct gl_context *ctx, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLint depth, + GLenum format, GLenum type, GLvoid *pixels, + struct gl_texture_image *texImage); extern void -_mesa_get_texture_image(struct gl_context *ctx, - struct 
gl_texture_object *texObj, - struct gl_texture_image *texImage, GLenum target, - GLint level, GLenum format, GLenum type, - GLsizei bufSize, GLvoid *pixels, bool dsa); +_mesa_GetCompressedTexSubImage_sw(struct gl_context *ctx, + struct gl_texture_image *texImage, + GLint xoffset, GLint yoffset, + GLint zoffset, GLsizei width, + GLint height, GLint depth, + GLvoid *data); extern void _mesa_get_compressed_texture_image( struct gl_context *ctx, @@ -74,6 +71,14 @@ _mesa_GetTextureImage(GLuint texture, GLint level, GLenum format, GLenum type, GLsizei bufSize, GLvoid *pixels); extern void GLAPIENTRY +_mesa_GetTextureSubImage(GLuint texture, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, GLsizei bufSize, + void *pixels); + + +extern void GLAPIENTRY _mesa_GetCompressedTexImage(GLenum target, GLint lod, GLvoid *img); extern void GLAPIENTRY @@ -84,4 +89,11 @@ extern void GLAPIENTRY _mesa_GetCompressedTextureImage(GLuint texture, GLint level, GLsizei bufSize, GLvoid *pixels); +extern void APIENTRY +_mesa_GetCompressedTextureSubImage(GLuint texture, GLint level, + GLint xoffset, GLint yoffset, + GLint zoffset, GLsizei width, + GLsizei height, GLsizei depth, + GLsizei bufSize, void *pixels); + #endif /* TEXGETIMAGE_H */ diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 3d85615fa45..3a556a6ad6e 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -1008,7 +1008,7 @@ _mesa_max_texture_levels(struct gl_context *ctx, GLenum target) case GL_PROXY_TEXTURE_2D_MULTISAMPLE: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: case GL_PROXY_TEXTURE_2D_MULTISAMPLE_ARRAY: - return _mesa_is_desktop_gl(ctx) + return (_mesa_is_desktop_gl(ctx) || _mesa_is_gles31(ctx)) && ctx->Extensions.ARB_texture_multisample ? 1 : 0; case GL_TEXTURE_EXTERNAL_OES: @@ -1793,8 +1793,6 @@ GLboolean _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target, GLenum intFormat) { - (void) intFormat; /* not used yet */ - switch (target) { case GL_TEXTURE_2D: case GL_PROXY_TEXTURE_2D: @@ -1814,6 +1812,16 @@ _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target, case GL_PROXY_TEXTURE_CUBE_MAP_ARRAY: case GL_TEXTURE_CUBE_MAP_ARRAY: return ctx->Extensions.ARB_texture_cube_map_array; + case GL_TEXTURE_3D: + switch (intFormat) { + case GL_COMPRESSED_RGBA_BPTC_UNORM: + case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM: + case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT: + case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT: + return ctx->Extensions.ARB_texture_compression_bptc; + default: + return GL_FALSE; + } default: return GL_FALSE; } @@ -2081,6 +2089,53 @@ texture_formats_agree(GLenum internalFormat, } /** + * Test the combination of format, type and internal format arguments of + * different texture operations on GLES. + * + * \param ctx GL context. + * \param format pixel data format given by the user. + * \param type pixel data type given by the user. + * \param internalFormat internal format given by the user. + * \param dimensions texture image dimensions (must be 1, 2 or 3). + * \param callerName name of the caller function to print in the error message + * + * \return true if a error is found, false otherwise + * + * Currently, it is used by texture_error_check() and texsubimage_error_check(). 
+ */ +static bool +texture_format_error_check_gles(struct gl_context *ctx, GLenum format, + GLenum type, GLenum internalFormat, + GLuint dimensions, const char *callerName) +{ + GLenum err; + + if (_mesa_is_gles3(ctx)) { + err = _mesa_es3_error_check_format_and_type(ctx, format, type, + internalFormat); + if (err != GL_NO_ERROR) { + _mesa_error(ctx, err, + "%s(format = %s, type = %s, internalformat = %s)", + callerName, _mesa_enum_to_string(format), + _mesa_enum_to_string(type), + _mesa_enum_to_string(internalFormat)); + return true; + } + } + else { + err = _mesa_es_error_check_format_and_type(format, type, dimensions); + if (err != GL_NO_ERROR) { + _mesa_error(ctx, err, "%s(format = %s, type = %s)", + callerName, _mesa_enum_to_string(format), + _mesa_enum_to_string(type)); + return true; + } + } + + return false; +} + +/** * Test the glTexImage[123]D() parameters for errors. * * \param ctx GL context. @@ -2151,39 +2206,17 @@ texture_error_check( struct gl_context *ctx, * Formats and types that require additional extensions (e.g., GL_FLOAT * requires GL_OES_texture_float) are filtered elsewhere. */ - - if (_mesa_is_gles(ctx)) { - if (_mesa_is_gles3(ctx)) { - err = _mesa_es3_error_check_format_and_type(ctx, format, type, - internalFormat); - } else { - if (format != internalFormat) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glTexImage%dD(format = %s, internalFormat = %s)", - dimensions, - _mesa_lookup_enum_by_nr(format), - _mesa_lookup_enum_by_nr(internalFormat)); - return GL_TRUE; - } - - err = _mesa_es_error_check_format_and_type(format, type, dimensions); - } - if (err != GL_NO_ERROR) { - _mesa_error(ctx, err, - "glTexImage%dD(format = %s, type = %s, internalFormat = %s)", - dimensions, - _mesa_lookup_enum_by_nr(format), - _mesa_lookup_enum_by_nr(type), - _mesa_lookup_enum_by_nr(internalFormat)); - return GL_TRUE; - } + if (_mesa_is_gles(ctx) && + texture_format_error_check_gles(ctx, format, type, internalFormat, + dimensions, "glTexImage%dD")) { + return GL_TRUE; } /* Check internalFormat */ if (_mesa_base_tex_format(ctx, internalFormat) < 0) { _mesa_error(ctx, GL_INVALID_VALUE, "glTexImage%dD(internalFormat=%s)", - dimensions, _mesa_lookup_enum_by_nr(internalFormat)); + dimensions, _mesa_enum_to_string(internalFormat)); return GL_TRUE; } @@ -2192,8 +2225,8 @@ texture_error_check( struct gl_context *ctx, if (err != GL_NO_ERROR) { _mesa_error(ctx, err, "glTexImage%dD(incompatible format = %s, type = %s)", - dimensions, _mesa_lookup_enum_by_nr(format), - _mesa_lookup_enum_by_nr(type)); + dimensions, _mesa_enum_to_string(format), + _mesa_enum_to_string(type)); return GL_TRUE; } @@ -2208,8 +2241,8 @@ texture_error_check( struct gl_context *ctx, if (!texture_formats_agree(internalFormat, format)) { _mesa_error(ctx, GL_INVALID_OPERATION, "glTexImage%dD(incompatible internalFormat = %s, format = %s)", - dimensions, _mesa_lookup_enum_by_nr(internalFormat), - _mesa_lookup_enum_by_nr(format)); + dimensions, _mesa_enum_to_string(internalFormat), + _mesa_enum_to_string(format)); return GL_TRUE; } @@ -2324,7 +2357,7 @@ compressed_texture_error_check(struct gl_context *ctx, GLint dimensions, if (!_mesa_is_compressed_format(ctx, internalFormat)) { _mesa_error(ctx, GL_INVALID_ENUM, "glCompressedTexImage%dD(internalFormat=%s)", - dimensions, _mesa_lookup_enum_by_nr(internalFormat)); + dimensions, _mesa_enum_to_string(internalFormat)); return GL_TRUE; } @@ -2479,40 +2512,38 @@ texsubimage_error_check(struct gl_context *ctx, GLuint dimensions, return GL_TRUE; } - /* check target (proxies not allowed) 
*/ - if (!legal_texsubimage_target(ctx, dimensions, target, dsa)) { - _mesa_error(ctx, GL_INVALID_ENUM, "%s(target=%s)", - callerName, _mesa_lookup_enum_by_nr(target)); - return GL_TRUE; - } - /* level check */ if (level < 0 || level >= _mesa_max_texture_levels(ctx, target)) { _mesa_error(ctx, GL_INVALID_VALUE, "%s(level=%d)", callerName, level); return GL_TRUE; } - /* OpenGL ES 1.x and OpenGL ES 2.0 impose additional restrictions on the - * combinations of format and type that can be used. Formats and types - * that require additional extensions (e.g., GL_FLOAT requires - * GL_OES_texture_float) are filtered elsewhere. - */ - if (_mesa_is_gles(ctx) && !_mesa_is_gles3(ctx)) { - err = _mesa_es_error_check_format_and_type(format, type, dimensions); - if (err != GL_NO_ERROR) { - _mesa_error(ctx, err, "%s(format = %s, type = %s)", - callerName, _mesa_lookup_enum_by_nr(format), - _mesa_lookup_enum_by_nr(type)); - return GL_TRUE; - } + texImage = _mesa_select_tex_image(texObj, target, level); + if (!texImage) { + /* non-existant texture level */ + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture image)", + callerName); + return GL_TRUE; } err = _mesa_error_check_format_and_type(ctx, format, type); if (err != GL_NO_ERROR) { _mesa_error(ctx, err, "%s(incompatible format = %s, type = %s)", - callerName, _mesa_lookup_enum_by_nr(format), - _mesa_lookup_enum_by_nr(type)); + callerName, _mesa_enum_to_string(format), + _mesa_enum_to_string(type)); + return GL_TRUE; + } + + /* OpenGL ES 1.x and OpenGL ES 2.0 impose additional restrictions on the + * combinations of format, internalFormat, and type that can be used. + * Formats and types that require additional extensions (e.g., GL_FLOAT + * requires GL_OES_texture_float) are filtered elsewhere. + */ + if (_mesa_is_gles(ctx) && + texture_format_error_check_gles(ctx, format, type, + texImage->InternalFormat, + dimensions, callerName)) { return GL_TRUE; } @@ -2523,14 +2554,6 @@ texsubimage_error_check(struct gl_context *ctx, GLuint dimensions, return GL_TRUE; } - texImage = _mesa_select_tex_image(texObj, target, level); - if (!texImage) { - /* non-existant texture level */ - _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture image)", - callerName); - return GL_TRUE; - } - if (error_check_subtexture_dimensions(ctx, dimensions, texImage, xoffset, yoffset, zoffset, width, height, depth, callerName)) { @@ -2590,7 +2613,7 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions, /* check target */ if (!legal_texsubimage_target(ctx, dimensions, target, false)) { _mesa_error(ctx, GL_INVALID_ENUM, "glCopyTexImage%uD(target=%s)", - dimensions, _mesa_lookup_enum_by_nr(target)); + dimensions, _mesa_enum_to_string(target)); return GL_TRUE; } @@ -2629,13 +2652,6 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions, return GL_TRUE; } - rb = _mesa_get_read_renderbuffer_for_format(ctx, internalFormat); - if (rb == NULL) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glCopyTexImage%dD(read buffer)", dimensions); - return GL_TRUE; - } - /* OpenGL ES 1.x and OpenGL ES 2.0 impose additional restrictions on the * internalFormat. 
*/ @@ -2648,18 +2664,25 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions, case GL_LUMINANCE_ALPHA: break; default: - _mesa_error(ctx, GL_INVALID_VALUE, + _mesa_error(ctx, GL_INVALID_ENUM, "glCopyTexImage%dD(internalFormat=%s)", dimensions, - _mesa_lookup_enum_by_nr(internalFormat)); + _mesa_enum_to_string(internalFormat)); return GL_TRUE; } } baseFormat = _mesa_base_tex_format(ctx, internalFormat); if (baseFormat < 0) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, GL_INVALID_ENUM, "glCopyTexImage%dD(internalFormat=%s)", dimensions, - _mesa_lookup_enum_by_nr(internalFormat)); + _mesa_enum_to_string(internalFormat)); + return GL_TRUE; + } + + rb = _mesa_get_read_renderbuffer_for_format(ctx, internalFormat); + if (rb == NULL) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glCopyTexImage%dD(read buffer)", dimensions); return GL_TRUE; } @@ -2669,7 +2692,7 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions, if (rb_base_format < 0) { _mesa_error(ctx, GL_INVALID_VALUE, "glCopyTexImage%dD(internalFormat=%s)", dimensions, - _mesa_lookup_enum_by_nr(internalFormat)); + _mesa_enum_to_string(internalFormat)); return GL_TRUE; } } @@ -2696,7 +2719,7 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions, if (!valid) { _mesa_error(ctx, GL_INVALID_OPERATION, "glCopyTexImage%dD(internalFormat=%s)", dimensions, - _mesa_lookup_enum_by_nr(internalFormat)); + _mesa_enum_to_string(internalFormat)); return GL_TRUE; } } @@ -2735,10 +2758,10 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions, * types for SNORM formats. Also, conversion to SNORM formats is not * allowed by Table 3.2 on Page 110. */ - if(_mesa_is_enum_format_snorm(internalFormat)) { + if (_mesa_is_enum_format_snorm(internalFormat)) { _mesa_error(ctx, GL_INVALID_OPERATION, "glCopyTexImage%dD(internalFormat=%s)", dimensions, - _mesa_lookup_enum_by_nr(internalFormat)); + _mesa_enum_to_string(internalFormat)); return GL_TRUE; } } @@ -3103,8 +3126,8 @@ _mesa_choose_texture_format(struct gl_context *ctx, "DXT compression requested (%s), " "but libtxc_dxtn library not installed. 
Using %s " "instead.", - _mesa_lookup_enum_by_nr(before), - _mesa_lookup_enum_by_nr(internalFormat)); + _mesa_enum_to_string(before), + _mesa_enum_to_string(internalFormat)); } } @@ -3191,18 +3214,18 @@ teximage(struct gl_context *ctx, GLboolean compressed, GLuint dims, _mesa_debug(ctx, "glCompressedTexImage%uD %s %d %s %d %d %d %d %p\n", dims, - _mesa_lookup_enum_by_nr(target), level, - _mesa_lookup_enum_by_nr(internalFormat), + _mesa_enum_to_string(target), level, + _mesa_enum_to_string(internalFormat), width, height, depth, border, pixels); else _mesa_debug(ctx, "glTexImage%uD %s %d %s %d %d %d %d %s %s %p\n", dims, - _mesa_lookup_enum_by_nr(target), level, - _mesa_lookup_enum_by_nr(internalFormat), + _mesa_enum_to_string(target), level, + _mesa_enum_to_string(internalFormat), width, height, depth, border, - _mesa_lookup_enum_by_nr(format), - _mesa_lookup_enum_by_nr(type), pixels); + _mesa_enum_to_string(format), + _mesa_enum_to_string(type), pixels); } internalFormat = override_internal_format(internalFormat, width, height); @@ -3210,7 +3233,7 @@ teximage(struct gl_context *ctx, GLboolean compressed, GLuint dims, /* target error checking */ if (!legal_teximage_target(ctx, dims, target)) { _mesa_error(ctx, GL_INVALID_ENUM, "%s%uD(target=%s)", - func, dims, _mesa_lookup_enum_by_nr(target)); + func, dims, _mesa_enum_to_string(target)); return; } @@ -3313,16 +3336,16 @@ teximage(struct gl_context *ctx, GLboolean compressed, GLuint dims, if (!dimensionsOK) { _mesa_error(ctx, GL_INVALID_VALUE, - "glTexImage%uD(invalid width or height or depth)", - dims); + "%s%uD(invalid width or height or depth)", + func, dims); return; } if (!sizeOK) { _mesa_error(ctx, GL_OUT_OF_MEMORY, - "glTexImage%uD(image too large: %d x %d x %d, %s format)", - dims, width, height, depth, - _mesa_lookup_enum_by_nr(internalFormat)); + "%s%uD(image too large: %d x %d x %d, %s format)", + func, dims, width, height, depth, + _mesa_enum_to_string(internalFormat)); return; } @@ -3495,7 +3518,6 @@ _mesa_EGLImageTargetTexture2DOES (GLenum target, GLeglImageOES image) _mesa_dirty_texobj(ctx, texObj); } _mesa_unlock_texture(ctx, texObj); - } @@ -3515,14 +3537,6 @@ _mesa_texture_sub_image(struct gl_context *ctx, GLuint dims, { FLUSH_VERTICES(ctx, 0); - /* check target (proxies not allowed) */ - if (!legal_texsubimage_target(ctx, dims, target, dsa)) { - _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sSubImage%uD(target=%s)", - dsa ? 
"ture" : "", - dims, _mesa_lookup_enum_by_nr(target)); - return; - } - if (ctx->NewState & _NEW_PIXEL) _mesa_update_state(ctx); @@ -3572,6 +3586,13 @@ texsubimage(struct gl_context *ctx, GLuint dims, GLenum target, GLint level, struct gl_texture_object *texObj; struct gl_texture_image *texImage; + /* check target (proxies not allowed) */ + if (!legal_texsubimage_target(ctx, dims, target, false)) { + _mesa_error(ctx, GL_INVALID_ENUM, "glTexSubImage%uD(target=%s)", + dims, _mesa_enum_to_string(target)); + return; + } + texObj = _mesa_get_current_tex_object(ctx, target); if (!texObj) return; @@ -3589,10 +3610,10 @@ texsubimage(struct gl_context *ctx, GLuint dims, GLenum target, GLint level, if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) _mesa_debug(ctx, "glTexSubImage%uD %s %d %d %d %d %d %d %d %s %s %p\n", dims, - _mesa_lookup_enum_by_nr(target), level, + _mesa_enum_to_string(target), level, xoffset, yoffset, zoffset, width, height, depth, - _mesa_lookup_enum_by_nr(format), - _mesa_lookup_enum_by_nr(type), pixels); + _mesa_enum_to_string(format), + _mesa_enum_to_string(type), pixels); _mesa_texture_sub_image(ctx, dims, texObj, texImage, target, level, xoffset, yoffset, zoffset, width, height, depth, @@ -3621,8 +3642,8 @@ texturesubimage(struct gl_context *ctx, GLuint dims, "glTextureSubImage%uD %d %d %d %d %d %d %d %d %s %s %p\n", dims, texture, level, xoffset, yoffset, zoffset, width, height, depth, - _mesa_lookup_enum_by_nr(format), - _mesa_lookup_enum_by_nr(type), pixels); + _mesa_enum_to_string(format), + _mesa_enum_to_string(type), pixels); /* Get the texture object by Name. */ texObj = _mesa_lookup_texture(ctx, texture); @@ -3632,6 +3653,13 @@ texturesubimage(struct gl_context *ctx, GLuint dims, return; } + /* check target (proxies not allowed) */ + if (!legal_texsubimage_target(ctx, dims, texObj->Target, true)) { + _mesa_error(ctx, GL_INVALID_ENUM, "%s(target=%s)", + callerName, _mesa_enum_to_string(texObj->Target)); + return; + } + if (texsubimage_error_check(ctx, dims, texObj, texObj->Target, level, xoffset, yoffset, zoffset, width, height, depth, format, type, @@ -3842,8 +3870,7 @@ copytexsubimage_by_slice(struct gl_context *ctx, } static GLboolean -formats_differ_in_component_sizes (mesa_format f1, - mesa_format f2) +formats_differ_in_component_sizes(mesa_format f1, mesa_format f2) { GLint f1_r_bits = _mesa_get_format_bits(f1, GL_RED_BITS); GLint f1_g_bits = _mesa_get_format_bits(f1, GL_GREEN_BITS); @@ -3883,8 +3910,8 @@ copyteximage(struct gl_context *ctx, GLuint dims, if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) _mesa_debug(ctx, "glCopyTexImage%uD %s %d %s %d %d %d %d %d\n", dims, - _mesa_lookup_enum_by_nr(target), level, - _mesa_lookup_enum_by_nr(internalFormat), + _mesa_enum_to_string(target), level, + _mesa_enum_to_string(internalFormat), x, y, width, height, border); if (ctx->NewState & NEW_COPY_TEX_STATE) @@ -3916,8 +3943,8 @@ copyteximage(struct gl_context *ctx, GLuint dims, */ if (rb->InternalFormat == GL_RGB10_A2) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glCopyTexImage%uD(Reading from GL_RGB10_A2 buffer and" - " writing to unsized internal format)", dims); + "glCopyTexImage%uD(Reading from GL_RGB10_A2 buffer" + " and writing to unsized internal format)", dims); return; } } @@ -4043,7 +4070,7 @@ _mesa_copy_texture_sub_image(struct gl_context *ctx, GLuint dims, if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) _mesa_debug(ctx, "%s %s %d %d %d %d %d %d %d %d\n", caller, - _mesa_lookup_enum_by_nr(target), + _mesa_enum_to_string(target), level, xoffset, yoffset, 
zoffset, x, y, width, height); if (ctx->NewState & NEW_COPY_TEX_STATE) @@ -4105,7 +4132,7 @@ _mesa_CopyTexSubImage1D( GLenum target, GLint level, */ if (!legal_texsubimage_target(ctx, 1, target, false)) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", self, - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return; } @@ -4133,7 +4160,7 @@ _mesa_CopyTexSubImage2D( GLenum target, GLint level, */ if (!legal_texsubimage_target(ctx, 2, target, false)) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", self, - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return; } @@ -4162,7 +4189,7 @@ _mesa_CopyTexSubImage3D( GLenum target, GLint level, */ if (!legal_texsubimage_target(ctx, 3, target, false)) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", self, - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return; } @@ -4190,7 +4217,7 @@ _mesa_CopyTextureSubImage1D(GLuint texture, GLint level, /* Check target (proxies not allowed). */ if (!legal_texsubimage_target(ctx, 1, texObj->Target, true)) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", self, - _mesa_lookup_enum_by_nr(texObj->Target)); + _mesa_enum_to_string(texObj->Target)); return; } @@ -4214,7 +4241,7 @@ _mesa_CopyTextureSubImage2D(GLuint texture, GLint level, /* Check target (proxies not allowed). */ if (!legal_texsubimage_target(ctx, 2, texObj->Target, true)) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", self, - _mesa_lookup_enum_by_nr(texObj->Target)); + _mesa_enum_to_string(texObj->Target)); return; } @@ -4241,7 +4268,7 @@ _mesa_CopyTextureSubImage3D(GLuint texture, GLint level, /* Check target (proxies not allowed). */ if (!legal_texsubimage_target(ctx, 3, texObj->Target, true)) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", self, - _mesa_lookup_enum_by_nr(texObj->Target)); + _mesa_enum_to_string(texObj->Target)); return; } @@ -4288,8 +4315,8 @@ check_clear_tex_image(struct gl_context *ctx, _mesa_error(ctx, err, "%s(incompatible format = %s, type = %s)", function, - _mesa_lookup_enum_by_nr(format), - _mesa_lookup_enum_by_nr(type)); + _mesa_enum_to_string(format), + _mesa_enum_to_string(type)); return false; } @@ -4298,8 +4325,8 @@ check_clear_tex_image(struct gl_context *ctx, _mesa_error(ctx, GL_INVALID_OPERATION, "%s(incompatible internalFormat = %s, format = %s)", function, - _mesa_lookup_enum_by_nr(internalFormat), - _mesa_lookup_enum_by_nr(format)); + _mesa_enum_to_string(internalFormat), + _mesa_enum_to_string(format)); return false; } @@ -4541,7 +4568,7 @@ compressed_subtexture_target_check(struct gl_context *ctx, GLenum target, if (dsa && target == GL_TEXTURE_RECTANGLE) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid target %s)", caller, - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return GL_TRUE; } @@ -4549,13 +4576,15 @@ compressed_subtexture_target_check(struct gl_context *ctx, GLenum target, case 2: switch (target) { case GL_TEXTURE_2D: + targetOK = GL_TRUE; + break; case GL_TEXTURE_CUBE_MAP_POSITIVE_X: case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - targetOK = GL_TRUE; + targetOK = ctx->Extensions.ARB_texture_cube_map; break; default: targetOK = GL_FALSE; @@ -4563,52 +4592,59 @@ compressed_subtexture_target_check(struct gl_context *ctx, GLenum target, } break; case 3: - targetOK = (target == GL_TEXTURE_3D) || - (target == 
GL_TEXTURE_2D_ARRAY) || - (target == GL_TEXTURE_CUBE_MAP_ARRAY) || - (target == GL_TEXTURE_CUBE_MAP && dsa); - - /* OpenGL 4.5 spec (30.10.2014) says in Section 8.7 Compressed Texture - * Images: - * "An INVALID_OPERATION error is generated by - * CompressedTex*SubImage3D if the internal format of the texture is - * one of the EAC, ETC2, or RGTC formats and either border is - * non-zero, or the effective target for the texture is not - * TEXTURE_2D_ARRAY." - */ - if (target != GL_TEXTURE_2D_ARRAY) { - bool invalidformat; + switch (target) { + case GL_TEXTURE_CUBE_MAP: + targetOK = dsa && ctx->Extensions.ARB_texture_cube_map; + break; + case GL_TEXTURE_2D_ARRAY: + targetOK = _mesa_is_gles3(ctx) || + (_mesa_is_desktop_gl(ctx) && ctx->Extensions.EXT_texture_array); + break; + case GL_TEXTURE_CUBE_MAP_ARRAY: + targetOK = ctx->Extensions.ARB_texture_cube_map_array; + break; + case GL_TEXTURE_3D: + targetOK = GL_TRUE; + /* + * OpenGL 4.5 spec (30.10.2014) says in Section 8.7 Compressed Texture + * Images: + * "An INVALID_OPERATION error is generated by + * CompressedTex*SubImage3D if the internal format of the texture + * is one of the EAC, ETC2, or RGTC formats and either border is + * non-zero, or the effective target for the texture is not + * TEXTURE_2D_ARRAY." + * + * NOTE: that's probably a spec error. It should probably say + * "... or the effective target for the texture is not + * TEXTURE_2D_ARRAY, TEXTURE_CUBE_MAP, nor + * GL_TEXTURE_CUBE_MAP_ARRAY." + * since those targets are 2D images and they support all compression + * formats. + * + * Instead of listing all these, just list those which are allowed, + * which is (at this time) only bptc. Otherwise we'd say s3tc (and + * more) are valid here, which they are not, but of course not + * mentioned by core spec. + */ switch (format) { - /* These came from _mesa_is_compressed_format in glformats.c. 
*/ - /* EAC formats */ - case GL_COMPRESSED_RGBA8_ETC2_EAC: - case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC: - case GL_COMPRESSED_R11_EAC: - case GL_COMPRESSED_RG11_EAC: - case GL_COMPRESSED_SIGNED_R11_EAC: - case GL_COMPRESSED_SIGNED_RG11_EAC: - /* ETC2 formats */ - case GL_COMPRESSED_RGB8_ETC2: - case GL_COMPRESSED_SRGB8_ETC2: - case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: - case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: - /* RGTC formats */ - case GL_COMPRESSED_RED_RGTC1: - case GL_COMPRESSED_SIGNED_RED_RGTC1: - case GL_COMPRESSED_RG_RGTC2: - case GL_COMPRESSED_SIGNED_RG_RGTC2: - invalidformat = true; - break; - default: - invalidformat = false; - } - if (invalidformat) { + /* These are the only 3D compression formats supported at this time */ + case GL_COMPRESSED_RGBA_BPTC_UNORM: + case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM: + case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT: + case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT: + /* valid format */ + break; + default: + /* invalid format */ _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid target %s for format %s)", caller, - _mesa_lookup_enum_by_nr(target), - _mesa_lookup_enum_by_nr(format)); + _mesa_enum_to_string(target), + _mesa_enum_to_string(format)); return GL_TRUE; } + break; + default: + targetOK = GL_FALSE; } break; @@ -4621,7 +4657,7 @@ compressed_subtexture_target_check(struct gl_context *ctx, GLenum target, if (!targetOK) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", caller, - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return GL_TRUE; } @@ -4834,8 +4870,7 @@ _mesa_CompressedTextureSubImage1D(GLuint texture, GLint level, GLint xoffset, if (!texObj) return; - if (compressed_subtexture_target_check(ctx, texObj->Target, 1, format, - true, + if (compressed_subtexture_target_check(ctx, texObj->Target, 1, format, true, "glCompressedTextureSubImage1D")) { return; } @@ -4912,8 +4947,7 @@ _mesa_CompressedTextureSubImage2D(GLuint texture, GLint level, GLint xoffset, if (!texObj) return; - if (compressed_subtexture_target_check(ctx, texObj->Target, 2, format, - true, + if (compressed_subtexture_target_check(ctx, texObj->Target, 2, format, true, "glCompressedTextureSubImage2D")) { return; } @@ -4990,8 +5024,7 @@ _mesa_CompressedTextureSubImage3D(GLuint texture, GLint level, GLint xoffset, if (!texObj) return; - if (compressed_subtexture_target_check(ctx, texObj->Target, 3, format, - true, + if (compressed_subtexture_target_check(ctx, texObj->Target, 3, format, true, "glCompressedTextureSubImage3D")) { return; } @@ -5440,7 +5473,6 @@ _mesa_TexBufferRange(GLenum target, GLenum internalFormat, GLuint buffer, return; } else { - /* OpenGL 4.5 core spec (02.02.2015) says in Section 8.9 Buffer * Textures (PDF page 254): * "If buffer is zero, then any buffer object attached to the buffer @@ -5508,7 +5540,6 @@ _mesa_TextureBufferRange(GLuint texture, GLenum internalFormat, GLuint buffer, return; } else { - /* OpenGL 4.5 core spec (02.02.2015) says in Section 8.9 Buffer * Textures (PDF page 254): * "If buffer is zero, then any buffer object attached to the buffer @@ -5554,19 +5585,17 @@ check_multisample_target(GLuint dims, GLenum target, bool dsa) return dims == 2; case GL_PROXY_TEXTURE_2D_MULTISAMPLE: return dims == 2 && !dsa; - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: return dims == 3; case GL_PROXY_TEXTURE_2D_MULTISAMPLE_ARRAY: return dims == 3 && !dsa; - default: return GL_FALSE; } } -void +static void _mesa_texture_image_multisample(struct gl_context *ctx, GLuint dims, struct gl_texture_object *texObj, GLenum 
target, GLsizei samples, @@ -5581,8 +5610,8 @@ _mesa_texture_image_multisample(struct gl_context *ctx, GLuint dims, GLenum sample_count_error; bool dsa = strstr(func, "ture") ? true : false; - if (!(ctx->Extensions.ARB_texture_multisample - && _mesa_is_desktop_gl(ctx))) { + if (!((ctx->Extensions.ARB_texture_multisample + && _mesa_is_desktop_gl(ctx))) && !_mesa_is_gles31(ctx)) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(unsupported)", func); return; } @@ -5605,14 +5634,21 @@ _mesa_texture_image_multisample(struct gl_context *ctx, GLuint dims, if (immutable && !_mesa_is_legal_tex_storage_format(ctx, internalformat)) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(internalformat=%s not legal for immutable-format)", - func, _mesa_lookup_enum_by_nr(internalformat)); + func, _mesa_enum_to_string(internalformat)); return; } if (!is_renderable_texture_format(ctx, internalformat)) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "%s(internalformat=%s)", - func, _mesa_lookup_enum_by_nr(internalformat)); + /* Page 172 of OpenGL ES 3.1 spec says: + * "An INVALID_ENUM error is generated if sizedinternalformat is not + * color-renderable, depth-renderable, or stencil-renderable (as + * defined in section 9.4). + * + * (Same error is also defined for desktop OpenGL for multisample + * teximage/texstorage functions.) + */ + _mesa_error(ctx, GL_INVALID_ENUM, "%s(internalformat=%s)", func, + _mesa_enum_to_string(internalformat)); return; } @@ -5671,13 +5707,12 @@ _mesa_texture_image_multisample(struct gl_context *ctx, GLuint dims, else { if (!dimensionsOK) { _mesa_error(ctx, GL_INVALID_VALUE, - "%s(invalid width or height)", func); + "%s(invalid width or height)", func); return; } if (!sizeOK) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, - "%s(texture too large)", func); + _mesa_error(ctx, GL_OUT_OF_MEMORY, "%s(texture too large)", func); return; } @@ -5695,7 +5730,7 @@ _mesa_texture_image_multisample(struct gl_context *ctx, GLuint dims, if (width > 0 && height > 0 && depth > 0) { if (!ctx->Driver.AllocTextureStorage(ctx, texObj, 1, - width, height, depth)) { + width, height, depth)) { /* tidy up the texture image state. strictly speaking, * we're allowed to just leave this in whatever state we * like, but being tidy is good. 
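For context on the CompressedTexSubImage3D rework above: the new switch accepts GL_TEXTURE_3D only for the BPTC formats, since those are currently the only compressed formats with genuine 3D support; EAC, ETC2, RGTC and S3TC uploads to a 3D target are rejected with GL_INVALID_OPERATION. A minimal standalone sketch of that rule follows (simplified helper name and structure, not Mesa's actual function, assuming a GL header that defines the BPTC tokens):

/* Illustrative only: condensed form of the 3D compressed-subimage format
 * rule from the hunk above. Helper name and layout are assumptions.
 */
#include <stdbool.h>
#include <GL/gl.h>
#include <GL/glext.h>   /* assumed to provide the BPTC enum tokens */

static bool
compressed_format_valid_for_texture_3d(GLenum format)
{
   switch (format) {
   case GL_COMPRESSED_RGBA_BPTC_UNORM:
   case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
   case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
   case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
      return true;   /* only BPTC has 3D support at this time */
   default:
      return false;  /* EAC/ETC2/RGTC/S3TC etc. -> GL_INVALID_OPERATION */
   }
}
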
diff --git a/src/mesa/main/teximage.h b/src/mesa/main/teximage.h index 1eebaa8b631..bf729daf534 100644 --- a/src/mesa/main/teximage.h +++ b/src/mesa/main/teximage.h @@ -200,15 +200,6 @@ _mesa_copy_texture_sub_image(struct gl_context *ctx, GLuint dims, const char *caller); extern void -_mesa_texture_image_multisample(struct gl_context *ctx, GLuint dims, - struct gl_texture_object *texObj, - GLenum target, GLsizei samples, - GLint internalformat, GLsizei width, - GLsizei height, GLsizei depth, - GLboolean fixedsamplelocations, - GLboolean immutable, const char *func); - -extern void _mesa_texture_buffer_range(struct gl_context *ctx, struct gl_texture_object *texObj, GLenum internalFormat, diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index c563f1e7434..cd7cfd6a4fb 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -1255,7 +1255,7 @@ create_textures(struct gl_context *ctx, GLenum target, if (targetIndex < 0) { /* Bad Target */ mtx_unlock(&ctx->Shared->Mutex); _mesa_error(ctx, GL_INVALID_ENUM, "gl%sTextures(target = %s)", - func, _mesa_lookup_enum_by_nr(texObj->Target)); + func, _mesa_enum_to_string(texObj->Target)); return; } assert(targetIndex < NUM_TEXTURE_TARGETS); @@ -1606,8 +1606,8 @@ _mesa_tex_target_to_index(const struct gl_context *ctx, GLenum target) return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_cube_map_array ? TEXTURE_CUBE_ARRAY_INDEX : -1; case GL_TEXTURE_2D_MULTISAMPLE: - return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample - ? TEXTURE_2D_MULTISAMPLE_INDEX: -1; + return ((_mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample) || + _mesa_is_gles31(ctx)) ? TEXTURE_2D_MULTISAMPLE_INDEX: -1; case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample ? 
TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX: -1; @@ -1642,7 +1642,7 @@ _mesa_BindTexture( GLenum target, GLuint texName ) if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) _mesa_debug(ctx, "glBindTexture %s %d\n", - _mesa_lookup_enum_by_nr(target), (GLint) texName); + _mesa_enum_to_string(target), (GLint) texName); targetIndex = _mesa_tex_target_to_index(ctx, target); if (targetIndex < 0) { @@ -1806,7 +1806,7 @@ _mesa_BindTextureUnit(GLuint unit, GLuint texture) if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) _mesa_debug(ctx, "glBindTextureUnit %s %d\n", - _mesa_lookup_enum_by_nr(GL_TEXTURE0+unit), (GLint) texture); + _mesa_enum_to_string(GL_TEXTURE0+unit), (GLint) texture); /* Section 8.1 (Texture Objects) of the OpenGL 4.5 core profile spec * (20141030) says: diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index d74134f41b1..c0611c3e489 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -381,7 +381,7 @@ set_tex_parameteri(struct gl_context *ctx, if (texObj->Target == GL_TEXTURE_RECTANGLE_ARB && params[0] != 0) { _mesa_error(ctx, GL_INVALID_OPERATION, "glTex%sParameter(target=%s, param=%d)", suffix, - _mesa_lookup_enum_by_nr(texObj->Target), params[0]); + _mesa_enum_to_string(texObj->Target), params[0]); return GL_FALSE; } incomplete(ctx, texObj); @@ -500,7 +500,9 @@ set_tex_parameteri(struct gl_context *ctx, goto invalid_pname; case GL_DEPTH_STENCIL_TEXTURE_MODE: - if (_mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_stencil_texturing) { + if ((_mesa_is_desktop_gl(ctx) && + ctx->Extensions.ARB_stencil_texturing) || + _mesa_is_gles31(ctx)) { bool stencil = params[0] == GL_STENCIL_INDEX; if (!stencil && params[0] != GL_DEPTH_COMPONENT) goto invalid_param; @@ -610,22 +612,22 @@ set_tex_parameteri(struct gl_context *ctx, invalid_pname: _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sParameter(pname=%s)", - suffix, _mesa_lookup_enum_by_nr(pname)); + suffix, _mesa_enum_to_string(pname)); return GL_FALSE; invalid_param: _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sParameter(param=%s)", - suffix, _mesa_lookup_enum_by_nr(params[0])); + suffix, _mesa_enum_to_string(params[0])); return GL_FALSE; invalid_operation: _mesa_error(ctx, GL_INVALID_OPERATION, "glTex%sParameter(pname=%s)", - suffix, _mesa_lookup_enum_by_nr(pname)); + suffix, _mesa_enum_to_string(pname)); return GL_FALSE; invalid_enum: _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sParameter(pname=%s)", - suffix, _mesa_lookup_enum_by_nr(pname)); + suffix, _mesa_enum_to_string(pname)); return GL_FALSE; } @@ -683,7 +685,7 @@ set_tex_parameterf(struct gl_context *ctx, if (texObj->Sampler.MaxAnisotropy == params[0]) return GL_FALSE; - if (params[0] < 1.0) { + if (params[0] < 1.0F) { _mesa_error(ctx, GL_INVALID_VALUE, "glTex%sParameter(param)", suffix); return GL_FALSE; @@ -745,12 +747,12 @@ set_tex_parameterf(struct gl_context *ctx, invalid_pname: _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sParameter(pname=%s)", - suffix, _mesa_lookup_enum_by_nr(pname)); + suffix, _mesa_enum_to_string(pname)); return GL_FALSE; invalid_enum: _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sParameter(pname=%s)", - suffix, _mesa_lookup_enum_by_nr(pname)); + suffix, _mesa_enum_to_string(pname)); return GL_FALSE; } @@ -1395,7 +1397,7 @@ get_tex_level_parameter_image(struct gl_context *ctx, else { _mesa_error(ctx, GL_INVALID_OPERATION, "glGetTex%sLevelParameter[if]v(pname=%s)", suffix, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); } break; case GL_TEXTURE_COMPRESSED: @@ -1444,7 +1446,7 @@ get_tex_level_parameter_image(struct gl_context 
*ctx, invalid_pname: _mesa_error(ctx, GL_INVALID_ENUM, "glGetTex%sLevelParameter[if]v(pname=%s)", suffix, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); } @@ -1528,7 +1530,7 @@ get_tex_level_parameter_buffer(struct gl_context *ctx, /* Always illegal for GL_TEXTURE_BUFFER */ _mesa_error(ctx, GL_INVALID_OPERATION, "glGetTex%sLevelParameter[if]v(pname=%s)", suffix, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); break; /* GL_ARB_texture_float */ @@ -1557,7 +1559,7 @@ get_tex_level_parameter_buffer(struct gl_context *ctx, invalid_pname: _mesa_error(ctx, GL_INVALID_ENUM, "glGetTex%sLevelParameter[if]v(pname=%s)", suffix, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); } @@ -1586,7 +1588,7 @@ get_tex_level_parameteriv(struct gl_context *ctx, if (!legal_get_tex_level_parameter_target(ctx, target, dsa)) { _mesa_error(ctx, GL_INVALID_ENUM, "glGetTex%sLevelParameter[if]v(target=%s)", suffix, - _mesa_lookup_enum_by_nr(target)); + _mesa_enum_to_string(target)); return; } diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c index 1af9d47f030..9b5928c4306 100644 --- a/src/mesa/main/texstate.c +++ b/src/mesa/main/texstate.c @@ -123,21 +123,21 @@ _mesa_print_texunit_state( struct gl_context *ctx, GLuint unit ) { const struct gl_texture_unit *texUnit = ctx->Texture.Unit + unit; printf("Texture Unit %d\n", unit); - printf(" GL_TEXTURE_ENV_MODE = %s\n", _mesa_lookup_enum_by_nr(texUnit->EnvMode)); - printf(" GL_COMBINE_RGB = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.ModeRGB)); - printf(" GL_COMBINE_ALPHA = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.ModeA)); - printf(" GL_SOURCE0_RGB = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.SourceRGB[0])); - printf(" GL_SOURCE1_RGB = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.SourceRGB[1])); - printf(" GL_SOURCE2_RGB = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.SourceRGB[2])); - printf(" GL_SOURCE0_ALPHA = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.SourceA[0])); - printf(" GL_SOURCE1_ALPHA = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.SourceA[1])); - printf(" GL_SOURCE2_ALPHA = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.SourceA[2])); - printf(" GL_OPERAND0_RGB = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.OperandRGB[0])); - printf(" GL_OPERAND1_RGB = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.OperandRGB[1])); - printf(" GL_OPERAND2_RGB = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.OperandRGB[2])); - printf(" GL_OPERAND0_ALPHA = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.OperandA[0])); - printf(" GL_OPERAND1_ALPHA = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.OperandA[1])); - printf(" GL_OPERAND2_ALPHA = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.OperandA[2])); + printf(" GL_TEXTURE_ENV_MODE = %s\n", _mesa_enum_to_string(texUnit->EnvMode)); + printf(" GL_COMBINE_RGB = %s\n", _mesa_enum_to_string(texUnit->Combine.ModeRGB)); + printf(" GL_COMBINE_ALPHA = %s\n", _mesa_enum_to_string(texUnit->Combine.ModeA)); + printf(" GL_SOURCE0_RGB = %s\n", _mesa_enum_to_string(texUnit->Combine.SourceRGB[0])); + printf(" GL_SOURCE1_RGB = %s\n", _mesa_enum_to_string(texUnit->Combine.SourceRGB[1])); + printf(" GL_SOURCE2_RGB = %s\n", _mesa_enum_to_string(texUnit->Combine.SourceRGB[2])); + printf(" GL_SOURCE0_ALPHA = %s\n", _mesa_enum_to_string(texUnit->Combine.SourceA[0])); + printf(" GL_SOURCE1_ALPHA = %s\n", _mesa_enum_to_string(texUnit->Combine.SourceA[1])); + printf(" GL_SOURCE2_ALPHA = %s\n", _mesa_enum_to_string(texUnit->Combine.SourceA[2])); + printf(" 
GL_OPERAND0_RGB = %s\n", _mesa_enum_to_string(texUnit->Combine.OperandRGB[0])); + printf(" GL_OPERAND1_RGB = %s\n", _mesa_enum_to_string(texUnit->Combine.OperandRGB[1])); + printf(" GL_OPERAND2_RGB = %s\n", _mesa_enum_to_string(texUnit->Combine.OperandRGB[2])); + printf(" GL_OPERAND0_ALPHA = %s\n", _mesa_enum_to_string(texUnit->Combine.OperandA[0])); + printf(" GL_OPERAND1_ALPHA = %s\n", _mesa_enum_to_string(texUnit->Combine.OperandA[1])); + printf(" GL_OPERAND2_ALPHA = %s\n", _mesa_enum_to_string(texUnit->Combine.OperandA[2])); printf(" GL_RGB_SCALE = %d\n", 1 << texUnit->Combine.ScaleShiftRGB); printf(" GL_ALPHA_SCALE = %d\n", 1 << texUnit->Combine.ScaleShiftA); printf(" GL_TEXTURE_ENV_COLOR = (%f, %f, %f, %f)\n", texUnit->EnvColor[0], texUnit->EnvColor[1], texUnit->EnvColor[2], texUnit->EnvColor[3]); @@ -289,23 +289,23 @@ _mesa_ActiveTexture(GLenum texture) GLuint k; GET_CURRENT_CONTEXT(ctx); + if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) + _mesa_debug(ctx, "glActiveTexture %s\n", + _mesa_enum_to_string(texture)); + + if (ctx->Texture.CurrentUnit == texUnit) + return; + k = _mesa_max_tex_unit(ctx); assert(k <= ARRAY_SIZE(ctx->Texture.Unit)); - if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) - _mesa_debug(ctx, "glActiveTexture %s\n", - _mesa_lookup_enum_by_nr(texture)); - if (texUnit >= k) { _mesa_error(ctx, GL_INVALID_ENUM, "glActiveTexture(texture=%s)", - _mesa_lookup_enum_by_nr(texture)); + _mesa_enum_to_string(texture)); return; } - if (ctx->Texture.CurrentUnit == texUnit) - return; - FLUSH_VERTICES(ctx, _NEW_TEXTURE); ctx->Texture.CurrentUnit = texUnit; @@ -325,16 +325,16 @@ _mesa_ClientActiveTexture(GLenum texture) if (MESA_VERBOSE & (VERBOSE_API | VERBOSE_TEXTURE)) _mesa_debug(ctx, "glClientActiveTexture %s\n", - _mesa_lookup_enum_by_nr(texture)); + _mesa_enum_to_string(texture)); + + if (ctx->Array.ActiveTexture == texUnit) + return; if (texUnit >= ctx->Const.MaxTextureCoordUnits) { _mesa_error(ctx, GL_INVALID_ENUM, "glClientActiveTexture(texture)"); return; } - if (ctx->Array.ActiveTexture == texUnit) - return; - FLUSH_VERTICES(ctx, _NEW_ARRAY); ctx->Array.ActiveTexture = texUnit; } diff --git a/src/mesa/main/texstate.h b/src/mesa/main/texstate.h index 662435b47cc..bee8c9c3316 100644 --- a/src/mesa/main/texstate.h +++ b/src/mesa/main/texstate.h @@ -77,7 +77,7 @@ _mesa_get_tex_unit_err(struct gl_context *ctx, GLuint unit, const char *func) * implementation." */ _mesa_error(ctx, GL_INVALID_OPERATION, "%s(unit=%s)", func, - _mesa_lookup_enum_by_nr(GL_TEXTURE0+unit)); + _mesa_enum_to_string(GL_TEXTURE0+unit)); return NULL; } diff --git a/src/mesa/main/texstorage.c b/src/mesa/main/texstorage.c index 53cb2c091f8..4a2cc6065df 100644 --- a/src/mesa/main/texstorage.c +++ b/src/mesa/main/texstorage.c @@ -308,7 +308,8 @@ tex_storage_error_check(struct gl_context *ctx, _mesa_error(ctx, _mesa_is_desktop_gl(ctx)? 
GL_INVALID_ENUM : GL_INVALID_OPERATION, "glTex%sStorage%dD(internalformat = %s)", suffix, dims, - _mesa_lookup_enum_by_nr(internalformat)); + _mesa_enum_to_string(internalformat)); + return GL_TRUE; } /* levels check */ @@ -464,21 +465,21 @@ texstorage(GLuint dims, GLenum target, GLsizei levels, GLenum internalformat, if (!legal_texobj_target(ctx, dims, target)) { _mesa_error(ctx, GL_INVALID_ENUM, "glTexStorage%uD(illegal target=%s)", - dims, _mesa_lookup_enum_by_nr(target)); + dims, _mesa_enum_to_string(target)); return; } if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) _mesa_debug(ctx, "glTexStorage%uD %s %d %s %d %d %d\n", dims, - _mesa_lookup_enum_by_nr(target), levels, - _mesa_lookup_enum_by_nr(internalformat), + _mesa_enum_to_string(target), levels, + _mesa_enum_to_string(internalformat), width, height, depth); /* Check the format to make sure it is sized. */ if (!_mesa_is_legal_tex_storage_format(ctx, internalformat)) { _mesa_error(ctx, GL_INVALID_ENUM, "glTexStorage%uD(internalformat = %s)", dims, - _mesa_lookup_enum_by_nr(internalformat)); + _mesa_enum_to_string(internalformat)); return; } @@ -504,14 +505,14 @@ texturestorage(GLuint dims, GLuint texture, GLsizei levels, if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) _mesa_debug(ctx, "glTextureStorage%uD %d %d %s %d %d %d\n", dims, texture, levels, - _mesa_lookup_enum_by_nr(internalformat), + _mesa_enum_to_string(internalformat), width, height, depth); /* Check the format to make sure it is sized. */ if (!_mesa_is_legal_tex_storage_format(ctx, internalformat)) { _mesa_error(ctx, GL_INVALID_ENUM, "glTextureStorage%uD(internalformat = %s)", dims, - _mesa_lookup_enum_by_nr(internalformat)); + _mesa_enum_to_string(internalformat)); return; } @@ -529,7 +530,7 @@ texturestorage(GLuint dims, GLuint texture, GLsizei levels, if (!legal_texobj_target(ctx, dims, texObj->Target)) { _mesa_error(ctx, GL_INVALID_ENUM, "glTextureStorage%uD(illegal target=%s)", - dims, _mesa_lookup_enum_by_nr(texObj->Target)); + dims, _mesa_enum_to_string(texObj->Target)); return; } diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index 1525205981b..37c05690091 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -787,6 +787,7 @@ texstore_rgba(TEXSTORE_PARAMS) srcType = GL_FLOAT; srcRowStride = srcWidth * 4 * sizeof(float); srcMesaFormat = RGBA32_FLOAT; + srcPacking = &ctx->DefaultPacking; } src = (GLubyte *) diff --git a/src/mesa/main/textureview.c b/src/mesa/main/textureview.c index 6b0aed4ea1a..5a3282a40c1 100644 --- a/src/mesa/main/textureview.c +++ b/src/mesa/main/textureview.c @@ -313,7 +313,7 @@ target_valid(struct gl_context *ctx, GLenum origTarget, GLenum newTarget) } _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureView(illegal target=%s)", - _mesa_lookup_enum_by_nr(newTarget)); + _mesa_enum_to_string(newTarget)); return false; } #undef RETURN_IF_SUPPORTED @@ -435,8 +435,8 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, if (MESA_VERBOSE & (VERBOSE_API | VERBOSE_TEXTURE)) _mesa_debug(ctx, "glTextureView %d %s %d %s %d %d %d %d\n", - texture, _mesa_lookup_enum_by_nr(target), origtexture, - _mesa_lookup_enum_by_nr(internalformat), + texture, _mesa_enum_to_string(target), origtexture, + _mesa_enum_to_string(internalformat), minlevel, numlevels, minlayer, numlayers); if (origtexture == 0) { @@ -523,8 +523,8 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, internalformat)) { _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureView(internalformat %s not compatible with 
origtexture %s)", - _mesa_lookup_enum_by_nr(internalformat), - _mesa_lookup_enum_by_nr(origTexObj->Image[0][0]->InternalFormat)); + _mesa_enum_to_string(internalformat), + _mesa_enum_to_string(origTexObj->Image[0][0]->InternalFormat)); return; } diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp index cab5083e81b..036530e91b6 100644 --- a/src/mesa/main/uniform_query.cpp +++ b/src/mesa/main/uniform_query.cpp @@ -978,81 +978,6 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg, } -/** - * Called via glGetUniformLocation(). - * - * Returns the uniform index into UniformStorage (also the - * glGetActiveUniformsiv uniform index), and stores the referenced - * array offset in *offset, or GL_INVALID_INDEX (-1). - */ -extern "C" unsigned -_mesa_get_uniform_location(struct gl_shader_program *shProg, - const GLchar *name, - unsigned *out_offset) -{ - /* Page 80 (page 94 of the PDF) of the OpenGL 2.1 spec says: - * - * "The first element of a uniform array is identified using the - * name of the uniform array appended with "[0]". Except if the last - * part of the string name indicates a uniform array, then the - * location of the first element of that array can be retrieved by - * either using the name of the uniform array, or the name of the - * uniform array appended with "[0]"." - * - * Note: since uniform names are not allowed to use whitespace, and array - * indices within uniform names are not allowed to use "+", "-", or leading - * zeros, it follows that each uniform has a unique name up to the possible - * ambiguity with "[0]" noted above. Therefore we don't need to worry - * about mal-formed inputs--they will properly fail when we try to look up - * the uniform name in shProg->UniformHash. - */ - - const GLchar *base_name_end; - long offset = parse_program_resource_name(name, &base_name_end); - bool array_lookup = offset >= 0; - char *name_copy; - - if (array_lookup) { - name_copy = (char *) malloc(base_name_end - name + 1); - memcpy(name_copy, name, base_name_end - name); - name_copy[base_name_end - name] = '\0'; - } else { - name_copy = (char *) name; - offset = 0; - } - - unsigned location = 0; - const bool found = shProg->UniformHash->get(location, name_copy); - - assert(!found - || strcmp(name_copy, shProg->UniformStorage[location].name) == 0); - - /* Free the temporary buffer *before* possibly returning an error. - */ - if (name_copy != name) - free(name_copy); - - if (!found) - return GL_INVALID_INDEX; - - /* If the uniform is built-in, fail. */ - if (shProg->UniformStorage[location].builtin) - return GL_INVALID_INDEX; - - /* If the uniform is an array, fail if the index is out of bounds. - * (A negative index is caught above.) This also fails if the uniform - * is not an array, but the user is trying to index it, because - * array_elements is zero and offset >= 0. - */ - if (array_lookup - && offset >= (long) shProg->UniformStorage[location].array_elements) { - return GL_INVALID_INDEX; - } - - *out_offset = offset; - return location; -} - extern "C" bool _mesa_sampler_uniforms_are_valid(const struct gl_shader_program *shProg, char *errMsg, size_t errMsgLength) @@ -1101,18 +1026,23 @@ _mesa_sampler_uniforms_pipeline_are_valid(struct gl_pipeline_object *pipeline) for (unsigned i = 0; i < shProg[idx]->NumUniformStorage; i++) { const struct gl_uniform_storage *const storage = &shProg[idx]->UniformStorage[i]; - const glsl_type *const t = (storage->type->is_array()) - ? 
storage->type->fields.array : storage->type; - if (!t->is_sampler()) + if (!storage->type->is_sampler()) continue; active_samplers++; - const unsigned count = MAX2(1, storage->type->array_size()); + const unsigned count = MAX2(1, storage->array_elements); for (unsigned j = 0; j < count; j++) { const unsigned unit = storage->storage[j].i; + /* FIXME: Samplers are initialized to 0 and Mesa doesn't do a + * great job of eliminating unused uniforms currently so for now + * don't throw an error if two sampler types both point to 0. + */ + if (unit == 0) + continue; + /* The types of the samplers associated with a particular texture * unit must be an exact match. Page 74 (page 89 of the PDF) of * the OpenGL 3.3 core spec says: @@ -1122,13 +1052,14 @@ _mesa_sampler_uniforms_pipeline_are_valid(struct gl_pipeline_object *pipeline) * program object." */ if (unit_types[unit] == NULL) { - unit_types[unit] = t; - } else if (unit_types[unit] != t) { + unit_types[unit] = storage->type; + } else if (unit_types[unit] != storage->type) { pipeline->InfoLog = ralloc_asprintf(pipeline, "Texture unit %d is accessed both as %s " "and %s", - unit, unit_types[unit]->name, t->name); + unit, unit_types[unit]->name, + storage->type->name); return false; } } diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c index 5548d1d026f..ff1df72e1d6 100644 --- a/src/mesa/main/uniforms.c +++ b/src/mesa/main/uniforms.c @@ -952,7 +952,7 @@ _mesa_GetUniformBlockIndex(GLuint program, struct gl_program_resource *res = _mesa_program_resource_find_name(shProg, GL_UNIFORM_BLOCK, - uniformBlockName); + uniformBlockName, NULL); if (!res) return GL_INVALID_INDEX; @@ -987,7 +987,8 @@ _mesa_GetUniformIndices(GLuint program, for (i = 0; i < uniformCount; i++) { struct gl_program_resource *res = - _mesa_program_resource_find_name(shProg, GL_UNIFORM, uniformNames[i]); + _mesa_program_resource_find_name(shProg, GL_UNIFORM, uniformNames[i], + NULL); uniformIndices[i] = _mesa_program_resource_index(shProg, res); } } @@ -1092,6 +1093,21 @@ mesa_bufferiv(struct gl_shader_program *shProg, GLenum type, GL_REFERENCED_BY_VERTEX_SHADER, params, caller); return; + + case GL_UNIFORM_BLOCK_REFERENCED_BY_TESS_CONTROL_SHADER: + case GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_CONTROL_SHADER: + _mesa_program_resource_prop(shProg, res, index, + GL_REFERENCED_BY_TESS_CONTROL_SHADER, params, + caller); + return; + + case GL_UNIFORM_BLOCK_REFERENCED_BY_TESS_EVALUATION_SHADER: + case GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_EVALUATION_SHADER: + _mesa_program_resource_prop(shProg, res, index, + GL_REFERENCED_BY_TESS_EVALUATION_SHADER, params, + caller); + return; + case GL_UNIFORM_BLOCK_REFERENCED_BY_GEOMETRY_SHADER: case GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_GEOMETRY_SHADER: _mesa_program_resource_prop(shProg, res, index, @@ -1104,16 +1120,10 @@ mesa_bufferiv(struct gl_shader_program *shProg, GLenum type, GL_REFERENCED_BY_FRAGMENT_SHADER, params, caller); return; - case GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_CONTROL_SHADER: - params[0] = GL_FALSE; - return; - case GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_EVALUATION_SHADER: - params[0] = GL_FALSE; - return; default: _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname 0x%x (%s))", caller, pname, - _mesa_lookup_enum_by_nr(pname)); + _mesa_enum_to_string(pname)); return; } } diff --git a/src/mesa/main/uniforms.h b/src/mesa/main/uniforms.h index bd7b05e207a..e62eaa53ccc 100644 --- a/src/mesa/main/uniforms.h +++ b/src/mesa/main/uniforms.h @@ -343,10 +343,6 @@ void GLAPIENTRY 
_mesa_ProgramUniformMatrix4x3dv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble *value); -unsigned -_mesa_get_uniform_location(struct gl_shader_program *shProg, - const GLchar *name, unsigned *offset); - void _mesa_uniform(struct gl_context *ctx, struct gl_shader_program *shader_program, GLint location, GLsizei count, diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c index ebdd9eaf02e..3bab9850588 100644 --- a/src/mesa/main/varray.c +++ b/src/mesa/main/varray.c @@ -300,7 +300,7 @@ update_array_format(struct gl_context *ctx, typeBit = type_to_bit(ctx, type); if (typeBit == 0x0 || (typeBit & legalTypesMask) == 0x0) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(type = %s)", - func, _mesa_lookup_enum_by_nr(type)); + func, _mesa_enum_to_string(type)); return false; } @@ -333,7 +333,7 @@ update_array_format(struct gl_context *ctx, if (bgra_error) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(size=GL_BGRA and type=%s)", - func, _mesa_lookup_enum_by_nr(type)); + func, _mesa_enum_to_string(type)); return false; } @@ -2310,7 +2310,7 @@ print_array(const char *name, GLint index, const struct gl_client_array *array) else fprintf(stderr, " %s: ", name); fprintf(stderr, "Ptr=%p, Type=%s, Size=%d, ElemSize=%u, Stride=%d, Buffer=%u(Size %lu)\n", - array->Ptr, _mesa_lookup_enum_by_nr(array->Type), array->Size, + array->Ptr, _mesa_enum_to_string(array->Type), array->Size, array->_ElementSize, array->StrideB, array->BufferObj->Name, (unsigned long) array->BufferObj->Size); } diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c index 8bc00ace5c4..fd7ae53ccbd 100644 --- a/src/mesa/main/version.c +++ b/src/mesa/main/version.c @@ -309,7 +309,7 @@ compute_version(const struct gl_extensions *extensions, extensions->ARB_gpu_shader5 && extensions->ARB_gpu_shader_fp64 && extensions->ARB_sample_shading && - false /*extensions->ARB_shader_subroutine*/ && + extensions->ARB_shader_subroutine && extensions->ARB_tessellation_shader && extensions->ARB_texture_buffer_object_rgb32 && extensions->ARB_texture_cube_map_array && diff --git a/src/mesa/main/viewport.c b/src/mesa/main/viewport.c index b27063031c4..7d8914291c3 100644 --- a/src/mesa/main/viewport.c +++ b/src/mesa/main/viewport.c @@ -391,8 +391,8 @@ _mesa_ClipControl(GLenum origin, GLenum depth) if (MESA_VERBOSE&VERBOSE_API) _mesa_debug(ctx, "glClipControl(%s, %s)\n", - _mesa_lookup_enum_by_nr(origin), - _mesa_lookup_enum_by_nr(depth)); + _mesa_enum_to_string(origin), + _mesa_enum_to_string(depth)); ASSERT_OUTSIDE_BEGIN_END(ctx); @@ -443,12 +443,12 @@ _mesa_ClipControl(GLenum origin, GLenum depth) */ void _mesa_get_viewport_xform(struct gl_context *ctx, unsigned i, - double scale[3], double translate[3]) + float scale[3], float translate[3]) { - double x = ctx->ViewportArray[i].X; - double y = ctx->ViewportArray[i].Y; - double half_width = 0.5*ctx->ViewportArray[i].Width; - double half_height = 0.5*ctx->ViewportArray[i].Height; + float x = ctx->ViewportArray[i].X; + float y = ctx->ViewportArray[i].Y; + float half_width = 0.5f * ctx->ViewportArray[i].Width; + float half_height = 0.5f * ctx->ViewportArray[i].Height; double n = ctx->ViewportArray[i].Near; double f = ctx->ViewportArray[i].Far; @@ -462,8 +462,8 @@ _mesa_get_viewport_xform(struct gl_context *ctx, unsigned i, translate[1] = half_height + y; } if (ctx->Transform.ClipDepthMode == GL_NEGATIVE_ONE_TO_ONE) { - scale[2] = 0.5*(f - n); - translate[2] = 0.5*(n + f); + scale[2] = 0.5 * (f - n); + translate[2] = 0.5 * (n + f); } else { scale[2] = f - n; translate[2] = n; 
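To make the viewport.c change above concrete: _mesa_get_viewport_xform now fills float scale/translate vectors, with the X/Y terms derived from the viewport rectangle and the Z terms from the depth range, which differ between GL_NEGATIVE_ONE_TO_ONE and zero-to-one clip depth modes. Below is a standalone sketch of the same math (hypothetical function name, explicit parameters instead of ctx->ViewportArray[i], and omitting the clip-origin Y flip for brevity):

/* Illustrative sketch of the viewport scale/translate computation from the
 * hunk above. Name and parameter list are assumptions for this example.
 */
#include <stdbool.h>

static void
viewport_xform(float x, float y, float width, float height,
               double nearval, double farval,
               bool negative_one_to_one,  /* GL_NEGATIVE_ONE_TO_ONE clip depth? */
               float scale[3], float translate[3])
{
   const float half_width  = 0.5f * width;
   const float half_height = 0.5f * height;

   scale[0]     = half_width;
   translate[0] = half_width + x;
   scale[1]     = half_height;          /* negate for an inverted Y origin */
   translate[1] = half_height + y;

   if (negative_one_to_one) {
      /* NDC depth in [-1, 1] maps to [near, far] */
      scale[2]     = 0.5 * (farval - nearval);
      translate[2] = 0.5 * (nearval + farval);
   } else {
      /* NDC depth in [0, 1] (GL_ZERO_TO_ONE) */
      scale[2]     = farval - nearval;
      translate[2] = nearval;
   }
}
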
diff --git a/src/mesa/main/viewport.h b/src/mesa/main/viewport.h index 899dc2d0bcc..b0675db1096 100644 --- a/src/mesa/main/viewport.h +++ b/src/mesa/main/viewport.h @@ -73,6 +73,6 @@ _mesa_ClipControl(GLenum origin, GLenum depth); extern void _mesa_get_viewport_xform(struct gl_context *ctx, unsigned i, - double scale[3], double translate[3]); + float scale[3], float translate[3]); #endif diff --git a/src/mesa/math/m_clip_tmp.h b/src/mesa/math/m_clip_tmp.h index e289be7b302..60c00043725 100644 --- a/src/mesa/math/m_clip_tmp.h +++ b/src/mesa/math/m_clip_tmp.h @@ -194,13 +194,13 @@ static GLvector4f * TAG(cliptest_points3)( GLvector4f *clip_vec, STRIDE_LOOP { const GLfloat cx = from[0], cy = from[1], cz = from[2]; GLubyte mask = 0; - if (cx > 1.0) mask |= CLIP_RIGHT_BIT; - else if (cx < -1.0) mask |= CLIP_LEFT_BIT; - if (cy > 1.0) mask |= CLIP_TOP_BIT; - else if (cy < -1.0) mask |= CLIP_BOTTOM_BIT; + if (cx > 1.0F) mask |= CLIP_RIGHT_BIT; + else if (cx < -1.0F) mask |= CLIP_LEFT_BIT; + if (cy > 1.0F) mask |= CLIP_TOP_BIT; + else if (cy < -1.0F) mask |= CLIP_BOTTOM_BIT; if (viewport_z_clip) { - if (cz > 1.0) mask |= CLIP_FAR_BIT; - else if (cz < -1.0) mask |= CLIP_NEAR_BIT; + if (cz > 1.0F) mask |= CLIP_FAR_BIT; + else if (cz < -1.0F) mask |= CLIP_NEAR_BIT; } clipMask[i] = mask; tmpOrMask |= mask; @@ -230,10 +230,10 @@ static GLvector4f * TAG(cliptest_points2)( GLvector4f *clip_vec, STRIDE_LOOP { const GLfloat cx = from[0], cy = from[1]; GLubyte mask = 0; - if (cx > 1.0) mask |= CLIP_RIGHT_BIT; - else if (cx < -1.0) mask |= CLIP_LEFT_BIT; - if (cy > 1.0) mask |= CLIP_TOP_BIT; - else if (cy < -1.0) mask |= CLIP_BOTTOM_BIT; + if (cx > 1.0F) mask |= CLIP_RIGHT_BIT; + else if (cx < -1.0F) mask |= CLIP_LEFT_BIT; + if (cy > 1.0F) mask |= CLIP_TOP_BIT; + else if (cy < -1.0F) mask |= CLIP_BOTTOM_BIT; clipMask[i] = mask; tmpOrMask |= mask; tmpAndMask &= mask; diff --git a/src/mesa/math/m_matrix.c b/src/mesa/math/m_matrix.c index ecf564c0089..6522200b345 100644 --- a/src/mesa/math/m_matrix.c +++ b/src/mesa/math/m_matrix.c @@ -380,7 +380,7 @@ static GLboolean invert_matrix_general( GLmatrix *mat ) if (fabsf(r3[0])>fabsf(r2[0])) SWAP_ROWS(r3, r2); if (fabsf(r2[0])>fabsf(r1[0])) SWAP_ROWS(r2, r1); if (fabsf(r1[0])>fabsf(r0[0])) SWAP_ROWS(r1, r0); - if (0.0 == r0[0]) return GL_FALSE; + if (0.0F == r0[0]) return GL_FALSE; /* eliminate first variable */ m1 = r1[0]/r0[0]; m2 = r2[0]/r0[0]; m3 = r3[0]/r0[0]; @@ -388,31 +388,31 @@ static GLboolean invert_matrix_general( GLmatrix *mat ) s = r0[2]; r1[2] -= m1 * s; r2[2] -= m2 * s; r3[2] -= m3 * s; s = r0[3]; r1[3] -= m1 * s; r2[3] -= m2 * s; r3[3] -= m3 * s; s = r0[4]; - if (s != 0.0) { r1[4] -= m1 * s; r2[4] -= m2 * s; r3[4] -= m3 * s; } + if (s != 0.0F) { r1[4] -= m1 * s; r2[4] -= m2 * s; r3[4] -= m3 * s; } s = r0[5]; - if (s != 0.0) { r1[5] -= m1 * s; r2[5] -= m2 * s; r3[5] -= m3 * s; } + if (s != 0.0F) { r1[5] -= m1 * s; r2[5] -= m2 * s; r3[5] -= m3 * s; } s = r0[6]; - if (s != 0.0) { r1[6] -= m1 * s; r2[6] -= m2 * s; r3[6] -= m3 * s; } + if (s != 0.0F) { r1[6] -= m1 * s; r2[6] -= m2 * s; r3[6] -= m3 * s; } s = r0[7]; - if (s != 0.0) { r1[7] -= m1 * s; r2[7] -= m2 * s; r3[7] -= m3 * s; } + if (s != 0.0F) { r1[7] -= m1 * s; r2[7] -= m2 * s; r3[7] -= m3 * s; } /* choose pivot - or die */ if (fabsf(r3[1])>fabsf(r2[1])) SWAP_ROWS(r3, r2); if (fabsf(r2[1])>fabsf(r1[1])) SWAP_ROWS(r2, r1); - if (0.0 == r1[1]) return GL_FALSE; + if (0.0F == r1[1]) return GL_FALSE; /* eliminate second variable */ m2 = r2[1]/r1[1]; m3 = r3[1]/r1[1]; r2[2] -= m2 * r1[2]; r3[2] -= m3 * 
r1[2]; r2[3] -= m2 * r1[3]; r3[3] -= m3 * r1[3]; - s = r1[4]; if (0.0 != s) { r2[4] -= m2 * s; r3[4] -= m3 * s; } - s = r1[5]; if (0.0 != s) { r2[5] -= m2 * s; r3[5] -= m3 * s; } - s = r1[6]; if (0.0 != s) { r2[6] -= m2 * s; r3[6] -= m3 * s; } - s = r1[7]; if (0.0 != s) { r2[7] -= m2 * s; r3[7] -= m3 * s; } + s = r1[4]; if (0.0F != s) { r2[4] -= m2 * s; r3[4] -= m3 * s; } + s = r1[5]; if (0.0F != s) { r2[5] -= m2 * s; r3[5] -= m3 * s; } + s = r1[6]; if (0.0F != s) { r2[6] -= m2 * s; r3[6] -= m3 * s; } + s = r1[7]; if (0.0F != s) { r2[7] -= m2 * s; r3[7] -= m3 * s; } /* choose pivot - or die */ if (fabsf(r3[2])>fabsf(r2[2])) SWAP_ROWS(r3, r2); - if (0.0 == r2[2]) return GL_FALSE; + if (0.0F == r2[2]) return GL_FALSE; /* eliminate third variable */ m3 = r3[2]/r2[2]; @@ -421,7 +421,7 @@ static GLboolean invert_matrix_general( GLmatrix *mat ) r3[7] -= m3 * r2[7]; /* last check */ - if (0.0 == r3[3]) return GL_FALSE; + if (0.0F == r3[3]) return GL_FALSE; s = 1.0F/r3[3]; /* now back substitute row 3 */ r3[4] *= s; r3[5] *= s; r3[6] *= s; r3[7] *= s; @@ -490,26 +490,26 @@ static GLboolean invert_matrix_3d_general( GLmatrix *mat ) */ pos = neg = 0.0; t = MAT(in,0,0) * MAT(in,1,1) * MAT(in,2,2); - if (t >= 0.0) pos += t; else neg += t; + if (t >= 0.0F) pos += t; else neg += t; t = MAT(in,1,0) * MAT(in,2,1) * MAT(in,0,2); - if (t >= 0.0) pos += t; else neg += t; + if (t >= 0.0F) pos += t; else neg += t; t = MAT(in,2,0) * MAT(in,0,1) * MAT(in,1,2); - if (t >= 0.0) pos += t; else neg += t; + if (t >= 0.0F) pos += t; else neg += t; t = -MAT(in,2,0) * MAT(in,1,1) * MAT(in,0,2); - if (t >= 0.0) pos += t; else neg += t; + if (t >= 0.0F) pos += t; else neg += t; t = -MAT(in,1,0) * MAT(in,0,1) * MAT(in,2,2); - if (t >= 0.0) pos += t; else neg += t; + if (t >= 0.0F) pos += t; else neg += t; t = -MAT(in,0,0) * MAT(in,2,1) * MAT(in,1,2); - if (t >= 0.0) pos += t; else neg += t; + if (t >= 0.0F) pos += t; else neg += t; det = pos + neg; - if (fabsf(det) < 1e-25) + if (fabsf(det) < 1e-25F) return GL_FALSE; det = 1.0F / det; @@ -564,7 +564,7 @@ static GLboolean invert_matrix_3d( GLmatrix *mat ) MAT(in,0,1) * MAT(in,0,1) + MAT(in,0,2) * MAT(in,0,2)); - if (scale == 0.0) + if (scale == 0.0F) return GL_FALSE; scale = 1.0F / scale; @@ -799,8 +799,8 @@ _math_matrix_rotate( GLmatrix *mat, GLfloat m[16]; GLboolean optimized; - s = (GLfloat) sin( angle * M_PI / 180.0 ); - c = (GLfloat) cos( angle * M_PI / 180.0 ); + s = sinf( angle * M_PI / 180.0 ); + c = cosf( angle * M_PI / 180.0 ); memcpy(m, Identity, sizeof(GLfloat)*16); optimized = GL_FALSE; @@ -859,7 +859,7 @@ _math_matrix_rotate( GLmatrix *mat, if (!optimized) { const GLfloat mag = sqrtf(x * x + y * y + z * z); - if (mag <= 1.0e-4) { + if (mag <= 1.0e-4F) { /* no rotation, leave mat as-is */ return; } @@ -1070,7 +1070,7 @@ _math_matrix_scale( GLmatrix *mat, GLfloat x, GLfloat y, GLfloat z ) m[2] *= x; m[6] *= y; m[10] *= z; m[3] *= x; m[7] *= y; m[11] *= z; - if (fabsf(x - y) < 1e-8 && fabsf(x - z) < 1e-8) + if (fabsf(x - y) < 1e-8F && fabsf(x - z) < 1e-8F) mat->flags |= MAT_FLAG_UNIFORM_SCALE; else mat->flags |= MAT_FLAG_GENERAL_SCALE; @@ -1111,8 +1111,8 @@ _math_matrix_translate( GLmatrix *mat, GLfloat x, GLfloat y, GLfloat z ) * Transforms Normalized Device Coords to window/Z values. 
*/ void -_math_matrix_viewport(GLmatrix *m, const double scale[3], - const double translate[3], double depthMax) +_math_matrix_viewport(GLmatrix *m, const float scale[3], + const float translate[3], double depthMax) { m->m[MAT_SX] = scale[0]; m->m[MAT_TX] = translate[0]; @@ -1206,7 +1206,7 @@ static void analyse_from_scratch( GLmatrix *mat ) GLuint i; for (i = 0 ; i < 16 ; i++) { - if (m[i] == 0.0) mask |= (1<<i); + if (m[i] == 0.0F) mask |= (1<<i); } if (m[0] == 1.0F) mask |= (1<<16); @@ -1240,12 +1240,12 @@ static void analyse_from_scratch( GLmatrix *mat ) mat->type = MATRIX_2D; /* Check for scale */ - if (SQ(mm-1) > SQ(1e-6) || - SQ(m4m4-1) > SQ(1e-6)) + if (SQ(mm-1) > SQ(1e-6F) || + SQ(m4m4-1) > SQ(1e-6F)) mat->flags |= MAT_FLAG_GENERAL_SCALE; /* Check for rotation */ - if (SQ(mm4) > SQ(1e-6)) + if (SQ(mm4) > SQ(1e-6F)) mat->flags |= MAT_FLAG_GENERAL_3D; else mat->flags |= MAT_FLAG_ROTATION; @@ -1255,9 +1255,9 @@ static void analyse_from_scratch( GLmatrix *mat ) mat->type = MATRIX_3D_NO_ROT; /* Check for scale */ - if (SQ(m[0]-m[5]) < SQ(1e-6) && - SQ(m[0]-m[10]) < SQ(1e-6)) { - if (SQ(m[0]-1.0) > SQ(1e-6)) { + if (SQ(m[0]-m[5]) < SQ(1e-6F) && + SQ(m[0]-m[10]) < SQ(1e-6F)) { + if (SQ(m[0]-1.0F) > SQ(1e-6F)) { mat->flags |= MAT_FLAG_UNIFORM_SCALE; } } @@ -1275,8 +1275,8 @@ static void analyse_from_scratch( GLmatrix *mat ) mat->type = MATRIX_3D; /* Check for scale */ - if (SQ(c1-c2) < SQ(1e-6) && SQ(c1-c3) < SQ(1e-6)) { - if (SQ(c1-1.0) > SQ(1e-6)) + if (SQ(c1-c2) < SQ(1e-6F) && SQ(c1-c3) < SQ(1e-6F)) { + if (SQ(c1-1.0F) > SQ(1e-6F)) mat->flags |= MAT_FLAG_UNIFORM_SCALE; /* else no scale at all */ } @@ -1285,10 +1285,10 @@ static void analyse_from_scratch( GLmatrix *mat ) } /* Check for rotation */ - if (SQ(d1) < SQ(1e-6)) { + if (SQ(d1) < SQ(1e-6F)) { CROSS3( cp, m, m+4 ); SUB_3V( cp, cp, (m+8) ); - if (LEN_SQUARED_3FV(cp) < SQ(1e-6)) + if (LEN_SQUARED_3FV(cp) < SQ(1e-6F)) mat->flags |= MAT_FLAG_ROTATION; else mat->flags |= MAT_FLAG_GENERAL_3D; diff --git a/src/mesa/math/m_matrix.h b/src/mesa/math/m_matrix.h index 778d716dce7..c34d9e3022f 100644 --- a/src/mesa/math/m_matrix.h +++ b/src/mesa/math/m_matrix.h @@ -122,8 +122,8 @@ _math_matrix_frustum( GLmatrix *mat, GLfloat nearval, GLfloat farval ); extern void -_math_matrix_viewport( GLmatrix *m, const double scale[3], - const double translate[3], double depthMax ); +_math_matrix_viewport( GLmatrix *m, const float scale[3], + const float translate[3], double depthMax ); extern void _math_matrix_set_identity( GLmatrix *dest ); diff --git a/src/mesa/math/m_norm_tmp.h b/src/mesa/math/m_norm_tmp.h index d3ec1c22ecd..6f1db8d0bd0 100644 --- a/src/mesa/math/m_norm_tmp.h +++ b/src/mesa/math/m_norm_tmp.h @@ -80,7 +80,7 @@ TAG(transform_normalize_normals)( const GLmatrix *mat, } } else { - if (scale != 1.0) { + if (scale != 1.0f) { m0 *= scale, m4 *= scale, m8 *= scale; m1 *= scale, m5 *= scale, m9 *= scale; m2 *= scale, m6 *= scale, m10 *= scale; diff --git a/src/mesa/math/m_vector.h b/src/mesa/math/m_vector.h index 8551ee7520e..5bd76b8987d 100644 --- a/src/mesa/math/m_vector.h +++ b/src/mesa/math/m_vector.h @@ -51,7 +51,7 @@ /** * Wrap all the information about vectors up in a struct. Has - * additional fields compared to the other vectors to help us track of + * additional fields compared to the other vectors to help us track * different vertex sizes, and whether we need to clean columns out * because they contain non-(0,0,0,1) values. 
* @@ -61,7 +61,7 @@ */ typedef struct { GLfloat (*data)[4]; /**< may be malloc'd or point to client data */ - GLfloat *start; /**< points somewhere inside of <data> */ + GLfloat *start; /**< points somewhere inside of GLvector4f::data */ GLuint count; /**< size of the vector (in elements) */ GLuint stride; /**< stride from one element to the next (in bytes) */ GLuint size; /**< 2-4 for vertices and 1-4 for texcoords */ diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 3bffe90ff1f..b8b082e2a59 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -534,6 +534,7 @@ type_size(const struct glsl_type *type) return size; case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: + case GLSL_TYPE_SUBROUTINE: /* Samplers take up one slot in UNIFORMS[], but they're baked in * at link time. */ @@ -1343,6 +1344,7 @@ ir_to_mesa_visitor::visit(ir_expression *ir) case ir_unop_dFdx_fine: case ir_unop_dFdy_coarse: case ir_unop_dFdy_fine: + case ir_unop_subroutine_to_int: assert(!"not supported"); break; @@ -2385,7 +2387,7 @@ _mesa_generate_parameters_list_for_uniforms(struct gl_shader_program ir_variable *var = node->as_variable(); if ((var == NULL) || (var->data.mode != ir_var_uniform) - || var->is_in_uniform_block() || (strncmp(var->name, "gl_", 3) == 0)) + || var->is_in_buffer_block() || (strncmp(var->name, "gl_", 3) == 0)) continue; add.process(var); @@ -2452,6 +2454,7 @@ _mesa_associate_uniform_storage(struct gl_context *ctx, break; case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: + case GLSL_TYPE_SUBROUTINE: format = uniform_native; columns = 1; break; @@ -2912,7 +2915,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) progress = - lower_variable_index_to_cond_assign(ir, + lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir, options->EmitNoIndirectInput, options->EmitNoIndirectOutput, options->EmitNoIndirectTemp, @@ -2977,6 +2980,8 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) if (prog->LinkStatus) { if (!ctx->Driver.LinkShader(ctx, prog)) { prog->LinkStatus = GL_FALSE; + } else { + build_program_resource_list(ctx, prog); } } diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c index 46260b54882..2c52d0db508 100644 --- a/src/mesa/program/prog_execute.c +++ b/src/mesa/program/prog_execute.c @@ -623,7 +623,7 @@ _mesa_execute_program(struct gl_context * ctx, GLfloat a[4], result[4]; fetch_vector1(&inst->SrcReg[0], machine, a); result[0] = result[1] = result[2] = result[3] - = (GLfloat) cos(a[0]); + = cosf(a[0]); store_vector4(inst, machine, result); } break; @@ -723,7 +723,7 @@ _mesa_execute_program(struct gl_context * ctx, * result.z = result.x * APPX(result.y) * We do what the ARB extension says. 
*/ - q[2] = (GLfloat) pow(2.0, t[0]); + q[2] = exp2f(t[0]); } q[1] = t[0] - floor_t0; q[3] = 1.0F; @@ -734,7 +734,7 @@ _mesa_execute_program(struct gl_context * ctx, { GLfloat a[4], result[4], val; fetch_vector1(&inst->SrcReg[0], machine, a); - val = (GLfloat) pow(2.0, a[0]); + val = exp2f(a[0]); /* if (IS_INF_OR_NAN(val)) val = 1.0e10; @@ -776,7 +776,7 @@ _mesa_execute_program(struct gl_context * ctx, if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) { GLfloat a[4]; fetch_vector1(&inst->SrcReg[0], machine, a); - cond = (a[0] != 0.0); + cond = (a[0] != 0.0F); } else { cond = eval_condition(machine, inst); @@ -834,7 +834,7 @@ _mesa_execute_program(struct gl_context * ctx, val = -FLT_MAX; } else { - val = (float)(log(a[0]) * 1.442695F); + val = logf(a[0]) * 1.442695F; } result[0] = result[1] = result[2] = result[3] = val; store_vector4(inst, machine, result); @@ -853,10 +853,10 @@ _mesa_execute_program(struct gl_context * ctx, result[1] = a[0]; /* XXX we could probably just use pow() here */ if (a[0] > 0.0F) { - if (a[1] == 0.0 && a[3] == 0.0) + if (a[1] == 0.0F && a[3] == 0.0F) result[2] = 1.0F; else - result[2] = (GLfloat) pow(a[1], a[3]); + result[2] = powf(a[1], a[3]); } else { result[2] = 0.0F; @@ -886,12 +886,12 @@ _mesa_execute_program(struct gl_context * ctx, int exponent; GLfloat mantissa = frexpf(t[0], &exponent); q[0] = (GLfloat) (exponent - 1); - q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */ + q[1] = 2.0F * mantissa; /* map [.5, 1) -> [1, 2) */ /* The fast LOG2 macro doesn't meet the precision * requirements. */ - q[2] = (float)(log(t[0]) * 1.442695F); + q[2] = logf(t[0]) * 1.442695F; } } else { @@ -1051,7 +1051,7 @@ _mesa_execute_program(struct gl_context * ctx, fetch_vector1(&inst->SrcReg[0], machine, a); fetch_vector1(&inst->SrcReg[1], machine, b); result[0] = result[1] = result[2] = result[3] - = (GLfloat) pow(a[0], b[0]); + = powf(a[0], b[0]); store_vector4(inst, machine, result); } break; @@ -1095,10 +1095,10 @@ _mesa_execute_program(struct gl_context * ctx, { GLfloat a[4], result[4]; fetch_vector1(&inst->SrcReg[0], machine, a); - result[0] = (GLfloat) cos(a[0]); - result[1] = (GLfloat) sin(a[0]); - result[2] = 0.0; /* undefined! */ - result[3] = 0.0; /* undefined! */ + result[0] = cosf(a[0]); + result[1] = sinf(a[0]); + result[2] = 0.0F; /* undefined! */ + result[3] = 0.0F; /* undefined! */ store_vector4(inst, machine, result); } break; @@ -1161,7 +1161,7 @@ _mesa_execute_program(struct gl_context * ctx, GLfloat a[4], result[4]; fetch_vector1(&inst->SrcReg[0], machine, a); result[0] = result[1] = result[2] = result[3] - = (GLfloat) sin(a[0]); + = sinf(a[0]); store_vector4(inst, machine, result); } break; @@ -1360,7 +1360,7 @@ _mesa_execute_program(struct gl_context * ctx, * zero, we'd probably be fine except for an assert in * IROUND_POS() which gets triggered by the inf values created. 
*/ - if (texcoord[3] != 0.0) { + if (texcoord[3] != 0.0F) { texcoord[0] /= texcoord[3]; texcoord[1] /= texcoord[3]; texcoord[2] /= texcoord[3]; @@ -1380,7 +1380,7 @@ _mesa_execute_program(struct gl_context * ctx, fetch_vector4(&inst->SrcReg[0], machine, texcoord); if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX && - texcoord[3] != 0.0) { + texcoord[3] != 0.0F) { texcoord[0] /= texcoord[3]; texcoord[1] /= texcoord[3]; texcoord[2] /= texcoord[3]; diff --git a/src/mesa/program/prog_opt_constant_fold.c b/src/mesa/program/prog_opt_constant_fold.c index 3811c0d8aa6..e2518e660e6 100644 --- a/src/mesa/program/prog_opt_constant_fold.c +++ b/src/mesa/program/prog_opt_constant_fold.c @@ -38,6 +38,8 @@ src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs) for (i = 0; i < num_srcs; i++) { if (inst->SrcReg[i].File != PROGRAM_CONSTANT) return false; + if (inst->SrcReg[i].RelAddr) + return false; } return true; diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c index e4faa63c06f..bb7c2c6e527 100644 --- a/src/mesa/program/prog_print.c +++ b/src/mesa/program/prog_print.c @@ -147,6 +147,8 @@ arb_input_attrib_string(GLuint index, GLenum progType) "fragment.(twenty-one)", /* VARYING_SLOT_VIEWPORT */ "fragment.(twenty-two)", /* VARYING_SLOT_FACE */ "fragment.(twenty-three)", /* VARYING_SLOT_PNTC */ + "fragment.(twenty-four)", /* VARYING_SLOT_TESS_LEVEL_OUTER */ + "fragment.(twenty-five)", /* VARYING_SLOT_TESS_LEVEL_INNER */ "fragment.varying[0]", "fragment.varying[1]", "fragment.varying[2]", @@ -272,6 +274,8 @@ arb_output_attrib_string(GLuint index, GLenum progType) "result.(twenty-one)", /* VARYING_SLOT_VIEWPORT */ "result.(twenty-two)", /* VARYING_SLOT_FACE */ "result.(twenty-three)", /* VARYING_SLOT_PNTC */ + "result.(twenty-four)", /* VARYING_SLOT_TESS_LEVEL_OUTER */ + "result.(twenty-five)", /* VARYING_SLOT_TESS_LEVEL_INNER */ "result.varying[0]", "result.varying[1]", "result.varying[2]", @@ -1015,6 +1019,12 @@ _mesa_write_shader_to_file(const struct gl_shader *shader) case MESA_SHADER_FRAGMENT: type = "frag"; break; + case MESA_SHADER_TESS_CTRL: + type = "tesc"; + break; + case MESA_SHADER_TESS_EVAL: + type = "tese"; + break; case MESA_SHADER_VERTEX: type = "vert"; break; diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index c13e61b1630..2d03bba3d12 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -286,6 +286,38 @@ _mesa_init_compute_program(struct gl_context *ctx, /** + * Initialize a new tessellation control program object. + */ +struct gl_program * +_mesa_init_tess_ctrl_program(struct gl_context *ctx, + struct gl_tess_ctrl_program *prog, + GLenum target, GLuint id) +{ + if (prog) { + init_program_struct(&prog->Base, target, id); + return &prog->Base; + } + return NULL; +} + + +/** + * Initialize a new tessellation evaluation program object. + */ +struct gl_program * +_mesa_init_tess_eval_program(struct gl_context *ctx, + struct gl_tess_eval_program *prog, + GLenum target, GLuint id) +{ + if (prog) { + init_program_struct(&prog->Base, target, id); + return &prog->Base; + } + return NULL; +} + + +/** * Initialize a new geometry program object. 
*/ struct gl_program * @@ -333,6 +365,16 @@ _mesa_new_program(struct gl_context *ctx, GLenum target, GLuint id) CALLOC_STRUCT(gl_geometry_program), target, id); break; + case GL_TESS_CONTROL_PROGRAM_NV: + prog = _mesa_init_tess_ctrl_program(ctx, + CALLOC_STRUCT(gl_tess_ctrl_program), + target, id); + break; + case GL_TESS_EVALUATION_PROGRAM_NV: + prog = _mesa_init_tess_eval_program(ctx, + CALLOC_STRUCT(gl_tess_eval_program), + target, id); + break; case GL_COMPUTE_PROGRAM_NV: prog = _mesa_init_compute_program(ctx, CALLOC_STRUCT(gl_compute_program), @@ -554,6 +596,23 @@ _mesa_clone_program(struct gl_context *ctx, const struct gl_program *prog) gpc->UsesStreams = gp->UsesStreams; } break; + case GL_TESS_CONTROL_PROGRAM_NV: + { + const struct gl_tess_ctrl_program *tcp = gl_tess_ctrl_program_const(prog); + struct gl_tess_ctrl_program *tcpc = gl_tess_ctrl_program(clone); + tcpc->VerticesOut = tcp->VerticesOut; + } + break; + case GL_TESS_EVALUATION_PROGRAM_NV: + { + const struct gl_tess_eval_program *tep = gl_tess_eval_program_const(prog); + struct gl_tess_eval_program *tepc = gl_tess_eval_program(clone); + tepc->PrimitiveMode = tep->PrimitiveMode; + tepc->Spacing = tep->Spacing; + tepc->VertexOrder = tep->VertexOrder; + tepc->PointMode = tep->PointMode; + } + break; default: _mesa_problem(NULL, "Unexpected target in _mesa_clone_program"); } diff --git a/src/mesa/program/program.h b/src/mesa/program/program.h index 2d92ab2f118..a894147cafd 100644 --- a/src/mesa/program/program.h +++ b/src/mesa/program/program.h @@ -79,6 +79,16 @@ _mesa_init_fragment_program(struct gl_context *ctx, GLenum target, GLuint id); extern struct gl_program * +_mesa_init_tess_ctrl_program(struct gl_context *ctx, + struct gl_tess_ctrl_program *prog, + GLenum target, GLuint id); + +extern struct gl_program * +_mesa_init_tess_eval_program(struct gl_context *ctx, + struct gl_tess_eval_program *prog, + GLenum target, GLuint id); + +extern struct gl_program * _mesa_init_geometry_program(struct gl_context *ctx, struct gl_geometry_program *prog, GLenum target, GLuint id); @@ -147,6 +157,25 @@ _mesa_reference_compprog(struct gl_context *ctx, (struct gl_program *) prog); } + +static inline void +_mesa_reference_tesscprog(struct gl_context *ctx, + struct gl_tess_ctrl_program **ptr, + struct gl_tess_ctrl_program *prog) +{ + _mesa_reference_program(ctx, (struct gl_program **) ptr, + (struct gl_program *) prog); +} + +static inline void +_mesa_reference_tesseprog(struct gl_context *ctx, + struct gl_tess_eval_program **ptr, + struct gl_tess_eval_program *prog) +{ + _mesa_reference_program(ctx, (struct gl_program **) ptr, + (struct gl_program *) prog); +} + extern struct gl_program * _mesa_clone_program(struct gl_context *ctx, const struct gl_program *prog); @@ -157,6 +186,20 @@ _mesa_clone_vertex_program(struct gl_context *ctx, return (struct gl_vertex_program *) _mesa_clone_program(ctx, &prog->Base); } +static inline struct gl_tess_ctrl_program * +_mesa_clone_tess_ctrl_program(struct gl_context *ctx, + const struct gl_tess_ctrl_program *prog) +{ + return (struct gl_tess_ctrl_program *) _mesa_clone_program(ctx, &prog->Base); +} + +static inline struct gl_tess_eval_program * +_mesa_clone_tess_eval_program(struct gl_context *ctx, + const struct gl_tess_eval_program *prog) +{ + return (struct gl_tess_eval_program *) _mesa_clone_program(ctx, &prog->Base); +} + static inline struct gl_geometry_program * _mesa_clone_geometry_program(struct gl_context *ctx, const struct gl_geometry_program *prog) @@ -216,6 +259,10 @@ 
_mesa_program_enum_to_shader_stage(GLenum v) return MESA_SHADER_FRAGMENT; case GL_GEOMETRY_PROGRAM_NV: return MESA_SHADER_GEOMETRY; + case GL_TESS_CONTROL_PROGRAM_NV: + return MESA_SHADER_TESS_CTRL; + case GL_TESS_EVALUATION_PROGRAM_NV: + return MESA_SHADER_TESS_EVAL; case GL_COMPUTE_PROGRAM_NV: return MESA_SHADER_COMPUTE; default: @@ -235,6 +282,10 @@ _mesa_shader_stage_to_program(unsigned stage) return GL_FRAGMENT_PROGRAM_ARB; case MESA_SHADER_GEOMETRY: return GL_GEOMETRY_PROGRAM_NV; + case MESA_SHADER_TESS_CTRL: + return GL_TESS_CONTROL_PROGRAM_NV; + case MESA_SHADER_TESS_EVAL: + return GL_TESS_EVALUATION_PROGRAM_NV; case MESA_SHADER_COMPUTE: return GL_COMPUTE_PROGRAM_NV; } @@ -244,7 +295,9 @@ _mesa_shader_stage_to_program(unsigned stage) } -/* Cast wrappers from gl_program to gl_vertex/geometry/fragment_program */ +/* Cast wrappers from gl_program to derived program types. + * (e.g. gl_vertex_program) + */ static inline struct gl_fragment_program * gl_fragment_program(struct gl_program *prog) @@ -297,6 +350,31 @@ gl_compute_program_const(const struct gl_program *prog) return (const struct gl_compute_program *) prog; } +static inline struct gl_tess_ctrl_program * +gl_tess_ctrl_program(struct gl_program *prog) +{ + return (struct gl_tess_ctrl_program *) prog; +} + +static inline const struct gl_tess_ctrl_program * +gl_tess_ctrl_program_const(const struct gl_program *prog) +{ + return (const struct gl_tess_ctrl_program *) prog; +} + + +static inline struct gl_tess_eval_program * +gl_tess_eval_program(struct gl_program *prog) +{ + return (struct gl_tess_eval_program *) prog; +} + +static inline const struct gl_tess_eval_program * +gl_tess_eval_program_const(const struct gl_program *prog) +{ + return (const struct gl_tess_eval_program *) prog; +} + #ifdef __cplusplus } /* extern "C" */ diff --git a/src/mesa/program/program_parse_extra.c b/src/mesa/program/program_parse_extra.c index 32b54afc57b..71f86d13ace 100644 --- a/src/mesa/program/program_parse_extra.c +++ b/src/mesa/program/program_parse_extra.c @@ -163,6 +163,8 @@ _mesa_ARBvp_parse_option(struct asm_parser_state *state, const char *option) int _mesa_ARBfp_parse_option(struct asm_parser_state *state, const char *option) { + unsigned fog_option; + /* All of the options currently supported start with "ARB_". The code is * currently structured with nested if-statements because eventually options * that start with "NV_" will be supported. 
This structure will result in @@ -177,20 +179,42 @@ _mesa_ARBfp_parse_option(struct asm_parser_state *state, const char *option) if (strncmp(option, "fog_", 4) == 0) { option += 4; - if (state->option.Fog == OPTION_NONE) { - if (strcmp(option, "exp") == 0) { - state->option.Fog = OPTION_FOG_EXP; - return 1; - } else if (strcmp(option, "exp2") == 0) { - state->option.Fog = OPTION_FOG_EXP2; - return 1; - } else if (strcmp(option, "linear") == 0) { - state->option.Fog = OPTION_FOG_LINEAR; - return 1; - } - } + if (strcmp(option, "exp") == 0) { + fog_option = OPTION_FOG_EXP; + } else if (strcmp(option, "exp2") == 0) { + fog_option = OPTION_FOG_EXP2; + } else if (strcmp(option, "linear") == 0) { + fog_option = OPTION_FOG_LINEAR; + } else { + /* invalid option */ + return 0; + } - return 0; + if (state->option.Fog == OPTION_NONE) { + state->option.Fog = fog_option; + return 1; + } + + /* The ARB_fragment_program specification instructs us to handle + * redundant options in two seemingly contradictory ways: + * + * Section 3.11.4.5.1 says: + * "Only one fog application option may be specified by any given + * fragment program. A fragment program that specifies more than one + * of the program options "ARB_fog_exp", "ARB_fog_exp2", and + * "ARB_fog_linear", will fail to load." + * + * Issue 27 says: + * "The three mandatory options are ARB_fog_exp, ARB_fog_exp2, and + * ARB_fog_linear. As these options are mutually exclusive by + * nature, specifying more than one is not useful. If more than one + * is specified, the last one encountered in the <optionSequence> + * will be the one to actually modify the execution environment." + * + * We choose to allow programs to specify the same OPTION redundantly, + * but fail to load programs that specify contradictory options. + */ + return state->option.Fog == fog_option ? 
1 : 0; } else if (strncmp(option, "precision_hint_", 15) == 0) { option += 15; diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c index 428f2d9d7d7..43dbadd4a7e 100644 --- a/src/mesa/state_tracker/st_atom.c +++ b/src/mesa/state_tracker/st_atom.c @@ -46,9 +46,10 @@ static const struct st_tracked_state *atoms[] = &st_update_depth_stencil_alpha, &st_update_clip, - &st_finalize_textures, &st_update_fp, &st_update_gp, + &st_update_tep, + &st_update_tcp, &st_update_vp, &st_update_rasterizer, @@ -59,17 +60,24 @@ static const struct st_tracked_state *atoms[] = &st_update_vertex_texture, &st_update_fragment_texture, &st_update_geometry_texture, + &st_update_tessctrl_texture, + &st_update_tesseval_texture, &st_update_sampler, /* depends on update_*_texture for swizzle */ &st_update_framebuffer, &st_update_msaa, &st_update_sample_shading, &st_update_vs_constants, + &st_update_tcs_constants, + &st_update_tes_constants, &st_update_gs_constants, &st_update_fs_constants, &st_bind_vs_ubos, + &st_bind_tcs_ubos, + &st_bind_tes_ubos, &st_bind_fs_ubos, &st_bind_gs_ubos, &st_update_pixel_transfer, + &st_update_tess, /* this must be done after the vertex program update */ &st_update_array diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h index c50111d501f..a24842baa4f 100644 --- a/src/mesa/state_tracker/st_atom.h +++ b/src/mesa/state_tracker/st_atom.h @@ -52,6 +52,8 @@ extern const struct st_tracked_state st_update_clip; extern const struct st_tracked_state st_update_depth_stencil_alpha; extern const struct st_tracked_state st_update_fp; extern const struct st_tracked_state st_update_gp; +extern const struct st_tracked_state st_update_tep; +extern const struct st_tracked_state st_update_tcp; extern const struct st_tracked_state st_update_vp; extern const struct st_tracked_state st_update_rasterizer; extern const struct st_tracked_state st_update_polygon_stipple; @@ -64,14 +66,20 @@ extern const struct st_tracked_state st_update_sampler; extern const struct st_tracked_state st_update_fragment_texture; extern const struct st_tracked_state st_update_vertex_texture; extern const struct st_tracked_state st_update_geometry_texture; -extern const struct st_tracked_state st_finalize_textures; +extern const struct st_tracked_state st_update_tessctrl_texture; +extern const struct st_tracked_state st_update_tesseval_texture; extern const struct st_tracked_state st_update_fs_constants; extern const struct st_tracked_state st_update_gs_constants; +extern const struct st_tracked_state st_update_tes_constants; +extern const struct st_tracked_state st_update_tcs_constants; extern const struct st_tracked_state st_update_vs_constants; extern const struct st_tracked_state st_bind_fs_ubos; extern const struct st_tracked_state st_bind_vs_ubos; extern const struct st_tracked_state st_bind_gs_ubos; +extern const struct st_tracked_state st_bind_tcs_ubos; +extern const struct st_tracked_state st_bind_tes_ubos; extern const struct st_tracked_state st_update_pixel_transfer; +extern const struct st_tracked_state st_update_tess; GLuint st_compare_func_to_pipe(GLenum func); diff --git a/src/mesa/state_tracker/st_atom_clip.c b/src/mesa/state_tracker/st_atom_clip.c index f82c1332afc..506a770499f 100644 --- a/src/mesa/state_tracker/st_atom_clip.c +++ b/src/mesa/state_tracker/st_atom_clip.c @@ -59,8 +59,11 @@ static void update_clip( struct st_context *st ) memcpy(clip.ucp, use_eye ? 
ctx->Transform.EyeUserPlane : ctx->Transform._ClipUserPlane, sizeof(clip.ucp)); - st->state.clip = clip; - cso_set_clip(st->cso_context, &clip); + + if (memcmp(&st->state.clip, &clip, sizeof(clip)) != 0) { + st->state.clip = clip; + st->pipe->set_clip_state(st->pipe, &clip); + } } diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c index a54e0d9dbf5..6affb4d84d5 100644 --- a/src/mesa/state_tracker/st_atom_constbuf.c +++ b/src/mesa/state_tracker/st_atom_constbuf.c @@ -59,7 +59,9 @@ void st_upload_constants( struct st_context *st, { assert(shader_type == PIPE_SHADER_VERTEX || shader_type == PIPE_SHADER_FRAGMENT || - shader_type == PIPE_SHADER_GEOMETRY); + shader_type == PIPE_SHADER_GEOMETRY || + shader_type == PIPE_SHADER_TESS_CTRL || + shader_type == PIPE_SHADER_TESS_EVAL); /* update constants */ if (params && params->NumParameters) { @@ -178,6 +180,50 @@ const struct st_tracked_state st_update_gs_constants = { update_gs_constants /* update */ }; +/* Tessellation control shader: + */ +static void update_tcs_constants(struct st_context *st ) +{ + struct st_tessctrl_program *tcp = st->tcp; + struct gl_program_parameter_list *params; + + if (tcp) { + params = tcp->Base.Base.Parameters; + st_upload_constants( st, params, PIPE_SHADER_TESS_CTRL ); + } +} + +const struct st_tracked_state st_update_tcs_constants = { + "st_update_tcs_constants", /* name */ + { /* dirty */ + _NEW_PROGRAM_CONSTANTS, /* mesa */ + ST_NEW_TESSCTRL_PROGRAM, /* st */ + }, + update_tcs_constants /* update */ +}; + +/* Tessellation evaluation shader: + */ +static void update_tes_constants(struct st_context *st ) +{ + struct st_tesseval_program *tep = st->tep; + struct gl_program_parameter_list *params; + + if (tep) { + params = tep->Base.Base.Parameters; + st_upload_constants( st, params, PIPE_SHADER_TESS_EVAL ); + } +} + +const struct st_tracked_state st_update_tes_constants = { + "st_update_tes_constants", /* name */ + { /* dirty */ + _NEW_PROGRAM_CONSTANTS, /* mesa */ + ST_NEW_TESSEVAL_PROGRAM, /* st */ + }, + update_tes_constants /* update */ +}; + static void st_bind_ubos(struct st_context *st, struct gl_shader *shader, unsigned shader_type) @@ -275,3 +321,43 @@ const struct st_tracked_state st_bind_gs_ubos = { }, bind_gs_ubos }; + +static void bind_tcs_ubos(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL]; + + if (!prog) + return; + + st_bind_ubos(st, prog->_LinkedShaders[MESA_SHADER_TESS_CTRL], PIPE_SHADER_TESS_CTRL); +} + +const struct st_tracked_state st_bind_tcs_ubos = { + "st_bind_tcs_ubos", + { + 0, + ST_NEW_TESSCTRL_PROGRAM | ST_NEW_UNIFORM_BUFFER, + }, + bind_tcs_ubos +}; + +static void bind_tes_ubos(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL]; + + if (!prog) + return; + + st_bind_ubos(st, prog->_LinkedShaders[MESA_SHADER_TESS_EVAL], PIPE_SHADER_TESS_EVAL); +} + +const struct st_tracked_state st_bind_tes_ubos = { + "st_bind_tes_ubos", + { + 0, + ST_NEW_TESSEVAL_PROGRAM | ST_NEW_UNIFORM_BUFFER, + }, + bind_tes_ubos +}; diff --git a/src/mesa/state_tracker/st_atom_depth.c b/src/mesa/state_tracker/st_atom_depth.c index c4bca8d09b5..d9cc97029fb 100644 --- a/src/mesa/state_tracker/st_atom_depth.c +++ b/src/mesa/state_tracker/st_atom_depth.c @@ -105,10 +105,17 @@ update_depth_stencil_alpha(struct st_context *st) memset(dsa, 0, sizeof(*dsa)); memset(&sr, 0, sizeof(sr)); - if (ctx->Depth.Test && ctx->DrawBuffer->Visual.depthBits > 
0) { - dsa->depth.enabled = 1; - dsa->depth.writemask = ctx->Depth.Mask; - dsa->depth.func = st_compare_func_to_pipe(ctx->Depth.Func); + if (ctx->DrawBuffer->Visual.depthBits > 0) { + if (ctx->Depth.Test) { + dsa->depth.enabled = 1; + dsa->depth.writemask = ctx->Depth.Mask; + dsa->depth.func = st_compare_func_to_pipe(ctx->Depth.Func); + } + if (ctx->Depth.BoundsTest) { + dsa->depth.bounds_test = 1; + dsa->depth.bounds_min = ctx->Depth.BoundsMin; + dsa->depth.bounds_max = ctx->Depth.BoundsMax; + } } if (ctx->Stencil.Enabled && ctx->DrawBuffer->Visual.stencilBits > 0) { diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c index b68eb16d7be..4252c27962e 100644 --- a/src/mesa/state_tracker/st_atom_sampler.c +++ b/src/mesa/state_tracker/st_atom_sampler.c @@ -245,6 +245,7 @@ update_shader_samplers(struct st_context *st, GLuint unit; GLbitfield samplers_used; const GLuint old_max = *num_samplers; + const struct pipe_sampler_state *states[PIPE_MAX_SAMPLERS]; samplers_used = prog->SamplersUsed; @@ -261,13 +262,11 @@ update_shader_samplers(struct st_context *st, const GLuint texUnit = prog->SamplerUnits[unit]; convert_sampler(st, sampler, texUnit); - + states[unit] = sampler; *num_samplers = unit + 1; - - cso_single_sampler(st->cso_context, shader_stage, unit, sampler); } else if (samplers_used != 0 || unit < old_max) { - cso_single_sampler(st->cso_context, shader_stage, unit, NULL); + states[unit] = NULL; } else { /* if we've reset all the old samplers and we have no more new ones */ @@ -275,7 +274,7 @@ update_shader_samplers(struct st_context *st, } } - cso_single_sampler_done(st->cso_context, shader_stage); + cso_set_samplers(st->cso_context, shader_stage, *num_samplers, states); } @@ -306,6 +305,22 @@ update_samplers(struct st_context *st) st->state.samplers[PIPE_SHADER_GEOMETRY], &st->state.num_samplers[PIPE_SHADER_GEOMETRY]); } + if (ctx->TessCtrlProgram._Current) { + update_shader_samplers(st, + PIPE_SHADER_TESS_CTRL, + &ctx->TessCtrlProgram._Current->Base, + ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits, + st->state.samplers[PIPE_SHADER_TESS_CTRL], + &st->state.num_samplers[PIPE_SHADER_TESS_CTRL]); + } + if (ctx->TessEvalProgram._Current) { + update_shader_samplers(st, + PIPE_SHADER_TESS_EVAL, + &ctx->TessEvalProgram._Current->Base, + ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits, + st->state.samplers[PIPE_SHADER_TESS_EVAL], + &st->state.num_samplers[PIPE_SHADER_TESS_EVAL]); + } } diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index ad8d2624fc9..fee15a980f3 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -50,24 +50,6 @@ /** - * Return pointer to a pass-through fragment shader. - * This shader is used when a texture is missing/incomplete. - */ -static void * -get_passthrough_fs(struct st_context *st) -{ - if (!st->passthrough_fs) { - st->passthrough_fs = - util_make_fragment_passthrough_shader(st->pipe, TGSI_SEMANTIC_COLOR, - TGSI_INTERPOLATE_PERSPECTIVE, - TRUE); - } - - return st->passthrough_fs; -} - - -/** * Update fragment program state/atom. This involves translating the * Mesa fragment program into a gallium fragment program and binding it. 
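A minimal, stand-alone sketch of the gather-then-commit pattern the st_atom_sampler.c hunk above switches to (one cso_set_samplers() call instead of per-unit cso_single_sampler()/cso_single_sampler_done()); the sampler_state type, the set_samplers() helper and the usage bitmask below are simplified stand-ins, not the real gallium/CSO interfaces:

#include <stdio.h>

#define MAX_SAMPLERS 16

struct sampler_state { int filter; };          /* stand-in for pipe_sampler_state */

static void
set_samplers(unsigned count, const struct sampler_state **states)
{
   /* one call covering the whole range; NULL entries unbind */
   for (unsigned i = 0; i < count; i++)
      printf("unit %u -> %s\n", i, states[i] ? "bound" : "unbound");
}

int
main(void)
{
   struct sampler_state linear = { 1 };
   const struct sampler_state *states[MAX_SAMPLERS];
   unsigned used_mask = 0x5;                    /* units 0 and 2 in use */
   unsigned num = 0;

   /* collect all states first, in the spirit of update_shader_samplers() */
   for (unsigned unit = 0; used_mask; unit++, used_mask >>= 1) {
      if (used_mask & 1) {
         states[unit] = &linear;
         num = unit + 1;
      } else {
         states[unit] = NULL;                   /* hole between used units */
      }
   }

   set_samplers(num, states);                   /* commit in one go */
   return 0;
}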
*/ @@ -96,15 +78,8 @@ update_fp( struct st_context *st ) st_reference_fragprog(st, &st->fp, stfp); - if (st->missing_textures) { - /* use a pass-through frag shader that uses no textures */ - void *fs = get_passthrough_fs(st); - cso_set_fragment_shader_handle(st->cso_context, fs); - } - else { - cso_set_fragment_shader_handle(st->cso_context, - st->fp_variant->driver_shader); - } + cso_set_fragment_shader_handle(st->cso_context, + st->fp_variant->driver_shader); } @@ -210,3 +185,75 @@ const struct st_tracked_state st_update_gp = { }, update_gp /* update */ }; + + + +static void +update_tcp( struct st_context *st ) +{ + struct st_tessctrl_program *sttcp; + struct st_tcp_variant_key key; + + if (!st->ctx->TessCtrlProgram._Current) { + cso_set_tessctrl_shader_handle(st->cso_context, NULL); + return; + } + + sttcp = st_tessctrl_program(st->ctx->TessCtrlProgram._Current); + assert(sttcp->Base.Base.Target == GL_TESS_CONTROL_PROGRAM_NV); + + memset(&key, 0, sizeof(key)); + key.st = st; + + st->tcp_variant = st_get_tcp_variant(st, sttcp, &key); + + st_reference_tesscprog(st, &st->tcp, sttcp); + + cso_set_tessctrl_shader_handle(st->cso_context, + st->tcp_variant->driver_shader); +} + +const struct st_tracked_state st_update_tcp = { + "st_update_tcp", /* name */ + { /* dirty */ + 0, /* mesa */ + ST_NEW_TESSCTRL_PROGRAM /* st */ + }, + update_tcp /* update */ +}; + + + +static void +update_tep( struct st_context *st ) +{ + struct st_tesseval_program *sttep; + struct st_tep_variant_key key; + + if (!st->ctx->TessEvalProgram._Current) { + cso_set_tesseval_shader_handle(st->cso_context, NULL); + return; + } + + sttep = st_tesseval_program(st->ctx->TessEvalProgram._Current); + assert(sttep->Base.Base.Target == GL_TESS_EVALUATION_PROGRAM_NV); + + memset(&key, 0, sizeof(key)); + key.st = st; + + st->tep_variant = st_get_tep_variant(st, sttep, &key); + + st_reference_tesseprog(st, &st->tep, sttep); + + cso_set_tesseval_shader_handle(st->cso_context, + st->tep_variant->driver_shader); +} + +const struct st_tracked_state st_update_tep = { + "st_update_tep", /* name */ + { /* dirty */ + 0, /* mesa */ + ST_NEW_TESSEVAL_PROGRAM /* st */ + }, + update_tep /* update */ +}; diff --git a/src/mesa/state_tracker/st_atom_tess.c b/src/mesa/state_tracker/st_atom_tess.c new file mode 100644 index 00000000000..8e6287a900c --- /dev/null +++ b/src/mesa/state_tracker/st_atom_tess.c @@ -0,0 +1,62 @@ +/************************************************************************** + * + * Copyright 2015 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Authors: + * Marek Olšák <[email protected]> + */ + + +#include "main/macros.h" +#include "st_context.h" +#include "pipe/p_context.h" +#include "st_atom.h" + + +static void +update_tess(struct st_context *st) +{ + const struct gl_context *ctx = st->ctx; + struct pipe_context *pipe = st->pipe; + + if (!pipe->set_tess_state) + return; + + pipe->set_tess_state(pipe, + ctx->TessCtrlProgram.patch_default_outer_level, + ctx->TessCtrlProgram.patch_default_inner_level); +} + + +const struct st_tracked_state st_update_tess = { + "update_tess", /* name */ + { /* dirty */ + 0, /* mesa */ + ST_NEW_TESS_STATE, /* st */ + }, + update_tess /* update */ +}; diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c index 04ba86448fc..31e0f6ba06c 100644 --- a/src/mesa/state_tracker/st_atom_texture.c +++ b/src/mesa/state_tracker/st_atom_texture.c @@ -103,7 +103,8 @@ swizzle_swizzle(unsigned swizzle1, unsigned swizzle2) */ static unsigned compute_texture_format_swizzle(GLenum baseFormat, GLenum depthMode, - enum pipe_format actualFormat) + enum pipe_format actualFormat, + unsigned glsl_version) { switch (baseFormat) { case GL_RGBA: @@ -157,8 +158,26 @@ compute_texture_format_swizzle(GLenum baseFormat, GLenum depthMode, case GL_INTENSITY: return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); case GL_ALPHA: - return MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, - SWIZZLE_ZERO, SWIZZLE_X); + /* The texture(sampler*Shadow) functions from GLSL 1.30 ignore + * the depth mode and return float, while older shadow* functions + * and ARB_fp instructions return vec4 according to the depth mode. + * + * The problem with the GLSL 1.30 functions is that GL_ALPHA forces + * them to return 0, breaking them completely. + * + * A proper fix would increase code complexity and that's not worth + * it for a rarely used feature such as the GL_ALPHA depth mode + * in GL3. Therefore, change GL_ALPHA to GL_INTENSITY for all + * shaders that use GLSL 1.30 or later. + * + * BTW, it's required that sampler views are updated when + * shaders change (check_sampler_swizzle takes care of that). 
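The GL_ALPHA depth-mode workaround explained in the comment above reduces to a GLSL-version check when the swizzle is computed (the hunk continues below). A self-contained sketch of that decision; the SWZ_* tokens are local stand-ins for Mesa's SWIZZLE_* macros, and version 0 stands for fixed-function/ARB programs:

#include <stdio.h>

enum swz { SWZ_X, SWZ_ZERO };

static void
alpha_depth_swizzle(unsigned glsl_version, enum swz out[4])
{
   if (glsl_version >= 130) {
      /* GLSL 1.30+: replicate X so texture(sampler*Shadow) keeps working */
      out[0] = out[1] = out[2] = out[3] = SWZ_X;
   } else {
      /* older shaders: classic GL_ALPHA behaviour, (0, 0, 0, A) */
      out[0] = out[1] = out[2] = SWZ_ZERO;
      out[3] = SWZ_X;
   }
}

int
main(void)
{
   enum swz s[4];
   alpha_depth_swizzle(130, s);
   printf("GLSL 1.30: %d %d %d %d\n", s[0], s[1], s[2], s[3]);
   alpha_depth_swizzle(0, s);
   printf("ARB/fixed: %d %d %d %d\n", s[0], s[1], s[2], s[3]);
   return 0;
}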
+ */ + if (glsl_version && glsl_version >= 130) + return SWIZZLE_XXXX; + else + return MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, + SWIZZLE_ZERO, SWIZZLE_X); case GL_RED: return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE); @@ -174,7 +193,8 @@ compute_texture_format_swizzle(GLenum baseFormat, GLenum depthMode, static unsigned -get_texture_format_swizzle(const struct st_texture_object *stObj) +get_texture_format_swizzle(const struct st_texture_object *stObj, + unsigned glsl_version) { GLenum baseFormat = _mesa_texture_base_format(&stObj->base); unsigned tex_swizzle; @@ -182,7 +202,8 @@ get_texture_format_swizzle(const struct st_texture_object *stObj) if (baseFormat != GL_NONE) { tex_swizzle = compute_texture_format_swizzle(baseFormat, stObj->base.DepthMode, - stObj->pt->format); + stObj->pt->format, + glsl_version); } else { tex_swizzle = SWIZZLE_XYZW; @@ -201,9 +222,9 @@ get_texture_format_swizzle(const struct st_texture_object *stObj) */ static boolean check_sampler_swizzle(const struct st_texture_object *stObj, - struct pipe_sampler_view *sv) + struct pipe_sampler_view *sv, unsigned glsl_version) { - unsigned swizzle = get_texture_format_swizzle(stObj); + unsigned swizzle = get_texture_format_swizzle(stObj, glsl_version); return ((sv->swizzle_r != GET_SWZ(swizzle, 0)) || (sv->swizzle_g != GET_SWZ(swizzle, 1)) || @@ -232,11 +253,11 @@ static unsigned last_layer(struct st_texture_object *stObj) static struct pipe_sampler_view * st_create_texture_sampler_view_from_stobj(struct pipe_context *pipe, struct st_texture_object *stObj, - const struct gl_sampler_object *samp, - enum pipe_format format) + enum pipe_format format, + unsigned glsl_version) { struct pipe_sampler_view templ; - unsigned swizzle = get_texture_format_swizzle(stObj); + unsigned swizzle = get_texture_format_swizzle(stObj, glsl_version); u_sampler_view_default_template(&templ, stObj->pt, @@ -283,8 +304,8 @@ st_create_texture_sampler_view_from_stobj(struct pipe_context *pipe, static struct pipe_sampler_view * st_get_texture_sampler_view_from_stobj(struct st_context *st, struct st_texture_object *stObj, - const struct gl_sampler_object *samp, - enum pipe_format format) + enum pipe_format format, + unsigned glsl_version) { struct pipe_sampler_view **sv; const struct st_texture_image *firstImage; @@ -306,7 +327,7 @@ st_get_texture_sampler_view_from_stobj(struct st_context *st, /* if sampler view has changed dereference it */ if (*sv) { - if (check_sampler_swizzle(stObj, *sv) || + if (check_sampler_swizzle(stObj, *sv, glsl_version) || (format != (*sv)->format) || gl_target_to_pipe(stObj->base.Target) != (*sv)->target || stObj->base.MinLevel + stObj->base.BaseLevel != (*sv)->u.tex.first_level || @@ -318,7 +339,8 @@ st_get_texture_sampler_view_from_stobj(struct st_context *st, } if (!*sv) { - *sv = st_create_texture_sampler_view_from_stobj(st->pipe, stObj, samp, format); + *sv = st_create_texture_sampler_view_from_stobj(st->pipe, stObj, + format, glsl_version); } else if ((*sv)->context != st->pipe) { /* Recreate view in correct context, use existing view as template */ @@ -334,7 +356,7 @@ st_get_texture_sampler_view_from_stobj(struct st_context *st, static GLboolean update_single_texture(struct st_context *st, struct pipe_sampler_view **sampler_view, - GLuint texUnit) + GLuint texUnit, unsigned glsl_version) { struct gl_context *ctx = st->ctx; const struct gl_sampler_object *samp; @@ -374,8 +396,9 @@ update_single_texture(struct st_context *st, } } - *sampler_view = st_get_texture_sampler_view_from_stobj(st, 
stObj, samp, - view_format); + *sampler_view = + st_get_texture_sampler_view_from_stobj(st, stObj, view_format, + glsl_version); return GL_TRUE; } @@ -383,7 +406,7 @@ update_single_texture(struct st_context *st, static void update_textures(struct st_context *st, - unsigned shader_stage, + gl_shader_stage mesa_shader, const struct gl_program *prog, unsigned max_units, struct pipe_sampler_view **sampler_views, @@ -392,6 +415,10 @@ update_textures(struct st_context *st, const GLuint old_max = *num_textures; GLbitfield samplers_used = prog->SamplersUsed; GLuint unit; + struct gl_shader_program *shader = + st->ctx->_Shader->CurrentProgram[mesa_shader]; + unsigned glsl_version = shader ? shader->Version : 0; + unsigned shader_stage = st_shader_stage_to_ptarget(mesa_shader); if (samplers_used == 0x0 && old_max == 0) return; @@ -406,7 +433,8 @@ update_textures(struct st_context *st, const GLuint texUnit = prog->SamplerUnits[unit]; GLboolean retval; - retval = update_single_texture(st, &sampler_view, texUnit); + retval = update_single_texture(st, &sampler_view, texUnit, + glsl_version); if (retval == GL_FALSE) continue; @@ -435,7 +463,7 @@ update_vertex_textures(struct st_context *st) if (ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits > 0) { update_textures(st, - PIPE_SHADER_VERTEX, + MESA_SHADER_VERTEX, &ctx->VertexProgram._Current->Base, ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits, st->state.sampler_views[PIPE_SHADER_VERTEX], @@ -450,7 +478,7 @@ update_fragment_textures(struct st_context *st) const struct gl_context *ctx = st->ctx; update_textures(st, - PIPE_SHADER_FRAGMENT, + MESA_SHADER_FRAGMENT, &ctx->FragmentProgram._Current->Base, ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits, st->state.sampler_views[PIPE_SHADER_FRAGMENT], @@ -465,7 +493,7 @@ update_geometry_textures(struct st_context *st) if (ctx->GeometryProgram._Current) { update_textures(st, - PIPE_SHADER_GEOMETRY, + MESA_SHADER_GEOMETRY, &ctx->GeometryProgram._Current->Base, ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits, st->state.sampler_views[PIPE_SHADER_GEOMETRY], @@ -474,11 +502,43 @@ update_geometry_textures(struct st_context *st) } +static void +update_tessctrl_textures(struct st_context *st) +{ + const struct gl_context *ctx = st->ctx; + + if (ctx->TessCtrlProgram._Current) { + update_textures(st, + MESA_SHADER_TESS_CTRL, + &ctx->TessCtrlProgram._Current->Base, + ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits, + st->state.sampler_views[PIPE_SHADER_TESS_CTRL], + &st->state.num_sampler_views[PIPE_SHADER_TESS_CTRL]); + } +} + + +static void +update_tesseval_textures(struct st_context *st) +{ + const struct gl_context *ctx = st->ctx; + + if (ctx->TessEvalProgram._Current) { + update_textures(st, + MESA_SHADER_TESS_EVAL, + &ctx->TessEvalProgram._Current->Base, + ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits, + st->state.sampler_views[PIPE_SHADER_TESS_EVAL], + &st->state.num_sampler_views[PIPE_SHADER_TESS_EVAL]); + } +} + + const struct st_tracked_state st_update_fragment_texture = { "st_update_texture", /* name */ { /* dirty */ _NEW_TEXTURE, /* mesa */ - ST_NEW_FRAGMENT_PROGRAM, /* st */ + ST_NEW_FRAGMENT_PROGRAM | ST_NEW_SAMPLER_VIEWS, /* st */ }, update_fragment_textures /* update */ }; @@ -488,7 +548,7 @@ const struct st_tracked_state st_update_vertex_texture = { "st_update_vertex_texture", /* name */ { /* dirty */ _NEW_TEXTURE, /* mesa */ - ST_NEW_VERTEX_PROGRAM, /* st */ + ST_NEW_VERTEX_PROGRAM | ST_NEW_SAMPLER_VIEWS, /* st */ }, 
update_vertex_textures /* update */ }; @@ -498,52 +558,27 @@ const struct st_tracked_state st_update_geometry_texture = { "st_update_geometry_texture", /* name */ { /* dirty */ _NEW_TEXTURE, /* mesa */ - ST_NEW_GEOMETRY_PROGRAM, /* st */ + ST_NEW_GEOMETRY_PROGRAM | ST_NEW_SAMPLER_VIEWS, /* st */ }, update_geometry_textures /* update */ }; - -static void -finalize_textures(struct st_context *st) -{ - struct gl_context *ctx = st->ctx; - struct gl_fragment_program *fprog = ctx->FragmentProgram._Current; - const GLboolean prev_missing_textures = st->missing_textures; - GLuint su; - - st->missing_textures = GL_FALSE; - - for (su = 0; su < ctx->Const.MaxTextureCoordUnits; su++) { - if (fprog->Base.SamplersUsed & (1 << su)) { - const GLuint texUnit = fprog->Base.SamplerUnits[su]; - struct gl_texture_object *texObj - = ctx->Texture.Unit[texUnit]._Current; - - if (texObj) { - GLboolean retval; - - retval = st_finalize_texture(ctx, st->pipe, texObj); - if (!retval) { - /* out of mem */ - st->missing_textures = GL_TRUE; - continue; - } - } - } - } - - if (prev_missing_textures != st->missing_textures) - st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM; -} +const struct st_tracked_state st_update_tessctrl_texture = { + "st_update_tessctrl_texture", /* name */ + { /* dirty */ + _NEW_TEXTURE, /* mesa */ + ST_NEW_TESSCTRL_PROGRAM | ST_NEW_SAMPLER_VIEWS, /* st */ + }, + update_tessctrl_textures /* update */ +}; -const struct st_tracked_state st_finalize_textures = { - "st_finalize_textures", /* name */ - { /* dirty */ - _NEW_TEXTURE, /* mesa */ - 0, /* st */ +const struct st_tracked_state st_update_tesseval_texture = { + "st_update_tesseval_texture", /* name */ + { /* dirty */ + _NEW_TEXTURE, /* mesa */ + ST_NEW_TESSEVAL_PROGRAM | ST_NEW_SAMPLER_VIEWS, /* st */ }, - finalize_textures /* update */ + update_tesseval_textures /* update */ }; diff --git a/src/mesa/state_tracker/st_atom_viewport.c b/src/mesa/state_tracker/st_atom_viewport.c index 2f62590c4f1..9a692cecade 100644 --- a/src/mesa/state_tracker/st_atom_viewport.c +++ b/src/mesa/state_tracker/st_atom_viewport.c @@ -64,7 +64,7 @@ update_viewport( struct st_context *st ) */ for (i = 0; i < ctx->Const.MaxViewports; i++) { - double scale[3], translate[3]; + float scale[3], translate[3]; _mesa_get_viewport_xform(ctx, i, scale, translate); st->state.viewport[i].scale[0] = scale[0]; diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index c881e194f70..01a96c18264 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -446,8 +446,8 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, assert(height <= (GLsizei)maxSize); cso_save_rasterizer(cso); - cso_save_samplers(cso, PIPE_SHADER_FRAGMENT); - cso_save_sampler_views(cso, PIPE_SHADER_FRAGMENT); + cso_save_fragment_samplers(cso); + cso_save_fragment_sampler_views(cso); cso_save_viewport(cso); cso_save_fragment_shader(cso); cso_save_stream_outputs(cso); @@ -535,8 +535,8 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, /* restore state */ cso_restore_rasterizer(cso); - cso_restore_samplers(cso, PIPE_SHADER_FRAGMENT); - cso_restore_sampler_views(cso, PIPE_SHADER_FRAGMENT); + cso_restore_fragment_samplers(cso); + cso_restore_fragment_sampler_views(cso); cso_restore_viewport(cso); cso_restore_fragment_shader(cso); cso_restore_vertex_shader(cso); diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c index 6d9371852c5..139690615d6 100644 --- 
a/src/mesa/state_tracker/st_cb_blit.c +++ b/src/mesa/state_tracker/st_cb_blit.c @@ -39,7 +39,7 @@ #include "st_cb_bitmap.h" #include "st_cb_blit.h" #include "st_cb_fbo.h" -#include "st_atom.h" +#include "st_manager.h" #include "util/u_format.h" @@ -92,7 +92,7 @@ st_BlitFramebuffer(struct gl_context *ctx, } clip; struct pipe_blit_info blit; - st_validate_state(st); + st_manager_validate_framebuffers(st); /* Make sure bitmap rendering has landed in the framebuffers */ st_flush_bitmap_cache(st); diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index a6a98c83aa6..b372697026b 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -33,6 +33,7 @@ #include "main/imports.h" #include "main/image.h" #include "main/bufferobj.h" +#include "main/blit.h" #include "main/format_pack.h" #include "main/macros.h" #include "main/mtypes.h" @@ -688,8 +689,8 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, cso_save_rasterizer(cso); cso_save_viewport(cso); - cso_save_samplers(cso, PIPE_SHADER_FRAGMENT); - cso_save_sampler_views(cso, PIPE_SHADER_FRAGMENT); + cso_save_fragment_samplers(cso); + cso_save_fragment_sampler_views(cso); cso_save_fragment_shader(cso); cso_save_stream_outputs(cso); cso_save_vertex_shader(cso); @@ -756,6 +757,8 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, /* texture sampling state: */ { struct pipe_sampler_state sampler; + const struct pipe_sampler_state *states[2] = {&sampler, &sampler}; + memset(&sampler, 0, sizeof(sampler)); sampler.wrap_s = PIPE_TEX_WRAP_CLAMP; sampler.wrap_t = PIPE_TEX_WRAP_CLAMP; @@ -765,11 +768,8 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; sampler.normalized_coords = normalized; - cso_single_sampler(cso, PIPE_SHADER_FRAGMENT, 0, &sampler); - if (num_sampler_view > 1) { - cso_single_sampler(cso, PIPE_SHADER_FRAGMENT, 1, &sampler); - } - cso_single_sampler_done(cso, PIPE_SHADER_FRAGMENT); + cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, + num_sampler_view > 1 ? 2 : 1, states); } /* viewport state: viewport matching window dims */ @@ -816,8 +816,8 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, /* restore state */ cso_restore_rasterizer(cso); cso_restore_viewport(cso); - cso_restore_samplers(cso, PIPE_SHADER_FRAGMENT); - cso_restore_sampler_views(cso, PIPE_SHADER_FRAGMENT); + cso_restore_fragment_samplers(cso); + cso_restore_fragment_sampler_views(cso); cso_restore_fragment_shader(cso); cso_restore_vertex_shader(cso); cso_restore_tessctrl_shader(cso); @@ -1313,31 +1313,6 @@ st_get_color_read_renderbuffer(struct gl_context *ctx) /** - * \return TRUE if two regions overlap, FALSE otherwise - */ -static boolean -regions_overlap(int srcX0, int srcY0, - int srcX1, int srcY1, - int dstX0, int dstY0, - int dstX1, int dstY1) -{ - if (MAX2(srcX0, srcX1) < MIN2(dstX0, dstX1)) - return FALSE; /* src completely left of dst */ - - if (MAX2(dstX0, dstX1) < MIN2(srcX0, srcX1)) - return FALSE; /* dst completely left of src */ - - if (MAX2(srcY0, srcY1) < MIN2(dstY0, dstY1)) - return FALSE; /* src completely above dst */ - - if (MAX2(dstY0, dstY1) < MIN2(srcY0, srcY1)) - return FALSE; /* dst completely above src */ - - return TRUE; /* some overlap */ -} - - -/** * Try to do a glCopyPixels for simple cases with a blit by calling * pipe->blit(). 
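The local regions_overlap() helper deleted above is replaced by the shared _mesa_regions_overlap() from main/blit.h at the blit_copy_pixels call site in the hunk that follows. For reference, a stand-alone version of the same separating-axis test, runnable as-is:

#include <stdbool.h>
#include <stdio.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))
#define MAX2(a, b) ((a) > (b) ? (a) : (b))

static bool
regions_overlap(int sx0, int sy0, int sx1, int sy1,
                int dx0, int dy0, int dx1, int dy1)
{
   /* corners may be flipped, hence the MIN/MAX on each axis */
   if (MAX2(sx0, sx1) < MIN2(dx0, dx1) || MAX2(dx0, dx1) < MIN2(sx0, sx1))
      return false;                     /* completely disjoint in X */
   if (MAX2(sy0, sy1) < MIN2(dy0, dy1) || MAX2(dy0, dy1) < MIN2(sy0, sy1))
      return false;                     /* completely disjoint in Y */
   return true;                         /* some overlap */
}

int
main(void)
{
   printf("%d\n", regions_overlap(0, 0, 10, 10, 5, 5, 20, 20));   /* 1 */
   printf("%d\n", regions_overlap(0, 0, 10, 10, 11, 0, 20, 10));  /* 0 */
   return 0;
}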
* @@ -1420,8 +1395,8 @@ blit_copy_pixels(struct gl_context *ctx, GLint srcx, GLint srcy, } if (rbRead != rbDraw || - !regions_overlap(readX, readY, readX + readW, readY + readH, - drawX, drawY, drawX + drawW, drawY + drawH)) { + !_mesa_regions_overlap(readX, readY, readX + readW, readY + readH, + drawX, drawY, drawX + drawW, drawY + drawH)) { struct pipe_blit_info blit; memset(&blit, 0, sizeof(blit)); diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 0399eef7204..57075904450 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -511,8 +511,6 @@ st_render_texture(struct gl_context *ctx, strb->rtt_layered = att->Layered; pipe_resource_reference(&strb->texture, pt); - pipe_surface_release(pipe, &strb->surface); - st_update_renderbuffer_surface(st, strb); strb->Base.Format = st_pipe_format_to_mesa_format(pt->format); diff --git a/src/mesa/state_tracker/st_cb_perfmon.h b/src/mesa/state_tracker/st_cb_perfmon.h index 13d3627de5d..0b195de47fe 100644 --- a/src/mesa/state_tracker/st_cb_perfmon.h +++ b/src/mesa/state_tracker/st_cb_perfmon.h @@ -46,7 +46,7 @@ struct st_perf_counter_object /** * Cast wrapper */ -static INLINE struct st_perf_monitor_object * +static inline struct st_perf_monitor_object * st_perf_monitor_object(struct gl_perf_monitor_object *q) { return (struct st_perf_monitor_object *)q; diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index 6aa7d5796d9..3029909d12d 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -68,6 +68,12 @@ st_bind_program(struct gl_context *ctx, GLenum target, struct gl_program *prog) case GL_GEOMETRY_PROGRAM_NV: st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM; break; + case GL_TESS_CONTROL_PROGRAM_NV: + st->dirty.st |= ST_NEW_TESSCTRL_PROGRAM; + break; + case GL_TESS_EVALUATION_PROGRAM_NV: + st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM; + break; } } @@ -84,6 +90,8 @@ st_use_program(struct gl_context *ctx, struct gl_shader_program *shProg) st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM; st->dirty.st |= ST_NEW_VERTEX_PROGRAM; st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM; + st->dirty.st |= ST_NEW_TESSCTRL_PROGRAM; + st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM; } @@ -110,6 +118,16 @@ st_new_program(struct gl_context *ctx, GLenum target, GLuint id) return _mesa_init_geometry_program(ctx, &prog->Base, target, id); } + case GL_TESS_CONTROL_PROGRAM_NV: { + struct st_tessctrl_program *prog = ST_CALLOC_STRUCT(st_tessctrl_program); + return _mesa_init_tess_ctrl_program(ctx, &prog->Base, target, id); + } + + case GL_TESS_EVALUATION_PROGRAM_NV: { + struct st_tesseval_program *prog = ST_CALLOC_STRUCT(st_tesseval_program); + return _mesa_init_tess_eval_program(ctx, &prog->Base, target, id); + } + default: assert(0); return NULL; @@ -157,6 +175,28 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog) free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi); } break; + case GL_TESS_CONTROL_PROGRAM_NV: + { + struct st_tessctrl_program *sttcp = + (struct st_tessctrl_program *) prog; + + st_release_tcp_variants(st, sttcp); + + if (sttcp->glsl_to_tgsi) + free_glsl_to_tgsi_visitor(sttcp->glsl_to_tgsi); + } + break; + case GL_TESS_EVALUATION_PROGRAM_NV: + { + struct st_tesseval_program *sttep = + (struct st_tesseval_program *) prog; + + st_release_tep_variants(st, sttep); + + if (sttep->glsl_to_tgsi) + free_glsl_to_tgsi_visitor(sttep->glsl_to_tgsi); + } + break; default: assert(0); /* problem */ } @@ -214,6 +254,24 @@ 
st_program_string_notify( struct gl_context *ctx, if (st->vp == stvp) st->dirty.st |= ST_NEW_VERTEX_PROGRAM; } + else if (target == GL_TESS_CONTROL_PROGRAM_NV) { + struct st_tessctrl_program *sttcp = + (struct st_tessctrl_program *) prog; + + st_release_tcp_variants(st, sttcp); + + if (st->tcp == sttcp) + st->dirty.st |= ST_NEW_TESSCTRL_PROGRAM; + } + else if (target == GL_TESS_EVALUATION_PROGRAM_NV) { + struct st_tesseval_program *sttep = + (struct st_tesseval_program *) prog; + + st_release_tep_variants(st, sttep); + + if (st->tep == sttep) + st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM; + } if (ST_DEBUG & DEBUG_PRECOMPILE) st_precompile_shader_variant(st, prog); diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c index 272cbb91d52..b9997dacfd2 100644 --- a/src/mesa/state_tracker/st_cb_rasterpos.c +++ b/src/mesa/state_tracker/st_cb_rasterpos.c @@ -254,7 +254,7 @@ st_RasterPos(struct gl_context *ctx, const GLfloat v[4]) * st_feedback_draw_vbo doesn't check for that flag. */ ctx->Array._DrawArrays = rs->arrays; st_feedback_draw_vbo(ctx, &rs->prim, 1, NULL, GL_TRUE, 0, 1, - NULL, NULL); + NULL, 0, NULL); ctx->Array._DrawArrays = saved_arrays; /* restore draw's rasterization stage depending on rendermode */ diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index d95a608d32e..18ea43fa71a 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -43,6 +43,30 @@ #include "state_tracker/st_format.h" #include "state_tracker/st_texture.h" +static boolean +needs_integer_signed_unsigned_conversion(const struct gl_context *ctx, + GLenum format, GLenum type) +{ + struct gl_renderbuffer *rb = + _mesa_get_read_renderbuffer_for_format(ctx, format); + + assert(rb); + + GLenum srcType = _mesa_get_format_datatype(rb->Format); + + if ((srcType == GL_INT && + (type == GL_UNSIGNED_INT || + type == GL_UNSIGNED_SHORT || + type == GL_UNSIGNED_BYTE)) || + (srcType == GL_UNSIGNED_INT && + (type == GL_INT || + type == GL_SHORT || + type == GL_BYTE))) { + return TRUE; + } + + return FALSE; +} /** * This uses a blit to copy the read buffer to a texture format which matches @@ -123,6 +147,10 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y, goto fallback; } + if (needs_integer_signed_unsigned_conversion(ctx, format, type)) { + goto fallback; + } + /* Convert the source format to what is expected by ReadPixels * and see if it's supported. */ src_format = util_format_linear(src->format); diff --git a/src/mesa/state_tracker/st_cb_syncobj.c b/src/mesa/state_tracker/st_cb_syncobj.c index 6d875b851a2..ec2687fba53 100644 --- a/src/mesa/state_tracker/st_cb_syncobj.c +++ b/src/mesa/state_tracker/st_cb_syncobj.c @@ -81,7 +81,13 @@ static void st_check_sync(struct gl_context *ctx, struct gl_sync_object *obj) struct pipe_screen *screen = st_context(ctx)->pipe->screen; struct st_sync_object *so = (struct st_sync_object*)obj; - if (so->fence && screen->fence_signalled(screen, so->fence)) { + /* If the fence doesn't exist, assume it's signalled. */ + if (!so->fence) { + so->b.StatusFlag = GL_TRUE; + return; + } + + if (screen->fence_finish(screen, so->fence, 0)) { screen->fence_reference(screen, &so->fence, NULL); so->b.StatusFlag = GL_TRUE; } @@ -94,6 +100,12 @@ static void st_client_wait_sync(struct gl_context *ctx, struct pipe_screen *screen = st_context(ctx)->pipe->screen; struct st_sync_object *so = (struct st_sync_object*)obj; + /* If the fence doesn't exist, assume it's signalled. 
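The needs_integer_signed_unsigned_conversion() helper added to st_cb_readpixels.c above routes signed/unsigned integer read-backs to the software fallback instead of the blit fast path. A stand-alone sketch of that classification; the GLenum defines use the standard GL token values, and the function name here is local to the example:

#include <stdbool.h>
#include <stdio.h>

typedef unsigned int GLenum;
#define GL_BYTE            0x1400
#define GL_UNSIGNED_BYTE   0x1401
#define GL_SHORT           0x1402
#define GL_UNSIGNED_SHORT  0x1403
#define GL_INT             0x1404
#define GL_UNSIGNED_INT    0x1405

static bool
needs_signed_unsigned_conversion(GLenum src_type, GLenum dst_type)
{
   /* signed source read back as an unsigned type */
   if (src_type == GL_INT)
      return dst_type == GL_UNSIGNED_INT ||
             dst_type == GL_UNSIGNED_SHORT ||
             dst_type == GL_UNSIGNED_BYTE;
   /* unsigned source read back as a signed type */
   if (src_type == GL_UNSIGNED_INT)
      return dst_type == GL_INT ||
             dst_type == GL_SHORT ||
             dst_type == GL_BYTE;
   return false;
}

int
main(void)
{
   printf("%d\n", needs_signed_unsigned_conversion(GL_INT, GL_UNSIGNED_BYTE));        /* 1 */
   printf("%d\n", needs_signed_unsigned_conversion(GL_UNSIGNED_INT, GL_UNSIGNED_INT)); /* 0 */
   return 0;
}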
*/ + if (!so->fence) { + so->b.StatusFlag = GL_TRUE; + return; + } + /* We don't care about GL_SYNC_FLUSH_COMMANDS_BIT, because flush is * already called when creating a fence. */ diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 7ea3846fff1..715d69c0c68 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -896,7 +896,7 @@ st_CompressedTexImage(struct gl_context *ctx, GLuint dims, /** - * Called via ctx->Driver.GetTexImage() + * Called via ctx->Driver.GetTexSubImage() * * This uses a blit to copy the texture to a texture format which matches * the format and type combo and then a fast read-back is done using memcpy. @@ -910,16 +910,15 @@ st_CompressedTexImage(struct gl_context *ctx, GLuint dims, * we do here should be free in such cases. */ static void -st_GetTexImage(struct gl_context * ctx, - GLenum format, GLenum type, GLvoid * pixels, - struct gl_texture_image *texImage) +st_GetTexSubImage(struct gl_context * ctx, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLint depth, + GLenum format, GLenum type, GLvoid * pixels, + struct gl_texture_image *texImage) { struct st_context *st = st_context(ctx); struct pipe_context *pipe = st->pipe; struct pipe_screen *screen = pipe->screen; - GLuint width = texImage->Width; - GLuint height = texImage->Height; - GLuint depth = texImage->Depth; struct st_texture_image *stImage = st_texture_image(texImage); struct st_texture_object *stObj = st_texture_object(texImage->TexObject); struct pipe_resource *src = stObj->pt; @@ -1054,7 +1053,7 @@ st_GetTexImage(struct gl_context * ctx, } } - /* create the destination texture */ + /* create the destination texture of size (width X height X depth) */ memset(&dst_templ, 0, sizeof(dst_templ)); dst_templ.target = pipe_target; dst_templ.format = dst_format; @@ -1076,6 +1075,10 @@ st_GetTexImage(struct gl_context * ctx, height = 1; } + assert(texImage->Face == 0 || + texImage->TexObject->MinLayer == 0 || + zoffset == 0); + memset(&blit, 0, sizeof(blit)); blit.src.resource = src; blit.src.level = texImage->Level + texImage->TexObject->MinLevel; @@ -1083,9 +1086,11 @@ st_GetTexImage(struct gl_context * ctx, blit.dst.resource = dst; blit.dst.level = 0; blit.dst.format = dst->format; - blit.src.box.x = blit.dst.box.x = 0; - blit.src.box.y = blit.dst.box.y = 0; - blit.src.box.z = texImage->Face + texImage->TexObject->MinLayer; + blit.src.box.x = xoffset; + blit.dst.box.x = 0; + blit.src.box.y = yoffset; + blit.dst.box.y = 0; + blit.src.box.z = texImage->Face + texImage->TexObject->MinLayer + zoffset; blit.dst.box.z = 0; blit.src.box.width = blit.dst.box.width = width; blit.src.box.height = blit.dst.box.height = height; @@ -1206,7 +1211,9 @@ end: fallback: if (!done) { - _mesa_GetTexImage_sw(ctx, format, type, pixels, texImage); + _mesa_GetTexSubImage_sw(ctx, xoffset, yoffset, zoffset, + width, height, depth, + format, type, pixels, texImage); } } @@ -1876,11 +1883,11 @@ st_init_texture_functions(struct dd_function_table *functions) functions->CopyTexSubImage = st_CopyTexSubImage; functions->GenerateMipmap = st_generate_mipmap; - functions->GetTexImage = st_GetTexImage; + functions->GetTexSubImage = st_GetTexSubImage; /* compressed texture functions */ functions->CompressedTexImage = st_CompressedTexImage; - functions->GetCompressedTexImage = _mesa_GetCompressedTexImage_sw; + functions->GetCompressedTexSubImage = _mesa_GetCompressedTexSubImage_sw; functions->NewTextureObject = 
st_NewTextureObject; functions->NewTextureImage = st_NewTextureImage; diff --git a/src/mesa/state_tracker/st_cb_xformfb.c b/src/mesa/state_tracker/st_cb_xformfb.c index 07c118e227b..0c01cd5ab78 100644 --- a/src/mesa/state_tracker/st_cb_xformfb.c +++ b/src/mesa/state_tracker/st_cb_xformfb.c @@ -54,9 +54,9 @@ struct st_transform_feedback_object { struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS]; /* This encapsulates the count that can be used as a source for draw_vbo. - * It contains a stream output target from the last call of - * EndTransformFeedback. */ - struct pipe_stream_output_target *draw_count; + * It contains stream output targets from the last call of + * EndTransformFeedback for each stream. */ + struct pipe_stream_output_target *draw_count[MAX_VERTEX_STREAMS]; }; static inline struct st_transform_feedback_object * @@ -88,7 +88,8 @@ st_delete_transform_feedback(struct gl_context *ctx, st_transform_feedback_object(obj); unsigned i; - pipe_so_target_reference(&sobj->draw_count, NULL); + for (i = 0; i < ARRAY_SIZE(sobj->draw_count); i++) + pipe_so_target_reference(&sobj->draw_count[i], NULL); /* Unreference targets. */ for (i = 0; i < sobj->num_targets; i++) { @@ -123,9 +124,12 @@ st_begin_transform_feedback(struct gl_context *ctx, GLenum mode, struct st_buffer_object *bo = st_buffer_object(sobj->base.Buffers[i]); if (bo && bo->buffer) { + unsigned stream = + obj->shader_program->LinkedTransformFeedback.BufferStream[i]; + /* Check whether we need to recreate the target. */ if (!sobj->targets[i] || - sobj->targets[i] == sobj->draw_count || + sobj->targets[i] == sobj->draw_count[stream] || sobj->targets[i]->buffer != bo->buffer || sobj->targets[i]->buffer_offset != sobj->base.Offset[i] || sobj->targets[i]->buffer_size != sobj->base.Size[i]) { @@ -178,24 +182,6 @@ st_resume_transform_feedback(struct gl_context *ctx, } -static struct pipe_stream_output_target * -st_transform_feedback_get_draw_target(struct gl_transform_feedback_object *obj) -{ - struct st_transform_feedback_object *sobj = - st_transform_feedback_object(obj); - unsigned i; - - for (i = 0; i < ARRAY_SIZE(sobj->targets); i++) { - if (sobj->targets[i]) { - return sobj->targets[i]; - } - } - - assert(0); - return NULL; -} - - static void st_end_transform_feedback(struct gl_context *ctx, struct gl_transform_feedback_object *obj) @@ -203,22 +189,41 @@ st_end_transform_feedback(struct gl_context *ctx, struct st_context *st = st_context(ctx); struct st_transform_feedback_object *sobj = st_transform_feedback_object(obj); + unsigned i; cso_set_stream_outputs(st->cso_context, 0, NULL, NULL); - pipe_so_target_reference(&sobj->draw_count, - st_transform_feedback_get_draw_target(obj)); + /* The next call to glDrawTransformFeedbackStream should use the vertex + * count from the last call to glEndTransformFeedback. + * Therefore, save the targets for each stream. + * + * NULL means the vertex counter is 0 (initial state). + */ + for (i = 0; i < ARRAY_SIZE(sobj->draw_count); i++) + pipe_so_target_reference(&sobj->draw_count[i], NULL); + + for (i = 0; i < ARRAY_SIZE(sobj->targets); i++) { + unsigned stream = + obj->shader_program->LinkedTransformFeedback.BufferStream[i]; + + /* Is it not bound or already set for this stream? 
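The st_cb_xformfb.c rework above (continued in the loop below) keeps one stream-output target per vertex stream instead of a single draw_count, so a later glDrawTransformFeedbackStream can source its vertex count from the stream it names. A minimal model of that bookkeeping; the so_target struct and the buffer-to-stream table are stand-ins for the pipe/st structures:

#include <stdio.h>

#define MAX_BUFFERS 4
#define MAX_STREAMS 4

struct so_target { int id; };          /* stand-in for pipe_stream_output_target */

int
main(void)
{
   struct so_target bufs[MAX_BUFFERS] = { {10}, {11}, {12}, {13} };
   struct so_target *targets[MAX_BUFFERS] = { &bufs[0], &bufs[1], NULL, &bufs[3] };
   unsigned buffer_stream[MAX_BUFFERS] = { 0, 1, 1, 1 };   /* per-buffer stream table */
   struct so_target *draw_count[MAX_STREAMS] = { NULL };

   /* EndTransformFeedback: remember the first bound target of each stream */
   for (unsigned i = 0; i < MAX_BUFFERS; i++) {
      unsigned stream = buffer_stream[i];
      if (!targets[i] || draw_count[stream])
         continue;
      draw_count[stream] = targets[i];
   }

   /* DrawTransformFeedbackStream(stream = 1) would read its count from: */
   printf("stream 1 counter comes from target %d\n",
          draw_count[1] ? draw_count[1]->id : -1);          /* 11 */
   return 0;
}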
*/ + if (!sobj->targets[i] || sobj->draw_count[stream]) + continue; + + pipe_so_target_reference(&sobj->draw_count[stream], sobj->targets[i]); + } } -void +bool st_transform_feedback_draw_init(struct gl_transform_feedback_object *obj, - struct pipe_draw_info *out) + unsigned stream, struct pipe_draw_info *out) { struct st_transform_feedback_object *sobj = st_transform_feedback_object(obj); - out->count_from_stream_output = sobj->draw_count; + out->count_from_stream_output = sobj->draw_count[stream]; + return out->count_from_stream_output != NULL; } diff --git a/src/mesa/state_tracker/st_cb_xformfb.h b/src/mesa/state_tracker/st_cb_xformfb.h index 998c418257b..444d11842c5 100644 --- a/src/mesa/state_tracker/st_cb_xformfb.h +++ b/src/mesa/state_tracker/st_cb_xformfb.h @@ -38,9 +38,9 @@ struct pipe_draw_info; extern void st_init_xformfb_functions(struct dd_function_table *functions); -extern void +extern bool st_transform_feedback_draw_init(struct gl_transform_feedback_object *obj, - struct pipe_draw_info *out); + unsigned stream, struct pipe_draw_info *out); #endif /* ST_CB_XFORMFB_H */ diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index ed9ed0f1b6c..72c23cad4bc 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -287,6 +287,11 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe, /* For vertex shaders, make sure not to emit saturate when SM 3.0 is not supported */ ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoSat = !st->has_shader_model3; + if (!ctx->Extensions.ARB_gpu_shader5) { + for (i = 0; i < MESA_SHADER_STAGES; i++) + ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectSampler = true; + } + _mesa_compute_version(ctx); if (ctx->Version == 0) { @@ -308,6 +313,8 @@ static void st_init_driver_flags(struct gl_driver_flags *f) f->NewArray = ST_NEW_VERTEX_ARRAYS; f->NewRasterizerDiscard = ST_NEW_RASTERIZER; f->NewUniformBuffer = ST_NEW_UNIFORM_BUFFER; + f->NewDefaultTessLevels = ST_NEW_TESS_STATE; + f->NewTextureBuffer = ST_NEW_SAMPLER_VIEWS; } struct st_context *st_create_context(gl_api api, struct pipe_context *pipe, @@ -369,6 +376,8 @@ void st_destroy_context( struct st_context *st ) st_reference_fragprog(st, &st->fp, NULL); st_reference_geomprog(st, &st->gp, NULL); st_reference_vertprog(st, &st->vp, NULL); + st_reference_tesscprog(st, &st->tcp, NULL); + st_reference_tesseprog(st, &st->tep, NULL); /* release framebuffer surfaces */ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index dac5a4b9006..81d5480431a 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -53,11 +53,14 @@ struct u_upload_mgr; #define ST_NEW_FRAGMENT_PROGRAM (1 << 1) #define ST_NEW_VERTEX_PROGRAM (1 << 2) #define ST_NEW_FRAMEBUFFER (1 << 3) -/* gap, re-use it */ +#define ST_NEW_TESS_STATE (1 << 4) #define ST_NEW_GEOMETRY_PROGRAM (1 << 5) #define ST_NEW_VERTEX_ARRAYS (1 << 6) #define ST_NEW_RASTERIZER (1 << 7) #define ST_NEW_UNIFORM_BUFFER (1 << 8) +#define ST_NEW_TESSCTRL_PROGRAM (1 << 9) +#define ST_NEW_TESSEVAL_PROGRAM (1 << 10) +#define ST_NEW_SAMPLER_VIEWS (1 << 11) struct st_state_flags { @@ -137,7 +140,6 @@ struct st_context struct st_state_flags dirty; - GLboolean missing_textures; GLboolean vertdata_edgeflags; GLboolean edgeflag_culls_prims; @@ -147,10 +149,14 @@ struct st_context struct st_vertex_program *vp; /**< Currently bound vertex program */ struct 
st_fragment_program *fp; /**< Currently bound fragment program */ struct st_geometry_program *gp; /**< Currently bound geometry program */ + struct st_tessctrl_program *tcp; /**< Currently bound tess control program */ + struct st_tesseval_program *tep; /**< Currently bound tess eval program */ struct st_vp_variant *vp_variant; struct st_fp_variant *fp_variant; struct st_gp_variant *gp_variant; + struct st_tcp_variant *tcp_variant; + struct st_tep_variant *tep_variant; struct gl_texture_object *default_texture; @@ -272,6 +278,29 @@ st_fb_orientation(const struct gl_framebuffer *fb) } +static inline unsigned +st_shader_stage_to_ptarget(gl_shader_stage stage) +{ + switch (stage) { + case MESA_SHADER_VERTEX: + return PIPE_SHADER_VERTEX; + case MESA_SHADER_FRAGMENT: + return PIPE_SHADER_FRAGMENT; + case MESA_SHADER_GEOMETRY: + return PIPE_SHADER_GEOMETRY; + case MESA_SHADER_TESS_CTRL: + return PIPE_SHADER_TESS_CTRL; + case MESA_SHADER_TESS_EVAL: + return PIPE_SHADER_TESS_EVAL; + case MESA_SHADER_COMPUTE: + return PIPE_SHADER_COMPUTE; + } + + assert(!"should not be reached"); + return PIPE_SHADER_VERTEX; +} + + /** clear-alloc a struct-sized object, with casting */ #define ST_CALLOC_STRUCT(T) (struct T *) calloc(1, sizeof(struct T)) diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 8b43582c14b..957fcfd410e 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -164,6 +164,7 @@ translate_prim(const struct gl_context *ctx, unsigned prim) STATIC_ASSERT(GL_POINTS == PIPE_PRIM_POINTS); STATIC_ASSERT(GL_QUADS == PIPE_PRIM_QUADS); STATIC_ASSERT(GL_TRIANGLE_STRIP_ADJACENCY == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY); + STATIC_ASSERT(GL_PATCHES == PIPE_PRIM_PATCHES); return prim; } @@ -183,6 +184,7 @@ st_draw_vbo(struct gl_context *ctx, GLuint min_index, GLuint max_index, struct gl_transform_feedback_object *tfb_vertcount, + unsigned stream, struct gl_buffer_object *indirect) { struct st_context *st = st_context(ctx); @@ -241,7 +243,8 @@ st_draw_vbo(struct gl_context *ctx, /* Transform feedback drawing is always non-indexed. */ /* Set info.count_from_stream_output. 
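The new st_shader_stage_to_ptarget() helper in st_context.h above lets update_textures() take a gl_shader_stage and derive both the GLSL-version lookup and the pipe shader target from it. A self-contained illustration of that one-to-one mapping; the enum names are local stand-ins for MESA_SHADER_* and PIPE_SHADER_*:

#include <stdio.h>

enum stage  { SH_VERTEX, SH_TESS_CTRL, SH_TESS_EVAL, SH_GEOMETRY, SH_FRAGMENT, SH_COMPUTE };
enum target { PT_VERTEX, PT_FRAGMENT, PT_GEOMETRY, PT_TESS_CTRL, PT_TESS_EVAL, PT_COMPUTE };

static enum target
stage_to_ptarget(enum stage s)
{
   switch (s) {
   case SH_VERTEX:    return PT_VERTEX;
   case SH_TESS_CTRL: return PT_TESS_CTRL;
   case SH_TESS_EVAL: return PT_TESS_EVAL;
   case SH_GEOMETRY:  return PT_GEOMETRY;
   case SH_FRAGMENT:  return PT_FRAGMENT;
   case SH_COMPUTE:   return PT_COMPUTE;
   }
   return PT_VERTEX;                    /* unreachable for valid input */
}

int
main(void)
{
   printf("tess eval stage -> pipe target %d\n", stage_to_ptarget(SH_TESS_EVAL));
   return 0;
}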
*/ if (tfb_vertcount) { - st_transform_feedback_draw_init(tfb_vertcount, &info); + if (!st_transform_feedback_draw_init(tfb_vertcount, stream, &info)) + return; } } @@ -260,6 +263,7 @@ st_draw_vbo(struct gl_context *ctx, info.count = prims[i].count; info.start_instance = prims[i].base_instance; info.instance_count = prims[i].num_instances; + info.vertices_per_patch = ctx->TessCtrlProgram.patch_vertices; info.index_bias = prims[i].basevertex; if (!ib) { info.min_index = info.start; diff --git a/src/mesa/state_tracker/st_draw.h b/src/mesa/state_tracker/st_draw.h index 780d4bde713..a973c8a4a5d 100644 --- a/src/mesa/state_tracker/st_draw.h +++ b/src/mesa/state_tracker/st_draw.h @@ -56,6 +56,7 @@ st_draw_vbo(struct gl_context *ctx, GLuint min_index, GLuint max_index, struct gl_transform_feedback_object *tfb_vertcount, + unsigned stream, struct gl_buffer_object *indirect); extern void @@ -67,6 +68,7 @@ st_feedback_draw_vbo(struct gl_context *ctx, GLuint min_index, GLuint max_index, struct gl_transform_feedback_object *tfb_vertcount, + unsigned stream, struct gl_buffer_object *indirect); /** diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c index 177f6b5aefa..88c10a8f150 100644 --- a/src/mesa/state_tracker/st_draw_feedback.c +++ b/src/mesa/state_tracker/st_draw_feedback.c @@ -117,6 +117,7 @@ st_feedback_draw_vbo(struct gl_context *ctx, GLuint min_index, GLuint max_index, struct gl_transform_feedback_object *tfb_vertcount, + unsigned stream, struct gl_buffer_object *indirect) { struct st_context *st = st_context(ctx); diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index b1057f3eadd..17f572f80fb 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -165,6 +165,14 @@ void st_init_limits(struct pipe_screen *screen, pc = &c->Program[MESA_SHADER_GEOMETRY]; options = &c->ShaderCompilerOptions[MESA_SHADER_GEOMETRY]; break; + case PIPE_SHADER_TESS_CTRL: + pc = &c->Program[MESA_SHADER_TESS_CTRL]; + options = &c->ShaderCompilerOptions[MESA_SHADER_TESS_CTRL]; + break; + case PIPE_SHADER_TESS_EVAL: + pc = &c->Program[MESA_SHADER_TESS_EVAL]; + options = &c->ShaderCompilerOptions[MESA_SHADER_TESS_EVAL]; + break; default: /* compute shader, etc. 
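The info.vertices_per_patch assignment added to st_draw_vbo() above feeds the patch size to the driver for GL_PATCHES draws: the vertex count is consumed in groups of ctx->TessCtrlProgram.patch_vertices. A quick arithmetic illustration, plain C with made-up numbers:

#include <stdio.h>

int
main(void)
{
   unsigned patch_vertices = 4;    /* glPatchParameteri(GL_PATCH_VERTICES, 4) */
   unsigned draw_count     = 20;   /* glDrawArrays(GL_PATCHES, 0, 20)         */

   printf("patches launched: %u\n", draw_count / patch_vertices);   /* 5 */
   return 0;
}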
*/ continue; @@ -245,8 +253,12 @@ void st_init_limits(struct pipe_screen *screen, options->LowerClipDistance = true; } + c->LowerTessLevel = true; + c->MaxCombinedTextureImageUnits = _min(c->Program[MESA_SHADER_VERTEX].MaxTextureImageUnits + + c->Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits + + c->Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits + c->Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits + c->Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits, MAX_COMBINED_TEXTURE_IMAGE_UNITS); @@ -266,6 +278,9 @@ void st_init_limits(struct pipe_screen *screen, c->MaxVarying = MIN2(c->MaxVarying, MAX_VARYING); c->MaxGeometryOutputVertices = screen->get_param(screen, PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES); c->MaxGeometryTotalOutputComponents = screen->get_param(screen, PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS); + c->MaxTessPatchComponents = + MAX2(screen->get_param(screen, PIPE_CAP_MAX_SHADER_PATCH_VARYINGS), + MAX_VARYING) * 4; c->MinProgramTexelOffset = screen->get_param(screen, PIPE_CAP_MIN_TEXEL_OFFSET); c->MaxProgramTexelOffset = screen->get_param(screen, PIPE_CAP_MAX_TEXEL_OFFSET); @@ -301,6 +316,8 @@ void st_init_limits(struct pipe_screen *screen, screen->get_param(screen, PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT); c->MaxCombinedUniformBlocks = c->MaxUniformBufferBindings = c->Program[MESA_SHADER_VERTEX].MaxUniformBlocks + + c->Program[MESA_SHADER_TESS_CTRL].MaxUniformBlocks + + c->Program[MESA_SHADER_TESS_EVAL].MaxUniformBlocks + c->Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks + c->Program[MESA_SHADER_FRAGMENT].MaxUniformBlocks; assert(c->MaxCombinedUniformBlocks <= MAX_COMBINED_UNIFORM_BUFFERS); @@ -417,12 +434,14 @@ void st_init_extensions(struct pipe_screen *screen, static const struct st_extension_cap_mapping cap_mapping[] = { { o(ARB_base_instance), PIPE_CAP_START_INSTANCE }, - { o(ARB_buffer_storage), PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT }, + { o(ARB_buffer_storage), PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT }, + { o(ARB_color_buffer_float), PIPE_CAP_VERTEX_COLOR_UNCLAMPED }, { o(ARB_depth_clamp), PIPE_CAP_DEPTH_CLIP_DISABLE }, { o(ARB_depth_texture), PIPE_CAP_TEXTURE_SHADOW_MAP }, { o(ARB_draw_buffers_blend), PIPE_CAP_INDEP_BLEND_FUNC }, { o(ARB_draw_instanced), PIPE_CAP_TGSI_INSTANCEID }, { o(ARB_fragment_program_shadow), PIPE_CAP_TEXTURE_SHADOW_MAP }, + { o(ARB_framebuffer_object), PIPE_CAP_MIXED_FRAMEBUFFER_SIZES }, { o(ARB_instanced_arrays), PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR }, { o(ARB_occlusion_query), PIPE_CAP_OCCLUSION_QUERY }, { o(ARB_occlusion_query2), PIPE_CAP_OCCLUSION_QUERY }, @@ -432,6 +451,8 @@ void st_init_extensions(struct pipe_screen *screen, { o(ARB_shader_stencil_export), PIPE_CAP_SHADER_STENCIL_EXPORT }, { o(ARB_shader_texture_lod), PIPE_CAP_SM3 }, { o(ARB_shadow), PIPE_CAP_TEXTURE_SHADOW_MAP }, + { o(ARB_texture_buffer_object), PIPE_CAP_TEXTURE_BUFFER_OBJECTS }, + { o(ARB_texture_gather), PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS }, { o(ARB_texture_mirror_clamp_to_edge), PIPE_CAP_TEXTURE_MIRROR_CLAMP }, { o(ARB_texture_non_power_of_two), PIPE_CAP_NPOT_TEXTURES }, { o(ARB_timer_query), PIPE_CAP_QUERY_TIMESTAMP }, @@ -452,11 +473,14 @@ void st_init_extensions(struct pipe_screen *screen, { o(ATI_separate_stencil), PIPE_CAP_TWO_SIDED_STENCIL }, { o(ATI_texture_mirror_once), PIPE_CAP_TEXTURE_MIRROR_CLAMP }, { o(NV_conditional_render), PIPE_CAP_CONDITIONAL_RENDER }, + { o(NV_primitive_restart), PIPE_CAP_PRIMITIVE_RESTART }, { o(NV_texture_barrier), PIPE_CAP_TEXTURE_BARRIER }, /* GL_NV_point_sprite is not supported by gallium because we don't 
* support the GL_POINT_SPRITE_R_MODE_NV option. */ { o(OES_standard_derivatives), PIPE_CAP_SM3 }, + { o(OES_texture_float_linear), PIPE_CAP_TEXTURE_FLOAT_LINEAR }, + { o(OES_texture_half_float_linear), PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR }, { o(ARB_texture_cube_map_array), PIPE_CAP_CUBE_MAP_ARRAY }, { o(ARB_texture_multisample), PIPE_CAP_TEXTURE_MULTISAMPLE }, { o(ARB_texture_query_lod), PIPE_CAP_TEXTURE_QUERY_LOD }, @@ -467,6 +491,7 @@ void st_init_extensions(struct pipe_screen *screen, { o(ARB_texture_view), PIPE_CAP_SAMPLER_VIEW_TARGET }, { o(ARB_clip_control), PIPE_CAP_CLIP_HALFZ }, { o(EXT_polygon_offset_clamp), PIPE_CAP_POLYGON_OFFSET_CLAMP }, + { o(EXT_depth_bounds_test), PIPE_CAP_DEPTH_BOUNDS_TEST }, }; /* Required: render target and sampler support */ @@ -475,6 +500,12 @@ void st_init_extensions(struct pipe_screen *screen, { PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT } }, + { { o(OES_texture_float) }, + { PIPE_FORMAT_R32G32B32A32_FLOAT } }, + + { { o(OES_texture_half_float) }, + { PIPE_FORMAT_R16G16B16A16_FLOAT } }, + { { o(ARB_texture_rgb10_a2ui) }, { PIPE_FORMAT_R10G10B10A2_UINT, PIPE_FORMAT_B10G10R10A2_UINT }, @@ -556,7 +587,8 @@ void st_init_extensions(struct pipe_screen *screen, PIPE_FORMAT_R8G8B8A8_UNORM }, GL_TRUE }, /* at least one format must be supported */ - { { o(ARB_stencil_texturing) }, + { { o(ARB_stencil_texturing), + o(ARB_texture_stencil8) }, { PIPE_FORMAT_X24S8_UINT, PIPE_FORMAT_S8X24_UINT }, GL_TRUE }, /* at least one format must be supported */ @@ -650,9 +682,6 @@ void st_init_extensions(struct pipe_screen *screen, ARRAY_SIZE(vertex_mapping), PIPE_BUFFER, PIPE_BIND_VERTEX_BUFFER); - if (extensions->ARB_stencil_texturing) - extensions->ARB_texture_stencil8 = GL_TRUE; - /* Figure out GLSL support. */ glsl_feature_level = screen->get_param(screen, PIPE_CAP_GLSL_FEATURE_LEVEL); @@ -693,6 +722,7 @@ void st_init_extensions(struct pipe_screen *screen, extensions->OES_depth_texture_cube_map = GL_TRUE; extensions->ARB_shading_language_420pack = GL_TRUE; extensions->ARB_texture_query_levels = GL_TRUE; + extensions->ARB_shader_subroutine = GL_TRUE; if (!options->disable_shader_bit_encoding) { extensions->ARB_shader_bit_encoding = GL_TRUE; @@ -723,20 +753,9 @@ void st_init_extensions(struct pipe_screen *screen, extensions->ANGLE_texture_compression_dxt = GL_FALSE; } - if (screen->get_shader_param(screen, PIPE_SHADER_GEOMETRY, + if (screen->get_shader_param(screen, PIPE_SHADER_TESS_CTRL, PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) { -#if 0 /* XXX re-enable when GLSL compiler again supports geometry shaders */ - extensions->ARB_geometry_shader4 = GL_TRUE; -#endif - } - - if (screen->get_param(screen, PIPE_CAP_PRIMITIVE_RESTART)) { - extensions->NV_primitive_restart = GL_TRUE; - } - - /* ARB_color_buffer_float. 
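Several extensions that used to be enabled by hand-written get_param checks (ARB_color_buffer_float, ARB_framebuffer_object, ARB_texture_buffer_object, ARB_texture_gather, NV_primitive_restart) move into the cap_mapping table above. A minimal sketch of that table-driven idiom, assuming a stand-in query callback and a trimmed-down extensions struct rather than the gallium screen interface:

   /* Illustrative only: the struct, cap IDs and query() are stand-ins. */
   #include <stdbool.h>
   #include <stddef.h>

   struct extensions { bool buffer_storage, depth_clamp, primitive_restart; };

   struct cap_mapping {
      size_t ext_offset;   /* offsetof() into struct extensions */
      int    cap;          /* capability to query from the driver */
   };

   void
   enable_from_caps(struct extensions *ext,
                    const struct cap_mapping *map, size_t n,
                    int (*query)(int cap))
   {
      for (size_t i = 0; i < n; i++) {
         if (query(map[i].cap) > 0)
            *(bool *)((char *)ext + map[i].ext_offset) = true;
      }
   }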
*/ - if (screen->get_param(screen, PIPE_CAP_VERTEX_COLOR_UNCLAMPED)) { - extensions->ARB_color_buffer_float = GL_TRUE; + extensions->ARB_tessellation_shader = GL_TRUE; } if (screen->fence_finish) { @@ -823,9 +842,7 @@ void st_init_extensions(struct pipe_screen *screen, consts->MinMapBufferAlignment = screen->get_param(screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT); - if (screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OBJECTS)) { - extensions->ARB_texture_buffer_object = GL_TRUE; - + if (extensions->ARB_texture_buffer_object) { consts->MaxTextureBufferSize = _min(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE), (1u << 31) - 1); @@ -840,10 +857,6 @@ void st_init_extensions(struct pipe_screen *screen, PIPE_BIND_SAMPLER_VIEW); } - if (screen->get_param(screen, PIPE_CAP_MIXED_FRAMEBUFFER_SIZES)) { - extensions->ARB_framebuffer_object = GL_TRUE; - } - /* Unpacking a varying in the fragment shader costs 1 texture indirection. * If the number of available texture indirections is very limited, then we * prefer to disable varying packing rather than run the risk of varying @@ -868,9 +881,6 @@ void st_init_extensions(struct pipe_screen *screen, extensions->AMD_vertex_shader_viewport_index = GL_TRUE; } - if (consts->MaxProgramTextureGatherComponents > 0) - extensions->ARB_texture_gather = GL_TRUE; - /* GL_ARB_ES3_compatibility. * * Assume that ES3 is supported if GLSL 3.30 is supported. diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 25e30c7deb2..6f007273c73 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -158,9 +158,12 @@ public: { this->file = file; this->index = index; + this->index2D = 0; this->writemask = writemask; this->cond_mask = COND_TR; this->reladdr = NULL; + this->reladdr2 = NULL; + this->has_index2 = false; this->type = type; this->array_id = 0; } @@ -169,9 +172,12 @@ public: { this->file = file; this->index = 0; + this->index2D = 0; this->writemask = writemask; this->cond_mask = COND_TR; this->reladdr = NULL; + this->reladdr2 = NULL; + this->has_index2 = false; this->type = type; this->array_id = 0; } @@ -181,9 +187,12 @@ public: this->type = GLSL_TYPE_ERROR; this->file = PROGRAM_UNDEFINED; this->index = 0; + this->index2D = 0; this->writemask = 0; this->cond_mask = COND_TR; this->reladdr = NULL; + this->reladdr2 = NULL; + this->has_index2 = false; this->array_id = 0; } @@ -191,11 +200,14 @@ public: gl_register_file file; /**< PROGRAM_* from Mesa */ int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ + int index2D; int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ GLuint cond_mask:4; int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ /** Register index should be offset by the integer in this reg. 
*/ st_src_reg *reladdr; + st_src_reg *reladdr2; + bool has_index2; unsigned array_id; }; @@ -207,9 +219,9 @@ st_src_reg::st_src_reg(st_dst_reg reg) this->swizzle = SWIZZLE_XYZW; this->negate = 0; this->reladdr = reg.reladdr; - this->index2D = 0; - this->reladdr2 = NULL; - this->has_index2 = false; + this->index2D = reg.index2D; + this->reladdr2 = reg.reladdr2; + this->has_index2 = reg.has_index2; this->double_reg2 = false; this->array_id = reg.array_id; } @@ -222,6 +234,9 @@ st_dst_reg::st_dst_reg(st_src_reg reg) this->writemask = WRITEMASK_XYZW; this->cond_mask = COND_TR; this->reladdr = reg.reladdr; + this->index2D = reg.index2D; + this->reladdr2 = reg.reladdr2; + this->has_index2 = reg.has_index2; this->array_id = reg.array_id; } @@ -551,8 +566,8 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, * reg directly for one of the regs, and preload the other reladdr * sources into temps. */ - num_reladdr += dst.reladdr != NULL; - num_reladdr += dst1.reladdr != NULL; + num_reladdr += dst.reladdr != NULL || dst.reladdr2; + num_reladdr += dst1.reladdr != NULL || dst1.reladdr2; num_reladdr += src0.reladdr != NULL || src0.reladdr2 != NULL; num_reladdr += src1.reladdr != NULL || src1.reladdr2 != NULL; num_reladdr += src2.reladdr != NULL || src2.reladdr2 != NULL; @@ -563,8 +578,11 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, reladdr_to_temp(ir, &src1, &num_reladdr); reladdr_to_temp(ir, &src0, &num_reladdr); - if (dst.reladdr) { - emit_arl(ir, address_reg, *dst.reladdr); + if (dst.reladdr || dst.reladdr2) { + if (dst.reladdr) + emit_arl(ir, address_reg, *dst.reladdr); + if (dst.reladdr2) + emit_arl(ir, address_reg2, *dst.reladdr2); num_reladdr--; } if (dst1.reladdr) { @@ -590,7 +608,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, inst->function = NULL; /* Update indirect addressing status used by TGSI */ - if (dst.reladdr) { + if (dst.reladdr || dst.reladdr2) { switch(dst.file) { case PROGRAM_STATE_VAR: case PROGRAM_CONSTANT: @@ -797,7 +815,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op, case TGSI_OPCODE_##c: \ if (type == GLSL_TYPE_DOUBLE) \ op = TGSI_OPCODE_##d; \ - else if (type == GLSL_TYPE_INT) \ + else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE) \ op = TGSI_OPCODE_##i; \ else if (type == GLSL_TYPE_UINT) \ op = TGSI_OPCODE_##u; \ @@ -1090,6 +1108,7 @@ type_size(const struct glsl_type *type) return size; case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: + case GLSL_TYPE_SUBROUTINE: /* Samplers take up one slot in UNIFORMS[], but they're baked in * at link time. 
*/ @@ -1470,6 +1489,9 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) result_src = op[0]; } break; + case ir_unop_subroutine_to_int: + emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); + break; case ir_unop_abs: emit_asm(ir, TGSI_OPCODE_ABS, result_dst, op[0]); break; @@ -2243,7 +2265,10 @@ is_inout_array(unsigned stage, ir_variable *var, bool *is_2d) *is_2d = false; - if (stage == MESA_SHADER_GEOMETRY && var->data.mode == ir_var_shader_in) { + if (((stage == MESA_SHADER_GEOMETRY && var->data.mode == ir_var_shader_in) || + (stage == MESA_SHADER_TESS_EVAL && var->data.mode == ir_var_shader_in) || + stage == MESA_SHADER_TESS_CTRL) && + !var->data.patch) { if (!var->type->is_array()) return false; /* a system value probably */ @@ -2355,7 +2380,8 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) static void shrink_array_declarations(struct array_decl *arrays, unsigned count, - GLbitfield64 usage_mask) + GLbitfield64 usage_mask, + GLbitfield patch_usage_mask) { unsigned i, j; @@ -2367,8 +2393,15 @@ shrink_array_declarations(struct array_decl *arrays, unsigned count, /* Shrink the beginning. */ for (j = 0; j < decl->array_size; j++) { - if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j)) - break; + if (decl->mesa_index >= VARYING_SLOT_PATCH0) { + if (patch_usage_mask & + BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j)) + break; + } + else { + if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j)) + break; + } decl->mesa_index++; decl->array_size--; @@ -2377,8 +2410,15 @@ shrink_array_declarations(struct array_decl *arrays, unsigned count, /* Shrink the end. */ for (j = decl->array_size-1; j >= 0; j--) { - if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j)) - break; + if (decl->mesa_index >= VARYING_SLOT_PATCH0) { + if (patch_usage_mask & + BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j)) + break; + } + else { + if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j)) + break; + } decl->array_size--; } @@ -2391,22 +2431,34 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) ir_constant *index; st_src_reg src; int element_size = type_size(ir->type); - bool is_2D_input; + bool is_2D = false; index = ir->array_index->constant_expression_value(); ir->array->accept(this); src = this->result; - is_2D_input = this->prog->Target == GL_GEOMETRY_PROGRAM_NV && - src.file == PROGRAM_INPUT && - ir->array->ir_type != ir_type_dereference_array; + if (ir->array->ir_type != ir_type_dereference_array) { + switch (this->prog->Target) { + case GL_TESS_CONTROL_PROGRAM_NV: + is_2D = (src.file == PROGRAM_INPUT || src.file == PROGRAM_OUTPUT) && + !ir->variable_referenced()->data.patch; + break; + case GL_TESS_EVALUATION_PROGRAM_NV: + is_2D = src.file == PROGRAM_INPUT && + !ir->variable_referenced()->data.patch; + break; + case GL_GEOMETRY_PROGRAM_NV: + is_2D = src.file == PROGRAM_INPUT; + break; + } + } - if (is_2D_input) + if (is_2D) element_size = 1; if (index) { - if (is_2D_input) { + if (is_2D) { src.index2D = index->value.i[0]; src.has_index2 = true; } else @@ -2433,7 +2485,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) /* If there was already a relative address register involved, add the * new and the old together to get the new offset. */ - if (!is_2D_input && src.reladdr != NULL) { + if (!is_2D && src.reladdr != NULL) { st_src_reg accum_reg = get_temp(native_integers ? 
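shrink_array_declarations above now takes a second bitmask so that declarations covering per-patch slots (at or above VARYING_SLOT_PATCH0) are trimmed against the patch usage masks instead of the ordinary one. A small self-contained sketch of that trim-both-ends logic, with an invented PATCH_BASE and declaration struct:

   /* Illustrative only: PATCH_BASE and the decl struct are invented. */
   #include <stdint.h>

   #define PATCH_BASE 64u

   struct decl_sketch { unsigned first_slot; unsigned size; };

   static int
   slot_used(unsigned slot, uint64_t usage, uint32_t patch_usage)
   {
      if (slot >= PATCH_BASE)
         return slot - PATCH_BASE < 32 &&
                ((patch_usage >> (slot - PATCH_BASE)) & 1);
      return (usage >> slot) & 1;
   }

   void
   shrink_decl(struct decl_sketch *d, uint64_t usage, uint32_t patch_usage)
   {
      /* Shrink the beginning. */
      while (d->size && !slot_used(d->first_slot, usage, patch_usage)) {
         d->first_slot++;
         d->size--;
      }
      /* Shrink the end. */
      while (d->size &&
             !slot_used(d->first_slot + d->size - 1, usage, patch_usage))
         d->size--;
   }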
glsl_type::int_type : glsl_type::float_type); @@ -2443,7 +2495,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) index_reg = accum_reg; } - if (is_2D_input) { + if (is_2D) { src.reladdr2 = ralloc(mem_ctx, st_src_reg); memcpy(src.reladdr2, &index_reg, sizeof(index_reg)); src.index2D = 0; @@ -3430,7 +3482,10 @@ glsl_to_tgsi_visitor::visit(ir_end_primitive *ir) void glsl_to_tgsi_visitor::visit(ir_barrier *ir) { - unreachable("Not implemented!"); + assert(this->prog->Target == GL_TESS_CONTROL_PROGRAM_NV || + this->prog->Target == GL_COMPUTE_PROGRAM_NV); + + emit_asm(ir, TGSI_OPCODE_BARRIER); } glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() @@ -3553,7 +3608,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void) { int tempWritesSize = 0; unsigned *tempWrites = NULL; - unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; + unsigned outputWrites[VARYING_SLOT_TESS_MAX]; memset(outputWrites, 0, sizeof(outputWrites)); @@ -3561,8 +3616,8 @@ glsl_to_tgsi_visitor::simplify_cmp(void) unsigned prevWriteMask = 0; /* Give up if we encounter relative addressing or flow control. */ - if (inst->dst[0].reladdr || - inst->dst[1].reladdr || + if (inst->dst[0].reladdr || inst->dst[0].reladdr2 || + inst->dst[1].reladdr || inst->dst[1].reladdr2 || tgsi_get_opcode_info(inst->op)->is_branch || inst->op == TGSI_OPCODE_BGNSUB || inst->op == TGSI_OPCODE_CONT || @@ -3573,7 +3628,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void) } if (inst->dst[0].file == PROGRAM_OUTPUT) { - assert(inst->dst[0].index < MAX_PROGRAM_OUTPUTS); + assert(inst->dst[0].index < (signed)ARRAY_SIZE(outputWrites)); prevWriteMask = outputWrites[inst->dst[0].index]; outputWrites[inst->dst[0].index] |= inst->dst[0].writemask; } else if (inst->dst[0].file == PROGRAM_TEMPORARY) { @@ -3940,6 +3995,7 @@ glsl_to_tgsi_visitor::copy_propagate(void) !(inst->dst[0].file == inst->src[0].file && inst->dst[0].index == inst->src[0].index) && !inst->dst[0].reladdr && + !inst->dst[0].reladdr2 && !inst->saturate && inst->src[0].file != PROGRAM_ARRAY && !inst->src[0].reladdr && @@ -4527,6 +4583,14 @@ const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { TGSI_SEMANTIC_SAMPLEID, TGSI_SEMANTIC_SAMPLEPOS, TGSI_SEMANTIC_SAMPLEMASK, + + /* Tessellation shaders + */ + TGSI_SEMANTIC_TESSCOORD, + TGSI_SEMANTIC_VERTICESIN, + TGSI_SEMANTIC_PRIMID, + TGSI_SEMANTIC_TESSOUTER, + TGSI_SEMANTIC_TESSINNER, }; /** @@ -4651,6 +4715,9 @@ dst_register(struct st_translate *t, gl_register_file file, unsigned index, if (!array_id) { if (t->procType == TGSI_PROCESSOR_FRAGMENT) assert(index < FRAG_RESULT_MAX); + else if (t->procType == TGSI_PROCESSOR_TESS_CTRL || + t->procType == TGSI_PROCESSOR_TESS_EVAL) + assert(index < VARYING_SLOT_TESS_MAX); else assert(index < VARYING_SLOT_MAX); @@ -4790,6 +4857,14 @@ translate_dst(struct st_translate *t, dst = ureg_dst_indirect(dst, ureg_src(t->address[0])); } + if (dst_reg->has_index2) { + if (dst_reg->reladdr2) + dst = ureg_dst_dimension_indirect(dst, ureg_src(t->address[1]), + dst_reg->index2D); + else + dst = ureg_dst_dimension(dst, dst_reg->index2D); + } + return dst; } @@ -5271,6 +5346,8 @@ st_translate_program( TGSI_SEMANTIC_VERTEXID_NOBASE); assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_BASE_VERTEX] == TGSI_SEMANTIC_BASEVERTEX); + assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_TESS_COORD] == + TGSI_SEMANTIC_TESSCOORD); t = CALLOC_STRUCT(st_translate); if (!t) { @@ -5313,6 +5390,8 @@ st_translate_program( } break; case TGSI_PROCESSOR_GEOMETRY: + case TGSI_PROCESSOR_TESS_EVAL: + case TGSI_PROCESSOR_TESS_CTRL: for (i = 0; i < numInputs; i++) { unsigned array_id 
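The glsl_to_tgsi changes above thread a second register dimension (index2D / reladdr2 / has_index2) through sources, destinations and the CMP and copy-propagation passes, so per-vertex tessellation and geometry I/O can be addressed as input[vertex][attribute], with either dimension optionally offset by an address register. A toy model of that two-level addressing, with invented types:

   /* Illustrative only: the dim struct and resolve() are not the Mesa types. */
   #include <stdio.h>

   struct dim { int index; const int *reladdr; /* NULL when absolute */ };

   static int
   resolve(struct dim d)
   {
      return d.index + (d.reladdr ? *d.reladdr : 0);
   }

   int main(void)
   {
      float input[3][8] = {{0}};         /* input[vertex][attribute] */
      int vtx = 2;
      struct dim vertex = { 0, &vtx };   /* index2D + reladdr2 analogue */
      struct dim attrib = { 5, NULL };   /* index + reladdr analogue */

      input[resolve(vertex)][resolve(attrib)] = 1.0f;
      printf("%g\n", input[2][5]);       /* prints 1 */
      return 0;
   }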
= 0; unsigned array_size; @@ -5347,6 +5426,8 @@ st_translate_program( case TGSI_PROCESSOR_FRAGMENT: break; case TGSI_PROCESSOR_GEOMETRY: + case TGSI_PROCESSOR_TESS_EVAL: + case TGSI_PROCESSOR_TESS_CTRL: case TGSI_PROCESSOR_VERTEX: for (i = 0; i < numOutputs; i++) { unsigned array_id = 0; @@ -5461,6 +5542,7 @@ st_translate_program( struct pipe_screen *pscreen = st->pipe->screen; assert(procType == TGSI_PROCESSOR_VERTEX); assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS)); + (void) pscreen; if (!ctx->Const.NativeIntegers) { struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg); ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]); @@ -5611,25 +5693,6 @@ out: /* ----------------------------- End TGSI code ------------------------------ */ -static unsigned -shader_stage_to_ptarget(gl_shader_stage stage) -{ - switch (stage) { - case MESA_SHADER_VERTEX: - return PIPE_SHADER_VERTEX; - case MESA_SHADER_FRAGMENT: - return PIPE_SHADER_FRAGMENT; - case MESA_SHADER_GEOMETRY: - return PIPE_SHADER_GEOMETRY; - case MESA_SHADER_COMPUTE: - return PIPE_SHADER_COMPUTE; - } - - assert(!"should not be reached"); - return PIPE_SHADER_VERTEX; -} - - /** * Convert a shader's GLSL IR into a Mesa gl_program, although without * generating Mesa IR. @@ -5646,7 +5709,7 @@ get_mesa_program(struct gl_context *ctx, struct gl_shader_compiler_options *options = &ctx->Const.ShaderCompilerOptions[_mesa_shader_enum_to_shader_stage(shader->Type)]; struct pipe_screen *pscreen = ctx->st->pipe->screen; - unsigned ptarget = shader_stage_to_ptarget(shader->Stage); + unsigned ptarget = st_shader_stage_to_ptarget(shader->Stage); validate_ir_tree(shader->ir); @@ -5673,7 +5736,7 @@ get_mesa_program(struct gl_context *ctx, prog->Parameters); /* Remove reads from output registers. */ - lower_output_reads(shader->ir); + lower_output_reads(shader->Stage, shader->ir); /* Emit intermediate IR for main(). */ visit_exec_list(shader->ir, v); @@ -5721,7 +5784,11 @@ get_mesa_program(struct gl_context *ctx, /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ v->simplify_cmp(); - v->copy_propagate(); + + if (shader->Type != GL_TESS_CONTROL_SHADER && + shader->Type != GL_TESS_EVALUATION_SHADER) + v->copy_propagate(); + while (v->eliminate_dead_code()); v->merge_two_dsts(); @@ -5745,9 +5812,9 @@ get_mesa_program(struct gl_context *ctx, do_set_program_inouts(shader->ir, prog, shader->Stage); shrink_array_declarations(v->input_arrays, v->num_input_arrays, - prog->InputsRead); + prog->InputsRead, prog->PatchInputsRead); shrink_array_declarations(v->output_arrays, v->num_output_arrays, - prog->OutputsWritten); + prog->OutputsWritten, prog->PatchOutputsWritten); count_resources(v, prog); /* This must be done before the uniform storage is associated. 
*/ @@ -5776,6 +5843,8 @@ get_mesa_program(struct gl_context *ctx, struct st_vertex_program *stvp; struct st_fragment_program *stfp; struct st_geometry_program *stgp; + struct st_tessctrl_program *sttcp; + struct st_tesseval_program *sttep; switch (shader->Type) { case GL_VERTEX_SHADER: @@ -5790,6 +5859,14 @@ get_mesa_program(struct gl_context *ctx, stgp = (struct st_geometry_program *)prog; stgp->glsl_to_tgsi = v; break; + case GL_TESS_CONTROL_SHADER: + sttcp = (struct st_tessctrl_program *)prog; + sttcp->glsl_to_tgsi = v; + break; + case GL_TESS_EVALUATION_SHADER: + sttep = (struct st_tesseval_program *)prog; + sttep->glsl_to_tgsi = v; + break; default: assert(!"should not be reached"); return NULL; @@ -5800,6 +5877,71 @@ get_mesa_program(struct gl_context *ctx, extern "C" { +static void +st_dump_program_for_shader_db(struct gl_context *ctx, + struct gl_shader_program *prog) +{ + /* Dump only successfully compiled and linked shaders to the specified + * file. This is for shader-db. + * + * These options allow some pre-processing of shaders while dumping, + * because some apps have ill-formed shaders. + */ + const char *dump_filename = os_get_option("ST_DUMP_SHADERS"); + const char *insert_directives = os_get_option("ST_DUMP_INSERT"); + + if (dump_filename && prog->Name != 0) { + FILE *f = fopen(dump_filename, "a"); + + if (f) { + for (unsigned i = 0; i < prog->NumShaders; i++) { + const struct gl_shader *sh = prog->Shaders[i]; + const char *source; + bool skip_version = false; + + if (!sh) + continue; + + source = sh->Source; + + /* This string mustn't be changed. shader-db uses it to find + * where the shader begins. + */ + fprintf(f, "GLSL %s shader %d source for linked program %d:\n", + _mesa_shader_stage_to_string(sh->Stage), + i, prog->Name); + + /* Dump the forced version if set. */ + if (ctx->Const.ForceGLSLVersion) { + fprintf(f, "#version %i\n", ctx->Const.ForceGLSLVersion); + skip_version = true; + } + + /* Insert directives (optional). */ + if (insert_directives) { + if (!ctx->Const.ForceGLSLVersion && prog->Version) + fprintf(f, "#version %i\n", prog->Version); + fprintf(f, "%s\n", insert_directives); + skip_version = true; + } + + if (skip_version && strncmp(source, "#version ", 9) == 0) { + const char *next_line = strstr(source, "\n"); + + if (next_line) + source = next_line + 1; + else + continue; + } + + fprintf(f, "%s", source); + fprintf(f, "\n"); + } + fclose(f); + } + } +} + /** * Link a shader. 
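st_dump_program_for_shader_db above appends the GLSL source of every successfully linked shader to the file named by ST_DUMP_SHADERS, optionally forcing a #version line (and skipping the shader's own) so that shader-db can recompile ill-formed sources. A simplified stand-in for that append-and-skip-version logic, using getenv rather than the os_get_option helper:

   /* Illustrative only: a simplified stand-in, not the state-tracker code. */
   #include <stdio.h>
   #include <stdlib.h>
   #include <string.h>

   void dump_source(const char *source, int forced_version)
   {
      const char *path = getenv("ST_DUMP_SHADERS");
      FILE *f = path ? fopen(path, "a") : NULL;
      if (!f)
         return;

      if (forced_version) {
         fprintf(f, "#version %d\n", forced_version);
         if (strncmp(source, "#version ", 9) == 0) {
            const char *next = strchr(source, '\n');
            source = next ? next + 1 : "";
         }
      }
      fprintf(f, "%s\n", source);
      fclose(f);
   }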
* Called via ctx->Driver.LinkShader() @@ -5821,7 +5963,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) gl_shader_stage stage = _mesa_shader_enum_to_shader_stage(prog->_LinkedShaders[i]->Type); const struct gl_shader_compiler_options *options = &ctx->Const.ShaderCompilerOptions[stage]; - unsigned ptarget = shader_stage_to_ptarget(stage); + unsigned ptarget = st_shader_stage_to_ptarget(stage); bool have_dround = pscreen->get_shader_param(pscreen, ptarget, PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED); bool have_dfrexp = pscreen->get_shader_param(pscreen, ptarget, @@ -5832,7 +5974,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) */ if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) { - lower_variable_index_to_cond_assign(ir, + lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir, options->EmitNoIndirectInput, options->EmitNoIndirectOutput, options->EmitNoIndirectTemp, @@ -5920,6 +6062,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) _mesa_reference_program(ctx, &linked_prog, NULL); } + st_dump_program_for_shader_db(ctx, prog); return GL_TRUE; } diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index a2dee6298fa..2e2c8ffaed9 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -368,6 +368,7 @@ st_visual_to_context_mode(const struct st_visual *visual, mode->rgbBits = mode->redBits + mode->greenBits + mode->blueBits + mode->alphaBits; + mode->sRGBCapable = util_format_is_srgb(visual->color_format); } if (visual->depth_stencil_format != PIPE_FORMAT_NONE) { diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index fa792bc349b..e62dd7aab80 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -163,6 +163,68 @@ st_release_gp_variants(struct st_context *st, struct st_geometry_program *stgp) } +/** + * Delete a tessellation control program variant. Note the caller must unlink + * the variant from the linked list. + */ +static void +delete_tcp_variant(struct st_context *st, struct st_tcp_variant *tcpv) +{ + if (tcpv->driver_shader) + cso_delete_tessctrl_shader(st->cso_context, tcpv->driver_shader); + + free(tcpv); +} + + +/** + * Free all variants of a tessellation control program. + */ +void +st_release_tcp_variants(struct st_context *st, struct st_tessctrl_program *sttcp) +{ + struct st_tcp_variant *tcpv; + + for (tcpv = sttcp->variants; tcpv; ) { + struct st_tcp_variant *next = tcpv->next; + delete_tcp_variant(st, tcpv); + tcpv = next; + } + + sttcp->variants = NULL; +} + + +/** + * Delete a tessellation evaluation program variant. Note the caller must + * unlink the variant from the linked list. + */ +static void +delete_tep_variant(struct st_context *st, struct st_tep_variant *tepv) +{ + if (tepv->driver_shader) + cso_delete_tesseval_shader(st->cso_context, tepv->driver_shader); + + free(tepv); +} + + +/** + * Free all variants of a tessellation evaluation program. 
+ */ +void +st_release_tep_variants(struct st_context *st, struct st_tesseval_program *sttep) +{ + struct st_tep_variant *tepv; + + for (tepv = sttep->variants; tepv; ) { + struct st_tep_variant *next = tepv->next; + delete_tep_variant(st, tepv); + tepv = next; + } + + sttep->variants = NULL; +} /** @@ -870,61 +932,52 @@ st_get_fp_variant(struct st_context *st, /** - * Translate a geometry program to create a new variant. + * Translate a program. This is common code for geometry and tessellation + * shaders. */ -static struct st_gp_variant * -st_translate_geometry_program(struct st_context *st, - struct st_geometry_program *stgp, - const struct st_gp_variant_key *key) +static void +st_translate_program_common(struct st_context *st, + struct gl_program *prog, + struct glsl_to_tgsi_visitor *glsl_to_tgsi, + struct ureg_program *ureg, + unsigned tgsi_processor, + struct pipe_shader_state *out_state) { - GLuint inputSlotToAttr[VARYING_SLOT_MAX]; - GLuint inputMapping[VARYING_SLOT_MAX]; - GLuint outputSlotToAttr[VARYING_SLOT_MAX]; - GLuint outputMapping[VARYING_SLOT_MAX]; - struct pipe_context *pipe = st->pipe; + GLuint inputSlotToAttr[VARYING_SLOT_TESS_MAX]; + GLuint inputMapping[VARYING_SLOT_TESS_MAX]; + GLuint outputSlotToAttr[VARYING_SLOT_TESS_MAX]; + GLuint outputMapping[VARYING_SLOT_TESS_MAX]; GLuint attr; - uint gs_num_inputs = 0; - ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; + uint num_inputs = 0; - ubyte gs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; - ubyte gs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; - uint gs_num_outputs = 0; + ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; + ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; + uint num_outputs = 0; GLint i; - struct ureg_program *ureg; - struct pipe_shader_state state = {0}; - struct st_gp_variant *gpv; - - gpv = CALLOC_STRUCT(st_gp_variant); - if (!gpv) - return NULL; - - ureg = ureg_create_with_screen(TGSI_PROCESSOR_GEOMETRY, st->pipe->screen); - if (ureg == NULL) { - free(gpv); - return NULL; - } memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr)); memset(inputMapping, 0, sizeof(inputMapping)); memset(outputSlotToAttr, 0, sizeof(outputSlotToAttr)); memset(outputMapping, 0, sizeof(outputMapping)); + memset(out_state, 0, sizeof(*out_state)); /* * Convert Mesa program inputs to TGSI input register semantics. */ for (attr = 0; attr < VARYING_SLOT_MAX; attr++) { - if ((stgp->Base.Base.InputsRead & BITFIELD64_BIT(attr)) != 0) { - const GLuint slot = gs_num_inputs++; + if ((prog->InputsRead & BITFIELD64_BIT(attr)) != 0) { + const GLuint slot = num_inputs++; inputMapping[attr] = slot; inputSlotToAttr[slot] = attr; switch (attr) { case VARYING_SLOT_PRIMITIVE_ID: + assert(tgsi_processor == TGSI_PROCESSOR_GEOMETRY); input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID; input_semantic_index[slot] = 0; break; @@ -976,19 +1029,33 @@ st_translate_geometry_program(struct st_context *st, /* fall through */ case VARYING_SLOT_VAR0: default: - assert(attr >= VARYING_SLOT_VAR0 && attr < VARYING_SLOT_MAX); + assert(attr >= VARYING_SLOT_VAR0 || + (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7)); input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; input_semantic_index[slot] = st_get_generic_varying_index(st, attr); - break; + break; } } } + /* Also add patch inputs. 
*/ + for (attr = 0; attr < 32; attr++) { + if (prog->PatchInputsRead & (1 << attr)) { + GLuint slot = num_inputs++; + GLuint patch_attr = VARYING_SLOT_PATCH0 + attr; + + inputMapping[patch_attr] = slot; + inputSlotToAttr[slot] = patch_attr; + input_semantic_name[slot] = TGSI_SEMANTIC_PATCH; + input_semantic_index[slot] = attr; + } + } + /* initialize output semantics to defaults */ for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { - gs_output_semantic_name[i] = TGSI_SEMANTIC_GENERIC; - gs_output_semantic_index[i] = 0; + output_semantic_name[i] = TGSI_SEMANTIC_GENERIC; + output_semantic_index[i] = 0; } /* @@ -996,8 +1063,8 @@ st_translate_geometry_program(struct st_context *st, * mapping and the semantic information for each output. */ for (attr = 0; attr < VARYING_SLOT_MAX; attr++) { - if (stgp->Base.Base.OutputsWritten & BITFIELD64_BIT(attr)) { - GLuint slot = gs_num_outputs++; + if (prog->OutputsWritten & BITFIELD64_BIT(attr)) { + GLuint slot = num_outputs++; outputMapping[attr] = slot; outputSlotToAttr[slot] = attr; @@ -1005,56 +1072,64 @@ st_translate_geometry_program(struct st_context *st, switch (attr) { case VARYING_SLOT_POS: assert(slot == 0); - gs_output_semantic_name[slot] = TGSI_SEMANTIC_POSITION; - gs_output_semantic_index[slot] = 0; + output_semantic_name[slot] = TGSI_SEMANTIC_POSITION; + output_semantic_index[slot] = 0; break; case VARYING_SLOT_COL0: - gs_output_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - gs_output_semantic_index[slot] = 0; + output_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + output_semantic_index[slot] = 0; break; case VARYING_SLOT_COL1: - gs_output_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - gs_output_semantic_index[slot] = 1; + output_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + output_semantic_index[slot] = 1; break; case VARYING_SLOT_BFC0: - gs_output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR; - gs_output_semantic_index[slot] = 0; + output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR; + output_semantic_index[slot] = 0; break; case VARYING_SLOT_BFC1: - gs_output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR; - gs_output_semantic_index[slot] = 1; + output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR; + output_semantic_index[slot] = 1; break; case VARYING_SLOT_FOGC: - gs_output_semantic_name[slot] = TGSI_SEMANTIC_FOG; - gs_output_semantic_index[slot] = 0; + output_semantic_name[slot] = TGSI_SEMANTIC_FOG; + output_semantic_index[slot] = 0; break; case VARYING_SLOT_PSIZ: - gs_output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE; - gs_output_semantic_index[slot] = 0; + output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE; + output_semantic_index[slot] = 0; break; case VARYING_SLOT_CLIP_VERTEX: - gs_output_semantic_name[slot] = TGSI_SEMANTIC_CLIPVERTEX; - gs_output_semantic_index[slot] = 0; + output_semantic_name[slot] = TGSI_SEMANTIC_CLIPVERTEX; + output_semantic_index[slot] = 0; break; case VARYING_SLOT_CLIP_DIST0: - gs_output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST; - gs_output_semantic_index[slot] = 0; + output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST; + output_semantic_index[slot] = 0; break; case VARYING_SLOT_CLIP_DIST1: - gs_output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST; - gs_output_semantic_index[slot] = 1; + output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST; + output_semantic_index[slot] = 1; break; case VARYING_SLOT_LAYER: - gs_output_semantic_name[slot] = TGSI_SEMANTIC_LAYER; - gs_output_semantic_index[slot] = 0; + output_semantic_name[slot] = TGSI_SEMANTIC_LAYER; + output_semantic_index[slot] = 0; break; case VARYING_SLOT_PRIMITIVE_ID: - 
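st_translate_program_common above packs every varying slot set in InputsRead into consecutive TGSI input slots, records both directions of the mapping, and then appends per-patch inputs from the separate 32-bit PatchInputsRead mask using TGSI_SEMANTIC_PATCH. A compact sketch of that packing, with invented sizes and a stand-in "PATCH0" at bit 64:

   /* Illustrative only: sizes and the PATCH0 offset are invented. */
   #include <stdint.h>

   #define MAX_SLOTS 96

   struct slot_map {
      unsigned num_inputs;
      unsigned attr_to_slot[MAX_SLOTS];
      unsigned slot_to_attr[MAX_SLOTS];
   };

   void
   build_input_mapping(struct slot_map *m,
                       uint64_t inputs_read, uint32_t patch_inputs_read)
   {
      m->num_inputs = 0;

      for (unsigned attr = 0; attr < 64; attr++) {
         if (inputs_read & ((uint64_t)1 << attr)) {
            unsigned slot = m->num_inputs++;
            m->attr_to_slot[attr] = slot;
            m->slot_to_attr[slot] = attr;
         }
      }
      for (unsigned p = 0; p < 32; p++) {      /* per-patch varyings */
         if (patch_inputs_read & (1u << p)) {
            unsigned slot = m->num_inputs++;
            m->attr_to_slot[64 + p] = slot;    /* "PATCH0 + p" analogue */
            m->slot_to_attr[slot] = 64 + p;
         }
      }
   }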
gs_output_semantic_name[slot] = TGSI_SEMANTIC_PRIMID; - gs_output_semantic_index[slot] = 0; + output_semantic_name[slot] = TGSI_SEMANTIC_PRIMID; + output_semantic_index[slot] = 0; break; case VARYING_SLOT_VIEWPORT: - gs_output_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX; - gs_output_semantic_index[slot] = 0; + output_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX; + output_semantic_index[slot] = 0; + break; + case VARYING_SLOT_TESS_LEVEL_OUTER: + output_semantic_name[slot] = TGSI_SEMANTIC_TESSOUTER; + output_semantic_index[slot] = 0; + break; + case VARYING_SLOT_TESS_LEVEL_INNER: + output_semantic_name[slot] = TGSI_SEMANTIC_TESSINNER; + output_semantic_index[slot] = 0; break; case VARYING_SLOT_TEX0: case VARYING_SLOT_TEX1: @@ -1065,36 +1140,44 @@ st_translate_geometry_program(struct st_context *st, case VARYING_SLOT_TEX6: case VARYING_SLOT_TEX7: if (st->needs_texcoord_semantic) { - gs_output_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD; - gs_output_semantic_index[slot] = attr - VARYING_SLOT_TEX0; + output_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD; + output_semantic_index[slot] = attr - VARYING_SLOT_TEX0; break; } /* fall through */ case VARYING_SLOT_VAR0: default: - assert(slot < ARRAY_SIZE(gs_output_semantic_name)); - assert(attr >= VARYING_SLOT_VAR0); - gs_output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - gs_output_semantic_index[slot] = + assert(slot < ARRAY_SIZE(output_semantic_name)); + assert(attr >= VARYING_SLOT_VAR0 || + (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7)); + output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + output_semantic_index[slot] = st_get_generic_varying_index(st, attr); - break; + break; } } } - ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, stgp->Base.InputType); - ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, stgp->Base.OutputType); - ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, - stgp->Base.VerticesOut); - ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS, stgp->Base.Invocations); + /* Also add patch outputs. */ + for (attr = 0; attr < 32; attr++) { + if (prog->PatchOutputsWritten & (1 << attr)) { + GLuint slot = num_outputs++; + GLuint patch_attr = VARYING_SLOT_PATCH0 + attr; + + outputMapping[patch_attr] = slot; + outputSlotToAttr[slot] = patch_attr; + output_semantic_name[slot] = TGSI_SEMANTIC_PATCH; + output_semantic_index[slot] = attr; + } + } st_translate_program(st->ctx, - TGSI_PROCESSOR_GEOMETRY, + tgsi_processor, ureg, - stgp->glsl_to_tgsi, - &stgp->Base.Base, + glsl_to_tgsi, + prog, /* inputs */ - gs_num_inputs, + num_inputs, inputMapping, inputSlotToAttr, input_semantic_name, @@ -1102,30 +1185,64 @@ st_translate_geometry_program(struct st_context *st, NULL, NULL, /* outputs */ - gs_num_outputs, + num_outputs, outputMapping, outputSlotToAttr, - gs_output_semantic_name, - gs_output_semantic_index, + output_semantic_name, + output_semantic_index, FALSE, FALSE); - state.tokens = ureg_get_tokens(ureg, NULL); + out_state->tokens = ureg_get_tokens(ureg, NULL); ureg_destroy(ureg); - st_translate_stream_output_info(stgp->glsl_to_tgsi, + st_translate_stream_output_info(glsl_to_tgsi, outputMapping, - &state.stream_output); + &out_state->stream_output); if ((ST_DEBUG & DEBUG_TGSI) && (ST_DEBUG & DEBUG_MESA)) { - _mesa_print_program(&stgp->Base.Base); + _mesa_print_program(prog); debug_printf("\n"); } if (ST_DEBUG & DEBUG_TGSI) { - tgsi_dump(state.tokens, 0); + tgsi_dump(out_state->tokens, 0); debug_printf("\n"); } +} + + +/** + * Translate a geometry program to create a new variant. 
+ */ +static struct st_gp_variant * +st_translate_geometry_program(struct st_context *st, + struct st_geometry_program *stgp, + const struct st_gp_variant_key *key) +{ + struct pipe_context *pipe = st->pipe; + struct ureg_program *ureg; + struct st_gp_variant *gpv; + struct pipe_shader_state state; + + ureg = ureg_create_with_screen(TGSI_PROCESSOR_GEOMETRY, st->pipe->screen); + if (ureg == NULL) + return NULL; + + ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, stgp->Base.InputType); + ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, stgp->Base.OutputType); + ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, + stgp->Base.VerticesOut); + ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS, stgp->Base.Invocations); + + st_translate_program_common(st, &stgp->Base.Base, stgp->glsl_to_tgsi, ureg, + TGSI_PROCESSOR_GEOMETRY, &state); + + gpv = CALLOC_STRUCT(st_gp_variant); + if (!gpv) { + ureg_free_tokens(state.tokens); + return NULL; + } /* fill in new variant */ gpv->driver_shader = pipe->create_gs_state(pipe, &state); @@ -1168,6 +1285,168 @@ st_get_gp_variant(struct st_context *st, /** + * Translate a tessellation control program to create a new variant. + */ +static struct st_tcp_variant * +st_translate_tessctrl_program(struct st_context *st, + struct st_tessctrl_program *sttcp, + const struct st_tcp_variant_key *key) +{ + struct pipe_context *pipe = st->pipe; + struct ureg_program *ureg; + struct st_tcp_variant *tcpv; + struct pipe_shader_state state; + + ureg = ureg_create_with_screen(TGSI_PROCESSOR_TESS_CTRL, pipe->screen); + if (ureg == NULL) { + return NULL; + } + + ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT, + sttcp->Base.VerticesOut); + + st_translate_program_common(st, &sttcp->Base.Base, sttcp->glsl_to_tgsi, + ureg, TGSI_PROCESSOR_TESS_CTRL, &state); + + tcpv = CALLOC_STRUCT(st_tcp_variant); + if (!tcpv) { + ureg_free_tokens(state.tokens); + return NULL; + } + + /* fill in new variant */ + tcpv->driver_shader = pipe->create_tcs_state(pipe, &state); + tcpv->key = *key; + + ureg_free_tokens(state.tokens); + return tcpv; +} + + +/** + * Get/create tessellation control program variant. + */ +struct st_tcp_variant * +st_get_tcp_variant(struct st_context *st, + struct st_tessctrl_program *sttcp, + const struct st_tcp_variant_key *key) +{ + struct st_tcp_variant *tcpv; + + /* Search for existing variant */ + for (tcpv = sttcp->variants; tcpv; tcpv = tcpv->next) { + if (memcmp(&tcpv->key, key, sizeof(*key)) == 0) { + break; + } + } + + if (!tcpv) { + /* create new */ + tcpv = st_translate_tessctrl_program(st, sttcp, key); + if (tcpv) { + /* insert into list */ + tcpv->next = sttcp->variants; + sttcp->variants = tcpv; + } + } + + return tcpv; +} + + +/** + * Translate a tessellation evaluation program to create a new variant. 
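st_get_tcp_variant (and its geometry and tess-eval siblings) follow the usual state-tracker get-or-create pattern: linear-search the program's variant list for a key match, otherwise translate a new variant and push it onto the head of the list. A generic sketch of that pattern, with a placeholder key type and creation callback:

   /* Illustrative only: key and create() are placeholders. */
   #include <string.h>

   struct key { const void *ctx; };

   struct variant {
      struct key key;
      void *driver_shader;
      struct variant *next;
   };

   struct variant *
   get_variant(struct variant **list, const struct key *key,
               struct variant *(*create)(const struct key *))
   {
      struct variant *v;

      /* Search for an existing variant. */
      for (v = *list; v; v = v->next)
         if (memcmp(&v->key, key, sizeof(*key)) == 0)
            return v;

      /* Create a new one and insert it at the head of the list. */
      v = create(key);
      if (v) {
         v->next = *list;
         *list = v;
      }
      return v;
   }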
+ */ +static struct st_tep_variant * +st_translate_tesseval_program(struct st_context *st, + struct st_tesseval_program *sttep, + const struct st_tep_variant_key *key) +{ + struct pipe_context *pipe = st->pipe; + struct ureg_program *ureg; + struct st_tep_variant *tepv; + struct pipe_shader_state state; + + ureg = ureg_create_with_screen(TGSI_PROCESSOR_TESS_EVAL, pipe->screen); + if (ureg == NULL) { + return NULL; + } + + if (sttep->Base.PrimitiveMode == GL_ISOLINES) + ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES); + else + ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, sttep->Base.PrimitiveMode); + + switch (sttep->Base.Spacing) { + case GL_EQUAL: + ureg_property(ureg, TGSI_PROPERTY_TES_SPACING, PIPE_TESS_SPACING_EQUAL); + break; + case GL_FRACTIONAL_EVEN: + ureg_property(ureg, TGSI_PROPERTY_TES_SPACING, + PIPE_TESS_SPACING_FRACTIONAL_EVEN); + break; + case GL_FRACTIONAL_ODD: + ureg_property(ureg, TGSI_PROPERTY_TES_SPACING, + PIPE_TESS_SPACING_FRACTIONAL_ODD); + break; + default: + assert(0); + } + + ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW, + sttep->Base.VertexOrder == GL_CW); + ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE, sttep->Base.PointMode); + + st_translate_program_common(st, &sttep->Base.Base, sttep->glsl_to_tgsi, + ureg, TGSI_PROCESSOR_TESS_EVAL, &state); + + tepv = CALLOC_STRUCT(st_tep_variant); + if (!tepv) { + ureg_free_tokens(state.tokens); + return NULL; + } + + /* fill in new variant */ + tepv->driver_shader = pipe->create_tes_state(pipe, &state); + tepv->key = *key; + + ureg_free_tokens(state.tokens); + return tepv; +} + + +/** + * Get/create tessellation evaluation program variant. + */ +struct st_tep_variant * +st_get_tep_variant(struct st_context *st, + struct st_tesseval_program *sttep, + const struct st_tep_variant_key *key) +{ + struct st_tep_variant *tepv; + + /* Search for existing variant */ + for (tepv = sttep->variants; tepv; tepv = tepv->next) { + if (memcmp(&tepv->key, key, sizeof(*key)) == 0) { + break; + } + } + + if (!tepv) { + /* create new */ + tepv = st_translate_tesseval_program(st, sttep, key); + if (tepv) { + /* insert into list */ + tepv->next = sttep->variants; + sttep->variants = tepv; + } + } + + return tepv; +} + + +/** * Vert/Geom/Frag programs have per-context variants. Free all the * variants attached to the given program which match the given context. 
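st_translate_tesseval_program above turns the GL-level tessellation layout (primitive mode, with GL_ISOLINES emitted as GL_LINES, spacing, winding and point mode) into TGSI properties. A trivial sketch of the spacing mapping, using local stand-ins for the GL and PIPE tokens:

   /* Illustrative only: the enum values stand in for the GL/PIPE tokens. */
   enum gl_spacing   { SPACING_EQUAL, SPACING_FRACT_EVEN, SPACING_FRACT_ODD };
   enum pipe_spacing { PIPE_EQUAL, PIPE_FRACT_EVEN, PIPE_FRACT_ODD };

   enum pipe_spacing
   map_spacing(enum gl_spacing s)
   {
      switch (s) {
      case SPACING_EQUAL:      return PIPE_EQUAL;
      case SPACING_FRACT_EVEN: return PIPE_FRACT_EVEN;
      case SPACING_FRACT_ODD:  return PIPE_FRACT_ODD;
      }
      return PIPE_EQUAL;   /* unreachable for valid input */
   }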
*/ @@ -1240,6 +1519,48 @@ destroy_program_variants(struct st_context *st, struct gl_program *program) } } break; + case GL_TESS_CONTROL_PROGRAM_NV: + { + struct st_tessctrl_program *sttcp = + (struct st_tessctrl_program *) program; + struct st_tcp_variant *tcpv, **prevPtr = &sttcp->variants; + + for (tcpv = sttcp->variants; tcpv; ) { + struct st_tcp_variant *next = tcpv->next; + if (tcpv->key.st == st) { + /* unlink from list */ + *prevPtr = next; + /* destroy this variant */ + delete_tcp_variant(st, tcpv); + } + else { + prevPtr = &tcpv->next; + } + tcpv = next; + } + } + break; + case GL_TESS_EVALUATION_PROGRAM_NV: + { + struct st_tesseval_program *sttep = + (struct st_tesseval_program *) program; + struct st_tep_variant *tepv, **prevPtr = &sttep->variants; + + for (tepv = sttep->variants; tepv; ) { + struct st_tep_variant *next = tepv->next; + if (tepv->key.st == st) { + /* unlink from list */ + *prevPtr = next; + /* destroy this variant */ + delete_tep_variant(st, tepv); + } + else { + prevPtr = &tepv->next; + } + tepv = next; + } + } + break; default: _mesa_problem(NULL, "Unexpected program target 0x%x in " "destroy_program_variants_cb()", program->Target); @@ -1276,6 +1597,8 @@ destroy_shader_program_variants_cb(GLuint key, void *data, void *userData) case GL_VERTEX_SHADER: case GL_FRAGMENT_SHADER: case GL_GEOMETRY_SHADER: + case GL_TESS_CONTROL_SHADER: + case GL_TESS_EVALUATION_SHADER: { destroy_program_variants(st, shader->Program); } diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index bb77eb6ed65..7013993fe38 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -188,7 +188,7 @@ struct st_gp_variant_key */ struct st_gp_variant { - /* Parameters which generated this translated version of a vertex */ + /* Parameters which generated this variant. */ struct st_gp_variant_key key; void *driver_shader; @@ -210,6 +210,76 @@ struct st_geometry_program +/** Tessellation control program variant key */ +struct st_tcp_variant_key +{ + struct st_context *st; /**< variants are per-context */ + /* no other fields yet */ +}; + + +/** + * Tessellation control program variant. + */ +struct st_tcp_variant +{ + /* Parameters which generated this variant. */ + struct st_tcp_variant_key key; + + void *driver_shader; + + struct st_tcp_variant *next; +}; + + +/** + * Derived from Mesa gl_tess_ctrl_program: + */ +struct st_tessctrl_program +{ + struct gl_tess_ctrl_program Base; /**< The Mesa tess ctrl program */ + struct glsl_to_tgsi_visitor* glsl_to_tgsi; + + struct st_tcp_variant *variants; +}; + + + +/** Tessellation evaluation program variant key */ +struct st_tep_variant_key +{ + struct st_context *st; /**< variants are per-context */ + /* no other fields yet */ +}; + + +/** + * Tessellation evaluation program variant. + */ +struct st_tep_variant +{ + /* Parameters which generated this variant. 
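destroy_program_variants above walks each variant list with a pointer to the previous "next" field, so variants belonging to the context being destroyed can be spliced out in place while the rest of the list is preserved. A generic sketch of that unlink-and-destroy loop, with placeholder types:

   /* Illustrative only: node and owner are placeholders. */
   #include <stdlib.h>

   struct node { const void *owner; struct node *next; };

   void
   destroy_for_owner(struct node **list, const void *owner)
   {
      struct node **prev_ptr = list;
      struct node *n = *list;

      while (n) {
         struct node *next = n->next;
         if (n->owner == owner) {
            *prev_ptr = next;     /* unlink from the list */
            free(n);              /* destroy this variant */
         } else {
            prev_ptr = &n->next;
         }
         n = next;
      }
   }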
*/ + struct st_tep_variant_key key; + + void *driver_shader; + + struct st_tep_variant *next; +}; + + +/** + * Derived from Mesa gl_tess_eval_program: + */ +struct st_tesseval_program +{ + struct gl_tess_eval_program Base; /**< The Mesa tess eval program */ + struct glsl_to_tgsi_visitor* glsl_to_tgsi; + + struct st_tep_variant *variants; +}; + + + static inline struct st_fragment_program * st_fragment_program( struct gl_fragment_program *fp ) { @@ -229,6 +299,18 @@ st_geometry_program( struct gl_geometry_program *gp ) return (struct st_geometry_program *)gp; } +static inline struct st_tessctrl_program * +st_tessctrl_program( struct gl_tess_ctrl_program *tcp ) +{ + return (struct st_tessctrl_program *)tcp; +} + +static inline struct st_tesseval_program * +st_tesseval_program( struct gl_tess_eval_program *tep ) +{ + return (struct st_tesseval_program *)tep; +} + static inline void st_reference_vertprog(struct st_context *st, struct st_vertex_program **ptr, @@ -259,6 +341,26 @@ st_reference_fragprog(struct st_context *st, (struct gl_program *) prog); } +static inline void +st_reference_tesscprog(struct st_context *st, + struct st_tessctrl_program **ptr, + struct st_tessctrl_program *prog) +{ + _mesa_reference_program(st->ctx, + (struct gl_program **) ptr, + (struct gl_program *) prog); +} + +static inline void +st_reference_tesseprog(struct st_context *st, + struct st_tesseval_program **ptr, + struct st_tesseval_program *prog) +{ + _mesa_reference_program(st->ctx, + (struct gl_program **) ptr, + (struct gl_program *) prog); +} + /** * This defines mapping from Mesa VARYING_SLOTs to TGSI GENERIC slots. */ @@ -302,6 +404,16 @@ st_get_gp_variant(struct st_context *st, struct st_geometry_program *stgp, const struct st_gp_variant_key *key); +extern struct st_tcp_variant * +st_get_tcp_variant(struct st_context *st, + struct st_tessctrl_program *stgp, + const struct st_tcp_variant_key *key); + +extern struct st_tep_variant * +st_get_tep_variant(struct st_context *st, + struct st_tesseval_program *stgp, + const struct st_tep_variant_key *key); + extern void st_prepare_vertex_program(struct gl_context *ctx, @@ -325,6 +437,14 @@ st_release_gp_variants(struct st_context *st, struct st_geometry_program *stgp); extern void +st_release_tcp_variants(struct st_context *st, + struct st_tessctrl_program *stgp); + +extern void +st_release_tep_variants(struct st_context *st, + struct st_tesseval_program *stgp); + +extern void st_destroy_program_variants(struct st_context *st); diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index 6beb21e3389..52b094330b9 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -462,6 +462,11 @@ st_texture_get_sampler_view(struct st_context *st, return free; } + +/** + * For the given texture object, release any sampler views which belong + * to the calling context. + */ void st_texture_release_sampler_view(struct st_context *st, struct st_texture_object *stObj) @@ -478,6 +483,11 @@ st_texture_release_sampler_view(struct st_context *st, } } + +/** + * Release all sampler views attached to the given texture object, regardless + * of the context. 
+ */ void st_texture_release_all_sampler_views(struct st_context *st, struct st_texture_object *stObj) diff --git a/src/mesa/swrast/s_aaline.c b/src/mesa/swrast/s_aaline.c index f3258e813a6..de5b42b9f6b 100644 --- a/src/mesa/swrast/s_aaline.c +++ b/src/mesa/swrast/s_aaline.c @@ -116,11 +116,11 @@ compute_plane(GLfloat x0, GLfloat y0, GLfloat x1, GLfloat y1, const GLfloat b = pz * py; const GLfloat c = px * px + py * py; const GLfloat d = -(a * x0 + b * y0 + c * z0); - if (a == 0.0 && b == 0.0 && c == 0.0 && d == 0.0) { - plane[0] = 0.0; - plane[1] = 0.0; - plane[2] = 1.0; - plane[3] = 0.0; + if (a == 0.0F && b == 0.0F && c == 0.0F && d == 0.0F) { + plane[0] = 0.0F; + plane[1] = 0.0F; + plane[2] = 1.0F; + plane[3] = 0.0F; } else { plane[0] = a; @@ -135,9 +135,9 @@ compute_plane(GLfloat x0, GLfloat y0, GLfloat x1, GLfloat y1, static inline void constant_plane(GLfloat value, GLfloat plane[4]) { - plane[0] = 0.0; - plane[1] = 0.0; - plane[2] = -1.0; + plane[0] = 0.0F; + plane[1] = 0.0F; + plane[2] = -1.0F; plane[3] = value; } @@ -160,8 +160,8 @@ static inline GLfloat solve_plane_recip(GLfloat x, GLfloat y, const GLfloat plane[4]) { const GLfloat denom = plane[3] + plane[0] * x + plane[1] * y; - if (denom == 0.0) - return 0.0; + if (denom == 0.0F) + return 0.0F; else return -plane[2] / denom; } @@ -374,7 +374,7 @@ segment(struct gl_context *ctx, if (x0 < x1) { xLeft = x0 - line->halfWidth; xRight = x1 + line->halfWidth; - if (line->dy >= 0.0) { + if (line->dy >= 0.0F) { yBot = y0 - 3.0F * line->halfWidth; yTop = y0 + line->halfWidth; } @@ -386,7 +386,7 @@ segment(struct gl_context *ctx, else { xLeft = x1 - line->halfWidth; xRight = x0 + line->halfWidth; - if (line->dy <= 0.0) { + if (line->dy <= 0.0F) { yBot = y1 - 3.0F * line->halfWidth; yTop = y1 + line->halfWidth; } @@ -420,7 +420,7 @@ segment(struct gl_context *ctx, if (y0 < y1) { yBot = y0 - line->halfWidth; yTop = y1 + line->halfWidth; - if (line->dx >= 0.0) { + if (line->dx >= 0.0F) { xLeft = x0 - 3.0F * line->halfWidth; xRight = x0 + line->halfWidth; } @@ -432,7 +432,7 @@ segment(struct gl_context *ctx, else { yBot = y1 - line->halfWidth; yTop = y0 + line->halfWidth; - if (line->dx <= 0.0) { + if (line->dx <= 0.0F) { xLeft = x1 - 3.0F * line->halfWidth; xRight = x1 + line->halfWidth; } diff --git a/src/mesa/swrast/s_aalinetemp.h b/src/mesa/swrast/s_aalinetemp.h index f1d078fd89b..bebb131a5d1 100644 --- a/src/mesa/swrast/s_aalinetemp.h +++ b/src/mesa/swrast/s_aalinetemp.h @@ -44,7 +44,7 @@ NAME(plot)(struct gl_context *ctx, struct LineInfo *line, int ix, int iy) (void) swrast; - if (coverage == 0.0) + if (coverage == 0.0F) return; line->span.end++; @@ -123,7 +123,7 @@ NAME(line)(struct gl_context *ctx, const SWvertex *v0, const SWvertex *v1) ctx->Const.MinLineWidthAA, ctx->Const.MaxLineWidthAA); - if (line.len == 0.0 || IS_INF_OR_NAN(line.len)) + if (line.len == 0.0F || IS_INF_OR_NAN(line.len)) return; INIT_SPAN(line.span, GL_LINE); diff --git a/src/mesa/swrast/s_atifragshader.c b/src/mesa/swrast/s_atifragshader.c index 9e029db25ce..2974deed41b 100644 --- a/src/mesa/swrast/s_atifragshader.c +++ b/src/mesa/swrast/s_atifragshader.c @@ -436,13 +436,13 @@ execute_shader(struct gl_context *ctx, const struct ati_fragment_shader *shader, for (i = 0; i < 3; i++) { dst[optype][i] = (src[optype][2][i] > - 0.5) ? src[optype][0][i] : src[optype][1][i]; + 0.5F) ? src[optype][0][i] : src[optype][1][i]; } } else { dst[optype][3] = (src[optype][2][3] > - 0.5) ? src[optype][0][3] : src[optype][1][3]; + 0.5F) ? 
src[optype][0][3] : src[optype][1][3]; } break; diff --git a/src/mesa/swrast/s_copypix.c b/src/mesa/swrast/s_copypix.c index 68c83e44e12..0dbccc0f61d 100644 --- a/src/mesa/swrast/s_copypix.c +++ b/src/mesa/swrast/s_copypix.c @@ -27,6 +27,7 @@ #include "main/context.h" #include "main/condrender.h" #include "main/macros.h" +#include "main/blit.h" #include "main/pixeltransfer.h" #include "main/imports.h" @@ -51,20 +52,9 @@ regions_overlap(GLint srcx, GLint srcy, GLint width, GLint height, GLfloat zoomX, GLfloat zoomY) { - if (zoomX == 1.0 && zoomY == 1.0) { - /* no zoom */ - if (srcx >= dstx + width || (srcx + width <= dstx)) { - return GL_FALSE; - } - else if (srcy < dsty) { /* this is OK */ - return GL_FALSE; - } - else if (srcy > dsty + height) { - return GL_FALSE; - } - else { - return GL_TRUE; - } + if (zoomX == 1.0F && zoomY == 1.0F) { + return _mesa_regions_overlap(srcx, srcy, srcx + width, srcy + height, + dstx, dsty, dstx + width, dsty + height); } else { /* add one pixel of slop when zooming, just to be safe */ @@ -211,8 +201,8 @@ scale_and_bias_z(struct gl_context *ctx, GLuint width, GLuint i; if (depthMax <= 0xffffff && - ctx->Pixel.DepthScale == 1.0 && - ctx->Pixel.DepthBias == 0.0) { + ctx->Pixel.DepthScale == 1.0F && + ctx->Pixel.DepthBias == 0.0F) { /* no scale or bias and no clamping and no worry of overflow */ const GLfloat depthMaxF = ctx->DrawBuffer->_DepthMaxF; for (i = 0; i < width; i++) { diff --git a/src/mesa/swrast/s_depth.c b/src/mesa/swrast/s_depth.c index 134f897c039..ffadc05a732 100644 --- a/src/mesa/swrast/s_depth.c +++ b/src/mesa/swrast/s_depth.c @@ -419,8 +419,8 @@ _swrast_depth_bounds_test( struct gl_context *ctx, SWspan *span ) struct gl_framebuffer *fb = ctx->DrawBuffer; struct gl_renderbuffer *rb = fb->Attachment[BUFFER_DEPTH].Renderbuffer; GLubyte *zStart; - GLuint zMin = (GLuint) (ctx->Depth.BoundsMin * fb->_DepthMaxF + 0.5F); - GLuint zMax = (GLuint) (ctx->Depth.BoundsMax * fb->_DepthMaxF + 0.5F); + GLuint zMin = (GLuint)((double)ctx->Depth.BoundsMin * 0xffffffff); + GLuint zMax = (GLuint)((double)ctx->Depth.BoundsMax * 0xffffffff); GLubyte *mask = span->array->mask; const GLuint count = span->end; GLuint i; @@ -444,6 +444,16 @@ _swrast_depth_bounds_test( struct gl_context *ctx, SWspan *span ) zBufferVals = (const GLuint *) zStart; } else { + /* Round the bounds to the precision of the zbuffer. 
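The glCopyPixels path above replaces its hand-rolled overlap check with the shared _mesa_regions_overlap helper when no zoom is applied. A sketch of a plain axis-aligned overlap predicate of that general kind (the exact helper semantics are not reproduced here):

   /* Illustrative only: a generic rectangle-overlap test. */
   #include <stdbool.h>

   bool
   regions_overlap(int ax0, int ay0, int ax1, int ay1,
                   int bx0, int by0, int bx1, int by1)
   {
      return ax0 < bx1 && bx0 < ax1 &&   /* overlap in x */
             ay0 < by1 && by0 < ay1;     /* overlap in y */
   }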
*/ + if (rb->Format == MESA_FORMAT_Z_UNORM16) { + zMin = (zMin & 0xffff0000) | (zMin >> 16); + zMax = (zMax & 0xffff0000) | (zMax >> 16); + } else { + /* 24 bits */ + zMin = (zMin & 0xffffff00) | (zMin >> 24); + zMax = (zMax & 0xffffff00) | (zMax >> 24); + } + /* unpack Z values into a temporary array */ if (span->arrayMask & SPAN_XY) { get_z32_values(ctx, rb, count, span->array->x, span->array->y, diff --git a/src/mesa/swrast/s_drawpix.c b/src/mesa/swrast/s_drawpix.c index fb677ee1b16..dc6827ede9f 100644 --- a/src/mesa/swrast/s_drawpix.c +++ b/src/mesa/swrast/s_drawpix.c @@ -264,7 +264,7 @@ draw_stencil_pixels( struct gl_context *ctx, GLint x, GLint y, const struct gl_pixelstore_attrib *unpack, const GLvoid *pixels ) { - const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0; + const GLboolean zoom = ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F; const GLenum destType = GL_UNSIGNED_BYTE; GLint row; GLubyte *values; @@ -309,8 +309,8 @@ draw_depth_pixels( struct gl_context *ctx, GLint x, GLint y, const GLvoid *pixels ) { const GLboolean scaleOrBias - = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0; - const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0; + = ctx->Pixel.DepthScale != 1.0f || ctx->Pixel.DepthBias != 0.0f; + const GLboolean zoom = ctx->Pixel.ZoomX != 1.0f || ctx->Pixel.ZoomY != 1.0f; SWspan span; INIT_SPAN(span, GL_BITMAP); @@ -415,7 +415,7 @@ draw_rgba_pixels( struct gl_context *ctx, GLint x, GLint y, const GLvoid *pixels ) { const GLint imgX = x, imgY = y; - const GLboolean zoom = ctx->Pixel.ZoomX!=1.0 || ctx->Pixel.ZoomY!=1.0; + const GLboolean zoom = ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F; GLbitfield transferOps = ctx->_ImageTransferState; SWspan span; @@ -601,10 +601,10 @@ draw_depth_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, { const GLint imgX = x, imgY = y; const GLboolean scaleOrBias - = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0; + = ctx->Pixel.DepthScale != 1.0F || ctx->Pixel.DepthBias != 0.0F; const GLuint stencilMask = ctx->Stencil.WriteMask[0]; const GLenum stencilType = GL_UNSIGNED_BYTE; - const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0; + const GLboolean zoom = ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F; struct gl_renderbuffer *depthRb, *stencilRb; struct gl_pixelstore_attrib clippedUnpack = *unpack; diff --git a/src/mesa/swrast/s_fragprog.c b/src/mesa/swrast/s_fragprog.c index 175915a5a0b..4fbf66b9db7 100644 --- a/src/mesa/swrast/s_fragprog.c +++ b/src/mesa/swrast/s_fragprog.c @@ -243,9 +243,9 @@ run_program(struct gl_context *ctx, SWspan *span, GLuint start, GLuint end) /* Store result depth/z */ if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { const GLfloat depth = machine->Outputs[FRAG_RESULT_DEPTH][2]; - if (depth <= 0.0) + if (depth <= 0.0F) span->array->z[i] = 0; - else if (depth >= 1.0) + else if (depth >= 1.0F) span->array->z[i] = ctx->DrawBuffer->_DepthMax; else span->array->z[i] = diff --git a/src/mesa/swrast/s_lines.c b/src/mesa/swrast/s_lines.c index 58bd2fc720a..ab8da7db289 100644 --- a/src/mesa/swrast/s_lines.c +++ b/src/mesa/swrast/s_lines.c @@ -241,7 +241,7 @@ _swrast_choose_line( struct gl_context *ctx ) USE(general_line); } else if (ctx->Depth.Test - || ctx->Line.Width != 1.0 + || ctx->Line.Width != 1.0F || ctx->Line.StippleFlag) { /* no texture, but Z, fog, width>1, stipple, etc. 
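The depth-bounds test above now scales BoundsMin/BoundsMax to 32-bit fixed point and, for 16- and 24-bit depth buffers, rounds them to the buffer's precision by folding the high bits back into the low bits. A small sketch of that bit-replication trick:

   /* Illustrative only. */
   #include <stdint.h>
   #include <stdio.h>

   static uint32_t expand16(uint16_t z16)
   {
      return ((uint32_t)z16 << 16) | z16;        /* 0xffff -> 0xffffffff */
   }

   static uint32_t round_to_16(uint32_t z32)
   {
      return (z32 & 0xffff0000u) | (z32 >> 16);  /* same idea as the hunk */
   }

   int main(void)
   {
      /* Both print 12341234: the bound snaps to a representable depth. */
      printf("%08x %08x\n", expand16(0x1234), round_to_16(0x12345678u));
      return 0;
   }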
*/ #if CHAN_BITS == 32 @@ -252,7 +252,7 @@ _swrast_choose_line( struct gl_context *ctx ) } else { assert(!ctx->Depth.Test); - assert(ctx->Line.Width == 1.0); + assert(ctx->Line.Width == 1.0F); /* simple lines */ USE(simple_no_z_rgba_line); } diff --git a/src/mesa/swrast/s_points.c b/src/mesa/swrast/s_points.c index 2212c95fa9a..d9aae73302c 100644 --- a/src/mesa/swrast/s_points.c +++ b/src/mesa/swrast/s_points.c @@ -208,9 +208,9 @@ sprite_point(struct gl_context *ctx, const SWvertex *vert) else { /* even size */ /* 0.501 factor allows conformance to pass */ - xmin = (GLint) (x + 0.501) - iRadius; + xmin = (GLint) (x + 0.501F) - iRadius; xmax = xmin + iSize - 1; - ymin = (GLint) (y + 0.501) - iRadius; + ymin = (GLint) (y + 0.501F) - iRadius; ymax = ymin + iSize - 1; } @@ -423,9 +423,9 @@ large_point(struct gl_context *ctx, const SWvertex *vert) else { /* even size */ /* 0.501 factor allows conformance to pass */ - xmin = (GLint) (x + 0.501) - iRadius; + xmin = (GLint) (x + 0.501F) - iRadius; xmax = xmin + iSize - 1; - ymin = (GLint) (y + 0.501) - iRadius; + ymin = (GLint) (y + 0.501F) - iRadius; ymax = ymin + iSize - 1; } @@ -552,7 +552,7 @@ _swrast_choose_point(struct gl_context *ctx) else if (ctx->Point.SmoothFlag) { swrast->Point = smooth_point; } - else if (size > 1.0 || + else if (size > 1.0F || ctx->Point._Attenuated || ctx->VertexProgram.PointSizeEnabled) { swrast->Point = large_point; diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c index 3db10e163d7..cd939ba9510 100644 --- a/src/mesa/swrast/s_span.c +++ b/src/mesa/swrast/s_span.c @@ -506,7 +506,7 @@ interpolate_texcoords(struct gl_context *ctx, SWspan *span) /* LOD is calculated directly in the ansiotropic filter, we can * skip the normal lambda function as the result is ignored. */ - if (samp->MaxAnisotropy > 1.0 && + if (samp->MaxAnisotropy > 1.0F && samp->MinFilter == GL_LINEAR_MIPMAP_LINEAR) { needLambda = GL_FALSE; } @@ -886,16 +886,16 @@ apply_aa_coverage(SWspan *span) GLubyte (*rgba)[4] = span->array->rgba8; for (i = 0; i < span->end; i++) { const GLfloat a = rgba[i][ACOMP] * coverage[i]; - rgba[i][ACOMP] = (GLubyte) CLAMP(a, 0.0, 255.0); - assert(coverage[i] >= 0.0); - assert(coverage[i] <= 1.0); + rgba[i][ACOMP] = (GLubyte) CLAMP(a, 0.0F, 255.0F); + assert(coverage[i] >= 0.0F); + assert(coverage[i] <= 1.0F); } } else if (span->array->ChanType == GL_UNSIGNED_SHORT) { GLushort (*rgba)[4] = span->array->rgba16; for (i = 0; i < span->end; i++) { const GLfloat a = rgba[i][ACOMP] * coverage[i]; - rgba[i][ACOMP] = (GLushort) CLAMP(a, 0.0, 65535.0); + rgba[i][ACOMP] = (GLushort) CLAMP(a, 0.0F, 65535.0F); } } else { diff --git a/src/mesa/swrast/s_texcombine.c b/src/mesa/swrast/s_texcombine.c index 453bd36367b..da4a013634c 100644 --- a/src/mesa/swrast/s_texcombine.c +++ b/src/mesa/swrast/s_texcombine.c @@ -670,8 +670,8 @@ _swrast_texture_span( struct gl_context *ctx, SWspan *span ) } } - if (samp->MinLod != -1000.0 || - samp->MaxLod != 1000.0) { + if (samp->MinLod != -1000.0F || + samp->MaxLod != 1000.0F) { /* apply LOD clamping to lambda */ const GLfloat min = samp->MinLod; const GLfloat max = samp->MaxLod; @@ -682,7 +682,7 @@ _swrast_texture_span( struct gl_context *ctx, SWspan *span ) } } } - else if (samp->MaxAnisotropy > 1.0 && + else if (samp->MaxAnisotropy > 1.0F && samp->MinFilter == GL_LINEAR_MIPMAP_LINEAR) { /* sample_lambda_2d_aniso is beeing used as texture_sample_func, * it requires the current SWspan *span as an additional parameter. 
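Most of the swrast hunks above only add the F suffix to floating-point literals: comparing a GLfloat against an unsuffixed 0.0 or 1.0 promotes the float operand to double, which the suffix avoids (it also keeps float-to-double conversion warnings quiet on compilers that flag them). A one-line illustration:

   /* Illustrative only. */
   #include <stdio.h>

   int main(void)
   {
      float width = 1.0f;

      if (width != 1.0F)           /* float vs. float: no promotion */
         puts("wide line path");
      else
         puts("simple line path");
      return 0;
   }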
diff --git a/src/mesa/swrast/s_texfilter.c b/src/mesa/swrast/s_texfilter.c index abc1727cf29..314170fc751 100644 --- a/src/mesa/swrast/s_texfilter.c +++ b/src/mesa/swrast/s_texfilter.c @@ -1902,7 +1902,7 @@ sample_lambda_2d_aniso(struct gl_context *ctx, const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u]; const GLboolean adjustLOD = (texUnit->LodBias + samp->LodBias != 0.0F) - || (samp->MinLod != -1000.0 || samp->MaxLod != 1000.0); + || (samp->MinLod != -1000.0F || samp->MaxLod != 1000.0F); GLuint i; @@ -1973,8 +1973,8 @@ sample_lambda_2d_aniso(struct gl_context *ctx, ctx->Const.MaxTextureLodBias); lod += bias; - if (samp->MinLod != -1000.0 || - samp->MaxLod != 1000.0) { + if (samp->MinLod != -1000.0F || + samp->MaxLod != 1000.0F) { /* apply LOD clamping to lambda */ lod = CLAMP(lod, samp->MinLod, samp->MaxLod); } @@ -3713,7 +3713,7 @@ _swrast_choose_texture_sample_func( struct gl_context *ctx, const struct gl_sampler_object *sampler) { if (!t || !_mesa_is_texture_complete(t, sampler)) { - return &null_sample_func; + return null_sample_func; } else { const GLboolean needLambda = @@ -3722,32 +3722,32 @@ _swrast_choose_texture_sample_func( struct gl_context *ctx, switch (t->Target) { case GL_TEXTURE_1D: if (is_depth_texture(t)) { - return &sample_depth_texture; + return sample_depth_texture; } else if (needLambda) { - return &sample_lambda_1d; + return sample_lambda_1d; } else if (sampler->MinFilter == GL_LINEAR) { - return &sample_linear_1d; + return sample_linear_1d; } else { assert(sampler->MinFilter == GL_NEAREST); - return &sample_nearest_1d; + return sample_nearest_1d; } case GL_TEXTURE_2D: if (is_depth_texture(t)) { - return &sample_depth_texture; + return sample_depth_texture; } else if (needLambda) { /* Anisotropic filtering extension. 
Activated only if mipmaps are used */ - if (sampler->MaxAnisotropy > 1.0 && + if (sampler->MaxAnisotropy > 1.0F && sampler->MinFilter == GL_LINEAR_MIPMAP_LINEAR) { - return &sample_lambda_2d_aniso; + return sample_lambda_2d_aniso; } - return &sample_lambda_2d; + return sample_lambda_2d; } else if (sampler->MinFilter == GL_LINEAR) { - return &sample_linear_2d; + return sample_linear_2d; } else { /* check for a few optimized cases */ @@ -3772,72 +3772,72 @@ _swrast_choose_texture_sample_func( struct gl_context *ctx, } case GL_TEXTURE_3D: if (needLambda) { - return &sample_lambda_3d; + return sample_lambda_3d; } else if (sampler->MinFilter == GL_LINEAR) { - return &sample_linear_3d; + return sample_linear_3d; } else { assert(sampler->MinFilter == GL_NEAREST); - return &sample_nearest_3d; + return sample_nearest_3d; } case GL_TEXTURE_CUBE_MAP: if (needLambda) { - return &sample_lambda_cube; + return sample_lambda_cube; } else if (sampler->MinFilter == GL_LINEAR) { - return &sample_linear_cube; + return sample_linear_cube; } else { assert(sampler->MinFilter == GL_NEAREST); - return &sample_nearest_cube; + return sample_nearest_cube; } case GL_TEXTURE_RECTANGLE_NV: if (is_depth_texture(t)) { - return &sample_depth_texture; + return sample_depth_texture; } else if (needLambda) { - return &sample_lambda_rect; + return sample_lambda_rect; } else if (sampler->MinFilter == GL_LINEAR) { - return &sample_linear_rect; + return sample_linear_rect; } else { assert(sampler->MinFilter == GL_NEAREST); - return &sample_nearest_rect; + return sample_nearest_rect; } case GL_TEXTURE_1D_ARRAY_EXT: if (is_depth_texture(t)) { - return &sample_depth_texture; + return sample_depth_texture; } else if (needLambda) { - return &sample_lambda_1d_array; + return sample_lambda_1d_array; } else if (sampler->MinFilter == GL_LINEAR) { - return &sample_linear_1d_array; + return sample_linear_1d_array; } else { assert(sampler->MinFilter == GL_NEAREST); - return &sample_nearest_1d_array; + return sample_nearest_1d_array; } case GL_TEXTURE_2D_ARRAY_EXT: if (is_depth_texture(t)) { - return &sample_depth_texture; + return sample_depth_texture; } else if (needLambda) { - return &sample_lambda_2d_array; + return sample_lambda_2d_array; } else if (sampler->MinFilter == GL_LINEAR) { - return &sample_linear_2d_array; + return sample_linear_2d_array; } else { assert(sampler->MinFilter == GL_NEAREST); - return &sample_nearest_2d_array; + return sample_nearest_2d_array; } default: _mesa_problem(ctx, "invalid target in _swrast_choose_texture_sample_func"); - return &null_sample_func; + return null_sample_func; } } } diff --git a/src/mesa/swrast/s_tritemp.h b/src/mesa/swrast/s_tritemp.h index fddbbfd99d6..1d71839713c 100644 --- a/src/mesa/swrast/s_tritemp.h +++ b/src/mesa/swrast/s_tritemp.h @@ -242,7 +242,7 @@ static void NAME(struct gl_context *ctx, const SWvertex *v0, if (IS_INF_OR_NAN(area) || area == 0.0F) return; - if (area * bf * swrast->_BackfaceCullSign < 0.0) + if (area * bf * swrast->_BackfaceCullSign < 0.0F) return; oneOverArea = 1.0F / area; diff --git a/src/mesa/swrast/s_zoom.c b/src/mesa/swrast/s_zoom.c index 9879e2a5f10..34b8eb19657 100644 --- a/src/mesa/swrast/s_zoom.c +++ b/src/mesa/swrast/s_zoom.c @@ -114,7 +114,7 @@ unzoom_x(GLfloat zoomX, GLint imageX, GLint zx) (zx - imageX) / zoomX = x - imageX; */ GLint x; - if (zoomX < 0.0) + if (zoomX < 0.0F) zx++; x = imageX + (GLint) ((zx - imageX) / zoomX); return x; diff --git a/src/mesa/swrast_setup/ss_tritmp.h b/src/mesa/swrast_setup/ss_tritmp.h index c38c76a4adb..adb77bd3247 
100644 --- a/src/mesa/swrast_setup/ss_tritmp.h +++ b/src/mesa/swrast_setup/ss_tritmp.h @@ -58,7 +58,7 @@ static void TAG(triangle)(struct gl_context *ctx, GLuint e0, GLuint e1, GLuint e if (IND & (SS_TWOSIDE_BIT | SS_UNFILLED_BIT)) { - facing = (cc < 0.0) ^ ctx->Polygon._FrontBit; + facing = (cc < 0.0F) ^ ctx->Polygon._FrontBit; if (IND & SS_UNFILLED_BIT) mode = facing ? ctx->Polygon.BackMode : ctx->Polygon.FrontMode; @@ -138,7 +138,7 @@ static void TAG(triangle)(struct gl_context *ctx, GLuint e0, GLuint e1, GLuint e * so no MRD value is used here. */ offset = ctx->Polygon.OffsetUnits; - if (cc * cc > 1e-16) { + if (cc * cc > 1e-16F) { const GLfloat ez = z[0] - z[2]; const GLfloat fz = z[1] - z[2]; const GLfloat oneOverArea = 1.0F / cc; diff --git a/src/mesa/tnl/t_context.c b/src/mesa/tnl/t_context.c index bc77ba8bf95..b5c0b3e1f5b 100644 --- a/src/mesa/tnl/t_context.c +++ b/src/mesa/tnl/t_context.c @@ -190,7 +190,7 @@ _tnl_InvalidateState( struct gl_context *ctx, GLuint new_state ) } if (new_state & (_NEW_VIEWPORT | _NEW_BUFFERS)) { - double scale[3], translate[3]; + float scale[3], translate[3]; _mesa_get_viewport_xform(ctx, 0, scale, translate); _math_matrix_viewport(&tnl->_WindowMap, scale, translate, ctx->DrawBuffer->_DepthMaxF); diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c index 6adf1dce676..c130ab3f93d 100644 --- a/src/mesa/tnl/t_draw.c +++ b/src/mesa/tnl/t_draw.c @@ -257,7 +257,7 @@ static GLboolean *_tnl_import_edgeflag( struct gl_context *ctx, GLuint i; for (i = 0; i < count; i++) { - *bptr++ = ((GLfloat *)ptr)[0] == 1.0; + *bptr++ = ((GLfloat *)ptr)[0] == 1.0F; ptr += stride; } @@ -425,6 +425,7 @@ void _tnl_draw_prims(struct gl_context *ctx, GLuint min_index, GLuint max_index, struct gl_transform_feedback_object *tfb_vertcount, + unsigned stream, struct gl_buffer_object *indirect) { TNLcontext *tnl = TNL_CONTEXT(ctx); @@ -451,7 +452,7 @@ void _tnl_draw_prims(struct gl_context *ctx, printf("%s %d..%d\n", __func__, min_index, max_index); for (i = 0; i < nr_prims; i++) printf("prim %d: %s start %d count %d\n", i, - _mesa_lookup_enum_by_nr(prim[i].mode), + _mesa_enum_to_string(prim[i].mode), prim[i].start, prim[i].count); } diff --git a/src/mesa/tnl/t_rasterpos.c b/src/mesa/tnl/t_rasterpos.c index d4b45bac9ac..4bd9ac8539e 100644 --- a/src/mesa/tnl/t_rasterpos.c +++ b/src/mesa/tnl/t_rasterpos.c @@ -148,7 +148,7 @@ shade_rastpos(struct gl_context *ctx, SUB_3V(VP, light->_Position, vertex); /* d = length(VP) */ d = (GLfloat) LEN_3FV( VP ); - if (d > 1.0e-6) { + if (d > 1.0e-6F) { /* normalize VP */ GLfloat invd = 1.0F / d; SELF_SCALE_SCALAR_3V(VP, invd); @@ -172,7 +172,7 @@ shade_rastpos(struct gl_context *ctx, } } - if (attenuation < 1e-3) + if (attenuation < 1e-3F) continue; n_dot_VP = DOT3( normal, VP ); @@ -219,7 +219,7 @@ shade_rastpos(struct gl_context *ctx, shine = ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS][0]; spec_coef = powf(n_dot_h, shine); - if (spec_coef > 1.0e-10) { + if (spec_coef > 1.0e-10F) { if (ctx->Light.Model.ColorControl==GL_SEPARATE_SPECULAR_COLOR) { ACC_SCALE_SCALAR_3V( specularContrib, spec_coef, light->_MatSpecular[0]); @@ -378,7 +378,7 @@ _tnl_RasterPos(struct gl_context *ctx, const GLfloat vObj[4]) GLfloat eye[4], clip[4], ndc[3], d; GLfloat *norm, eyenorm[3]; GLfloat *objnorm = ctx->Current.Attrib[VERT_ATTRIB_NORMAL]; - double scale[3], translate[3]; + float scale[3], translate[3]; /* apply modelview matrix: eye = MV * obj */ TRANSFORM_POINT( eye, ctx->ModelviewMatrixStack.Top->m, vObj ); diff --git a/src/mesa/tnl/t_vb_fog.c 
b/src/mesa/tnl/t_vb_fog.c index 1ca72f866b7..5489ed6857f 100644 --- a/src/mesa/tnl/t_vb_fog.c +++ b/src/mesa/tnl/t_vb_fog.c @@ -45,8 +45,8 @@ struct fog_stage_data { #define FOG_STAGE_DATA(stage) ((struct fog_stage_data *)stage->privatePtr) #define FOG_EXP_TABLE_SIZE 256 -#define FOG_MAX (10.0) -#define EXP_FOG_MAX .0006595 +#define FOG_MAX (10.0F) +#define EXP_FOG_MAX .0006595F #define FOG_INCR (FOG_MAX/FOG_EXP_TABLE_SIZE) static GLfloat exp_table[FOG_EXP_TABLE_SIZE]; static GLfloat inited = 0; @@ -54,7 +54,7 @@ static GLfloat inited = 0; #if 1 #define NEG_EXP( result, narg ) \ do { \ - GLfloat f = (GLfloat) (narg * (1.0/FOG_INCR)); \ + GLfloat f = (GLfloat) (narg * (1.0F / FOG_INCR)); \ GLint k = (GLint) f; \ if (k > FOG_EXP_TABLE_SIZE-2) \ result = (GLfloat) EXP_FOG_MAX; \ diff --git a/src/mesa/tnl/t_vb_light.c b/src/mesa/tnl/t_vb_light.c index dbd57fa6bfe..029265a4f83 100644 --- a/src/mesa/tnl/t_vb_light.c +++ b/src/mesa/tnl/t_vb_light.c @@ -137,23 +137,23 @@ validate_shine_table( struct gl_context *ctx, GLuint side, GLfloat shininess ) break; m = s->tab; - m[0] = 0.0; - if (shininess == 0.0) { + m[0] = 0.0F; + if (shininess == 0.0F) { for (j = 1 ; j <= SHINE_TABLE_SIZE ; j++) - m[j] = 1.0; + m[j] = 1.0F; } else { for (j = 1 ; j < SHINE_TABLE_SIZE ; j++) { - GLdouble t, x = j / (GLfloat) (SHINE_TABLE_SIZE - 1); - if (x < 0.005) /* underflow check */ - x = 0.005; - t = pow(x, shininess); - if (t > 1e-20) - m[j] = (GLfloat) t; + GLfloat t, x = j / (GLfloat) (SHINE_TABLE_SIZE - 1); + if (x < 0.005F) /* underflow check */ + x = 0.005F; + t = powf(x, shininess); + if (t > 1e-20F) + m[j] = t; else - m[j] = 0.0; + m[j] = 0.0F; } - m[SHINE_TABLE_SIZE] = 1.0; + m[SHINE_TABLE_SIZE] = 1.0F; } s->shininess = shininess; diff --git a/src/mesa/tnl/t_vb_lighttmp.h b/src/mesa/tnl/t_vb_lighttmp.h index f8786accbbb..3aebcd4b799 100644 --- a/src/mesa/tnl/t_vb_lighttmp.h +++ b/src/mesa/tnl/t_vb_lighttmp.h @@ -112,7 +112,7 @@ static void TAG(light_rgba_spec)( struct gl_context *ctx, GLint side; GLfloat contrib[3]; GLfloat attenuation; - GLfloat VP[3]; /* unit vector from vertex to light */ + GLfloat VP[3]; /* unit vector from vertex to light */ GLfloat n_dot_VP; /* n dot VP */ GLfloat *h; @@ -129,7 +129,7 @@ static void TAG(light_rgba_spec)( struct gl_context *ctx, d = (GLfloat) LEN_3FV( VP ); - if (d > 1e-6) { + if (d > 1e-6F) { GLfloat invd = 1.0F / d; SELF_SCALE_SCALAR_3V(VP, invd); } @@ -152,7 +152,7 @@ static void TAG(light_rgba_spec)( struct gl_context *ctx, } } - if (attenuation < 1e-3) + if (attenuation < 1e-3F) continue; /* this light makes no contribution */ /* Compute dot product or normal and vector from V to light pos */ @@ -204,7 +204,7 @@ static void TAG(light_rgba_spec)( struct gl_context *ctx, if (n_dot_h > 0.0F) { GLfloat spec_coef = lookup_shininess(ctx, side, n_dot_h); - if (spec_coef > 1.0e-10) { + if (spec_coef > 1.0e-10F) { spec_coef *= attenuation; ACC_SCALE_SCALAR_3V( spec[side], spec_coef, light->_MatSpecular[side]); @@ -283,12 +283,11 @@ static void TAG(light_rgba)( struct gl_context *ctx, /* Add contribution from each enabled light source */ foreach (light, &ctx->Light.EnabledList) { - GLfloat n_dot_h; GLfloat correction; GLint side; GLfloat contrib[3]; - GLfloat attenuation = 1.0; + GLfloat attenuation; GLfloat VP[3]; /* unit vector from vertex to light */ GLfloat n_dot_VP; /* n dot VP */ GLfloat *h; @@ -302,12 +301,11 @@ static void TAG(light_rgba)( struct gl_context *ctx, else { GLfloat d; /* distance from vertex to light */ - SUB_3V(VP, light->_Position, vertex); d = 
(GLfloat) LEN_3FV( VP ); - if ( d > 1e-6) { + if (d > 1e-6F) { GLfloat invd = 1.0F / d; SELF_SCALE_SCALAR_3V(VP, invd); } @@ -330,7 +328,7 @@ static void TAG(light_rgba)( struct gl_context *ctx, } } - if (attenuation < 1e-3) + if (attenuation < 1e-3F) continue; /* this light makes no contribution */ /* Compute dot product or normal and vector from V to light pos */ diff --git a/src/mesa/tnl/t_vb_normals.c b/src/mesa/tnl/t_vb_normals.c index 9aee1a2fb0b..6fc89c23b33 100644 --- a/src/mesa/tnl/t_vb_normals.c +++ b/src/mesa/tnl/t_vb_normals.c @@ -114,7 +114,7 @@ validate_normal_stage(struct gl_context *ctx, struct tnl_pipeline_stage *stage) store->NormalTransform = _mesa_normal_tab[transform | NORM_NORMALIZE]; } else if (ctx->Transform.RescaleNormals && - ctx->_ModelViewInvScale != 1.0) { + ctx->_ModelViewInvScale != 1.0F) { store->NormalTransform = _mesa_normal_tab[transform | NORM_RESCALE]; } else { @@ -131,7 +131,7 @@ validate_normal_stage(struct gl_context *ctx, struct tnl_pipeline_stage *stage) store->NormalTransform = _mesa_normal_tab[NORM_NORMALIZE]; } else if (!ctx->Transform.RescaleNormals && - ctx->_ModelViewInvScale != 1.0) { + ctx->_ModelViewInvScale != 1.0F) { store->NormalTransform = _mesa_normal_tab[NORM_RESCALE]; } else { diff --git a/src/mesa/tnl/t_vb_render.c b/src/mesa/tnl/t_vb_render.c index 4960ac0969e..03e8fcfa196 100644 --- a/src/mesa/tnl/t_vb_render.c +++ b/src/mesa/tnl/t_vb_render.c @@ -315,7 +315,7 @@ static GLboolean run_render( struct gl_context *ctx, if (MESA_VERBOSE & VERBOSE_PRIMS) _mesa_debug(NULL, "MESA prim %s %d..%d\n", - _mesa_lookup_enum_by_nr(prim & PRIM_MODE_MASK), + _mesa_enum_to_string(prim & PRIM_MODE_MASK), start, start+length); if (length) diff --git a/src/mesa/tnl/t_vertex_generic.c b/src/mesa/tnl/t_vertex_generic.c index 2a25a96928f..6c40c868363 100644 --- a/src/mesa/tnl/t_vertex_generic.c +++ b/src/mesa/tnl/t_vertex_generic.c @@ -1026,7 +1026,7 @@ void _tnl_generic_interp( struct gl_context *ctx, if (tnl->NeedNdcCoords) { const GLfloat *dstclip = VB->ClipPtr->data[edst]; - if (dstclip[3] != 0.0) { + if (dstclip[3] != 0.0f) { const GLfloat w = 1.0f / dstclip[3]; GLfloat pos[4]; diff --git a/src/mesa/tnl/t_vertex_sse.c b/src/mesa/tnl/t_vertex_sse.c index 30dc1a72080..14e7812ec78 100644 --- a/src/mesa/tnl/t_vertex_sse.c +++ b/src/mesa/tnl/t_vertex_sse.c @@ -592,7 +592,7 @@ static GLboolean build_vertex_emit( struct x86_program *p ) break; case GL_UNSIGNED_SHORT: default: - printf("unknown CHAN_TYPE %s\n", _mesa_lookup_enum_by_nr(CHAN_TYPE)); + printf("unknown CHAN_TYPE %s\n", _mesa_enum_to_string(CHAN_TYPE)); return GL_FALSE; } break; diff --git a/src/mesa/tnl/tnl.h b/src/mesa/tnl/tnl.h index 8c59ff9e58f..5a9938e7afb 100644 --- a/src/mesa/tnl/tnl.h +++ b/src/mesa/tnl/tnl.h @@ -76,7 +76,7 @@ struct _mesa_prim; struct _mesa_index_buffer; void -_tnl_draw_prims( struct gl_context *ctx, +_tnl_draw_prims(struct gl_context *ctx, const struct _mesa_prim *prim, GLuint nr_prims, const struct _mesa_index_buffer *ib, @@ -84,6 +84,7 @@ _tnl_draw_prims( struct gl_context *ctx, GLuint min_index, GLuint max_index, struct gl_transform_feedback_object *tfb_vertcount, + unsigned stream, struct gl_buffer_object *indirect ); extern void diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h index 667e2a6e5d5..7be39541e43 100644 --- a/src/mesa/tnl_dd/t_dd_dmatmp.h +++ b/src/mesa/tnl_dd/t_dd_dmatmp.h @@ -1256,7 +1256,7 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx, } if (!ok) { -/* fprintf(stderr, "not ok %s\n", 
_mesa_lookup_enum_by_nr(prim & PRIM_MODE_MASK)); */ +/* fprintf(stderr, "not ok %s\n", _mesa_enum_to_string(prim & PRIM_MODE_MASK)); */ return GL_FALSE; } } diff --git a/src/mesa/tnl_dd/t_dd_unfilled.h b/src/mesa/tnl_dd/t_dd_unfilled.h index 82190c08916..ee15e773c88 100644 --- a/src/mesa/tnl_dd/t_dd_unfilled.h +++ b/src/mesa/tnl_dd/t_dd_unfilled.h @@ -60,7 +60,7 @@ static void TAG(unfilled_tri)( struct gl_context *ctx, } /* fprintf(stderr, "%s %s %d %d %d\n", __func__, */ -/* _mesa_lookup_enum_by_nr( mode ), */ +/* _mesa_enum_to_string( mode ), */ /* ef[e0], ef[e1], ef[e2]); */ if (mode == GL_POINT) { diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h index 54dee6c464f..2aaff5df019 100644 --- a/src/mesa/vbo/vbo.h +++ b/src/mesa/vbo/vbo.h @@ -97,7 +97,8 @@ typedef void (*vbo_draw_func)( struct gl_context *ctx, GLuint min_index, GLuint max_index, struct gl_transform_feedback_object *tfb_vertcount, - struct gl_buffer_object *indirect ); + unsigned stream, + struct gl_buffer_object *indirect); diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c index fd1ffe2f76d..e3eb286e482 100644 --- a/src/mesa/vbo/vbo_context.c +++ b/src/mesa/vbo/vbo_context.c @@ -37,9 +37,9 @@ static GLuint check_size( const GLfloat *attr ) { - if (attr[3] != 1.0) return 4; - if (attr[2] != 0.0) return 3; - if (attr[1] != 0.0) return 2; + if (attr[3] != 1.0F) return 4; + if (attr[2] != 0.0F) return 3; + if (attr[1] != 0.0F) return 2; return 1; } diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index 72b8206ec23..34d2c1d3d6b 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -255,7 +255,7 @@ check_array_data(struct gl_context *ctx, struct gl_client_array *array, GLint k; for (k = 0; k < array->Size; k++) { if (IS_INF_OR_NAN(f[k]) || - f[k] >= 1.0e20 || f[k] <= -1.0e10) { + f[k] >= 1.0e20F || f[k] <= -1.0e10F) { printf("Bad array data:\n"); printf(" Element[%u].%u = %f\n", j, k, f[k]); printf(" Array %u at %p\n", attrib, (void* ) array); @@ -263,7 +263,7 @@ check_array_data(struct gl_context *ctx, struct gl_client_array *array, array->Type, array->Size, array->Stride); printf(" Address/offset %p in Buffer Object %u\n", array->Ptr, array->BufferObj->Name); - f[k] = 1.0; /* XXX replace the bad value! */ + f[k] = 1.0F; /* XXX replace the bad value! 
*/ } /*assert(!IS_INF_OR_NAN(f[k]));*/ } @@ -633,7 +633,7 @@ vbo_draw_arrays(struct gl_context *ctx, GLenum mode, GLint start, /* draw one or two prims */ check_buffers_are_unmapped(exec->array.inputs); vbo->draw_prims(ctx, prim, primCount, NULL, - GL_TRUE, start, start + count - 1, NULL, NULL); + GL_TRUE, start, start + count - 1, NULL, 0, NULL); } } else { @@ -644,7 +644,7 @@ vbo_draw_arrays(struct gl_context *ctx, GLenum mode, GLint start, check_buffers_are_unmapped(exec->array.inputs); vbo->draw_prims(ctx, prim, 1, NULL, GL_TRUE, start, start + count - 1, - NULL, NULL); + NULL, 0, NULL); } if (MESA_DEBUG_FLAGS & DEBUG_ALWAYS_FLUSH) { @@ -786,7 +786,7 @@ vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count) if (MESA_VERBOSE & VERBOSE_DRAW) _mesa_debug(ctx, "glDrawArrays(%s, %d, %d)\n", - _mesa_lookup_enum_by_nr(mode), start, count); + _mesa_enum_to_string(mode), start, count); if (!_mesa_validate_DrawArrays(ctx, mode, count)) return; @@ -813,7 +813,7 @@ vbo_exec_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count, if (MESA_VERBOSE & VERBOSE_DRAW) _mesa_debug(ctx, "glDrawArraysInstanced(%s, %d, %d, %d)\n", - _mesa_lookup_enum_by_nr(mode), start, count, numInstances); + _mesa_enum_to_string(mode), start, count, numInstances); if (!_mesa_validate_DrawArraysInstanced(ctx, mode, start, count, numInstances)) return; @@ -839,7 +839,7 @@ vbo_exec_DrawArraysInstancedBaseInstance(GLenum mode, GLint first, GLsizei count if (MESA_VERBOSE & VERBOSE_DRAW) _mesa_debug(ctx, "glDrawArraysInstancedBaseInstance(%s, %d, %d, %d, %d)\n", - _mesa_lookup_enum_by_nr(mode), first, count, + _mesa_enum_to_string(mode), first, count, numInstances, baseInstance); if (!_mesa_validate_DrawArraysInstanced(ctx, mode, first, count, @@ -990,7 +990,7 @@ vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode, check_buffers_are_unmapped(exec->array.inputs); vbo->draw_prims(ctx, prim, 1, &ib, - index_bounds_valid, start, end, NULL, NULL); + index_bounds_valid, start, end, NULL, 0, NULL); if (MESA_DEBUG_FLAGS & DEBUG_ALWAYS_FLUSH) { _mesa_flush(ctx); @@ -1021,8 +1021,8 @@ vbo_exec_DrawRangeElementsBaseVertex(GLenum mode, if (MESA_VERBOSE & VERBOSE_DRAW) _mesa_debug(ctx, "glDrawRangeElementsBaseVertex(%s, %u, %u, %d, %s, %p, %d)\n", - _mesa_lookup_enum_by_nr(mode), start, end, count, - _mesa_lookup_enum_by_nr(type), indices, basevertex); + _mesa_enum_to_string(mode), start, end, count, + _mesa_enum_to_string(type), indices, basevertex); if (!_mesa_validate_DrawRangeElements(ctx, mode, start, end, count, type, indices)) @@ -1099,8 +1099,8 @@ vbo_exec_DrawRangeElements(GLenum mode, GLuint start, GLuint end, GET_CURRENT_CONTEXT(ctx); _mesa_debug(ctx, "glDrawRangeElements(%s, %u, %u, %d, %s, %p)\n", - _mesa_lookup_enum_by_nr(mode), start, end, count, - _mesa_lookup_enum_by_nr(type), indices); + _mesa_enum_to_string(mode), start, end, count, + _mesa_enum_to_string(type), indices); } vbo_exec_DrawRangeElementsBaseVertex(mode, start, end, count, type, @@ -1119,8 +1119,8 @@ vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type, if (MESA_VERBOSE & VERBOSE_DRAW) _mesa_debug(ctx, "glDrawElements(%s, %u, %s, %p)\n", - _mesa_lookup_enum_by_nr(mode), count, - _mesa_lookup_enum_by_nr(type), indices); + _mesa_enum_to_string(mode), count, + _mesa_enum_to_string(type), indices); if (!_mesa_validate_DrawElements(ctx, mode, count, type, indices)) return; @@ -1141,8 +1141,8 @@ vbo_exec_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type, if (MESA_VERBOSE & VERBOSE_DRAW) _mesa_debug(ctx, 
"glDrawElementsBaseVertex(%s, %d, %s, %p, %d)\n", - _mesa_lookup_enum_by_nr(mode), count, - _mesa_lookup_enum_by_nr(type), indices, basevertex); + _mesa_enum_to_string(mode), count, + _mesa_enum_to_string(type), indices, basevertex); if (!_mesa_validate_DrawElements(ctx, mode, count, type, indices)) return; @@ -1163,8 +1163,8 @@ vbo_exec_DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type, if (MESA_VERBOSE & VERBOSE_DRAW) _mesa_debug(ctx, "glDrawElementsInstanced(%s, %d, %s, %p, %d)\n", - _mesa_lookup_enum_by_nr(mode), count, - _mesa_lookup_enum_by_nr(type), indices, numInstances); + _mesa_enum_to_string(mode), count, + _mesa_enum_to_string(type), indices, numInstances); if (!_mesa_validate_DrawElementsInstanced(ctx, mode, count, type, indices, numInstances)) @@ -1187,8 +1187,8 @@ vbo_exec_DrawElementsInstancedBaseVertex(GLenum mode, GLsizei count, GLenum type if (MESA_VERBOSE & VERBOSE_DRAW) _mesa_debug(ctx, "glDrawElementsInstancedBaseVertex(%s, %d, %s, %p, %d; %d)\n", - _mesa_lookup_enum_by_nr(mode), count, - _mesa_lookup_enum_by_nr(type), indices, + _mesa_enum_to_string(mode), count, + _mesa_enum_to_string(type), indices, numInstances, basevertex); if (!_mesa_validate_DrawElementsInstanced(ctx, mode, count, type, indices, @@ -1212,8 +1212,8 @@ vbo_exec_DrawElementsInstancedBaseInstance(GLenum mode, GLsizei count, GLenum ty if (MESA_VERBOSE & VERBOSE_DRAW) _mesa_debug(ctx, "glDrawElementsInstancedBaseInstance(%s, %d, %s, %p, %d, %d)\n", - _mesa_lookup_enum_by_nr(mode), count, - _mesa_lookup_enum_by_nr(type), indices, + _mesa_enum_to_string(mode), count, + _mesa_enum_to_string(type), indices, numInstances, baseInstance); if (!_mesa_validate_DrawElementsInstanced(ctx, mode, count, type, indices, @@ -1238,8 +1238,8 @@ vbo_exec_DrawElementsInstancedBaseVertexBaseInstance(GLenum mode, GLsizei count, if (MESA_VERBOSE & VERBOSE_DRAW) _mesa_debug(ctx, "glDrawElementsInstancedBaseVertexBaseInstance(%s, %d, %s, %p, %d, %d, %d)\n", - _mesa_lookup_enum_by_nr(mode), count, - _mesa_lookup_enum_by_nr(type), indices, + _mesa_enum_to_string(mode), count, + _mesa_enum_to_string(type), indices, numInstances, basevertex, baseInstance); if (!_mesa_validate_DrawElementsInstanced(ctx, mode, count, type, indices, @@ -1350,7 +1350,7 @@ vbo_validated_multidrawelements(struct gl_context *ctx, GLenum mode, check_buffers_are_unmapped(exec->array.inputs); vbo->draw_prims(ctx, prim, primcount, &ib, - false, ~0, ~0, NULL, NULL); + false, ~0, ~0, NULL, 0, NULL); } else { /* render one prim at a time */ for (i = 0; i < primcount; i++) { @@ -1379,7 +1379,7 @@ vbo_validated_multidrawelements(struct gl_context *ctx, GLenum mode, check_buffers_are_unmapped(exec->array.inputs); vbo->draw_prims(ctx, prim, 1, &ib, - false, ~0, ~0, NULL, NULL); + false, ~0, ~0, NULL, 0, NULL); } } @@ -1464,7 +1464,7 @@ vbo_draw_transform_feedback(struct gl_context *ctx, GLenum mode, check_buffers_are_unmapped(exec->array.inputs); vbo->draw_prims(ctx, prim, 1, NULL, - GL_TRUE, 0, 0, obj, NULL); + GL_TRUE, 0, 0, obj, stream, NULL); if (MESA_DEBUG_FLAGS & DEBUG_ALWAYS_FLUSH) { _mesa_flush(ctx); @@ -1488,7 +1488,7 @@ vbo_exec_DrawTransformFeedback(GLenum mode, GLuint name) if (MESA_VERBOSE & VERBOSE_DRAW) _mesa_debug(ctx, "glDrawTransformFeedback(%s, %d)\n", - _mesa_lookup_enum_by_nr(mode), name); + _mesa_enum_to_string(mode), name); vbo_draw_transform_feedback(ctx, mode, obj, 0, 1); } @@ -1502,7 +1502,7 @@ vbo_exec_DrawTransformFeedbackStream(GLenum mode, GLuint name, GLuint stream) if (MESA_VERBOSE & VERBOSE_DRAW) _mesa_debug(ctx, 
"glDrawTransformFeedbackStream(%s, %u, %u)\n", - _mesa_lookup_enum_by_nr(mode), name, stream); + _mesa_enum_to_string(mode), name, stream); vbo_draw_transform_feedback(ctx, mode, obj, stream, 1); } @@ -1517,7 +1517,7 @@ vbo_exec_DrawTransformFeedbackInstanced(GLenum mode, GLuint name, if (MESA_VERBOSE & VERBOSE_DRAW) _mesa_debug(ctx, "glDrawTransformFeedbackInstanced(%s, %d)\n", - _mesa_lookup_enum_by_nr(mode), name); + _mesa_enum_to_string(mode), name); vbo_draw_transform_feedback(ctx, mode, obj, 0, primcount); } @@ -1533,7 +1533,7 @@ vbo_exec_DrawTransformFeedbackStreamInstanced(GLenum mode, GLuint name, if (MESA_VERBOSE & VERBOSE_DRAW) _mesa_debug(ctx, "glDrawTransformFeedbackStreamInstanced" "(%s, %u, %u, %i)\n", - _mesa_lookup_enum_by_nr(mode), name, stream, primcount); + _mesa_enum_to_string(mode), name, stream, primcount); vbo_draw_transform_feedback(ctx, mode, obj, stream, primcount); } @@ -1563,7 +1563,7 @@ vbo_validated_drawarraysindirect(struct gl_context *ctx, check_buffers_are_unmapped(exec->array.inputs); vbo->draw_prims(ctx, prim, 1, NULL, GL_TRUE, 0, ~0, - NULL, + NULL, 0, ctx->DrawIndirectBuffer); if (MESA_DEBUG_FLAGS & DEBUG_ALWAYS_FLUSH) @@ -1603,7 +1603,7 @@ vbo_validated_multidrawarraysindirect(struct gl_context *ctx, check_buffers_are_unmapped(exec->array.inputs); vbo->draw_prims(ctx, prim, primcount, NULL, GL_TRUE, 0, ~0, - NULL, + NULL, 0, ctx->DrawIndirectBuffer); free(prim); @@ -1640,7 +1640,7 @@ vbo_validated_drawelementsindirect(struct gl_context *ctx, check_buffers_are_unmapped(exec->array.inputs); vbo->draw_prims(ctx, prim, 1, &ib, GL_TRUE, 0, ~0, - NULL, + NULL, 0, ctx->DrawIndirectBuffer); if (MESA_DEBUG_FLAGS & DEBUG_ALWAYS_FLUSH) @@ -1689,7 +1689,7 @@ vbo_validated_multidrawelementsindirect(struct gl_context *ctx, check_buffers_are_unmapped(exec->array.inputs); vbo->draw_prims(ctx, prim, primcount, &ib, GL_TRUE, 0, ~0, - NULL, + NULL, 0, ctx->DrawIndirectBuffer); free(prim); @@ -1709,7 +1709,7 @@ vbo_exec_DrawArraysIndirect(GLenum mode, const GLvoid *indirect) if (MESA_VERBOSE & VERBOSE_DRAW) _mesa_debug(ctx, "glDrawArraysIndirect(%s, %p)\n", - _mesa_lookup_enum_by_nr(mode), indirect); + _mesa_enum_to_string(mode), indirect); if (!_mesa_validate_DrawArraysIndirect(ctx, mode, indirect)) return; @@ -1725,8 +1725,8 @@ vbo_exec_DrawElementsIndirect(GLenum mode, GLenum type, if (MESA_VERBOSE & VERBOSE_DRAW) _mesa_debug(ctx, "glDrawElementsIndirect(%s, %s, %p)\n", - _mesa_lookup_enum_by_nr(mode), - _mesa_lookup_enum_by_nr(type), indirect); + _mesa_enum_to_string(mode), + _mesa_enum_to_string(type), indirect); if (!_mesa_validate_DrawElementsIndirect(ctx, mode, type, indirect)) return; @@ -1743,7 +1743,7 @@ vbo_exec_MultiDrawArraysIndirect(GLenum mode, if (MESA_VERBOSE & VERBOSE_DRAW) _mesa_debug(ctx, "glMultiDrawArraysIndirect(%s, %p, %i, %i)\n", - _mesa_lookup_enum_by_nr(mode), indirect, primcount, stride); + _mesa_enum_to_string(mode), indirect, primcount, stride); /* If <stride> is zero, the array elements are treated as tightly packed. */ if (stride == 0) @@ -1768,8 +1768,8 @@ vbo_exec_MultiDrawElementsIndirect(GLenum mode, GLenum type, if (MESA_VERBOSE & VERBOSE_DRAW) _mesa_debug(ctx, "glMultiDrawElementsIndirect(%s, %s, %p, %i, %i)\n", - _mesa_lookup_enum_by_nr(mode), - _mesa_lookup_enum_by_nr(type), indirect, primcount, stride); + _mesa_enum_to_string(mode), + _mesa_enum_to_string(type), indirect, primcount, stride); /* If <stride> is zero, the array elements are treated as tightly packed. 
*/ if (stride == 0) diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index 37b53a8309d..2bfb0c32b73 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -412,7 +412,7 @@ vbo_exec_vtx_flush(struct vbo_exec_context *exec, GLboolean keepUnmapped) GL_TRUE, 0, exec->vtx.vert_count - 1, - NULL, NULL); + NULL, 0, NULL); /* If using a real VBO, get new storage -- unless asked not to. */ diff --git a/src/mesa/vbo/vbo_primitive_restart.c b/src/mesa/vbo/vbo_primitive_restart.c index dafc4fd2a9a..0662c5cd4ef 100644 --- a/src/mesa/vbo/vbo_primitive_restart.c +++ b/src/mesa/vbo/vbo_primitive_restart.c @@ -251,11 +251,11 @@ vbo_sw_primitive_restart(struct gl_context *ctx, (temp_prim.count == sub_prim->count)) { draw_prims_func(ctx, &temp_prim, 1, ib, GL_TRUE, sub_prim->min_index, sub_prim->max_index, - NULL, NULL); + NULL, 0, NULL); } else { draw_prims_func(ctx, &temp_prim, 1, ib, GL_FALSE, -1, -1, - NULL, NULL); + NULL, 0, NULL); } } if (sub_end_index >= end_index) { diff --git a/src/mesa/vbo/vbo_rebase.c b/src/mesa/vbo/vbo_rebase.c index c3c4b64e65c..24c04ca7e6a 100644 --- a/src/mesa/vbo/vbo_rebase.c +++ b/src/mesa/vbo/vbo_rebase.c @@ -258,7 +258,7 @@ void vbo_rebase_prims( struct gl_context *ctx, GL_TRUE, 0, max_index - min_index, - NULL, NULL ); + NULL, 0, NULL ); ctx->Array._DrawArrays = saved_arrays; ctx->NewDriverState |= ctx->DriverFlags.NewArray; diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c index de744e0c763..b1fd6892026 100644 --- a/src/mesa/vbo/vbo_save_draw.c +++ b/src/mesa/vbo/vbo_save_draw.c @@ -314,7 +314,7 @@ vbo_save_playback_vertex_list(struct gl_context *ctx, void *data) GL_TRUE, 0, /* Node is a VBO, so this is ok */ node->count - 1, - NULL, NULL); + NULL, 0, NULL); } } diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c index 7b1e20b18d2..cb27ef961ab 100644 --- a/src/mesa/vbo/vbo_split_copy.c +++ b/src/mesa/vbo/vbo_split_copy.c @@ -203,7 +203,7 @@ flush( struct copy_context *copy ) GL_TRUE, 0, copy->dstbuf_nr - 1, - NULL, NULL ); + NULL, 0, NULL ); ctx->Array._DrawArrays = saved_arrays; ctx->NewDriverState |= ctx->DriverFlags.NewArray; diff --git a/src/mesa/vbo/vbo_split_inplace.c b/src/mesa/vbo/vbo_split_inplace.c index 5887b74d829..cff4bcd30ff 100644 --- a/src/mesa/vbo/vbo_split_inplace.c +++ b/src/mesa/vbo/vbo_split_inplace.c @@ -94,7 +94,7 @@ static void flush_vertex( struct split_context *split ) !split->ib, split->min_index, split->max_index, - NULL, NULL); + NULL, 0, NULL); ctx->Array._DrawArrays = saved_arrays; ctx->NewDriverState |= ctx->DriverFlags.NewArray; |
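The other recurring change in the tnl/vbo hunks is the extra stream argument threaded through vbo_draw_func and _tnl_draw_prims: ordinary draw paths pass 0, while the DrawTransformFeedbackStream paths forward the stream they were given. A rough sketch of the updated callback shape and its two kinds of call sites, using simplified stand-in types rather than Mesa's real headers:

/* Simplified stand-ins for the real Mesa types, for illustration only. */
struct gl_context;
struct _mesa_prim;
struct _mesa_index_buffer;
struct gl_transform_feedback_object;
struct gl_buffer_object;

typedef void (*vbo_draw_func)(struct gl_context *ctx,
                              const struct _mesa_prim *prims,
                              unsigned nr_prims,
                              const struct _mesa_index_buffer *ib,
                              unsigned index_bounds_valid,
                              unsigned min_index,
                              unsigned max_index,
                              struct gl_transform_feedback_object *tfb_vertcount,
                              unsigned stream,   /* newly added parameter */
                              struct gl_buffer_object *indirect);

/* A regular array draw passes stream 0, mirroring the call sites above. */
static void draw_regular(vbo_draw_func draw, struct gl_context *ctx,
                         const struct _mesa_prim *prims, unsigned nr_prims,
                         unsigned start, unsigned count)
{
   draw(ctx, prims, nr_prims, NULL, 1, start, start + count - 1,
        NULL, 0, NULL);
}

/* A DrawTransformFeedbackStream-style path forwards the stream along with
 * the transform feedback object that supplies the vertex count. */
static void draw_xfb_stream(vbo_draw_func draw, struct gl_context *ctx,
                            const struct _mesa_prim *prims,
                            struct gl_transform_feedback_object *obj,
                            unsigned stream)
{
   draw(ctx, prims, 1, NULL, 1, 0, 0, obj, stream, NULL);
}

Since the software tnl path ignores the stream, _tnl_draw_prims only grows the parameter to keep the signature compatible with vbo_draw_func; the many NULL, 0, NULL updates in vbo_exec_array.c, vbo_rebase.c, vbo_split_*.c and friends are purely mechanical.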