diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/Android.mk | 38 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile.am | 14 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile.sources | 10 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_tiled_memcpy.c | 160 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_tiled_memcpy.h | 77 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_tiled_memcpy_normal.c | 59 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.c | 61 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.h | 59 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/meson.build | 38 |
9 files changed, 426 insertions, 90 deletions
diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index 324d087220a..e125eb6d394 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -51,6 +51,42 @@ I965_PERGEN_LIBS := \ libmesa_i965_gen10 \ libmesa_i965_gen11 + +# --------------------------------------- +# Build libmesa_intel_tiled_memcpy +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_MODULE := libmesa_intel_tiled_memcpy + +LOCAL_C_INCLUDES := $(I965_PERGEN_COMMON_INCLUDES) + +LOCAL_SRC_FILES := $(intel_tiled_memcpy_FILES) + +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) + +# --------------------------------------- +# Build libmesa_intel_tiled_memcpy_sse41 +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_MODULE := libmesa_intel_tiled_memcpy_sse41 + +LOCAL_C_INCLUDES := $(I965_PERGEN_COMMON_INCLUDES) + +LOCAL_SRC_FILES := $(intel_tiled_memcpy_sse41_FILES) + +ifeq ($(ARCH_X86_HAVE_SSE4_1),true) +LOCAL_CFLAGS += \ + -DUSE_SSE41 -msse4.1 -mstackrealign +endif + +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) + # --------------------------------------- # Build libmesa_i965_gen4 # --------------------------------------- @@ -289,6 +325,8 @@ LOCAL_SRC_FILES := \ LOCAL_WHOLE_STATIC_LIBRARIES := \ $(MESA_DRI_WHOLE_STATIC_LIBRARIES) \ $(I965_PERGEN_LIBS) \ + libmesa_intel_tiled_memcpy \ + libmesa_intel_tiled_memcpy_sse41 \ libmesa_intel_dev \ libmesa_intel_common \ libmesa_isl \ diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index 0afa7a2f216..dc19da2c4a6 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -92,8 +92,20 @@ libi965_gen11_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=110 noinst_LTLIBRARIES = \ libi965_dri.la \ + libintel_tiled_memcpy.la \ + libintel_tiled_memcpy_sse41.la \ $(I965_PERGEN_LIBS) +libintel_tiled_memcpy_la_SOURCES = \ + $(intel_tiled_memcpy_FILES) 
+libintel_tiled_memcpy_la_CFLAGS = \ + $(AM_CFLAGS) + +libintel_tiled_memcpy_sse41_la_SOURCES = \ + $(intel_tiled_memcpy_sse41_FILES) +libintel_tiled_memcpy_sse41_la_CFLAGS = \ + $(AM_CFLAGS) $(SSE41_CFLAGS) + libi965_dri_la_SOURCES = \ $(i965_FILES) \ $(i965_oa_GENERATED_FILES) @@ -104,6 +116,8 @@ libi965_dri_la_LIBADD = \ $(top_builddir)/src/intel/compiler/libintel_compiler.la \ $(top_builddir)/src/intel/blorp/libblorp.la \ $(I965_PERGEN_LIBS) \ + libintel_tiled_memcpy.la \ + libintel_tiled_memcpy_sse41.la \ $(LIBDRM_LIBS) BUILT_SOURCES = $(i965_oa_GENERATED_FILES) diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index db6591ab90a..0ab0e42fb18 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -110,11 +110,17 @@ i965_FILES = \ intel_tex_image.c \ intel_tex_obj.h \ intel_tex_validate.c \ - intel_tiled_memcpy.c \ - intel_tiled_memcpy.h \ intel_upload.c \ libdrm_macros.h +intel_tiled_memcpy_FILES = \ + intel_tiled_memcpy_normal.c \ + intel_tiled_memcpy.h + +intel_tiled_memcpy_sse41_FILES = \ + intel_tiled_memcpy_sse41.c \ + intel_tiled_memcpy_sse41.h + i965_gen4_FILES = \ genX_blorp_exec.c \ genX_state_upload.c diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c index 76a92b4d41f..b6bf96706f8 100644 --- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c +++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c @@ -566,6 +566,31 @@ ytiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, } } +#if defined(INLINE_SSE41) +static ALWAYS_INLINE void * +_memcpy_streaming_load(void *dest, const void *src, size_t count) +{ + if (count == 16) { + __m128i val = _mm_stream_load_si128((__m128i *)src); + _mm_storeu_si128((__m128i *)dest, val); + return dest; + } else if (count == 64) { + __m128i val0 = _mm_stream_load_si128(((__m128i *)src) + 0); + __m128i val1 = _mm_stream_load_si128(((__m128i *)src) + 1); + 
__m128i val2 = _mm_stream_load_si128(((__m128i *)src) + 2); + __m128i val3 = _mm_stream_load_si128(((__m128i *)src) + 3); + _mm_storeu_si128(((__m128i *)dest) + 0, val0); + _mm_storeu_si128(((__m128i *)dest) + 1, val1); + _mm_storeu_si128(((__m128i *)dest) + 2, val2); + _mm_storeu_si128(((__m128i *)dest) + 3, val3); + return dest; + } else { + assert(count < 64); /* and (count < 16) for ytiled */ + return memcpy(dest, src, count); + } +} +#endif + static mem_copy_fn choose_copy_function(mem_copy_fn_type copy_type) { @@ -574,6 +599,10 @@ choose_copy_function(mem_copy_fn_type copy_type) return memcpy; case INTEL_COPY_RGBA8: return rgba8_copy; +#if defined(INLINE_SSE41) + case INTEL_COPY_STREAMING_LOAD: + return _memcpy_streaming_load; +#endif default: assert(!"unreachable"); } @@ -696,6 +725,12 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, dst, src, dst_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_src); +#if defined(INLINE_SSE41) + else if (mem_copy == _memcpy_streaming_load) + return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, + dst, src, dst_pitch, swizzle_bit, + memcpy, _memcpy_streaming_load); +#endif else unreachable("not reached"); } else { @@ -706,6 +741,12 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, return xtiled_to_linear(x0, x1, x2, x3, y0, y1, dst, src, dst_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_src); +#if defined(INLINE_SSE41) + else if (mem_copy == _memcpy_streaming_load) + return xtiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, + memcpy, _memcpy_streaming_load); +#endif else unreachable("not reached"); } @@ -740,6 +781,12 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, dst, src, dst_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_src); +#if 
defined(INLINE_SSE41) + else if (copy_type == INTEL_COPY_STREAMING_LOAD) + return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, + dst, src, dst_pitch, swizzle_bit, + memcpy, _memcpy_streaming_load); +#endif else unreachable("not reached"); } else { @@ -750,6 +797,12 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, return ytiled_to_linear(x0, x1, x2, x3, y0, y1, dst, src, dst_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_src); +#if defined(INLINE_SSE41) + else if (copy_type == INTEL_COPY_STREAMING_LOAD) + return ytiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, + memcpy, _memcpy_streaming_load); +#endif else unreachable("not reached"); } @@ -768,14 +821,14 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, * 'dst' is the address of (0, 0) in the destination tiled texture. * 'src' is the address of (xt1, yt1) in the source linear texture. */ -void -linear_to_tiled(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - uint32_t dst_pitch, int32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type) +static void +intel_linear_to_tiled(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + uint32_t dst_pitch, int32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + mem_copy_fn_type copy_type) { tile_copy_fn tile_copy; uint32_t xt0, xt3; @@ -859,14 +912,14 @@ linear_to_tiled(uint32_t xt1, uint32_t xt2, * 'dst' is the address of (xt1, yt1) in the destination linear texture. * 'src' is the address of (0, 0) in the source tiled texture. 
*/ -void -tiled_to_linear(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - int32_t dst_pitch, uint32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type) +static void +intel_tiled_to_linear(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + int32_t dst_pitch, uint32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + mem_copy_fn_type copy_type) { tile_copy_fn tile_copy; uint32_t xt0, xt3; @@ -889,6 +942,15 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2, unreachable("unsupported tiling"); } +#if defined(INLINE_SSE41) + if (copy_type == INTEL_COPY_STREAMING_LOAD) { + /* The hidden cacheline sized register used by movntdqa can apparently + * give you stale data, so do an mfence to invalidate it. + */ + _mm_mfence(); + } +#endif + /* Round out to tile boundaries. */ xt0 = ALIGN_DOWN(xt1, tw); xt3 = ALIGN_UP (xt2, tw); @@ -938,69 +1000,3 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2, } } } - - -/** - * Determine which copy function to use for the given format combination - * - * The only two possible copy functions which are ever returned are a - * direct memcpy and a RGBA <-> BGRA copy function. Since RGBA -> BGRA and - * BGRA -> RGBA are exactly the same operation (and memcpy is obviously - * symmetric), it doesn't matter whether the copy is from the tiled image - * to the untiled or vice versa. The copy function required is the same in - * either case so this function can be used. 
- * - * \param[in] tiledFormat The format of the tiled image - * \param[in] format The GL format of the client data - * \param[in] type The GL type of the client data - * \param[out] mem_copy Will be set to one of either the standard - * library's memcpy or a different copy function - * that performs an RGBA to BGRA conversion - * \param[out] cpp Number of bytes per channel - * - * \return true if the format and type combination are valid - */ -bool -intel_get_memcpy_type(mesa_format tiledFormat, GLenum format, GLenum type, - mem_copy_fn_type *copy_type, uint32_t *cpp) -{ - *copy_type = INTEL_COPY_INVALID; - - if (type == GL_UNSIGNED_INT_8_8_8_8_REV && - !(format == GL_RGBA || format == GL_BGRA)) - return false; /* Invalid type/format combination */ - - if ((tiledFormat == MESA_FORMAT_L_UNORM8 && format == GL_LUMINANCE) || - (tiledFormat == MESA_FORMAT_A_UNORM8 && format == GL_ALPHA)) { - *cpp = 1; - *copy_type = INTEL_COPY_MEMCPY; - } else if ((tiledFormat == MESA_FORMAT_B8G8R8A8_UNORM) || - (tiledFormat == MESA_FORMAT_B8G8R8X8_UNORM) || - (tiledFormat == MESA_FORMAT_B8G8R8A8_SRGB) || - (tiledFormat == MESA_FORMAT_B8G8R8X8_SRGB)) { - *cpp = 4; - if (format == GL_BGRA) { - *copy_type = INTEL_COPY_MEMCPY; - } else if (format == GL_RGBA) { - *copy_type = INTEL_COPY_RGBA8; - } - } else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) || - (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM) || - (tiledFormat == MESA_FORMAT_R8G8B8A8_SRGB) || - (tiledFormat == MESA_FORMAT_R8G8B8X8_SRGB)) { - *cpp = 4; - if (format == GL_BGRA) { - /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can - * use the same function. 
- */ - *copy_type = INTEL_COPY_RGBA8; - } else if (format == GL_RGBA) { - *copy_type = INTEL_COPY_MEMCPY; - } - } - - if (*copy_type == INTEL_COPY_INVALID) - return false; - - return true; -} diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h index 70934410298..90aadf9e090 100644 --- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h +++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h @@ -38,11 +38,21 @@ typedef enum { INTEL_COPY_MEMCPY = 0, INTEL_COPY_RGBA8, + INTEL_COPY_STREAMING_LOAD, INTEL_COPY_INVALID, } mem_copy_fn_type; typedef void *(*mem_copy_fn)(void *dest, const void *src, size_t n); +typedef void (*tiled_to_linear_fn) + (uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + int32_t dst_pitch, uint32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + mem_copy_fn_type copy_type); + void linear_to_tiled(uint32_t xt1, uint32_t xt2, uint32_t yt1, uint32_t yt2, @@ -61,8 +71,69 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2, enum isl_tiling tiling, mem_copy_fn_type copy_type); -bool intel_get_memcpy_type(mesa_format tiledFormat, GLenum format, - GLenum type, mem_copy_fn_type *copy_type, - uint32_t *cpp); +/** + * Determine which copy function to use for the given format combination + * + * The only two possible copy functions which are ever returned are a + * direct memcpy and a RGBA <-> BGRA copy function. Since RGBA -> BGRA and + * BGRA -> RGBA are exactly the same operation (and memcpy is obviously + * symmetric), it doesn't matter whether the copy is from the tiled image + * to the untiled or vice versa. The copy function required is the same in + * either case so this function can be used. 
+ * + * \param[in] tiledFormat The format of the tiled image + * \param[in] format The GL format of the client data + * \param[in] type The GL type of the client data + * \param[out] copy_type Will be set to INTEL_COPY_MEMCPY for a direct + * copy, INTEL_COPY_RGBA8 for an RGBA <-> BGRA swap, + * or INTEL_COPY_INVALID if unsupported + * \param[out] cpp Number of bytes per pixel + * + * \return true if the format and type combination are valid + */ +static MAYBE_UNUSED bool +intel_get_memcpy_type(mesa_format tiledFormat, GLenum format, GLenum type, + mem_copy_fn_type *copy_type, uint32_t *cpp) +{ + *copy_type = INTEL_COPY_INVALID; + + if (type == GL_UNSIGNED_INT_8_8_8_8_REV && + !(format == GL_RGBA || format == GL_BGRA)) + return false; /* Invalid type/format combination */ + + if ((tiledFormat == MESA_FORMAT_L_UNORM8 && format == GL_LUMINANCE) || + (tiledFormat == MESA_FORMAT_A_UNORM8 && format == GL_ALPHA)) { + *cpp = 1; + *copy_type = INTEL_COPY_MEMCPY; + } else if ((tiledFormat == MESA_FORMAT_B8G8R8A8_UNORM) || + (tiledFormat == MESA_FORMAT_B8G8R8X8_UNORM) || + (tiledFormat == MESA_FORMAT_B8G8R8A8_SRGB) || + (tiledFormat == MESA_FORMAT_B8G8R8X8_SRGB)) { + *cpp = 4; + if (format == GL_BGRA) { + *copy_type = INTEL_COPY_MEMCPY; + } else if (format == GL_RGBA) { + *copy_type = INTEL_COPY_RGBA8; + } + } else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) || + (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM) || + (tiledFormat == MESA_FORMAT_R8G8B8A8_SRGB) || + (tiledFormat == MESA_FORMAT_R8G8B8X8_SRGB)) { + *cpp = 4; + if (format == GL_BGRA) { + /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can + * use the same function. 
+ */ + *copy_type = INTEL_COPY_RGBA8; + } else if (format == GL_RGBA) { + *copy_type = INTEL_COPY_MEMCPY; + } + } + + if (*copy_type == INTEL_COPY_INVALID) + return false; + + return true; +} #endif /* INTEL_TILED_MEMCPY */ diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy_normal.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy_normal.c new file mode 100644 index 00000000000..c246067541b --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy_normal.c @@ -0,0 +1,59 @@ +/* + * Mesa 3-D graphics library + * + * Copyright 2012 Intel Corporation + * Copyright 2013 Google + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chad Versace <[email protected]> + * Frank Henigman <[email protected]> + */ + + +#include "intel_tiled_memcpy.c" + +void +linear_to_tiled(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + uint32_t dst_pitch, int32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + mem_copy_fn_type copy_type) +{ + intel_linear_to_tiled(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, + has_swizzling, tiling, copy_type); +} + +void +tiled_to_linear(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + int32_t dst_pitch, uint32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + mem_copy_fn_type copy_type) +{ + intel_tiled_to_linear(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, + has_swizzling, tiling, copy_type); +} diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.c new file mode 100644 index 00000000000..bc33ea11839 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.c @@ -0,0 +1,61 @@ +/* + * Mesa 3-D graphics library + * + * Copyright 2012 Intel Corporation + * Copyright 2013 Google + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chad Versace <[email protected]> + * Frank Henigman <[email protected]> + */ + +#define INLINE_SSE41 + +#include "intel_tiled_memcpy_sse41.h" +#include "intel_tiled_memcpy.c" + +void +linear_to_tiled_sse41(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + uint32_t dst_pitch, int32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + mem_copy_fn_type copy_type) +{ + intel_linear_to_tiled(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, + has_swizzling, tiling, copy_type); +} + +void +tiled_to_linear_sse41(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + int32_t dst_pitch, uint32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + mem_copy_fn_type copy_type) +{ + intel_tiled_to_linear(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, + has_swizzling, tiling, copy_type); +} diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.h b/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.h new file mode 100644 index 00000000000..5ddd6d01bb8 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.h @@ -0,0 +1,59 @@ +/* + * Mesa 3-D graphics library + * + * Copyright 2012 Intel Corporation + * Copyright 2013 Google + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without 
limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chad Versace <[email protected]> + * Frank Henigman <[email protected]> + */ + +#ifndef INTEL_TILED_MEMCPY_SSE41_H +#define INTEL_TILED_MEMCPY_SSE41_H + +#include <stdint.h> +#include "main/mtypes.h" +#include "isl/isl.h" + +#include "intel_tiled_memcpy.h" + +void +linear_to_tiled_sse41(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + uint32_t dst_pitch, int32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + mem_copy_fn_type copy_type); + +void +tiled_to_linear_sse41(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + int32_t dst_pitch, uint32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + mem_copy_fn_type copy_type); + +#endif /* INTEL_TILED_MEMCPY_SSE41_H */ diff --git a/src/mesa/drivers/dri/i965/meson.build b/src/mesa/drivers/dri/i965/meson.build index b95e2d76489..bf366a6c157 100644 --- a/src/mesa/drivers/dri/i965/meson.build +++ b/src/mesa/drivers/dri/i965/meson.build @@ -129,12 +129,20 @@ files_i965 = files( 'intel_tex_image.c', 'intel_tex_obj.h', 
'intel_tex_validate.c', - 'intel_tiled_memcpy.c', - 'intel_tiled_memcpy.h', 'intel_upload.c', 'libdrm_macros.h', ) +files_intel_tiled_memcpy = files( + 'intel_tiled_memcpy_normal.c', + 'intel_tiled_memcpy.h', +) + +files_intel_tiled_memcpy_sse41 = files( + 'intel_tiled_memcpy_sse41.c', + 'intel_tiled_memcpy_sse41.h', +) + i965_gen_libs = [] foreach v : ['40', '45', '50', '60', '70', '75', '80', '90', '100', '110'] i965_gen_libs += static_library( @@ -176,6 +184,30 @@ i965_oa_sources = custom_target( ], ) +intel_tiled_memcpy = static_library( + 'intel_tiled_memcpy', + [files_intel_tiled_memcpy], + include_directories : [ + inc_common, inc_intel, inc_dri_common, inc_drm_uapi, + ], + c_args : [c_vis_args, no_override_init_args, '-msse2'], +) + +if with_sse41 +intel_tiled_memcpy_sse41 = static_library( + 'intel_tiled_memcpy_sse41', + [files_intel_tiled_memcpy_sse41], + include_directories : [ + inc_common, inc_intel, inc_dri_common, inc_drm_uapi, + ], + link_args : [ '-Wl,--exclude-libs=ALL' ], + c_args : [c_vis_args, no_override_init_args, '-Wl,--exclude-libs=ALL', '-msse2', sse41_args], +) +else +intel_tiled_memcpy_sse41 = [] +endif + + libi965 = static_library( 'i965', [files_i965, i965_oa_sources, ir_expression_operation_h, @@ -187,7 +219,7 @@ libi965 = static_library( cpp_args : [cpp_vis_args, c_sse2_args], link_with : [ i965_gen_libs, libintel_common, libintel_dev, libisl, libintel_compiler, - libblorp, + libblorp, intel_tiled_memcpy, intel_tiled_memcpy_sse41 ], dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers], ) |