diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile.am | 7 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile.sources | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 102 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_tiled_memcpy.c | 62 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/meson.build | 18 |
5 files changed, 9 insertions, 186 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index ff47add93f4..889d4c68a2b 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -92,14 +92,8 @@ libi965_gen11_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=110 noinst_LTLIBRARIES = \ libi965_dri.la \ - libintel_tiled_memcpy.la \ $(I965_PERGEN_LIBS) -libintel_tiled_memcpy_la_SOURCES = \ - $(intel_tiled_memcpy_FILES) -libintel_tiled_memcpy_la_CFLAGS = \ - $(AM_CFLAGS) $(SSE41_CFLAGS) - libi965_dri_la_SOURCES = \ $(i965_FILES) \ $(i965_oa_GENERATED_FILES) @@ -110,7 +104,6 @@ libi965_dri_la_LIBADD = \ $(top_builddir)/src/intel/compiler/libintel_compiler.la \ $(top_builddir)/src/intel/blorp/libblorp.la \ $(I965_PERGEN_LIBS) \ - libintel_tiled_memcpy.la $(LIBDRM_LIBS) BUILT_SOURCES = $(i965_oa_GENERATED_FILES) diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index ce7633c53c4..db6591ab90a 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -110,13 +110,11 @@ i965_FILES = \ intel_tex_image.c \ intel_tex_obj.h \ intel_tex_validate.c \ + intel_tiled_memcpy.c \ + intel_tiled_memcpy.h \ intel_upload.c \ libdrm_macros.h -intel_tiled_memcpy_FILES = \ - intel_tiled_memcpy.c \ - intel_tiled_memcpy.h - i965_gen4_FILES = \ genX_blorp_exec.c \ genX_state_upload.c diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 269bd706773..7d1fa96b919 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -31,7 +31,6 @@ #include "intel_image.h" #include "intel_mipmap_tree.h" #include "intel_tex.h" -#include "intel_tiled_memcpy.h" #include "intel_blit.h" #include "intel_fbo.h" @@ -3024,7 +3023,7 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree *mt) } static void -intel_miptree_unmap_map(struct brw_context *brw, +intel_miptree_unmap_gtt(struct brw_context *brw, struct intel_mipmap_tree *mt, struct intel_miptree_map *map, unsigned int level, unsigned int slice) @@ -3033,7 +3032,7 @@ intel_miptree_unmap_map(struct brw_context *brw, } static void -intel_miptree_map_map(struct brw_context *brw, +intel_miptree_map_gtt(struct brw_context *brw, struct intel_mipmap_tree *mt, struct intel_miptree_map *map, unsigned int level, unsigned int slice) @@ -3081,7 +3080,7 @@ intel_miptree_map_map(struct brw_context *brw, mt, _mesa_get_format_name(mt->format), x, y, map->ptr, map->stride); - map->unmap = intel_miptree_unmap_map; + map->unmap = intel_miptree_unmap_gtt; } static void @@ -3113,94 +3112,6 @@ intel_miptree_unmap_blit(struct brw_context *brw, intel_miptree_release(&map->linear_mt); } -/* Compute extent parameters for use with tiled_memcpy functions. - * xs are in units of bytes and ys are in units of strides. - */ -static inline void -tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map, - unsigned int level, unsigned int slice, unsigned int *x1_B, - unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el) -{ - unsigned int block_width, block_height; - unsigned int x0_el, y0_el; - - _mesa_get_format_block_size(mt->format, &block_width, &block_height); - - assert(map->x % block_width == 0); - assert(map->y % block_height == 0); - - intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el); - *x1_B = (map->x / block_width + x0_el) * mt->cpp; - *y1_el = map->y / block_height + y0_el; - *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp; - *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el; -} - -static void -intel_miptree_unmap_tiled_memcpy(struct brw_context *brw, - struct intel_mipmap_tree *mt, - struct intel_miptree_map *map, - unsigned int level, - unsigned int slice) -{ - if (map->mode & GL_MAP_WRITE_BIT) { - unsigned int x1, x2, y1, y2; - tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2); - - char *dst = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW); - dst += mt->offset; - - linear_to_tiled(x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch, - map->stride, brw->has_swizzling, mt->surf.tiling, memcpy); - - intel_miptree_unmap_raw(mt); - } - _mesa_align_free(map->buffer); - map->buffer = map->ptr = NULL; -} - -static void -intel_miptree_map_tiled_memcpy(struct brw_context *brw, - struct intel_mipmap_tree *mt, - struct intel_miptree_map *map, - unsigned int level, unsigned int slice) -{ - intel_miptree_access_raw(brw, mt, level, slice, - map->mode & GL_MAP_WRITE_BIT); - - unsigned int x1, x2, y1, y2; - tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2); - map->stride = ALIGN(_mesa_format_row_stride(mt->format, map->w), 16); - - /* The tiling and detiling functions require that the linear buffer - * has proper 16-byte alignment (that is, its `x0` is 16-byte - * aligned). Here we over-allocate the linear buffer by enough - * bytes to get the proper alignment. - */ - map->buffer = _mesa_align_malloc(map->stride * (y2 - y1) + (x1 & 0xf), 16); - map->ptr = (char *)map->buffer + (x1 & 0xf); - assert(map->buffer); - - if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { - char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW); - src += mt->offset; - - const mem_copy_fn fn = -#if defined(USE_SSE41) - cpu_has_sse4_1 ? (mem_copy_fn)_mesa_streaming_load_memcpy : -#endif - memcpy; - - tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride, - mt->surf.row_pitch, brw->has_swizzling, mt->surf.tiling, - fn); - - intel_miptree_unmap_raw(mt); - } - - map->unmap = intel_miptree_unmap_tiled_memcpy; -} - static void intel_miptree_map_blit(struct brw_context *brw, struct intel_mipmap_tree *mt, @@ -3732,7 +3643,6 @@ intel_miptree_map(struct brw_context *brw, void **out_ptr, ptrdiff_t *out_stride) { - const struct gen_device_info *devinfo = &brw->screen->devinfo; struct intel_miptree_map *map; assert(mt->surf.samples == 1); @@ -3753,8 +3663,6 @@ intel_miptree_map(struct brw_context *brw, intel_miptree_map_depthstencil(brw, mt, map, level, slice); } else if (use_intel_mipree_map_blit(brw, mt, mode, level, slice)) { intel_miptree_map_blit(brw, mt, map, level, slice); - } else if (mt->surf.tiling != ISL_TILING_LINEAR && devinfo->gen > 4) { - intel_miptree_map_tiled_memcpy(brw, mt, map, level, slice); #if defined(USE_SSE41) } else if (!(mode & GL_MAP_WRITE_BIT) && !mt->compressed && cpu_has_sse4_1 && @@ -3762,9 +3670,7 @@ intel_miptree_map(struct brw_context *brw, intel_miptree_map_movntdqa(brw, mt, map, level, slice); #endif } else { - if (mt->surf.tiling != ISL_TILING_LINEAR) - perf_debug("intel_miptree_map: mapping via gtt"); - intel_miptree_map_map(brw, mt, map, level, slice); + intel_miptree_map_gtt(brw, mt, map, level, slice); } *out_ptr = map->ptr; diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c index 6440dceac36..7c6bde990d6 100644 --- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c +++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c @@ -36,10 +36,6 @@ #include "brw_context.h" #include "intel_tiled_memcpy.h" -#if defined(USE_SSE41) -#include "main/streaming-load-memcpy.h" -#include <smmintrin.h> -#endif #if defined(__SSSE3__) #include <tmmintrin.h> #elif defined(__SSE2__) @@ -217,31 +213,6 @@ rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes) return dst; } -#if defined(USE_SSE41) -static ALWAYS_INLINE void * -_memcpy_streaming_load(void *dest, const void *src, size_t count) -{ - if (count == 16) { - __m128i val = _mm_stream_load_si128((__m128i *)src); - _mm_storeu_si128((__m128i *)dest, val); - return dest; - } else if (count == 64) { - __m128i val0 = _mm_stream_load_si128(((__m128i *)src) + 0); - __m128i val1 = _mm_stream_load_si128(((__m128i *)src) + 1); - __m128i val2 = _mm_stream_load_si128(((__m128i *)src) + 2); - __m128i val3 = _mm_stream_load_si128(((__m128i *)src) + 3); - _mm_storeu_si128(((__m128i *)dest) + 0, val0); - _mm_storeu_si128(((__m128i *)dest) + 1, val1); - _mm_storeu_si128(((__m128i *)dest) + 2, val2); - _mm_storeu_si128(((__m128i *)dest) + 3, val3); - return dest; - } else { - assert(count < 64); /* and (count < 16) for ytiled */ - return memcpy(dest, src, count); - } -} -#endif - /** * Each row from y0 to y1 is copied in three parts: [x0,x1), [x1,x2), [x2,x3). * These ranges are in bytes, i.e. pixels * bytes-per-pixel. @@ -706,12 +677,6 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, dst, src, dst_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_src); -#if defined(USE_SSE41) - else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy) - return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, - dst, src, dst_pitch, swizzle_bit, - memcpy, _memcpy_streaming_load); -#endif else unreachable("not reached"); } else { @@ -722,12 +687,6 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, return xtiled_to_linear(x0, x1, x2, x3, y0, y1, dst, src, dst_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_src); -#if defined(USE_SSE41) - else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy) - return xtiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, - memcpy, _memcpy_streaming_load); -#endif else unreachable("not reached"); } @@ -760,12 +719,6 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, dst, src, dst_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_src); -#if defined(USE_SSE41) - else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy) - return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, - dst, src, dst_pitch, swizzle_bit, - memcpy, _memcpy_streaming_load); -#endif else unreachable("not reached"); } else { @@ -776,12 +729,6 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, return ytiled_to_linear(x0, x1, x2, x3, y0, y1, dst, src, dst_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_src); -#if defined(USE_SSE41) - else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy) - return ytiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, - memcpy, _memcpy_streaming_load); -#endif else unreachable("not reached"); } @@ -921,15 +868,6 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2, unreachable("unsupported tiling"); } -#if defined(USE_SSE41) - if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy) { - /* The hidden cacheline sized register used by movntdqa can apparently - * give you stale data, so do an mfence to invalidate it. - */ - _mm_mfence(); - } -#endif - /* Round out to tile boundaries. */ xt0 = ALIGN_DOWN(xt1, tw); xt3 = ALIGN_UP (xt2, tw); diff --git a/src/mesa/drivers/dri/i965/meson.build b/src/mesa/drivers/dri/i965/meson.build index 1eac329f49c..20404d5b059 100644 --- a/src/mesa/drivers/dri/i965/meson.build +++ b/src/mesa/drivers/dri/i965/meson.build @@ -129,13 +129,10 @@ files_i965 = files( 'intel_tex_image.c', 'intel_tex_obj.h', 'intel_tex_validate.c', - 'intel_upload.c', - 'libdrm_macros.h', -) - -files_intel_tiled_memcpy = files( 'intel_tiled_memcpy.c', 'intel_tiled_memcpy.h', + 'intel_upload.c', + 'libdrm_macros.h', ) i965_gen_libs = [] @@ -179,15 +176,6 @@ i965_oa_sources = custom_target( ], ) -intel_tiled_memcpy = static_library( - 'intel_tiled_memcpy', - [files_intel_tiled_memcpy], - include_directories : [ - inc_common, inc_intel, inc_dri_common, inc_drm_uapi, - ], - c_args : [c_vis_args, no_override_init_args, '-msse2', sse41_args], -) - libi965 = static_library( 'i965', [files_i965, i965_oa_sources, ir_expression_operation_h, @@ -199,7 +187,7 @@ libi965 = static_library( cpp_args : [cpp_vis_args, '-msse2'], link_with : [ i965_gen_libs, libintel_common, libintel_dev, libisl, libintel_compiler, - libblorp, intel_tiled_memcpy, + libblorp, ], dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers], ) |