diff options
author | Christian König <[email protected]> | 2010-10-28 20:24:56 +0200 |
---|---|---|
committer | Christian König <[email protected]> | 2010-10-28 20:24:56 +0200 |
commit | 41ed47d6b8fb6c032e2907ef2e49e414c26f35c1 (patch) | |
tree | 8cf267ee3ac5d8b530dd70a28a0d568344aa8304 /src/gallium/auxiliary/util | |
parent | 97a7cf230a70c64fff300931ae7c00aa00449c97 (diff) | |
parent | 5479fa34d9acebd55f68c23a278cf382d0e84248 (diff) |
Merge branch 'master' of ssh://git.freedesktop.org/git/mesa/mesa into pipe-video
Conflicts:
src/gallium/include/pipe/p_format.h
Diffstat (limited to 'src/gallium/auxiliary/util')
-rw-r--r-- | src/gallium/auxiliary/util/u_dl.c | 3 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_format.csv | 3 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_format_srgb.py | 4 | ||||
-rwxr-xr-x | src/gallium/auxiliary/util/u_format_table.py | 4 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_format_zs.c | 53 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_format_zs.h | 16 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_math.h | 5 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_sse.h | 154 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_tile.c | 84 |
9 files changed, 320 insertions, 6 deletions
diff --git a/src/gallium/auxiliary/util/u_dl.c b/src/gallium/auxiliary/util/u_dl.c index 220860ebf4b..aca435d6cad 100644 --- a/src/gallium/auxiliary/util/u_dl.c +++ b/src/gallium/auxiliary/util/u_dl.c @@ -38,6 +38,7 @@ #endif #include "u_dl.h" +#include "u_pointer.h" struct util_dl_library * @@ -58,7 +59,7 @@ util_dl_get_proc_address(struct util_dl_library *library, const char *procname) { #if defined(PIPE_OS_UNIX) - return (util_dl_proc)dlsym((void *)library, procname); + return (util_dl_proc) pointer_to_func(dlsym((void *)library, procname)); #elif defined(PIPE_OS_WINDOWS) return (util_dl_proc)GetProcAddress((HMODULE)library, procname); #else diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 0811280b97b..8e5d4487a67 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -109,9 +109,12 @@ PIPE_FORMAT_Z32_UNORM , plain, 1, 1, un32, , , , x___, PIPE_FORMAT_Z32_FLOAT , plain, 1, 1, f32 , , , , x___, zs PIPE_FORMAT_Z24_UNORM_S8_USCALED , plain, 1, 1, un24, u8 , , , xy__, zs PIPE_FORMAT_S8_USCALED_Z24_UNORM , plain, 1, 1, u8 , un24, , , yx__, zs +PIPE_FORMAT_X24S8_USCALED , plain, 1, 1, x24, u8 , , , _y__, zs +PIPE_FORMAT_S8X24_USCALED , plain, 1, 1, u8 , x24 , , , _x__, zs PIPE_FORMAT_Z24X8_UNORM , plain, 1, 1, un24, x8 , , , x___, zs PIPE_FORMAT_X8Z24_UNORM , plain, 1, 1, x8 , un24, , , y___, zs PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED , plain, 1, 1, f32, u8 , x24 , , xy__, zs +PIPE_FORMAT_X32_S8X24_USCALED , plain, 1, 1, x32, u8 , x24 , , _y__, zs # YUV formats # http://www.fourcc.org/yuv.php#UYVY diff --git a/src/gallium/auxiliary/util/u_format_srgb.py b/src/gallium/auxiliary/util/u_format_srgb.py index 3e8000f3687..cd63ae78919 100644 --- a/src/gallium/auxiliary/util/u_format_srgb.py +++ b/src/gallium/auxiliary/util/u_format_srgb.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -''' +CopyRight = ''' /************************************************************************** * * Copyright 2010 VMware, Inc. @@ -89,7 +89,7 @@ def main(): print '/* This file is autogenerated by u_format_srgb.py. Do not edit directly. */' print # This will print the copyright message on the top of this file - print __doc__.strip() + print CopyRight.strip() print print '#include "u_format_srgb.h"' print diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py index f0b407b8b8e..8cc22a56371 100755 --- a/src/gallium/auxiliary/util/u_format_table.py +++ b/src/gallium/auxiliary/util/u_format_table.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -''' +CopyRight = ''' /************************************************************************** * * Copyright 2010 VMware, Inc. @@ -83,7 +83,7 @@ def write_format_table(formats): print '/* This file is autogenerated by u_format_table.py from u_format.csv. Do not edit directly. */' print # This will print the copyright message on the top of this file - print __doc__.strip() + print CopyRight.strip() print print '#include "u_format.h"' print '#include "u_format_s3tc.h"' diff --git a/src/gallium/auxiliary/util/u_format_zs.c b/src/gallium/auxiliary/util/u_format_zs.c index 792d69c214c..80081e22f7c 100644 --- a/src/gallium/auxiliary/util/u_format_zs.c +++ b/src/gallium/auxiliary/util/u_format_zs.c @@ -918,3 +918,56 @@ util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned d } } + +void +util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_z24_unorm_s8_uscaled_unpack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_z24_unorm_s8_uscaled_pack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_s8_uscaled_z24_unorm_unpack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_s8_uscaled_z24_unorm_pack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); + +} + +void +util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} diff --git a/src/gallium/auxiliary/util/u_format_zs.h b/src/gallium/auxiliary/util/u_format_zs.h index 650db4b95fd..1604cc3eee2 100644 --- a/src/gallium/auxiliary/util/u_format_zs.h +++ b/src/gallium/auxiliary/util/u_format_zs.h @@ -192,5 +192,21 @@ util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned void util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); +void +util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); +void +util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_sride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); #endif /* U_FORMAT_ZS_H_ */ diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 69a76814945..37294b7203f 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -118,6 +118,11 @@ __inline double __cdecl atan2(double val) #endif +#ifndef M_SQRT2 +#define M_SQRT2 1.41421356237309504880 +#endif + + #if defined(_MSC_VER) #if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h index 03198c91da4..1df6c872677 100644 --- a/src/gallium/auxiliary/util/u_sse.h +++ b/src/gallium/auxiliary/util/u_sse.h @@ -71,6 +71,96 @@ _mm_castps_si128(__m128 a) #endif /* defined(_MSC_VER) && _MSC_VER < 1500 */ +union m128i { + __m128i m; + ubyte ub[16]; + ushort us[8]; + uint ui[4]; +}; + +static INLINE void u_print_epi8(const char *name, __m128i r) +{ + union { __m128i m; ubyte ub[16]; } u; + u.m = r; + + debug_printf("%s: " + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x\n", + name, + u.ub[0], u.ub[1], u.ub[2], u.ub[3], + u.ub[4], u.ub[5], u.ub[6], u.ub[7], + u.ub[8], u.ub[9], u.ub[10], u.ub[11], + u.ub[12], u.ub[13], u.ub[14], u.ub[15]); +} + +static INLINE void u_print_epi16(const char *name, __m128i r) +{ + union { __m128i m; ushort us[8]; } u; + u.m = r; + + debug_printf("%s: " + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x\n", + name, + u.us[0], u.us[1], u.us[2], u.us[3], + u.us[4], u.us[5], u.us[6], u.us[7]); +} + +static INLINE void u_print_epi32(const char *name, __m128i r) +{ + union { __m128i m; uint ui[4]; } u; + u.m = r; + + debug_printf("%s: " + "%08x/" + "%08x/" + "%08x/" + "%08x\n", + name, + u.ui[0], u.ui[1], u.ui[2], u.ui[3]); +} + +static INLINE void u_print_ps(const char *name, __m128 r) +{ + union { __m128 m; float f[4]; } u; + u.m = r; + + debug_printf("%s: " + "%f/" + "%f/" + "%f/" + "%f\n", + name, + u.f[0], u.f[1], u.f[2], u.f[3]); +} + + +#define U_DUMP_EPI32(a) u_print_epi32(#a, a) +#define U_DUMP_EPI16(a) u_print_epi16(#a, a) +#define U_DUMP_EPI8(a) u_print_epi8(#a, a) +#define U_DUMP_PS(a) u_print_ps(#a, a) + + #if defined(PIPE_ARCH_SSSE3) @@ -98,6 +188,68 @@ _mm_shuffle_epi8(__m128i a, __m128i mask) #endif /* !PIPE_ARCH_SSSE3 */ -#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */ + + +/* Provide an SSE2 implementation of _mm_mullo_epi32() in terms of + * _mm_mul_epu32(). + * + * I suspect this works fine for us because one of our operands is + * always positive, but not sure that this can be used for general + * signed integer multiplication. + * + * This seems close enough to the speed of SSE4 and the real + * _mm_mullo_epi32() intrinsic as to not justify adding an sse4 + * dependency at this point. + */ +static INLINE __m128i mm_mullo_epi32(const __m128i a, const __m128i b) +{ + __m128i a4 = _mm_srli_epi64(a, 32); /* shift by one dword */ + __m128i b4 = _mm_srli_epi64(b, 32); /* shift by one dword */ + __m128i ba = _mm_mul_epu32(b, a); /* multply dwords 0, 2 */ + __m128i b4a4 = _mm_mul_epu32(b4, a4); /* multiply dwords 1, 3 */ + + /* Interleave the results, either with shuffles or (slightly + * faster) direct bit operations: + */ +#if 0 + __m128i ba8 = _mm_shuffle_epi32(ba, 8); + __m128i b4a48 = _mm_shuffle_epi32(b4a4, 8); + __m128i result = _mm_unpacklo_epi32(ba8, b4a48); +#else + __m128i mask = _mm_setr_epi32(~0,0,~0,0); + __m128i ba_mask = _mm_and_si128(ba, mask); + __m128i b4a4_mask_shift = _mm_slli_epi64(b4a4, 32); + __m128i result = _mm_or_si128(ba_mask, b4a4_mask_shift); +#endif + + return result; +} + + +static INLINE void +transpose4_epi32(const __m128i * restrict a, + const __m128i * restrict b, + const __m128i * restrict c, + const __m128i * restrict d, + __m128i * restrict o, + __m128i * restrict p, + __m128i * restrict q, + __m128i * restrict r) +{ + __m128i t0 = _mm_unpacklo_epi32(*a, *b); + __m128i t1 = _mm_unpacklo_epi32(*c, *d); + __m128i t2 = _mm_unpackhi_epi32(*a, *b); + __m128i t3 = _mm_unpackhi_epi32(*c, *d); + + *o = _mm_unpacklo_epi64(t0, t1); + *p = _mm_unpackhi_epi64(t0, t1); + *q = _mm_unpacklo_epi64(t2, t3); + *r = _mm_unpackhi_epi64(t2, t3); +} + +#define SCALAR_EPI32(m, i) _mm_shuffle_epi32((m), _MM_SHUFFLE(i,i,i,i)) + + +#endif /* PIPE_ARCH_SSE */ #endif /* U_SSE_H_ */ diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index f7aa1403d08..44cadbfcdd0 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -217,6 +217,81 @@ z24s8_get_tile_rgba(const unsigned *src, } } +/*** PIPE_FORMAT_S8X24_USCALED ***/ + +/** + * Return S component as four uint32_t in [0..255]. Z part ignored. + */ +static void +s8x24_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float)((*src++ >> 24) & 0xff); + } + + p += dst_stride; + } +} + +/*** PIPE_FORMAT_X24S8_USCALED ***/ + +/** + * Return S component as four uint32_t in [0..255]. Z part ignored. + */ +static void +x24s8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float)(*src++ & 0xff); + } + p += dst_stride; + } +} + + +/** + * Return S component as four uint32_t in [0..255]. Z part ignored. + */ +static void +s8_get_tile_rgba(const unsigned char *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float)(*src++ & 0xff); + } + p += dst_stride; + } +} /*** PIPE_FORMAT_Z32_FLOAT ***/ @@ -261,10 +336,19 @@ pipe_tile_raw_to_rgba(enum pipe_format format, case PIPE_FORMAT_Z24X8_UNORM: s8z24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_S8_USCALED: + s8_get_tile_rgba((unsigned char *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_X24S8_USCALED: + s8x24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; case PIPE_FORMAT_S8_USCALED_Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: z24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_S8X24_USCALED: + x24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; case PIPE_FORMAT_Z32_FLOAT: z32f_get_tile_rgba((float *) src, w, h, dst, dst_stride); break; |