diff options
Diffstat (limited to 'src/gallium/auxiliary/util')
-rw-r--r-- | src/gallium/auxiliary/util/u_dl.c | 3 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_format.csv | 3 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_format_zs.c | 53 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_format_zs.h | 16 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_math.h | 5 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_pack_color.h | 50 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_sse.h | 154 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_tile.c | 84 |
8 files changed, 344 insertions, 24 deletions
diff --git a/src/gallium/auxiliary/util/u_dl.c b/src/gallium/auxiliary/util/u_dl.c index 220860ebf4b..aca435d6cad 100644 --- a/src/gallium/auxiliary/util/u_dl.c +++ b/src/gallium/auxiliary/util/u_dl.c @@ -38,6 +38,7 @@ #endif #include "u_dl.h" +#include "u_pointer.h" struct util_dl_library * @@ -58,7 +59,7 @@ util_dl_get_proc_address(struct util_dl_library *library, const char *procname) { #if defined(PIPE_OS_UNIX) - return (util_dl_proc)dlsym((void *)library, procname); + return (util_dl_proc) pointer_to_func(dlsym((void *)library, procname)); #elif defined(PIPE_OS_WINDOWS) return (util_dl_proc)GetProcAddress((HMODULE)library, procname); #else diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 0811280b97b..8e5d4487a67 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -109,9 +109,12 @@ PIPE_FORMAT_Z32_UNORM , plain, 1, 1, un32, , , , x___, PIPE_FORMAT_Z32_FLOAT , plain, 1, 1, f32 , , , , x___, zs PIPE_FORMAT_Z24_UNORM_S8_USCALED , plain, 1, 1, un24, u8 , , , xy__, zs PIPE_FORMAT_S8_USCALED_Z24_UNORM , plain, 1, 1, u8 , un24, , , yx__, zs +PIPE_FORMAT_X24S8_USCALED , plain, 1, 1, x24, u8 , , , _y__, zs +PIPE_FORMAT_S8X24_USCALED , plain, 1, 1, u8 , x24 , , , _x__, zs PIPE_FORMAT_Z24X8_UNORM , plain, 1, 1, un24, x8 , , , x___, zs PIPE_FORMAT_X8Z24_UNORM , plain, 1, 1, x8 , un24, , , y___, zs PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED , plain, 1, 1, f32, u8 , x24 , , xy__, zs +PIPE_FORMAT_X32_S8X24_USCALED , plain, 1, 1, x32, u8 , x24 , , _y__, zs # YUV formats # http://www.fourcc.org/yuv.php#UYVY diff --git a/src/gallium/auxiliary/util/u_format_zs.c b/src/gallium/auxiliary/util/u_format_zs.c index 792d69c214c..80081e22f7c 100644 --- a/src/gallium/auxiliary/util/u_format_zs.c +++ b/src/gallium/auxiliary/util/u_format_zs.c @@ -918,3 +918,56 @@ util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned d } } + +void +util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_z24_unorm_s8_uscaled_unpack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_z24_unorm_s8_uscaled_pack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_s8_uscaled_z24_unorm_unpack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height) +{ + util_format_s8_uscaled_z24_unorm_pack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} + +void +util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); + +} + +void +util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, + const uint8_t *src_row, unsigned src_stride, + unsigned width, unsigned height) +{ + util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(dst_row, dst_stride, + src_row, src_stride, + width, height); +} diff --git a/src/gallium/auxiliary/util/u_format_zs.h b/src/gallium/auxiliary/util/u_format_zs.h index 650db4b95fd..1604cc3eee2 100644 --- a/src/gallium/auxiliary/util/u_format_zs.h +++ b/src/gallium/auxiliary/util/u_format_zs.h @@ -192,5 +192,21 @@ util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned void util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); +void +util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); +void +util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + +void +util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_sride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); #endif /* U_FORMAT_ZS_H_ */ diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 69a76814945..37294b7203f 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -118,6 +118,11 @@ __inline double __cdecl atan2(double val) #endif +#ifndef M_SQRT2 +#define M_SQRT2 1.41421356237309504880 +#endif + + #if defined(_MSC_VER) #if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index c90b0fdbc3f..5378f2d782f 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -434,8 +434,8 @@ util_pack_color(const float rgba[4], enum pipe_format format, union util_color * /* Integer versions of util_pack_z and util_pack_z_stencil - useful for * constructing clear masks. */ -static INLINE uint -util_pack_uint_z(enum pipe_format format, unsigned z) +static INLINE uint32_t +util_pack_mask_z(enum pipe_format format, uint32_t z) { switch (format) { case PIPE_FORMAT_Z16_UNORM: @@ -452,29 +452,32 @@ util_pack_uint_z(enum pipe_format format, unsigned z) case PIPE_FORMAT_S8_USCALED: return 0; default: - debug_print_format("gallium: unhandled format in util_pack_z()", format); + debug_print_format("gallium: unhandled format in util_pack_mask_z()", format); assert(0); return 0; } } -static INLINE uint -util_pack_uint_z_stencil(enum pipe_format format, double z, uint s) +static INLINE uint32_t +util_pack_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s) { - unsigned packed = util_pack_uint_z(format, z); - - s &= 0xff; + uint32_t packed = util_pack_mask_z(format, z); switch (format) { case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return packed | (s << 24); + packed |= (uint32_t)s << 24; + break; case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - return packed | s; + packed |= s; + break; case PIPE_FORMAT_S8_USCALED: - return packed | s; + packed |= s; + break; default: - return packed; + break; } + + return packed; } @@ -482,9 +485,11 @@ util_pack_uint_z_stencil(enum pipe_format format, double z, uint s) /** * Note: it's assumed that z is in [0,1] */ -static INLINE uint +static INLINE uint32_t util_pack_z(enum pipe_format format, double z) { + union fi fui; + if (z == 0.0) return 0; @@ -492,24 +497,25 @@ util_pack_z(enum pipe_format format, double z) case PIPE_FORMAT_Z16_UNORM: if (z == 1.0) return 0xffff; - return (uint) (z * 0xffff); + return (uint32_t) (z * 0xffff); case PIPE_FORMAT_Z32_UNORM: /* special-case to avoid overflow */ if (z == 1.0) return 0xffffffff; - return (uint) (z * 0xffffffff); + return (uint32_t) (z * 0xffffffff); case PIPE_FORMAT_Z32_FLOAT: - return (uint)z; + fui.f = (float)z; + return fui.ui; case PIPE_FORMAT_Z24_UNORM_S8_USCALED: case PIPE_FORMAT_Z24X8_UNORM: if (z == 1.0) return 0xffffff; - return (uint) (z * 0xffffff); + return (uint32_t) (z * 0xffffff); case PIPE_FORMAT_S8_USCALED_Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: if (z == 1.0) return 0xffffff00; - return ((uint) (z * 0xffffff)) << 8; + return ((uint32_t) (z * 0xffffff)) << 8; case PIPE_FORMAT_S8_USCALED: /* this case can get it via util_pack_z_stencil() */ return 0; @@ -525,14 +531,14 @@ util_pack_z(enum pipe_format format, double z) * Pack Z and/or stencil values into a 32-bit value described by format. * Note: it's assumed that z is in [0,1] and s in [0,255] */ -static INLINE uint -util_pack_z_stencil(enum pipe_format format, double z, uint s) +static INLINE uint32_t +util_pack_z_stencil(enum pipe_format format, double z, uint8_t s) { - unsigned packed = util_pack_z(format, z); + uint32_t packed = util_pack_z(format, z); switch (format) { case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - packed |= s << 24; + packed |= (uint32_t)s << 24; break; case PIPE_FORMAT_S8_USCALED_Z24_UNORM: packed |= s; diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h index 03198c91da4..1df6c872677 100644 --- a/src/gallium/auxiliary/util/u_sse.h +++ b/src/gallium/auxiliary/util/u_sse.h @@ -71,6 +71,96 @@ _mm_castps_si128(__m128 a) #endif /* defined(_MSC_VER) && _MSC_VER < 1500 */ +union m128i { + __m128i m; + ubyte ub[16]; + ushort us[8]; + uint ui[4]; +}; + +static INLINE void u_print_epi8(const char *name, __m128i r) +{ + union { __m128i m; ubyte ub[16]; } u; + u.m = r; + + debug_printf("%s: " + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x\n", + name, + u.ub[0], u.ub[1], u.ub[2], u.ub[3], + u.ub[4], u.ub[5], u.ub[6], u.ub[7], + u.ub[8], u.ub[9], u.ub[10], u.ub[11], + u.ub[12], u.ub[13], u.ub[14], u.ub[15]); +} + +static INLINE void u_print_epi16(const char *name, __m128i r) +{ + union { __m128i m; ushort us[8]; } u; + u.m = r; + + debug_printf("%s: " + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x\n", + name, + u.us[0], u.us[1], u.us[2], u.us[3], + u.us[4], u.us[5], u.us[6], u.us[7]); +} + +static INLINE void u_print_epi32(const char *name, __m128i r) +{ + union { __m128i m; uint ui[4]; } u; + u.m = r; + + debug_printf("%s: " + "%08x/" + "%08x/" + "%08x/" + "%08x\n", + name, + u.ui[0], u.ui[1], u.ui[2], u.ui[3]); +} + +static INLINE void u_print_ps(const char *name, __m128 r) +{ + union { __m128 m; float f[4]; } u; + u.m = r; + + debug_printf("%s: " + "%f/" + "%f/" + "%f/" + "%f\n", + name, + u.f[0], u.f[1], u.f[2], u.f[3]); +} + + +#define U_DUMP_EPI32(a) u_print_epi32(#a, a) +#define U_DUMP_EPI16(a) u_print_epi16(#a, a) +#define U_DUMP_EPI8(a) u_print_epi8(#a, a) +#define U_DUMP_PS(a) u_print_ps(#a, a) + + #if defined(PIPE_ARCH_SSSE3) @@ -98,6 +188,68 @@ _mm_shuffle_epi8(__m128i a, __m128i mask) #endif /* !PIPE_ARCH_SSSE3 */ -#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */ + + +/* Provide an SSE2 implementation of _mm_mullo_epi32() in terms of + * _mm_mul_epu32(). + * + * I suspect this works fine for us because one of our operands is + * always positive, but not sure that this can be used for general + * signed integer multiplication. + * + * This seems close enough to the speed of SSE4 and the real + * _mm_mullo_epi32() intrinsic as to not justify adding an sse4 + * dependency at this point. + */ +static INLINE __m128i mm_mullo_epi32(const __m128i a, const __m128i b) +{ + __m128i a4 = _mm_srli_epi64(a, 32); /* shift by one dword */ + __m128i b4 = _mm_srli_epi64(b, 32); /* shift by one dword */ + __m128i ba = _mm_mul_epu32(b, a); /* multply dwords 0, 2 */ + __m128i b4a4 = _mm_mul_epu32(b4, a4); /* multiply dwords 1, 3 */ + + /* Interleave the results, either with shuffles or (slightly + * faster) direct bit operations: + */ +#if 0 + __m128i ba8 = _mm_shuffle_epi32(ba, 8); + __m128i b4a48 = _mm_shuffle_epi32(b4a4, 8); + __m128i result = _mm_unpacklo_epi32(ba8, b4a48); +#else + __m128i mask = _mm_setr_epi32(~0,0,~0,0); + __m128i ba_mask = _mm_and_si128(ba, mask); + __m128i b4a4_mask_shift = _mm_slli_epi64(b4a4, 32); + __m128i result = _mm_or_si128(ba_mask, b4a4_mask_shift); +#endif + + return result; +} + + +static INLINE void +transpose4_epi32(const __m128i * restrict a, + const __m128i * restrict b, + const __m128i * restrict c, + const __m128i * restrict d, + __m128i * restrict o, + __m128i * restrict p, + __m128i * restrict q, + __m128i * restrict r) +{ + __m128i t0 = _mm_unpacklo_epi32(*a, *b); + __m128i t1 = _mm_unpacklo_epi32(*c, *d); + __m128i t2 = _mm_unpackhi_epi32(*a, *b); + __m128i t3 = _mm_unpackhi_epi32(*c, *d); + + *o = _mm_unpacklo_epi64(t0, t1); + *p = _mm_unpackhi_epi64(t0, t1); + *q = _mm_unpacklo_epi64(t2, t3); + *r = _mm_unpackhi_epi64(t2, t3); +} + +#define SCALAR_EPI32(m, i) _mm_shuffle_epi32((m), _MM_SHUFFLE(i,i,i,i)) + + +#endif /* PIPE_ARCH_SSE */ #endif /* U_SSE_H_ */ diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index f7aa1403d08..44cadbfcdd0 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -217,6 +217,81 @@ z24s8_get_tile_rgba(const unsigned *src, } } +/*** PIPE_FORMAT_S8X24_USCALED ***/ + +/** + * Return S component as four uint32_t in [0..255]. Z part ignored. + */ +static void +s8x24_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float)((*src++ >> 24) & 0xff); + } + + p += dst_stride; + } +} + +/*** PIPE_FORMAT_X24S8_USCALED ***/ + +/** + * Return S component as four uint32_t in [0..255]. Z part ignored. + */ +static void +x24s8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float)(*src++ & 0xff); + } + p += dst_stride; + } +} + + +/** + * Return S component as four uint32_t in [0..255]. Z part ignored. + */ +static void +s8_get_tile_rgba(const unsigned char *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float)(*src++ & 0xff); + } + p += dst_stride; + } +} /*** PIPE_FORMAT_Z32_FLOAT ***/ @@ -261,10 +336,19 @@ pipe_tile_raw_to_rgba(enum pipe_format format, case PIPE_FORMAT_Z24X8_UNORM: s8z24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_S8_USCALED: + s8_get_tile_rgba((unsigned char *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_X24S8_USCALED: + s8x24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; case PIPE_FORMAT_S8_USCALED_Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: z24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_S8X24_USCALED: + x24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; case PIPE_FORMAT_Z32_FLOAT: z32f_get_tile_rgba((float *) src, w, h, dst, dst_stride); break; |