diff options
Diffstat (limited to 'src/mesa/drivers/dri/common/spantmp2.h')
-rw-r--r-- | src/mesa/drivers/dri/common/spantmp2.h | 564 |
1 files changed, 564 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/common/spantmp2.h b/src/mesa/drivers/dri/common/spantmp2.h new file mode 100644 index 00000000000..ce0a66de702 --- /dev/null +++ b/src/mesa/drivers/dri/common/spantmp2.h @@ -0,0 +1,564 @@ +/* + * Copyright 2000-2001 VA Linux Systems, Inc. + * (C) Copyright IBM Corporation 2004 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file spantmp2.h + * + * Template file of span read / write functions. + * + * \author Keith Whitwell <[email protected]> + * \author Gareth Hughes <[email protected]> + * \author Ian Romanick <[email protected]> + */ + +#include "colormac.h" + +#ifndef DBG +#define DBG 0 +#endif + +#ifndef HW_WRITE_LOCK +#define HW_WRITE_LOCK() HW_LOCK() +#endif + +#ifndef HW_WRITE_UNLOCK +#define HW_WRITE_UNLOCK() HW_UNLOCK() +#endif + +#ifndef HW_READ_LOCK +#define HW_READ_LOCK() HW_LOCK() +#endif + +#ifndef HW_READ_UNLOCK +#define HW_READ_UNLOCK() HW_UNLOCK() +#endif + +#ifndef HW_READ_CLIPLOOP +#define HW_READ_CLIPLOOP() HW_CLIPLOOP() +#endif + +#ifndef HW_WRITE_CLIPLOOP +#define HW_WRITE_CLIPLOOP() HW_CLIPLOOP() +#endif + +#if (SPANTMP_PIXEL_FMT == GL_RGB) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5) + +#define INIT_MONO_PIXEL(p, color) \ + p = PACK_COLOR_565( color[0], color[1], color[2] ) + +#define WRITE_RGBA( _x, _y, r, g, b, a ) \ + do { \ + GLshort * _p = (GLshort *) GET_DST_PTR(_x, _y); \ + _p[0] = ((((int)r & 0xf8) << 8) | (((int)g & 0xfc) << 3) | \ + (((int)b & 0xf8) >> 3)); \ + } while(0) + +#define WRITE_PIXEL( _x, _y, p ) \ + do { \ + GLushort * _p = (GLushort *) GET_DST_PTR(_x, _y); \ + _p[0] = p; \ + } while(0) + +#define READ_RGBA( rgba, _x, _y ) \ + do { \ + GLushort p = *(volatile GLshort *) GET_SRC_PTR(_x, _y); \ + rgba[0] = ((p >> 8) & 0xf8) * 255 / 0xf8; \ + rgba[1] = ((p >> 3) & 0xfc) * 255 / 0xfc; \ + rgba[2] = ((p << 3) & 0xf8) * 255 / 0xf8; \ + rgba[3] = 0xff; \ + } while (0) + +#elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV) + +# define INIT_MONO_PIXEL(p, color) \ + p = PACK_COLOR_8888(color[3], color[0], color[1], color[2]) + +# define WRITE_RGBA(_x, _y, r, g, b, a) \ + do { \ + GLuint * _p = (GLuint *) GET_DST_PTR(_x, _y); \ + _p[0] = ((r << 16) | (g << 8) | (b << 0) | (a << 24)); \ + } while(0) + +#define WRITE_PIXEL(_x, _y, p) \ + do { \ + GLuint * _p = (GLuint *) GET_DST_PTR(_x, _y); \ + _p[0] = p; \ + } while(0) + +# if defined( USE_X86_ASM ) +# define READ_RGBA(rgba, _x, _y) \ + do { \ + GLuint p = *(volatile GLuint *) GET_SRC_PTR(_x, _y); \ + __asm__ __volatile__( "bswap %0; rorl $8, %0" \ + : "=r" (p) : "r" (p) ); \ + ((GLuint *)rgba)[0] = p; \ + } while (0) +# else +# define READ_RGBA( rgba, _x, _y ) \ + do { \ + GLuint p = *(volatile GLuint *) GET_SRC_PTR(_x, _y); \ + rgba[0] = (p >> 16) & 0xff; \ + rgba[1] = (p >> 8) & 0xff; \ + rgba[2] = (p >> 0) & 0xff; \ + rgba[3] = (p >> 24) & 0xff; \ + } while (0) +# endif + +#else +#error SPANTMP_PIXEL_FMT must be set to a valid value! +#endif + +#if defined( USE_MMX_ASM ) || defined( USE_SSE_ASM ) +#include "x86/read_rgba_span_x86.h" +#include "x86/common_x86_asm.h" +#endif + +static void TAG(WriteRGBASpan)( const GLcontext *ctx, + GLuint n, GLint x, GLint y, + const GLubyte rgba[][4], + const GLubyte mask[] ) +{ + HW_WRITE_LOCK() + { + GLint x1; + GLint n1; + LOCAL_VARS; + + y = Y_FLIP(y); + + HW_WRITE_CLIPLOOP() + { + GLint i = 0; + CLIPSPAN(x,y,n,x1,n1,i); + + if (DBG) fprintf(stderr, "WriteRGBASpan %d..%d (x1 %d)\n", + (int)i, (int)n1, (int)x1); + + if (mask) + { + for (;n1>0;i++,x1++,n1--) + if (mask[i]) + WRITE_RGBA( x1, y, + rgba[i][0], rgba[i][1], + rgba[i][2], rgba[i][3] ); + } + else + { + for (;n1>0;i++,x1++,n1--) + WRITE_RGBA( x1, y, + rgba[i][0], rgba[i][1], + rgba[i][2], rgba[i][3] ); + } + } + HW_ENDCLIPLOOP(); + } + HW_WRITE_UNLOCK(); +} + +static void TAG(WriteRGBSpan)( const GLcontext *ctx, + GLuint n, GLint x, GLint y, + const GLubyte rgb[][3], + const GLubyte mask[] ) +{ + HW_WRITE_LOCK() + { + GLint x1; + GLint n1; + LOCAL_VARS; + + y = Y_FLIP(y); + + HW_WRITE_CLIPLOOP() + { + GLint i = 0; + CLIPSPAN(x,y,n,x1,n1,i); + + if (DBG) fprintf(stderr, "WriteRGBSpan %d..%d (x1 %d)\n", + (int)i, (int)n1, (int)x1); + + if (mask) + { + for (;n1>0;i++,x1++,n1--) + if (mask[i]) + WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 ); + } + else + { + for (;n1>0;i++,x1++,n1--) + WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 ); + } + } + HW_ENDCLIPLOOP(); + } + HW_WRITE_UNLOCK(); +} + +static void TAG(WriteRGBAPixels)( const GLcontext *ctx, + GLuint n, + const GLint x[], + const GLint y[], + const GLubyte rgba[][4], + const GLubyte mask[] ) +{ + HW_WRITE_LOCK() + { + GLint i; + LOCAL_VARS; + + if (DBG) fprintf(stderr, "WriteRGBAPixels\n"); + + HW_WRITE_CLIPLOOP() + { + if (mask) + { + for (i=0;i<n;i++) + { + if (mask[i]) { + const int fy = Y_FLIP(y[i]); + if (CLIPPIXEL(x[i],fy)) + WRITE_RGBA( x[i], fy, + rgba[i][0], rgba[i][1], + rgba[i][2], rgba[i][3] ); + } + } + } + else + { + for (i=0;i<n;i++) + { + const int fy = Y_FLIP(y[i]); + if (CLIPPIXEL(x[i],fy)) + WRITE_RGBA( x[i], fy, + rgba[i][0], rgba[i][1], + rgba[i][2], rgba[i][3] ); + } + } + } + HW_ENDCLIPLOOP(); + } + HW_WRITE_UNLOCK(); +} + + +static void TAG(WriteMonoRGBASpan)( const GLcontext *ctx, + GLuint n, GLint x, GLint y, + const GLchan color[4], + const GLubyte mask[] ) +{ + HW_WRITE_LOCK() + { + GLint x1; + GLint n1; + LOCAL_VARS; + INIT_MONO_PIXEL(p, color); + + y = Y_FLIP( y ); + + if (DBG) fprintf(stderr, "WriteMonoRGBASpan\n"); + + HW_WRITE_CLIPLOOP() + { + GLint i = 0; + CLIPSPAN(x,y,n,x1,n1,i); + if (mask) + { + for (;n1>0;i++,x1++,n1--) + if (mask[i]) + WRITE_PIXEL( x1, y, p ); + } + else + { + for (;n1>0;i++,x1++,n1--) + WRITE_PIXEL( x1, y, p ); + } + } + HW_ENDCLIPLOOP(); + } + HW_WRITE_UNLOCK(); +} + + +static void TAG(WriteMonoRGBAPixels)( const GLcontext *ctx, + GLuint n, + const GLint x[], const GLint y[], + const GLchan color[], + const GLubyte mask[] ) +{ + HW_WRITE_LOCK() + { + GLint i; + LOCAL_VARS; + INIT_MONO_PIXEL(p, color); + + if (DBG) fprintf(stderr, "WriteMonoRGBAPixels\n"); + + HW_WRITE_CLIPLOOP() + { + if (mask) + { + for (i=0;i<n;i++) + if (mask[i]) { + int fy = Y_FLIP(y[i]); + if (CLIPPIXEL( x[i], fy )) + WRITE_PIXEL( x[i], fy, p ); + } + } + else + { + for (i=0;i<n;i++) { + int fy = Y_FLIP(y[i]); + if (CLIPPIXEL( x[i], fy )) + WRITE_PIXEL( x[i], fy, p ); + } + } + } + HW_ENDCLIPLOOP(); + } + HW_WRITE_UNLOCK(); +} + + +static void TAG(ReadRGBASpan)( const GLcontext *ctx, + GLuint n, GLint x, GLint y, + GLubyte rgba[][4]) +{ + HW_READ_LOCK() + { + GLint x1,n1; + LOCAL_VARS; + + y = Y_FLIP(y); + + if (DBG) fprintf(stderr, "ReadRGBASpan\n"); + + HW_READ_CLIPLOOP() + { + GLint i = 0; + CLIPSPAN(x,y,n,x1,n1,i); + for (;n1>0;i++,x1++,n1--) + READ_RGBA( rgba[i], x1, y ); + } + HW_ENDCLIPLOOP(); + } + HW_READ_UNLOCK(); +} + + +#if defined(USE_MMX_ASM) +static void TAG2(ReadRGBASpan,_MMX)( const GLcontext *ctx, + GLuint n, GLint x, GLint y, + GLubyte rgba[][4]) +{ +#ifndef USE_INNER_EMMS + /* The EMMS instruction is directly in-lined here because using GCC's + * built-in _mm_empty function was found to utterly destroy performance. + */ + __asm__ __volatile__( "emms" ); +#endif + + HW_LOCK() + { + GLint x1,n1; + LOCAL_VARS; + + y = Y_FLIP(y); + + if (DBG) fprintf(stderr, "ReadRGBASpan\n"); + + HW_READ_CLIPLOOP() + { + GLint i = 0; + CLIPSPAN(x,y,n,x1,n1,i); + + { + const char * src = (read_buf + x1*4 + y*pitch); + _generic_read_RGBA_span_BGRA8888_REV_MMX( src, rgba[i], n1 ); + } + } + HW_ENDCLIPLOOP(); + } + HW_UNLOCK(); +#ifndef USE_INNER_EMMS + __asm__ __volatile__( "emms" ); +#endif +} +#endif + + +#if defined(USE_SSE_ASM) +static void TAG2(ReadRGBASpan,_SSE2)( const GLcontext *ctx, + GLuint n, GLint x, GLint y, + GLubyte rgba[][4]) +{ + HW_LOCK() + { + GLint x1,n1; + LOCAL_VARS; + + y = Y_FLIP(y); + + if (DBG) fprintf(stderr, "ReadRGBASpan\n"); + + HW_READ_CLIPLOOP() + { + GLint i = 0; + CLIPSPAN(x,y,n,x1,n1,i); + + { + const char * src = (read_buf + x1*4 + y*pitch); + _generic_read_RGBA_span_BGRA8888_REV_SSE2( src, rgba[i], n1 ); + } + } + HW_ENDCLIPLOOP(); + } + HW_UNLOCK(); +} +#endif + +#if defined(USE_SSE_ASM) +static void TAG2(ReadRGBASpan,_SSE)( const GLcontext *ctx, + GLuint n, GLint x, GLint y, + GLubyte rgba[][4]) +{ +#ifndef USE_INNER_EMMS + /* The EMMS instruction is directly in-lined here because using GCC's + * built-in _mm_empty function was found to utterly destroy performance. + */ + __asm__ __volatile__( "emms" ); +#endif + + HW_LOCK() + { + GLint x1,n1; + LOCAL_VARS; + + y = Y_FLIP(y); + + if (DBG) fprintf(stderr, "ReadRGBASpan\n"); + + HW_READ_CLIPLOOP() + { + GLint i = 0; + CLIPSPAN(x,y,n,x1,n1,i); + + { + const char * src = (read_buf + x1*4 + y*pitch); + _generic_read_RGBA_span_BGRA8888_REV_SSE( src, rgba[i], n1 ); + } + } + HW_ENDCLIPLOOP(); + } + HW_UNLOCK(); +#ifndef USE_INNER_EMMS + __asm__ __volatile__( "emms" ); +#endif +} +#endif + + +static void TAG(ReadRGBAPixels)( const GLcontext *ctx, + GLuint n, const GLint x[], const GLint y[], + GLubyte rgba[][4], const GLubyte mask[] ) +{ + HW_READ_LOCK() + { + GLint i; + LOCAL_VARS; + + if (DBG) fprintf(stderr, "ReadRGBAPixels\n"); + + HW_READ_CLIPLOOP() + { + if (mask) + { + for (i=0;i<n;i++) + if (mask[i]) { + int fy = Y_FLIP( y[i] ); + if (CLIPPIXEL( x[i], fy )) + READ_RGBA( rgba[i], x[i], fy ); + } + } + else + { + for (i=0;i<n;i++) { + int fy = Y_FLIP( y[i] ); + if (CLIPPIXEL( x[i], fy )) + READ_RGBA( rgba[i], x[i], fy ); + } + } + } + HW_ENDCLIPLOOP(); + } + HW_READ_UNLOCK(); +} + +static void TAG(InitPointers)(struct swrast_device_driver *swdd) +{ + swdd->WriteRGBASpan = TAG(WriteRGBASpan); + swdd->WriteRGBSpan = TAG(WriteRGBSpan); + swdd->WriteMonoRGBASpan = TAG(WriteMonoRGBASpan); + swdd->WriteRGBAPixels = TAG(WriteRGBAPixels); + swdd->WriteMonoRGBAPixels = TAG(WriteMonoRGBAPixels); + swdd->ReadRGBAPixels = TAG(ReadRGBAPixels); + +#if defined(USE_SSE_ASM) + if ( cpu_has_xmm2 ) { + if (DBG) fprintf( stderr, "Using %s version of ReadRGBASpan\n", "SSE2" ); + swdd->ReadRGBASpan = TAG2(ReadRGBASpan, _SSE2); + } + else +#endif +#if defined(USE_SSE_ASM) + if ( cpu_has_xmm ) { + if (DBG) fprintf( stderr, "Using %s version of ReadRGBASpan\n", "SSE" ); + swdd->ReadRGBASpan = TAG2(ReadRGBASpan, _SSE); + } + else +#endif +#if defined(USE_MMX_ASM) + if ( cpu_has_mmx ) { + if (DBG) fprintf( stderr, "Using %s version of ReadRGBASpan\n", "MMX" ); + swdd->ReadRGBASpan = TAG2(ReadRGBASpan, _MMX); + } + else +#endif + { + if (DBG) fprintf( stderr, "Using %s version of ReadRGBASpan\n", "C" ); + swdd->ReadRGBASpan = TAG(ReadRGBASpan); + } + +} + + +#undef INIT_MONO_PIXEL +#undef WRITE_PIXEL +#undef WRITE_RGBA +#undef READ_RGBA +#undef TAG +#undef TAG2 +#undef GET_SRC_PTR +#undef GET_DST_PTR +#undef SPANTMP_PIXEL_FMT +#undef SPANTMP_PIXEL_TYPE |